X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fdriver%2Fmangler%2Fghc-asm.lprl;h=760175a5751c22f47fe5e305db24f1ccec8af787;hb=6f917d9600e7e5ba63a33f4079bcbc6ebbdb10fb;hp=77010c8555f59889cd5174945dfc8d38889956bf;hpb=6f985ae88171fb52ca68d75f667669e139b6b8c2;p=ghc-hetmet.git diff --git a/ghc/driver/mangler/ghc-asm.lprl b/ghc/driver/mangler/ghc-asm.lprl index 77010c8..760175a 100644 --- a/ghc/driver/mangler/ghc-asm.lprl +++ b/ghc/driver/mangler/ghc-asm.lprl @@ -146,7 +146,7 @@ sub init_TARGET_STUFF { $T_X86_PRE_LLBL = 'L'; $T_X86_BADJMP = '^\tjmp [^L\*]'; - $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s+\d+(,0x90)?|\.globl\s+\S+|\.text|\.data|\.stab[^n].*|\.type\s+.*|\.size\s+.*|\.lcomm.*)\n)'; + $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s.*|\.globl\s+\S+|\.text|\.data|\.stab[^n].*|\.type\s+.*|\.size\s+.*|\.lcomm.*)\n)'; $T_COPY_DIRVS = '\.(globl|stab|lcomm)'; $T_DOT_WORD = '\.(long|word|value|byte|space)'; $T_DOT_GLOBAL = '\.globl'; @@ -160,19 +160,19 @@ sub init_TARGET_STUFF { $T_HDR_vector = "\.text\n\t\.align 2\n"; # NB: requires padding #--------------------------------------------------------# - } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|freebsd|netbsd|openbsd)$/ ) { + } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|gnu|freebsd|netbsd|openbsd|kfreebsdgnu)$/ ) { $T_STABBY = 0; # 1 iff .stab things (usually if a.out format) $T_US = ''; # _ if symbols have an underscore on the front $T_PRE_APP = # regexp that says what comes before APP/NO_APP - ($TargetPlatform =~ /-(linux|freebsd|netbsd|openbsd)$/) ? '#' : '/' ; + ($TargetPlatform =~ /-(linux|gnu|freebsd|netbsd|openbsd)$/) ? 
'#' : '/' ; $T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like $T_POST_LBL = ':'; $T_X86_PRE_LLBL_PAT = '\.L'; $T_X86_PRE_LLBL = '.L'; $T_X86_BADJMP = '^\tjmp\s+[^\.\*]'; - $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s+\d+(,\s*0x90)?|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)'; + $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s.*|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)'; $T_COPY_DIRVS = '^\s*\.(globl|type|size|local)'; $T_DOT_WORD = '\.(long|value|word|byte|zero)'; @@ -223,10 +223,21 @@ sub init_TARGET_STUFF { $T_DOT_WORD = '\.(quad|long|value|byte|zero)'; $T_DOT_GLOBAL = '\.global'; + + $T_HDR_literal16 = "\.section\t\.rodata.cst16\n\t.align 16\n"; $T_HDR_literal = "\.section\t\.rodata\n"; + $T_HDR_misc = "\.text\n\t\.align 8\n"; $T_HDR_data = "\.data\n\t\.align 8\n"; $T_HDR_rodata = "\.section\t\.rodata\n\t\.align 8\n"; + + # the assembler on x86_64/Linux refuses to generate code for + # .quad x - y + # where x is in the text section and y in the rodata section. + # It works if y is in the text section, though. This is probably + # going to cause difficulties for PIC, I imagine. + $T_HDR_relrodata= "\.text\n\t\.align 8\n"; + $T_HDR_closure = "\.data\n\t\.align 8\n"; $T_HDR_info = "\.text\n\t\.align 8\n"; $T_HDR_entry = "\.text\n\t\.align 8\n"; @@ -279,13 +290,41 @@ sub init_TARGET_STUFF { $T_HDR_vector = "\t\.text\n\t\.align 2\n"; #--------------------------------------------------------# - } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ ) { + } elsif ( $TargetPlatform =~ /^powerpc-apple-darwin.*/ ) { + # Apple PowerPC Darwin/MacOS X. 
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format) + $T_US = '_'; # _ if symbols have an underscore on the front + $T_PRE_APP = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP + $T_CONST_LBL = '^\LC\d+:'; # regexp for what such a lbl looks like + $T_POST_LBL = ':'; + + $T_MOVE_DIRVS = '^(\s*(\.align \d+|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)'; + $T_COPY_DIRVS = '\.(globl|lcomm)'; + + $T_DOT_WORD = '\.(long|short|byte|fill|space)'; + $T_DOT_GLOBAL = '\.globl'; + $T_HDR_toc = "\.toc\n"; + $T_HDR_literal = "\t\.const\n\t\.align 2\n"; + $T_HDR_misc = "\t\.text\n\t\.align 2\n"; + $T_HDR_data = "\t\.data\n\t\.align 2\n"; + $T_HDR_rodata = "\t\.const\n\t\.align 2\n"; + $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n"; + $T_HDR_closure = "\t\.data\n\t\.align 2\n"; + $T_HDR_info = "\t\.text\n\t\.align 2\n"; + $T_HDR_entry = "\t\.text\n\t\.align 2\n"; + $T_HDR_vector = "\t\.text\n\t\.align 2\n"; + + #--------------------------------------------------------# + } elsif ( $TargetPlatform =~ /^i386-apple-darwin.*/ ) { # Apple PowerPC Darwin/MacOS X. 
$T_STABBY = 0; # 1 iff .stab things (usually if a.out format) $T_US = '_'; # _ if symbols have an underscore on the front $T_PRE_APP = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP $T_CONST_LBL = '^\LC\d+:'; # regexp for what such a lbl looks like $T_POST_LBL = ':'; + $T_X86_PRE_LLBL_PAT = 'L'; + $T_X86_PRE_LLBL = 'L'; + $T_X86_BADJMP = '^\tjmp [^L\*]'; $T_MOVE_DIRVS = '^(\s*(\.align \d+|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)'; $T_COPY_DIRVS = '\.(globl|lcomm)'; @@ -293,11 +332,13 @@ sub init_TARGET_STUFF { $T_DOT_WORD = '\.(long|short|byte|fill|space)'; $T_DOT_GLOBAL = '\.globl'; $T_HDR_toc = "\.toc\n"; - $T_HDR_literal = "\t\.const_data\n\t\.align 2\n"; + $T_HDR_literal16= "\t\.literal8\n\t\.align 4\n"; + $T_HDR_literal = "\t\.const\n\t\.align 4\n"; $T_HDR_misc = "\t\.text\n\t\.align 2\n"; $T_HDR_data = "\t\.data\n\t\.align 2\n"; - $T_HDR_rodata = "\t\.const_data\n\t\.align 2\n"; - $T_HDR_closure = "\t\.const_data\n\t\.align 2\n"; + $T_HDR_rodata = "\t\.const\n\t\.align 2\n"; + $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n"; + $T_HDR_closure = "\t\.data\n\t\.align 2\n"; $T_HDR_info = "\t\.text\n\t\.align 2\n"; $T_HDR_entry = "\t\.text\n\t\.align 2\n"; $T_HDR_vector = "\t\.text\n\t\.align 2\n"; @@ -359,8 +400,8 @@ sub init_TARGET_STUFF { $T_CONST_LBL = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like $T_POST_LBL = ':'; - $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.stab.*|\.section.*|\s+\.type.*|\s+\.size.*)\n)'; - $T_COPY_DIRVS = '\.(global|proc|stab)'; + $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\s+\.local\s+\S+|\.text|\.data|\.stab.*|\s*\.section.*|\s+\.type.*|\s+\.size.*)\n)'; + $T_COPY_DIRVS = '\.(global|local|proc|stab)'; $T_DOT_WORD = '\.(long|word|byte|half|skip|uahalf|uaword)'; $T_DOT_GLOBAL = '^\t\.global'; @@ -425,6 +466,12 @@ sub 
init_TARGET_STUFF { exit 1; } + if($T_HDR_relrodata eq "") { + # default values: + # relrodata defaults to rodata. + $T_HDR_relrodata = $T_HDR_rodata; + } + if ( 0 ) { print STDERR "T_STABBY: $T_STABBY\n"; print STDERR "T_US: $T_US\n"; @@ -524,6 +571,34 @@ sub mangle_asm { $chk[++$i] = $_; $chkcat[$i] = 'rodata'; $chksymb[$i] = ''; + } elsif ( $TargetPlatform =~ /-darwin/ + && (/^\s*\.subsections_via_symbols/ + ||/^\s*\.no_dead_strip.*/)) { + # Don't allow Apple's linker to do any dead-stripping of symbols + # in this file, because it will mess up info-tables in mangled + # code. + # The .no_dead_strip directives are actually put there by + # the gcc3 "used" attribute on entry points. + + } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && ( + /^\s*\.picsymbol_stub/ + || /^\s*\.section __TEXT,__picsymbol_stub\d,.*/ + || /^\s*\.section __TEXT,__picsymbolstub\d,.*/ + || /^\s*\.symbol_stub/ + || /^\s*\.section __TEXT,__symbol_stub\d,.*/ + || /^\s*\.section __TEXT,__symbolstub\d,.*/ + || /^\s*\.lazy_symbol_pointer/ + || /^\s*\.non_lazy_symbol_pointer/ )) + { + $chk[++$i] = $_; + $chkcat[$i] = 'dyld'; + $chksymb[$i] = ''; + + } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^\s*\.data/) + { # non_lazy_symbol_ptrs that point to local symbols + $chk[++$i] = $_; + $chkcat[$i] = 'dyld'; + $chksymb[$i] = ''; } elsif ( /^\s+/ ) { # most common case first -- a simple line! # duplicated from the bottom @@ -533,12 +608,6 @@ sub mangle_asm { } elsif ( /\.\.ng:$/ && $TargetPlatform =~ /^alpha-/ ) { # Alphas: Local labels not to be confused with new chunks $chk[$i] .= $_; - } elsif ( $TargetPlatform =~ /-darwin/ - && /^\t\.subsections_via_symbols/) { - # Don't allow Apple's linker to do any dead-stripping of symbols - # in this file, because it will mess up info-tables in mangled - # code. 
- # NB: all the rest start with a non-space } elsif ( $TargetPlatform =~ /^mips-/ @@ -643,6 +712,15 @@ sub mangle_asm { $chkcat[$i] = 'misc'; $chksymb[$i] = $1; + } elsif ( $TargetPlatform =~ /^i386-apple-darwin/ && /^(___i686\.get_pc_thunk\.[abcd]x):/o) { + # To handle PIC on Darwin/x86, we need to appropriately pass through + # the get_pc_thunk functions. They need to be put into a special section + # marked as coalesced (otherwise the .weak_definition doesn't work + # on Darwin). + $chk[++$i] = $_; + $chkcat[$i] = 'get_pc_thunk'; + $chksymb[$i] = $1; + } elsif ( /^${T_US}[A-Za-z0-9_]/o && ( $TargetPlatform !~ /^hppa/ # need to avoid local labels in this case || ! /^L\$\d+$/ ) @@ -670,31 +748,13 @@ sub mangle_asm { || /^${T_US}.*_closure_tbl${T_POST_LBL}$/o # closure tables ) { - $chkcat[$i] = 'rodata'; + $chkcat[$i] = 'relrodata'; } else { print STDERR "Warning: retaining unknown function \`$thing' in output from C compiler\n"; $chkcat[$i] = 'unknown'; } - } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && ( - /^\.picsymbol_stub/ - || /^\.section __TEXT,__picsymbol_stub1,.*/ - || /^\.section __TEXT,__picsymbolstub1,.*/ - || /^\.symbol_stub/ - || /^\.section __TEXT,__symbol_stub1,.*/ - || /^\.section __TEXT,__symbolstub1,.*/ - || /^\.lazy_symbol_pointer/ - || /^\.non_lazy_symbol_pointer/ )) - { - $chk[++$i] = $_; - $chkcat[$i] = 'dyld'; - $chksymb[$i] = ''; - } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && /^\.data/ && $chkcat[$i] eq 'dyld') - { # non_lazy_symbol_ptrs that point to local symbols - $chk[++$i] = $_; - $chkcat[$i] = 'dyld'; - $chksymb[$i] = ''; } elsif ( $TargetPlatform =~ /^powerpc-.*-linux/ && /^\.LCTOC1 = /o ) { # PowerPC Linux's large-model PIC (-fPIC) generates a gobal offset # table "by hand". Be sure to copy it over. 
@@ -756,23 +816,47 @@ sub mangle_asm { $r =~ s/^\s*\n//; if ($TargetPlatform =~ /^i386-/) { - $p =~ s/^\tpushl\s+\%edi\n//; - $p =~ s/^\tpushl\s+\%esi\n//; - $p =~ s/^\tpushl\s+\%ebx\n//; - $p =~ s/^\tmovl\s+\%esi,\s*\d*\(\%esp\)\n//; - $p =~ s/^\tmovl\s+\%edi,\s*\d*\(\%esp\)\n//; - $p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//; - $p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-cygwin32/); - - # GCC 3.1 is in the habit of adding spurious writes to the - # stack in the prologue. Just to be on the safe side, - # chuck these over the fence into the main code. - while ($p =~ /^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n/) { - # print "Spurious instruction: $&"; - $p = $` . $'; - $r = $& . $r; + if ($p =~ /^\tsubl\s+\$(\d+),\s*\%esp\n/) { + if ($1 >= 8192) { + die "Error: reserved stack space exceeded!\n Possible workarounds: compile with -fasm, or try another version of gcc.\n" + } } + # gcc 3.4.3 puts this kind of stuff in the prologue, eg. + # when compiling PrimOps.cmm with -optc-O2: + # xorl %ecx, %ecx + # xorl %edx, %edx + # movl %ecx, 16(%esp) + # movl %edx, 20(%esp) + # but then the code of the function doesn't assume + # anything about the contents of these stack locations. + # I think it's to do with the use of inline functions for + # PK_Word64() and friends, where gcc is initialising the + # contents of the struct to zero, and failing to optimise + # away the initialisation. Let's live dangerously and + # discard these initialisations. 
+ + $p =~ s/^\tpushl\s+\%e(di|si|bx)\n//g; + $p =~ s/^\txorl\s+\%e(ax|cx|dx),\s*\%e(ax|cx|dx)\n//g; + $p =~ s/^\tmovl\s+\%e(ax|cx|dx|si|di),\s*\d*\(\%esp\)\n//g; + $p =~ s/^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n//g; + $p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//; + $p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-(cygwin32|mingw32)/); + + if ($TargetPlatform =~ /^i386-apple-darwin/) { + $pcrel_label = $p; + $pcrel_label =~ s/(.|\n)*^(\"?L\d+\$pb\"?):\n(.|\n)*/$2/ or $pcrel_label = ""; + $pcrel_reg = $p; + $pcrel_reg =~ s/(.|\n)*.*___i686\.get_pc_thunk\.([abcd]x)\n(.|\n)*/$2/ or $pcrel_reg = ""; + $p =~ s/^\s+call\s+___i686\.get_pc_thunk\..x//; + $p =~ s/^\"?L\d+\$pb\"?:\n//; + + if ($pcrel_reg eq "bx") { + # Bad gcc. Goes and uses %ebx, our BaseReg, for PIC. Bad gcc. + die "Darwin/x86: -fPIC -via-C doesn't work yet, use -fasm. Aborting." + } + } + } elsif ($TargetPlatform =~ /^x86_64-/) { $p =~ s/^\tpushq\s+\%r(bx|bp|12|13|14)\n//g; $p =~ s/^\tmovq\s+\%r(bx|bp|12|13|14),\s*\d*\(\%rsp\)\n//g; @@ -815,7 +899,7 @@ sub mangle_asm { $p =~ s/^\tsw\t\$fp,\d+\(\$sp\)\n//; $p =~ s/^\tsw\t\$28,\d+\(\$sp\)\n//; $p =~ s/__FRAME__/$FRAME/; - } elsif ($TargetPlatform =~ /^powerpc-apple-.*/) { + } elsif ($TargetPlatform =~ /^powerpc-apple-darwin.*/) { $pcrel_label = $p; $pcrel_label =~ s/(.|\n)*^(\"?L\d+\$pb\"?):\n(.|\n)*/$2/ or $pcrel_label = ""; @@ -893,7 +977,7 @@ sub mangle_asm { die "Prologue junk?: $p\n" if $p =~ /^\s+[^\s\.]/; # For PIC, we want to keep part of the prologue - if ($TargetPlatform =~ /^powerpc-apple-.*/ && $pcrel_label ne "") { + if ($TargetPlatform =~ /^powerpc-apple-darwin.*/ && $pcrel_label ne "") { # Darwin: load the current instruction pointer into register r31 $p .= "bcl 20,31,$pcrel_label\n"; $p .= "$pcrel_label:\n"; @@ -905,6 +989,9 @@ sub mangle_asm { $p .= "\tmflr 30\n"; $p .= "\tlwz 0,.LCL$pcrel_label-.LCF$pcrel_label(30)\n"; $p .= "\tadd 30,0,30\n"; + } elsif ($TargetPlatform =~ /^i386-apple-darwin.*/ && $pcrel_label ne "") { 
+ $p .= "\tcall ___i686.get_pc_thunk.$pcrel_reg\n"; + $p .= "$pcrel_label:\n"; } # glue together what's left @@ -944,7 +1031,7 @@ sub mangle_asm { $e =~ s/^\tlw\t\$fp,\d+\(\$sp\)\n//; $e =~ s/^\taddu\t\$sp,\$sp,\d+\n//; $e =~ s/^\tj\t\$31\n//; - } elsif ($TargetPlatform =~ /^powerpc-apple-.*/) { + } elsif ($TargetPlatform =~ /^powerpc-apple-darwin.*/) { $e =~ s/^\taddi r1,r1,\d+\n//; $e =~ s/^\tlwz r\d+,\d+\(r1\)\n//; $e =~ s/^\tlmw r\d+,-\d+\(r1\)\n//; @@ -995,9 +1082,10 @@ sub mangle_asm { # toss all calls to __DISCARD__ $c =~ s/^\t(call|jbsr|jal)\s+${T_US}__DISCARD__\n//go; $c =~ s/^\tjsr\s+\$26\s*,\s*${T_US}__DISCARD__\n//go if $TargetPlatform =~ /^alpha-/; - $c =~ s/^\tbl\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /^powerpc-apple-.*/; + $c =~ s/^\tbl\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /^powerpc-apple-darwin.*/; $c =~ s/^\tbl\s+__DISCARD__(\@plt)?\n//go if $TargetPlatform =~ /^powerpc-.*-linux/; $c =~ s/^\tbl\s+\.__DISCARD__\n\s+nop\n//go if $TargetPlatform =~ /^powerpc64-.*-linux/; + $c =~ s/^\tcall\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /i386-apple-darwin.*/; # IA64: mangle tailcalls into jumps here if ($TargetPlatform =~ /^ia64-/) { @@ -1125,7 +1213,23 @@ sub mangle_asm { # print out all the literal strings next for ($i = 0; $i < $numchks; $i++) { if ( $chkcat[$i] eq 'literal' ) { - print OUTASM $T_HDR_literal, $chk[$i]; + + # HACK: try to detect 16-byte constants and align them + # on a 16-byte boundary. x86_64 sometimes needs 128-bit + # aligned constants, and so does Darwin/x86. 
+ if ( $TargetPlatform =~ /^x86_64/ + || $TargetPlatform =~ /^i386-apple-darwin/ ) { + $z = $chk[$i]; + if ($z =~ /(\.long.*\n.*\.long.*\n.*\.long.*\n.*\.long|\.quad.*\n.*\.quad)/) { + print OUTASM $T_HDR_literal16; + } else { + print OUTASM $T_HDR_literal; + } + } else { + print OUTASM $T_HDR_literal; + } + + print OUTASM $chk[$i]; print OUTASM "; end literal\n" if $TargetPlatform =~ /^hppa/; # for the splitter $chkcat[$i] = 'DONE ALREADY'; @@ -1185,7 +1289,7 @@ sub mangle_asm { # SRT if ( defined($srtchk{$symb}) ) { - print OUTASM $T_HDR_rodata; + print OUTASM $T_HDR_relrodata; print OUTASM $chk[$srtchk{$symb}]; $chkcat[$srtchk{$symb}] = 'DONE ALREADY'; } @@ -1209,8 +1313,23 @@ sub mangle_asm { # If this is an entry point with an info table, # eliminate the entry symbol and all directives involving it. if (defined($infochk{$symb}) && $TargetPlatform !~ /^ia64-/) { - $c =~ s/^.*$symb_(entry|ret)${T_POST_LBL}\n//; - $c =~ s/^\s*\..*$symb.*\n//g; + @o = (); + foreach $l (split(/\n/,$c)) { + next if $l =~ /^.*$symb_(entry|ret)${T_POST_LBL}/; + + # If we have .type/.size directives involving foo_entry, + # then make them refer to foo_info instead. The information + # in these directives is used by the cachegrind annotator, + # so it is worthwhile keeping. + if ($l =~ /^\s*\.(type|size).*$symb_(entry|ret)/) { + $l =~ s/$symb(_entry|_ret)/${symb}_info/g; + push(@o,$l); + next; + } + next if $l =~ /^\s*\..*$symb.*\n?/; + push(@o,$l); + } + $c = join("\n",@o) . "\n"; } print OUTASM $T_HDR_entry; @@ -1245,7 +1364,10 @@ sub mangle_asm { print OUTASM $T_HDR_rodata; print OUTASM $chk[$i]; $chkcat[$i] = 'DONE ALREADY'; - + } elsif ( $chkcat[$i] eq 'relrodata' ) { + print OUTASM $T_HDR_relrodata; + print OUTASM $chk[$i]; + $chkcat[$i] = 'DONE ALREADY'; } elsif ( $chkcat[$i] eq 'toc' ) { # silly optimisation to print tocs, since they come in groups... 
print OUTASM $T_HDR_toc; @@ -1260,12 +1382,16 @@ sub mangle_asm { $j++; } - } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && $chkcat[$i] eq 'dyld' ) { - # powerpc-apple: dynamic linker stubs + } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' ) { + # apple-darwin: dynamic linker stubs if($chk[$i] !~ /\.indirect_symbol ___DISCARD__/) { # print them out unchanged, but remove the stubs for __DISCARD__ print OUTASM $chk[$i]; } + } elsif ( $TargetPlatform =~ /^i386-apple-darwin.*/ && $chkcat[$i] eq 'get_pc_thunk' ) { + # i386-apple-darwin: __i686.get_pc_thunk.[abcd]x + print OUTASM ".section __TEXT,__textcoal_nt,coalesced,no_toc\n"; + print OUTASM $chk[$i]; } else { &tidy_up_and_die(1,"$Pgm: unknown chkcat (ghc-asm: $TargetPlatform)\n$chkcat[$i]\n$chk[$i]\n"); } @@ -1308,12 +1434,25 @@ sub hppa_mash_prologue { # OK, epilogue, too sub print_doctored { local($_, $need_fallthru_patch) = @_; + if ( $TargetPlatform =~ /^x86_64-/ ) { + # Catch things like + # + # movq -4(%ebp), %rax + # jmp *%rax + # + # and optimise: + # + s/^\tmovq\s+(-?\d*\(\%r(bx|bp|13)\)),\s*(\%r(ax|cx|dx|10|11))\n\tjmp\s+\*\3/\tjmp\t\*$1/g; + s/^\tmovl\s+\$${T_US}(.*),\s*(\%e(ax|cx|si|di))\n\tjmp\s+\*\%r\3/\tjmp\t$T_US$1/g; + } + if ( $TargetPlatform !~ /^i386-/ || ! /^\t[a-z]/ # no instructions in here, apparently || /^${T_US}__stginit_[A-Za-z0-9_]+${T_POST_LBL}/) { print OUTASM $_; return; } + # OK, must do some x86 **HACKING** local($entry_patch) = ''; @@ -1352,8 +1491,16 @@ sub print_doctored { # movl $_blah, # jmp * # + s/^\tmovl\s+\$${T_US}(.*),\s*(\%e[acd]x)\n\tjmp\s+\*\2/\tjmp $T_US$1/g; - s/^\tmovl\s+\$${T_US}(.*),\s*(\%e[abcd]x)\n\tjmp\s+\*\2/\tjmp $T_US$1/g; + # Catch things like + # + # movl -4(%ebx), %eax + # jmp *%eax + # + # and optimise: + # + s/^\tmovl\s+(-?\d*\(\%e(bx|si)\)),\s*(\%e[acd]x)\n\tjmp\s+\*\3/\tjmp\t\*$1/g; if ($StolenX86Regs <= 2 ) { # YURGH! spurious uses of esi? 
s/^\tmovl\s+(.*),\s*\%esi\n\tjmp\s+\*%esi\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g; @@ -1404,6 +1551,16 @@ sub print_doctored { # fix _all_ non-local jumps: + if ( $TargetPlatform =~ /^.*-apple-darwin.*/ ) { + # On Darwin, we've got local-looking jumps that are + # actually global (i.e. jumps to Lfoo$stub or via + # Lfoo$non_lazy_ptr), so we fix those first. + # In fact, we just fix everything that contains a dollar + # because false positives don't hurt here. + + s/^(\tjmp\s+\*?L.*\$.*\n)/$exit_patch$1/g; + } + s/^\tjmp\s+\*${T_X86_PRE_LLBL_PAT}/\tJMP___SL/go; s/^\tjmp\s+${T_X86_PRE_LLBL_PAT}/\tJMP___L/go;