$T_HDR_vector = "\.text\n\t\.align 2\n"; # NB: requires padding
#--------------------------------------------------------#
- } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|freebsd|netbsd|openbsd)$/ ) {
+ } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|gnu|freebsd|netbsd|openbsd|kfreebsdgnu)$/ ) {
$T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
$T_US = ''; # _ if symbols have an underscore on the front
$T_PRE_APP = # regexp that says what comes before APP/NO_APP
- ($TargetPlatform =~ /-(linux|freebsd|netbsd|openbsd)$/) ? '#' : '/' ;
+ ($TargetPlatform =~ /-(linux|gnu|freebsd|netbsd|openbsd)$/) ? '#' : '/' ;
$T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
$T_POST_LBL = ':';
$T_X86_PRE_LLBL_PAT = '\.L';
$T_POST_LBL = ':';
$T_MOVE_DIRVS = '^(\s*\.(globl|text|data|section|align|size|type|ident|local)\s+.*\n)';
- $T_COPY_DIRVS = '\.(globl|local)';
+ $T_COPY_DIRVS = '\.(globl|type|size|local)';
$T_DOT_WORD = '\.(quad|long|value|byte|zero)';
$T_DOT_GLOBAL = '\.global';
+
+ $T_HDR_literal16 = "\.section\t\.rodata.cst16\n\t.align 16\n";
$T_HDR_literal = "\.section\t\.rodata\n";
+
$T_HDR_misc = "\.text\n\t\.align 8\n";
$T_HDR_data = "\.data\n\t\.align 8\n";
$T_HDR_rodata = "\.section\t\.rodata\n\t\.align 8\n";
+
+ # the assembler on x86_64/Linux refuses to generate code for
+ # .quad x - y
+ # where x is in the text section and y in the rodata section.
+ # It works if y is in the text section, though. This is probably
+ # going to cause difficulties for PIC, I imagine.
+ $T_HDR_relrodata= "\.text\n\t\.align 8\n";
+
$T_HDR_closure = "\.data\n\t\.align 8\n";
$T_HDR_info = "\.text\n\t\.align 8\n";
$T_HDR_entry = "\.text\n\t\.align 8\n";
$T_HDR_vector = "\t\.text\n\t\.align 2\n";
#--------------------------------------------------------#
- } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ ) {
+ } elsif ( $TargetPlatform =~ /^powerpc-apple-darwin.*/ ) {
# Apple PowerPC Darwin/MacOS X.
$T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
$T_US = '_'; # _ if symbols have an underscore on the front
$T_HDR_vector = "\t\.text\n\t\.align 2\n";
#--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^i386-apple-darwin.*/ ) {
+ # Apple x86 (i386) Darwin/MacOS X.
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = '_'; # _ if symbols have an underscore on the front
+ $T_PRE_APP = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\LC\d+:'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+ $T_X86_PRE_LLBL_PAT = 'L';
+ $T_X86_PRE_LLBL = 'L';
+ $T_X86_BADJMP = '^\tjmp [^L\*]';
+
+ $T_MOVE_DIRVS = '^(\s*(\.align \d+|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)';
+ $T_COPY_DIRVS = '\.(globl|lcomm)';
+
+ $T_DOT_WORD = '\.(long|short|byte|fill|space)';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_toc = "\.toc\n";
+ $T_HDR_literal16= "\t\.literal8\n\t\.align 4\n";
+ $T_HDR_literal = "\t\.const\n\t\.align 4\n";
+ $T_HDR_misc = "\t\.text\n\t\.align 2\n";
+ $T_HDR_data = "\t\.data\n\t\.align 2\n";
+ $T_HDR_rodata = "\t\.const\n\t\.align 2\n";
+ $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n";
+ $T_HDR_closure = "\t\.data\n\t\.align 2\n";
+ $T_HDR_info = "\t\.text\n\t\.align 2\n";
+ $T_HDR_entry = "\t\.text\n\t\.align 2\n";
+ $T_HDR_vector = "\t\.text\n\t\.align 2\n";
+
+ #--------------------------------------------------------#
} elsif ( $TargetPlatform =~ /^powerpc-.*-linux/ ) {
# PowerPC Linux
$T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
$T_CONST_LBL = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like
$T_POST_LBL = ':';
- $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.stab.*|\s*\.section.*|\s+\.type.*|\s+\.size.*)\n)';
- $T_COPY_DIRVS = '\.(global|proc|stab)';
+ $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\s+\.local\s+\S+|\.text|\.data|\.stab.*|\s*\.section.*|\s+\.type.*|\s+\.size.*)\n)';
+ $T_COPY_DIRVS = '\.(global|local|proc|stab)';
$T_DOT_WORD = '\.(long|word|byte|half|skip|uahalf|uaword)';
$T_DOT_GLOBAL = '^\t\.global';
# code.
# The .no_dead_strip directives are actually put there by
# the gcc3 "used" attribute on entry points.
+
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && (
+ /^\s*\.picsymbol_stub/
+ || /^\s*\.section __TEXT,__picsymbol_stub\d,.*/
+ || /^\s*\.section __TEXT,__picsymbolstub\d,.*/
+ || /^\s*\.symbol_stub/
+ || /^\s*\.section __TEXT,__symbol_stub\d,.*/
+ || /^\s*\.section __TEXT,__symbolstub\d,.*/
+ || /^\s*\.lazy_symbol_pointer/
+ || /^\s*\.non_lazy_symbol_pointer/
+ || /^\s*\.section __IMPORT.*/))
+ {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'dyld';
+ $chksymb[$i] = '';
+ $dyld_section = $_;
+
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^\s*\.data/)
+ { # non_lazy_symbol_ptrs that point to local symbols
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'dyld';
+ $chksymb[$i] = '';
+ $dyld_section = $_;
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^\s*\.align/)
+ { # non_lazy_symbol_ptrs that point to local symbols
+ $dyld_section .= $_;
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^L_.*:$/)
+ { # non_lazy_symbol_ptrs that point to local symbols
+ $chk[++$i] = $dyld_section . $_;
+ $chkcat[$i] = 'dyld';
+ $chksymb[$i] = '';
+
} elsif ( /^\s+/ ) { # most common case first -- a simple line!
# duplicated from the bottom
$chkcat[$i] = 'misc';
$chksymb[$i] = $1;
+ } elsif ( $TargetPlatform =~ /^i386-apple-darwin/ && /^(___i686\.get_pc_thunk\.[abcd]x):/o) {
+ # To handle PIC on Darwin/x86, we need to appropriately pass through
+ # the get_pc_thunk functions. They need to be put into a special section
+ # marked as coalesced (otherwise the .weak_definition doesn't work
+ # on Darwin).
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'get_pc_thunk';
+ $chksymb[$i] = $1;
+
} elsif ( /^${T_US}[A-Za-z0-9_]/o
&& ( $TargetPlatform !~ /^hppa/ # need to avoid local labels in this case
|| ! /^L\$\d+$/ )
$chkcat[$i] = 'unknown';
}
- } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && (
- /^\.picsymbol_stub/
- || /^\.section __TEXT,__picsymbol_stub1,.*/
- || /^\.section __TEXT,__picsymbolstub1,.*/
- || /^\.symbol_stub/
- || /^\.section __TEXT,__symbol_stub1,.*/
- || /^\.section __TEXT,__symbolstub1,.*/
- || /^\.lazy_symbol_pointer/
- || /^\.non_lazy_symbol_pointer/ ))
- {
- $chk[++$i] = $_;
- $chkcat[$i] = 'dyld';
- $chksymb[$i] = '';
- } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && /^\.data/ && $chkcat[$i] eq 'dyld')
- { # non_lazy_symbol_ptrs that point to local symbols
- $chk[++$i] = $_;
- $chkcat[$i] = 'dyld';
- $chksymb[$i] = '';
} elsif ( $TargetPlatform =~ /^powerpc-.*-linux/ && /^\.LCTOC1 = /o ) {
# PowerPC Linux's large-model PIC (-fPIC) generates a gobal offset
# table "by hand". Be sure to copy it over.
$r =~ s/^\s*\n//;
if ($TargetPlatform =~ /^i386-/) {
- $p =~ s/^\tpushl\s+\%edi\n//;
- $p =~ s/^\tpushl\s+\%esi\n//;
- $p =~ s/^\tpushl\s+\%ebx\n//;
- $p =~ s/^\tmovl\s+\%esi,\s*\d*\(\%esp\)\n//;
- $p =~ s/^\tmovl\s+\%edi,\s*\d*\(\%esp\)\n//;
- $p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//;
- $p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-cygwin32/);
-
- # GCC 3.1 is in the habit of adding spurious writes to the
- # stack in the prologue. Just to be on the safe side,
- # chuck these over the fence into the main code.
- while ($p =~ /^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n/) {
- # print "Spurious instruction: $&";
- $p = $` . $';
- $r = $& . $r;
+ if ($p =~ /^\tsubl\s+\$(\d+),\s*\%esp\n/) {
+ if ($1 >= 8192) {
+ die "Error: reserved stack space exceeded!\n Possible workarounds: compile with -fasm, or try another version of gcc.\n"
+ }
}
+ # gcc 3.4.3 puts this kind of stuff in the prologue, eg.
+ # when compiling PrimOps.cmm with -optc-O2:
+ # xorl %ecx, %ecx
+ # xorl %edx, %edx
+ # movl %ecx, 16(%esp)
+ # movl %edx, 20(%esp)
+ # but then the code of the function doesn't assume
+ # anything about the contents of these stack locations.
+ # I think it's to do with the use of inline functions for
+ # PK_Word64() and friends, where gcc is initialising the
+ # contents of the struct to zero, and failing to optimise
+ # away the initialisation. Let's live dangerously and
+ # discard these initialisations.
+
+ $p =~ s/^\tpushl\s+\%e(di|si|bx)\n//g;
+ $p =~ s/^\txorl\s+\%e(ax|cx|dx),\s*\%e(ax|cx|dx)\n//g;
+ $p =~ s/^\tmovl\s+\%e(ax|cx|dx|si|di),\s*\d*\(\%esp\)\n//g;
+ $p =~ s/^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n//g;
+ $p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//;
+ $p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-(cygwin32|mingw32)/);
+
+ if ($TargetPlatform =~ /^i386-apple-darwin/) {
+ $pcrel_label = $p;
+ $pcrel_label =~ s/(.|\n)*^(\"?L\d+\$pb\"?):\n(.|\n)*/$2/ or $pcrel_label = "";
+ $pcrel_reg = $p;
+ $pcrel_reg =~ s/(.|\n)*.*___i686\.get_pc_thunk\.([abcd]x)\n(.|\n)*/$2/ or $pcrel_reg = "";
+ $p =~ s/^\s+call\s+___i686\.get_pc_thunk\..x//;
+ $p =~ s/^\"?L\d+\$pb\"?:\n//;
+
+ if ($pcrel_reg eq "bx") {
+ # Bad gcc. Goes and uses %ebx, our BaseReg, for PIC. Bad gcc.
+ die "Darwin/x86: -fPIC -via-C doesn't work yet, use -fasm. Aborting."
+ }
+ }
+
} elsif ($TargetPlatform =~ /^x86_64-/) {
$p =~ s/^\tpushq\s+\%r(bx|bp|12|13|14)\n//g;
$p =~ s/^\tmovq\s+\%r(bx|bp|12|13|14),\s*\d*\(\%rsp\)\n//g;
$p =~ s/^\tsw\t\$fp,\d+\(\$sp\)\n//;
$p =~ s/^\tsw\t\$28,\d+\(\$sp\)\n//;
$p =~ s/__FRAME__/$FRAME/;
- } elsif ($TargetPlatform =~ /^powerpc-apple-.*/) {
+ } elsif ($TargetPlatform =~ /^powerpc-apple-darwin.*/) {
$pcrel_label = $p;
$pcrel_label =~ s/(.|\n)*^(\"?L\d+\$pb\"?):\n(.|\n)*/$2/ or $pcrel_label = "";
$p =~ s/^\tstdu 1,-\d+\(1\)\n//;
$p =~ s/^\tstd \d+,-?\d+\(1\)\n//g;
- # GCC's "large-model" PIC (-fPIC)
- $pcrel_label = $p;
- $pcrel_label =~ s/(.|\n)*^.LCF(\d+):\n(.|\n)*/$2/ or $pcrel_label = "";
-
- $p =~ s/^\tbcl 20,31,.LCF\d+\n//;
- $p =~ s/^.LCF\d+:\n//;
- $p =~ s/^\tmflr 30\n//;
- $p =~ s/^\tlwz 0,\.LCL\d+-\.LCF\d+\(30\)\n//;
- $p =~ s/^\tadd 30,0,30\n//;
-
# This is bad: GCC 3 seems to zero-fill some local variables in the prologue
# under some circumstances, only when generating position dependent code.
# I have no idea why, and I don't think it is necessary, so let's toss it.
die "Prologue junk?: $p\n" if $p =~ /^\s+[^\s\.]/;
# For PIC, we want to keep part of the prologue
- if ($TargetPlatform =~ /^powerpc-apple-.*/ && $pcrel_label ne "") {
+ if ($TargetPlatform =~ /^powerpc-apple-darwin.*/ && $pcrel_label ne "") {
# Darwin: load the current instruction pointer into register r31
$p .= "bcl 20,31,$pcrel_label\n";
$p .= "$pcrel_label:\n";
$p .= "\tmflr 30\n";
$p .= "\tlwz 0,.LCL$pcrel_label-.LCF$pcrel_label(30)\n";
$p .= "\tadd 30,0,30\n";
+ } elsif ($TargetPlatform =~ /^i386-apple-darwin.*/ && $pcrel_label ne "") {
+ $p .= "\tcall ___i686.get_pc_thunk.$pcrel_reg\n";
+ $p .= "$pcrel_label:\n";
}
# glue together what's left
$e =~ s/^\tlw\t\$fp,\d+\(\$sp\)\n//;
$e =~ s/^\taddu\t\$sp,\$sp,\d+\n//;
$e =~ s/^\tj\t\$31\n//;
- } elsif ($TargetPlatform =~ /^powerpc-apple-.*/) {
+ } elsif ($TargetPlatform =~ /^powerpc-apple-darwin.*/) {
$e =~ s/^\taddi r1,r1,\d+\n//;
$e =~ s/^\tlwz r\d+,\d+\(r1\)\n//;
$e =~ s/^\tlmw r\d+,-\d+\(r1\)\n//;
# toss all calls to __DISCARD__
$c =~ s/^\t(call|jbsr|jal)\s+${T_US}__DISCARD__\n//go;
$c =~ s/^\tjsr\s+\$26\s*,\s*${T_US}__DISCARD__\n//go if $TargetPlatform =~ /^alpha-/;
- $c =~ s/^\tbl\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /^powerpc-apple-.*/;
+ $c =~ s/^\tbl\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /^powerpc-apple-darwin.*/;
$c =~ s/^\tbl\s+__DISCARD__(\@plt)?\n//go if $TargetPlatform =~ /^powerpc-.*-linux/;
$c =~ s/^\tbl\s+\.__DISCARD__\n\s+nop\n//go if $TargetPlatform =~ /^powerpc64-.*-linux/;
+ $c =~ s/^\tcall\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /i386-apple-darwin.*/;
# IA64: mangle tailcalls into jumps here
if ($TargetPlatform =~ /^ia64-/) {
# print out all the literal strings next
for ($i = 0; $i < $numchks; $i++) {
if ( $chkcat[$i] eq 'literal' ) {
- print OUTASM $T_HDR_literal, $chk[$i];
+
+ # HACK: try to detect 16-byte constants and align them
+ # on a 16-byte boundary. x86_64 sometimes needs 128-bit
+ # aligned constants, and so does Darwin/x86.
+ if ( $TargetPlatform =~ /^x86_64/
+ || $TargetPlatform =~ /^i386-apple-darwin/ ) {
+ $z = $chk[$i];
+ if ($z =~ /(\.long.*\n.*\.long.*\n.*\.long.*\n.*\.long|\.quad.*\n.*\.quad)/) {
+ print OUTASM $T_HDR_literal16;
+ } else {
+ print OUTASM $T_HDR_literal;
+ }
+ } else {
+ print OUTASM $T_HDR_literal;
+ }
+
+ print OUTASM $chk[$i];
print OUTASM "; end literal\n" if $TargetPlatform =~ /^hppa/; # for the splitter
$chkcat[$i] = 'DONE ALREADY';
$j++;
}
- } elsif ( $TargetPlatform =~ /^powerpc-apple-.*/ && $chkcat[$i] eq 'dyld' ) {
- # powerpc-apple: dynamic linker stubs
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' ) {
+ # apple-darwin: dynamic linker stubs
if($chk[$i] !~ /\.indirect_symbol ___DISCARD__/)
{ # print them out unchanged, but remove the stubs for __DISCARD__
print OUTASM $chk[$i];
}
+ } elsif ( $TargetPlatform =~ /^i386-apple-darwin.*/ && $chkcat[$i] eq 'get_pc_thunk' ) {
+ # i386-apple-darwin: __i686.get_pc_thunk.[abcd]x
+ print OUTASM ".section __TEXT,__textcoal_nt,coalesced,no_toc\n";
+ print OUTASM $chk[$i];
} else {
&tidy_up_and_die(1,"$Pgm: unknown chkcat (ghc-asm: $TargetPlatform)\n$chkcat[$i]\n$chk[$i]\n");
}
sub print_doctored {
local($_, $need_fallthru_patch) = @_;
+ if ( $TargetPlatform =~ /^x86_64-/ ) {
+ # Catch things like
+ #
+ # movq -4(%rbp), %rax
+ # jmp *%rax
+ #
+ # and optimise:
+ #
+ s/^\tmovq\s+(-?\d*\(\%r(bx|bp|13)\)),\s*(\%r(ax|cx|dx|10|11))\n\tjmp\s+\*\3/\tjmp\t\*$1/g;
+ s/^\tmovl\s+\$${T_US}(.*),\s*(\%e(ax|cx|si|di))\n\tjmp\s+\*\%r\3/\tjmp\t$T_US$1/g;
+ }
+
if ( $TargetPlatform !~ /^i386-/
|| ! /^\t[a-z]/ # no instructions in here, apparently
|| /^${T_US}__stginit_[A-Za-z0-9_]+${T_POST_LBL}/) {
print OUTASM $_;
return;
}
+
# OK, must do some x86 **HACKING**
local($entry_patch) = '';
# fix _all_ non-local jumps:
+ if ( $TargetPlatform =~ /^.*-apple-darwin.*/ ) {
+ # On Darwin, we've got local-looking jumps that are
+ # actually global (i.e. jumps to Lfoo$stub or via
+ # Lfoo$non_lazy_ptr), so we fix those first.
+ # In fact, we just fix everything that contains a dollar
+ # because false positives don't hurt here.
+
+ s/^(\tjmp\s+\*?L.*\$.*\n)/$exit_patch$1/g;
+ }
+
s/^\tjmp\s+\*${T_X86_PRE_LLBL_PAT}/\tJMP___SL/go;
s/^\tjmp\s+${T_X86_PRE_LLBL_PAT}/\tJMP___L/go;