$T_DOT_WORD = '\.(long|short|byte|fill|space)';
$T_DOT_GLOBAL = '\.globl';
$T_HDR_toc = "\.toc\n";
- $T_HDR_literal = "\t\.const\n\t\.align 4\n"; # align for SSE
+ $T_HDR_literal16= "\t\.literal8\n\t\.align 4\n";
+ $T_HDR_literal = "\t\.const\n\t\.align 4\n";
$T_HDR_misc = "\t\.text\n\t\.align 2\n";
$T_HDR_data = "\t\.data\n\t\.align 2\n";
$T_HDR_rodata = "\t\.const\n\t\.align 2\n";
$T_CONST_LBL = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like
$T_POST_LBL = ':';
- $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.stab.*|\s*\.section.*|\s+\.type.*|\s+\.size.*)\n)';
- $T_COPY_DIRVS = '\.(global|proc|stab)';
+ $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\s+\.local\s+\S+|\.text|\.data|\.stab.*|\s*\.section.*|\s+\.type.*|\s+\.size.*)\n)';
+ $T_COPY_DIRVS = '\.(global|local|proc|stab)';
$T_DOT_WORD = '\.(long|word|byte|half|skip|uahalf|uaword)';
$T_DOT_GLOBAL = '^\t\.global';
}
}
- $p =~ s/^\tpushl\s+\%edi\n//;
- $p =~ s/^\tpushl\s+\%esi\n//;
- $p =~ s/^\tpushl\s+\%ebx\n//;
- $p =~ s/^\tmovl\s+\%esi,\s*\d*\(\%esp\)\n//;
- $p =~ s/^\tmovl\s+\%edi,\s*\d*\(\%esp\)\n//;
+ # gcc 3.4.3 puts this kind of stuff in the prologue, eg.
+ # when compiling PrimOps.cmm with -optc-O2:
+ # xorl %ecx, %ecx
+ # xorl %edx, %edx
+ # movl %ecx, 16(%esp)
+ # movl %edx, 20(%esp)
+ # but then the code of the function doesn't assume
+ # anything about the contnets of these stack locations.
+ # I think it's to do with the use of inline functions for
+ # PK_Word64() and friends, where gcc is initialising the
+ # contents of the struct to zero, and failing to optimise
+ # away the initialisation. Let's live dangerously and
+ # discard these initalisations.
+
+ $p =~ s/^\tpushl\s+\%e(di|si|bx)\n//g;
+ $p =~ s/^\txorl\s+\%e(ax|cx|dx),\s*\%e(ax|cx|dx)\n//g;
+ $p =~ s/^\tmovl\s+\%e(ax|cx|dx|si|di),\s*\d*\(\%esp\)\n//g;
+ $p =~ s/^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n//g;
$p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//;
$p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-(cygwin32|mingw32)/);
- # GCC 3.1 is in the habit of adding spurious writes to the
- # stack in the prologue. Just to be on the safe side,
- # chuck these over the fence into the main code.
- while ($p =~ /^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n/) {
- # print "Spurious instruction: $&";
- $p = $` . $';
- $r = $& . $r;
- }
-
if ($TargetPlatform =~ /^i386-apple-darwin/) {
$pcrel_label = $p;
$pcrel_label =~ s/(.|\n)*^(\"?L\d+\$pb\"?):\n(.|\n)*/$2/ or $pcrel_label = "";
# HACK: try to detect 16-byte constants and align them
# on a 16-byte boundary. x86_64 sometimes needs 128-bit
- # aligned constants.
- if ( $TargetPlatform =~ /^x86_64/ ) {
+ # aligned constants, and so does Darwin/x86.
+ if ( $TargetPlatform =~ /^x86_64/
+ || $TargetPlatform =~ /^i386-apple-darwin/ ) {
$z = $chk[$i];
if ($z =~ /(\.long.*\n.*\.long.*\n.*\.long.*\n.*\.long|\.quad.*\n.*\.quad)/) {
print OUTASM $T_HDR_literal16;
sub print_doctored {
local($_, $need_fallthru_patch) = @_;
+ if ( $TargetPlatform =~ /^x86_64-/ ) {
+ # Catch things like
+ #
+ # movq -4(%ebp), %rax
+ # jmp *%rax
+ #
+ # and optimise:
+ #
+ s/^\tmovq\s+(-?\d*\(\%r(bx|bp|13)\)),\s*(\%r(ax|cx|dx|10|11))\n\tjmp\s+\*\3/\tjmp\t\*$1/g;
+ s/^\tmovl\s+\$${T_US}(.*),\s*(\%e(ax|cx|si|di))\n\tjmp\s+\*\%r\3/\tjmp\t$T_US$1/g;
+ }
+
if ( $TargetPlatform !~ /^i386-/
|| ! /^\t[a-z]/ # no instructions in here, apparently
|| /^${T_US}__stginit_[A-Za-z0-9_]+${T_POST_LBL}/) {
print OUTASM $_;
return;
}
+
# OK, must do some x86 **HACKING**
local($entry_patch) = '';