1 %************************************************************************
3 \section[Driver-asm-fiddling]{Fiddling with assembler files (iX86)}
5 %************************************************************************
10 Utterly stomp out C functions' prologues and epilogues; i.e., the
11 stuff to do with the C stack.
13 Any other required tidying up.
18 local($in_asmf, $out_asmf) = @_;
20 # multi-line regexp matching:
25 open(INASM, "< $in_asmf")
26 || &tidy_up_and_die(1,"$Pgm: failed to open `$in_asmf' (to read)\n");
27 open(OUTASM,"> $out_asmf")
28 || &tidy_up_and_die(1,"$Pgm: failed to open `$out_asmf' (to write)\n");
30 # read whole file, divide into "chunks":
31 # record some info about what we've found...
33 @chk = (); # contents of the chunk
34 $numchks = 0; # number of them
35 @chkcat = (); # what category of thing in each chunk
36 @chksymb = (); # what symbol(base) is defined in this chunk
37 %slowchk = (); # ditto, its regular "slow" entry code
38 %fastchk = (); # ditto, fast entry code
39 %closurechk = (); # ditto, the (static) closure
40 %infochk = (); # given a symbol base, say what chunk its info tbl is in
41 %vectorchk = (); # ditto, return vector table
42 %directchk = (); # ditto, direct return code
48 #??? next if /^\.stab.*___stg_split_marker/;
49 #??? next if /^\.stab.*ghc.*c_ID/;
50 next if /^#(NO_)?APP/;
52 if ( /^\s+/ ) { # most common case first -- a simple line!
53 # duplicated from the bottom
57 } elsif ( /^_(ret_|djn_)/ ) {
62 } elsif ( /^_vtbl_([A-Za-z0-9_]+):$/ ) {
64 $chkcat[$i] = 'vector';
69 } elsif ( /^_([A-Za-z0-9_]+)DirectReturn:$/ ) {
71 $chkcat[$i] = 'direct';
76 } elsif ( /^_[A-Za-z0-9_]+_upd:$/ ) {
81 } elsif ( /^LC(\d+):$/ ) {
83 $chkcat[$i] = 'string';
86 } elsif ( /^___stg_split_marker(\d+):$/ ) {
88 $chkcat[$i] = 'splitmarker';
91 } elsif ( /^_([A-Za-z0-9_]+)_info:$/ ) {
94 $chkcat[$i] = 'infotbl';
99 } elsif ( /^_([A-Za-z0-9_]+)_entry:$/ ) {
101 $chkcat[$i] = 'slow';
106 } elsif ( /^_([A-Za-z0-9_]+)_fast\d+:$/ ) {
108 $chkcat[$i] = 'fast';
113 } elsif ( /^_([A-Za-z0-9_]+)_closure:$/ ) {
115 $chkcat[$i] = 'closure';
118 $closurechk{$1} = $i;
120 } elsif ( /^_ghc.*c_ID:/ ) {
122 $chkcat[$i] = 'consist';
124 } elsif ( /^(___gnu_compiled_c|gcc2_compiled\.):/ ) {
127 } elsif ( /^_ErrorIO_call_count:/ # HACK!!!!
128 || /^_[A-Za-z0-9_]+\.\d+:$/
129 || /^_.*_CAT:/ # PROF: _entryname_CAT
130 || /^_CC_.*_struct:/ # PROF: _CC_ccident_struct
131 || /^_.*_done:/ # PROF: _module_done
132 || /^__module_registered:/ # PROF: _module_registered
135 $chkcat[$i] = 'data';
138 } elsif ( /^_[A-Za-z0-9_]/ ) {
141 print STDERR "Funny global thing?: $_"
142 unless $KNOWN_FUNNY_THING{$thing}
143 || /^__(PRIn|PRStart).*:/ # pointer reversal GC routines
144 || /^_CC_.*:/ # PROF: _CC_ccident
145 || /^__reg.*:/; # PROF: __reg<module>
147 $chkcat[$i] = 'misc';
150 } else { # simple line (duplicated at the top)
155 $numchks = $#chk + 1;
157 # the division into chunks is imperfect;
158 # we throw some things over the fence into the next
161 # also, there are things we would like to know
162 # about the whole module before we start spitting
165 # NB: we start meddling at chunk 1, not chunk 0
167 for ($i = 1; $i < $numchks; $i++) {
168 $c = $chk[$i]; # convenience copy
170 # print STDERR "\nCHK $i (BEFORE) (",$chkcat[$i],"):\n", $c;
172 # toss all prologue stuff;
173 # be slightly paranoid to make sure there's
174 # nothing surprising in there
175 if ( $c =~ /--- BEGIN ---/ ) {
176 if (($p, $r) = split(/--- BEGIN ---/, $c)) {
177 $p =~ s/^\tpushl \%edi\n//;
178 $p =~ s/^\tpushl \%esi\n//;
179 $p =~ s/^\tsubl \$\d+,\%esp\n//;
180 die "Prologue junk?: $p\n" if $p =~ /^\t[^\.]/;
182 # glue together what's left
187 # toss all epilogue stuff; again, paranoidly
188 if ( $c =~ /--- END ---/ ) {
189 if (($r, $e) = split(/--- END ---/, $c)) {
191 $e =~ s/^\tpopl \%edi\n//;
192 $e =~ s/^\tpopl \%esi\n//;
193 $e =~ s/^\taddl \$\d+,\%esp\n//;
194 die "Epilogue junk?: $e\n" if $e =~ /^\t[^\.]/;
196 # glue together what's left
201 # toss all calls to __DISCARD__
202 $c =~ s/^\tcall ___DISCARD__\n//g;
204 # pin a funny end-thing on (for easier matching):
205 $c .= 'FUNNY#END#THING';
207 # pick some end-things and move them to the next chunk
209 while ( $c =~ /^\s*(\.align\s+\d+(,0x90)?\n|\.globl\s+\S+\n|\.text\n|\.data\n|\.stab[^n].*\n)FUNNY#END#THING/ ) {
212 if ( $to_move =~ /\.(globl|stab)/ && $i < ($numchks - 1) ) {
213 $chk[$i + 1] = $to_move . $chk[$i + 1];
214 # otherwise they're tossed
217 $c =~ s/^.*\nFUNNY#END#THING/FUNNY#END#THING/;
220 $c =~ s/FUNNY#END#THING//;
221 $chk[$i] = $c; # update w/ convenience copy
224 # print out all the literal strings first
225 for ($i = 0; $i < $numchks; $i++) {
226 if ( $chkcat[$i] eq 'string' ) {
227 print OUTASM "\.text\n\t\.align 4\n";
228 # not sure what alignment is required (WDP 95/02)
229 # .align 4 (on 16-byte boundaries) is 486-cache friendly
230 print OUTASM $chk[$i];
232 $chkcat[$i] = 'DONE ALREADY';
236 for ($i = 0; $i < $numchks; $i++) {
237 # print STDERR "$i: cat $chkcat[$i], symb $chksymb[$i]\n";
239 next if $chkcat[$i] eq 'DONE ALREADY';
241 if ( $chkcat[$i] eq 'misc' ) {
242 print OUTASM "\.text\n\t\.align 4\n";
243 &print_doctored($chk[$i], 0);
245 } elsif ( $chkcat[$i] eq 'data' ) {
246 print OUTASM "\.data\n\t\.align 2\n"; # ToDo: change align??
247 print OUTASM $chk[$i];
249 } elsif ( $chkcat[$i] eq 'consist' ) {
250 if ( $chk[$i] =~ /\.ascii.*\)(hsc|cc) (.*)\\11"\n\t\.ascii\s+"(.*)\\0"/ ) {
251 local($consist) = "$1.$2.$3";
253 $consist =~ s/\//./g;
255 $consist =~ s/[^A-Za-z0-9_.]/ZZ/g; # ToDo: properly?
256 print OUTASM "\.text\n$consist:\n";
258 print STDERR "Couldn't grok consistency: ", $chk[$i];
261 } elsif ( $chkcat[$i] eq 'splitmarker' ) {
262 # we can just re-constitute this one...
263 print OUTASM "___stg_split_marker",$chksymb[$i],":\n";
265 } elsif ( $chkcat[$i] eq 'closure'
266 || $chkcat[$i] eq 'infotbl'
267 || $chkcat[$i] eq 'slow'
268 || $chkcat[$i] eq 'fast' ) { # do them in that order
269 $symb = $chksymb[$i];
272 if ( defined($closurechk{$symb}) ) {
273 print OUTASM "\.data\n\t\.align 2\n"; # ToDo: change align?
274 print OUTASM $chk[$closurechk{$symb}];
275 $chkcat[$closurechk{$symb}] = 'DONE ALREADY';
279 if ( defined($infochk{$symb}) ) {
281 print OUTASM "\.text\n\t\.align 4\n"; # NB: requires padding
282 print OUTASM &rev_tbl($symb, $chk[$infochk{$symb}], 1);
283 # entry code will be put here!
285 $chkcat[$infochk{$symb}] = 'DONE ALREADY';
289 if ( defined($slowchk{$symb}) ) {
291 # teach it to drop through to the fast entry point:
292 $c = $chk[$slowchk{$symb}];
293 $c =~ s/^\tmovl \$_${symb}_fast\d+,\%edx\n\tjmp \*\%edx\n//;
294 $c =~ s/^\tmovl \$_${symb}_fast\d+,\%eax\n\tjmp \*\%eax\n//;
296 print STDERR "still has jump to fast entry point:\n$c"
297 if $c =~ /_${symb}_fast/;
299 print OUTASM "\.text\n\t\.align 4\n";
300 &print_doctored($c, 1); # NB: the 1!!!
301 $chkcat[$slowchk{$symb}] = 'DONE ALREADY';
305 if ( defined($fastchk{$symb}) ) {
306 print OUTASM "\.text\n\t\.align 4\n"; # Fills w/ no-ops!
307 &print_doctored($chk[$fastchk{$symb}], 0);
308 $chkcat[$fastchk{$symb}] = 'DONE ALREADY';
311 } elsif ( $chkcat[$i] eq 'vector'
312 || $chkcat[$i] eq 'direct' ) { # do them in that order
313 $symb = $chksymb[$i];
316 if ( defined($vectorchk{$symb}) ) {
317 print OUTASM "\.text\n\t\.align 4\n"; # NB: requires padding
318 print OUTASM &rev_tbl($symb, $chk[$vectorchk{$symb}], 0);
319 # direct return code will be put here!
320 $chkcat[$vectorchk{$symb}] = 'DONE ALREADY';
324 if ( defined($directchk{$symb}) ) {
325 print OUTASM "\.text\n\t\.align 4\n";
326 &print_doctored($chk[$directchk{$symb}], 0);
327 $chkcat[$directchk{$symb}] = 'DONE ALREADY';
331 &tidy_up_and_die(1,"$Pgm: unknown chkcat (ghc-asm iX86)\n$chkcat[$i]\n$chk[$i]\n");
335 close(OUTASM) || &tidy_up_and_die(1,"Failed writing to $out_asmf\n");
336 close(INASM) || &tidy_up_and_die(1,"Failed reading from $in_asmf\n");
342 local($_, $need_fallthru_patch) = @_;
344 if ( ! /^\t[a-z]/ ) { # no instructions in here, apparently
347 } else { # must do some **HACKING**
348 local($entry_patch) = '';
349 local($exit_patch) = '';
350 local($call_entry_patch)= '';
351 local($call_exit_patch) = '';
352 local($sp_entry_patch) = '';
353 local($sp_exit_patch) = '';
355 # gotta watch out for weird instructions that
356 # invisibly smash various regs:
357 # rep* %ecx used for counting
358 # scas* %edi used for destination index
359 # cmps* %e[sd]i used for indices
360 # loop* %ecx used for counting
363 print STDERR "WEIRD INSN!\n$_" if /^\t(rep|scas|loop|cmps)/;
365 # WDP: this still looks highly dubious to me. 95/07
367 # * use of STG reg [ nn(%ebx) ] where no machine reg avail
368 # * some secret uses of machine reg, requiring STG reg
369 # to be saved/restored
370 # * but what about totally-unexpected uses of machine reg?
371 # (maybe I've forgotten how this works...)
373 if ( $StolenX86Regs < 3
374 && ( /32\(\%ebx\)/ || /^\tcmps/ ) ) { # R1 (esi)
375 $entry_patch .= "\tmovl \%esi,32(\%ebx)\n";
376 $exit_patch .= "\tmovl 32(\%ebx),\%esi\n";
377 # nothing for call_{entry,exit} because %esi is callee-save
379 if ( $StolenX86Regs < 4
380 && ( /64\(\%ebx\)/ || /^\t(scas|cmps)/ ) ) { # SpA (edi)
381 $entry_patch .= "\tmovl \%edi,64(\%ebx)\n";
382 $exit_patch .= "\tmovl 64(\%ebx),\%edi\n";
383 # nothing for call_{entry,exit} because %edi is callee-save
385 if ( $StolenX86Regs < 5
386 && ( /36\(\%ebx\)/ || /^\t(rep|loop)/ ) ) { # R2 (ecx)
387 $entry_patch .= "\tmovl \%ecx,36(\%ebx)\n";
388 $exit_patch .= "\tmovl 36(\%ebx),\%ecx\n";
390 $call_exit_patch .= "\tmovl \%ecx,108(\%ebx)\n";
391 $call_entry_patch .= "\tmovl 108(\%ebx),\%ecx\n";
393 # first, convert calls to *very magic form*: (ToDo: document for real!)
396 # call _PerformGC_wrapper
400 # call _PerformGC_wrapper
402 # Special macros in ghc/includes/COptWraps.lh, used in
403 # ghc/runtime/CallWrap_C.lc, are required for this to work!
405 s/^\tpushl \$(\d+)\n\tcall _PerformGC_wrapper\n\taddl \$4,\%esp\nL(\d+):\n/\tmovl \$$1,\%eax\n\tmovl \$L$2a,104\(\%ebx\)\n\tmovl \$_PerformGC_wrapper,\%edx\n\tjmp \*\%edx\nL$2a:\n__SP_ENTRY_PATCH__L$2:\n/g;
406 s/^\tpushl \%eax\n\tcall _PerformGC_wrapper\n\taddl \$4,\%esp\nL(\d+):\n/\tmovl \$L$1a,104\(\%ebx\)\n\tmovl \$_PerformGC_wrapper,\%edx\n\tjmp \*\%edx\nL$1a:\n__SP_ENTRY_PATCH__L$1:\n/g;
408 s/^\tpushl \%edx\n\tcall _PerformGC_wrapper\n\taddl \$4,\%esp\nL(\d+):\n/\tmovl \%edx,\%eax\n\tmovl \$L$1a,104\(\%ebx\)\n\tmovl \$_PerformGC_wrapper,\%edx\n\tjmp \*\%edx\nL$1a:\n__SP_ENTRY_PATCH__L$1:\n/g;
410 if ( $StolenX86Regs < 5 ) { # %ecx is ordinary reg
411 s/^\tpushl \%ecx\n\tcall _PerformGC_wrapper\n\taddl \$4,\%esp\nL(\d+):\n/\tmovl \%ecx,\%eax\n\tmovl \$L$1a,104\(\%ebx\)\n\tmovl \$_PerformGC_wrapper,\%edx\n\tjmp \*\%edx\nL$1a:\n__SP_ENTRY_PATCH__L$1:\n/g;
414 die "PerformGC_wrapper still alive!\n$_" if / _PerformGC_wrapper/;
416 # --------------------------------------------------------
417 # OK, now acct for the fact that %esp holds Hp on entry;
419 # * must hold C-stack ptr if we go to C
420 # * must get Hp ( 80(%ebx) ) back in it if we come back from C
421 # * must hold Hp when we go on to the next guy
422 # * don't worry about PerformGC_wrapper -- it is magic
423 # * we have a "save location" for %esp ( 100(%ebx) )
424 # * because C-stack ptr doesn't change in Haskell-land,
425 # we don't have to save it -- just restore it when
428 if ( $SpX86Mangling ) { # NB: not used in RTS
429 if ( /(\tcall |\tpushl |\%esp)/ ) { # *anything* C-stack-ish...
430 # then we patch up...
431 $sp_entry_patch = "\tmovl \%esp,80(\%ebx)\n\tmovl 100(\%ebx),%esp\n";
432 $sp_exit_patch = "\tmovl 80(\%ebx),\%esp\n";
434 } elsif ( /80\(\%ebx\)/ ) { # no C-stack stuff: try to squash Hp refs!
435 $sp_entry_patch = '';
438 # mangle heap-check code
440 s/\tmovl 80\(\%ebx\),%eax\n\taddl \$(\d+),\%eax\n\tmovl \%eax,80\(\%ebx\)\n\tcmpl \%eax,84\(\%ebx\)\n/\taddl \$$1,\%esp\n\tcmpl \%esp,84\(\%ebx\)\n/g;
442 # mangle other Hp refs
443 s/80\(\%ebx\)/\%esp/g;
445 # squash some repeated reloadings of Hp
446 while ( /\tmovl \%esp,\%eax\n\t([a-z].*)\n\tmovl \%esp,\%eax\n/ ) {
448 $x =~ s/\%eax/\%esp/g;
449 s/\tmovl \%esp,\%eax\n\t([a-z].*)\n\tmovl \%esp,\%eax\n/\t$x\n\tmovl \%esp,\%eax\n/;
452 while ( /\tmovl \%esp,\%edx\n\t([a-z].*)\n\tmovl \%esp,\%edx\n/ ) {
454 $x =~ s/\%edx/\%esp/g;
455 s/\tmovl \%esp,\%edx\n\t([a-z].*)\n\tmovl \%esp,\%edx\n/\t$x\n\tmovl \%esp,\%edx\n/;
458 if ( $StolenX86Regs < 5 ) { # %ecx is ordinary reg
459 while ( /\tmovl \%esp,\%ecx\n\t([a-z].*)\n\tmovl \%esp,\%ecx\n/ ) {
461 $x =~ s/\%ecx/\%esp/g;
462 s/\tmovl \%esp,\%ecx\n\t([a-z].*)\n\tmovl \%esp,\%ecx\n/\t$x\n\tmovl \%esp,\%ecx\n/;
466 s/\tmovl \%esp,\%eax\n\tmovl \%eax,\%edx\n\taddl \$-(\d+),\%edx\n\tmovl \%edx,(-\d+)?\(\%eax\)\n/\tmovl \%esp,\%edx\n\taddl \$-$1,\%edx\n\tmovl \%edx,$2\(\%esp\)\n/g;
471 # --------------------------------------------------------
472 # next, here we go with non-%esp patching!
474 s/^(\t[a-z])/$sp_entry_patch$entry_patch$1/; # before first instruction
475 s/^(\tcall .*\n(\taddl \$\d+,\%esp\n)?)/$call_exit_patch$1$call_entry_patch/g; # _all_ calls
477 if ($StolenX86Regs == 2 ) { # YURGH! spurious uses of esi,edi,ecx?
478 s/^(\tjmp .*)(\%esi|\%edi|\%ecx)(.*\n)/\tmovl $2,\%eax\n$1\%eax$3/g;
479 } elsif ($StolenX86Regs == 3 ) { # spurious uses of edi,ecx?
480 s/^(\tjmp .*)(\%edi|\%ecx)(.*\n)/\tmovl $2,\%eax\n$1\%eax$3/g;
481 } elsif ($StolenX86Regs == 4 ) { # spurious uses of ecx?
482 s/^(\tjmp .*)(\%ecx)(.*\n)/\tmovl $2,\%eax\n$1\%eax$3/g;
485 s/^\tjmp \*L/\tJMP___L/g;
488 # while ( /^(\tjmp (\*)?[^L].*\n)/ && $sp_exit_patch ) {
489 # print STDERR "Converting\n$1to\n$sp_exit_patch$exit_patch$1";
490 # s/^(\tjmp)( (\*)?[^L].*\n)/$sp_exit_patch$exit_patch\tJMPME$2/;
493 # fix _all_ non-local jumps
494 s/^(\tjmp (\*)?[^L].*\n)/$sp_exit_patch$exit_patch$1/g;
496 #test: s/JMPME/jmp /g;
498 s/^\tJMP___L/\tjmp \*L/g;
500 # fix post-PerformGC wrapper (re-)entries
501 s/__SP_ENTRY_PATCH__/$sp_entry_patch/g;
503 if ($StolenX86Regs == 2 ) {
504 die "ARGH! Jump uses \%esi, \%edi, or \%ecx with -monly-2-regs:\n$_"
505 if /^\t(jmp|call) .*\%e(si|di|cx)/;
506 } elsif ($StolenX86Regs == 3 ) {
507 die "ARGH! Jump uses \%edi or \%ecx with -monly-3-regs:\n$_"
508 if /^\t(jmp|call) .*\%e(di|cx)/;
509 } elsif ($StolenX86Regs == 4 ) {
510 die "ARGH! Jump uses \%ecx with -monly-4-regs:\n$_"
511 if /^\t(jmp|call) .*\%ecx/;
515 s/^\tmovl 36\(\%ebx\),\%ecx\n\tjmp \*36\(\%ebx\)\n/\tmovl 36\(\%ebx\),\%ecx\n\tjmp \*\%ecx\n/;
517 # --------------------------------------------------------
518 # that's it -- print it
520 die "Funny jumps?\n$_" if /^\tjmp [^L\*]/; # paranoia
524 if ( $need_fallthru_patch ) { # exit patch for end of slow entry code
525 print OUTASM $sp_exit_patch, $exit_patch;
526 # ToDo: make it not print if there is a "jmp" at the end
533 sub init_FUNNY_THINGS {
534 %KNOWN_FUNNY_THING = (
535 '_CheckHeapCode:', 1,
536 '_CommonUnderflow:', 1,
538 '_EnterNodeCode:', 1,
539 '_ErrorIO_call_count:', 1,
540 '_ErrorIO_innards:', 1,
550 '_PrimUnderflow:', 1,
551 '_StackUnderflowEnterNode:', 1,
553 '_UnderflowVect0:', 1,
554 '_UnderflowVect1:', 1,
555 '_UnderflowVect2:', 1,
556 '_UnderflowVect3:', 1,
557 '_UnderflowVect4:', 1,
558 '_UnderflowVect5:', 1,
559 '_UnderflowVect6:', 1,
560 '_UnderflowVect7:', 1,
563 '_WorldStateToken:', 1,
564 '__Enter_Internal:', 1,
565 '__PRMarking_MarkNextAStack:', 1,
566 '__PRMarking_MarkNextBStack:', 1,
567 '__PRMarking_MarkNextCAF:', 1,
568 '__PRMarking_MarkNextGA:', 1,
569 '__PRMarking_MarkNextRoot:', 1,
570 '__PRMarking_MarkNextSpark:', 1,
571 '__Scavenge_Forward_Ref:', 1,
572 '___std_entry_error__:', 1,
573 '__startMarkWorld:', 1,
575 '_startCcRegisteringWorld:', 1,
576 '_startEnterFloat:', 1,
577 '_startEnterInt:', 1,
578 '_startPerformIO:', 1,
579 '_startStgWorld:', 1,
585 The following table reversal is used for both info tables and return
586 vectors. In both cases, we remove the first entry from the table,
587 reverse the table, put the label at the end, and paste some code
588 (that which is normally referred to by the first entry in the table)
589 right after the table itself. (The code pasting is done elsewhere.)
593 local($symb, $tbl, $discard1) = @_;
599 local(@lines) = split(/\n/, $tbl);
600 local($i, $extra, $words_to_pad, $j);
602 for ($i = 0; $i <= $#lines && $lines[$i] !~ /^\t\.long\s+/; $i++) {
603 $label .= $lines[$i] . "\n",
604 next if $lines[$i] =~ /^[A-Za-z0-9_]+_info:$/
605 || $lines[$i] =~ /^\.globl/
606 || $lines[$i] =~ /^_vtbl_\S+:$/;
608 $before .= $lines[$i] . "\n"; # otherwise...
611 for ( ; $i <= $#lines && $lines[$i] =~ /^\t\.long\s+/; $i++) {
612 push(@words, $lines[$i]);
614 # now throw away the first word (entry code):
615 shift(@words) if $discard1;
617 # for 486-cache-friendliness, we want our tables aligned
618 # on 16-byte boundaries (.align 4). Let's pad:
619 $extra = ($#words + 1) % 4;
620 $words_to_pad = ($extra == 0) ? 0 : 4 - $extra;
621 for ($j = 0; $j < $words_to_pad; $j++) { push(@words, "\t\.long 0"); }
623 for (; $i <= $#lines; $i++) {
624 $after .= $lines[$i] . "\n";
627 $tbl = $before . join("\n", (reverse @words)) . "\n" . $label . $after;
629 # print STDERR "before=$before\n";
630 # print STDERR "label=$label\n";
631 # print STDERR "words=",(reverse @words),"\n";
632 # print STDERR "after=$after\n";
637 # make "require"r happy...