1 %************************************************************************
3 \section[Driver-asm-fiddling]{Fiddling with assembler files (alpha)}
5 %************************************************************************
10 Utterly stomp out C functions' prologues and epilogues; i.e., the
11 stuff to do with the C stack.
13 Any other required tidying up.
18 local($in_asmf, $out_asmf) = @_;
20 # multi-line regexp matching:
25 open(INASM, "< $in_asmf")
26 || &tidy_up_and_die(1,"$Pgm: failed to open `$in_asmf' (to read)\n");
27 open(OUTASM,"> $out_asmf")
28 || &tidy_up_and_die(1,"$Pgm: failed to open `$out_asmf' (to write)\n");
30 # read whole file, divide into "chunks":
31 # record some info about what we've found...
33 @chk = (); # contents of the chunk
34 $numchks = 0; # number of them
35 @chkcat = (); # what category of thing in each chunk
36 @chksymb = (); # what symbol(base) is defined in this chunk
37 %slowchk = (); # ditto, its regular "slow" entry code
38 %fastchk = (); # ditto, fast entry code
39 %closurechk = (); # ditto, the (static) closure
40 %infochk = (); # given a symbol base, say what chunk its info tbl is in
41 %vectorchk = (); # ditto, return vector table
42 %directchk = (); # ditto, direct return code
48 #??? next if /^\.stab.*___stg_split_marker/;
49 #??? next if /^\.stab.*ghc.*c_ID/;
53 if ( /^\s+/ ) { # most common case first -- a simple line!
54 # duplicated from the bottom
57 } elsif ( /\.\.ng:$/ ) { # Local labels not to be confused with new chunks
60 } elsif ( /^\$C(\d+):$/ ) {
62 $chkcat[$i] = 'string';
65 } elsif ( /^__stg_split_marker(\d+):$/ ) {
67 $chkcat[$i] = 'splitmarker';
70 } elsif ( /^([A-Za-z0-9_]+)_info:$/ ) {
73 $chkcat[$i] = 'infotbl';
76 die "Info table already? $symb; $i\n" if defined($infochk{$symb});
80 } elsif ( /^([A-Za-z0-9_]+)_entry:$/ ) {
87 } elsif ( /^([A-Za-z0-9_]+)_fast\d+:$/ ) {
94 } elsif ( /^([A-Za-z0-9_]+)_closure:$/ ) {
96 $chkcat[$i] = 'closure';
101 } elsif ( /^ghc.*c_ID:/ ) {
103 $chkcat[$i] = 'consist';
105 } elsif ( /^(__gnu_compiled_c|gcc2_compiled\.):/ ) {
108 } elsif ( /^ErrorIO_call_count:/ # HACK!!!!
109 || /^[A-Za-z0-9_]+\.\d+:$/
110 || /^.*_CAT:/ # PROF: _entryname_CAT
111 || /^CC_.*_struct:/ # PROF: _CC_ccident_struct
112 || /^.*_done:/ # PROF: _module_done
113 || /^_module_registered:/ # PROF: _module_registered
116 $chkcat[$i] = 'data';
119 } elsif ( /^(ret_|djn_)/ ) {
121 $chkcat[$i] = 'misc';
124 } elsif ( /^vtbl_([A-Za-z0-9_]+):$/ ) {
126 $chkcat[$i] = 'vector';
131 } elsif ( /^([A-Za-z0-9_]+)DirectReturn:$/ ) {
133 $chkcat[$i] = 'direct';
138 } elsif ( /^[A-Za-z0-9_]+_upd:$/ ) {
140 $chkcat[$i] = 'misc';
143 } elsif ( /^[A-Za-z0-9_]/ ) {
146 print STDERR "Funny global thing?: $_"
147 unless $KNOWN_FUNNY_THING{$thing}
148 || /^_(PRIn|PRStart).*:/ # pointer reversal GC routines
149 || /^CC_.*:/ # PROF: _CC_ccident
150 || /^_reg.*:/; # PROF: _reg<module>
152 $chkcat[$i] = 'misc';
155 } else { # simple line (duplicated at the top)
159 $numchks = $#chk + 1;
161 # print STDERR "\nCLOSURES:\n";
162 # foreach $s (sort (keys %closurechk)) {
163 # print STDERR "$s:\t\t",$closurechk{$s},"\n";
165 # print STDERR "\nINFOS:\n";
166 # foreach $s (sort (keys %infochk)) {
167 # print STDERR "$s:\t\t",$infochk{$s},"\n";
169 # print STDERR "SLOWS:\n";
170 # foreach $s (sort (keys %slowchk)) {
171 # print STDERR "$s:\t\t",$slowchk{$s},"\n";
173 # print STDERR "\nFASTS:\n";
174 # foreach $s (sort (keys %fastchk)) {
175 # print STDERR "$s:\t\t",$fastchk{$s},"\n";
178 # the division into chunks is imperfect;
179 # we throw some things over the fence into the next
182 # also, there are things we would like to know
183 # about the whole module before we start spitting
186 # NB: we start meddling at chunk 1, not chunk 0
188 # the first ".rdata" is quite magical; as of GCC 2.7.x, it
189 # spits a ".quad 0" in after the v first ".rdata"; we
190 # detect this special case (tossing the ".quad 0")!
191 $magic_rdata_seen = 0;
193 for ($i = 1; $i < $numchks; $i++) {
194 $c = $chk[$i]; # convenience copy
196 # print STDERR "\nCHK $i (BEFORE):\n", $c;
198 # pin a funny end-thing on (for easier matching):
199 $c .= 'FUNNY#END#THING';
201 if ( ! $magic_rdata_seen && $c =~ /^\s*\.rdata\n\t\.quad 0\n\t\.align \d\n/ ) {
202 $c =~ s/^\s*\.rdata\n\t\.quad 0\n\t\.align (\d)\n/\.rdata\n\t\.align $1\n/;
203 $magic_rdata_seen = 1;
206 # pick some end-things and move them to the next chunk
208 while ( $c =~ /^(\s*\.align\s+\d+\n)FUNNY#END#THING/
209 || $c =~ /^(\s*\.(globl|ent)\s+\S+\n)FUNNY#END#THING/
210 || $c =~ /^(\s*\#.*\n)FUNNY#END#THING/
211 || $c =~ /^(\s*\.(file|loc)\s+\S+\s+\S+\n)FUNNY#END#THING/
212 || $c =~ /^(\s*\.text\n|\s*\.r?data\n)FUNNY#END#THING/ ) {
215 if ( $to_move =~ /^\s*(\#|\.(file|globl|ent|loc))/ && $i < ($numchks - 1) ) {
216 $chk[$i + 1] = $to_move . $chk[$i + 1];
217 # otherwise they're tossed
220 $c =~ s/^.*\nFUNNY#END#THING/FUNNY#END#THING/;
223 if ($c =~ /^\t\.ent\s+(\S+)/) {
225 # toss all prologue stuff, except for loading gp, and the ..ng address
226 if (($p, $r) = split(/^\t\.prologue/, $c)) {
227 # print STDERR "$ent: prologue:\n$p\nrest:\n$r\n";
228 if (($keep, $junk) = split(/\.\.ng:/, $p)) {
229 $c = $keep . "..ng:\n";
231 print STDERR "malformed code block ($ent)?\n"
234 $c .= "\t.frame \$30,0,\$26,0\n\t.prologue" . $r;
237 $c =~ s/FUNNY#END#THING//;
238 $chk[$i] = $c; # update w/ convenience copy
240 # print STDERR "\nCHK $i (AFTER):\n", $c;
243 # print out the header stuff first
245 $chk[0] =~ s/^(\t\.file.*)"(ghc\d+\.c)"/\1"$ifile_root.hc"/;
246 print OUTASM $chk[0];
248 # print out all the literal strings second
249 for ($i = 1; $i < $numchks; $i++) {
250 if ( $chkcat[$i] eq 'string' ) {
251 print OUTASM "\.rdata\n\t\.align 3\n";
252 print OUTASM $chk[$i];
254 $chkcat[$i] = 'DONE ALREADY';
258 for ($i = 1; $i < $numchks; $i++) {
259 # print STDERR "$i: cat $chkcat[$i], symb $chksymb[$i]\n";
261 next if $chkcat[$i] eq 'DONE ALREADY';
263 if ( $chkcat[$i] eq 'misc' ) {
264 print OUTASM "\.text\n\t\.align 3\n";
265 print OUTASM $chk[$i];
267 } elsif ( $chkcat[$i] eq 'data' ) {
268 print OUTASM "\.data\n\t\.align 3\n";
269 print OUTASM $chk[$i];
271 } elsif ( $chkcat[$i] eq 'consist' ) {
272 if ( $chk[$i] =~ /\.ascii.*\)(hsc|cc) (.*)\\11"\n\t\.ascii\s+"(.*)\\0"/ ) {
273 local($consist) = "$1.$2.$3";
275 $consist =~ s/\//./g;
277 $consist =~ s/[^A-Za-z0-9_.]/ZZ/g; # ToDo: properly?
278 print OUTASM "\.text\n$consist:\n";
280 print STDERR "Couldn't grok consistency: ", $chk[$i];
283 } elsif ( $chkcat[$i] eq 'splitmarker' ) {
284 # we can just re-constitute this one...
285 # ignore the final split marker, to save an empty object module
286 # Use _three_ underscores so that ghc-split doesn't get overly complicated
287 print OUTASM "___stg_split_marker",$chksymb[$i],":\n";
289 } elsif ( $chkcat[$i] eq 'closure'
290 || $chkcat[$i] eq 'infotbl'
291 || $chkcat[$i] eq 'slow'
292 || $chkcat[$i] eq 'fast' ) { # do them in that order
293 $symb = $chksymb[$i];
296 if ( defined($closurechk{$symb}) ) {
297 print OUTASM "\.data\n\t\.align 3\n";
298 print OUTASM $chk[$closurechk{$symb}];
299 $chkcat[$closurechk{$symb}] = 'DONE ALREADY';
303 if ( defined($infochk{$symb}) ) {
305 print OUTASM "\.text\n\t\.align 3\n";
306 print OUTASM &rev_tbl($symb, $chk[$infochk{$symb}], 1);
307 # entry code will be put here!
310 if ( $chk[$infochk{$symb}] =~ /\.quad\s+([A-Za-z0-9_]+_entry)$/
311 && $1 ne "${symb}_entry" ) {
312 print STDERR "!!! entry point???\n",$chk[$infochk{$symb}];
315 $chkcat[$infochk{$symb}] = 'DONE ALREADY';
319 if ( defined($slowchk{$symb}) ) {
321 # teach it to drop through to the fast entry point:
322 $c = $chk[$slowchk{$symb}];
323 if ( defined($fastchk{$symb}) ) {
324 $c =~ s/^\tjmp \$31,\(\$27\),0\n\t\.align 4\n\t\.end/\t.align 4\n\t.end/;
327 # NB: no very good way to look for "dangling" references
330 print OUTASM "\.text\n\t\.align 3\n";
332 $chkcat[$slowchk{$symb}] = 'DONE ALREADY';
336 if ( defined($fastchk{$symb}) ) {
337 $c = $chk[$fastchk{$symb}];
338 if ( ! defined($slowchk{$symb}) ) {
339 print OUTASM "\.text\n\t\.align 3\n";
342 $chkcat[$fastchk{$symb}] = 'DONE ALREADY';
345 } elsif ( $chkcat[$i] eq 'vector'
346 || $chkcat[$i] eq 'direct' ) { # do them in that order
347 $symb = $chksymb[$i];
350 if ( defined($vectorchk{$symb}) ) {
351 print OUTASM "\.text\n\t\.align 3\n";
352 print OUTASM &rev_tbl($symb, $chk[$vectorchk{$symb}], 0);
353 # direct return code will be put here!
354 $chkcat[$vectorchk{$symb}] = 'DONE ALREADY';
358 if ( defined($directchk{$symb}) ) {
359 print OUTASM "\.text\n\t\.align 3\n";
360 print OUTASM $chk[$directchk{$symb}];
361 $chkcat[$directchk{$symb}] = 'DONE ALREADY';
363 # The commented nop is for the splitter, to ensure
364 # that no module ends with a label as the very last
365 # thing. (The linker will adjust the label to point
366 # to the first code word of the next module linked in,
367 # even if alignment constraints cause the label to move!)
369 print OUTASM "\t# nop\n";
372 &tidy_up_and_die(1,"$Pgm: unknown chkcat (ghc-asm alpha)\n$chkcat[$i]\n$chk[$i]\n");
377 close(OUTASM) || &tidy_up_and_die(1,"Failed writing to $out_asmf\n");
378 close(INASM) || &tidy_up_and_die(1,"Failed reading from $in_asmf\n");
383 sub init_FUNNY_THINGS {
384 %KNOWN_FUNNY_THING = (
386 'CommonUnderflow:', 1,
389 'ErrorIO_call_count:', 1,
390 'ErrorIO_innards:', 1,
401 'StackUnderflowEnterNode:', 1,
403 'UnderflowVect0:', 1,
404 'UnderflowVect1:', 1,
405 'UnderflowVect2:', 1,
406 'UnderflowVect3:', 1,
407 'UnderflowVect4:', 1,
408 'UnderflowVect5:', 1,
409 'UnderflowVect6:', 1,
410 'UnderflowVect7:', 1,
413 'WorldStateToken:', 1,
414 '_Enter_Internal:', 1,
415 '_PRMarking_MarkNextAStack:', 1,
416 '_PRMarking_MarkNextBStack:', 1,
417 '_PRMarking_MarkNextCAF:', 1,
418 '_PRMarking_MarkNextGA:', 1,
419 '_PRMarking_MarkNextRoot:', 1,
420 '_PRMarking_MarkNextSpark:', 1,
421 '_Scavenge_Forward_Ref:', 1,
422 '__std_entry_error__:', 1,
423 '_startMarkWorld:', 1,
425 'startCcRegisteringWorld:', 1,
426 'startEnterFloat:', 1,
428 'startPerformIO:', 1,
435 The following table reversal is used for both info tables and return
436 vectors. In both cases, we remove the first entry from the table,
437 reverse the table, put the label at the end, and paste some code
438 (that which is normally referred to by the first entry in the table)
439 right after the table itself. (The code pasting is done elsewhere.)
443 local($symb, $tbl, $discard1) = @_;
449 local(@lines) = split(/\n/, $tbl);
452 for ($i = 0; $i <= $#lines && $lines[$i] !~ /^\t\.quad\s+/; $i++) {
453 $label .= $lines[$i] . "\n",
454 next if $lines[$i] =~ /^[A-Za-z0-9_]+:$/
455 || $lines[$i] =~ /^\t\.globl/;
457 $before .= $lines[$i] . "\n"; # otherwise...
460 for ( ; $i <= $#lines && $lines[$i] =~ /^\t\.quad\s+/; $i++) {
461 push(@words, $lines[$i]);
463 # now throw away the first word (entry code):
464 shift(@words) if $discard1;
466 for (; $i <= $#lines; $i++) {
467 $after .= $lines[$i] . "\n";
470 # If we have anonymous text (not part of a procedure), the linker
471 # may complain about missing exception information. Bleh.
472 if ($label =~ /^([A-Za-z0-9_]+):$/) {
473 $before = "\t.ent $1\n" . $before;
474 $after .= "\t.end $1\n";
477 $tbl = $before . join("\n", (reverse @words)) . "\n" . $label . $after;
479 # print STDERR "before=$before\n";
480 # print STDERR "label=$label\n";
481 # print STDERR "words=",(reverse @words),"\n";
482 # print STDERR "after=$after\n";
488 %************************************************************************
490 \subsection[Driver-asm-info]{Collect interesting (static) info from an assembler file}
492 %************************************************************************
494 How many times is each asm instruction used?
497 %AsmInsn = (); # init
499 sub dump_asm_insn_counts {
502 open(INASM, "< $asmf") || &tidy_up_and_die(1,"$Pgm: failed to open `$asmf' (to read)\n");
504 if ( /^\t([a-z][a-z0-9]+)\b/ ) {
508 close(INASM) || &tidy_up_and_die(1,"Failed reading from $asmf\n");
510 # OK, now print what we collected (to stderr)
511 foreach $i (sort (keys %AsmInsn)) {
512 print STDERR "INSN:: $i\t",$AsmInsn{$i},"\n";
516 sub dump_asm_globals_info {
519 # make "require"r happy...