1 -----------------------------------------------------------------------------
3 -- Pretty-printing assembly language
5 -- (c) The University of Glasgow 1993-2005
7 -----------------------------------------------------------------------------
20 #include "HsVersions.h"
21 #include "nativeGen/NCG.h"
31 import CLabel ( CLabel, mkAsmTempLabel )
32 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
33 import CLabel ( mkDeadStripPreventer )
36 import Unique ( pprUnique )
39 import qualified Outputable
40 import Outputable (panic)
42 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
43 pprUserReg :: Reg -> Doc
44 pprUserReg = pprReg IF_ARCH_i386(II32,) IF_ARCH_x86_64(II64,)
47 pprUserReg :: Reg -> Doc
48 pprUserReg = panic "X86.Ppr.pprUserReg: not defined"
53 pprReg :: Size -> Reg -> Doc
57 RealReg i -> ppr_reg_no s i
58 VirtualRegI u -> text "%vI_" <> asmSDoc (pprUnique u)
59 VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u)
60 VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u)
61 VirtualRegD u -> text "%vD_" <> asmSDoc (pprUnique u)
64 ppr_reg_no :: Size -> Int -> Doc
65 ppr_reg_no II8 = ppr_reg_byte
66 ppr_reg_no II16 = ppr_reg_word
67 ppr_reg_no _ = ppr_reg_long
69 ppr_reg_byte i = ptext
71 0 -> sLit "%al"; 1 -> sLit "%bl";
72 2 -> sLit "%cl"; 3 -> sLit "%dl";
73 _ -> sLit "very naughty I386 byte register"
76 ppr_reg_word i = ptext
78 0 -> sLit "%ax"; 1 -> sLit "%bx";
79 2 -> sLit "%cx"; 3 -> sLit "%dx";
80 4 -> sLit "%si"; 5 -> sLit "%di";
81 6 -> sLit "%bp"; 7 -> sLit "%sp";
82 _ -> sLit "very naughty I386 word register"
85 ppr_reg_long i = ptext
87 0 -> sLit "%eax"; 1 -> sLit "%ebx";
88 2 -> sLit "%ecx"; 3 -> sLit "%edx";
89 4 -> sLit "%esi"; 5 -> sLit "%edi";
90 6 -> sLit "%ebp"; 7 -> sLit "%esp";
91 8 -> sLit "%fake0"; 9 -> sLit "%fake1";
92 10 -> sLit "%fake2"; 11 -> sLit "%fake3";
93 12 -> sLit "%fake4"; 13 -> sLit "%fake5";
94 _ -> sLit "very naughty I386 register"
96 #elif x86_64_TARGET_ARCH
97 ppr_reg_no :: Size -> Int -> Doc
98 ppr_reg_no II8 = ppr_reg_byte
99 ppr_reg_no II16 = ppr_reg_word
100 ppr_reg_no II32 = ppr_reg_long
101 ppr_reg_no _ = ppr_reg_quad
103 ppr_reg_byte i = ptext
105 0 -> sLit "%al"; 1 -> sLit "%bl";
106 2 -> sLit "%cl"; 3 -> sLit "%dl";
107 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
108 6 -> sLit "%bpl"; 7 -> sLit "%spl";
109 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
110 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
111 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
112 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
113 _ -> sLit "very naughty x86_64 byte register"
116 ppr_reg_word i = ptext
118 0 -> sLit "%ax"; 1 -> sLit "%bx";
119 2 -> sLit "%cx"; 3 -> sLit "%dx";
120 4 -> sLit "%si"; 5 -> sLit "%di";
121 6 -> sLit "%bp"; 7 -> sLit "%sp";
122 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
123 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
124 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
125 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
126 _ -> sLit "very naughty x86_64 word register"
129 ppr_reg_long i = ptext
131 0 -> sLit "%eax"; 1 -> sLit "%ebx";
132 2 -> sLit "%ecx"; 3 -> sLit "%edx";
133 4 -> sLit "%esi"; 5 -> sLit "%edi";
134 6 -> sLit "%ebp"; 7 -> sLit "%esp";
135 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
136 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
137 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
138 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
139 _ -> sLit "very naughty x86_64 register"
142 ppr_reg_quad i = ptext
144 0 -> sLit "%rax"; 1 -> sLit "%rbx";
145 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
146 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
147 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
148 8 -> sLit "%r8"; 9 -> sLit "%r9";
149 10 -> sLit "%r10"; 11 -> sLit "%r11";
150 12 -> sLit "%r12"; 13 -> sLit "%r13";
151 14 -> sLit "%r14"; 15 -> sLit "%r15";
152 16 -> sLit "%xmm0"; 17 -> sLit "%xmm1";
153 18 -> sLit "%xmm2"; 19 -> sLit "%xmm3";
154 20 -> sLit "%xmm4"; 21 -> sLit "%xmm5";
155 22 -> sLit "%xmm6"; 23 -> sLit "%xmm7";
156 24 -> sLit "%xmm8"; 25 -> sLit "%xmm9";
157 26 -> sLit "%xmm10"; 27 -> sLit "%xmm11";
158 28 -> sLit "%xmm12"; 29 -> sLit "%xmm13";
159 30 -> sLit "%xmm14"; 31 -> sLit "%xmm15";
160 _ -> sLit "very naughty x86_64 register"
163 ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
167 pprSize :: Size -> Doc
178 #elif x86_64_TARGET_ARCH
179 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
180 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
182 _ -> panic "X86.Ppr.pprSize: no match"
186 pprCond :: Cond -> Doc
189 GEU -> sLit "ae"; LU -> sLit "b";
190 EQQ -> sLit "e"; GTT -> sLit "g";
191 GE -> sLit "ge"; GU -> sLit "a";
192 LTT -> sLit "l"; LE -> sLit "le";
193 LEU -> sLit "be"; NE -> sLit "ne";
194 NEG -> sLit "s"; POS -> sLit "ns";
195 CARRY -> sLit "c"; OFLO -> sLit "o";
196 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
197 ALWAYS -> sLit "mp"})
201 pprImm (ImmInt i) = int i
202 pprImm (ImmInteger i) = integer i
203 pprImm (ImmCLbl l) = pprCLabel_asm l
204 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
205 pprImm (ImmLit s) = s
207 pprImm (ImmFloat _) = ptext (sLit "naughty float immediate")
208 pprImm (ImmDouble _) = ptext (sLit "naughty double immediate")
210 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
211 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
212 <> lparen <> pprImm b <> rparen
216 pprAddr :: AddrMode -> Doc
217 pprAddr (ImmAddr imm off)
218 = let pp_imm = pprImm imm
222 else if (off < 0) then
225 pp_imm <> char '+' <> int off
227 pprAddr (AddrBaseIndex base index displacement)
229 pp_disp = ppr_disp displacement
230 pp_off p = pp_disp <> char '(' <> p <> char ')'
231 pp_reg r = pprReg wordSize r
233 case (base, index) of
234 (EABaseNone, EAIndexNone) -> pp_disp
235 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
236 (EABaseRip, EAIndexNone) -> pp_off (ptext (sLit "%rip"))
237 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
238 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
240 _ -> panic "X86.Ppr.pprAddr: no match"
243 ppr_disp (ImmInt 0) = empty
244 ppr_disp imm = pprImm imm
247 pprSectionHeader :: Section -> Doc
250 # if darwin_TARGET_OS
253 Text -> ptext (sLit ".text\n\t.align 2")
254 Data -> ptext (sLit ".data\n\t.align 2")
255 ReadOnlyData -> ptext (sLit ".const\n.align 2")
256 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 2")
257 UninitialisedData -> ptext (sLit ".data\n\t.align 2")
258 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
259 OtherSection sec -> panic "X86.Ppr.pprSectionHeader: unknown section"
264 Text -> ptext (sLit ".text\n\t.align 4,0x90")
265 Data -> ptext (sLit ".data\n\t.align 4")
266 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 4")
267 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 4")
268 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 4")
269 ReadOnlyData16 -> ptext (sLit ".section .rodata\n\t.align 16")
270 OtherSection sec -> panic "X86.Ppr.pprSectionHeader: unknown section"
274 #elif x86_64_TARGET_ARCH
275 # if darwin_TARGET_OS
278 Text -> ptext (sLit ".text\n.align 3")
279 Data -> ptext (sLit ".data\n.align 3")
280 ReadOnlyData -> ptext (sLit ".const\n.align 3")
281 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 3")
282 UninitialisedData -> ptext (sLit ".data\n\t.align 3")
283 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
284 OtherSection sec -> panic "PprMach.pprSectionHeader: unknown section"
289 Text -> ptext (sLit ".text\n\t.align 8")
290 Data -> ptext (sLit ".data\n\t.align 8")
291 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 8")
292 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 8")
293 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 8")
294 ReadOnlyData16 -> ptext (sLit ".section .rodata.cst16\n\t.align 16")
295 OtherSection sec -> panic "PprMach.pprSectionHeader: unknown section"
300 pprSectionHeader _ = panic "X86.Ppr.pprSectionHeader: not defined for this architecture"
307 pprDataItem :: CmmLit -> Doc
309 = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit)
313 -- These seem to be common:
314 ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
315 ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
317 ppr_item FF32 (CmmFloat r _)
318 = let bs = floatToBytes (fromRational r)
319 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
321 ppr_item FF64 (CmmFloat r _)
322 = let bs = doubleToBytes (fromRational r)
323 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
325 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
326 ppr_item II16 _ = [ptext (sLit "\t.word\t") <> pprImm imm]
328 #if i386_TARGET_ARCH && darwin_TARGET_OS
329 ppr_item II64 (CmmInt x _) =
330 [ptext (sLit "\t.long\t")
331 <> int (fromIntegral (fromIntegral x :: Word32)),
332 ptext (sLit "\t.long\t")
334 (fromIntegral (x `shiftR` 32) :: Word32))]
336 #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
337 ppr_item II64 _ = [ptext (sLit "\t.quad\t") <> pprImm imm]
339 #if x86_64_TARGET_ARCH && !darwin_TARGET_OS
340 -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
341 -- type, which means we can't do pc-relative 64-bit addresses.
342 -- Fortunately we're assuming the small memory model, in which
343 -- all such offsets will fit into 32 bits, so we have to stick
344 -- to 32-bit offset fields and modify the RTS appropriately
346 -- See Note [x86-64-relative] in includes/InfoTables.h
349 | isRelativeReloc x =
350 [ptext (sLit "\t.long\t") <> pprImm imm,
351 ptext (sLit "\t.long\t0")]
353 [ptext (sLit "\t.quad\t") <> pprImm imm]
355 isRelativeReloc (CmmLabelDiffOff _ _ _) = True
356 isRelativeReloc _ = False
360 = panic "X86.Ppr.ppr_item: no match"
364 pprInstr :: Instr -> Doc
366 pprInstr (COMMENT _) = empty -- nuke 'em
369 = IF_ARCH_alpha( ((<>) (ptext (sLit "\t# ")) (ftext s))
370 ,IF_ARCH_sparc( ((<>) (ptext (sLit "# ")) (ftext s))
371 ,IF_ARCH_i386( ((<>) (ptext (sLit "# ")) (ftext s))
372 ,IF_ARCH_x86_64( ((<>) (ptext (sLit "# ")) (ftext s))
373 ,IF_ARCH_powerpc( IF_OS_linux(
374 ((<>) (ptext (sLit "# ")) (ftext s)),
375 ((<>) (ptext (sLit "; ")) (ftext s)))
379 = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
381 pprInstr (NEWBLOCK _)
382 = panic "PprMach.pprInstr: NEWBLOCK"
385 = panic "PprMach.pprInstr: LDATA"
387 pprInstr (SPILL reg slot)
389 ptext (sLit "\tSPILL"),
393 ptext (sLit "SLOT") <> parens (int slot)]
395 pprInstr (RELOAD slot reg)
397 ptext (sLit "\tRELOAD"),
399 ptext (sLit "SLOT") <> parens (int slot),
403 pprInstr (MOV size src dst)
404 = pprSizeOpOp (sLit "mov") size src dst
406 pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst
407 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
408 -- movl. But we represent it as a MOVZxL instruction, because
409 -- the reg alloc would tend to throw away a plain reg-to-reg
410 -- move, and we still want it to do that.
412 pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst
413 -- zero-extension only needs to extend to 32 bits: on x86_64,
414 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
415 -- instruction is shorter.
417 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes wordSize src dst
419 -- here we do some patching, since the physical registers are only set late
420 -- in the code generation.
421 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
423 = pprSizeOpOp (sLit "add") size (OpReg reg2) dst
425 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
427 = pprSizeOpOp (sLit "add") size (OpReg reg1) dst
429 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
431 = pprInstr (ADD size (OpImm displ) dst)
433 pprInstr (LEA size src dst) = pprSizeOpOp (sLit "lea") size src dst
435 pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
436 = pprSizeOp (sLit "dec") size dst
437 pprInstr (ADD size (OpImm (ImmInt 1)) dst)
438 = pprSizeOp (sLit "inc") size dst
439 pprInstr (ADD size src dst)
440 = pprSizeOpOp (sLit "add") size src dst
441 pprInstr (ADC size src dst)
442 = pprSizeOpOp (sLit "adc") size src dst
443 pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
444 pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
446 {- A hack. The Intel documentation says that "The two and three
447 operand forms [of IMUL] may also be used with unsigned operands
448 because the lower half of the product is the same regardless if
449 (sic) the operands are signed or unsigned. The CF and OF flags,
450 however, cannot be used to determine if the upper half of the
451 result is non-zero." So there.
453 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst
454 pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst
456 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
457 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
458 pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
460 pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
461 pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
463 pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
464 pprInstr (SAR size src dst) = pprShift (sLit "sar") size src dst
465 pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst
467 pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src
469 pprInstr (CMP size src dst)
470 | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2
471 | otherwise = pprSizeOpOp (sLit "cmp") size src dst
473 -- This predicate is needed here and nowhere else
479 pprInstr (TEST size src dst) = pprSizeOpOp (sLit "test") size src dst
480 pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op
481 pprInstr (POP size op) = pprSizeOp (sLit "pop") size op
483 -- both unused (SDM):
484 -- pprInstr PUSHA = ptext (sLit "\tpushal")
485 -- pprInstr POPA = ptext (sLit "\tpopal")
487 pprInstr NOP = ptext (sLit "\tnop")
488 pprInstr (CLTD II32) = ptext (sLit "\tcltd")
489 pprInstr (CLTD II64) = ptext (sLit "\tcqto")
491 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
493 pprInstr (JXX cond (BlockId id))
494 = pprCondInstr (sLit "j") cond (pprCLabel_asm lab)
495 where lab = mkAsmTempLabel id
497 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
499 pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
500 pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand wordSize op)
501 pprInstr (JMP_TBL op _) = pprInstr (JMP op)
502 pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
503 pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg wordSize reg)
505 pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
506 pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
507 pprInstr (IMUL2 sz op) = pprSizeOp (sLit "imul") sz op
510 pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
512 pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
514 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
515 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
516 pprInstr (CVTTSS2SIQ from to) = pprOpReg (sLit "cvttss2siq") from to
517 pprInstr (CVTTSD2SIQ from to) = pprOpReg (sLit "cvttsd2siq") from to
518 pprInstr (CVTSI2SS from to) = pprOpReg (sLit "cvtsi2ssq") from to
519 pprInstr (CVTSI2SD from to) = pprOpReg (sLit "cvtsi2sdq") from to
521 -- FETCHGOT for PIC on ELF platforms
522 pprInstr (FETCHGOT reg)
523 = vcat [ ptext (sLit "\tcall 1f"),
524 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ],
525 hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
529 -- FETCHPC for PIC on Darwin/x86
530 -- get the instruction pointer into a register
531 -- (Terminology note: the IP is called Program Counter on PPC,
532 -- and it's a good thing to use the same name on both platforms)
533 pprInstr (FETCHPC reg)
534 = vcat [ ptext (sLit "\tcall 1f"),
535 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ]
539 -- -----------------------------------------------------------------------------
540 -- i386 floating-point
542 -- Simulating a flat register set on the x86 FP stack is tricky.
543 -- you have to free %st(7) before pushing anything on the FP reg stack
544 -- so as to preclude the possibility of a FP stack overflow exception.
545 pprInstr g@(GMOV src dst)
549 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
551 -- GLD sz addr dst ==> FFREE %st(7) ; FLDsz addr ; FSTP (dst+1)
552 pprInstr g@(GLD sz addr dst)
553 = pprG g (hcat [gtab, text "ffree %st(7) ; fld", pprSize sz, gsp,
554 pprAddr addr, gsemi, gpop dst 1])
556 -- GST sz src addr ==> FFREE %st(7) ; FLD dst ; FSTPsz addr
557 pprInstr g@(GST sz src addr)
558 = pprG g (hcat [gtab, gpush src 0, gsemi,
559 text "fstp", pprSize sz, gsp, pprAddr addr])
561 pprInstr g@(GLDZ dst)
562 = pprG g (hcat [gtab, text "ffree %st(7) ; fldz ; ", gpop dst 1])
563 pprInstr g@(GLD1 dst)
564 = pprG g (hcat [gtab, text "ffree %st(7) ; fld1 ; ", gpop dst 1])
566 pprInstr (GFTOI src dst)
567 = pprInstr (GDTOI src dst)
569 pprInstr g@(GDTOI src dst)
571 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
572 hcat [gtab, gpush src 0],
573 hcat [gtab, text "movzwl 4(%esp), ", reg,
574 text " ; orl $0xC00, ", reg],
575 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
576 hcat [gtab, text "fistpl 0(%esp)"],
577 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
578 hcat [gtab, text "addl $8, %esp"]
581 reg = pprReg II32 dst
583 pprInstr (GITOF src dst)
584 = pprInstr (GITOD src dst)
586 pprInstr g@(GITOD src dst)
587 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
588 text " ; ffree %st(7); fildl (%esp) ; ",
589 gpop dst 1, text " ; addl $4,%esp"])
591 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
592 this far into the jungle AND you give a Rat's Ass (tm) what's going
593 on, here's the deal. Generate code to do a floating point comparison
594 of src1 and src2, of kind cond, and set the Zero flag if true.
596 The complications are to do with handling NaNs correctly. We want the
597 property that if either argument is NaN, then the result of the
598 comparison is False ... except if we're comparing for inequality,
599 in which case the answer is True.
601 Here's how the general (non-inequality) case works. As an
602 example, consider generating the an equality test:
604 pushl %eax -- we need to mess with this
605 <get src1 to top of FPU stack>
606 fcomp <src2 location in FPU stack> and pop pushed src1
607 -- Result of comparison is in FPU Status Register bits
609 fstsw %ax -- Move FPU Status Reg to %ax
610 sahf -- move C3 C2 C0 from %ax to integer flag reg
611 -- now the serious magic begins
612 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
613 sete %al -- %al = if arg1 == arg2 then 1 else 0
614 andb %ah,%al -- %al &= %ah
615 -- so %al == 1 iff (comparable && same); else it holds 0
616 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
617 else %al == 0xFF, ZeroFlag=0
618 -- the zero flag is now set as we desire.
621 The special case of inequality differs thusly:
623 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
624 setne %al -- %al = if arg1 /= arg2 then 1 else 0
625 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
626 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
627 else (%al == 0xFF, ZF=0)
629 pprInstr g@(GCMP cond src1 src2)
630 | case cond of { NE -> True; _ -> False }
632 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
633 hcat [gtab, text "fcomp ", greg src2 1,
634 text "; fstsw %ax ; sahf ; setpe %ah"],
635 hcat [gtab, text "setne %al ; ",
636 text "orb %ah,%al ; decb %al ; popl %eax"]
640 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
641 hcat [gtab, text "fcomp ", greg src2 1,
642 text "; fstsw %ax ; sahf ; setpo %ah"],
643 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
644 text "andb %ah,%al ; decb %al ; popl %eax"]
647 {- On the 486, the flags set by FP compare are the unsigned ones!
648 (This looks like a HACK to me. WDP 96/03)
650 fix_FP_cond :: Cond -> Cond
655 fix_FP_cond EQQ = EQQ
657 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
658 -- there should be no others
661 pprInstr g@(GABS _ src dst)
662 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
664 pprInstr g@(GNEG _ src dst)
665 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
667 pprInstr g@(GSQRT sz src dst)
668 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
669 hcat [gtab, gcoerceto sz, gpop dst 1])
671 pprInstr g@(GSIN sz l1 l2 src dst)
672 = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
674 pprInstr g@(GCOS sz l1 l2 src dst)
675 = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
677 pprInstr g@(GTAN sz l1 l2 src dst)
678 = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
680 -- In the translations for GADD, GMUL, GSUB and GDIV,
681 -- the first two cases are mere optimisations. The otherwise clause
682 -- generates correct code under all circumstances.
684 pprInstr g@(GADD _ src1 src2 dst)
686 = pprG g (text "\t#GADD-xxxcase1" $$
687 hcat [gtab, gpush src2 0,
688 text " ; faddp %st(0),", greg src1 1])
690 = pprG g (text "\t#GADD-xxxcase2" $$
691 hcat [gtab, gpush src1 0,
692 text " ; faddp %st(0),", greg src2 1])
694 = pprG g (hcat [gtab, gpush src1 0,
695 text " ; fadd ", greg src2 1, text ",%st(0)",
699 pprInstr g@(GMUL _ src1 src2 dst)
701 = pprG g (text "\t#GMUL-xxxcase1" $$
702 hcat [gtab, gpush src2 0,
703 text " ; fmulp %st(0),", greg src1 1])
705 = pprG g (text "\t#GMUL-xxxcase2" $$
706 hcat [gtab, gpush src1 0,
707 text " ; fmulp %st(0),", greg src2 1])
709 = pprG g (hcat [gtab, gpush src1 0,
710 text " ; fmul ", greg src2 1, text ",%st(0)",
714 pprInstr g@(GSUB _ src1 src2 dst)
716 = pprG g (text "\t#GSUB-xxxcase1" $$
717 hcat [gtab, gpush src2 0,
718 text " ; fsubrp %st(0),", greg src1 1])
720 = pprG g (text "\t#GSUB-xxxcase2" $$
721 hcat [gtab, gpush src1 0,
722 text " ; fsubp %st(0),", greg src2 1])
724 = pprG g (hcat [gtab, gpush src1 0,
725 text " ; fsub ", greg src2 1, text ",%st(0)",
729 pprInstr g@(GDIV _ src1 src2 dst)
731 = pprG g (text "\t#GDIV-xxxcase1" $$
732 hcat [gtab, gpush src2 0,
733 text " ; fdivrp %st(0),", greg src1 1])
735 = pprG g (text "\t#GDIV-xxxcase2" $$
736 hcat [gtab, gpush src1 0,
737 text " ; fdivp %st(0),", greg src2 1])
739 = pprG g (hcat [gtab, gpush src1 0,
740 text " ; fdiv ", greg src2 1, text ",%st(0)",
745 = vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
746 ptext (sLit "\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)")
750 = panic "X86.Ppr.pprInstr: no match"
753 pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc
754 pprTrigOp op -- fsin, fcos or fptan
755 isTan -- we need a couple of extra steps if we're doing tan
756 l1 l2 -- internal labels for us to use
758 = -- We'll be needing %eax later on
759 hcat [gtab, text "pushl %eax;"] $$
760 -- tan is going to use an extra space on the FP stack
761 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
762 -- First put the value in %st(0) and try to apply the op to it
763 hcat [gpush src 0, text ("; " ++ op)] $$
764 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
765 hcat [gtab, text "fnstsw %ax"] $$
766 hcat [gtab, text "test $0x400,%eax"] $$
767 -- If we were in bounds then jump to the end
768 hcat [gtab, text "je " <> pprCLabel_asm l1] $$
769 -- Otherwise we need to shrink the value. Start by
770 -- loading pi, doubleing it (by adding it to itself),
771 -- and then swapping pi with the value, so the value we
772 -- want to apply op to is in %st(0) again
773 hcat [gtab, text "ffree %st(7); fldpi"] $$
774 hcat [gtab, text "fadd %st(0),%st"] $$
775 hcat [gtab, text "fxch %st(1)"] $$
776 -- Now we have a loop in which we make the value smaller,
777 -- see if it's small enough, and loop if not
778 (pprCLabel_asm l2 <> char ':') $$
779 hcat [gtab, text "fprem1"] $$
780 -- My Debian libc uses fstsw here for the tan code, but I can't
781 -- see any reason why it should need to be different for tan.
782 hcat [gtab, text "fnstsw %ax"] $$
783 hcat [gtab, text "test $0x400,%eax"] $$
784 hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
785 hcat [gtab, text "fstp %st(1)"] $$
786 hcat [gtab, text op] $$
787 (pprCLabel_asm l1 <> char ':') $$
788 -- Pop the 1.0 tan gave us
789 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
791 hcat [gtab, text "popl %eax;"] $$
792 -- And finally make the result the right size
793 hcat [gtab, gcoerceto sz, gpop dst 1]
795 --------------------------
797 -- coerce %st(0) to the specified size
798 gcoerceto :: Size -> Doc
799 gcoerceto FF64 = empty
800 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
801 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
803 gpush :: Reg -> RegNo -> Doc
805 = hcat [text "ffree %st(7) ; fld ", greg reg offset]
808 gpop :: Reg -> RegNo -> Doc
810 = hcat [text "fstp ", greg reg offset]
812 greg :: Reg -> RegNo -> Doc
813 greg reg offset = text "%st(" <> int (gregno reg - 8+offset) <> char ')'
824 gregno :: Reg -> RegNo
825 gregno (RealReg i) = i
826 gregno _ = --pprPanic "gregno" (ppr other)
827 999 -- bogus; only needed for debug printing
829 pprG :: Instr -> Doc -> Doc
831 = (char '#' <> pprGInstr fake) $$ actual
834 pprGInstr :: Instr -> Doc
835 pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst
836 pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst
837 pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst
839 pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst
840 pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst
842 pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst
843 pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
845 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
846 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
848 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
849 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
850 pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
851 pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
852 pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
853 pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
854 pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
856 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
857 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
858 pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg (sLit "gmul") sz src1 src2 dst
859 pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg (sLit "gdiv") sz src1 src2 dst
861 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
863 pprDollImm :: Imm -> Doc
864 pprDollImm i = ptext (sLit "$") <> pprImm i
867 pprOperand :: Size -> Operand -> Doc
868 pprOperand s (OpReg r) = pprReg s r
869 pprOperand _ (OpImm i) = pprDollImm i
870 pprOperand _ (OpAddr ea) = pprAddr ea
873 pprMnemonic_ :: LitString -> Doc
875 char '\t' <> ptext name <> space
878 pprMnemonic :: LitString -> Size -> Doc
879 pprMnemonic name size =
880 char '\t' <> ptext name <> pprSize size <> space
883 pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc
884 pprSizeImmOp name size imm op1
886 pprMnemonic name size,
894 pprSizeOp :: LitString -> Size -> Operand -> Doc
895 pprSizeOp name size op1
897 pprMnemonic name size,
902 pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc
903 pprSizeOpOp name size op1 op2
905 pprMnemonic name size,
912 pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc
913 pprOpOp name size op1 op2
922 pprSizeReg :: LitString -> Size -> Reg -> Doc
923 pprSizeReg name size reg1
925 pprMnemonic name size,
930 pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc
931 pprSizeRegReg name size reg1 reg2
933 pprMnemonic name size,
940 pprRegReg :: LitString -> Reg -> Reg -> Doc
941 pprRegReg name reg1 reg2
944 pprReg wordSize reg1,
950 pprOpReg :: LitString -> Operand -> Reg -> Doc
951 pprOpReg name op1 reg2
954 pprOperand wordSize op1,
960 pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc
961 pprCondRegReg name size cond reg1 reg2
972 pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc
973 pprSizeSizeRegReg name size1 size2 reg1 reg2
987 pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
988 pprSizeRegRegReg name size reg1 reg2 reg3
990 pprMnemonic name size,
999 pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc
1000 pprSizeAddrReg name size op dst
1002 pprMnemonic name size,
1009 pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc
1010 pprSizeRegAddr name size src op
1012 pprMnemonic name size,
1019 pprShift :: LitString -> Size -> Operand -> Operand -> Doc
1020 pprShift name size src dest
1022 pprMnemonic name size,
1023 pprOperand II8 src, -- src is 8-bit sized
1025 pprOperand size dest
1029 pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc
1030 pprSizeOpOpCoerce name size1 size2 op1 op2
1031 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
1032 pprOperand size1 op1,
1034 pprOperand size2 op2
1038 pprCondInstr :: LitString -> Cond -> Doc -> Doc
1039 pprCondInstr name cond arg
1040 = hcat [ char '\t', ptext name, pprCond cond, space, arg]