1 -----------------------------------------------------------------------------
3 -- Pretty-printing assembly language
5 -- (c) The University of Glasgow 1993-2005
7 -----------------------------------------------------------------------------
22 #include "HsVersions.h"
23 #include "nativeGen/NCG.h"
36 import Unique ( pprUnique, Uniquable(..) )
39 import qualified Outputable
40 import Outputable (panic, Outputable)
44 #if i386_TARGET_ARCH && darwin_TARGET_OS
48 -- -----------------------------------------------------------------------------
49 -- Printing this stuff out
51 pprNatCmmTop :: NatCmmTop Instr -> Doc
52 pprNatCmmTop (CmmData section dats) =
53 pprSectionHeader section $$ vcat (map pprData dats)
55 -- special case for split markers:
56 pprNatCmmTop (CmmProc [] lbl (ListGraph [])) = pprLabel lbl
58 pprNatCmmTop (CmmProc info lbl (ListGraph blocks)) =
59 pprSectionHeader Text $$
60 (if null info then -- blocks guaranteed not null, so label needed
63 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
64 pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
67 vcat (map pprData info) $$
68 pprLabel (entryLblToInfoLbl lbl)
70 vcat (map pprBasicBlock blocks)
71 -- above: Even the first block gets a label, because with branch-chain
72 -- elimination, it might be the target of a goto.
73 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
74 -- If we are using the .subsections_via_symbols directive
75 -- (available on recent versions of Darwin),
76 -- we have to make sure that there is some kind of reference
77 -- from the entry code to a label on the _top_ of of the info table,
78 -- so that the linker will not think it is unreferenced and dead-strip
79 -- it. That's why the label is called a DeadStripPreventer (_dsp).
82 <+> pprCLabel_asm (entryLblToInfoLbl lbl)
84 <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
87 $$ pprSizeDecl (if null info then lbl else entryLblToInfoLbl lbl)
89 -- | Output the ELF .size directive.
90 pprSizeDecl :: CLabel -> Doc
93 ptext (sLit "\t.size") <+> pprCLabel_asm lbl
94 <> ptext (sLit ", .-") <> pprCLabel_asm lbl
99 pprBasicBlock :: NatBasicBlock Instr -> Doc
100 pprBasicBlock (BasicBlock blockid instrs) =
101 pprLabel (mkAsmTempLabel (getUnique blockid)) $$
102 vcat (map pprInstr instrs)
105 pprData :: CmmStatic -> Doc
106 pprData (CmmAlign bytes) = pprAlign bytes
107 pprData (CmmDataLabel lbl) = pprLabel lbl
108 pprData (CmmString str) = pprASCII str
111 pprData (CmmUninitialised bytes) = ptext (sLit ".space ") <> int bytes
113 pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
116 pprData (CmmStaticLit lit) = pprDataItem lit
118 pprGloblDecl :: CLabel -> Doc
120 | not (externallyVisibleCLabel lbl) = empty
121 | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
125 pprTypeAndSizeDecl :: CLabel -> Doc
127 pprTypeAndSizeDecl lbl
128 | not (externallyVisibleCLabel lbl) = empty
129 | otherwise = ptext (sLit ".type ") <>
130 pprCLabel_asm lbl <> ptext (sLit ", @object")
136 pprLabel :: CLabel -> Doc
137 pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
140 pprASCII :: [Word8] -> Doc
142 = vcat (map do1 str) $$ do1 0
145 do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
147 pprAlign :: Int -> Doc
151 = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
157 log2 :: Int -> Int -- cache the common ones
162 log2 n = 1 + log2 (n `quot` 2)
165 -- -----------------------------------------------------------------------------
166 -- pprInstr: print an 'Instr'
168 instance Outputable Instr where
169 ppr instr = Outputable.docToSDoc $ pprInstr instr
172 pprReg :: Size -> Reg -> Doc
176 RegReal (RealRegSingle i) -> ppr_reg_no s i
177 RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
178 RegVirtual (VirtualRegI u) -> text "%vI_" <> asmSDoc (pprUnique u)
179 RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
180 RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
181 RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
182 RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
185 ppr_reg_no :: Size -> Int -> Doc
186 ppr_reg_no II8 = ppr_reg_byte
187 ppr_reg_no II16 = ppr_reg_word
188 ppr_reg_no _ = ppr_reg_long
190 ppr_reg_byte i = ptext
192 0 -> sLit "%al"; 1 -> sLit "%bl";
193 2 -> sLit "%cl"; 3 -> sLit "%dl";
194 _ -> sLit "very naughty I386 byte register"
197 ppr_reg_word i = ptext
199 0 -> sLit "%ax"; 1 -> sLit "%bx";
200 2 -> sLit "%cx"; 3 -> sLit "%dx";
201 4 -> sLit "%si"; 5 -> sLit "%di";
202 6 -> sLit "%bp"; 7 -> sLit "%sp";
203 _ -> sLit "very naughty I386 word register"
206 ppr_reg_long i = ptext
208 0 -> sLit "%eax"; 1 -> sLit "%ebx";
209 2 -> sLit "%ecx"; 3 -> sLit "%edx";
210 4 -> sLit "%esi"; 5 -> sLit "%edi";
211 6 -> sLit "%ebp"; 7 -> sLit "%esp";
214 #elif x86_64_TARGET_ARCH
215 ppr_reg_no :: Size -> Int -> Doc
216 ppr_reg_no II8 = ppr_reg_byte
217 ppr_reg_no II16 = ppr_reg_word
218 ppr_reg_no II32 = ppr_reg_long
219 ppr_reg_no _ = ppr_reg_quad
221 ppr_reg_byte i = ptext
223 0 -> sLit "%al"; 1 -> sLit "%bl";
224 2 -> sLit "%cl"; 3 -> sLit "%dl";
225 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
226 6 -> sLit "%bpl"; 7 -> sLit "%spl";
227 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
228 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
229 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
230 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
231 _ -> sLit "very naughty x86_64 byte register"
234 ppr_reg_word i = ptext
236 0 -> sLit "%ax"; 1 -> sLit "%bx";
237 2 -> sLit "%cx"; 3 -> sLit "%dx";
238 4 -> sLit "%si"; 5 -> sLit "%di";
239 6 -> sLit "%bp"; 7 -> sLit "%sp";
240 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
241 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
242 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
243 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
244 _ -> sLit "very naughty x86_64 word register"
247 ppr_reg_long i = ptext
249 0 -> sLit "%eax"; 1 -> sLit "%ebx";
250 2 -> sLit "%ecx"; 3 -> sLit "%edx";
251 4 -> sLit "%esi"; 5 -> sLit "%edi";
252 6 -> sLit "%ebp"; 7 -> sLit "%esp";
253 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
254 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
255 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
256 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
257 _ -> sLit "very naughty x86_64 register"
260 ppr_reg_quad i = ptext
262 0 -> sLit "%rax"; 1 -> sLit "%rbx";
263 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
264 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
265 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
266 8 -> sLit "%r8"; 9 -> sLit "%r9";
267 10 -> sLit "%r10"; 11 -> sLit "%r11";
268 12 -> sLit "%r12"; 13 -> sLit "%r13";
269 14 -> sLit "%r14"; 15 -> sLit "%r15";
273 ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
276 #if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
277 ppr_reg_float :: Int -> LitString
278 ppr_reg_float i = case i of
279 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
280 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
281 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
282 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
283 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
284 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
285 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
286 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
287 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
288 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
289 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
290 _ -> sLit "very naughty x86 register"
293 pprSize :: Size -> Doc
300 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
301 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
305 pprSize_x87 :: Size -> Doc
311 _ -> panic "X86.Ppr.pprSize_x87"
313 pprCond :: Cond -> Doc
316 GEU -> sLit "ae"; LU -> sLit "b";
317 EQQ -> sLit "e"; GTT -> sLit "g";
318 GE -> sLit "ge"; GU -> sLit "a";
319 LTT -> sLit "l"; LE -> sLit "le";
320 LEU -> sLit "be"; NE -> sLit "ne";
321 NEG -> sLit "s"; POS -> sLit "ns";
322 CARRY -> sLit "c"; OFLO -> sLit "o";
323 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
324 ALWAYS -> sLit "mp"})
328 pprImm (ImmInt i) = int i
329 pprImm (ImmInteger i) = integer i
330 pprImm (ImmCLbl l) = pprCLabel_asm l
331 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
332 pprImm (ImmLit s) = s
334 pprImm (ImmFloat _) = ptext (sLit "naughty float immediate")
335 pprImm (ImmDouble _) = ptext (sLit "naughty double immediate")
337 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
338 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
339 <> lparen <> pprImm b <> rparen
343 pprAddr :: AddrMode -> Doc
344 pprAddr (ImmAddr imm off)
345 = let pp_imm = pprImm imm
349 else if (off < 0) then
352 pp_imm <> char '+' <> int off
354 pprAddr (AddrBaseIndex base index displacement)
356 pp_disp = ppr_disp displacement
357 pp_off p = pp_disp <> char '(' <> p <> char ')'
358 pp_reg r = pprReg archWordSize r
360 case (base, index) of
361 (EABaseNone, EAIndexNone) -> pp_disp
362 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
363 (EABaseRip, EAIndexNone) -> pp_off (ptext (sLit "%rip"))
364 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
365 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
367 _ -> panic "X86.Ppr.pprAddr: no match"
370 ppr_disp (ImmInt 0) = empty
371 ppr_disp imm = pprImm imm
374 pprSectionHeader :: Section -> Doc
377 # if darwin_TARGET_OS
380 Text -> ptext (sLit ".text\n\t.align 2")
381 Data -> ptext (sLit ".data\n\t.align 2")
382 ReadOnlyData -> ptext (sLit ".const\n.align 2")
383 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 2")
384 UninitialisedData -> ptext (sLit ".data\n\t.align 2")
385 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
386 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
391 Text -> ptext (sLit ".text\n\t.align 4,0x90")
392 Data -> ptext (sLit ".data\n\t.align 4")
393 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 4")
394 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 4")
395 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 4")
396 ReadOnlyData16 -> ptext (sLit ".section .rodata\n\t.align 16")
397 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
401 #elif x86_64_TARGET_ARCH
402 # if darwin_TARGET_OS
405 Text -> ptext (sLit ".text\n.align 3")
406 Data -> ptext (sLit ".data\n.align 3")
407 ReadOnlyData -> ptext (sLit ".const\n.align 3")
408 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 3")
409 UninitialisedData -> ptext (sLit ".data\n\t.align 3")
410 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
411 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
416 Text -> ptext (sLit ".text\n\t.align 8")
417 Data -> ptext (sLit ".data\n\t.align 8")
418 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 8")
419 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 8")
420 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 8")
421 ReadOnlyData16 -> ptext (sLit ".section .rodata.cst16\n\t.align 16")
422 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
427 pprSectionHeader _ = panic "X86.Ppr.pprSectionHeader: not defined for this architecture"
434 pprDataItem :: CmmLit -> Doc
436 = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit)
440 -- These seem to be common:
441 ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
442 ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
444 ppr_item FF32 (CmmFloat r _)
445 = let bs = floatToBytes (fromRational r)
446 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
448 ppr_item FF64 (CmmFloat r _)
449 = let bs = doubleToBytes (fromRational r)
450 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
452 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
453 ppr_item II16 _ = [ptext (sLit "\t.word\t") <> pprImm imm]
455 #if i386_TARGET_ARCH && darwin_TARGET_OS
456 ppr_item II64 (CmmInt x _) =
457 [ptext (sLit "\t.long\t")
458 <> int (fromIntegral (fromIntegral x :: Word32)),
459 ptext (sLit "\t.long\t")
461 (fromIntegral (x `shiftR` 32) :: Word32))]
463 #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
464 ppr_item II64 _ = [ptext (sLit "\t.quad\t") <> pprImm imm]
466 #if x86_64_TARGET_ARCH && !darwin_TARGET_OS
467 -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
468 -- type, which means we can't do pc-relative 64-bit addresses.
469 -- Fortunately we're assuming the small memory model, in which
470 -- all such offsets will fit into 32 bits, so we have to stick
471 -- to 32-bit offset fields and modify the RTS appropriately
473 -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
476 | isRelativeReloc x =
477 [ptext (sLit "\t.long\t") <> pprImm imm,
478 ptext (sLit "\t.long\t0")]
480 [ptext (sLit "\t.quad\t") <> pprImm imm]
482 isRelativeReloc (CmmLabelDiffOff _ _ _) = True
483 isRelativeReloc _ = False
487 = panic "X86.Ppr.ppr_item: no match"
491 pprInstr :: Instr -> Doc
493 pprInstr (COMMENT _) = empty -- nuke 'em
496 = IF_ARCH_alpha( ((<>) (ptext (sLit "\t# ")) (ftext s))
497 ,IF_ARCH_sparc( ((<>) (ptext (sLit "# ")) (ftext s))
498 ,IF_ARCH_i386( ((<>) (ptext (sLit "# ")) (ftext s))
499 ,IF_ARCH_x86_64( ((<>) (ptext (sLit "# ")) (ftext s))
500 ,IF_ARCH_powerpc( IF_OS_linux(
501 ((<>) (ptext (sLit "# ")) (ftext s)),
502 ((<>) (ptext (sLit "; ")) (ftext s)))
506 = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
508 pprInstr (NEWBLOCK _)
509 = panic "PprMach.pprInstr: NEWBLOCK"
512 = panic "PprMach.pprInstr: LDATA"
515 pprInstr (SPILL reg slot)
517 ptext (sLit "\tSPILL"),
521 ptext (sLit "SLOT") <> parens (int slot)]
523 pprInstr (RELOAD slot reg)
525 ptext (sLit "\tRELOAD"),
527 ptext (sLit "SLOT") <> parens (int slot),
532 pprInstr (MOV size src dst)
533 = pprSizeOpOp (sLit "mov") size src dst
535 pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst
536 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
537 -- movl. But we represent it as a MOVZxL instruction, because
538 -- the reg alloc would tend to throw away a plain reg-to-reg
539 -- move, and we still want it to do that.
541 pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst
542 -- zero-extension only needs to extend to 32 bits: on x86_64,
543 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
544 -- instruction is shorter.
546 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
548 -- here we do some patching, since the physical registers are only set late
549 -- in the code generation.
550 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
552 = pprSizeOpOp (sLit "add") size (OpReg reg2) dst
554 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
556 = pprSizeOpOp (sLit "add") size (OpReg reg1) dst
558 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
560 = pprInstr (ADD size (OpImm displ) dst)
562 pprInstr (LEA size src dst) = pprSizeOpOp (sLit "lea") size src dst
564 pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
565 = pprSizeOp (sLit "dec") size dst
566 pprInstr (ADD size (OpImm (ImmInt 1)) dst)
567 = pprSizeOp (sLit "inc") size dst
568 pprInstr (ADD size src dst)
569 = pprSizeOpOp (sLit "add") size src dst
570 pprInstr (ADC size src dst)
571 = pprSizeOpOp (sLit "adc") size src dst
572 pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
573 pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
575 {- A hack. The Intel documentation says that "The two and three
576 operand forms [of IMUL] may also be used with unsigned operands
577 because the lower half of the product is the same regardless if
578 (sic) the operands are signed or unsigned. The CF and OF flags,
579 however, cannot be used to determine if the upper half of the
580 result is non-zero." So there.
582 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst
583 pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst
585 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
586 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
587 pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
589 pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
590 pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
592 pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
593 pprInstr (SAR size src dst) = pprShift (sLit "sar") size src dst
594 pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst
596 pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src
598 pprInstr (CMP size src dst)
599 | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2
600 | otherwise = pprSizeOpOp (sLit "cmp") size src dst
602 -- This predicate is needed here and nowhere else
608 pprInstr (TEST size src dst) = pprSizeOpOp (sLit "test") size src dst
609 pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op
610 pprInstr (POP size op) = pprSizeOp (sLit "pop") size op
612 -- both unused (SDM):
613 -- pprInstr PUSHA = ptext (sLit "\tpushal")
614 -- pprInstr POPA = ptext (sLit "\tpopal")
616 pprInstr NOP = ptext (sLit "\tnop")
617 pprInstr (CLTD II32) = ptext (sLit "\tcltd")
618 pprInstr (CLTD II64) = ptext (sLit "\tcqto")
620 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
622 pprInstr (JXX cond blockid)
623 = pprCondInstr (sLit "j") cond (pprCLabel_asm lab)
624 where lab = mkAsmTempLabel (getUnique blockid)
626 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
628 pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
629 pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
630 pprInstr (JMP_TBL op _ _ _) = pprInstr (JMP op)
631 pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
632 pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
634 pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
635 pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
636 pprInstr (IMUL2 sz op) = pprSizeOp (sLit "imul") sz op
639 pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
641 pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
643 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
644 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
645 pprInstr (CVTTSS2SIQ sz from to) = pprSizeSizeOpReg (sLit "cvttss2si") FF32 sz from to
646 pprInstr (CVTTSD2SIQ sz from to) = pprSizeSizeOpReg (sLit "cvttsd2si") FF64 sz from to
647 pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
648 pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
650 -- FETCHGOT for PIC on ELF platforms
651 pprInstr (FETCHGOT reg)
652 = vcat [ ptext (sLit "\tcall 1f"),
653 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ],
654 hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
658 -- FETCHPC for PIC on Darwin/x86
659 -- get the instruction pointer into a register
660 -- (Terminology note: the IP is called Program Counter on PPC,
661 -- and it's a good thing to use the same name on both platforms)
662 pprInstr (FETCHPC reg)
663 = vcat [ ptext (sLit "\tcall 1f"),
664 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ]
668 -- -----------------------------------------------------------------------------
669 -- i386 floating-point
671 -- Simulating a flat register set on the x86 FP stack is tricky.
672 -- you have to free %st(7) before pushing anything on the FP reg stack
673 -- so as to preclude the possibility of a FP stack overflow exception.
674 pprInstr g@(GMOV src dst)
678 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
680 -- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
681 pprInstr g@(GLD sz addr dst)
682 = pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
683 pprAddr addr, gsemi, gpop dst 1])
685 -- GST sz src addr ==> FLD dst ; FSTPsz addr
686 pprInstr g@(GST sz src addr)
687 | src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
688 = pprG g (hcat [gtab,
689 text "fst", pprSize_x87 sz, gsp, pprAddr addr])
691 = pprG g (hcat [gtab, gpush src 0, gsemi,
692 text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
694 pprInstr g@(GLDZ dst)
695 = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
696 pprInstr g@(GLD1 dst)
697 = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
699 pprInstr (GFTOI src dst)
700 = pprInstr (GDTOI src dst)
702 pprInstr g@(GDTOI src dst)
704 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
705 hcat [gtab, gpush src 0],
706 hcat [gtab, text "movzwl 4(%esp), ", reg,
707 text " ; orl $0xC00, ", reg],
708 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
709 hcat [gtab, text "fistpl 0(%esp)"],
710 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
711 hcat [gtab, text "addl $8, %esp"]
714 reg = pprReg II32 dst
716 pprInstr (GITOF src dst)
717 = pprInstr (GITOD src dst)
719 pprInstr g@(GITOD src dst)
720 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
721 text " ; fildl (%esp) ; ",
722 gpop dst 1, text " ; addl $4,%esp"])
724 pprInstr g@(GDTOF src dst)
725 = pprG g (vcat [gtab <> gpush src 0,
726 gtab <> text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ;",
729 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
730 this far into the jungle AND you give a Rat's Ass (tm) what's going
731 on, here's the deal. Generate code to do a floating point comparison
732 of src1 and src2, of kind cond, and set the Zero flag if true.
734 The complications are to do with handling NaNs correctly. We want the
735 property that if either argument is NaN, then the result of the
736 comparison is False ... except if we're comparing for inequality,
737 in which case the answer is True.
739 Here's how the general (non-inequality) case works. As an
740 example, consider generating the an equality test:
742 pushl %eax -- we need to mess with this
743 <get src1 to top of FPU stack>
744 fcomp <src2 location in FPU stack> and pop pushed src1
745 -- Result of comparison is in FPU Status Register bits
747 fstsw %ax -- Move FPU Status Reg to %ax
748 sahf -- move C3 C2 C0 from %ax to integer flag reg
749 -- now the serious magic begins
750 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
751 sete %al -- %al = if arg1 == arg2 then 1 else 0
752 andb %ah,%al -- %al &= %ah
753 -- so %al == 1 iff (comparable && same); else it holds 0
754 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
755 else %al == 0xFF, ZeroFlag=0
756 -- the zero flag is now set as we desire.
759 The special case of inequality differs thusly:
761 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
762 setne %al -- %al = if arg1 /= arg2 then 1 else 0
763 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
764 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
765 else (%al == 0xFF, ZF=0)
767 pprInstr g@(GCMP cond src1 src2)
768 | case cond of { NE -> True; _ -> False }
770 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
771 hcat [gtab, text "fcomp ", greg src2 1,
772 text "; fstsw %ax ; sahf ; setpe %ah"],
773 hcat [gtab, text "setne %al ; ",
774 text "orb %ah,%al ; decb %al ; popl %eax"]
778 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
779 hcat [gtab, text "fcomp ", greg src2 1,
780 text "; fstsw %ax ; sahf ; setpo %ah"],
781 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
782 text "andb %ah,%al ; decb %al ; popl %eax"]
785 {- On the 486, the flags set by FP compare are the unsigned ones!
786 (This looks like a HACK to me. WDP 96/03)
788 fix_FP_cond :: Cond -> Cond
793 fix_FP_cond EQQ = EQQ
795 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
796 -- there should be no others
799 pprInstr g@(GABS _ src dst)
800 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
802 pprInstr g@(GNEG _ src dst)
803 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
805 pprInstr g@(GSQRT sz src dst)
806 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
807 hcat [gtab, gcoerceto sz, gpop dst 1])
809 pprInstr g@(GSIN sz l1 l2 src dst)
810 = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
812 pprInstr g@(GCOS sz l1 l2 src dst)
813 = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
815 pprInstr g@(GTAN sz l1 l2 src dst)
816 = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
818 -- In the translations for GADD, GMUL, GSUB and GDIV,
819 -- the first two cases are mere optimisations. The otherwise clause
820 -- generates correct code under all circumstances.
822 pprInstr g@(GADD _ src1 src2 dst)
824 = pprG g (text "\t#GADD-xxxcase1" $$
825 hcat [gtab, gpush src2 0,
826 text " ; faddp %st(0),", greg src1 1])
828 = pprG g (text "\t#GADD-xxxcase2" $$
829 hcat [gtab, gpush src1 0,
830 text " ; faddp %st(0),", greg src2 1])
832 = pprG g (hcat [gtab, gpush src1 0,
833 text " ; fadd ", greg src2 1, text ",%st(0)",
837 pprInstr g@(GMUL _ src1 src2 dst)
839 = pprG g (text "\t#GMUL-xxxcase1" $$
840 hcat [gtab, gpush src2 0,
841 text " ; fmulp %st(0),", greg src1 1])
843 = pprG g (text "\t#GMUL-xxxcase2" $$
844 hcat [gtab, gpush src1 0,
845 text " ; fmulp %st(0),", greg src2 1])
847 = pprG g (hcat [gtab, gpush src1 0,
848 text " ; fmul ", greg src2 1, text ",%st(0)",
852 pprInstr g@(GSUB _ src1 src2 dst)
854 = pprG g (text "\t#GSUB-xxxcase1" $$
855 hcat [gtab, gpush src2 0,
856 text " ; fsubrp %st(0),", greg src1 1])
858 = pprG g (text "\t#GSUB-xxxcase2" $$
859 hcat [gtab, gpush src1 0,
860 text " ; fsubp %st(0),", greg src2 1])
862 = pprG g (hcat [gtab, gpush src1 0,
863 text " ; fsub ", greg src2 1, text ",%st(0)",
867 pprInstr g@(GDIV _ src1 src2 dst)
869 = pprG g (text "\t#GDIV-xxxcase1" $$
870 hcat [gtab, gpush src2 0,
871 text " ; fdivrp %st(0),", greg src1 1])
873 = pprG g (text "\t#GDIV-xxxcase2" $$
874 hcat [gtab, gpush src1 0,
875 text " ; fdivp %st(0),", greg src2 1])
877 = pprG g (hcat [gtab, gpush src1 0,
878 text " ; fdiv ", greg src2 1, text ",%st(0)",
883 = vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
884 ptext (sLit "\tffree %st(4) ;ffree %st(5)")
888 = panic "X86.Ppr.pprInstr: no match"
891 pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc
892 pprTrigOp op -- fsin, fcos or fptan
893 isTan -- we need a couple of extra steps if we're doing tan
894 l1 l2 -- internal labels for us to use
896 = -- We'll be needing %eax later on
897 hcat [gtab, text "pushl %eax;"] $$
898 -- tan is going to use an extra space on the FP stack
899 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
900 -- First put the value in %st(0) and try to apply the op to it
901 hcat [gpush src 0, text ("; " ++ op)] $$
902 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
903 hcat [gtab, text "fnstsw %ax"] $$
904 hcat [gtab, text "test $0x400,%eax"] $$
905 -- If we were in bounds then jump to the end
906 hcat [gtab, text "je " <> pprCLabel_asm l1] $$
907 -- Otherwise we need to shrink the value. Start by
908 -- loading pi, doubleing it (by adding it to itself),
909 -- and then swapping pi with the value, so the value we
910 -- want to apply op to is in %st(0) again
911 hcat [gtab, text "ffree %st(7); fldpi"] $$
912 hcat [gtab, text "fadd %st(0),%st"] $$
913 hcat [gtab, text "fxch %st(1)"] $$
914 -- Now we have a loop in which we make the value smaller,
915 -- see if it's small enough, and loop if not
916 (pprCLabel_asm l2 <> char ':') $$
917 hcat [gtab, text "fprem1"] $$
918 -- My Debian libc uses fstsw here for the tan code, but I can't
919 -- see any reason why it should need to be different for tan.
920 hcat [gtab, text "fnstsw %ax"] $$
921 hcat [gtab, text "test $0x400,%eax"] $$
922 hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
923 hcat [gtab, text "fstp %st(1)"] $$
924 hcat [gtab, text op] $$
925 (pprCLabel_asm l1 <> char ':') $$
926 -- Pop the 1.0 tan gave us
927 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
929 hcat [gtab, text "popl %eax;"] $$
930 -- And finally make the result the right size
931 hcat [gtab, gcoerceto sz, gpop dst 1]
933 --------------------------
935 -- coerce %st(0) to the specified size
936 gcoerceto :: Size -> Doc
937 gcoerceto FF64 = empty
938 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
939 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
941 gpush :: Reg -> RegNo -> Doc
943 = hcat [text "fld ", greg reg offset]
945 gpop :: Reg -> RegNo -> Doc
947 = hcat [text "fstp ", greg reg offset]
949 greg :: Reg -> RegNo -> Doc
950 greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
961 gregno :: Reg -> RegNo
962 gregno (RegReal (RealRegSingle i)) = i
963 gregno _ = --pprPanic "gregno" (ppr other)
964 999 -- bogus; only needed for debug printing
966 pprG :: Instr -> Doc -> Doc
968 = (char '#' <> pprGInstr fake) $$ actual
971 pprGInstr :: Instr -> Doc
972 pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst
973 pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst
974 pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst
976 pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst
977 pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst
979 pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst
980 pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
982 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
983 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
984 pprGInstr (GDTOF src dst) = pprSizeSizeRegReg (sLit "gdtof") FF64 FF32 src dst
986 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
987 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
988 pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
989 pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
990 pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
991 pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
992 pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
994 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
995 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
996 pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg (sLit "gmul") sz src1 src2 dst
997 pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg (sLit "gdiv") sz src1 src2 dst
999 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
1001 pprDollImm :: Imm -> Doc
1002 pprDollImm i = ptext (sLit "$") <> pprImm i
1005 pprOperand :: Size -> Operand -> Doc
1006 pprOperand s (OpReg r) = pprReg s r
1007 pprOperand _ (OpImm i) = pprDollImm i
1008 pprOperand _ (OpAddr ea) = pprAddr ea
1011 pprMnemonic_ :: LitString -> Doc
1013 char '\t' <> ptext name <> space
1016 pprMnemonic :: LitString -> Size -> Doc
1017 pprMnemonic name size =
1018 char '\t' <> ptext name <> pprSize size <> space
1021 pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc
1022 pprSizeImmOp name size imm op1
1024 pprMnemonic name size,
1032 pprSizeOp :: LitString -> Size -> Operand -> Doc
1033 pprSizeOp name size op1
1035 pprMnemonic name size,
1040 pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1041 pprSizeOpOp name size op1 op2
1043 pprMnemonic name size,
1044 pprOperand size op1,
1050 pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1051 pprOpOp name size op1 op2
1054 pprOperand size op1,
1060 pprSizeReg :: LitString -> Size -> Reg -> Doc
1061 pprSizeReg name size reg1
1063 pprMnemonic name size,
1068 pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc
1069 pprSizeRegReg name size reg1 reg2
1071 pprMnemonic name size,
1078 pprRegReg :: LitString -> Reg -> Reg -> Doc
1079 pprRegReg name reg1 reg2
1082 pprReg archWordSize reg1,
1084 pprReg archWordSize reg2
1088 pprSizeOpReg :: LitString -> Size -> Operand -> Reg -> Doc
1089 pprSizeOpReg name size op1 reg2
1091 pprMnemonic name size,
1092 pprOperand size op1,
1094 pprReg archWordSize reg2
1097 pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc
1098 pprCondRegReg name size cond reg1 reg2
1109 pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc
1110 pprSizeSizeRegReg name size1 size2 reg1 reg2
1122 pprSizeSizeOpReg :: LitString -> Size -> Size -> Operand -> Reg -> Doc
1123 pprSizeSizeOpReg name size1 size2 op1 reg2
1125 pprMnemonic name size2,
1126 pprOperand size1 op1,
1131 pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
1132 pprSizeRegRegReg name size reg1 reg2 reg3
1134 pprMnemonic name size,
1143 pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc
1144 pprSizeAddrReg name size op dst
1146 pprMnemonic name size,
1153 pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc
1154 pprSizeRegAddr name size src op
1156 pprMnemonic name size,
1163 pprShift :: LitString -> Size -> Operand -> Operand -> Doc
1164 pprShift name size src dest
1166 pprMnemonic name size,
1167 pprOperand II8 src, -- src is 8-bit sized
1169 pprOperand size dest
1173 pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc
1174 pprSizeOpOpCoerce name size1 size2 op1 op2
1175 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
1176 pprOperand size1 op1,
1178 pprOperand size2 op2
1182 pprCondInstr :: LitString -> Cond -> Doc -> Doc
1183 pprCondInstr name cond arg
1184 = hcat [ char '\t', ptext name, pprCond cond, space, arg]