1 -----------------------------------------------------------------------------
3 -- Pretty-printing assembly language
5 -- (c) The University of Glasgow 1993-2005
7 -----------------------------------------------------------------------------
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
39 import Unique ( pprUnique )
42 import qualified Outputable
43 import Outputable (panic, Outputable)
46 import Distribution.System
48 #if i386_TARGET_ARCH && darwin_TARGET_OS
52 -- -----------------------------------------------------------------------------
53 -- Printing this stuff out
55 pprNatCmmTop :: NatCmmTop Instr -> Doc
56 pprNatCmmTop (CmmData section dats) =
57 pprSectionHeader section $$ vcat (map pprData dats)
59 -- special case for split markers:
60 pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
62 pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
63 pprSectionHeader Text $$
64 (if null info then -- blocks guaranteed not null, so label needed
67 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
68 pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
71 vcat (map pprData info) $$
72 pprLabel (entryLblToInfoLbl lbl)
74 vcat (map pprBasicBlock blocks)
75 -- above: Even the first block gets a label, because with branch-chain
76 -- elimination, it might be the target of a goto.
77 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
78 -- If we are using the .subsections_via_symbols directive
79 -- (available on recent versions of Darwin),
80 -- we have to make sure that there is some kind of reference
81 -- from the entry code to a label on the _top_ of of the info table,
82 -- so that the linker will not think it is unreferenced and dead-strip
83 -- it. That's why the label is called a DeadStripPreventer (_dsp).
86 <+> pprCLabel_asm (entryLblToInfoLbl lbl)
88 <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
93 pprBasicBlock :: NatBasicBlock Instr -> Doc
94 pprBasicBlock (BasicBlock (BlockId id) instrs) =
95 pprLabel (mkAsmTempLabel id) $$
96 vcat (map pprInstr instrs)
99 pprData :: CmmStatic -> Doc
100 pprData (CmmAlign bytes) = pprAlign bytes
101 pprData (CmmDataLabel lbl) = pprLabel lbl
102 pprData (CmmString str) = pprASCII str
105 pprData (CmmUninitialised bytes) = ptext (sLit ".space ") <> int bytes
107 pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
110 pprData (CmmStaticLit lit) = pprDataItem lit
112 pprGloblDecl :: CLabel -> Doc
114 | not (externallyVisibleCLabel lbl) = empty
115 | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
119 pprTypeAndSizeDecl :: CLabel -> Doc
121 pprTypeAndSizeDecl lbl
122 | not (externallyVisibleCLabel lbl) = empty
123 | otherwise = ptext (sLit ".type ") <>
124 pprCLabel_asm lbl <> ptext (sLit ", @object")
130 pprLabel :: CLabel -> Doc
131 pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
134 pprASCII :: [Word8] -> Doc
136 = vcat (map do1 str) $$ do1 0
139 do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
141 pprAlign :: Int -> Doc
145 = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
151 log2 :: Int -> Int -- cache the common ones
156 log2 n = 1 + log2 (n `quot` 2)
159 -- -----------------------------------------------------------------------------
160 -- pprInstr: print an 'Instr'
162 instance Outputable Instr where
163 ppr instr = Outputable.docToSDoc $ pprInstr instr
166 pprUserReg :: Reg -> Doc
168 | cTargetArch == I386 = pprReg II32
169 | cTargetArch == X86_64 = pprReg II64
170 | otherwise = panic "X86.Ppr.pprUserReg: not defined"
172 pprReg :: Size -> Reg -> Doc
176 RegReal (RealRegSingle i) -> ppr_reg_no s i
177 RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
178 RegVirtual (VirtualRegI u) -> text "%vI_" <> asmSDoc (pprUnique u)
179 RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
180 RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
181 RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
182 RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
185 ppr_reg_no :: Size -> Int -> Doc
186 ppr_reg_no II8 = ppr_reg_byte
187 ppr_reg_no II16 = ppr_reg_word
188 ppr_reg_no _ = ppr_reg_long
190 ppr_reg_byte i = ptext
192 0 -> sLit "%al"; 1 -> sLit "%bl";
193 2 -> sLit "%cl"; 3 -> sLit "%dl";
194 _ -> sLit "very naughty I386 byte register"
197 ppr_reg_word i = ptext
199 0 -> sLit "%ax"; 1 -> sLit "%bx";
200 2 -> sLit "%cx"; 3 -> sLit "%dx";
201 4 -> sLit "%si"; 5 -> sLit "%di";
202 6 -> sLit "%bp"; 7 -> sLit "%sp";
203 _ -> sLit "very naughty I386 word register"
206 ppr_reg_long i = ptext
208 0 -> sLit "%eax"; 1 -> sLit "%ebx";
209 2 -> sLit "%ecx"; 3 -> sLit "%edx";
210 4 -> sLit "%esi"; 5 -> sLit "%edi";
211 6 -> sLit "%ebp"; 7 -> sLit "%esp";
214 #elif x86_64_TARGET_ARCH
215 ppr_reg_no :: Size -> Int -> Doc
216 ppr_reg_no II8 = ppr_reg_byte
217 ppr_reg_no II16 = ppr_reg_word
218 ppr_reg_no II32 = ppr_reg_long
219 ppr_reg_no _ = ppr_reg_quad
221 ppr_reg_byte i = ptext
223 0 -> sLit "%al"; 1 -> sLit "%bl";
224 2 -> sLit "%cl"; 3 -> sLit "%dl";
225 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
226 6 -> sLit "%bpl"; 7 -> sLit "%spl";
227 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
228 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
229 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
230 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
231 _ -> sLit "very naughty x86_64 byte register"
234 ppr_reg_word i = ptext
236 0 -> sLit "%ax"; 1 -> sLit "%bx";
237 2 -> sLit "%cx"; 3 -> sLit "%dx";
238 4 -> sLit "%si"; 5 -> sLit "%di";
239 6 -> sLit "%bp"; 7 -> sLit "%sp";
240 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
241 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
242 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
243 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
244 _ -> sLit "very naughty x86_64 word register"
247 ppr_reg_long i = ptext
249 0 -> sLit "%eax"; 1 -> sLit "%ebx";
250 2 -> sLit "%ecx"; 3 -> sLit "%edx";
251 4 -> sLit "%esi"; 5 -> sLit "%edi";
252 6 -> sLit "%ebp"; 7 -> sLit "%esp";
253 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
254 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
255 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
256 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
257 _ -> sLit "very naughty x86_64 register"
260 ppr_reg_quad i = ptext
262 0 -> sLit "%rax"; 1 -> sLit "%rbx";
263 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
264 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
265 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
266 8 -> sLit "%r8"; 9 -> sLit "%r9";
267 10 -> sLit "%r10"; 11 -> sLit "%r11";
268 12 -> sLit "%r12"; 13 -> sLit "%r13";
269 14 -> sLit "%r14"; 15 -> sLit "%r15";
273 ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
276 #if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
277 ppr_reg_float :: Int -> LitString
278 ppr_reg_float i = case i of
279 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
280 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
281 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
282 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
283 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
284 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
285 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
286 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
287 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
288 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
289 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
290 _ -> sLit "very naughty x86 register"
293 pprSize :: Size -> Doc
300 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
301 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
305 pprSize_x87 :: Size -> Doc
311 _ -> panic "X86.Ppr.pprSize_x87"
313 pprCond :: Cond -> Doc
316 GEU -> sLit "ae"; LU -> sLit "b";
317 EQQ -> sLit "e"; GTT -> sLit "g";
318 GE -> sLit "ge"; GU -> sLit "a";
319 LTT -> sLit "l"; LE -> sLit "le";
320 LEU -> sLit "be"; NE -> sLit "ne";
321 NEG -> sLit "s"; POS -> sLit "ns";
322 CARRY -> sLit "c"; OFLO -> sLit "o";
323 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
324 ALWAYS -> sLit "mp"})
328 pprImm (ImmInt i) = int i
329 pprImm (ImmInteger i) = integer i
330 pprImm (ImmCLbl l) = pprCLabel_asm l
331 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
332 pprImm (ImmLit s) = s
334 pprImm (ImmFloat _) = ptext (sLit "naughty float immediate")
335 pprImm (ImmDouble _) = ptext (sLit "naughty double immediate")
337 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
338 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
339 <> lparen <> pprImm b <> rparen
343 pprAddr :: AddrMode -> Doc
344 pprAddr (ImmAddr imm off)
345 = let pp_imm = pprImm imm
349 else if (off < 0) then
352 pp_imm <> char '+' <> int off
354 pprAddr (AddrBaseIndex base index displacement)
356 pp_disp = ppr_disp displacement
357 pp_off p = pp_disp <> char '(' <> p <> char ')'
358 pp_reg r = pprReg archWordSize r
360 case (base, index) of
361 (EABaseNone, EAIndexNone) -> pp_disp
362 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
363 (EABaseRip, EAIndexNone) -> pp_off (ptext (sLit "%rip"))
364 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
365 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
367 _ -> panic "X86.Ppr.pprAddr: no match"
370 ppr_disp (ImmInt 0) = empty
371 ppr_disp imm = pprImm imm
374 pprSectionHeader :: Section -> Doc
377 # if darwin_TARGET_OS
380 Text -> ptext (sLit ".text\n\t.align 2")
381 Data -> ptext (sLit ".data\n\t.align 2")
382 ReadOnlyData -> ptext (sLit ".const\n.align 2")
383 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 2")
384 UninitialisedData -> ptext (sLit ".data\n\t.align 2")
385 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
386 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
391 Text -> ptext (sLit ".text\n\t.align 4,0x90")
392 Data -> ptext (sLit ".data\n\t.align 4")
393 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 4")
394 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 4")
395 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 4")
396 ReadOnlyData16 -> ptext (sLit ".section .rodata\n\t.align 16")
397 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
401 #elif x86_64_TARGET_ARCH
402 # if darwin_TARGET_OS
405 Text -> ptext (sLit ".text\n.align 3")
406 Data -> ptext (sLit ".data\n.align 3")
407 ReadOnlyData -> ptext (sLit ".const\n.align 3")
408 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 3")
409 UninitialisedData -> ptext (sLit ".data\n\t.align 3")
410 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
411 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
416 Text -> ptext (sLit ".text\n\t.align 8")
417 Data -> ptext (sLit ".data\n\t.align 8")
418 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 8")
419 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 8")
420 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 8")
421 ReadOnlyData16 -> ptext (sLit ".section .rodata.cst16\n\t.align 16")
422 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
427 pprSectionHeader _ = panic "X86.Ppr.pprSectionHeader: not defined for this architecture"
434 pprDataItem :: CmmLit -> Doc
436 = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit)
440 -- These seem to be common:
441 ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
442 ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
444 ppr_item FF32 (CmmFloat r _)
445 = let bs = floatToBytes (fromRational r)
446 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
448 ppr_item FF64 (CmmFloat r _)
449 = let bs = doubleToBytes (fromRational r)
450 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
452 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
453 ppr_item II16 _ = [ptext (sLit "\t.word\t") <> pprImm imm]
455 #if i386_TARGET_ARCH && darwin_TARGET_OS
456 ppr_item II64 (CmmInt x _) =
457 [ptext (sLit "\t.long\t")
458 <> int (fromIntegral (fromIntegral x :: Word32)),
459 ptext (sLit "\t.long\t")
461 (fromIntegral (x `shiftR` 32) :: Word32))]
463 #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
464 ppr_item II64 _ = [ptext (sLit "\t.quad\t") <> pprImm imm]
466 #if x86_64_TARGET_ARCH && !darwin_TARGET_OS
467 -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
468 -- type, which means we can't do pc-relative 64-bit addresses.
469 -- Fortunately we're assuming the small memory model, in which
470 -- all such offsets will fit into 32 bits, so we have to stick
471 -- to 32-bit offset fields and modify the RTS appropriately
473 -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
476 | isRelativeReloc x =
477 [ptext (sLit "\t.long\t") <> pprImm imm,
478 ptext (sLit "\t.long\t0")]
480 [ptext (sLit "\t.quad\t") <> pprImm imm]
482 isRelativeReloc (CmmLabelDiffOff _ _ _) = True
483 isRelativeReloc _ = False
487 = panic "X86.Ppr.ppr_item: no match"
491 pprInstr :: Instr -> Doc
493 pprInstr (COMMENT _) = empty -- nuke 'em
496 = IF_ARCH_alpha( ((<>) (ptext (sLit "\t# ")) (ftext s))
497 ,IF_ARCH_sparc( ((<>) (ptext (sLit "# ")) (ftext s))
498 ,IF_ARCH_i386( ((<>) (ptext (sLit "# ")) (ftext s))
499 ,IF_ARCH_x86_64( ((<>) (ptext (sLit "# ")) (ftext s))
500 ,IF_ARCH_powerpc( IF_OS_linux(
501 ((<>) (ptext (sLit "# ")) (ftext s)),
502 ((<>) (ptext (sLit "; ")) (ftext s)))
506 = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
508 pprInstr (NEWBLOCK _)
509 = panic "PprMach.pprInstr: NEWBLOCK"
512 = panic "PprMach.pprInstr: LDATA"
515 pprInstr (SPILL reg slot)
517 ptext (sLit "\tSPILL"),
521 ptext (sLit "SLOT") <> parens (int slot)]
523 pprInstr (RELOAD slot reg)
525 ptext (sLit "\tRELOAD"),
527 ptext (sLit "SLOT") <> parens (int slot),
532 pprInstr (MOV size src dst)
533 = pprSizeOpOp (sLit "mov") size src dst
535 pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst
536 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
537 -- movl. But we represent it as a MOVZxL instruction, because
538 -- the reg alloc would tend to throw away a plain reg-to-reg
539 -- move, and we still want it to do that.
541 pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst
542 -- zero-extension only needs to extend to 32 bits: on x86_64,
543 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
544 -- instruction is shorter.
546 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
548 -- here we do some patching, since the physical registers are only set late
549 -- in the code generation.
550 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
552 = pprSizeOpOp (sLit "add") size (OpReg reg2) dst
554 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
556 = pprSizeOpOp (sLit "add") size (OpReg reg1) dst
558 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
560 = pprInstr (ADD size (OpImm displ) dst)
562 pprInstr (LEA size src dst) = pprSizeOpOp (sLit "lea") size src dst
564 pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
565 = pprSizeOp (sLit "dec") size dst
566 pprInstr (ADD size (OpImm (ImmInt 1)) dst)
567 = pprSizeOp (sLit "inc") size dst
568 pprInstr (ADD size src dst)
569 = pprSizeOpOp (sLit "add") size src dst
570 pprInstr (ADC size src dst)
571 = pprSizeOpOp (sLit "adc") size src dst
572 pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
573 pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
575 {- A hack. The Intel documentation says that "The two and three
576 operand forms [of IMUL] may also be used with unsigned operands
577 because the lower half of the product is the same regardless if
578 (sic) the operands are signed or unsigned. The CF and OF flags,
579 however, cannot be used to determine if the upper half of the
580 result is non-zero." So there.
582 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst
583 pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst
585 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
586 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
587 pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
589 pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
590 pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
592 pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
593 pprInstr (SAR size src dst) = pprShift (sLit "sar") size src dst
594 pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst
596 pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src
598 pprInstr (CMP size src dst)
599 | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2
600 | otherwise = pprSizeOpOp (sLit "cmp") size src dst
602 -- This predicate is needed here and nowhere else
608 pprInstr (TEST size src dst) = pprSizeOpOp (sLit "test") size src dst
609 pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op
610 pprInstr (POP size op) = pprSizeOp (sLit "pop") size op
612 -- both unused (SDM):
613 -- pprInstr PUSHA = ptext (sLit "\tpushal")
614 -- pprInstr POPA = ptext (sLit "\tpopal")
616 pprInstr NOP = ptext (sLit "\tnop")
617 pprInstr (CLTD II32) = ptext (sLit "\tcltd")
618 pprInstr (CLTD II64) = ptext (sLit "\tcqto")
620 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
622 pprInstr (JXX cond (BlockId id))
623 = pprCondInstr (sLit "j") cond (pprCLabel_asm lab)
624 where lab = mkAsmTempLabel id
626 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
628 pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
629 pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
630 pprInstr (JMP_TBL op _) = pprInstr (JMP op)
631 pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
632 pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
634 pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
635 pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
636 pprInstr (IMUL2 sz op) = pprSizeOp (sLit "imul") sz op
639 pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
641 pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
643 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
644 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
645 pprInstr (CVTTSS2SIQ sz from to) = pprSizeOpReg (sLit "cvttss2si") sz from to
646 pprInstr (CVTTSD2SIQ sz from to) = pprSizeOpReg (sLit "cvttsd2si") sz from to
647 pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
648 pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
650 -- FETCHGOT for PIC on ELF platforms
651 pprInstr (FETCHGOT reg)
652 = vcat [ ptext (sLit "\tcall 1f"),
653 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ],
654 hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
658 -- FETCHPC for PIC on Darwin/x86
659 -- get the instruction pointer into a register
660 -- (Terminology note: the IP is called Program Counter on PPC,
661 -- and it's a good thing to use the same name on both platforms)
662 pprInstr (FETCHPC reg)
663 = vcat [ ptext (sLit "\tcall 1f"),
664 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ]
668 -- -----------------------------------------------------------------------------
669 -- i386 floating-point
671 -- Simulating a flat register set on the x86 FP stack is tricky.
672 -- you have to free %st(7) before pushing anything on the FP reg stack
673 -- so as to preclude the possibility of a FP stack overflow exception.
674 pprInstr g@(GMOV src dst)
678 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
680 -- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
681 pprInstr g@(GLD sz addr dst)
682 = pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
683 pprAddr addr, gsemi, gpop dst 1])
685 -- GST sz src addr ==> FLD dst ; FSTPsz addr
686 pprInstr g@(GST sz src addr)
687 | src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
688 = pprG g (hcat [gtab,
689 text "fst", pprSize_x87 sz, gsp, pprAddr addr])
691 = pprG g (hcat [gtab, gpush src 0, gsemi,
692 text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
694 pprInstr g@(GLDZ dst)
695 = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
696 pprInstr g@(GLD1 dst)
697 = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
699 pprInstr (GFTOI src dst)
700 = pprInstr (GDTOI src dst)
702 pprInstr g@(GDTOI src dst)
704 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
705 hcat [gtab, gpush src 0],
706 hcat [gtab, text "movzwl 4(%esp), ", reg,
707 text " ; orl $0xC00, ", reg],
708 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
709 hcat [gtab, text "fistpl 0(%esp)"],
710 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
711 hcat [gtab, text "addl $8, %esp"]
714 reg = pprReg II32 dst
716 pprInstr (GITOF src dst)
717 = pprInstr (GITOD src dst)
719 pprInstr g@(GITOD src dst)
720 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
721 text " ; fildl (%esp) ; ",
722 gpop dst 1, text " ; addl $4,%esp"])
724 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
725 this far into the jungle AND you give a Rat's Ass (tm) what's going
726 on, here's the deal. Generate code to do a floating point comparison
727 of src1 and src2, of kind cond, and set the Zero flag if true.
729 The complications are to do with handling NaNs correctly. We want the
730 property that if either argument is NaN, then the result of the
731 comparison is False ... except if we're comparing for inequality,
732 in which case the answer is True.
734 Here's how the general (non-inequality) case works. As an
735 example, consider generating the an equality test:
737 pushl %eax -- we need to mess with this
738 <get src1 to top of FPU stack>
739 fcomp <src2 location in FPU stack> and pop pushed src1
740 -- Result of comparison is in FPU Status Register bits
742 fstsw %ax -- Move FPU Status Reg to %ax
743 sahf -- move C3 C2 C0 from %ax to integer flag reg
744 -- now the serious magic begins
745 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
746 sete %al -- %al = if arg1 == arg2 then 1 else 0
747 andb %ah,%al -- %al &= %ah
748 -- so %al == 1 iff (comparable && same); else it holds 0
749 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
750 else %al == 0xFF, ZeroFlag=0
751 -- the zero flag is now set as we desire.
754 The special case of inequality differs thusly:
756 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
757 setne %al -- %al = if arg1 /= arg2 then 1 else 0
758 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
759 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
760 else (%al == 0xFF, ZF=0)
762 pprInstr g@(GCMP cond src1 src2)
763 | case cond of { NE -> True; _ -> False }
765 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
766 hcat [gtab, text "fcomp ", greg src2 1,
767 text "; fstsw %ax ; sahf ; setpe %ah"],
768 hcat [gtab, text "setne %al ; ",
769 text "orb %ah,%al ; decb %al ; popl %eax"]
773 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
774 hcat [gtab, text "fcomp ", greg src2 1,
775 text "; fstsw %ax ; sahf ; setpo %ah"],
776 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
777 text "andb %ah,%al ; decb %al ; popl %eax"]
780 {- On the 486, the flags set by FP compare are the unsigned ones!
781 (This looks like a HACK to me. WDP 96/03)
783 fix_FP_cond :: Cond -> Cond
788 fix_FP_cond EQQ = EQQ
790 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
791 -- there should be no others
794 pprInstr g@(GABS _ src dst)
795 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
797 pprInstr g@(GNEG _ src dst)
798 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
800 pprInstr g@(GSQRT sz src dst)
801 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
802 hcat [gtab, gcoerceto sz, gpop dst 1])
804 pprInstr g@(GSIN sz l1 l2 src dst)
805 = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
807 pprInstr g@(GCOS sz l1 l2 src dst)
808 = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
810 pprInstr g@(GTAN sz l1 l2 src dst)
811 = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
813 -- In the translations for GADD, GMUL, GSUB and GDIV,
814 -- the first two cases are mere optimisations. The otherwise clause
815 -- generates correct code under all circumstances.
817 pprInstr g@(GADD _ src1 src2 dst)
819 = pprG g (text "\t#GADD-xxxcase1" $$
820 hcat [gtab, gpush src2 0,
821 text " ; faddp %st(0),", greg src1 1])
823 = pprG g (text "\t#GADD-xxxcase2" $$
824 hcat [gtab, gpush src1 0,
825 text " ; faddp %st(0),", greg src2 1])
827 = pprG g (hcat [gtab, gpush src1 0,
828 text " ; fadd ", greg src2 1, text ",%st(0)",
832 pprInstr g@(GMUL _ src1 src2 dst)
834 = pprG g (text "\t#GMUL-xxxcase1" $$
835 hcat [gtab, gpush src2 0,
836 text " ; fmulp %st(0),", greg src1 1])
838 = pprG g (text "\t#GMUL-xxxcase2" $$
839 hcat [gtab, gpush src1 0,
840 text " ; fmulp %st(0),", greg src2 1])
842 = pprG g (hcat [gtab, gpush src1 0,
843 text " ; fmul ", greg src2 1, text ",%st(0)",
847 pprInstr g@(GSUB _ src1 src2 dst)
849 = pprG g (text "\t#GSUB-xxxcase1" $$
850 hcat [gtab, gpush src2 0,
851 text " ; fsubrp %st(0),", greg src1 1])
853 = pprG g (text "\t#GSUB-xxxcase2" $$
854 hcat [gtab, gpush src1 0,
855 text " ; fsubp %st(0),", greg src2 1])
857 = pprG g (hcat [gtab, gpush src1 0,
858 text " ; fsub ", greg src2 1, text ",%st(0)",
862 pprInstr g@(GDIV _ src1 src2 dst)
864 = pprG g (text "\t#GDIV-xxxcase1" $$
865 hcat [gtab, gpush src2 0,
866 text " ; fdivrp %st(0),", greg src1 1])
868 = pprG g (text "\t#GDIV-xxxcase2" $$
869 hcat [gtab, gpush src1 0,
870 text " ; fdivp %st(0),", greg src2 1])
872 = pprG g (hcat [gtab, gpush src1 0,
873 text " ; fdiv ", greg src2 1, text ",%st(0)",
878 = vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
879 ptext (sLit "\tffree %st(4) ;ffree %st(5)")
883 = panic "X86.Ppr.pprInstr: no match"
886 pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc
887 pprTrigOp op -- fsin, fcos or fptan
888 isTan -- we need a couple of extra steps if we're doing tan
889 l1 l2 -- internal labels for us to use
891 = -- We'll be needing %eax later on
892 hcat [gtab, text "pushl %eax;"] $$
893 -- tan is going to use an extra space on the FP stack
894 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
895 -- First put the value in %st(0) and try to apply the op to it
896 hcat [gpush src 0, text ("; " ++ op)] $$
897 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
898 hcat [gtab, text "fnstsw %ax"] $$
899 hcat [gtab, text "test $0x400,%eax"] $$
900 -- If we were in bounds then jump to the end
901 hcat [gtab, text "je " <> pprCLabel_asm l1] $$
902 -- Otherwise we need to shrink the value. Start by
903 -- loading pi, doubleing it (by adding it to itself),
904 -- and then swapping pi with the value, so the value we
905 -- want to apply op to is in %st(0) again
906 hcat [gtab, text "ffree %st(7); fldpi"] $$
907 hcat [gtab, text "fadd %st(0),%st"] $$
908 hcat [gtab, text "fxch %st(1)"] $$
909 -- Now we have a loop in which we make the value smaller,
910 -- see if it's small enough, and loop if not
911 (pprCLabel_asm l2 <> char ':') $$
912 hcat [gtab, text "fprem1"] $$
913 -- My Debian libc uses fstsw here for the tan code, but I can't
914 -- see any reason why it should need to be different for tan.
915 hcat [gtab, text "fnstsw %ax"] $$
916 hcat [gtab, text "test $0x400,%eax"] $$
917 hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
918 hcat [gtab, text "fstp %st(1)"] $$
919 hcat [gtab, text op] $$
920 (pprCLabel_asm l1 <> char ':') $$
921 -- Pop the 1.0 tan gave us
922 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
924 hcat [gtab, text "popl %eax;"] $$
925 -- And finally make the result the right size
926 hcat [gtab, gcoerceto sz, gpop dst 1]
928 --------------------------
930 -- coerce %st(0) to the specified size
931 gcoerceto :: Size -> Doc
932 gcoerceto FF64 = empty
933 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
934 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
936 gpush :: Reg -> RegNo -> Doc
938 = hcat [text "fld ", greg reg offset]
940 gpop :: Reg -> RegNo -> Doc
942 = hcat [text "fstp ", greg reg offset]
944 greg :: Reg -> RegNo -> Doc
945 greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
956 gregno :: Reg -> RegNo
957 gregno (RegReal (RealRegSingle i)) = i
958 gregno _ = --pprPanic "gregno" (ppr other)
959 999 -- bogus; only needed for debug printing
961 pprG :: Instr -> Doc -> Doc
963 = (char '#' <> pprGInstr fake) $$ actual
966 pprGInstr :: Instr -> Doc
967 pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst
968 pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst
969 pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst
971 pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst
972 pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst
974 pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst
975 pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
977 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
978 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
980 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
981 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
982 pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
983 pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
984 pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
985 pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
986 pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
988 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
989 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
990 pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg (sLit "gmul") sz src1 src2 dst
991 pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg (sLit "gdiv") sz src1 src2 dst
993 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
995 pprDollImm :: Imm -> Doc
996 pprDollImm i = ptext (sLit "$") <> pprImm i
999 pprOperand :: Size -> Operand -> Doc
1000 pprOperand s (OpReg r) = pprReg s r
1001 pprOperand _ (OpImm i) = pprDollImm i
1002 pprOperand _ (OpAddr ea) = pprAddr ea
1005 pprMnemonic_ :: LitString -> Doc
1007 char '\t' <> ptext name <> space
1010 pprMnemonic :: LitString -> Size -> Doc
1011 pprMnemonic name size =
1012 char '\t' <> ptext name <> pprSize size <> space
1015 pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc
1016 pprSizeImmOp name size imm op1
1018 pprMnemonic name size,
1026 pprSizeOp :: LitString -> Size -> Operand -> Doc
1027 pprSizeOp name size op1
1029 pprMnemonic name size,
1034 pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1035 pprSizeOpOp name size op1 op2
1037 pprMnemonic name size,
1038 pprOperand size op1,
1044 pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1045 pprOpOp name size op1 op2
1048 pprOperand size op1,
1054 pprSizeReg :: LitString -> Size -> Reg -> Doc
1055 pprSizeReg name size reg1
1057 pprMnemonic name size,
1062 pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc
1063 pprSizeRegReg name size reg1 reg2
1065 pprMnemonic name size,
1072 pprRegReg :: LitString -> Reg -> Reg -> Doc
1073 pprRegReg name reg1 reg2
1076 pprReg archWordSize reg1,
1078 pprReg archWordSize reg2
1082 pprSizeOpReg :: LitString -> Size -> Operand -> Reg -> Doc
1083 pprSizeOpReg name size op1 reg2
1085 pprMnemonic name size,
1086 pprOperand size op1,
1088 pprReg archWordSize reg2
1092 pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc
1093 pprCondRegReg name size cond reg1 reg2
1104 pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc
1105 pprSizeSizeRegReg name size1 size2 reg1 reg2
1119 pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
1120 pprSizeRegRegReg name size reg1 reg2 reg3
1122 pprMnemonic name size,
1131 pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc
1132 pprSizeAddrReg name size op dst
1134 pprMnemonic name size,
1141 pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc
1142 pprSizeRegAddr name size src op
1144 pprMnemonic name size,
1151 pprShift :: LitString -> Size -> Operand -> Operand -> Doc
1152 pprShift name size src dest
1154 pprMnemonic name size,
1155 pprOperand II8 src, -- src is 8-bit sized
1157 pprOperand size dest
1161 pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc
1162 pprSizeOpOpCoerce name size1 size2 op1 op2
1163 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
1164 pprOperand size1 op1,
1166 pprOperand size2 op2
1170 pprCondInstr :: LitString -> Cond -> Doc -> Doc
1171 pprCondInstr name cond arg
1172 = hcat [ char '\t', ptext name, pprCond cond, space, arg]