1 -----------------------------------------------------------------------------
3 -- Pretty-printing assembly language
5 -- (c) The University of Glasgow 1993-2005
7 -----------------------------------------------------------------------------
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
38 import Unique ( pprUnique, Uniquable(..) )
41 import qualified Outputable
42 import Outputable (panic, Outputable)
45 import Distribution.System
47 #if i386_TARGET_ARCH && darwin_TARGET_OS
51 -- -----------------------------------------------------------------------------
52 -- Printing this stuff out
54 pprNatCmmTop :: NatCmmTop Instr -> Doc
55 pprNatCmmTop (CmmData section dats) =
56 pprSectionHeader section $$ vcat (map pprData dats)
58 -- special case for split markers:
59 pprNatCmmTop (CmmProc [] lbl (ListGraph [])) = pprLabel lbl
61 pprNatCmmTop (CmmProc info lbl (ListGraph blocks)) =
62 pprSectionHeader Text $$
63 (if null info then -- blocks guaranteed not null, so label needed
66 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
67 pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
70 vcat (map pprData info) $$
71 pprLabel (entryLblToInfoLbl lbl)
73 vcat (map pprBasicBlock blocks)
74 -- above: Even the first block gets a label, because with branch-chain
75 -- elimination, it might be the target of a goto.
76 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
77 -- If we are using the .subsections_via_symbols directive
78 -- (available on recent versions of Darwin),
79 -- we have to make sure that there is some kind of reference
80 -- from the entry code to a label on the _top_ of of the info table,
81 -- so that the linker will not think it is unreferenced and dead-strip
82 -- it. That's why the label is called a DeadStripPreventer (_dsp).
85 <+> pprCLabel_asm (entryLblToInfoLbl lbl)
87 <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
92 pprBasicBlock :: NatBasicBlock Instr -> Doc
93 pprBasicBlock (BasicBlock blockid instrs) =
94 pprLabel (mkAsmTempLabel (getUnique blockid)) $$
95 vcat (map pprInstr instrs)
98 pprData :: CmmStatic -> Doc
99 pprData (CmmAlign bytes) = pprAlign bytes
100 pprData (CmmDataLabel lbl) = pprLabel lbl
101 pprData (CmmString str) = pprASCII str
104 pprData (CmmUninitialised bytes) = ptext (sLit ".space ") <> int bytes
106 pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
109 pprData (CmmStaticLit lit) = pprDataItem lit
111 pprGloblDecl :: CLabel -> Doc
113 | not (externallyVisibleCLabel lbl) = empty
114 | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
118 pprTypeAndSizeDecl :: CLabel -> Doc
120 pprTypeAndSizeDecl lbl
121 | not (externallyVisibleCLabel lbl) = empty
122 | otherwise = ptext (sLit ".type ") <>
123 pprCLabel_asm lbl <> ptext (sLit ", @object")
129 pprLabel :: CLabel -> Doc
130 pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
133 pprASCII :: [Word8] -> Doc
135 = vcat (map do1 str) $$ do1 0
138 do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
140 pprAlign :: Int -> Doc
144 = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
150 log2 :: Int -> Int -- cache the common ones
155 log2 n = 1 + log2 (n `quot` 2)
158 -- -----------------------------------------------------------------------------
159 -- pprInstr: print an 'Instr'
161 instance Outputable Instr where
162 ppr instr = Outputable.docToSDoc $ pprInstr instr
165 pprUserReg :: Reg -> Doc
167 | cTargetArch == I386 = pprReg II32
168 | cTargetArch == X86_64 = pprReg II64
169 | otherwise = panic "X86.Ppr.pprUserReg: not defined"
171 pprReg :: Size -> Reg -> Doc
175 RegReal (RealRegSingle i) -> ppr_reg_no s i
176 RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
177 RegVirtual (VirtualRegI u) -> text "%vI_" <> asmSDoc (pprUnique u)
178 RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
179 RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
180 RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
181 RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
184 ppr_reg_no :: Size -> Int -> Doc
185 ppr_reg_no II8 = ppr_reg_byte
186 ppr_reg_no II16 = ppr_reg_word
187 ppr_reg_no _ = ppr_reg_long
189 ppr_reg_byte i = ptext
191 0 -> sLit "%al"; 1 -> sLit "%bl";
192 2 -> sLit "%cl"; 3 -> sLit "%dl";
193 _ -> sLit "very naughty I386 byte register"
196 ppr_reg_word i = ptext
198 0 -> sLit "%ax"; 1 -> sLit "%bx";
199 2 -> sLit "%cx"; 3 -> sLit "%dx";
200 4 -> sLit "%si"; 5 -> sLit "%di";
201 6 -> sLit "%bp"; 7 -> sLit "%sp";
202 _ -> sLit "very naughty I386 word register"
205 ppr_reg_long i = ptext
207 0 -> sLit "%eax"; 1 -> sLit "%ebx";
208 2 -> sLit "%ecx"; 3 -> sLit "%edx";
209 4 -> sLit "%esi"; 5 -> sLit "%edi";
210 6 -> sLit "%ebp"; 7 -> sLit "%esp";
213 #elif x86_64_TARGET_ARCH
214 ppr_reg_no :: Size -> Int -> Doc
215 ppr_reg_no II8 = ppr_reg_byte
216 ppr_reg_no II16 = ppr_reg_word
217 ppr_reg_no II32 = ppr_reg_long
218 ppr_reg_no _ = ppr_reg_quad
220 ppr_reg_byte i = ptext
222 0 -> sLit "%al"; 1 -> sLit "%bl";
223 2 -> sLit "%cl"; 3 -> sLit "%dl";
224 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
225 6 -> sLit "%bpl"; 7 -> sLit "%spl";
226 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
227 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
228 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
229 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
230 _ -> sLit "very naughty x86_64 byte register"
233 ppr_reg_word i = ptext
235 0 -> sLit "%ax"; 1 -> sLit "%bx";
236 2 -> sLit "%cx"; 3 -> sLit "%dx";
237 4 -> sLit "%si"; 5 -> sLit "%di";
238 6 -> sLit "%bp"; 7 -> sLit "%sp";
239 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
240 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
241 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
242 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
243 _ -> sLit "very naughty x86_64 word register"
246 ppr_reg_long i = ptext
248 0 -> sLit "%eax"; 1 -> sLit "%ebx";
249 2 -> sLit "%ecx"; 3 -> sLit "%edx";
250 4 -> sLit "%esi"; 5 -> sLit "%edi";
251 6 -> sLit "%ebp"; 7 -> sLit "%esp";
252 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
253 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
254 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
255 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
256 _ -> sLit "very naughty x86_64 register"
259 ppr_reg_quad i = ptext
261 0 -> sLit "%rax"; 1 -> sLit "%rbx";
262 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
263 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
264 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
265 8 -> sLit "%r8"; 9 -> sLit "%r9";
266 10 -> sLit "%r10"; 11 -> sLit "%r11";
267 12 -> sLit "%r12"; 13 -> sLit "%r13";
268 14 -> sLit "%r14"; 15 -> sLit "%r15";
272 ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
275 #if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
276 ppr_reg_float :: Int -> LitString
277 ppr_reg_float i = case i of
278 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
279 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
280 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
281 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
282 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
283 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
284 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
285 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
286 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
287 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
288 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
289 _ -> sLit "very naughty x86 register"
292 pprSize :: Size -> Doc
299 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
300 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
304 pprSize_x87 :: Size -> Doc
310 _ -> panic "X86.Ppr.pprSize_x87"
312 pprCond :: Cond -> Doc
315 GEU -> sLit "ae"; LU -> sLit "b";
316 EQQ -> sLit "e"; GTT -> sLit "g";
317 GE -> sLit "ge"; GU -> sLit "a";
318 LTT -> sLit "l"; LE -> sLit "le";
319 LEU -> sLit "be"; NE -> sLit "ne";
320 NEG -> sLit "s"; POS -> sLit "ns";
321 CARRY -> sLit "c"; OFLO -> sLit "o";
322 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
323 ALWAYS -> sLit "mp"})
327 pprImm (ImmInt i) = int i
328 pprImm (ImmInteger i) = integer i
329 pprImm (ImmCLbl l) = pprCLabel_asm l
330 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
331 pprImm (ImmLit s) = s
333 pprImm (ImmFloat _) = ptext (sLit "naughty float immediate")
334 pprImm (ImmDouble _) = ptext (sLit "naughty double immediate")
336 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
337 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
338 <> lparen <> pprImm b <> rparen
342 pprAddr :: AddrMode -> Doc
343 pprAddr (ImmAddr imm off)
344 = let pp_imm = pprImm imm
348 else if (off < 0) then
351 pp_imm <> char '+' <> int off
353 pprAddr (AddrBaseIndex base index displacement)
355 pp_disp = ppr_disp displacement
356 pp_off p = pp_disp <> char '(' <> p <> char ')'
357 pp_reg r = pprReg archWordSize r
359 case (base, index) of
360 (EABaseNone, EAIndexNone) -> pp_disp
361 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
362 (EABaseRip, EAIndexNone) -> pp_off (ptext (sLit "%rip"))
363 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
364 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
366 _ -> panic "X86.Ppr.pprAddr: no match"
369 ppr_disp (ImmInt 0) = empty
370 ppr_disp imm = pprImm imm
373 pprSectionHeader :: Section -> Doc
376 # if darwin_TARGET_OS
379 Text -> ptext (sLit ".text\n\t.align 2")
380 Data -> ptext (sLit ".data\n\t.align 2")
381 ReadOnlyData -> ptext (sLit ".const\n.align 2")
382 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 2")
383 UninitialisedData -> ptext (sLit ".data\n\t.align 2")
384 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
385 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
390 Text -> ptext (sLit ".text\n\t.align 4,0x90")
391 Data -> ptext (sLit ".data\n\t.align 4")
392 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 4")
393 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 4")
394 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 4")
395 ReadOnlyData16 -> ptext (sLit ".section .rodata\n\t.align 16")
396 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
400 #elif x86_64_TARGET_ARCH
401 # if darwin_TARGET_OS
404 Text -> ptext (sLit ".text\n.align 3")
405 Data -> ptext (sLit ".data\n.align 3")
406 ReadOnlyData -> ptext (sLit ".const\n.align 3")
407 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 3")
408 UninitialisedData -> ptext (sLit ".data\n\t.align 3")
409 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
410 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
415 Text -> ptext (sLit ".text\n\t.align 8")
416 Data -> ptext (sLit ".data\n\t.align 8")
417 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 8")
418 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 8")
419 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 8")
420 ReadOnlyData16 -> ptext (sLit ".section .rodata.cst16\n\t.align 16")
421 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
426 pprSectionHeader _ = panic "X86.Ppr.pprSectionHeader: not defined for this architecture"
433 pprDataItem :: CmmLit -> Doc
435 = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit)
439 -- These seem to be common:
440 ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
441 ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
443 ppr_item FF32 (CmmFloat r _)
444 = let bs = floatToBytes (fromRational r)
445 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
447 ppr_item FF64 (CmmFloat r _)
448 = let bs = doubleToBytes (fromRational r)
449 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
451 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
452 ppr_item II16 _ = [ptext (sLit "\t.word\t") <> pprImm imm]
454 #if i386_TARGET_ARCH && darwin_TARGET_OS
455 ppr_item II64 (CmmInt x _) =
456 [ptext (sLit "\t.long\t")
457 <> int (fromIntegral (fromIntegral x :: Word32)),
458 ptext (sLit "\t.long\t")
460 (fromIntegral (x `shiftR` 32) :: Word32))]
462 #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
463 ppr_item II64 _ = [ptext (sLit "\t.quad\t") <> pprImm imm]
465 #if x86_64_TARGET_ARCH && !darwin_TARGET_OS
466 -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
467 -- type, which means we can't do pc-relative 64-bit addresses.
468 -- Fortunately we're assuming the small memory model, in which
469 -- all such offsets will fit into 32 bits, so we have to stick
470 -- to 32-bit offset fields and modify the RTS appropriately
472 -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
475 | isRelativeReloc x =
476 [ptext (sLit "\t.long\t") <> pprImm imm,
477 ptext (sLit "\t.long\t0")]
479 [ptext (sLit "\t.quad\t") <> pprImm imm]
481 isRelativeReloc (CmmLabelDiffOff _ _ _) = True
482 isRelativeReloc _ = False
486 = panic "X86.Ppr.ppr_item: no match"
490 pprInstr :: Instr -> Doc
492 pprInstr (COMMENT _) = empty -- nuke 'em
495 = IF_ARCH_alpha( ((<>) (ptext (sLit "\t# ")) (ftext s))
496 ,IF_ARCH_sparc( ((<>) (ptext (sLit "# ")) (ftext s))
497 ,IF_ARCH_i386( ((<>) (ptext (sLit "# ")) (ftext s))
498 ,IF_ARCH_x86_64( ((<>) (ptext (sLit "# ")) (ftext s))
499 ,IF_ARCH_powerpc( IF_OS_linux(
500 ((<>) (ptext (sLit "# ")) (ftext s)),
501 ((<>) (ptext (sLit "; ")) (ftext s)))
505 = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
507 pprInstr (NEWBLOCK _)
508 = panic "PprMach.pprInstr: NEWBLOCK"
511 = panic "PprMach.pprInstr: LDATA"
514 pprInstr (SPILL reg slot)
516 ptext (sLit "\tSPILL"),
520 ptext (sLit "SLOT") <> parens (int slot)]
522 pprInstr (RELOAD slot reg)
524 ptext (sLit "\tRELOAD"),
526 ptext (sLit "SLOT") <> parens (int slot),
531 pprInstr (MOV size src dst)
532 = pprSizeOpOp (sLit "mov") size src dst
534 pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst
535 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
536 -- movl. But we represent it as a MOVZxL instruction, because
537 -- the reg alloc would tend to throw away a plain reg-to-reg
538 -- move, and we still want it to do that.
540 pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst
541 -- zero-extension only needs to extend to 32 bits: on x86_64,
542 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
543 -- instruction is shorter.
545 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
547 -- here we do some patching, since the physical registers are only set late
548 -- in the code generation.
549 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
551 = pprSizeOpOp (sLit "add") size (OpReg reg2) dst
553 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
555 = pprSizeOpOp (sLit "add") size (OpReg reg1) dst
557 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
559 = pprInstr (ADD size (OpImm displ) dst)
561 pprInstr (LEA size src dst) = pprSizeOpOp (sLit "lea") size src dst
563 pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
564 = pprSizeOp (sLit "dec") size dst
565 pprInstr (ADD size (OpImm (ImmInt 1)) dst)
566 = pprSizeOp (sLit "inc") size dst
567 pprInstr (ADD size src dst)
568 = pprSizeOpOp (sLit "add") size src dst
569 pprInstr (ADC size src dst)
570 = pprSizeOpOp (sLit "adc") size src dst
571 pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
572 pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
574 {- A hack. The Intel documentation says that "The two and three
575 operand forms [of IMUL] may also be used with unsigned operands
576 because the lower half of the product is the same regardless if
577 (sic) the operands are signed or unsigned. The CF and OF flags,
578 however, cannot be used to determine if the upper half of the
579 result is non-zero." So there.
581 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst
582 pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst
584 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
585 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
586 pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
588 pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
589 pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
591 pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
592 pprInstr (SAR size src dst) = pprShift (sLit "sar") size src dst
593 pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst
595 pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src
597 pprInstr (CMP size src dst)
598 | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2
599 | otherwise = pprSizeOpOp (sLit "cmp") size src dst
601 -- This predicate is needed here and nowhere else
607 pprInstr (TEST size src dst) = pprSizeOpOp (sLit "test") size src dst
608 pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op
609 pprInstr (POP size op) = pprSizeOp (sLit "pop") size op
611 -- both unused (SDM):
612 -- pprInstr PUSHA = ptext (sLit "\tpushal")
613 -- pprInstr POPA = ptext (sLit "\tpopal")
615 pprInstr NOP = ptext (sLit "\tnop")
616 pprInstr (CLTD II32) = ptext (sLit "\tcltd")
617 pprInstr (CLTD II64) = ptext (sLit "\tcqto")
619 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
621 pprInstr (JXX cond blockid)
622 = pprCondInstr (sLit "j") cond (pprCLabel_asm lab)
623 where lab = mkAsmTempLabel (getUnique blockid)
625 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
627 pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
628 pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
629 pprInstr (JMP_TBL op _) = pprInstr (JMP op)
630 pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
631 pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
633 pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
634 pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
635 pprInstr (IMUL2 sz op) = pprSizeOp (sLit "imul") sz op
638 pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
640 pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
642 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
643 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
644 pprInstr (CVTTSS2SIQ sz from to) = pprSizeOpReg (sLit "cvttss2si") sz from to
645 pprInstr (CVTTSD2SIQ sz from to) = pprSizeOpReg (sLit "cvttsd2si") sz from to
646 pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
647 pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
649 -- FETCHGOT for PIC on ELF platforms
650 pprInstr (FETCHGOT reg)
651 = vcat [ ptext (sLit "\tcall 1f"),
652 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ],
653 hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
657 -- FETCHPC for PIC on Darwin/x86
658 -- get the instruction pointer into a register
659 -- (Terminology note: the IP is called Program Counter on PPC,
660 -- and it's a good thing to use the same name on both platforms)
661 pprInstr (FETCHPC reg)
662 = vcat [ ptext (sLit "\tcall 1f"),
663 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ]
667 -- -----------------------------------------------------------------------------
668 -- i386 floating-point
670 -- Simulating a flat register set on the x86 FP stack is tricky.
671 -- you have to free %st(7) before pushing anything on the FP reg stack
672 -- so as to preclude the possibility of a FP stack overflow exception.
673 pprInstr g@(GMOV src dst)
677 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
679 -- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
680 pprInstr g@(GLD sz addr dst)
681 = pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
682 pprAddr addr, gsemi, gpop dst 1])
684 -- GST sz src addr ==> FLD dst ; FSTPsz addr
685 pprInstr g@(GST sz src addr)
686 | src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
687 = pprG g (hcat [gtab,
688 text "fst", pprSize_x87 sz, gsp, pprAddr addr])
690 = pprG g (hcat [gtab, gpush src 0, gsemi,
691 text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
693 pprInstr g@(GLDZ dst)
694 = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
695 pprInstr g@(GLD1 dst)
696 = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
698 pprInstr (GFTOI src dst)
699 = pprInstr (GDTOI src dst)
701 pprInstr g@(GDTOI src dst)
703 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
704 hcat [gtab, gpush src 0],
705 hcat [gtab, text "movzwl 4(%esp), ", reg,
706 text " ; orl $0xC00, ", reg],
707 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
708 hcat [gtab, text "fistpl 0(%esp)"],
709 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
710 hcat [gtab, text "addl $8, %esp"]
713 reg = pprReg II32 dst
715 pprInstr (GITOF src dst)
716 = pprInstr (GITOD src dst)
718 pprInstr g@(GITOD src dst)
719 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
720 text " ; fildl (%esp) ; ",
721 gpop dst 1, text " ; addl $4,%esp"])
723 pprInstr g@(GDTOF src dst)
724 = pprG g (vcat [gtab <> gpush src 0,
725 gtab <> text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ;",
728 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
729 this far into the jungle AND you give a Rat's Ass (tm) what's going
730 on, here's the deal. Generate code to do a floating point comparison
731 of src1 and src2, of kind cond, and set the Zero flag if true.
733 The complications are to do with handling NaNs correctly. We want the
734 property that if either argument is NaN, then the result of the
735 comparison is False ... except if we're comparing for inequality,
736 in which case the answer is True.
738 Here's how the general (non-inequality) case works. As an
739 example, consider generating the an equality test:
741 pushl %eax -- we need to mess with this
742 <get src1 to top of FPU stack>
743 fcomp <src2 location in FPU stack> and pop pushed src1
744 -- Result of comparison is in FPU Status Register bits
746 fstsw %ax -- Move FPU Status Reg to %ax
747 sahf -- move C3 C2 C0 from %ax to integer flag reg
748 -- now the serious magic begins
749 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
750 sete %al -- %al = if arg1 == arg2 then 1 else 0
751 andb %ah,%al -- %al &= %ah
752 -- so %al == 1 iff (comparable && same); else it holds 0
753 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
754 else %al == 0xFF, ZeroFlag=0
755 -- the zero flag is now set as we desire.
758 The special case of inequality differs thusly:
760 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
761 setne %al -- %al = if arg1 /= arg2 then 1 else 0
762 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
763 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
764 else (%al == 0xFF, ZF=0)
766 pprInstr g@(GCMP cond src1 src2)
767 | case cond of { NE -> True; _ -> False }
769 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
770 hcat [gtab, text "fcomp ", greg src2 1,
771 text "; fstsw %ax ; sahf ; setpe %ah"],
772 hcat [gtab, text "setne %al ; ",
773 text "orb %ah,%al ; decb %al ; popl %eax"]
777 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
778 hcat [gtab, text "fcomp ", greg src2 1,
779 text "; fstsw %ax ; sahf ; setpo %ah"],
780 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
781 text "andb %ah,%al ; decb %al ; popl %eax"]
784 {- On the 486, the flags set by FP compare are the unsigned ones!
785 (This looks like a HACK to me. WDP 96/03)
787 fix_FP_cond :: Cond -> Cond
792 fix_FP_cond EQQ = EQQ
794 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
795 -- there should be no others
798 pprInstr g@(GABS _ src dst)
799 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
801 pprInstr g@(GNEG _ src dst)
802 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
804 pprInstr g@(GSQRT sz src dst)
805 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
806 hcat [gtab, gcoerceto sz, gpop dst 1])
808 pprInstr g@(GSIN sz l1 l2 src dst)
809 = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
811 pprInstr g@(GCOS sz l1 l2 src dst)
812 = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
814 pprInstr g@(GTAN sz l1 l2 src dst)
815 = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
817 -- In the translations for GADD, GMUL, GSUB and GDIV,
818 -- the first two cases are mere optimisations. The otherwise clause
819 -- generates correct code under all circumstances.
821 pprInstr g@(GADD _ src1 src2 dst)
823 = pprG g (text "\t#GADD-xxxcase1" $$
824 hcat [gtab, gpush src2 0,
825 text " ; faddp %st(0),", greg src1 1])
827 = pprG g (text "\t#GADD-xxxcase2" $$
828 hcat [gtab, gpush src1 0,
829 text " ; faddp %st(0),", greg src2 1])
831 = pprG g (hcat [gtab, gpush src1 0,
832 text " ; fadd ", greg src2 1, text ",%st(0)",
836 pprInstr g@(GMUL _ src1 src2 dst)
838 = pprG g (text "\t#GMUL-xxxcase1" $$
839 hcat [gtab, gpush src2 0,
840 text " ; fmulp %st(0),", greg src1 1])
842 = pprG g (text "\t#GMUL-xxxcase2" $$
843 hcat [gtab, gpush src1 0,
844 text " ; fmulp %st(0),", greg src2 1])
846 = pprG g (hcat [gtab, gpush src1 0,
847 text " ; fmul ", greg src2 1, text ",%st(0)",
851 pprInstr g@(GSUB _ src1 src2 dst)
853 = pprG g (text "\t#GSUB-xxxcase1" $$
854 hcat [gtab, gpush src2 0,
855 text " ; fsubrp %st(0),", greg src1 1])
857 = pprG g (text "\t#GSUB-xxxcase2" $$
858 hcat [gtab, gpush src1 0,
859 text " ; fsubp %st(0),", greg src2 1])
861 = pprG g (hcat [gtab, gpush src1 0,
862 text " ; fsub ", greg src2 1, text ",%st(0)",
866 pprInstr g@(GDIV _ src1 src2 dst)
868 = pprG g (text "\t#GDIV-xxxcase1" $$
869 hcat [gtab, gpush src2 0,
870 text " ; fdivrp %st(0),", greg src1 1])
872 = pprG g (text "\t#GDIV-xxxcase2" $$
873 hcat [gtab, gpush src1 0,
874 text " ; fdivp %st(0),", greg src2 1])
876 = pprG g (hcat [gtab, gpush src1 0,
877 text " ; fdiv ", greg src2 1, text ",%st(0)",
882 = vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
883 ptext (sLit "\tffree %st(4) ;ffree %st(5)")
887 = panic "X86.Ppr.pprInstr: no match"
890 pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc
891 pprTrigOp op -- fsin, fcos or fptan
892 isTan -- we need a couple of extra steps if we're doing tan
893 l1 l2 -- internal labels for us to use
895 = -- We'll be needing %eax later on
896 hcat [gtab, text "pushl %eax;"] $$
897 -- tan is going to use an extra space on the FP stack
898 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
899 -- First put the value in %st(0) and try to apply the op to it
900 hcat [gpush src 0, text ("; " ++ op)] $$
901 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
902 hcat [gtab, text "fnstsw %ax"] $$
903 hcat [gtab, text "test $0x400,%eax"] $$
904 -- If we were in bounds then jump to the end
905 hcat [gtab, text "je " <> pprCLabel_asm l1] $$
906 -- Otherwise we need to shrink the value. Start by
907 -- loading pi, doubleing it (by adding it to itself),
908 -- and then swapping pi with the value, so the value we
909 -- want to apply op to is in %st(0) again
910 hcat [gtab, text "ffree %st(7); fldpi"] $$
911 hcat [gtab, text "fadd %st(0),%st"] $$
912 hcat [gtab, text "fxch %st(1)"] $$
913 -- Now we have a loop in which we make the value smaller,
914 -- see if it's small enough, and loop if not
915 (pprCLabel_asm l2 <> char ':') $$
916 hcat [gtab, text "fprem1"] $$
917 -- My Debian libc uses fstsw here for the tan code, but I can't
918 -- see any reason why it should need to be different for tan.
919 hcat [gtab, text "fnstsw %ax"] $$
920 hcat [gtab, text "test $0x400,%eax"] $$
921 hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
922 hcat [gtab, text "fstp %st(1)"] $$
923 hcat [gtab, text op] $$
924 (pprCLabel_asm l1 <> char ':') $$
925 -- Pop the 1.0 tan gave us
926 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
928 hcat [gtab, text "popl %eax;"] $$
929 -- And finally make the result the right size
930 hcat [gtab, gcoerceto sz, gpop dst 1]
932 --------------------------
934 -- coerce %st(0) to the specified size
935 gcoerceto :: Size -> Doc
936 gcoerceto FF64 = empty
937 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
938 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
940 gpush :: Reg -> RegNo -> Doc
942 = hcat [text "fld ", greg reg offset]
944 gpop :: Reg -> RegNo -> Doc
946 = hcat [text "fstp ", greg reg offset]
948 greg :: Reg -> RegNo -> Doc
949 greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
960 gregno :: Reg -> RegNo
961 gregno (RegReal (RealRegSingle i)) = i
962 gregno _ = --pprPanic "gregno" (ppr other)
963 999 -- bogus; only needed for debug printing
965 pprG :: Instr -> Doc -> Doc
967 = (char '#' <> pprGInstr fake) $$ actual
970 pprGInstr :: Instr -> Doc
971 pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst
972 pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst
973 pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst
975 pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst
976 pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst
978 pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst
979 pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
981 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
982 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
983 pprGInstr (GDTOF src dst) = pprSizeSizeRegReg (sLit "gdtof") FF64 FF32 src dst
985 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
986 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
987 pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
988 pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
989 pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
990 pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
991 pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
993 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
994 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
995 pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg (sLit "gmul") sz src1 src2 dst
996 pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg (sLit "gdiv") sz src1 src2 dst
998 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
1000 pprDollImm :: Imm -> Doc
1001 pprDollImm i = ptext (sLit "$") <> pprImm i
1004 pprOperand :: Size -> Operand -> Doc
1005 pprOperand s (OpReg r) = pprReg s r
1006 pprOperand _ (OpImm i) = pprDollImm i
1007 pprOperand _ (OpAddr ea) = pprAddr ea
1010 pprMnemonic_ :: LitString -> Doc
1012 char '\t' <> ptext name <> space
1015 pprMnemonic :: LitString -> Size -> Doc
1016 pprMnemonic name size =
1017 char '\t' <> ptext name <> pprSize size <> space
1020 pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc
1021 pprSizeImmOp name size imm op1
1023 pprMnemonic name size,
1031 pprSizeOp :: LitString -> Size -> Operand -> Doc
1032 pprSizeOp name size op1
1034 pprMnemonic name size,
1039 pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1040 pprSizeOpOp name size op1 op2
1042 pprMnemonic name size,
1043 pprOperand size op1,
1049 pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1050 pprOpOp name size op1 op2
1053 pprOperand size op1,
1059 pprSizeReg :: LitString -> Size -> Reg -> Doc
1060 pprSizeReg name size reg1
1062 pprMnemonic name size,
1067 pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc
1068 pprSizeRegReg name size reg1 reg2
1070 pprMnemonic name size,
1077 pprRegReg :: LitString -> Reg -> Reg -> Doc
1078 pprRegReg name reg1 reg2
1081 pprReg archWordSize reg1,
1083 pprReg archWordSize reg2
1087 pprSizeOpReg :: LitString -> Size -> Operand -> Reg -> Doc
1088 pprSizeOpReg name size op1 reg2
1090 pprMnemonic name size,
1091 pprOperand size op1,
1093 pprReg archWordSize reg2
1097 pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc
1098 pprCondRegReg name size cond reg1 reg2
1109 pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc
1110 pprSizeSizeRegReg name size1 size2 reg1 reg2
1124 pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
1125 pprSizeRegRegReg name size reg1 reg2 reg3
1127 pprMnemonic name size,
1136 pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc
1137 pprSizeAddrReg name size op dst
1139 pprMnemonic name size,
1146 pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc
1147 pprSizeRegAddr name size src op
1149 pprMnemonic name size,
1156 pprShift :: LitString -> Size -> Operand -> Operand -> Doc
1157 pprShift name size src dest
1159 pprMnemonic name size,
1160 pprOperand II8 src, -- src is 8-bit sized
1162 pprOperand size dest
1166 pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc
1167 pprSizeOpOpCoerce name size1 size2 op1 op2
1168 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
1169 pprOperand size1 op1,
1171 pprOperand size2 op2
1175 pprCondInstr :: LitString -> Cond -> Doc -> Doc
1176 pprCondInstr name cond arg
1177 = hcat [ char '\t', ptext name, pprCond cond, space, arg]