1 -----------------------------------------------------------------------------
3 -- Pretty-printing assembly language
5 -- (c) The University of Glasgow 1993-2005
7 -----------------------------------------------------------------------------
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
38 import Unique ( pprUnique, Uniquable(..) )
41 import qualified Outputable
42 import Outputable (panic, Outputable)
45 import Distribution.System
47 #if i386_TARGET_ARCH && darwin_TARGET_OS
51 -- -----------------------------------------------------------------------------
52 -- Printing this stuff out
54 pprNatCmmTop :: NatCmmTop Instr -> Doc
55 pprNatCmmTop (CmmData section dats) =
56 pprSectionHeader section $$ vcat (map pprData dats)
58 -- special case for split markers:
59 pprNatCmmTop (CmmProc [] lbl (ListGraph [])) = pprLabel lbl
61 pprNatCmmTop (CmmProc info lbl (ListGraph blocks)) =
62 pprSectionHeader Text $$
63 (if null info then -- blocks guaranteed not null, so label needed
66 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
67 pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
70 vcat (map pprData info) $$
71 pprLabel (entryLblToInfoLbl lbl)
73 vcat (map pprBasicBlock blocks)
74 -- above: Even the first block gets a label, because with branch-chain
75 -- elimination, it might be the target of a goto.
76 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
77 -- If we are using the .subsections_via_symbols directive
78 -- (available on recent versions of Darwin),
79 -- we have to make sure that there is some kind of reference
80 -- from the entry code to a label on the _top_ of of the info table,
81 -- so that the linker will not think it is unreferenced and dead-strip
82 -- it. That's why the label is called a DeadStripPreventer (_dsp).
85 <+> pprCLabel_asm (entryLblToInfoLbl lbl)
87 <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
92 pprBasicBlock :: NatBasicBlock Instr -> Doc
93 pprBasicBlock (BasicBlock blockid instrs) =
94 pprLabel (mkAsmTempLabel (getUnique blockid)) $$
95 vcat (map pprInstr instrs)
98 pprData :: CmmStatic -> Doc
99 pprData (CmmAlign bytes) = pprAlign bytes
100 pprData (CmmDataLabel lbl) = pprLabel lbl
101 pprData (CmmString str) = pprASCII str
104 pprData (CmmUninitialised bytes) = ptext (sLit ".space ") <> int bytes
106 pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
109 pprData (CmmStaticLit lit) = pprDataItem lit
111 pprGloblDecl :: CLabel -> Doc
113 | not (externallyVisibleCLabel lbl) = empty
114 | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
118 pprTypeAndSizeDecl :: CLabel -> Doc
120 pprTypeAndSizeDecl lbl
121 | not (externallyVisibleCLabel lbl) = empty
122 | otherwise = ptext (sLit ".type ") <>
123 pprCLabel_asm lbl <> ptext (sLit ", @object")
129 pprLabel :: CLabel -> Doc
130 pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
133 pprASCII :: [Word8] -> Doc
135 = vcat (map do1 str) $$ do1 0
138 do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
140 pprAlign :: Int -> Doc
144 = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
150 log2 :: Int -> Int -- cache the common ones
155 log2 n = 1 + log2 (n `quot` 2)
158 -- -----------------------------------------------------------------------------
159 -- pprInstr: print an 'Instr'
161 instance Outputable Instr where
162 ppr instr = Outputable.docToSDoc $ pprInstr instr
165 pprUserReg :: Reg -> Doc
167 | cTargetArch == I386 = pprReg II32
168 | cTargetArch == X86_64 = pprReg II64
169 | otherwise = panic "X86.Ppr.pprUserReg: not defined"
171 pprReg :: Size -> Reg -> Doc
175 RegReal (RealRegSingle i) -> ppr_reg_no s i
176 RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
177 RegVirtual (VirtualRegI u) -> text "%vI_" <> asmSDoc (pprUnique u)
178 RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
179 RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
180 RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
181 RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
184 ppr_reg_no :: Size -> Int -> Doc
185 ppr_reg_no II8 = ppr_reg_byte
186 ppr_reg_no II16 = ppr_reg_word
187 ppr_reg_no _ = ppr_reg_long
189 ppr_reg_byte i = ptext
191 0 -> sLit "%al"; 1 -> sLit "%bl";
192 2 -> sLit "%cl"; 3 -> sLit "%dl";
193 _ -> sLit "very naughty I386 byte register"
196 ppr_reg_word i = ptext
198 0 -> sLit "%ax"; 1 -> sLit "%bx";
199 2 -> sLit "%cx"; 3 -> sLit "%dx";
200 4 -> sLit "%si"; 5 -> sLit "%di";
201 6 -> sLit "%bp"; 7 -> sLit "%sp";
202 _ -> sLit "very naughty I386 word register"
205 ppr_reg_long i = ptext
207 0 -> sLit "%eax"; 1 -> sLit "%ebx";
208 2 -> sLit "%ecx"; 3 -> sLit "%edx";
209 4 -> sLit "%esi"; 5 -> sLit "%edi";
210 6 -> sLit "%ebp"; 7 -> sLit "%esp";
213 #elif x86_64_TARGET_ARCH
214 ppr_reg_no :: Size -> Int -> Doc
215 ppr_reg_no II8 = ppr_reg_byte
216 ppr_reg_no II16 = ppr_reg_word
217 ppr_reg_no II32 = ppr_reg_long
218 ppr_reg_no _ = ppr_reg_quad
220 ppr_reg_byte i = ptext
222 0 -> sLit "%al"; 1 -> sLit "%bl";
223 2 -> sLit "%cl"; 3 -> sLit "%dl";
224 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
225 6 -> sLit "%bpl"; 7 -> sLit "%spl";
226 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
227 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
228 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
229 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
230 _ -> sLit "very naughty x86_64 byte register"
233 ppr_reg_word i = ptext
235 0 -> sLit "%ax"; 1 -> sLit "%bx";
236 2 -> sLit "%cx"; 3 -> sLit "%dx";
237 4 -> sLit "%si"; 5 -> sLit "%di";
238 6 -> sLit "%bp"; 7 -> sLit "%sp";
239 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
240 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
241 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
242 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
243 _ -> sLit "very naughty x86_64 word register"
246 ppr_reg_long i = ptext
248 0 -> sLit "%eax"; 1 -> sLit "%ebx";
249 2 -> sLit "%ecx"; 3 -> sLit "%edx";
250 4 -> sLit "%esi"; 5 -> sLit "%edi";
251 6 -> sLit "%ebp"; 7 -> sLit "%esp";
252 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
253 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
254 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
255 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
256 _ -> sLit "very naughty x86_64 register"
259 ppr_reg_quad i = ptext
261 0 -> sLit "%rax"; 1 -> sLit "%rbx";
262 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
263 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
264 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
265 8 -> sLit "%r8"; 9 -> sLit "%r9";
266 10 -> sLit "%r10"; 11 -> sLit "%r11";
267 12 -> sLit "%r12"; 13 -> sLit "%r13";
268 14 -> sLit "%r14"; 15 -> sLit "%r15";
272 ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
275 #if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
276 ppr_reg_float :: Int -> LitString
277 ppr_reg_float i = case i of
278 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
279 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
280 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
281 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
282 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
283 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
284 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
285 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
286 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
287 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
288 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
289 _ -> sLit "very naughty x86 register"
292 pprSize :: Size -> Doc
299 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
300 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
304 pprSize_x87 :: Size -> Doc
310 _ -> panic "X86.Ppr.pprSize_x87"
312 pprCond :: Cond -> Doc
315 GEU -> sLit "ae"; LU -> sLit "b";
316 EQQ -> sLit "e"; GTT -> sLit "g";
317 GE -> sLit "ge"; GU -> sLit "a";
318 LTT -> sLit "l"; LE -> sLit "le";
319 LEU -> sLit "be"; NE -> sLit "ne";
320 NEG -> sLit "s"; POS -> sLit "ns";
321 CARRY -> sLit "c"; OFLO -> sLit "o";
322 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
323 ALWAYS -> sLit "mp"})
327 pprImm (ImmInt i) = int i
328 pprImm (ImmInteger i) = integer i
329 pprImm (ImmCLbl l) = pprCLabel_asm l
330 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
331 pprImm (ImmLit s) = s
333 pprImm (ImmFloat _) = ptext (sLit "naughty float immediate")
334 pprImm (ImmDouble _) = ptext (sLit "naughty double immediate")
336 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
337 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
338 <> lparen <> pprImm b <> rparen
342 pprAddr :: AddrMode -> Doc
343 pprAddr (ImmAddr imm off)
344 = let pp_imm = pprImm imm
348 else if (off < 0) then
351 pp_imm <> char '+' <> int off
353 pprAddr (AddrBaseIndex base index displacement)
355 pp_disp = ppr_disp displacement
356 pp_off p = pp_disp <> char '(' <> p <> char ')'
357 pp_reg r = pprReg archWordSize r
359 case (base, index) of
360 (EABaseNone, EAIndexNone) -> pp_disp
361 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
362 (EABaseRip, EAIndexNone) -> pp_off (ptext (sLit "%rip"))
363 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
364 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
366 _ -> panic "X86.Ppr.pprAddr: no match"
369 ppr_disp (ImmInt 0) = empty
370 ppr_disp imm = pprImm imm
373 pprSectionHeader :: Section -> Doc
376 # if darwin_TARGET_OS
379 Text -> ptext (sLit ".text\n\t.align 2")
380 Data -> ptext (sLit ".data\n\t.align 2")
381 ReadOnlyData -> ptext (sLit ".const\n.align 2")
382 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 2")
383 UninitialisedData -> ptext (sLit ".data\n\t.align 2")
384 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
385 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
390 Text -> ptext (sLit ".text\n\t.align 4,0x90")
391 Data -> ptext (sLit ".data\n\t.align 4")
392 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 4")
393 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 4")
394 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 4")
395 ReadOnlyData16 -> ptext (sLit ".section .rodata\n\t.align 16")
396 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
400 #elif x86_64_TARGET_ARCH
401 # if darwin_TARGET_OS
404 Text -> ptext (sLit ".text\n.align 3")
405 Data -> ptext (sLit ".data\n.align 3")
406 ReadOnlyData -> ptext (sLit ".const\n.align 3")
407 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 3")
408 UninitialisedData -> ptext (sLit ".data\n\t.align 3")
409 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
410 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
415 Text -> ptext (sLit ".text\n\t.align 8")
416 Data -> ptext (sLit ".data\n\t.align 8")
417 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 8")
418 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 8")
419 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 8")
420 ReadOnlyData16 -> ptext (sLit ".section .rodata.cst16\n\t.align 16")
421 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
426 pprSectionHeader _ = panic "X86.Ppr.pprSectionHeader: not defined for this architecture"
433 pprDataItem :: CmmLit -> Doc
435 = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit)
439 -- These seem to be common:
440 ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
441 ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
443 ppr_item FF32 (CmmFloat r _)
444 = let bs = floatToBytes (fromRational r)
445 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
447 ppr_item FF64 (CmmFloat r _)
448 = let bs = doubleToBytes (fromRational r)
449 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
451 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
452 ppr_item II16 _ = [ptext (sLit "\t.word\t") <> pprImm imm]
454 #if i386_TARGET_ARCH && darwin_TARGET_OS
455 ppr_item II64 (CmmInt x _) =
456 [ptext (sLit "\t.long\t")
457 <> int (fromIntegral (fromIntegral x :: Word32)),
458 ptext (sLit "\t.long\t")
460 (fromIntegral (x `shiftR` 32) :: Word32))]
462 #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
463 ppr_item II64 _ = [ptext (sLit "\t.quad\t") <> pprImm imm]
465 #if x86_64_TARGET_ARCH && !darwin_TARGET_OS
466 -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
467 -- type, which means we can't do pc-relative 64-bit addresses.
468 -- Fortunately we're assuming the small memory model, in which
469 -- all such offsets will fit into 32 bits, so we have to stick
470 -- to 32-bit offset fields and modify the RTS appropriately
472 -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
475 | isRelativeReloc x =
476 [ptext (sLit "\t.long\t") <> pprImm imm,
477 ptext (sLit "\t.long\t0")]
479 [ptext (sLit "\t.quad\t") <> pprImm imm]
481 isRelativeReloc (CmmLabelDiffOff _ _ _) = True
482 isRelativeReloc _ = False
486 = panic "X86.Ppr.ppr_item: no match"
490 pprInstr :: Instr -> Doc
492 pprInstr (COMMENT _) = empty -- nuke 'em
495 = IF_ARCH_alpha( ((<>) (ptext (sLit "\t# ")) (ftext s))
496 ,IF_ARCH_sparc( ((<>) (ptext (sLit "# ")) (ftext s))
497 ,IF_ARCH_i386( ((<>) (ptext (sLit "# ")) (ftext s))
498 ,IF_ARCH_x86_64( ((<>) (ptext (sLit "# ")) (ftext s))
499 ,IF_ARCH_powerpc( IF_OS_linux(
500 ((<>) (ptext (sLit "# ")) (ftext s)),
501 ((<>) (ptext (sLit "; ")) (ftext s)))
505 = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
507 pprInstr (NEWBLOCK _)
508 = panic "PprMach.pprInstr: NEWBLOCK"
511 = panic "PprMach.pprInstr: LDATA"
514 pprInstr (SPILL reg slot)
516 ptext (sLit "\tSPILL"),
520 ptext (sLit "SLOT") <> parens (int slot)]
522 pprInstr (RELOAD slot reg)
524 ptext (sLit "\tRELOAD"),
526 ptext (sLit "SLOT") <> parens (int slot),
531 pprInstr (MOV size src dst)
532 = pprSizeOpOp (sLit "mov") size src dst
534 pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst
535 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
536 -- movl. But we represent it as a MOVZxL instruction, because
537 -- the reg alloc would tend to throw away a plain reg-to-reg
538 -- move, and we still want it to do that.
540 pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst
541 -- zero-extension only needs to extend to 32 bits: on x86_64,
542 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
543 -- instruction is shorter.
545 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
547 -- here we do some patching, since the physical registers are only set late
548 -- in the code generation.
549 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
551 = pprSizeOpOp (sLit "add") size (OpReg reg2) dst
553 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
555 = pprSizeOpOp (sLit "add") size (OpReg reg1) dst
557 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
559 = pprInstr (ADD size (OpImm displ) dst)
561 pprInstr (LEA size src dst) = pprSizeOpOp (sLit "lea") size src dst
563 pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
564 = pprSizeOp (sLit "dec") size dst
565 pprInstr (ADD size (OpImm (ImmInt 1)) dst)
566 = pprSizeOp (sLit "inc") size dst
567 pprInstr (ADD size src dst)
568 = pprSizeOpOp (sLit "add") size src dst
569 pprInstr (ADC size src dst)
570 = pprSizeOpOp (sLit "adc") size src dst
571 pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
572 pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
574 {- A hack. The Intel documentation says that "The two and three
575 operand forms [of IMUL] may also be used with unsigned operands
576 because the lower half of the product is the same regardless if
577 (sic) the operands are signed or unsigned. The CF and OF flags,
578 however, cannot be used to determine if the upper half of the
579 result is non-zero." So there.
581 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst
582 pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst
584 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
585 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
586 pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
588 pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
589 pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
591 pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
592 pprInstr (SAR size src dst) = pprShift (sLit "sar") size src dst
593 pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst
595 pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src
597 pprInstr (CMP size src dst)
598 | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2
599 | otherwise = pprSizeOpOp (sLit "cmp") size src dst
601 -- This predicate is needed here and nowhere else
607 pprInstr (TEST size src dst) = pprSizeOpOp (sLit "test") size src dst
608 pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op
609 pprInstr (POP size op) = pprSizeOp (sLit "pop") size op
611 -- both unused (SDM):
612 -- pprInstr PUSHA = ptext (sLit "\tpushal")
613 -- pprInstr POPA = ptext (sLit "\tpopal")
615 pprInstr NOP = ptext (sLit "\tnop")
616 pprInstr (CLTD II32) = ptext (sLit "\tcltd")
617 pprInstr (CLTD II64) = ptext (sLit "\tcqto")
619 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
621 pprInstr (JXX cond blockid)
622 = pprCondInstr (sLit "j") cond (pprCLabel_asm lab)
623 where lab = mkAsmTempLabel (getUnique blockid)
625 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
627 pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
628 pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
629 pprInstr (JMP_TBL op _) = pprInstr (JMP op)
630 pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
631 pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
633 pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
634 pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
635 pprInstr (IMUL2 sz op) = pprSizeOp (sLit "imul") sz op
638 pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
640 pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
642 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
643 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
644 pprInstr (CVTTSS2SIQ sz from to) = pprSizeOpReg (sLit "cvttss2si") sz from to
645 pprInstr (CVTTSD2SIQ sz from to) = pprSizeOpReg (sLit "cvttsd2si") sz from to
646 pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
647 pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
649 -- FETCHGOT for PIC on ELF platforms
650 pprInstr (FETCHGOT reg)
651 = vcat [ ptext (sLit "\tcall 1f"),
652 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ],
653 hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
657 -- FETCHPC for PIC on Darwin/x86
658 -- get the instruction pointer into a register
659 -- (Terminology note: the IP is called Program Counter on PPC,
660 -- and it's a good thing to use the same name on both platforms)
661 pprInstr (FETCHPC reg)
662 = vcat [ ptext (sLit "\tcall 1f"),
663 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ]
667 -- -----------------------------------------------------------------------------
668 -- i386 floating-point
670 -- Simulating a flat register set on the x86 FP stack is tricky.
671 -- you have to free %st(7) before pushing anything on the FP reg stack
672 -- so as to preclude the possibility of a FP stack overflow exception.
673 pprInstr g@(GMOV src dst)
677 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
679 -- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
680 pprInstr g@(GLD sz addr dst)
681 = pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
682 pprAddr addr, gsemi, gpop dst 1])
684 -- GST sz src addr ==> FLD dst ; FSTPsz addr
685 pprInstr g@(GST sz src addr)
686 | src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
687 = pprG g (hcat [gtab,
688 text "fst", pprSize_x87 sz, gsp, pprAddr addr])
690 = pprG g (hcat [gtab, gpush src 0, gsemi,
691 text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
693 pprInstr g@(GLDZ dst)
694 = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
695 pprInstr g@(GLD1 dst)
696 = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
698 pprInstr (GFTOI src dst)
699 = pprInstr (GDTOI src dst)
701 pprInstr g@(GDTOI src dst)
703 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
704 hcat [gtab, gpush src 0],
705 hcat [gtab, text "movzwl 4(%esp), ", reg,
706 text " ; orl $0xC00, ", reg],
707 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
708 hcat [gtab, text "fistpl 0(%esp)"],
709 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
710 hcat [gtab, text "addl $8, %esp"]
713 reg = pprReg II32 dst
715 pprInstr (GITOF src dst)
716 = pprInstr (GITOD src dst)
718 pprInstr g@(GITOD src dst)
719 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
720 text " ; fildl (%esp) ; ",
721 gpop dst 1, text " ; addl $4,%esp"])
723 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
724 this far into the jungle AND you give a Rat's Ass (tm) what's going
725 on, here's the deal. Generate code to do a floating point comparison
726 of src1 and src2, of kind cond, and set the Zero flag if true.
728 The complications are to do with handling NaNs correctly. We want the
729 property that if either argument is NaN, then the result of the
730 comparison is False ... except if we're comparing for inequality,
731 in which case the answer is True.
733 Here's how the general (non-inequality) case works. As an
734 example, consider generating the an equality test:
736 pushl %eax -- we need to mess with this
737 <get src1 to top of FPU stack>
738 fcomp <src2 location in FPU stack> and pop pushed src1
739 -- Result of comparison is in FPU Status Register bits
741 fstsw %ax -- Move FPU Status Reg to %ax
742 sahf -- move C3 C2 C0 from %ax to integer flag reg
743 -- now the serious magic begins
744 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
745 sete %al -- %al = if arg1 == arg2 then 1 else 0
746 andb %ah,%al -- %al &= %ah
747 -- so %al == 1 iff (comparable && same); else it holds 0
748 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
749 else %al == 0xFF, ZeroFlag=0
750 -- the zero flag is now set as we desire.
753 The special case of inequality differs thusly:
755 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
756 setne %al -- %al = if arg1 /= arg2 then 1 else 0
757 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
758 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
759 else (%al == 0xFF, ZF=0)
761 pprInstr g@(GCMP cond src1 src2)
762 | case cond of { NE -> True; _ -> False }
764 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
765 hcat [gtab, text "fcomp ", greg src2 1,
766 text "; fstsw %ax ; sahf ; setpe %ah"],
767 hcat [gtab, text "setne %al ; ",
768 text "orb %ah,%al ; decb %al ; popl %eax"]
772 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
773 hcat [gtab, text "fcomp ", greg src2 1,
774 text "; fstsw %ax ; sahf ; setpo %ah"],
775 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
776 text "andb %ah,%al ; decb %al ; popl %eax"]
779 {- On the 486, the flags set by FP compare are the unsigned ones!
780 (This looks like a HACK to me. WDP 96/03)
782 fix_FP_cond :: Cond -> Cond
787 fix_FP_cond EQQ = EQQ
789 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
790 -- there should be no others
793 pprInstr g@(GABS _ src dst)
794 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
796 pprInstr g@(GNEG _ src dst)
797 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
799 pprInstr g@(GSQRT sz src dst)
800 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
801 hcat [gtab, gcoerceto sz, gpop dst 1])
803 pprInstr g@(GSIN sz l1 l2 src dst)
804 = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
806 pprInstr g@(GCOS sz l1 l2 src dst)
807 = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
809 pprInstr g@(GTAN sz l1 l2 src dst)
810 = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
812 -- In the translations for GADD, GMUL, GSUB and GDIV,
813 -- the first two cases are mere optimisations. The otherwise clause
814 -- generates correct code under all circumstances.
816 pprInstr g@(GADD _ src1 src2 dst)
818 = pprG g (text "\t#GADD-xxxcase1" $$
819 hcat [gtab, gpush src2 0,
820 text " ; faddp %st(0),", greg src1 1])
822 = pprG g (text "\t#GADD-xxxcase2" $$
823 hcat [gtab, gpush src1 0,
824 text " ; faddp %st(0),", greg src2 1])
826 = pprG g (hcat [gtab, gpush src1 0,
827 text " ; fadd ", greg src2 1, text ",%st(0)",
831 pprInstr g@(GMUL _ src1 src2 dst)
833 = pprG g (text "\t#GMUL-xxxcase1" $$
834 hcat [gtab, gpush src2 0,
835 text " ; fmulp %st(0),", greg src1 1])
837 = pprG g (text "\t#GMUL-xxxcase2" $$
838 hcat [gtab, gpush src1 0,
839 text " ; fmulp %st(0),", greg src2 1])
841 = pprG g (hcat [gtab, gpush src1 0,
842 text " ; fmul ", greg src2 1, text ",%st(0)",
846 pprInstr g@(GSUB _ src1 src2 dst)
848 = pprG g (text "\t#GSUB-xxxcase1" $$
849 hcat [gtab, gpush src2 0,
850 text " ; fsubrp %st(0),", greg src1 1])
852 = pprG g (text "\t#GSUB-xxxcase2" $$
853 hcat [gtab, gpush src1 0,
854 text " ; fsubp %st(0),", greg src2 1])
856 = pprG g (hcat [gtab, gpush src1 0,
857 text " ; fsub ", greg src2 1, text ",%st(0)",
861 pprInstr g@(GDIV _ src1 src2 dst)
863 = pprG g (text "\t#GDIV-xxxcase1" $$
864 hcat [gtab, gpush src2 0,
865 text " ; fdivrp %st(0),", greg src1 1])
867 = pprG g (text "\t#GDIV-xxxcase2" $$
868 hcat [gtab, gpush src1 0,
869 text " ; fdivp %st(0),", greg src2 1])
871 = pprG g (hcat [gtab, gpush src1 0,
872 text " ; fdiv ", greg src2 1, text ",%st(0)",
877 = vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
878 ptext (sLit "\tffree %st(4) ;ffree %st(5)")
882 = panic "X86.Ppr.pprInstr: no match"
885 pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc
886 pprTrigOp op -- fsin, fcos or fptan
887 isTan -- we need a couple of extra steps if we're doing tan
888 l1 l2 -- internal labels for us to use
890 = -- We'll be needing %eax later on
891 hcat [gtab, text "pushl %eax;"] $$
892 -- tan is going to use an extra space on the FP stack
893 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
894 -- First put the value in %st(0) and try to apply the op to it
895 hcat [gpush src 0, text ("; " ++ op)] $$
896 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
897 hcat [gtab, text "fnstsw %ax"] $$
898 hcat [gtab, text "test $0x400,%eax"] $$
899 -- If we were in bounds then jump to the end
900 hcat [gtab, text "je " <> pprCLabel_asm l1] $$
901 -- Otherwise we need to shrink the value. Start by
902 -- loading pi, doubleing it (by adding it to itself),
903 -- and then swapping pi with the value, so the value we
904 -- want to apply op to is in %st(0) again
905 hcat [gtab, text "ffree %st(7); fldpi"] $$
906 hcat [gtab, text "fadd %st(0),%st"] $$
907 hcat [gtab, text "fxch %st(1)"] $$
908 -- Now we have a loop in which we make the value smaller,
909 -- see if it's small enough, and loop if not
910 (pprCLabel_asm l2 <> char ':') $$
911 hcat [gtab, text "fprem1"] $$
912 -- My Debian libc uses fstsw here for the tan code, but I can't
913 -- see any reason why it should need to be different for tan.
914 hcat [gtab, text "fnstsw %ax"] $$
915 hcat [gtab, text "test $0x400,%eax"] $$
916 hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
917 hcat [gtab, text "fstp %st(1)"] $$
918 hcat [gtab, text op] $$
919 (pprCLabel_asm l1 <> char ':') $$
920 -- Pop the 1.0 tan gave us
921 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
923 hcat [gtab, text "popl %eax;"] $$
924 -- And finally make the result the right size
925 hcat [gtab, gcoerceto sz, gpop dst 1]
927 --------------------------
929 -- coerce %st(0) to the specified size
930 gcoerceto :: Size -> Doc
931 gcoerceto FF64 = empty
932 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
933 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
935 gpush :: Reg -> RegNo -> Doc
937 = hcat [text "fld ", greg reg offset]
939 gpop :: Reg -> RegNo -> Doc
941 = hcat [text "fstp ", greg reg offset]
943 greg :: Reg -> RegNo -> Doc
944 greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
955 gregno :: Reg -> RegNo
956 gregno (RegReal (RealRegSingle i)) = i
957 gregno _ = --pprPanic "gregno" (ppr other)
958 999 -- bogus; only needed for debug printing
960 pprG :: Instr -> Doc -> Doc
962 = (char '#' <> pprGInstr fake) $$ actual
965 pprGInstr :: Instr -> Doc
966 pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst
967 pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst
968 pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst
970 pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst
971 pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst
973 pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst
974 pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
976 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
977 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
979 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
980 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
981 pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
982 pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
983 pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
984 pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
985 pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
987 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
988 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
989 pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg (sLit "gmul") sz src1 src2 dst
990 pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg (sLit "gdiv") sz src1 src2 dst
992 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
994 pprDollImm :: Imm -> Doc
995 pprDollImm i = ptext (sLit "$") <> pprImm i
998 pprOperand :: Size -> Operand -> Doc
999 pprOperand s (OpReg r) = pprReg s r
1000 pprOperand _ (OpImm i) = pprDollImm i
1001 pprOperand _ (OpAddr ea) = pprAddr ea
1004 pprMnemonic_ :: LitString -> Doc
1006 char '\t' <> ptext name <> space
1009 pprMnemonic :: LitString -> Size -> Doc
1010 pprMnemonic name size =
1011 char '\t' <> ptext name <> pprSize size <> space
1014 pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc
1015 pprSizeImmOp name size imm op1
1017 pprMnemonic name size,
1025 pprSizeOp :: LitString -> Size -> Operand -> Doc
1026 pprSizeOp name size op1
1028 pprMnemonic name size,
1033 pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1034 pprSizeOpOp name size op1 op2
1036 pprMnemonic name size,
1037 pprOperand size op1,
1043 pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1044 pprOpOp name size op1 op2
1047 pprOperand size op1,
1053 pprSizeReg :: LitString -> Size -> Reg -> Doc
1054 pprSizeReg name size reg1
1056 pprMnemonic name size,
1061 pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc
1062 pprSizeRegReg name size reg1 reg2
1064 pprMnemonic name size,
1071 pprRegReg :: LitString -> Reg -> Reg -> Doc
1072 pprRegReg name reg1 reg2
1075 pprReg archWordSize reg1,
1077 pprReg archWordSize reg2
1081 pprSizeOpReg :: LitString -> Size -> Operand -> Reg -> Doc
1082 pprSizeOpReg name size op1 reg2
1084 pprMnemonic name size,
1085 pprOperand size op1,
1087 pprReg archWordSize reg2
1091 pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc
1092 pprCondRegReg name size cond reg1 reg2
1103 pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc
1104 pprSizeSizeRegReg name size1 size2 reg1 reg2
1118 pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
1119 pprSizeRegRegReg name size reg1 reg2 reg3
1121 pprMnemonic name size,
1130 pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc
1131 pprSizeAddrReg name size op dst
1133 pprMnemonic name size,
1140 pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc
1141 pprSizeRegAddr name size src op
1143 pprMnemonic name size,
1150 pprShift :: LitString -> Size -> Operand -> Operand -> Doc
1151 pprShift name size src dest
1153 pprMnemonic name size,
1154 pprOperand II8 src, -- src is 8-bit sized
1156 pprOperand size dest
1160 pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc
1161 pprSizeOpOpCoerce name size1 size2 op1 op2
1162 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
1163 pprOperand size1 op1,
1165 pprOperand size2 op2
1169 pprCondInstr :: LitString -> Cond -> Doc -> Doc
1170 pprCondInstr name cond arg
1171 = hcat [ char '\t', ptext name, pprCond cond, space, arg]