1 -----------------------------------------------------------------------------
3 -- Pretty-printing assembly language
5 -- (c) The University of Glasgow 1993-2005
7 -----------------------------------------------------------------------------
22 #include "HsVersions.h"
23 #include "nativeGen/NCG.h"
37 import Unique ( pprUnique, Uniquable(..) )
40 import qualified Outputable
41 import Outputable (panic, Outputable)
44 import Distribution.System
46 #if i386_TARGET_ARCH && darwin_TARGET_OS
50 -- -----------------------------------------------------------------------------
51 -- Printing this stuff out
53 pprNatCmmTop :: NatCmmTop Instr -> Doc
54 pprNatCmmTop (CmmData section dats) =
55 pprSectionHeader section $$ vcat (map pprData dats)
57 -- special case for split markers:
58 pprNatCmmTop (CmmProc [] lbl (ListGraph [])) = pprLabel lbl
60 pprNatCmmTop (CmmProc info lbl (ListGraph blocks)) =
61 pprSectionHeader Text $$
62 (if null info then -- blocks guaranteed not null, so label needed
65 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
66 pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
69 vcat (map pprData info) $$
70 pprLabel (entryLblToInfoLbl lbl)
72 vcat (map pprBasicBlock blocks)
73 -- above: Even the first block gets a label, because with branch-chain
74 -- elimination, it might be the target of a goto.
75 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
76 -- If we are using the .subsections_via_symbols directive
77 -- (available on recent versions of Darwin),
78 -- we have to make sure that there is some kind of reference
79 -- from the entry code to a label on the _top_ of of the info table,
80 -- so that the linker will not think it is unreferenced and dead-strip
81 -- it. That's why the label is called a DeadStripPreventer (_dsp).
84 <+> pprCLabel_asm (entryLblToInfoLbl lbl)
86 <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
89 $$ pprSizeDecl (if null info then lbl else entryLblToInfoLbl lbl)
91 -- | Output the ELF .size directive.
92 pprSizeDecl :: CLabel -> Doc
95 ptext (sLit "\t.size") <+> pprCLabel_asm lbl
96 <> ptext (sLit ", .-") <> pprCLabel_asm lbl
101 pprBasicBlock :: NatBasicBlock Instr -> Doc
102 pprBasicBlock (BasicBlock blockid instrs) =
103 pprLabel (mkAsmTempLabel (getUnique blockid)) $$
104 vcat (map pprInstr instrs)
107 pprData :: CmmStatic -> Doc
108 pprData (CmmAlign bytes) = pprAlign bytes
109 pprData (CmmDataLabel lbl) = pprLabel lbl
110 pprData (CmmString str) = pprASCII str
113 pprData (CmmUninitialised bytes) = ptext (sLit ".space ") <> int bytes
115 pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
118 pprData (CmmStaticLit lit) = pprDataItem lit
120 pprGloblDecl :: CLabel -> Doc
122 | not (externallyVisibleCLabel lbl) = empty
123 | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
127 pprTypeAndSizeDecl :: CLabel -> Doc
129 pprTypeAndSizeDecl lbl
130 | not (externallyVisibleCLabel lbl) = empty
131 | otherwise = ptext (sLit ".type ") <>
132 pprCLabel_asm lbl <> ptext (sLit ", @object")
138 pprLabel :: CLabel -> Doc
139 pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
142 pprASCII :: [Word8] -> Doc
144 = vcat (map do1 str) $$ do1 0
147 do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
149 pprAlign :: Int -> Doc
153 = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
159 log2 :: Int -> Int -- cache the common ones
164 log2 n = 1 + log2 (n `quot` 2)
167 -- -----------------------------------------------------------------------------
168 -- pprInstr: print an 'Instr'
170 instance Outputable Instr where
171 ppr instr = Outputable.docToSDoc $ pprInstr instr
174 pprReg :: Size -> Reg -> Doc
178 RegReal (RealRegSingle i) -> ppr_reg_no s i
179 RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
180 RegVirtual (VirtualRegI u) -> text "%vI_" <> asmSDoc (pprUnique u)
181 RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
182 RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
183 RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
184 RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
187 ppr_reg_no :: Size -> Int -> Doc
188 ppr_reg_no II8 = ppr_reg_byte
189 ppr_reg_no II16 = ppr_reg_word
190 ppr_reg_no _ = ppr_reg_long
192 ppr_reg_byte i = ptext
194 0 -> sLit "%al"; 1 -> sLit "%bl";
195 2 -> sLit "%cl"; 3 -> sLit "%dl";
196 _ -> sLit "very naughty I386 byte register"
199 ppr_reg_word i = ptext
201 0 -> sLit "%ax"; 1 -> sLit "%bx";
202 2 -> sLit "%cx"; 3 -> sLit "%dx";
203 4 -> sLit "%si"; 5 -> sLit "%di";
204 6 -> sLit "%bp"; 7 -> sLit "%sp";
205 _ -> sLit "very naughty I386 word register"
208 ppr_reg_long i = ptext
210 0 -> sLit "%eax"; 1 -> sLit "%ebx";
211 2 -> sLit "%ecx"; 3 -> sLit "%edx";
212 4 -> sLit "%esi"; 5 -> sLit "%edi";
213 6 -> sLit "%ebp"; 7 -> sLit "%esp";
216 #elif x86_64_TARGET_ARCH
217 ppr_reg_no :: Size -> Int -> Doc
218 ppr_reg_no II8 = ppr_reg_byte
219 ppr_reg_no II16 = ppr_reg_word
220 ppr_reg_no II32 = ppr_reg_long
221 ppr_reg_no _ = ppr_reg_quad
223 ppr_reg_byte i = ptext
225 0 -> sLit "%al"; 1 -> sLit "%bl";
226 2 -> sLit "%cl"; 3 -> sLit "%dl";
227 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
228 6 -> sLit "%bpl"; 7 -> sLit "%spl";
229 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
230 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
231 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
232 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
233 _ -> sLit "very naughty x86_64 byte register"
236 ppr_reg_word i = ptext
238 0 -> sLit "%ax"; 1 -> sLit "%bx";
239 2 -> sLit "%cx"; 3 -> sLit "%dx";
240 4 -> sLit "%si"; 5 -> sLit "%di";
241 6 -> sLit "%bp"; 7 -> sLit "%sp";
242 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
243 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
244 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
245 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
246 _ -> sLit "very naughty x86_64 word register"
249 ppr_reg_long i = ptext
251 0 -> sLit "%eax"; 1 -> sLit "%ebx";
252 2 -> sLit "%ecx"; 3 -> sLit "%edx";
253 4 -> sLit "%esi"; 5 -> sLit "%edi";
254 6 -> sLit "%ebp"; 7 -> sLit "%esp";
255 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
256 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
257 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
258 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
259 _ -> sLit "very naughty x86_64 register"
262 ppr_reg_quad i = ptext
264 0 -> sLit "%rax"; 1 -> sLit "%rbx";
265 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
266 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
267 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
268 8 -> sLit "%r8"; 9 -> sLit "%r9";
269 10 -> sLit "%r10"; 11 -> sLit "%r11";
270 12 -> sLit "%r12"; 13 -> sLit "%r13";
271 14 -> sLit "%r14"; 15 -> sLit "%r15";
275 ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
278 #if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
279 ppr_reg_float :: Int -> LitString
280 ppr_reg_float i = case i of
281 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
282 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
283 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
284 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
285 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
286 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
287 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
288 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
289 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
290 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
291 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
292 _ -> sLit "very naughty x86 register"
295 pprSize :: Size -> Doc
302 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
303 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
307 pprSize_x87 :: Size -> Doc
313 _ -> panic "X86.Ppr.pprSize_x87"
315 pprCond :: Cond -> Doc
318 GEU -> sLit "ae"; LU -> sLit "b";
319 EQQ -> sLit "e"; GTT -> sLit "g";
320 GE -> sLit "ge"; GU -> sLit "a";
321 LTT -> sLit "l"; LE -> sLit "le";
322 LEU -> sLit "be"; NE -> sLit "ne";
323 NEG -> sLit "s"; POS -> sLit "ns";
324 CARRY -> sLit "c"; OFLO -> sLit "o";
325 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
326 ALWAYS -> sLit "mp"})
330 pprImm (ImmInt i) = int i
331 pprImm (ImmInteger i) = integer i
332 pprImm (ImmCLbl l) = pprCLabel_asm l
333 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
334 pprImm (ImmLit s) = s
336 pprImm (ImmFloat _) = ptext (sLit "naughty float immediate")
337 pprImm (ImmDouble _) = ptext (sLit "naughty double immediate")
339 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
340 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
341 <> lparen <> pprImm b <> rparen
345 pprAddr :: AddrMode -> Doc
346 pprAddr (ImmAddr imm off)
347 = let pp_imm = pprImm imm
351 else if (off < 0) then
354 pp_imm <> char '+' <> int off
356 pprAddr (AddrBaseIndex base index displacement)
358 pp_disp = ppr_disp displacement
359 pp_off p = pp_disp <> char '(' <> p <> char ')'
360 pp_reg r = pprReg archWordSize r
362 case (base, index) of
363 (EABaseNone, EAIndexNone) -> pp_disp
364 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
365 (EABaseRip, EAIndexNone) -> pp_off (ptext (sLit "%rip"))
366 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
367 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
369 _ -> panic "X86.Ppr.pprAddr: no match"
372 ppr_disp (ImmInt 0) = empty
373 ppr_disp imm = pprImm imm
376 pprSectionHeader :: Section -> Doc
379 # if darwin_TARGET_OS
382 Text -> ptext (sLit ".text\n\t.align 2")
383 Data -> ptext (sLit ".data\n\t.align 2")
384 ReadOnlyData -> ptext (sLit ".const\n.align 2")
385 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 2")
386 UninitialisedData -> ptext (sLit ".data\n\t.align 2")
387 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
388 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
393 Text -> ptext (sLit ".text\n\t.align 4,0x90")
394 Data -> ptext (sLit ".data\n\t.align 4")
395 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 4")
396 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 4")
397 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 4")
398 ReadOnlyData16 -> ptext (sLit ".section .rodata\n\t.align 16")
399 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
403 #elif x86_64_TARGET_ARCH
404 # if darwin_TARGET_OS
407 Text -> ptext (sLit ".text\n.align 3")
408 Data -> ptext (sLit ".data\n.align 3")
409 ReadOnlyData -> ptext (sLit ".const\n.align 3")
410 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 3")
411 UninitialisedData -> ptext (sLit ".data\n\t.align 3")
412 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
413 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
418 Text -> ptext (sLit ".text\n\t.align 8")
419 Data -> ptext (sLit ".data\n\t.align 8")
420 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 8")
421 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 8")
422 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 8")
423 ReadOnlyData16 -> ptext (sLit ".section .rodata.cst16\n\t.align 16")
424 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
429 pprSectionHeader _ = panic "X86.Ppr.pprSectionHeader: not defined for this architecture"
436 pprDataItem :: CmmLit -> Doc
438 = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit)
442 -- These seem to be common:
443 ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
444 ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
446 ppr_item FF32 (CmmFloat r _)
447 = let bs = floatToBytes (fromRational r)
448 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
450 ppr_item FF64 (CmmFloat r _)
451 = let bs = doubleToBytes (fromRational r)
452 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
454 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
455 ppr_item II16 _ = [ptext (sLit "\t.word\t") <> pprImm imm]
457 #if i386_TARGET_ARCH && darwin_TARGET_OS
458 ppr_item II64 (CmmInt x _) =
459 [ptext (sLit "\t.long\t")
460 <> int (fromIntegral (fromIntegral x :: Word32)),
461 ptext (sLit "\t.long\t")
463 (fromIntegral (x `shiftR` 32) :: Word32))]
465 #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
466 ppr_item II64 _ = [ptext (sLit "\t.quad\t") <> pprImm imm]
468 #if x86_64_TARGET_ARCH && !darwin_TARGET_OS
469 -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
470 -- type, which means we can't do pc-relative 64-bit addresses.
471 -- Fortunately we're assuming the small memory model, in which
472 -- all such offsets will fit into 32 bits, so we have to stick
473 -- to 32-bit offset fields and modify the RTS appropriately
475 -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
478 | isRelativeReloc x =
479 [ptext (sLit "\t.long\t") <> pprImm imm,
480 ptext (sLit "\t.long\t0")]
482 [ptext (sLit "\t.quad\t") <> pprImm imm]
484 isRelativeReloc (CmmLabelDiffOff _ _ _) = True
485 isRelativeReloc _ = False
489 = panic "X86.Ppr.ppr_item: no match"
493 pprInstr :: Instr -> Doc
495 pprInstr (COMMENT _) = empty -- nuke 'em
498 = IF_ARCH_alpha( ((<>) (ptext (sLit "\t# ")) (ftext s))
499 ,IF_ARCH_sparc( ((<>) (ptext (sLit "# ")) (ftext s))
500 ,IF_ARCH_i386( ((<>) (ptext (sLit "# ")) (ftext s))
501 ,IF_ARCH_x86_64( ((<>) (ptext (sLit "# ")) (ftext s))
502 ,IF_ARCH_powerpc( IF_OS_linux(
503 ((<>) (ptext (sLit "# ")) (ftext s)),
504 ((<>) (ptext (sLit "; ")) (ftext s)))
508 = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
510 pprInstr (NEWBLOCK _)
511 = panic "PprMach.pprInstr: NEWBLOCK"
514 = panic "PprMach.pprInstr: LDATA"
517 pprInstr (SPILL reg slot)
519 ptext (sLit "\tSPILL"),
523 ptext (sLit "SLOT") <> parens (int slot)]
525 pprInstr (RELOAD slot reg)
527 ptext (sLit "\tRELOAD"),
529 ptext (sLit "SLOT") <> parens (int slot),
534 pprInstr (MOV size src dst)
535 = pprSizeOpOp (sLit "mov") size src dst
537 pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst
538 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
539 -- movl. But we represent it as a MOVZxL instruction, because
540 -- the reg alloc would tend to throw away a plain reg-to-reg
541 -- move, and we still want it to do that.
543 pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst
544 -- zero-extension only needs to extend to 32 bits: on x86_64,
545 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
546 -- instruction is shorter.
548 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
550 -- here we do some patching, since the physical registers are only set late
551 -- in the code generation.
552 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
554 = pprSizeOpOp (sLit "add") size (OpReg reg2) dst
556 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
558 = pprSizeOpOp (sLit "add") size (OpReg reg1) dst
560 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
562 = pprInstr (ADD size (OpImm displ) dst)
564 pprInstr (LEA size src dst) = pprSizeOpOp (sLit "lea") size src dst
566 pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
567 = pprSizeOp (sLit "dec") size dst
568 pprInstr (ADD size (OpImm (ImmInt 1)) dst)
569 = pprSizeOp (sLit "inc") size dst
570 pprInstr (ADD size src dst)
571 = pprSizeOpOp (sLit "add") size src dst
572 pprInstr (ADC size src dst)
573 = pprSizeOpOp (sLit "adc") size src dst
574 pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
575 pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
577 {- A hack. The Intel documentation says that "The two and three
578 operand forms [of IMUL] may also be used with unsigned operands
579 because the lower half of the product is the same regardless if
580 (sic) the operands are signed or unsigned. The CF and OF flags,
581 however, cannot be used to determine if the upper half of the
582 result is non-zero." So there.
584 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst
585 pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst
587 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
588 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
589 pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
591 pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
592 pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
594 pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
595 pprInstr (SAR size src dst) = pprShift (sLit "sar") size src dst
596 pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst
598 pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src
600 pprInstr (CMP size src dst)
601 | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2
602 | otherwise = pprSizeOpOp (sLit "cmp") size src dst
604 -- This predicate is needed here and nowhere else
610 pprInstr (TEST size src dst) = pprSizeOpOp (sLit "test") size src dst
611 pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op
612 pprInstr (POP size op) = pprSizeOp (sLit "pop") size op
614 -- both unused (SDM):
615 -- pprInstr PUSHA = ptext (sLit "\tpushal")
616 -- pprInstr POPA = ptext (sLit "\tpopal")
618 pprInstr NOP = ptext (sLit "\tnop")
619 pprInstr (CLTD II32) = ptext (sLit "\tcltd")
620 pprInstr (CLTD II64) = ptext (sLit "\tcqto")
622 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
624 pprInstr (JXX cond blockid)
625 = pprCondInstr (sLit "j") cond (pprCLabel_asm lab)
626 where lab = mkAsmTempLabel (getUnique blockid)
628 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
630 pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
631 pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
632 pprInstr (JMP_TBL op _ _ _) = pprInstr (JMP op)
633 pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
634 pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
636 pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
637 pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
638 pprInstr (IMUL2 sz op) = pprSizeOp (sLit "imul") sz op
641 pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
643 pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
645 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
646 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
647 pprInstr (CVTTSS2SIQ sz from to) = pprSizeOpReg (sLit "cvttss2si") sz from to
648 pprInstr (CVTTSD2SIQ sz from to) = pprSizeOpReg (sLit "cvttsd2si") sz from to
649 pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
650 pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
652 -- FETCHGOT for PIC on ELF platforms
653 pprInstr (FETCHGOT reg)
654 = vcat [ ptext (sLit "\tcall 1f"),
655 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ],
656 hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
660 -- FETCHPC for PIC on Darwin/x86
661 -- get the instruction pointer into a register
662 -- (Terminology note: the IP is called Program Counter on PPC,
663 -- and it's a good thing to use the same name on both platforms)
664 pprInstr (FETCHPC reg)
665 = vcat [ ptext (sLit "\tcall 1f"),
666 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ]
670 -- -----------------------------------------------------------------------------
671 -- i386 floating-point
673 -- Simulating a flat register set on the x86 FP stack is tricky.
674 -- you have to free %st(7) before pushing anything on the FP reg stack
675 -- so as to preclude the possibility of a FP stack overflow exception.
676 pprInstr g@(GMOV src dst)
680 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
682 -- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
683 pprInstr g@(GLD sz addr dst)
684 = pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
685 pprAddr addr, gsemi, gpop dst 1])
687 -- GST sz src addr ==> FLD dst ; FSTPsz addr
688 pprInstr g@(GST sz src addr)
689 | src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
690 = pprG g (hcat [gtab,
691 text "fst", pprSize_x87 sz, gsp, pprAddr addr])
693 = pprG g (hcat [gtab, gpush src 0, gsemi,
694 text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
696 pprInstr g@(GLDZ dst)
697 = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
698 pprInstr g@(GLD1 dst)
699 = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
701 pprInstr (GFTOI src dst)
702 = pprInstr (GDTOI src dst)
704 pprInstr g@(GDTOI src dst)
706 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
707 hcat [gtab, gpush src 0],
708 hcat [gtab, text "movzwl 4(%esp), ", reg,
709 text " ; orl $0xC00, ", reg],
710 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
711 hcat [gtab, text "fistpl 0(%esp)"],
712 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
713 hcat [gtab, text "addl $8, %esp"]
716 reg = pprReg II32 dst
718 pprInstr (GITOF src dst)
719 = pprInstr (GITOD src dst)
721 pprInstr g@(GITOD src dst)
722 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
723 text " ; fildl (%esp) ; ",
724 gpop dst 1, text " ; addl $4,%esp"])
726 pprInstr g@(GDTOF src dst)
727 = pprG g (vcat [gtab <> gpush src 0,
728 gtab <> text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ;",
731 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
732 this far into the jungle AND you give a Rat's Ass (tm) what's going
733 on, here's the deal. Generate code to do a floating point comparison
734 of src1 and src2, of kind cond, and set the Zero flag if true.
736 The complications are to do with handling NaNs correctly. We want the
737 property that if either argument is NaN, then the result of the
738 comparison is False ... except if we're comparing for inequality,
739 in which case the answer is True.
741 Here's how the general (non-inequality) case works. As an
742 example, consider generating the an equality test:
744 pushl %eax -- we need to mess with this
745 <get src1 to top of FPU stack>
746 fcomp <src2 location in FPU stack> and pop pushed src1
747 -- Result of comparison is in FPU Status Register bits
749 fstsw %ax -- Move FPU Status Reg to %ax
750 sahf -- move C3 C2 C0 from %ax to integer flag reg
751 -- now the serious magic begins
752 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
753 sete %al -- %al = if arg1 == arg2 then 1 else 0
754 andb %ah,%al -- %al &= %ah
755 -- so %al == 1 iff (comparable && same); else it holds 0
756 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
757 else %al == 0xFF, ZeroFlag=0
758 -- the zero flag is now set as we desire.
761 The special case of inequality differs thusly:
763 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
764 setne %al -- %al = if arg1 /= arg2 then 1 else 0
765 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
766 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
767 else (%al == 0xFF, ZF=0)
769 pprInstr g@(GCMP cond src1 src2)
770 | case cond of { NE -> True; _ -> False }
772 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
773 hcat [gtab, text "fcomp ", greg src2 1,
774 text "; fstsw %ax ; sahf ; setpe %ah"],
775 hcat [gtab, text "setne %al ; ",
776 text "orb %ah,%al ; decb %al ; popl %eax"]
780 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
781 hcat [gtab, text "fcomp ", greg src2 1,
782 text "; fstsw %ax ; sahf ; setpo %ah"],
783 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
784 text "andb %ah,%al ; decb %al ; popl %eax"]
787 {- On the 486, the flags set by FP compare are the unsigned ones!
788 (This looks like a HACK to me. WDP 96/03)
790 fix_FP_cond :: Cond -> Cond
795 fix_FP_cond EQQ = EQQ
797 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
798 -- there should be no others
801 pprInstr g@(GABS _ src dst)
802 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
804 pprInstr g@(GNEG _ src dst)
805 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
807 pprInstr g@(GSQRT sz src dst)
808 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
809 hcat [gtab, gcoerceto sz, gpop dst 1])
811 pprInstr g@(GSIN sz l1 l2 src dst)
812 = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
814 pprInstr g@(GCOS sz l1 l2 src dst)
815 = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
817 pprInstr g@(GTAN sz l1 l2 src dst)
818 = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
820 -- In the translations for GADD, GMUL, GSUB and GDIV,
821 -- the first two cases are mere optimisations. The otherwise clause
822 -- generates correct code under all circumstances.
824 pprInstr g@(GADD _ src1 src2 dst)
826 = pprG g (text "\t#GADD-xxxcase1" $$
827 hcat [gtab, gpush src2 0,
828 text " ; faddp %st(0),", greg src1 1])
830 = pprG g (text "\t#GADD-xxxcase2" $$
831 hcat [gtab, gpush src1 0,
832 text " ; faddp %st(0),", greg src2 1])
834 = pprG g (hcat [gtab, gpush src1 0,
835 text " ; fadd ", greg src2 1, text ",%st(0)",
839 pprInstr g@(GMUL _ src1 src2 dst)
841 = pprG g (text "\t#GMUL-xxxcase1" $$
842 hcat [gtab, gpush src2 0,
843 text " ; fmulp %st(0),", greg src1 1])
845 = pprG g (text "\t#GMUL-xxxcase2" $$
846 hcat [gtab, gpush src1 0,
847 text " ; fmulp %st(0),", greg src2 1])
849 = pprG g (hcat [gtab, gpush src1 0,
850 text " ; fmul ", greg src2 1, text ",%st(0)",
854 pprInstr g@(GSUB _ src1 src2 dst)
856 = pprG g (text "\t#GSUB-xxxcase1" $$
857 hcat [gtab, gpush src2 0,
858 text " ; fsubrp %st(0),", greg src1 1])
860 = pprG g (text "\t#GSUB-xxxcase2" $$
861 hcat [gtab, gpush src1 0,
862 text " ; fsubp %st(0),", greg src2 1])
864 = pprG g (hcat [gtab, gpush src1 0,
865 text " ; fsub ", greg src2 1, text ",%st(0)",
869 pprInstr g@(GDIV _ src1 src2 dst)
871 = pprG g (text "\t#GDIV-xxxcase1" $$
872 hcat [gtab, gpush src2 0,
873 text " ; fdivrp %st(0),", greg src1 1])
875 = pprG g (text "\t#GDIV-xxxcase2" $$
876 hcat [gtab, gpush src1 0,
877 text " ; fdivp %st(0),", greg src2 1])
879 = pprG g (hcat [gtab, gpush src1 0,
880 text " ; fdiv ", greg src2 1, text ",%st(0)",
885 = vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
886 ptext (sLit "\tffree %st(4) ;ffree %st(5)")
890 = panic "X86.Ppr.pprInstr: no match"
893 pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc
894 pprTrigOp op -- fsin, fcos or fptan
895 isTan -- we need a couple of extra steps if we're doing tan
896 l1 l2 -- internal labels for us to use
898 = -- We'll be needing %eax later on
899 hcat [gtab, text "pushl %eax;"] $$
900 -- tan is going to use an extra space on the FP stack
901 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
902 -- First put the value in %st(0) and try to apply the op to it
903 hcat [gpush src 0, text ("; " ++ op)] $$
904 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
905 hcat [gtab, text "fnstsw %ax"] $$
906 hcat [gtab, text "test $0x400,%eax"] $$
907 -- If we were in bounds then jump to the end
908 hcat [gtab, text "je " <> pprCLabel_asm l1] $$
909 -- Otherwise we need to shrink the value. Start by
910 -- loading pi, doubleing it (by adding it to itself),
911 -- and then swapping pi with the value, so the value we
912 -- want to apply op to is in %st(0) again
913 hcat [gtab, text "ffree %st(7); fldpi"] $$
914 hcat [gtab, text "fadd %st(0),%st"] $$
915 hcat [gtab, text "fxch %st(1)"] $$
916 -- Now we have a loop in which we make the value smaller,
917 -- see if it's small enough, and loop if not
918 (pprCLabel_asm l2 <> char ':') $$
919 hcat [gtab, text "fprem1"] $$
920 -- My Debian libc uses fstsw here for the tan code, but I can't
921 -- see any reason why it should need to be different for tan.
922 hcat [gtab, text "fnstsw %ax"] $$
923 hcat [gtab, text "test $0x400,%eax"] $$
924 hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
925 hcat [gtab, text "fstp %st(1)"] $$
926 hcat [gtab, text op] $$
927 (pprCLabel_asm l1 <> char ':') $$
928 -- Pop the 1.0 tan gave us
929 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
931 hcat [gtab, text "popl %eax;"] $$
932 -- And finally make the result the right size
933 hcat [gtab, gcoerceto sz, gpop dst 1]
935 --------------------------
937 -- coerce %st(0) to the specified size
938 gcoerceto :: Size -> Doc
939 gcoerceto FF64 = empty
940 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
941 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
943 gpush :: Reg -> RegNo -> Doc
945 = hcat [text "fld ", greg reg offset]
947 gpop :: Reg -> RegNo -> Doc
949 = hcat [text "fstp ", greg reg offset]
951 greg :: Reg -> RegNo -> Doc
952 greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
963 gregno :: Reg -> RegNo
964 gregno (RegReal (RealRegSingle i)) = i
965 gregno _ = --pprPanic "gregno" (ppr other)
966 999 -- bogus; only needed for debug printing
968 pprG :: Instr -> Doc -> Doc
970 = (char '#' <> pprGInstr fake) $$ actual
973 pprGInstr :: Instr -> Doc
974 pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst
975 pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst
976 pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst
978 pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst
979 pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst
981 pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst
982 pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
984 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
985 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
986 pprGInstr (GDTOF src dst) = pprSizeSizeRegReg (sLit "gdtof") FF64 FF32 src dst
988 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
989 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
990 pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
991 pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
992 pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
993 pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
994 pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
996 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
997 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
998 pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg (sLit "gmul") sz src1 src2 dst
999 pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg (sLit "gdiv") sz src1 src2 dst
1001 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
1003 pprDollImm :: Imm -> Doc
1004 pprDollImm i = ptext (sLit "$") <> pprImm i
1007 pprOperand :: Size -> Operand -> Doc
1008 pprOperand s (OpReg r) = pprReg s r
1009 pprOperand _ (OpImm i) = pprDollImm i
1010 pprOperand _ (OpAddr ea) = pprAddr ea
1013 pprMnemonic_ :: LitString -> Doc
1015 char '\t' <> ptext name <> space
1018 pprMnemonic :: LitString -> Size -> Doc
1019 pprMnemonic name size =
1020 char '\t' <> ptext name <> pprSize size <> space
1023 pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc
1024 pprSizeImmOp name size imm op1
1026 pprMnemonic name size,
1034 pprSizeOp :: LitString -> Size -> Operand -> Doc
1035 pprSizeOp name size op1
1037 pprMnemonic name size,
1042 pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1043 pprSizeOpOp name size op1 op2
1045 pprMnemonic name size,
1046 pprOperand size op1,
1052 pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1053 pprOpOp name size op1 op2
1056 pprOperand size op1,
1062 pprSizeReg :: LitString -> Size -> Reg -> Doc
1063 pprSizeReg name size reg1
1065 pprMnemonic name size,
1070 pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc
1071 pprSizeRegReg name size reg1 reg2
1073 pprMnemonic name size,
1080 pprRegReg :: LitString -> Reg -> Reg -> Doc
1081 pprRegReg name reg1 reg2
1084 pprReg archWordSize reg1,
1086 pprReg archWordSize reg2
1090 pprSizeOpReg :: LitString -> Size -> Operand -> Reg -> Doc
1091 pprSizeOpReg name size op1 reg2
1093 pprMnemonic name size,
1094 pprOperand size op1,
1096 pprReg archWordSize reg2
1100 pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc
1101 pprCondRegReg name size cond reg1 reg2
1112 pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc
1113 pprSizeSizeRegReg name size1 size2 reg1 reg2
1127 pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
1128 pprSizeRegRegReg name size reg1 reg2 reg3
1130 pprMnemonic name size,
1139 pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc
1140 pprSizeAddrReg name size op dst
1142 pprMnemonic name size,
1149 pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc
1150 pprSizeRegAddr name size src op
1152 pprMnemonic name size,
1159 pprShift :: LitString -> Size -> Operand -> Operand -> Doc
1160 pprShift name size src dest
1162 pprMnemonic name size,
1163 pprOperand II8 src, -- src is 8-bit sized
1165 pprOperand size dest
1169 pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc
1170 pprSizeOpOpCoerce name size1 size2 op1 op2
1171 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
1172 pprOperand size1 op1,
1174 pprOperand size2 op2
1178 pprCondInstr :: LitString -> Cond -> Doc -> Doc
1179 pprCondInstr name cond arg
1180 = hcat [ char '\t', ptext name, pprCond cond, space, arg]