1 -----------------------------------------------------------------------------
3 -- Pretty-printing assembly language
5 -- (c) The University of Glasgow 1993-2005
7 -----------------------------------------------------------------------------
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
38 import Unique ( pprUnique )
41 import qualified Outputable
42 import Outputable (panic, Outputable)
46 #if i386_TARGET_ARCH && darwin_TARGET_OS
50 -- -----------------------------------------------------------------------------
51 -- Printing this stuff out
53 pprNatCmmTop :: NatCmmTop Instr -> Doc
54 pprNatCmmTop (CmmData section dats) =
55 pprSectionHeader section $$ vcat (map pprData dats)
57 -- special case for split markers:
58 pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
60 pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
61 pprSectionHeader Text $$
62 (if null info then -- blocks guaranteed not null, so label needed
65 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
66 pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
69 vcat (map pprData info) $$
70 pprLabel (entryLblToInfoLbl lbl)
72 vcat (map pprBasicBlock blocks)
73 -- above: Even the first block gets a label, because with branch-chain
74 -- elimination, it might be the target of a goto.
75 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
76 -- If we are using the .subsections_via_symbols directive
77 -- (available on recent versions of Darwin),
78 -- we have to make sure that there is some kind of reference
79 -- from the entry code to a label on the _top_ of of the info table,
80 -- so that the linker will not think it is unreferenced and dead-strip
81 -- it. That's why the label is called a DeadStripPreventer (_dsp).
84 <+> pprCLabel_asm (entryLblToInfoLbl lbl)
86 <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
91 pprBasicBlock :: NatBasicBlock Instr -> Doc
92 pprBasicBlock (BasicBlock (BlockId id) instrs) =
93 pprLabel (mkAsmTempLabel id) $$
94 vcat (map pprInstr instrs)
97 pprData :: CmmStatic -> Doc
98 pprData (CmmAlign bytes) = pprAlign bytes
99 pprData (CmmDataLabel lbl) = pprLabel lbl
100 pprData (CmmString str) = pprASCII str
103 pprData (CmmUninitialised bytes) = ptext (sLit ".space ") <> int bytes
105 pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
108 pprData (CmmStaticLit lit) = pprDataItem lit
110 pprGloblDecl :: CLabel -> Doc
112 | not (externallyVisibleCLabel lbl) = empty
113 | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
117 pprTypeAndSizeDecl :: CLabel -> Doc
119 pprTypeAndSizeDecl lbl
120 | not (externallyVisibleCLabel lbl) = empty
121 | otherwise = ptext (sLit ".type ") <>
122 pprCLabel_asm lbl <> ptext (sLit ", @object")
128 pprLabel :: CLabel -> Doc
129 pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
132 pprASCII :: [Word8] -> Doc
134 = vcat (map do1 str) $$ do1 0
137 do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
139 pprAlign :: Int -> Doc
143 = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
149 log2 :: Int -> Int -- cache the common ones
154 log2 n = 1 + log2 (n `quot` 2)
157 -- -----------------------------------------------------------------------------
158 -- pprInstr: print an 'Instr'
160 instance Outputable Instr where
161 ppr instr = Outputable.docToSDoc $ pprInstr instr
164 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
165 pprUserReg :: Reg -> Doc
166 pprUserReg = pprReg IF_ARCH_i386(II32,) IF_ARCH_x86_64(II64,)
169 pprUserReg :: Reg -> Doc
170 pprUserReg = panic "X86.Ppr.pprUserReg: not defined"
174 pprReg :: Size -> Reg -> Doc
178 RegReal (RealRegSingle i) -> ppr_reg_no s i
179 RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
180 RegVirtual (VirtualRegI u) -> text "%vI_" <> asmSDoc (pprUnique u)
181 RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
182 RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
183 RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
184 RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
187 ppr_reg_no :: Size -> Int -> Doc
188 ppr_reg_no II8 = ppr_reg_byte
189 ppr_reg_no II16 = ppr_reg_word
190 ppr_reg_no _ = ppr_reg_long
192 ppr_reg_byte i = ptext
194 0 -> sLit "%al"; 1 -> sLit "%bl";
195 2 -> sLit "%cl"; 3 -> sLit "%dl";
196 _ -> sLit "very naughty I386 byte register"
199 ppr_reg_word i = ptext
201 0 -> sLit "%ax"; 1 -> sLit "%bx";
202 2 -> sLit "%cx"; 3 -> sLit "%dx";
203 4 -> sLit "%si"; 5 -> sLit "%di";
204 6 -> sLit "%bp"; 7 -> sLit "%sp";
205 _ -> sLit "very naughty I386 word register"
208 ppr_reg_long i = ptext
210 0 -> sLit "%eax"; 1 -> sLit "%ebx";
211 2 -> sLit "%ecx"; 3 -> sLit "%edx";
212 4 -> sLit "%esi"; 5 -> sLit "%edi";
213 6 -> sLit "%ebp"; 7 -> sLit "%esp";
216 #elif x86_64_TARGET_ARCH
217 ppr_reg_no :: Size -> Int -> Doc
218 ppr_reg_no II8 = ppr_reg_byte
219 ppr_reg_no II16 = ppr_reg_word
220 ppr_reg_no II32 = ppr_reg_long
221 ppr_reg_no _ = ppr_reg_quad
223 ppr_reg_byte i = ptext
225 0 -> sLit "%al"; 1 -> sLit "%bl";
226 2 -> sLit "%cl"; 3 -> sLit "%dl";
227 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
228 6 -> sLit "%bpl"; 7 -> sLit "%spl";
229 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
230 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
231 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
232 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
233 _ -> sLit "very naughty x86_64 byte register"
236 ppr_reg_word i = ptext
238 0 -> sLit "%ax"; 1 -> sLit "%bx";
239 2 -> sLit "%cx"; 3 -> sLit "%dx";
240 4 -> sLit "%si"; 5 -> sLit "%di";
241 6 -> sLit "%bp"; 7 -> sLit "%sp";
242 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
243 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
244 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
245 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
246 _ -> sLit "very naughty x86_64 word register"
249 ppr_reg_long i = ptext
251 0 -> sLit "%eax"; 1 -> sLit "%ebx";
252 2 -> sLit "%ecx"; 3 -> sLit "%edx";
253 4 -> sLit "%esi"; 5 -> sLit "%edi";
254 6 -> sLit "%ebp"; 7 -> sLit "%esp";
255 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
256 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
257 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
258 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
259 _ -> sLit "very naughty x86_64 register"
262 ppr_reg_quad i = ptext
264 0 -> sLit "%rax"; 1 -> sLit "%rbx";
265 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
266 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
267 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
268 8 -> sLit "%r8"; 9 -> sLit "%r9";
269 10 -> sLit "%r10"; 11 -> sLit "%r11";
270 12 -> sLit "%r12"; 13 -> sLit "%r13";
271 14 -> sLit "%r14"; 15 -> sLit "%r15";
275 ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
278 #if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
279 ppr_reg_float :: Int -> LitString
280 ppr_reg_float i = case i of
281 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
282 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
283 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
284 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
285 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
286 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
287 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
288 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
289 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
290 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
291 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
292 _ -> sLit "very naughty x86 register"
295 pprSize :: Size -> Doc
302 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
303 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
307 pprSize_x87 :: Size -> Doc
313 _ -> panic "X86.Ppr.pprSize_x87"
315 pprCond :: Cond -> Doc
318 GEU -> sLit "ae"; LU -> sLit "b";
319 EQQ -> sLit "e"; GTT -> sLit "g";
320 GE -> sLit "ge"; GU -> sLit "a";
321 LTT -> sLit "l"; LE -> sLit "le";
322 LEU -> sLit "be"; NE -> sLit "ne";
323 NEG -> sLit "s"; POS -> sLit "ns";
324 CARRY -> sLit "c"; OFLO -> sLit "o";
325 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
326 ALWAYS -> sLit "mp"})
330 pprImm (ImmInt i) = int i
331 pprImm (ImmInteger i) = integer i
332 pprImm (ImmCLbl l) = pprCLabel_asm l
333 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
334 pprImm (ImmLit s) = s
336 pprImm (ImmFloat _) = ptext (sLit "naughty float immediate")
337 pprImm (ImmDouble _) = ptext (sLit "naughty double immediate")
339 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
340 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
341 <> lparen <> pprImm b <> rparen
345 pprAddr :: AddrMode -> Doc
346 pprAddr (ImmAddr imm off)
347 = let pp_imm = pprImm imm
351 else if (off < 0) then
354 pp_imm <> char '+' <> int off
356 pprAddr (AddrBaseIndex base index displacement)
358 pp_disp = ppr_disp displacement
359 pp_off p = pp_disp <> char '(' <> p <> char ')'
360 pp_reg r = pprReg archWordSize r
362 case (base, index) of
363 (EABaseNone, EAIndexNone) -> pp_disp
364 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
365 (EABaseRip, EAIndexNone) -> pp_off (ptext (sLit "%rip"))
366 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
367 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
369 _ -> panic "X86.Ppr.pprAddr: no match"
372 ppr_disp (ImmInt 0) = empty
373 ppr_disp imm = pprImm imm
376 pprSectionHeader :: Section -> Doc
379 # if darwin_TARGET_OS
382 Text -> ptext (sLit ".text\n\t.align 2")
383 Data -> ptext (sLit ".data\n\t.align 2")
384 ReadOnlyData -> ptext (sLit ".const\n.align 2")
385 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 2")
386 UninitialisedData -> ptext (sLit ".data\n\t.align 2")
387 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
388 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
393 Text -> ptext (sLit ".text\n\t.align 4,0x90")
394 Data -> ptext (sLit ".data\n\t.align 4")
395 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 4")
396 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 4")
397 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 4")
398 ReadOnlyData16 -> ptext (sLit ".section .rodata\n\t.align 16")
399 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
403 #elif x86_64_TARGET_ARCH
404 # if darwin_TARGET_OS
407 Text -> ptext (sLit ".text\n.align 3")
408 Data -> ptext (sLit ".data\n.align 3")
409 ReadOnlyData -> ptext (sLit ".const\n.align 3")
410 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 3")
411 UninitialisedData -> ptext (sLit ".data\n\t.align 3")
412 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
413 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
418 Text -> ptext (sLit ".text\n\t.align 8")
419 Data -> ptext (sLit ".data\n\t.align 8")
420 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 8")
421 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 8")
422 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 8")
423 ReadOnlyData16 -> ptext (sLit ".section .rodata.cst16\n\t.align 16")
424 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
429 pprSectionHeader _ = panic "X86.Ppr.pprSectionHeader: not defined for this architecture"
436 pprDataItem :: CmmLit -> Doc
438 = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit)
442 -- These seem to be common:
443 ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
444 ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
446 ppr_item FF32 (CmmFloat r _)
447 = let bs = floatToBytes (fromRational r)
448 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
450 ppr_item FF64 (CmmFloat r _)
451 = let bs = doubleToBytes (fromRational r)
452 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
454 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
455 ppr_item II16 _ = [ptext (sLit "\t.word\t") <> pprImm imm]
457 #if i386_TARGET_ARCH && darwin_TARGET_OS
458 ppr_item II64 (CmmInt x _) =
459 [ptext (sLit "\t.long\t")
460 <> int (fromIntegral (fromIntegral x :: Word32)),
461 ptext (sLit "\t.long\t")
463 (fromIntegral (x `shiftR` 32) :: Word32))]
465 #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
466 ppr_item II64 _ = [ptext (sLit "\t.quad\t") <> pprImm imm]
468 #if x86_64_TARGET_ARCH && !darwin_TARGET_OS
469 -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
470 -- type, which means we can't do pc-relative 64-bit addresses.
471 -- Fortunately we're assuming the small memory model, in which
472 -- all such offsets will fit into 32 bits, so we have to stick
473 -- to 32-bit offset fields and modify the RTS appropriately
475 -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
478 | isRelativeReloc x =
479 [ptext (sLit "\t.long\t") <> pprImm imm,
480 ptext (sLit "\t.long\t0")]
482 [ptext (sLit "\t.quad\t") <> pprImm imm]
484 isRelativeReloc (CmmLabelDiffOff _ _ _) = True
485 isRelativeReloc _ = False
489 = panic "X86.Ppr.ppr_item: no match"
493 pprInstr :: Instr -> Doc
495 pprInstr (COMMENT _) = empty -- nuke 'em
498 = IF_ARCH_alpha( ((<>) (ptext (sLit "\t# ")) (ftext s))
499 ,IF_ARCH_sparc( ((<>) (ptext (sLit "# ")) (ftext s))
500 ,IF_ARCH_i386( ((<>) (ptext (sLit "# ")) (ftext s))
501 ,IF_ARCH_x86_64( ((<>) (ptext (sLit "# ")) (ftext s))
502 ,IF_ARCH_powerpc( IF_OS_linux(
503 ((<>) (ptext (sLit "# ")) (ftext s)),
504 ((<>) (ptext (sLit "; ")) (ftext s)))
508 = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
510 pprInstr (NEWBLOCK _)
511 = panic "PprMach.pprInstr: NEWBLOCK"
514 = panic "PprMach.pprInstr: LDATA"
517 pprInstr (SPILL reg slot)
519 ptext (sLit "\tSPILL"),
523 ptext (sLit "SLOT") <> parens (int slot)]
525 pprInstr (RELOAD slot reg)
527 ptext (sLit "\tRELOAD"),
529 ptext (sLit "SLOT") <> parens (int slot),
534 pprInstr (MOV size src dst)
535 = pprSizeOpOp (sLit "mov") size src dst
537 pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst
538 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
539 -- movl. But we represent it as a MOVZxL instruction, because
540 -- the reg alloc would tend to throw away a plain reg-to-reg
541 -- move, and we still want it to do that.
543 pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst
544 -- zero-extension only needs to extend to 32 bits: on x86_64,
545 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
546 -- instruction is shorter.
548 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
550 -- here we do some patching, since the physical registers are only set late
551 -- in the code generation.
552 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
554 = pprSizeOpOp (sLit "add") size (OpReg reg2) dst
556 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
558 = pprSizeOpOp (sLit "add") size (OpReg reg1) dst
560 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
562 = pprInstr (ADD size (OpImm displ) dst)
564 pprInstr (LEA size src dst) = pprSizeOpOp (sLit "lea") size src dst
566 pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
567 = pprSizeOp (sLit "dec") size dst
568 pprInstr (ADD size (OpImm (ImmInt 1)) dst)
569 = pprSizeOp (sLit "inc") size dst
570 pprInstr (ADD size src dst)
571 = pprSizeOpOp (sLit "add") size src dst
572 pprInstr (ADC size src dst)
573 = pprSizeOpOp (sLit "adc") size src dst
574 pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
575 pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
577 {- A hack. The Intel documentation says that "The two and three
578 operand forms [of IMUL] may also be used with unsigned operands
579 because the lower half of the product is the same regardless if
580 (sic) the operands are signed or unsigned. The CF and OF flags,
581 however, cannot be used to determine if the upper half of the
582 result is non-zero." So there.
584 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst
585 pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst
587 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
588 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
589 pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
591 pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
592 pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
594 pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
595 pprInstr (SAR size src dst) = pprShift (sLit "sar") size src dst
596 pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst
598 pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src
600 pprInstr (CMP size src dst)
601 | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2
602 | otherwise = pprSizeOpOp (sLit "cmp") size src dst
604 -- This predicate is needed here and nowhere else
610 pprInstr (TEST size src dst) = pprSizeOpOp (sLit "test") size src dst
611 pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op
612 pprInstr (POP size op) = pprSizeOp (sLit "pop") size op
614 -- both unused (SDM):
615 -- pprInstr PUSHA = ptext (sLit "\tpushal")
616 -- pprInstr POPA = ptext (sLit "\tpopal")
618 pprInstr NOP = ptext (sLit "\tnop")
619 pprInstr (CLTD II32) = ptext (sLit "\tcltd")
620 pprInstr (CLTD II64) = ptext (sLit "\tcqto")
622 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
624 pprInstr (JXX cond (BlockId id))
625 = pprCondInstr (sLit "j") cond (pprCLabel_asm lab)
626 where lab = mkAsmTempLabel id
628 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
630 pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
631 pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
632 pprInstr (JMP_TBL op _) = pprInstr (JMP op)
633 pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
634 pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
636 pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
637 pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
638 pprInstr (IMUL2 sz op) = pprSizeOp (sLit "imul") sz op
641 pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
643 pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
645 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
646 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
647 pprInstr (CVTTSS2SIQ sz from to) = pprSizeOpReg (sLit "cvttss2si") sz from to
648 pprInstr (CVTTSD2SIQ sz from to) = pprSizeOpReg (sLit "cvttsd2si") sz from to
649 pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
650 pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
652 -- FETCHGOT for PIC on ELF platforms
653 pprInstr (FETCHGOT reg)
654 = vcat [ ptext (sLit "\tcall 1f"),
655 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ],
656 hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
660 -- FETCHPC for PIC on Darwin/x86
661 -- get the instruction pointer into a register
662 -- (Terminology note: the IP is called Program Counter on PPC,
663 -- and it's a good thing to use the same name on both platforms)
664 pprInstr (FETCHPC reg)
665 = vcat [ ptext (sLit "\tcall 1f"),
666 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ]
670 -- -----------------------------------------------------------------------------
671 -- i386 floating-point
673 -- Simulating a flat register set on the x86 FP stack is tricky.
674 -- you have to free %st(7) before pushing anything on the FP reg stack
675 -- so as to preclude the possibility of a FP stack overflow exception.
676 pprInstr g@(GMOV src dst)
680 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
682 -- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
683 pprInstr g@(GLD sz addr dst)
684 = pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
685 pprAddr addr, gsemi, gpop dst 1])
687 -- GST sz src addr ==> FLD dst ; FSTPsz addr
688 pprInstr g@(GST sz src addr)
689 | src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
690 = pprG g (hcat [gtab,
691 text "fst", pprSize_x87 sz, gsp, pprAddr addr])
693 = pprG g (hcat [gtab, gpush src 0, gsemi,
694 text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
696 pprInstr g@(GLDZ dst)
697 = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
698 pprInstr g@(GLD1 dst)
699 = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
701 pprInstr (GFTOI src dst)
702 = pprInstr (GDTOI src dst)
704 pprInstr g@(GDTOI src dst)
706 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
707 hcat [gtab, gpush src 0],
708 hcat [gtab, text "movzwl 4(%esp), ", reg,
709 text " ; orl $0xC00, ", reg],
710 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
711 hcat [gtab, text "fistpl 0(%esp)"],
712 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
713 hcat [gtab, text "addl $8, %esp"]
716 reg = pprReg II32 dst
718 pprInstr (GITOF src dst)
719 = pprInstr (GITOD src dst)
721 pprInstr g@(GITOD src dst)
722 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
723 text " ; fildl (%esp) ; ",
724 gpop dst 1, text " ; addl $4,%esp"])
726 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
727 this far into the jungle AND you give a Rat's Ass (tm) what's going
728 on, here's the deal. Generate code to do a floating point comparison
729 of src1 and src2, of kind cond, and set the Zero flag if true.
731 The complications are to do with handling NaNs correctly. We want the
732 property that if either argument is NaN, then the result of the
733 comparison is False ... except if we're comparing for inequality,
734 in which case the answer is True.
736 Here's how the general (non-inequality) case works. As an
737 example, consider generating the an equality test:
739 pushl %eax -- we need to mess with this
740 <get src1 to top of FPU stack>
741 fcomp <src2 location in FPU stack> and pop pushed src1
742 -- Result of comparison is in FPU Status Register bits
744 fstsw %ax -- Move FPU Status Reg to %ax
745 sahf -- move C3 C2 C0 from %ax to integer flag reg
746 -- now the serious magic begins
747 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
748 sete %al -- %al = if arg1 == arg2 then 1 else 0
749 andb %ah,%al -- %al &= %ah
750 -- so %al == 1 iff (comparable && same); else it holds 0
751 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
752 else %al == 0xFF, ZeroFlag=0
753 -- the zero flag is now set as we desire.
756 The special case of inequality differs thusly:
758 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
759 setne %al -- %al = if arg1 /= arg2 then 1 else 0
760 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
761 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
762 else (%al == 0xFF, ZF=0)
764 pprInstr g@(GCMP cond src1 src2)
765 | case cond of { NE -> True; _ -> False }
767 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
768 hcat [gtab, text "fcomp ", greg src2 1,
769 text "; fstsw %ax ; sahf ; setpe %ah"],
770 hcat [gtab, text "setne %al ; ",
771 text "orb %ah,%al ; decb %al ; popl %eax"]
775 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
776 hcat [gtab, text "fcomp ", greg src2 1,
777 text "; fstsw %ax ; sahf ; setpo %ah"],
778 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
779 text "andb %ah,%al ; decb %al ; popl %eax"]
782 {- On the 486, the flags set by FP compare are the unsigned ones!
783 (This looks like a HACK to me. WDP 96/03)
785 fix_FP_cond :: Cond -> Cond
790 fix_FP_cond EQQ = EQQ
792 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
793 -- there should be no others
796 pprInstr g@(GABS _ src dst)
797 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
799 pprInstr g@(GNEG _ src dst)
800 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
802 pprInstr g@(GSQRT sz src dst)
803 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
804 hcat [gtab, gcoerceto sz, gpop dst 1])
806 pprInstr g@(GSIN sz l1 l2 src dst)
807 = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
809 pprInstr g@(GCOS sz l1 l2 src dst)
810 = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
812 pprInstr g@(GTAN sz l1 l2 src dst)
813 = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
815 -- In the translations for GADD, GMUL, GSUB and GDIV,
816 -- the first two cases are mere optimisations. The otherwise clause
817 -- generates correct code under all circumstances.
819 pprInstr g@(GADD _ src1 src2 dst)
821 = pprG g (text "\t#GADD-xxxcase1" $$
822 hcat [gtab, gpush src2 0,
823 text " ; faddp %st(0),", greg src1 1])
825 = pprG g (text "\t#GADD-xxxcase2" $$
826 hcat [gtab, gpush src1 0,
827 text " ; faddp %st(0),", greg src2 1])
829 = pprG g (hcat [gtab, gpush src1 0,
830 text " ; fadd ", greg src2 1, text ",%st(0)",
834 pprInstr g@(GMUL _ src1 src2 dst)
836 = pprG g (text "\t#GMUL-xxxcase1" $$
837 hcat [gtab, gpush src2 0,
838 text " ; fmulp %st(0),", greg src1 1])
840 = pprG g (text "\t#GMUL-xxxcase2" $$
841 hcat [gtab, gpush src1 0,
842 text " ; fmulp %st(0),", greg src2 1])
844 = pprG g (hcat [gtab, gpush src1 0,
845 text " ; fmul ", greg src2 1, text ",%st(0)",
849 pprInstr g@(GSUB _ src1 src2 dst)
851 = pprG g (text "\t#GSUB-xxxcase1" $$
852 hcat [gtab, gpush src2 0,
853 text " ; fsubrp %st(0),", greg src1 1])
855 = pprG g (text "\t#GSUB-xxxcase2" $$
856 hcat [gtab, gpush src1 0,
857 text " ; fsubp %st(0),", greg src2 1])
859 = pprG g (hcat [gtab, gpush src1 0,
860 text " ; fsub ", greg src2 1, text ",%st(0)",
864 pprInstr g@(GDIV _ src1 src2 dst)
866 = pprG g (text "\t#GDIV-xxxcase1" $$
867 hcat [gtab, gpush src2 0,
868 text " ; fdivrp %st(0),", greg src1 1])
870 = pprG g (text "\t#GDIV-xxxcase2" $$
871 hcat [gtab, gpush src1 0,
872 text " ; fdivp %st(0),", greg src2 1])
874 = pprG g (hcat [gtab, gpush src1 0,
875 text " ; fdiv ", greg src2 1, text ",%st(0)",
880 = vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
881 ptext (sLit "\tffree %st(4) ;ffree %st(5)")
885 = panic "X86.Ppr.pprInstr: no match"
888 pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc
889 pprTrigOp op -- fsin, fcos or fptan
890 isTan -- we need a couple of extra steps if we're doing tan
891 l1 l2 -- internal labels for us to use
893 = -- We'll be needing %eax later on
894 hcat [gtab, text "pushl %eax;"] $$
895 -- tan is going to use an extra space on the FP stack
896 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
897 -- First put the value in %st(0) and try to apply the op to it
898 hcat [gpush src 0, text ("; " ++ op)] $$
899 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
900 hcat [gtab, text "fnstsw %ax"] $$
901 hcat [gtab, text "test $0x400,%eax"] $$
902 -- If we were in bounds then jump to the end
903 hcat [gtab, text "je " <> pprCLabel_asm l1] $$
904 -- Otherwise we need to shrink the value. Start by
905 -- loading pi, doubleing it (by adding it to itself),
906 -- and then swapping pi with the value, so the value we
907 -- want to apply op to is in %st(0) again
908 hcat [gtab, text "ffree %st(7); fldpi"] $$
909 hcat [gtab, text "fadd %st(0),%st"] $$
910 hcat [gtab, text "fxch %st(1)"] $$
911 -- Now we have a loop in which we make the value smaller,
912 -- see if it's small enough, and loop if not
913 (pprCLabel_asm l2 <> char ':') $$
914 hcat [gtab, text "fprem1"] $$
915 -- My Debian libc uses fstsw here for the tan code, but I can't
916 -- see any reason why it should need to be different for tan.
917 hcat [gtab, text "fnstsw %ax"] $$
918 hcat [gtab, text "test $0x400,%eax"] $$
919 hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
920 hcat [gtab, text "fstp %st(1)"] $$
921 hcat [gtab, text op] $$
922 (pprCLabel_asm l1 <> char ':') $$
923 -- Pop the 1.0 tan gave us
924 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
926 hcat [gtab, text "popl %eax;"] $$
927 -- And finally make the result the right size
928 hcat [gtab, gcoerceto sz, gpop dst 1]
930 --------------------------
932 -- coerce %st(0) to the specified size
933 gcoerceto :: Size -> Doc
934 gcoerceto FF64 = empty
935 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
936 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
938 gpush :: Reg -> RegNo -> Doc
940 = hcat [text "fld ", greg reg offset]
942 gpop :: Reg -> RegNo -> Doc
944 = hcat [text "fstp ", greg reg offset]
946 greg :: Reg -> RegNo -> Doc
947 greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
958 gregno :: Reg -> RegNo
959 gregno (RegReal (RealRegSingle i)) = i
960 gregno _ = --pprPanic "gregno" (ppr other)
961 999 -- bogus; only needed for debug printing
963 pprG :: Instr -> Doc -> Doc
965 = (char '#' <> pprGInstr fake) $$ actual
968 pprGInstr :: Instr -> Doc
969 pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst
970 pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst
971 pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst
973 pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst
974 pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst
976 pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst
977 pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
979 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
980 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
982 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
983 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
984 pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
985 pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
986 pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
987 pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
988 pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
990 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
991 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
992 pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg (sLit "gmul") sz src1 src2 dst
993 pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg (sLit "gdiv") sz src1 src2 dst
995 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
997 pprDollImm :: Imm -> Doc
998 pprDollImm i = ptext (sLit "$") <> pprImm i
1001 pprOperand :: Size -> Operand -> Doc
1002 pprOperand s (OpReg r) = pprReg s r
1003 pprOperand _ (OpImm i) = pprDollImm i
1004 pprOperand _ (OpAddr ea) = pprAddr ea
1007 pprMnemonic_ :: LitString -> Doc
1009 char '\t' <> ptext name <> space
1012 pprMnemonic :: LitString -> Size -> Doc
1013 pprMnemonic name size =
1014 char '\t' <> ptext name <> pprSize size <> space
1017 pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc
1018 pprSizeImmOp name size imm op1
1020 pprMnemonic name size,
1028 pprSizeOp :: LitString -> Size -> Operand -> Doc
1029 pprSizeOp name size op1
1031 pprMnemonic name size,
1036 pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1037 pprSizeOpOp name size op1 op2
1039 pprMnemonic name size,
1040 pprOperand size op1,
1046 pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1047 pprOpOp name size op1 op2
1050 pprOperand size op1,
1056 pprSizeReg :: LitString -> Size -> Reg -> Doc
1057 pprSizeReg name size reg1
1059 pprMnemonic name size,
1064 pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc
1065 pprSizeRegReg name size reg1 reg2
1067 pprMnemonic name size,
1074 pprRegReg :: LitString -> Reg -> Reg -> Doc
1075 pprRegReg name reg1 reg2
1078 pprReg archWordSize reg1,
1080 pprReg archWordSize reg2
1084 pprSizeOpReg :: LitString -> Size -> Operand -> Reg -> Doc
1085 pprSizeOpReg name size op1 reg2
1087 pprMnemonic name size,
1088 pprOperand size op1,
1090 pprReg archWordSize reg2
1094 pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc
1095 pprCondRegReg name size cond reg1 reg2
1106 pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc
1107 pprSizeSizeRegReg name size1 size2 reg1 reg2
1121 pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
1122 pprSizeRegRegReg name size reg1 reg2 reg3
1124 pprMnemonic name size,
1133 pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc
1134 pprSizeAddrReg name size op dst
1136 pprMnemonic name size,
1143 pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc
1144 pprSizeRegAddr name size src op
1146 pprMnemonic name size,
1153 pprShift :: LitString -> Size -> Operand -> Operand -> Doc
1154 pprShift name size src dest
1156 pprMnemonic name size,
1157 pprOperand II8 src, -- src is 8-bit sized
1159 pprOperand size dest
1163 pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc
1164 pprSizeOpOpCoerce name size1 size2 op1 op2
1165 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
1166 pprOperand size1 op1,
1168 pprOperand size2 op2
1172 pprCondInstr :: LitString -> Cond -> Doc -> Doc
1173 pprCondInstr name cond arg
1174 = hcat [ char '\t', ptext name, pprCond cond, space, arg]