1 -----------------------------------------------------------------------------
3 -- Pretty-printing assembly language
5 -- (c) The University of Glasgow 1993-2005
7 -----------------------------------------------------------------------------
22 #include "HsVersions.h"
23 #include "nativeGen/NCG.h"
36 import Unique ( pprUnique, Uniquable(..) )
39 import qualified Outputable
40 import Outputable (panic, Outputable)
44 #if i386_TARGET_ARCH && darwin_TARGET_OS
48 -- -----------------------------------------------------------------------------
49 -- Printing this stuff out
51 pprNatCmmTop :: NatCmmTop Instr -> Doc
52 pprNatCmmTop (CmmData section dats) =
53 pprSectionHeader section $$ vcat (map pprData dats)
55 -- special case for split markers:
56 pprNatCmmTop (CmmProc [] lbl (ListGraph [])) = pprLabel lbl
58 pprNatCmmTop (CmmProc info lbl (ListGraph blocks)) =
59 pprSectionHeader Text $$
60 (if null info then -- blocks guaranteed not null, so label needed
63 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
64 pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
67 vcat (map pprData info) $$
68 pprLabel (entryLblToInfoLbl lbl)
70 vcat (map pprBasicBlock blocks)
71 -- above: Even the first block gets a label, because with branch-chain
72 -- elimination, it might be the target of a goto.
73 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
74 -- If we are using the .subsections_via_symbols directive
75 -- (available on recent versions of Darwin),
76 -- we have to make sure that there is some kind of reference
77 -- from the entry code to a label on the _top_ of of the info table,
78 -- so that the linker will not think it is unreferenced and dead-strip
79 -- it. That's why the label is called a DeadStripPreventer (_dsp).
82 <+> pprCLabel_asm (entryLblToInfoLbl lbl)
84 <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
87 $$ pprSizeDecl (if null info then lbl else entryLblToInfoLbl lbl)
89 -- | Output the ELF .size directive.
90 pprSizeDecl :: CLabel -> Doc
93 ptext (sLit "\t.size") <+> pprCLabel_asm lbl
94 <> ptext (sLit ", .-") <> pprCLabel_asm lbl
99 pprBasicBlock :: NatBasicBlock Instr -> Doc
100 pprBasicBlock (BasicBlock blockid instrs) =
101 pprLabel (mkAsmTempLabel (getUnique blockid)) $$
102 vcat (map pprInstr instrs)
105 pprData :: CmmStatic -> Doc
106 pprData (CmmAlign bytes) = pprAlign bytes
107 pprData (CmmDataLabel lbl) = pprLabel lbl
108 pprData (CmmString str) = pprASCII str
111 pprData (CmmUninitialised bytes) = ptext (sLit ".space ") <> int bytes
113 pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
116 pprData (CmmStaticLit lit) = pprDataItem lit
118 pprGloblDecl :: CLabel -> Doc
120 | not (externallyVisibleCLabel lbl) = empty
121 | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
125 pprTypeAndSizeDecl :: CLabel -> Doc
127 pprTypeAndSizeDecl lbl
128 | not (externallyVisibleCLabel lbl) = empty
129 | otherwise = ptext (sLit ".type ") <>
130 pprCLabel_asm lbl <> ptext (sLit ", @object")
136 pprLabel :: CLabel -> Doc
137 pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
140 pprASCII :: [Word8] -> Doc
142 = vcat (map do1 str) $$ do1 0
145 do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
147 pprAlign :: Int -> Doc
151 = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
157 log2 :: Int -> Int -- cache the common ones
162 log2 n = 1 + log2 (n `quot` 2)
165 -- -----------------------------------------------------------------------------
166 -- pprInstr: print an 'Instr'
168 instance Outputable Instr where
169 ppr instr = Outputable.docToSDoc $ pprInstr instr
172 pprReg :: Size -> Reg -> Doc
176 RegReal (RealRegSingle i) -> ppr_reg_no s i
177 RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
178 RegVirtual (VirtualRegI u) -> text "%vI_" <> asmSDoc (pprUnique u)
179 RegVirtual (VirtualRegHi u) -> text "%vHi_" <> asmSDoc (pprUnique u)
180 RegVirtual (VirtualRegF u) -> text "%vF_" <> asmSDoc (pprUnique u)
181 RegVirtual (VirtualRegD u) -> text "%vD_" <> asmSDoc (pprUnique u)
182 RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> asmSDoc (pprUnique u)
185 ppr_reg_no :: Size -> Int -> Doc
186 ppr_reg_no II8 = ppr_reg_byte
187 ppr_reg_no II16 = ppr_reg_word
188 ppr_reg_no _ = ppr_reg_long
190 ppr_reg_byte i = ptext
192 0 -> sLit "%al"; 1 -> sLit "%bl";
193 2 -> sLit "%cl"; 3 -> sLit "%dl";
194 _ -> sLit "very naughty I386 byte register"
197 ppr_reg_word i = ptext
199 0 -> sLit "%ax"; 1 -> sLit "%bx";
200 2 -> sLit "%cx"; 3 -> sLit "%dx";
201 4 -> sLit "%si"; 5 -> sLit "%di";
202 6 -> sLit "%bp"; 7 -> sLit "%sp";
203 _ -> sLit "very naughty I386 word register"
206 ppr_reg_long i = ptext
208 0 -> sLit "%eax"; 1 -> sLit "%ebx";
209 2 -> sLit "%ecx"; 3 -> sLit "%edx";
210 4 -> sLit "%esi"; 5 -> sLit "%edi";
211 6 -> sLit "%ebp"; 7 -> sLit "%esp";
214 #elif x86_64_TARGET_ARCH
215 ppr_reg_no :: Size -> Int -> Doc
216 ppr_reg_no II8 = ppr_reg_byte
217 ppr_reg_no II16 = ppr_reg_word
218 ppr_reg_no II32 = ppr_reg_long
219 ppr_reg_no _ = ppr_reg_quad
221 ppr_reg_byte i = ptext
223 0 -> sLit "%al"; 1 -> sLit "%bl";
224 2 -> sLit "%cl"; 3 -> sLit "%dl";
225 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
226 6 -> sLit "%bpl"; 7 -> sLit "%spl";
227 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
228 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
229 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
230 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
231 _ -> sLit "very naughty x86_64 byte register"
234 ppr_reg_word i = ptext
236 0 -> sLit "%ax"; 1 -> sLit "%bx";
237 2 -> sLit "%cx"; 3 -> sLit "%dx";
238 4 -> sLit "%si"; 5 -> sLit "%di";
239 6 -> sLit "%bp"; 7 -> sLit "%sp";
240 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
241 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
242 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
243 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
244 _ -> sLit "very naughty x86_64 word register"
247 ppr_reg_long i = ptext
249 0 -> sLit "%eax"; 1 -> sLit "%ebx";
250 2 -> sLit "%ecx"; 3 -> sLit "%edx";
251 4 -> sLit "%esi"; 5 -> sLit "%edi";
252 6 -> sLit "%ebp"; 7 -> sLit "%esp";
253 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
254 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
255 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
256 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
257 _ -> sLit "very naughty x86_64 register"
260 ppr_reg_quad i = ptext
262 0 -> sLit "%rax"; 1 -> sLit "%rbx";
263 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
264 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
265 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
266 8 -> sLit "%r8"; 9 -> sLit "%r9";
267 10 -> sLit "%r10"; 11 -> sLit "%r11";
268 12 -> sLit "%r12"; 13 -> sLit "%r13";
269 14 -> sLit "%r14"; 15 -> sLit "%r15";
273 ppr_reg_no _ = panic "X86.Ppr.ppr_reg_no: no match"
276 #if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
277 ppr_reg_float :: Int -> LitString
278 ppr_reg_float i = case i of
279 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
280 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
281 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
282 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
283 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
284 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
285 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
286 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
287 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
288 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
289 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
290 _ -> sLit "very naughty x86 register"
293 pprSize :: Size -> Doc
300 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
301 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
305 pprSize_x87 :: Size -> Doc
311 _ -> panic "X86.Ppr.pprSize_x87"
313 pprCond :: Cond -> Doc
316 GEU -> sLit "ae"; LU -> sLit "b";
317 EQQ -> sLit "e"; GTT -> sLit "g";
318 GE -> sLit "ge"; GU -> sLit "a";
319 LTT -> sLit "l"; LE -> sLit "le";
320 LEU -> sLit "be"; NE -> sLit "ne";
321 NEG -> sLit "s"; POS -> sLit "ns";
322 CARRY -> sLit "c"; OFLO -> sLit "o";
323 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
324 ALWAYS -> sLit "mp"})
328 pprImm (ImmInt i) = int i
329 pprImm (ImmInteger i) = integer i
330 pprImm (ImmCLbl l) = pprCLabel_asm l
331 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
332 pprImm (ImmLit s) = s
334 pprImm (ImmFloat _) = ptext (sLit "naughty float immediate")
335 pprImm (ImmDouble _) = ptext (sLit "naughty double immediate")
337 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
338 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
339 <> lparen <> pprImm b <> rparen
343 pprAddr :: AddrMode -> Doc
344 pprAddr (ImmAddr imm off)
345 = let pp_imm = pprImm imm
349 else if (off < 0) then
352 pp_imm <> char '+' <> int off
354 pprAddr (AddrBaseIndex base index displacement)
356 pp_disp = ppr_disp displacement
357 pp_off p = pp_disp <> char '(' <> p <> char ')'
358 pp_reg r = pprReg archWordSize r
360 case (base, index) of
361 (EABaseNone, EAIndexNone) -> pp_disp
362 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
363 (EABaseRip, EAIndexNone) -> pp_off (ptext (sLit "%rip"))
364 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
365 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
367 _ -> panic "X86.Ppr.pprAddr: no match"
370 ppr_disp (ImmInt 0) = empty
371 ppr_disp imm = pprImm imm
374 pprSectionHeader :: Section -> Doc
377 # if darwin_TARGET_OS
380 Text -> ptext (sLit ".text\n\t.align 2")
381 Data -> ptext (sLit ".data\n\t.align 2")
382 ReadOnlyData -> ptext (sLit ".const\n.align 2")
383 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 2")
384 UninitialisedData -> ptext (sLit ".data\n\t.align 2")
385 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
386 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
391 Text -> ptext (sLit ".text\n\t.align 4,0x90")
392 Data -> ptext (sLit ".data\n\t.align 4")
393 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 4")
394 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 4")
395 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 4")
396 ReadOnlyData16 -> ptext (sLit ".section .rodata\n\t.align 16")
397 OtherSection _ -> panic "X86.Ppr.pprSectionHeader: unknown section"
401 #elif x86_64_TARGET_ARCH
402 # if darwin_TARGET_OS
405 Text -> ptext (sLit ".text\n.align 3")
406 Data -> ptext (sLit ".data\n.align 3")
407 ReadOnlyData -> ptext (sLit ".const\n.align 3")
408 RelocatableReadOnlyData -> ptext (sLit ".const_data\n.align 3")
409 UninitialisedData -> ptext (sLit ".data\n\t.align 3")
410 ReadOnlyData16 -> ptext (sLit ".const\n.align 4")
411 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
416 Text -> ptext (sLit ".text\n\t.align 8")
417 Data -> ptext (sLit ".data\n\t.align 8")
418 ReadOnlyData -> ptext (sLit ".section .rodata\n\t.align 8")
419 RelocatableReadOnlyData -> ptext (sLit ".section .data\n\t.align 8")
420 UninitialisedData -> ptext (sLit ".section .bss\n\t.align 8")
421 ReadOnlyData16 -> ptext (sLit ".section .rodata.cst16\n\t.align 16")
422 OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section"
427 pprSectionHeader _ = panic "X86.Ppr.pprSectionHeader: not defined for this architecture"
434 pprDataItem :: CmmLit -> Doc
436 = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit)
440 -- These seem to be common:
441 ppr_item II8 _ = [ptext (sLit "\t.byte\t") <> pprImm imm]
442 ppr_item II32 _ = [ptext (sLit "\t.long\t") <> pprImm imm]
444 ppr_item FF32 (CmmFloat r _)
445 = let bs = floatToBytes (fromRational r)
446 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
448 ppr_item FF64 (CmmFloat r _)
449 = let bs = doubleToBytes (fromRational r)
450 in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs
452 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
453 ppr_item II16 _ = [ptext (sLit "\t.word\t") <> pprImm imm]
455 #if i386_TARGET_ARCH && darwin_TARGET_OS
456 ppr_item II64 (CmmInt x _) =
457 [ptext (sLit "\t.long\t")
458 <> int (fromIntegral (fromIntegral x :: Word32)),
459 ptext (sLit "\t.long\t")
461 (fromIntegral (x `shiftR` 32) :: Word32))]
463 #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
464 ppr_item II64 _ = [ptext (sLit "\t.quad\t") <> pprImm imm]
466 #if x86_64_TARGET_ARCH && !darwin_TARGET_OS
467 -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
468 -- type, which means we can't do pc-relative 64-bit addresses.
469 -- Fortunately we're assuming the small memory model, in which
470 -- all such offsets will fit into 32 bits, so we have to stick
471 -- to 32-bit offset fields and modify the RTS appropriately
473 -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
476 | isRelativeReloc x =
477 [ptext (sLit "\t.long\t") <> pprImm imm,
478 ptext (sLit "\t.long\t0")]
480 [ptext (sLit "\t.quad\t") <> pprImm imm]
482 isRelativeReloc (CmmLabelDiffOff _ _ _) = True
483 isRelativeReloc _ = False
487 = panic "X86.Ppr.ppr_item: no match"
491 pprInstr :: Instr -> Doc
493 pprInstr (COMMENT _) = empty -- nuke 'em
495 pprInstr (COMMENT s) = ptext (sLit "# ") <> ftext s
498 = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
500 pprInstr (NEWBLOCK _)
501 = panic "PprMach.pprInstr: NEWBLOCK"
504 = panic "PprMach.pprInstr: LDATA"
507 pprInstr (SPILL reg slot)
509 ptext (sLit "\tSPILL"),
513 ptext (sLit "SLOT") <> parens (int slot)]
515 pprInstr (RELOAD slot reg)
517 ptext (sLit "\tRELOAD"),
519 ptext (sLit "SLOT") <> parens (int slot),
524 pprInstr (MOV size src dst)
525 = pprSizeOpOp (sLit "mov") size src dst
527 pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst
528 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
529 -- movl. But we represent it as a MOVZxL instruction, because
530 -- the reg alloc would tend to throw away a plain reg-to-reg
531 -- move, and we still want it to do that.
533 pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst
534 -- zero-extension only needs to extend to 32 bits: on x86_64,
535 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
536 -- instruction is shorter.
538 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
540 -- here we do some patching, since the physical registers are only set late
541 -- in the code generation.
542 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
544 = pprSizeOpOp (sLit "add") size (OpReg reg2) dst
546 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
548 = pprSizeOpOp (sLit "add") size (OpReg reg1) dst
550 pprInstr (LEA size (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
552 = pprInstr (ADD size (OpImm displ) dst)
554 pprInstr (LEA size src dst) = pprSizeOpOp (sLit "lea") size src dst
556 pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
557 = pprSizeOp (sLit "dec") size dst
558 pprInstr (ADD size (OpImm (ImmInt 1)) dst)
559 = pprSizeOp (sLit "inc") size dst
560 pprInstr (ADD size src dst)
561 = pprSizeOpOp (sLit "add") size src dst
562 pprInstr (ADC size src dst)
563 = pprSizeOpOp (sLit "adc") size src dst
564 pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
565 pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
567 {- A hack. The Intel documentation says that "The two and three
568 operand forms [of IMUL] may also be used with unsigned operands
569 because the lower half of the product is the same regardless if
570 (sic) the operands are signed or unsigned. The CF and OF flags,
571 however, cannot be used to determine if the upper half of the
572 result is non-zero." So there.
574 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst
575 pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst
577 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
578 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
579 pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
581 pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
582 pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
584 pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
585 pprInstr (SAR size src dst) = pprShift (sLit "sar") size src dst
586 pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst
588 pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src
590 pprInstr (CMP size src dst)
591 | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2
592 | otherwise = pprSizeOpOp (sLit "cmp") size src dst
594 -- This predicate is needed here and nowhere else
600 pprInstr (TEST size src dst) = pprSizeOpOp (sLit "test") size src dst
601 pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op
602 pprInstr (POP size op) = pprSizeOp (sLit "pop") size op
604 -- both unused (SDM):
605 -- pprInstr PUSHA = ptext (sLit "\tpushal")
606 -- pprInstr POPA = ptext (sLit "\tpopal")
608 pprInstr NOP = ptext (sLit "\tnop")
609 pprInstr (CLTD II32) = ptext (sLit "\tcltd")
610 pprInstr (CLTD II64) = ptext (sLit "\tcqto")
612 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
614 pprInstr (JXX cond blockid)
615 = pprCondInstr (sLit "j") cond (pprCLabel_asm lab)
616 where lab = mkAsmTempLabel (getUnique blockid)
618 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
620 pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
621 pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
622 pprInstr (JMP_TBL op _ _ _) = pprInstr (JMP op)
623 pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
624 pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
626 pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
627 pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
628 pprInstr (IMUL2 sz op) = pprSizeOp (sLit "imul") sz op
631 pprInstr (MUL size op1 op2) = pprSizeOpOp (sLit "mul") size op1 op2
633 pprInstr (FDIV size op1 op2) = pprSizeOpOp (sLit "div") size op1 op2
635 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
636 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
637 pprInstr (CVTTSS2SIQ sz from to) = pprSizeSizeOpReg (sLit "cvttss2si") FF32 sz from to
638 pprInstr (CVTTSD2SIQ sz from to) = pprSizeSizeOpReg (sLit "cvttsd2si") FF64 sz from to
639 pprInstr (CVTSI2SS sz from to) = pprSizeOpReg (sLit "cvtsi2ss") sz from to
640 pprInstr (CVTSI2SD sz from to) = pprSizeOpReg (sLit "cvtsi2sd") sz from to
642 -- FETCHGOT for PIC on ELF platforms
643 pprInstr (FETCHGOT reg)
644 = vcat [ ptext (sLit "\tcall 1f"),
645 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ],
646 hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
650 -- FETCHPC for PIC on Darwin/x86
651 -- get the instruction pointer into a register
652 -- (Terminology note: the IP is called Program Counter on PPC,
653 -- and it's a good thing to use the same name on both platforms)
654 pprInstr (FETCHPC reg)
655 = vcat [ ptext (sLit "\tcall 1f"),
656 hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ]
660 -- -----------------------------------------------------------------------------
661 -- i386 floating-point
663 -- Simulating a flat register set on the x86 FP stack is tricky.
664 -- you have to free %st(7) before pushing anything on the FP reg stack
665 -- so as to preclude the possibility of a FP stack overflow exception.
666 pprInstr g@(GMOV src dst)
670 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
672 -- GLD sz addr dst ==> FLDsz addr ; FSTP (dst+1)
673 pprInstr g@(GLD sz addr dst)
674 = pprG g (hcat [gtab, text "fld", pprSize_x87 sz, gsp,
675 pprAddr addr, gsemi, gpop dst 1])
677 -- GST sz src addr ==> FLD dst ; FSTPsz addr
678 pprInstr g@(GST sz src addr)
679 | src == fake0 && sz /= FF80 -- fstt instruction doesn't exist
680 = pprG g (hcat [gtab,
681 text "fst", pprSize_x87 sz, gsp, pprAddr addr])
683 = pprG g (hcat [gtab, gpush src 0, gsemi,
684 text "fstp", pprSize_x87 sz, gsp, pprAddr addr])
686 pprInstr g@(GLDZ dst)
687 = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
688 pprInstr g@(GLD1 dst)
689 = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
691 pprInstr (GFTOI src dst)
692 = pprInstr (GDTOI src dst)
694 pprInstr g@(GDTOI src dst)
696 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
697 hcat [gtab, gpush src 0],
698 hcat [gtab, text "movzwl 4(%esp), ", reg,
699 text " ; orl $0xC00, ", reg],
700 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
701 hcat [gtab, text "fistpl 0(%esp)"],
702 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
703 hcat [gtab, text "addl $8, %esp"]
706 reg = pprReg II32 dst
708 pprInstr (GITOF src dst)
709 = pprInstr (GITOD src dst)
711 pprInstr g@(GITOD src dst)
712 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
713 text " ; fildl (%esp) ; ",
714 gpop dst 1, text " ; addl $4,%esp"])
716 pprInstr g@(GDTOF src dst)
717 = pprG g (vcat [gtab <> gpush src 0,
718 gtab <> text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ;",
721 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
722 this far into the jungle AND you give a Rat's Ass (tm) what's going
723 on, here's the deal. Generate code to do a floating point comparison
724 of src1 and src2, of kind cond, and set the Zero flag if true.
726 The complications are to do with handling NaNs correctly. We want the
727 property that if either argument is NaN, then the result of the
728 comparison is False ... except if we're comparing for inequality,
729 in which case the answer is True.
731 Here's how the general (non-inequality) case works. As an
732 example, consider generating the an equality test:
734 pushl %eax -- we need to mess with this
735 <get src1 to top of FPU stack>
736 fcomp <src2 location in FPU stack> and pop pushed src1
737 -- Result of comparison is in FPU Status Register bits
739 fstsw %ax -- Move FPU Status Reg to %ax
740 sahf -- move C3 C2 C0 from %ax to integer flag reg
741 -- now the serious magic begins
742 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
743 sete %al -- %al = if arg1 == arg2 then 1 else 0
744 andb %ah,%al -- %al &= %ah
745 -- so %al == 1 iff (comparable && same); else it holds 0
746 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
747 else %al == 0xFF, ZeroFlag=0
748 -- the zero flag is now set as we desire.
751 The special case of inequality differs thusly:
753 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
754 setne %al -- %al = if arg1 /= arg2 then 1 else 0
755 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
756 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
757 else (%al == 0xFF, ZF=0)
759 pprInstr g@(GCMP cond src1 src2)
760 | case cond of { NE -> True; _ -> False }
762 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
763 hcat [gtab, text "fcomp ", greg src2 1,
764 text "; fstsw %ax ; sahf ; setpe %ah"],
765 hcat [gtab, text "setne %al ; ",
766 text "orb %ah,%al ; decb %al ; popl %eax"]
770 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
771 hcat [gtab, text "fcomp ", greg src2 1,
772 text "; fstsw %ax ; sahf ; setpo %ah"],
773 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
774 text "andb %ah,%al ; decb %al ; popl %eax"]
777 {- On the 486, the flags set by FP compare are the unsigned ones!
778 (This looks like a HACK to me. WDP 96/03)
780 fix_FP_cond :: Cond -> Cond
785 fix_FP_cond EQQ = EQQ
787 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
788 -- there should be no others
791 pprInstr g@(GABS _ src dst)
792 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
794 pprInstr g@(GNEG _ src dst)
795 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
797 pprInstr g@(GSQRT sz src dst)
798 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
799 hcat [gtab, gcoerceto sz, gpop dst 1])
801 pprInstr g@(GSIN sz l1 l2 src dst)
802 = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
804 pprInstr g@(GCOS sz l1 l2 src dst)
805 = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
807 pprInstr g@(GTAN sz l1 l2 src dst)
808 = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
810 -- In the translations for GADD, GMUL, GSUB and GDIV,
811 -- the first two cases are mere optimisations. The otherwise clause
812 -- generates correct code under all circumstances.
814 pprInstr g@(GADD _ src1 src2 dst)
816 = pprG g (text "\t#GADD-xxxcase1" $$
817 hcat [gtab, gpush src2 0,
818 text " ; faddp %st(0),", greg src1 1])
820 = pprG g (text "\t#GADD-xxxcase2" $$
821 hcat [gtab, gpush src1 0,
822 text " ; faddp %st(0),", greg src2 1])
824 = pprG g (hcat [gtab, gpush src1 0,
825 text " ; fadd ", greg src2 1, text ",%st(0)",
829 pprInstr g@(GMUL _ src1 src2 dst)
831 = pprG g (text "\t#GMUL-xxxcase1" $$
832 hcat [gtab, gpush src2 0,
833 text " ; fmulp %st(0),", greg src1 1])
835 = pprG g (text "\t#GMUL-xxxcase2" $$
836 hcat [gtab, gpush src1 0,
837 text " ; fmulp %st(0),", greg src2 1])
839 = pprG g (hcat [gtab, gpush src1 0,
840 text " ; fmul ", greg src2 1, text ",%st(0)",
844 pprInstr g@(GSUB _ src1 src2 dst)
846 = pprG g (text "\t#GSUB-xxxcase1" $$
847 hcat [gtab, gpush src2 0,
848 text " ; fsubrp %st(0),", greg src1 1])
850 = pprG g (text "\t#GSUB-xxxcase2" $$
851 hcat [gtab, gpush src1 0,
852 text " ; fsubp %st(0),", greg src2 1])
854 = pprG g (hcat [gtab, gpush src1 0,
855 text " ; fsub ", greg src2 1, text ",%st(0)",
859 pprInstr g@(GDIV _ src1 src2 dst)
861 = pprG g (text "\t#GDIV-xxxcase1" $$
862 hcat [gtab, gpush src2 0,
863 text " ; fdivrp %st(0),", greg src1 1])
865 = pprG g (text "\t#GDIV-xxxcase2" $$
866 hcat [gtab, gpush src1 0,
867 text " ; fdivp %st(0),", greg src2 1])
869 = pprG g (hcat [gtab, gpush src1 0,
870 text " ; fdiv ", greg src2 1, text ",%st(0)",
875 = vcat [ ptext (sLit "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
876 ptext (sLit "\tffree %st(4) ;ffree %st(5)")
880 = panic "X86.Ppr.pprInstr: no match"
883 pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc
884 pprTrigOp op -- fsin, fcos or fptan
885 isTan -- we need a couple of extra steps if we're doing tan
886 l1 l2 -- internal labels for us to use
888 = -- We'll be needing %eax later on
889 hcat [gtab, text "pushl %eax;"] $$
890 -- tan is going to use an extra space on the FP stack
891 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
892 -- First put the value in %st(0) and try to apply the op to it
893 hcat [gpush src 0, text ("; " ++ op)] $$
894 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
895 hcat [gtab, text "fnstsw %ax"] $$
896 hcat [gtab, text "test $0x400,%eax"] $$
897 -- If we were in bounds then jump to the end
898 hcat [gtab, text "je " <> pprCLabel_asm l1] $$
899 -- Otherwise we need to shrink the value. Start by
900 -- loading pi, doubleing it (by adding it to itself),
901 -- and then swapping pi with the value, so the value we
902 -- want to apply op to is in %st(0) again
903 hcat [gtab, text "ffree %st(7); fldpi"] $$
904 hcat [gtab, text "fadd %st(0),%st"] $$
905 hcat [gtab, text "fxch %st(1)"] $$
906 -- Now we have a loop in which we make the value smaller,
907 -- see if it's small enough, and loop if not
908 (pprCLabel_asm l2 <> char ':') $$
909 hcat [gtab, text "fprem1"] $$
910 -- My Debian libc uses fstsw here for the tan code, but I can't
911 -- see any reason why it should need to be different for tan.
912 hcat [gtab, text "fnstsw %ax"] $$
913 hcat [gtab, text "test $0x400,%eax"] $$
914 hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
915 hcat [gtab, text "fstp %st(1)"] $$
916 hcat [gtab, text op] $$
917 (pprCLabel_asm l1 <> char ':') $$
918 -- Pop the 1.0 tan gave us
919 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
921 hcat [gtab, text "popl %eax;"] $$
922 -- And finally make the result the right size
923 hcat [gtab, gcoerceto sz, gpop dst 1]
925 --------------------------
927 -- coerce %st(0) to the specified size
928 gcoerceto :: Size -> Doc
929 gcoerceto FF64 = empty
930 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
931 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
933 gpush :: Reg -> RegNo -> Doc
935 = hcat [text "fld ", greg reg offset]
937 gpop :: Reg -> RegNo -> Doc
939 = hcat [text "fstp ", greg reg offset]
941 greg :: Reg -> RegNo -> Doc
942 greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
953 gregno :: Reg -> RegNo
954 gregno (RegReal (RealRegSingle i)) = i
955 gregno _ = --pprPanic "gregno" (ppr other)
956 999 -- bogus; only needed for debug printing
958 pprG :: Instr -> Doc -> Doc
960 = (char '#' <> pprGInstr fake) $$ actual
963 pprGInstr :: Instr -> Doc
964 pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst
965 pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst
966 pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst
968 pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst
969 pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst
971 pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst
972 pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst
974 pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst
975 pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst
976 pprGInstr (GDTOF src dst) = pprSizeSizeRegReg (sLit "gdtof") FF64 FF32 src dst
978 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
979 pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
980 pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
981 pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
982 pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
983 pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
984 pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
986 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
987 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
988 pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg (sLit "gmul") sz src1 src2 dst
989 pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg (sLit "gdiv") sz src1 src2 dst
991 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
993 pprDollImm :: Imm -> Doc
994 pprDollImm i = ptext (sLit "$") <> pprImm i
997 pprOperand :: Size -> Operand -> Doc
998 pprOperand s (OpReg r) = pprReg s r
999 pprOperand _ (OpImm i) = pprDollImm i
1000 pprOperand _ (OpAddr ea) = pprAddr ea
1003 pprMnemonic_ :: LitString -> Doc
1005 char '\t' <> ptext name <> space
1008 pprMnemonic :: LitString -> Size -> Doc
1009 pprMnemonic name size =
1010 char '\t' <> ptext name <> pprSize size <> space
1013 pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc
1014 pprSizeImmOp name size imm op1
1016 pprMnemonic name size,
1024 pprSizeOp :: LitString -> Size -> Operand -> Doc
1025 pprSizeOp name size op1
1027 pprMnemonic name size,
1032 pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1033 pprSizeOpOp name size op1 op2
1035 pprMnemonic name size,
1036 pprOperand size op1,
1042 pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc
1043 pprOpOp name size op1 op2
1046 pprOperand size op1,
1052 pprSizeReg :: LitString -> Size -> Reg -> Doc
1053 pprSizeReg name size reg1
1055 pprMnemonic name size,
1060 pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc
1061 pprSizeRegReg name size reg1 reg2
1063 pprMnemonic name size,
1070 pprRegReg :: LitString -> Reg -> Reg -> Doc
1071 pprRegReg name reg1 reg2
1074 pprReg archWordSize reg1,
1076 pprReg archWordSize reg2
1080 pprSizeOpReg :: LitString -> Size -> Operand -> Reg -> Doc
1081 pprSizeOpReg name size op1 reg2
1083 pprMnemonic name size,
1084 pprOperand size op1,
1086 pprReg archWordSize reg2
1089 pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc
1090 pprCondRegReg name size cond reg1 reg2
1101 pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc
1102 pprSizeSizeRegReg name size1 size2 reg1 reg2
1114 pprSizeSizeOpReg :: LitString -> Size -> Size -> Operand -> Reg -> Doc
1115 pprSizeSizeOpReg name size1 size2 op1 reg2
1117 pprMnemonic name size2,
1118 pprOperand size1 op1,
1123 pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc
1124 pprSizeRegRegReg name size reg1 reg2 reg3
1126 pprMnemonic name size,
1135 pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc
1136 pprSizeAddrReg name size op dst
1138 pprMnemonic name size,
1145 pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc
1146 pprSizeRegAddr name size src op
1148 pprMnemonic name size,
1155 pprShift :: LitString -> Size -> Operand -> Operand -> Doc
1156 pprShift name size src dest
1158 pprMnemonic name size,
1159 pprOperand II8 src, -- src is 8-bit sized
1161 pprOperand size dest
1165 pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc
1166 pprSizeOpOpCoerce name size1 size2 op1 op2
1167 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space,
1168 pprOperand size1 op1,
1170 pprOperand size2 op2
1174 pprCondInstr :: LitString -> Cond -> Doc -> Doc
1175 pprCondInstr name cond arg
1176 = hcat [ char '\t', ptext name, pprCond cond, space, arg]