-pprInstr (JMP (OpImm imm)) = (<>) (ptext SLIT("\tjmp ")) (pprImm imm)
-pprInstr (JMP op) = (<>) (ptext SLIT("\tjmp *")) (pprOperand L op)
-
-pprInstr (CALL imm)
- = hcat [ ptext SLIT("\tcall "), pprImm imm ]
-
-pprInstr SAHF = ptext SLIT("\tsahf")
-pprInstr FABS = ptext SLIT("\tfabs")
-
-pprInstr (FADD sz src@(OpAddr _))
- = hcat [ptext SLIT("\tfadd"), pprSize sz, space, pprOperand sz src]
-pprInstr (FADD sz src)
- = ptext SLIT("\tfadd")
-pprInstr FADDP
- = ptext SLIT("\tfaddp")
-pprInstr (FMUL sz src)
- = hcat [ptext SLIT("\tfmul"), pprSize sz, space, pprOperand sz src]
-pprInstr FMULP
- = ptext SLIT("\tfmulp")
-pprInstr (FIADD size op) = pprSizeAddr SLIT("fiadd") size op
-pprInstr FCHS = ptext SLIT("\tfchs")
-pprInstr (FCOM size op) = pprSizeOp SLIT("fcom") size op
-pprInstr FCOS = ptext SLIT("\tfcos")
-pprInstr (FIDIV size op) = pprSizeAddr SLIT("fidiv") size op
-pprInstr (FDIV sz src)
- = hcat [ptext SLIT("\tfdiv"), pprSize sz, space, pprOperand sz src]
-pprInstr FDIVP
- = ptext SLIT("\tfdivp")
-pprInstr (FDIVR sz src)
- = hcat [ptext SLIT("\tfdivr"), pprSize sz, space, pprOperand sz src]
-pprInstr FDIVRP
- = ptext SLIT("\tfdivpr")
-pprInstr (FIDIVR size op) = pprSizeAddr SLIT("fidivr") size op
-pprInstr (FICOM size op) = pprSizeAddr SLIT("ficom") size op
-pprInstr (FILD sz op reg) = pprSizeAddrReg SLIT("fild") sz op reg
-pprInstr (FIST size op) = pprSizeAddr SLIT("fist") size op
-pprInstr (FLD sz (OpImm (ImmCLbl src)))
- = hcat [ptext SLIT("\tfld"),pprSize sz,space,pprCLabel_asm src]
-pprInstr (FLD sz src)
- = hcat [ptext SLIT("\tfld"),pprSize sz,space,pprOperand sz src]
-pprInstr FLD1 = ptext SLIT("\tfld1")
-pprInstr FLDZ = ptext SLIT("\tfldz")
-pprInstr (FIMUL size op) = pprSizeAddr SLIT("fimul") size op
-pprInstr FRNDINT = ptext SLIT("\tfrndint")
-pprInstr FSIN = ptext SLIT("\tfsin")
-pprInstr FSQRT = ptext SLIT("\tfsqrt")
-pprInstr (FST sz dst)
- = hcat [ptext SLIT("\tfst"), pprSize sz, space, pprOperand sz dst]
-pprInstr (FSTP sz dst)
- = hcat [ptext SLIT("\tfstp"), pprSize sz, space, pprOperand sz dst]
-pprInstr (FISUB size op) = pprSizeAddr SLIT("fisub") size op
-pprInstr (FSUB sz src)
- = hcat [ptext SLIT("\tfsub"), pprSize sz, space, pprOperand sz src]
-pprInstr FSUBP
- = ptext SLIT("\tfsubp")
-pprInstr (FSUBR size src)
- = pprSizeOp SLIT("fsubr") size src
-pprInstr FSUBRP
- = ptext SLIT("\tfsubpr")
-pprInstr (FISUBR size op)
- = pprSizeAddr SLIT("fisubr") size op
-pprInstr FTST = ptext SLIT("\tftst")
-pprInstr (FCOMP sz op)
- = hcat [ptext SLIT("\tfcomp"), pprSize sz, space, pprOperand sz op]
-pprInstr FUCOMPP = ptext SLIT("\tfucompp")
-pprInstr FXCH = ptext SLIT("\tfxch")
-pprInstr FNSTSW = ptext SLIT("\tfnstsw %ax")
-pprInstr FNOP = ptext SLIT("")
+pprInstr (JMP dsts (OpImm imm)) = (<>) (ptext SLIT("\tjmp ")) (pprImm imm)
+pprInstr (JMP dsts op) = (<>) (ptext SLIT("\tjmp *")) (pprOperand L op)
+pprInstr (CALL (Left imm)) = (<>) (ptext SLIT("\tcall ")) (pprImm imm)
+pprInstr (CALL (Right reg)) = (<>) (ptext SLIT("\tcall *")) (pprReg L reg)
+
+-- First bool indicates signedness; second whether quot or rem
+pprInstr (IQUOT sz src dst) = pprInstr_quotRem True True sz src dst
+pprInstr (IREM sz src dst) = pprInstr_quotRem True False sz src dst
+
+pprInstr (QUOT sz src dst) = pprInstr_quotRem False True sz src dst
+pprInstr (REM sz src dst) = pprInstr_quotRem False False sz src dst
+
+pprInstr (IMUL64 sd_hi sd_lo) = pprInstr_imul64 sd_hi sd_lo
+
+
+-- Simulating a flat register set on the x86 FP stack is tricky.
+-- You have to free %st(7) before pushing anything on the FP reg stack
+-- so as to preclude the possibility of an FP stack overflow exception.
+pprInstr g@(GMOV src dst)
+ | src == dst
+ = empty
+ | otherwise
+ = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
+
+-- GLD sz addr dst ==> FFREE %st(7) ; FLDsz addr ; FSTP (dst+1)
+pprInstr g@(GLD sz addr dst)
+ = pprG g (hcat [gtab, text "ffree %st(7) ; fld", pprSize sz, gsp,
+ pprAddr addr, gsemi, gpop dst 1])
+
+-- GST sz src addr ==> FFREE %st(7) ; FLD src ; FSTPsz addr
+pprInstr g@(GST sz src addr)
+ = pprG g (hcat [gtab, gpush src 0, gsemi,
+ text "fstp", pprSize sz, gsp, pprAddr addr])
+
+pprInstr g@(GLDZ dst)
+ = pprG g (hcat [gtab, text "ffree %st(7) ; fldz ; ", gpop dst 1])
+pprInstr g@(GLD1 dst)
+ = pprG g (hcat [gtab, text "ffree %st(7) ; fld1 ; ", gpop dst 1])
+
+pprInstr g@(GFTOI src dst)
+ = pprInstr (GDTOI src dst)
+pprInstr g@(GDTOI src dst)
+ = pprG g (hcat [gtab, text "subl $4, %esp ; ",
+ gpush src 0, gsemi, text "fistpl 0(%esp) ; popl ",
+ pprReg L dst])
+
+pprInstr g@(GITOF src dst)
+ = pprInstr (GITOD src dst)
+pprInstr g@(GITOD src dst)
+ = pprG g (hcat [gtab, text "pushl ", pprReg L src,
+ text " ; ffree %st(7); fildl (%esp) ; ",
+ gpop dst 1, text " ; addl $4,%esp"])
+
+{- Gruesome swamp follows. If you're unfortunate enough to have ventured
+ this far into the jungle AND you give a Rat's Ass (tm) what's going
+ on, here's the deal. Generate code to do a floating point comparison
+ of src1 and src2, of kind cond, and set the Zero flag if true.
+
+ The complications are to do with handling NaNs correctly. We want the
+ property that if either argument is NaN, then the result of the
+ comparison is False ... except if we're comparing for inequality,
+ in which case the answer is True.
+
+ Here's how the general (non-inequality) case works. As an
+ example, consider generating an equality test:
+
+ pushl %eax -- we need to mess with this
+ <get src1 to top of FPU stack>
+ fcomp <src2 location in FPU stack> and pop pushed src1
+ -- Result of comparison is in FPU Status Register bits
+ -- C3 C2 and C0
+ fstsw %ax -- Move FPU Status Reg to %ax
+ sahf -- move C3 C2 C0 from %ax to integer flag reg
+ -- now the serious magic begins
+ setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
+ sete %al -- %al = if arg1 == arg2 then 1 else 0
+ andb %ah,%al -- %al &= %ah
+ -- so %al == 1 iff (comparable && same); else it holds 0
+ decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
+ else %al == 0xFF, ZeroFlag=0
+ -- the zero flag is now set as we desire.
+ popl %eax
+
+ The special case of inequality differs thusly:
+
+ setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
+ setne %al -- %al = if arg1 /= arg2 then 1 else 0
+ orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
+ decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
+ else (%al == 0xFF, ZF=0)
+-}
+pprInstr g@(GCMP cond src1 src2)
+ | case cond of { NE -> True; other -> False }
+ = pprG g (vcat [
+ hcat [gtab, text "pushl %eax ; ",gpush src1 0],
+ hcat [gtab, text "fcomp ", greg src2 1,
+ text "; fstsw %ax ; sahf ; setpe %ah"],
+ hcat [gtab, text "setne %al ; ",
+ text "orb %ah,%al ; decb %al ; popl %eax"]
+ ])
+ | otherwise
+ = pprG g (vcat [
+ hcat [gtab, text "pushl %eax ; ",gpush src1 0],
+ hcat [gtab, text "fcomp ", greg src2 1,
+ text "; fstsw %ax ; sahf ; setpo %ah"],
+ hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
+ text "andb %ah,%al ; decb %al ; popl %eax"]
+ ])
+ where
+ {- On the 486, the flags set by FP compare are the unsigned ones!
+ (This looks like a HACK to me. WDP 96/03)
+ -}
+ fix_FP_cond :: Cond -> Cond
+ fix_FP_cond GE = GEU
+ fix_FP_cond GTT = GU
+ fix_FP_cond LTT = LU
+ fix_FP_cond LE = LEU
+ fix_FP_cond EQQ = EQQ
+ fix_FP_cond NE = NE
+ -- there should be no others
+
+
+pprInstr g@(GABS sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
+pprInstr g@(GNEG sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
+
+pprInstr g@(GSQRT sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
+ hcat [gtab, gcoerceto sz, gpop dst 1])
+pprInstr g@(GSIN sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fsin"] $$
+ hcat [gtab, gcoerceto sz, gpop dst 1])
+pprInstr g@(GCOS sz src dst)
+ = pprG g (hcat [gtab, gpush src 0, text " ; fcos"] $$
+ hcat [gtab, gcoerceto sz, gpop dst 1])
+pprInstr g@(GTAN sz src dst)
+ = pprG g (hcat [gtab, text "ffree %st(6) ; ",
+ gpush src 0, text " ; fptan ; ",
+ text " fstp %st(0)"] $$
+ hcat [gtab, gcoerceto sz, gpop dst 1])
+
+-- In the translations for GADD, GMUL, GSUB and GDIV,
+-- the first two cases are mere optimisations. The otherwise clause
+-- generates correct code under all circumstances.
+
+pprInstr g@(GADD sz src1 src2 dst)
+ | src1 == dst
+ = pprG g (text "\t#GADD-xxxcase1" $$
+ hcat [gtab, gpush src2 0,
+ text " ; faddp %st(0),", greg src1 1])
+ | src2 == dst
+ = pprG g (text "\t#GADD-xxxcase2" $$
+ hcat [gtab, gpush src1 0,
+ text " ; faddp %st(0),", greg src2 1])
+ | otherwise
+ = pprG g (hcat [gtab, gpush src1 0,
+ text " ; fadd ", greg src2 1, text ",%st(0)",
+ gsemi, gpop dst 1])
+
+
+pprInstr g@(GMUL sz src1 src2 dst)
+ | src1 == dst
+ = pprG g (text "\t#GMUL-xxxcase1" $$
+ hcat [gtab, gpush src2 0,
+ text " ; fmulp %st(0),", greg src1 1])
+ | src2 == dst
+ = pprG g (text "\t#GMUL-xxxcase2" $$
+ hcat [gtab, gpush src1 0,
+ text " ; fmulp %st(0),", greg src2 1])
+ | otherwise
+ = pprG g (hcat [gtab, gpush src1 0,
+ text " ; fmul ", greg src2 1, text ",%st(0)",
+ gsemi, gpop dst 1])
+
+
+pprInstr g@(GSUB sz src1 src2 dst)
+ | src1 == dst
+ = pprG g (text "\t#GSUB-xxxcase1" $$
+ hcat [gtab, gpush src2 0,
+ text " ; fsubrp %st(0),", greg src1 1])
+ | src2 == dst
+ = pprG g (text "\t#GSUB-xxxcase2" $$
+ hcat [gtab, gpush src1 0,
+ text " ; fsubp %st(0),", greg src2 1])
+ | otherwise
+ = pprG g (hcat [gtab, gpush src1 0,
+ text " ; fsub ", greg src2 1, text ",%st(0)",
+ gsemi, gpop dst 1])
+
+
+pprInstr g@(GDIV sz src1 src2 dst)
+ | src1 == dst
+ = pprG g (text "\t#GDIV-xxxcase1" $$
+ hcat [gtab, gpush src2 0,
+ text " ; fdivrp %st(0),", greg src1 1])
+ | src2 == dst
+ = pprG g (text "\t#GDIV-xxxcase2" $$
+ hcat [gtab, gpush src1 0,
+ text " ; fdivp %st(0),", greg src2 1])
+ | otherwise
+ = pprG g (hcat [gtab, gpush src1 0,
+ text " ; fdiv ", greg src2 1, text ",%st(0)",
+ gsemi, gpop dst 1])
+
+
+pprInstr GFREE
+ = vcat [ ptext SLIT("\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)"),
+ ptext SLIT("\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)")
+ ]
+
+
+pprInstr_quotRem signed isQuot sz src dst
+ | case sz of L -> False; _ -> True
+ = panic "pprInstr_quotRem: dunno how to do non-32bit operands"
+ | otherwise
+ = vcat [
+ (text "\t# BEGIN " <> fakeInsn),
+ (text "\tpushl $0; pushl %eax; pushl %edx; pushl " <> pprOperand sz src),
+ (text "\tmovl " <> pprOperand sz dst <> text ",%eax; " <> widen_to_64),
+ (x86op <> text " 0(%esp); movl " <> text resReg <> text ",12(%esp)"),
+ (text "\tpopl %edx; popl %edx; popl %eax; popl " <> pprOperand sz dst),
+ (text "\t# END " <> fakeInsn)
+ ]
+ where
+ widen_to_64 | signed = text "cltd"
+ | not signed = text "xorl %edx,%edx"
+ x86op = if signed then text "\tidivl" else text "\tdivl"
+ resReg = if isQuot then "%eax" else "%edx"
+ opStr | signed = if isQuot then "IQUOT" else "IREM"
+ | not signed = if isQuot then "QUOT" else "REM"
+ fakeInsn = text opStr <+> pprOperand sz src
+ <> char ',' <+> pprOperand sz dst
+
+-- Emit code to make hi_reg:lo_reg be the 64-bit product of hi_reg and lo_reg
+pprInstr_imul64 hi_reg lo_reg
+ = let fakeInsn = text "imul64" <+> pp_hi_reg <> comma <+> pp_lo_reg
+ pp_hi_reg = pprReg L hi_reg
+ pp_lo_reg = pprReg L lo_reg
+ in
+ vcat [
+ text "\t# BEGIN " <> fakeInsn,
+ text "\tpushl" <+> pp_hi_reg <> text" ; pushl" <+> pp_lo_reg,
+ text "\tpushl %eax ; pushl %edx",
+ text "\tmovl 12(%esp), %eax ; imull 8(%esp)",
+ text "\tmovl %edx, 12(%esp) ; movl %eax, 8(%esp)",
+ text "\tpopl %edx ; popl %eax",
+ text "\tpopl" <+> pp_lo_reg <> text " ; popl" <+> pp_hi_reg,
+ text "\t# END " <> fakeInsn
+ ]
+
+
+--------------------------
+
+-- coerce %st(0) to the specified size (currently emits nothing for either size)
+gcoerceto DF = empty
+gcoerceto F = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
+
+gpush reg offset
+ = hcat [text "ffree %st(7) ; fld ", greg reg offset]
+gpop reg offset
+ = hcat [text "fstp ", greg reg offset]
+
+bogus = text "\tbogus"
+greg reg offset = text "%st(" <> int (gregno reg - 8+offset) <> char ')'
+gsemi = text " ; "
+gtab = char '\t'
+gsp = char ' '
+
+gregno (RealReg i) = i
+gregno other = --pprPanic "gregno" (ppr other)
+ 999 -- bogus; only needed for debug printing
+
+pprG :: Instr -> Doc -> Doc
+pprG fake actual
+ = (char '#' <> pprGInstr fake) $$ actual
+
+pprGInstr (GMOV src dst) = pprSizeRegReg SLIT("gmov") DF src dst
+pprGInstr (GLD sz src dst) = pprSizeAddrReg SLIT("gld") sz src dst
+pprGInstr (GST sz src dst) = pprSizeRegAddr SLIT("gst") sz src dst
+
+pprGInstr (GLDZ dst) = pprSizeReg SLIT("gldz") DF dst
+pprGInstr (GLD1 dst) = pprSizeReg SLIT("gld1") DF dst
+
+pprGInstr (GFTOI src dst) = pprSizeSizeRegReg SLIT("gftoi") F L src dst
+pprGInstr (GDTOI src dst) = pprSizeSizeRegReg SLIT("gdtoi") DF L src dst
+
+pprGInstr (GITOF src dst) = pprSizeSizeRegReg SLIT("gitof") L F src dst
+pprGInstr (GITOD src dst) = pprSizeSizeRegReg SLIT("gitod") L DF src dst
+
+pprGInstr (GCMP co src dst) = pprCondRegReg SLIT("gcmp_") DF co src dst
+pprGInstr (GABS sz src dst) = pprSizeRegReg SLIT("gabs") sz src dst
+pprGInstr (GNEG sz src dst) = pprSizeRegReg SLIT("gneg") sz src dst
+pprGInstr (GSQRT sz src dst) = pprSizeRegReg SLIT("gsqrt") sz src dst
+pprGInstr (GSIN sz src dst) = pprSizeRegReg SLIT("gsin") sz src dst
+pprGInstr (GCOS sz src dst) = pprSizeRegReg SLIT("gcos") sz src dst
+pprGInstr (GTAN sz src dst) = pprSizeRegReg SLIT("gtan") sz src dst
+
+pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg SLIT("gadd") sz src1 src2 dst
+pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg SLIT("gsub") sz src1 src2 dst
+pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg SLIT("gmul") sz src1 src2 dst
+pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg SLIT("gdiv") sz src1 src2 dst