#include "HsVersions.h"
#include "nativeGen/NCG.h"
+#include "MachDeps.h"
-- NCG stuff:
import MachInstrs
----------------------
div_code rep signed quotient x y = do
- (y_op, y_code) <- getOperand y -- cannot be clobbered
+ (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
x_code <- getAnyReg x
let
widen | signed = CLTD rep
getRegister (CmmLit (CmmInt 0 rep))
= let
+ -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+ adj_rep = case rep of I64 -> I32; _ -> rep
+ rep1 = IF_ARCH_i386( rep, adj_rep )
code dst
- = unitOL (XOR rep (OpReg dst) (OpReg dst))
+ = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
in
return (Any rep code)
+#if x86_64_TARGET_ARCH
+ -- optimisation for loading small literals on x86_64: take advantage
+ -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+ -- instruction forms are shorter.
+getRegister (CmmLit lit)
+ | I64 <- cmmLitRep lit, not (isBigLit lit)
+ = let
+ imm = litToImm lit
+ code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
+ in
+ return (Any I64 code)
+ where
+ isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
+ isBigLit _ = False
+ -- note1: not the same as is64BitLit, because that checks for
+ -- signed literals that fit in 32 bits, but we want unsigned
+ -- literals here.
+ -- note2: all labels are small, because we're assuming the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
getRegister (CmmLit lit)
= let
rep = cmmLitRep lit
in
return (Any rep code)
-getRegister other = panic "getRegister(x86)"
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
#if x86_64_TARGET_ARCH
is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
+ -- assume that labels are in the range 0-2^31-1: this assumes the
+ -- small memory model (see gcc docs, -mcmodel=small).
#endif
is64BitLit x = False
#endif
pprUserReg :: Reg -> Doc
pprUserReg = pprReg IF_ARCH_i386(I32,) IF_ARCH_x86_64(I64,)
-pprReg :: IF_ARCH_i386(MachRep ->, IF_ARCH_x86_64(MachRep ->,)) Reg -> Doc
+pprReg :: IF_ARCH_i386(MachRep ->,) IF_ARCH_x86_64(MachRep ->,) Reg -> Doc
-pprReg IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) r
+pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r
= case r of
- RealReg i -> ppr_reg_no IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) i
+ RealReg i -> ppr_reg_no IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) i
VirtualRegI u -> text "%vI_" <> asmSDoc (pprUnique u)
VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u)
VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u)
22 -> SLIT("%xmm6"); 23 -> SLIT("%xmm7");
24 -> SLIT("%xmm8"); 25 -> SLIT("%xmm9");
26 -> SLIT("%xmm10"); 27 -> SLIT("%xmm11");
- 28 -> SLIT("%xmm12"); 28 -> SLIT("%xmm13");
- 30 -> SLIT("%xmm13"); 31 -> SLIT("%xmm15")
+ 28 -> SLIT("%xmm12"); 29 -> SLIT("%xmm13");
+ 30 -> SLIT("%xmm14"); 31 -> SLIT("%xmm15");
+ _ -> SLIT("very naughty x86_64 register")
})
#endif
IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
,IF_ARCH_i386(SLIT(".section .rodata\n\t.align 4")
- ,IF_ARCH_x86_64(SLIT(".text\n\t.align 8")
+ ,IF_ARCH_x86_64(SLIT(".section .rodata\n\t.align 8")
,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const_data\n.align 2"),
SLIT(".data\n\t.align 2"))
,)))))
- -- the assembler on x86_64/Linux refuses to generate code for
- -- .quad x - y
- -- where x is in the text section and y in the rodata section.
- -- It works if y is in the text section, though. This is probably
- -- going to cause difficulties for PIC, I imagine.
pprSectionHeader UninitialisedData
= ptext
IF_ARCH_alpha(SLIT("\t.bss\n\t.align 3")
#endif
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
ppr_item I16 x = [ptext SLIT("\t.word\t") <> pprImm imm]
+#endif
+#if i386_TARGET_ARCH
ppr_item I64 x = [ptext SLIT("\t.quad\t") <> pprImm imm]
#endif
+#if x86_64_TARGET_ARCH
+ -- x86_64: binutils can't handle the R_X86_64_PC64 relocation
+ -- type, which means we can't do pc-relative 64-bit addresses.
+ -- Fortunately we're assuming the small memory model, in which
+ -- all such offsets will fit into 32 bits, so we have to stick
+ -- to 32-bit offset fields and modify the RTS appropriately
+ -- (see InfoTables.h).
+ --
+ ppr_item I64 x
+ | isRelativeReloc x =
+ [ptext SLIT("\t.long\t") <> pprImm imm,
+ ptext SLIT("\t.long\t0")]
+ | otherwise =
+ [ptext SLIT("\t.quad\t") <> pprImm imm]
+ where
+ isRelativeReloc (CmmLabelOff _ _) = True
+ isRelativeReloc (CmmLabelDiffOff _ _ _) = True
+ isRelativeReloc _ = False
+#endif
#if powerpc_TARGET_ARCH
ppr_item I16 x = [ptext SLIT("\t.short\t") <> pprImm imm]
ppr_item I64 (CmmInt x _) =
JXX cond lbl -> mkRU [] []
JMP op -> mkRU (use_R op) []
JMP_TBL op ids -> mkRU (use_R op) []
+#if i386_TARGET_ARCH
CALL (Left imm) -> mkRU [] callClobberedRegs
CALL (Right reg) -> mkRU [reg] callClobberedRegs
+#else
+ CALL (Left imm) -> mkRU params callClobberedRegs
+ CALL (Right reg) -> mkRU (reg:params) callClobberedRegs
+#endif
CLTD sz -> mkRU [eax] [edx]
NOP -> mkRU [] []
_other -> panic "regUsage: unrecognised instr"
where
+#if x86_64_TARGET_ARCH
+ -- call parameters: include %eax, because it is used
+ -- to pass the number of SSE reg arguments to varargs fns.
+ params = eax : allArgRegs ++ allFPArgRegs
+#endif
+
-- 2 operand form; first operand Read; second Written
usageRW :: Operand -> Operand -> RegUsage
usageRW op (OpReg reg) = mkRU (use_R op) [reg]
let off_w = (off-delta) `div` 8
in case regClass reg of
RcInteger -> MOV I64 (OpReg reg) (OpAddr (spRel off_w))
- _ -> panic "mkSpillInstr: ToDo"
+ RcDouble -> MOV F64 (OpReg reg) (OpAddr (spRel off_w))
+ -- ToDo: will it work to always spill as a double?
+ -- does that cause a stall if the data was a float?
#endif
#ifdef sparc_TARGET_ARCH
{-SPARC: spill below frame pointer leaving 2 words/spill-}
let off_w = (off-delta) `div` 8
in case regClass reg of
RcInteger -> MOV I64 (OpAddr (spRel off_w)) (OpReg reg)
- _ -> panic "mkLoadInstr: ToDo"
+ _ -> MOV F64 (OpAddr (spRel off_w)) (OpReg reg)
#endif
#if sparc_TARGET_ARCH
let{off_w = 1 + (off `div` 4);