From: simonmar Date: Tue, 5 Apr 2005 09:45:06 +0000 (+0000) Subject: [project @ 2005-04-05 09:45:06 by simonmar] X-Git-Tag: Initial_conversion_from_CVS_complete~805 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=14a5aadb84c34dbe2bee129ed80fdfa1fb12e3e0;p=ghc-hetmet.git [project @ 2005-04-05 09:45:06 by simonmar] x86_64: generate 32-bit relative offset plus 32-bit padding instead of a 64-bit relative offset. This is probably not the best place to do it, but it'll do for now. Also reinstate some changes that accidentally got spammed yesterday. --- diff --git a/ghc/compiler/nativeGen/MachCodeGen.hs b/ghc/compiler/nativeGen/MachCodeGen.hs index 7e4c03f..3a0dfad 100644 --- a/ghc/compiler/nativeGen/MachCodeGen.hs +++ b/ghc/compiler/nativeGen/MachCodeGen.hs @@ -15,6 +15,7 @@ module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where #include "HsVersions.h" #include "nativeGen/NCG.h" +#include "MachDeps.h" -- NCG stuff: import MachInstrs @@ -1159,7 +1160,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps ---------------------- div_code rep signed quotient x y = do - (y_op, y_code) <- getOperand y -- cannot be clobbered + (y_op, y_code) <- getRegOrMem y -- cannot be clobbered x_code <- getAnyReg x let widen | signed = CLTD rep @@ -1217,11 +1218,35 @@ getRegister (CmmLoad mem pk) getRegister (CmmLit (CmmInt 0 rep)) = let + -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits + adj_rep = case rep of I64 -> I32; _ -> rep + rep1 = IF_ARCH_i386( rep, adj_rep ) code dst - = unitOL (XOR rep (OpReg dst) (OpReg dst)) + = unitOL (XOR rep1 (OpReg dst) (OpReg dst)) in return (Any rep code) +#if x86_64_TARGET_ARCH + -- optimisation for loading small literals on x86_64: take advantage + -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit + -- instruction forms are shorter. +getRegister (CmmLit lit) + | I64 <- cmmLitRep lit, not (isBigLit lit) + = let + imm = litToImm lit + code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst)) + in + return (Any I64 code) + where + isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff + isBigLit _ = False + -- note1: not the same as is64BitLit, because that checks for + -- signed literals that fit in 32 bits, but we want unsigned + -- literals here. + -- note2: all labels are small, because we're assuming the + -- small memory model (see gcc docs, -mcmodel=small). +#endif + getRegister (CmmLit lit) = let rep = cmmLitRep lit @@ -1230,7 +1255,7 @@ getRegister (CmmLit lit) in return (Any rep code) -getRegister other = panic "getRegister(x86)" +getRegister other = pprPanic "getRegister(x86)" (ppr other) intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr @@ -2027,6 +2052,8 @@ getRegOrMem e = do #if x86_64_TARGET_ARCH is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000 + -- assume that labels are in the range 0-2^31-1: this assumes the + -- small memory model (see gcc docs, -mcmodel=small). #endif is64BitLit x = False #endif diff --git a/ghc/compiler/nativeGen/PprMach.hs b/ghc/compiler/nativeGen/PprMach.hs index 1f1adda..c6501a0 100644 --- a/ghc/compiler/nativeGen/PprMach.hs +++ b/ghc/compiler/nativeGen/PprMach.hs @@ -117,11 +117,11 @@ pprBasicBlock (BasicBlock (BlockId id) instrs) = pprUserReg :: Reg -> Doc pprUserReg = pprReg IF_ARCH_i386(I32,) IF_ARCH_x86_64(I64,) -pprReg :: IF_ARCH_i386(MachRep ->, IF_ARCH_x86_64(MachRep ->,)) Reg -> Doc +pprReg :: IF_ARCH_i386(MachRep ->,) IF_ARCH_x86_64(MachRep ->,) Reg -> Doc -pprReg IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) r +pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r = case r of - RealReg i -> ppr_reg_no IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) i + RealReg i -> ppr_reg_no IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) i VirtualRegI u -> text "%vI_" <> asmSDoc (pprUnique u) VirtualRegHi u -> text "%vHi_" <> asmSDoc (pprUnique u) VirtualRegF u -> text "%vF_" <> asmSDoc (pprUnique u) @@ -263,8 +263,9 @@ pprReg IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) r 22 -> SLIT("%xmm6"); 23 -> SLIT("%xmm7"); 24 -> SLIT("%xmm8"); 25 -> SLIT("%xmm9"); 26 -> SLIT("%xmm10"); 27 -> SLIT("%xmm11"); - 28 -> SLIT("%xmm12"); 28 -> SLIT("%xmm13"); - 30 -> SLIT("%xmm13"); 31 -> SLIT("%xmm15") + 28 -> SLIT("%xmm12"); 29 -> SLIT("%xmm13"); + 30 -> SLIT("%xmm14"); 31 -> SLIT("%xmm15"); + _ -> SLIT("very naughty x86_64 register") }) #endif @@ -640,15 +641,10 @@ pprSectionHeader RelocatableReadOnlyData IF_ARCH_alpha(SLIT("\t.data\n\t.align 3") ,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -} ,IF_ARCH_i386(SLIT(".section .rodata\n\t.align 4") - ,IF_ARCH_x86_64(SLIT(".text\n\t.align 8") + ,IF_ARCH_x86_64(SLIT(".section .rodata\n\t.align 8") ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const_data\n.align 2"), SLIT(".data\n\t.align 2")) ,))))) - -- the assembler on x86_64/Linux refuses to generate code for - -- .quad x - y - -- where x is in the text section and y in the rodata section. - -- It works if y is in the text section, though. This is probably - -- going to cause difficulties for PIC, I imagine. pprSectionHeader UninitialisedData = ptext IF_ARCH_alpha(SLIT("\t.bss\n\t.align 3") @@ -741,8 +737,29 @@ pprDataItem lit #endif #if i386_TARGET_ARCH || x86_64_TARGET_ARCH ppr_item I16 x = [ptext SLIT("\t.word\t") <> pprImm imm] +#endif +#if i386_TARGET_ARCH ppr_item I64 x = [ptext SLIT("\t.quad\t") <> pprImm imm] #endif +#if x86_64_TARGET_ARCH + -- x86_64: binutils can't handle the R_X86_64_PC64 relocation + -- type, which means we can't do pc-relative 64-bit addresses. + -- Fortunately we're assuming the small memory model, in which + -- all such offsets will fit into 32 bits, so we have to stick + -- to 32-bit offset fields and modify the RTS appropriately + -- (see InfoTables.h). + -- + ppr_item I64 x + | isRelativeReloc x = + [ptext SLIT("\t.long\t") <> pprImm imm, + ptext SLIT("\t.long\t0")] + | otherwise = + [ptext SLIT("\t.quad\t") <> pprImm imm] + where + isRelativeReloc (CmmLabelOff _ _) = True + isRelativeReloc (CmmLabelDiffOff _ _ _) = True + isRelativeReloc _ = False +#endif #if powerpc_TARGET_ARCH ppr_item I16 x = [ptext SLIT("\t.short\t") <> pprImm imm] ppr_item I64 (CmmInt x _) = diff --git a/ghc/compiler/nativeGen/RegAllocInfo.hs b/ghc/compiler/nativeGen/RegAllocInfo.hs index 65299cb..f6b3131 100644 --- a/ghc/compiler/nativeGen/RegAllocInfo.hs +++ b/ghc/compiler/nativeGen/RegAllocInfo.hs @@ -171,8 +171,13 @@ regUsage instr = case instr of JXX cond lbl -> mkRU [] [] JMP op -> mkRU (use_R op) [] JMP_TBL op ids -> mkRU (use_R op) [] +#if i386_TARGET_ARCH CALL (Left imm) -> mkRU [] callClobberedRegs CALL (Right reg) -> mkRU [reg] callClobberedRegs +#else + CALL (Left imm) -> mkRU params callClobberedRegs + CALL (Right reg) -> mkRU (reg:params) callClobberedRegs +#endif CLTD sz -> mkRU [eax] [edx] NOP -> mkRU [] [] @@ -222,6 +227,12 @@ regUsage instr = case instr of _other -> panic "regUsage: unrecognised instr" where +#if x86_64_TARGET_ARCH + -- call parameters: include %eax, because it is used + -- to pass the number of SSE reg arguments to varargs fns. + params = eax : allArgRegs ++ allFPArgRegs +#endif + -- 2 operand form; first operand Read; second Written usageRW :: Operand -> Operand -> RegUsage usageRW op (OpReg reg) = mkRU (use_R op) [reg] @@ -706,7 +717,9 @@ mkSpillInstr reg delta slot let off_w = (off-delta) `div` 8 in case regClass reg of RcInteger -> MOV I64 (OpReg reg) (OpAddr (spRel off_w)) - _ -> panic "mkSpillInstr: ToDo" + RcDouble -> MOV F64 (OpReg reg) (OpAddr (spRel off_w)) + -- ToDo: will it work to always spill as a double? + -- does that cause a stall if the data was a float? #endif #ifdef sparc_TARGET_ARCH {-SPARC: spill below frame pointer leaving 2 words/spill-} @@ -748,7 +761,7 @@ mkLoadInstr reg delta slot let off_w = (off-delta) `div` 8 in case regClass reg of RcInteger -> MOV I64 (OpAddr (spRel off_w)) (OpReg reg) - _ -> panic "mkLoadInstr: ToDo" + _ -> MOV F64 (OpAddr (spRel off_w)) (OpReg reg) #endif #if sparc_TARGET_ARCH let{off_w = 1 + (off `div` 4);