From 61988d6363aad1c5dea5eea24a29fd63a634cb4f Mon Sep 17 00:00:00 2001
From: simonmar
Date: Mon, 4 Apr 2005 15:54:18 +0000
Subject: [PATCH] [project @ 2005-04-04 15:54:18 by simonmar]

Use the imul instruction natively, rather than the braindead wrapper
around it.  The register allocator has been able to handle the fixed
register usage of imul for a while now.  Also, the wrapper was broken
on x86_64.
---
 ghc/compiler/nativeGen/MachCodeGen.hs  | 57 +++++++++-----------------------
 ghc/compiler/nativeGen/MachInstrs.hs   |  3 +-
 ghc/compiler/nativeGen/PprMach.hs      | 30 ++++-------------
 ghc/compiler/nativeGen/RegAllocInfo.hs | 21 +++---------
 4 files changed, 27 insertions(+), 84 deletions(-)

diff --git a/ghc/compiler/nativeGen/MachCodeGen.hs b/ghc/compiler/nativeGen/MachCodeGen.hs
index 84d6d0d..7e4c03f 100644
--- a/ghc/compiler/nativeGen/MachCodeGen.hs
+++ b/ghc/compiler/nativeGen/MachCodeGen.hs
@@ -15,7 +15,6 @@ module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
 
 #include "HsVersions.h"
 #include "nativeGen/NCG.h"
-#include "MachDeps.h"
 
 -- NCG stuff:
 import MachInstrs
@@ -1087,23 +1086,25 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 --------------------
 imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
 imulMayOflo rep a b = do
-     res_lo <- getNewRegNat rep
-     res_hi <- getNewRegNat rep
      (a_reg, a_code) <- getNonClobberedReg a
-     (b_reg, b_code) <- getSomeReg b
+     b_code <- getAnyReg b
      let
-          code dst = a_code `appOL` b_code `appOL`
+          shift_amt = case rep of
+                        I32 -> 31
+                        I64 -> 63
+                        _   -> panic "shift_amt"
+
+          code = a_code `appOL` b_code eax `appOL`
                         toOL [
-                           MOV rep (OpReg a_reg) (OpReg res_hi),
-                           MOV rep (OpReg b_reg) (OpReg res_lo),
-                           IMUL64 res_hi res_lo,        -- result in res_hi:res_lo
-                           SAR rep (OpImm (ImmInt 31)) (OpReg res_lo),  -- sign extend lower part
-                           SUB rep (OpReg res_hi) (OpReg res_lo),  -- compare against upper
-                           MOV rep (OpReg res_lo) (OpReg dst)
-                           -- dst==0 if high part == sign extended low part
+                           IMUL2 rep (OpReg a_reg),     -- result in %edx:%eax
+                           SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
+                                -- sign extend lower part
+                           SUB rep (OpReg edx) (OpReg eax)
+                                -- compare against upper
+                                -- eax==0 if high part == sign extended low part
                         ]
      -- in
-     return (Any rep code)
+     return (Fixed rep eax code)
 
 --------------------
 shift_code :: MachRep
@@ -1216,35 +1217,11 @@ getRegister (CmmLoad mem pk)
 
 getRegister (CmmLit (CmmInt 0 rep))
   = let
-        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
-        adj_rep = case rep of I64 -> I32; _ -> rep
-        rep1 = IF_ARCH_i386( rep, adj_rep )
         code dst
-           = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
+           = unitOL (XOR rep (OpReg dst) (OpReg dst))
     in
         return (Any rep code)
 
-#if x86_64_TARGET_ARCH
-  -- optimisation for loading small literals on x86_64: take advantage
-  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-  -- instruction forms are shorter.
-getRegister (CmmLit lit)
-  | I64 <- cmmLitRep lit, not (isBigLit lit)
-  = let
-        imm = litToImm lit
-        code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
-    in
-        return (Any I64 code)
-  where
-   isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
-   isBigLit _ = False
-        -- note1: not the same as is64BitLit, because that checks for
-        -- signed literals that fit in 32 bits, but we want unsigned
-        -- literals here.
-        -- note2: all labels are small, because we're assuming the
-        -- small memory model (see gcc docs, -mcmodel=small).
-#endif
-
 getRegister (CmmLit lit)
   = let
        rep = cmmLitRep lit
@@ -1253,7 +1230,7 @@ getRegister (CmmLit lit)
     in
        return (Any rep code)
 
-getRegister other = pprPanic "getRegister(x86)" (ppr other)
+getRegister other = panic "getRegister(x86)"
 
 intLoadCode :: (Operand -> Operand -> Instr)
             -> CmmExpr
@@ -2050,8 +2027,6 @@ getRegOrMem e = do
 
 #if x86_64_TARGET_ARCH
 is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
-   -- assume that labels are in the range 0-2^31-1: this assumes the
-   -- small memory model (see gcc docs, -mcmodel=small).
 #endif
 is64BitLit x = False
 #endif
diff --git a/ghc/compiler/nativeGen/MachInstrs.hs b/ghc/compiler/nativeGen/MachInstrs.hs
index 0839694..40d1766 100644
--- a/ghc/compiler/nativeGen/MachInstrs.hs
+++ b/ghc/compiler/nativeGen/MachInstrs.hs
@@ -399,8 +399,7 @@ bit or 64 bit precision.
 
        | MUL         MachRep Operand Operand
       | IMUL        MachRep Operand Operand   -- signed int mul
-      | IMUL64      Reg Reg
-           -- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
+      | IMUL2       MachRep Operand -- %edx:%eax = operand * %eax
 
       | DIV         MachRep Operand   -- eax := eax:edx/op, edx := eax:edx%op
       | IDIV        MachRep Operand   -- ditto, but signed
diff --git a/ghc/compiler/nativeGen/PprMach.hs b/ghc/compiler/nativeGen/PprMach.hs
index 9fb2123..1f1adda 100644
--- a/ghc/compiler/nativeGen/PprMach.hs
+++ b/ghc/compiler/nativeGen/PprMach.hs
@@ -117,11 +117,11 @@ pprBasicBlock (BasicBlock (BlockId id) instrs) =
 pprUserReg :: Reg -> Doc
 pprUserReg = pprReg IF_ARCH_i386(I32,) IF_ARCH_x86_64(I64,)
 
-pprReg :: IF_ARCH_i386(MachRep ->,) IF_ARCH_x86_64(MachRep ->,) Reg -> Doc
+pprReg :: IF_ARCH_i386(MachRep ->, IF_ARCH_x86_64(MachRep ->,)) Reg -> Doc
 
-pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r
+pprReg IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) r
   = case r of
-      RealReg i      -> ppr_reg_no IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) i
+      RealReg i      -> ppr_reg_no IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) i
       VirtualRegI  u  -> text "%vI_" <> asmSDoc (pprUnique u)
       VirtualRegHi u  -> text "%vHi_" <> asmSDoc (pprUnique u)
       VirtualRegF  u  -> text "%vF_" <> asmSDoc (pprUnique u)
@@ -263,9 +263,8 @@ pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r
          22 -> SLIT("%xmm6");  23 -> SLIT("%xmm7");
          24 -> SLIT("%xmm8");  25 -> SLIT("%xmm9");
          26 -> SLIT("%xmm10"); 27 -> SLIT("%xmm11");
-         28 -> SLIT("%xmm12"); 29 -> SLIT("%xmm13");
-         30 -> SLIT("%xmm14"); 31 -> SLIT("%xmm15");
-         _  -> SLIT("very naughty x86_64 register")
+         28 -> SLIT("%xmm12"); 29 -> SLIT("%xmm13");
+         30 -> SLIT("%xmm14"); 31 -> SLIT("%xmm15")
        })
 
 #endif
@@ -1266,8 +1265,7 @@ pprInstr (CALL (Right reg)) = (<>) (ptext SLIT("\tcall *")) (pprReg wordRep
 
 pprInstr (IDIV sz op)   = pprSizeOp SLIT("idiv") sz op
 pprInstr (DIV sz op)    = pprSizeOp SLIT("div") sz op
-
-pprInstr (IMUL64 sd_hi sd_lo) = pprInstr_imul64 sd_hi sd_lo
+pprInstr (IMUL2 sz op)  = pprSizeOp SLIT("imul") sz op
 
 #if x86_64_TARGET_ARCH
 pprInstr (MUL size op1 op2) = pprSizeOpOp SLIT("mul") size op1 op2
@@ -1544,22 +1542,6 @@ pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg SLIT("gdiv") sz src1 src2 d
 
 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 
--- Emit code to make hi_reg:lo_reg be the 64-bit product of hi_reg and lo_reg
-pprInstr_imul64 hi_reg lo_reg
-   = let fakeInsn  = text "imul64" <+> pp_hi_reg <> comma <+> pp_lo_reg
-         pp_hi_reg = pprReg wordRep hi_reg
-         pp_lo_reg = pprReg wordRep lo_reg
-     in
-     vcat [
-          text "\t# BEGIN " <> fakeInsn,
-          text "\tpushl" <+> pp_hi_reg <> text" ; pushl" <+> pp_lo_reg,
-          text "\tpushl %eax ; pushl %edx",
-          text "\tmovl 12(%esp), %eax ; imull 8(%esp)",
-          text "\tmovl %edx, 12(%esp) ; movl %eax, 8(%esp)",
-          text "\tpopl %edx ; popl %eax",
-          text "\tpopl" <+> pp_lo_reg <> text " ; popl" <+> pp_hi_reg,
-          text "\t# END " <> fakeInsn
-          ]
 -- Continue with I386-only printing bits and bobs:
 
 pprDollImm :: Imm -> Doc
diff --git a/ghc/compiler/nativeGen/RegAllocInfo.hs b/ghc/compiler/nativeGen/RegAllocInfo.hs
index 22d84df..65299cb 100644
--- a/ghc/compiler/nativeGen/RegAllocInfo.hs
+++ b/ghc/compiler/nativeGen/RegAllocInfo.hs
@@ -149,7 +149,7 @@ regUsage instr = case instr of
     ADC sz src dst      -> usageRM src dst
     SUB sz src dst      -> usageRM src dst
     IMUL sz src dst     -> usageRM src dst
-    IMUL64 sd1 sd2      -> mkRU [sd1,sd2] [sd1,sd2]
+    IMUL2 sz src        -> mkRU (eax:use_R src) [eax,edx]
     MUL sz src dst      -> usageRM src dst
     DIV sz op           -> mkRU (eax:edx:use_R op) [eax,edx]
     IDIV sz op          -> mkRU (eax:edx:use_R op) [eax,edx]
@@ -171,13 +171,8 @@ regUsage instr = case instr of
     JXX cond lbl        -> mkRU [] []
     JMP op              -> mkRU (use_R op) []
     JMP_TBL op ids      -> mkRU (use_R op) []
-#if i386_TARGET_ARCH
     CALL (Left imm)     -> mkRU [] callClobberedRegs
     CALL (Right reg)    -> mkRU [reg] callClobberedRegs
-#else
-    CALL (Left imm)     -> mkRU params callClobberedRegs
-    CALL (Right reg)    -> mkRU (reg:params) callClobberedRegs
-#endif
     CLTD sz             -> mkRU [eax] [edx]
     NOP                 -> mkRU [] []
 
@@ -227,12 +222,6 @@ regUsage instr = case instr of
     _other              -> panic "regUsage: unrecognised instr"
 
  where
-#if x86_64_TARGET_ARCH
-     -- call parameters: include %eax, because it is used
-     -- to pass the number of SSE reg arguments to varargs fns.
-     params = eax : allArgRegs ++ allFPArgRegs
-#endif
-
     -- 2 operand form; first operand Read; second Written
     usageRW :: Operand -> Operand -> RegUsage
     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
@@ -479,7 +468,7 @@ patchRegs instr env = case instr of
     ADC sz src dst      -> patch2 (ADC sz) src dst
     SUB sz src dst      -> patch2 (SUB sz) src dst
     IMUL sz src dst     -> patch2 (IMUL sz) src dst
-    IMUL64 sd1 sd2      -> IMUL64 (env sd1) (env sd2)
+    IMUL2 sz src        -> patch1 (IMUL2 sz) src
     MUL sz src dst      -> patch2 (MUL sz) src dst
     IDIV sz op          -> patch1 (IDIV sz) op
     DIV sz op           -> patch1 (DIV sz) op
@@ -717,9 +706,7 @@ mkSpillInstr reg delta slot
     let off_w = (off-delta) `div` 8
     in case regClass reg of
            RcInteger -> MOV I64 (OpReg reg) (OpAddr (spRel off_w))
-           RcDouble  -> MOV F64 (OpReg reg) (OpAddr (spRel off_w))
-             -- ToDo: will it work to always spill as a double?
-             -- does that cause a stall if the data was a float?
+           _         -> panic "mkSpillInstr: ToDo"
 #endif
 #ifdef sparc_TARGET_ARCH
        {-SPARC: spill below frame pointer leaving 2 words/spill-}
@@ -761,7 +748,7 @@ mkLoadInstr reg delta slot
     let off_w = (off-delta) `div` 8
     in case regClass reg of
            RcInteger -> MOV I64 (OpAddr (spRel off_w)) (OpReg reg)
-           _         -> MOV F64 (OpAddr (spRel off_w)) (OpReg reg)
+           _         -> panic "mkLoadInstr: ToDo"
 #endif
 #if sparc_TARGET_ARCH
        let{off_w = 1 + (off `div` 4);
-- 
1.7.10.4