From c39373f1371fd1e46ea91be262f00c277b31f8e5 Mon Sep 17 00:00:00 2001 From: sewardj Date: Fri, 28 Jan 2000 18:07:56 +0000 Subject: [PATCH] [project @ 2000-01-28 18:07:55 by sewardj] Modifications to make x86 register spilling to work reasonably. It should work ok most of the time, although there is still a remote possibility that the allocator simply will be unable to complete spilling, and will just give up. -- Incrementally try with 0, 1, 2 and 3 spill regs, so as not to unduly restrict the supply of regs in code which doesn't need spilling. -- Remove the use of %ecx for shift values, so it is always available as the first-choice spill temporary. For code which doesn't do int division, make %edx and %eax available for spilling too. Shifts by a non-constant amount (very rare) are now done by a short test-and-jump sequence, so that %ecx is not tied up. -- x86 FP: do sin, cos, tan in-line so we get the same answers as gcc. -- Moved a little code around to remove recursive dependencies. -- Fix a subtle bug in x86 regUsage, which could cause underestimation of live ranges. --- ghc/compiler/nativeGen/AsmCodeGen.lhs | 4 +- ghc/compiler/nativeGen/AsmRegAlloc.lhs | 115 ++++++++++++-------- ghc/compiler/nativeGen/MachCode.lhs | 86 +++++++++++++-- ghc/compiler/nativeGen/MachMisc.hi-boot | 3 +- ghc/compiler/nativeGen/MachMisc.hi-boot-5 | 3 +- ghc/compiler/nativeGen/MachMisc.lhs | 5 + ghc/compiler/nativeGen/MachRegs.lhs | 39 +------ ghc/compiler/nativeGen/PprMach.lhs | 34 +++++- ghc/compiler/nativeGen/RegAllocInfo.lhs | 162 +++++++++++++++++++++++------ 9 files changed, 329 insertions(+), 122 deletions(-) diff --git a/ghc/compiler/nativeGen/AsmCodeGen.lhs b/ghc/compiler/nativeGen/AsmCodeGen.lhs index aa5d4e4..31c3825 100644 --- a/ghc/compiler/nativeGen/AsmCodeGen.lhs +++ b/ghc/compiler/nativeGen/AsmCodeGen.lhs @@ -21,7 +21,7 @@ import AbsCSyn ( AbstractC, MagicId ) import AsmRegAlloc ( runRegAllocate ) import OrdList ( OrdList ) import PrimOp ( commutableOp, PrimOp(..) ) -import RegAllocInfo ( mkMRegsState, MRegsState ) +import RegAllocInfo ( mkMRegsState, MRegsState, findReservedRegs ) import Stix ( StixTree(..), StixReg(..), pprStixTrees, CodeSegment(..) ) import PrimRep ( isFloatingRep, PrimRep(..) ) @@ -130,7 +130,7 @@ might be needed. scheduleMachCode :: [InstrList] -> [[Instr]] scheduleMachCode - = map (runRegAllocate freeRegsState reservedRegs) + = map (runRegAllocate freeRegsState findReservedRegs) where freeRegsState = mkMRegsState (extractMappedRegNos freeRegs) \end{code} diff --git a/ghc/compiler/nativeGen/AsmRegAlloc.lhs b/ghc/compiler/nativeGen/AsmRegAlloc.lhs index 9a6fca0..2ddb991 100644 --- a/ghc/compiler/nativeGen/AsmRegAlloc.lhs +++ b/ghc/compiler/nativeGen/AsmRegAlloc.lhs @@ -31,24 +31,38 @@ things the hard way. \begin{code} runRegAllocate :: MRegsState - -> [RegNo] + -> ([Instr] -> [[RegNo]]) -> InstrList -> [Instr] -runRegAllocate regs reserve_regs instrs +runRegAllocate regs find_reserve_regs instrs = case simpleAlloc of - Just x -> x - Nothing -> hairyAlloc + Just simple -> simple + Nothing -> tryHairy reserves where - flatInstrs = flattenOrdList instrs - simpleAlloc = simpleRegAlloc regs [] emptyFM flatInstrs - hairyAlloc = hairyRegAlloc regs reserve_regs flatInstrs + tryHairy [] + = error "nativeGen: register allocator: too difficult! Try -fvia-C.\n" + tryHairy (resv:resvs) + = case hairyAlloc resv of + Just success -> success + Nothing -> fooble resvs (tryHairy resvs) + + fooble [] x = x + fooble (resvs:_) x = trace ("nativeGen: spilling with " + ++ show (length resvs - 2) ++ + " int temporaries") x + + reserves = find_reserve_regs flatInstrs + flatInstrs = flattenOrdList instrs + simpleAlloc = simpleRegAlloc regs [] emptyFM flatInstrs + hairyAlloc resvd = hairyRegAlloc regs resvd flatInstrs -runHairyRegAllocate -- use only hairy for i386! + +runHairyRegAllocate :: MRegsState -> [RegNo] -> InstrList - -> [Instr] + -> Maybe [Instr] runHairyRegAllocate regs reserve_regs instrs = hairyRegAlloc regs reserve_regs flatInstrs @@ -83,7 +97,8 @@ simpleRegAlloc free live env (instr:instrs) where instr3 = patchRegs instr (lookup env2) - (srcs, dsts) = case regUsage instr of (RU s d) -> (regSetToList s, regSetToList d) + (srcs, dsts) = case regUsage instr of + (RU s d) -> (regSetToList s, regSetToList d) lookup env x = case lookupFM env x of Just y -> y; Nothing -> x @@ -121,40 +136,49 @@ Here is the ``clever'' bit. First go backward (i.e. left), looking for the last use of dynamic registers. Then go forward (i.e. right), filling registers with static placements. +hairyRegAlloc takes reserve_regs as the regs to use as spill +temporaries. First it tries to allocate using all regs except +reserve_regs. If that fails, it inserts spill code and tries again to +allocate regs, but this time with the spill temporaries available. +Even this might not work if there are insufficient spill temporaries: +in the worst case on x86, we'd need 3 of them, for insns like +addl (reg1,reg2,4) reg3, since this insn uses all 3 regs as input. + \begin{code} hairyRegAlloc :: MRegsState -> [RegNo] -> [Instr] - -> [Instr] + -> Maybe [Instr] hairyRegAlloc regs reserve_regs instrs = - case mapAccumB (doRegAlloc reserve_regs) (RH regs' 1 emptyFM) noFuture instrs of - (RH _ mloc1 _, _, instrs') - | mloc1 == 1 -> instrs' - | otherwise -> - let - instrs_patched' = patchMem instrs' - instrs_patched = flattenOrdList instrs_patched' - in - case mapAccumB do_RegAlloc_Nil (RH regs'' mloc1 emptyFM) noFuture instrs_patched of - ((RH _ mloc2 _),_,instrs'') - | mloc2 == mloc1 -> instrs'' - | otherwise -> instrs'' - --pprPanic "runRegAllocate" (ppr mloc2 <+> ppr mloc1) + case mapAccumB (doRegAlloc reserve_regs) + (RH regs' 1 emptyFM) noFuture instrs of + (RH _ mloc1 _, _, instrs') + -- succeeded w/out using reserves + | mloc1 == 1 -> Just instrs' + -- failed, and no reserves avail, so pointless to attempt spilling + | null reserve_regs -> Nothing + -- failed, but we have reserves, so attempt to do spilling + | otherwise + -> let instrs_patched' = patchMem instrs' + instrs_patched = flattenOrdList instrs_patched' + in + case mapAccumB (doRegAlloc []) (RH regs'' mloc1 emptyFM) + noFuture instrs_patched of + ((RH _ mloc2 _),_,instrs'') + -- successfully allocated the patched code + | mloc2 == mloc1 -> Just instrs'' + -- no; we have to give up + | otherwise -> Nothing + -- instrs'' + -- pprPanic "runRegAllocate" (ppr mloc2 <+> ppr mloc1) where regs' = regs `useMRegs` reserve_regs regs'' = mkMRegsState reserve_regs -do_RegAlloc_Nil = doRegAlloc [] -- out here to avoid CAF (sigh) -do_RegAlloc_Nil - :: RegHistory MRegsState - -> RegFuture - -> Instr - -> (RegHistory MRegsState, RegFuture, Instr) - -noFuture :: RegFuture -noFuture = RF emptyRegSet (FL emptyRegSet emptyFM) emptyFM + noFuture :: RegFuture + noFuture = RF emptyRegSet (FL emptyRegSet emptyFM) emptyFM \end{code} Here we patch instructions that reference ``registers'' which are really in @@ -225,7 +249,8 @@ getUsage (RF next_in_use future reg_conflicts) instr (RL in_use future') = regLiveness instr (RL next_in_use future) live_through = in_use `minusRegSet` dsts last_used = [ r | r <- regSetToList srcs, - not (r `elementOfRegSet` (fstFL future) || r `elementOfRegSet` in_use)] + not (r `elementOfRegSet` (fstFL future) + || r `elementOfRegSet` in_use)] in_use' = srcs `unionRegSets` live_through @@ -245,7 +270,9 @@ getUsage (RF next_in_use future reg_conflicts) instr Nothing -> live_dynamics Just conflicts -> conflicts `unionRegSets` live_dynamics - live_dynamics = mkRegSet [ r | r@(UnmappedReg _ _) <- regSetToList live_through ] + live_dynamics + = mkRegSet [ r | r@(UnmappedReg _ _) + <- regSetToList live_through ] doRegAlloc' :: [RegNo] @@ -254,7 +281,8 @@ doRegAlloc' -> Instr -> (RegHistory MRegsState, Instr) -doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) instr = +doRegAlloc' reserved (RH frs loc env) + (RI in_use srcs dsts lastu conflicts) instr = (RH frs'' loc' env'', patchRegs instr dynToStatic) @@ -264,14 +292,17 @@ doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) inst free :: [RegNo] free = extractMappedRegNos (map dynToStatic lastu) - -- (1) free registers that are used last as source operands in this instruction - frs_not_in_use = frs `useMRegs` (extractMappedRegNos (regSetToList in_use)) + -- (1) free registers that are used last as + -- source operands in this instruction + frs_not_in_use = frs `useMRegs` + (extractMappedRegNos (regSetToList in_use)) frs' = (frs_not_in_use `freeMRegs` free) `useMRegs` reserved -- (2) allocate new registers for the destination operands -- allocate registers for new dynamics - new_dynamix = [ r | r@(UnmappedReg _ _) <- regSetToList dsts, r `not_elem` keysFM env ] + new_dynamix = [ r | r@(UnmappedReg _ _) <- regSetToList dsts, + r `not_elem` keysFM env ] (frs'', loc', new) = foldr allocateNewRegs (frs', loc, []) new_dynamix @@ -283,14 +314,16 @@ doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) inst dynToStatic dyn@(UnmappedReg _ _) = case lookupFM env' dyn of Just r -> r - Nothing -> trace "Lost register; possibly a floating point type error in a _ccall_?" dyn + Nothing -> trace ("Lost register; possibly a floating point" + ++" type error in a _ccall_?") dyn dynToStatic other = other allocateNewRegs :: Reg -> (MRegsState, Int, [(Reg, Reg)]) -> (MRegsState, Int, [(Reg, Reg)]) - allocateNewRegs d@(UnmappedReg _ pk) (fs, mem, lst) = (fs', mem', (d, f) : lst) + allocateNewRegs d@(UnmappedReg _ pk) (fs, mem, lst) + = (fs', mem', (d, f) : lst) where (fs', f, mem') = case acceptable fs of diff --git a/ghc/compiler/nativeGen/MachCode.lhs b/ghc/compiler/nativeGen/MachCode.lhs index a4bd777..b38b24b 100644 --- a/ghc/compiler/nativeGen/MachCode.lhs +++ b/ghc/compiler/nativeGen/MachCode.lhs @@ -34,7 +34,6 @@ import UniqSupply ( returnUs, thenUs, mapUs, mapAndUnzipUs, mapAccumLUs, UniqSM ) import Outputable -import PprMach ( pprSize ) \end{code} Code extractor for an entire stix tree---stix statement level. @@ -499,6 +498,15 @@ getRegister (StPrim primop [x]) -- unary PrimOps FloatSqrtOp -> trivialUFCode FloatRep (GSQRT F) x DoubleSqrtOp -> trivialUFCode DoubleRep (GSQRT DF) x + FloatSinOp -> trivialUFCode FloatRep (GSIN F) x + DoubleSinOp -> trivialUFCode DoubleRep (GSIN DF) x + + FloatCosOp -> trivialUFCode FloatRep (GCOS F) x + DoubleCosOp -> trivialUFCode DoubleRep (GCOS DF) x + + FloatTanOp -> trivialUFCode FloatRep (GTAN F) x + DoubleTanOp -> trivialUFCode DoubleRep (GTAN DF) x + Double2FloatOp -> trivialUFCode FloatRep GDTOF x Float2DoubleOp -> trivialUFCode DoubleRep GFTOD x @@ -523,9 +531,9 @@ getRegister (StPrim primop [x]) -- unary PrimOps FloatExpOp -> (True, SLIT("exp")) FloatLogOp -> (True, SLIT("log")) - FloatSinOp -> (True, SLIT("sin")) - FloatCosOp -> (True, SLIT("cos")) - FloatTanOp -> (True, SLIT("tan")) + --FloatSinOp -> (True, SLIT("sin")) + --FloatCosOp -> (True, SLIT("cos")) + --FloatTanOp -> (True, SLIT("tan")) FloatAsinOp -> (True, SLIT("asin")) FloatAcosOp -> (True, SLIT("acos")) @@ -538,9 +546,9 @@ getRegister (StPrim primop [x]) -- unary PrimOps DoubleExpOp -> (False, SLIT("exp")) DoubleLogOp -> (False, SLIT("log")) - DoubleSinOp -> (False, SLIT("sin")) - DoubleCosOp -> (False, SLIT("cos")) - DoubleTanOp -> (False, SLIT("tan")) + --DoubleSinOp -> (False, SLIT("sin")) + --DoubleCosOp -> (False, SLIT("cos")) + --DoubleTanOp -> (False, SLIT("tan")) DoubleAsinOp -> (False, SLIT("asin")) DoubleAcosOp -> (False, SLIT("acos")) @@ -674,6 +682,65 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps {- Case2: shift length is complex (non-immediate) -} shift_code instr x y{-amount-} + = getRegister x `thenUs` \ register1 -> + getRegister y `thenUs` \ register2 -> + getUniqLabelNCG `thenUs` \ lbl_test3 -> + getUniqLabelNCG `thenUs` \ lbl_test2 -> + getUniqLabelNCG `thenUs` \ lbl_test1 -> + getUniqLabelNCG `thenUs` \ lbl_test0 -> + getUniqLabelNCG `thenUs` \ lbl_after -> + getNewRegNCG IntRep `thenUs` \ tmp -> + let code__2 dst + = let src_val = registerName register1 dst + code_val = registerCode register1 dst + src_amt = registerName register2 tmp + code_amt = registerCode register2 tmp + r_dst = OpReg dst + r_tmp = OpReg tmp + in + code_val . + code_amt . + mkSeqInstrs [ + COMMENT (_PK_ "begin shift sequence"), + MOV L (OpReg src_val) r_dst, + MOV L (OpReg src_amt) r_tmp, + + BT L (ImmInt 4) r_tmp, + JXX GEU lbl_test3, + instr (OpImm (ImmInt 16)) r_dst, + + LABEL lbl_test3, + BT L (ImmInt 3) r_tmp, + JXX GEU lbl_test2, + instr (OpImm (ImmInt 8)) r_dst, + + LABEL lbl_test2, + BT L (ImmInt 2) r_tmp, + JXX GEU lbl_test1, + instr (OpImm (ImmInt 4)) r_dst, + + LABEL lbl_test1, + BT L (ImmInt 1) r_tmp, + JXX GEU lbl_test0, + instr (OpImm (ImmInt 2)) r_dst, + + LABEL lbl_test0, + BT L (ImmInt 0) r_tmp, + JXX GEU lbl_after, + instr (OpImm (ImmInt 1)) r_dst, + LABEL lbl_after, + + COMMENT (_PK_ "end shift sequence") + ] + in + returnUs (Any IntRep code__2) + +{- + -- since ECX is always used as a spill temporary, we can't + -- use it here to do non-immediate shifts. No big deal -- + -- they are only very rare, and we can give an equivalent + -- insn sequence which doesn't use ECX. + -- DO NOT USE THIS CODE, SINCE IT IS INCOMPATIBLE WITH THE SPILLER = getRegister y `thenUs` \ register1 -> getRegister x `thenUs` \ register2 -> let @@ -699,6 +766,7 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps mkSeqInstr (instr (OpReg ecx) (OpReg eax)) in returnUs (Fixed IntRep eax code__2) +-} -------------------- add_code :: Size -> StixTree -> StixTree -> UniqSM Register @@ -2441,10 +2509,10 @@ condIntReg cond x y code = condCode condition cond = condName condition -- ToDo: if dst is eax, ebx, ecx, or edx we would not need the move. - code__2 dst = code . mkSeqInstrs [ + code__2 dst = code . mkSeqInstrs [COMMENT (_PK_ "aaaaa"), SETCC cond (OpReg tmp), AND L (OpImm (ImmInt 1)) (OpReg tmp), - MOV L (OpReg tmp) (OpReg dst)] + MOV L (OpReg tmp) (OpReg dst) ,COMMENT (_PK_ "bbbbb")] in returnUs (Any IntRep code__2) diff --git a/ghc/compiler/nativeGen/MachMisc.hi-boot b/ghc/compiler/nativeGen/MachMisc.hi-boot index 91f6330..242c93a 100644 --- a/ghc/compiler/nativeGen/MachMisc.hi-boot +++ b/ghc/compiler/nativeGen/MachMisc.hi-boot @@ -1,7 +1,8 @@ _interface_ MachMisc 1 _exports_ -MachMisc fixedHdrSize fmtAsmLbl underscorePrefix; +MachMisc Instr fixedHdrSize fmtAsmLbl underscorePrefix; _declarations_ 1 fixedHdrSize _:_ PrelBase.Int ;; 2 fmtAsmLbl _:_ PrelBase.String -> PrelBase.String ;; 1 underscorePrefix _:_ PrelBase.Bool ;; +1 data Instr; \ No newline at end of file diff --git a/ghc/compiler/nativeGen/MachMisc.hi-boot-5 b/ghc/compiler/nativeGen/MachMisc.hi-boot-5 index 6fb5f9e..8c2a6f2 100644 --- a/ghc/compiler/nativeGen/MachMisc.hi-boot-5 +++ b/ghc/compiler/nativeGen/MachMisc.hi-boot-5 @@ -1,5 +1,6 @@ __interface MachMisc 1 0 where -__export MachMisc fixedHdrSize fmtAsmLbl underscorePrefix; +__export MachMisc Instr fixedHdrSize fmtAsmLbl underscorePrefix; 1 fixedHdrSize :: PrelBase.Int ; 2 fmtAsmLbl :: PrelBase.String -> PrelBase.String ; 1 underscorePrefix :: PrelBase.Bool ; +1 data Instr ; diff --git a/ghc/compiler/nativeGen/MachMisc.lhs b/ghc/compiler/nativeGen/MachMisc.lhs index d31af20..893bf87 100644 --- a/ghc/compiler/nativeGen/MachMisc.lhs +++ b/ghc/compiler/nativeGen/MachMisc.lhs @@ -507,6 +507,7 @@ current translation. | SAR Size Operand Operand -- 1st operand must be an Imm or CL | SHR Size Operand Operand -- 1st operand must be an Imm or CL | NOP + | BT Size Imm Operand -- Float Arithmetic. -- ToDo for 386 @@ -539,6 +540,9 @@ current translation. | GABS Size Reg Reg -- src, dst | GNEG Size Reg Reg -- src, dst | GSQRT Size Reg Reg -- src, dst + | GSIN Size Reg Reg -- src, dst + | GCOS Size Reg Reg -- src, dst + | GTAN Size Reg Reg -- src, dst | GFREE -- do ffree on all x86 regs; an ugly hack -- Comparison @@ -598,6 +602,7 @@ is_G_instr instr GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True GCMP _ _ _ -> True; GABS _ _ _ -> True GNEG _ _ _ -> True; GSQRT _ _ _ -> True + GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True; GFREE -> panic "is_G_instr: GFREE (!)" other -> False diff --git a/ghc/compiler/nativeGen/MachRegs.lhs b/ghc/compiler/nativeGen/MachRegs.lhs index f32024f..446e7dd 100644 --- a/ghc/compiler/nativeGen/MachRegs.lhs +++ b/ghc/compiler/nativeGen/MachRegs.lhs @@ -26,13 +26,13 @@ module MachRegs ( callClobberedRegs, callerSaves, extractMappedRegNos, + mappedRegNo, freeMappedRegs, freeReg, freeRegs, getNewRegNCG, magicIdRegMaybe, mkReg, realReg, - reservedRegs, saveLoc, spRel, stgReg, @@ -336,6 +336,10 @@ extractMappedRegNos regs where ex (MappedReg i) acc = IBOX(i) : acc -- we'll take it ex _ acc = acc -- leave it out + +mappedRegNo :: Reg -> RegNo +mappedRegNo (MappedReg i) = IBOX(i) +mappedRegNo _ = pprPanic "mappedRegNo" empty \end{code} ** Machine-specific Reg stuff: ** @@ -733,40 +737,7 @@ magicIdRegMaybe HpLim = Just (FixedReg ILIT(REG_HpLim)) magicIdRegMaybe _ = Nothing \end{code} -%************************************************************************ -%* * -\subsection{Free, reserved, call-clobbered, and argument registers} -%* * -%************************************************************************ - -@freeRegs@ is the list of registers we can use in register allocation. -@freeReg@ (below) says if a particular register is free. - -With a per-instruction clobber list, we might be able to get some of -these back, but it's probably not worth the hassle. - -@callClobberedRegs@ ... the obvious. - -@argRegs@: assuming a call with N arguments, what registers will be -used to hold arguments? (NB: it doesn't know whether the arguments -are integer or floating-point...) - \begin{code} -reservedRegs :: [RegNo] -reservedRegs -#if alpha_TARGET_ARCH - = [NCG_Reserved_I1, NCG_Reserved_I2, - NCG_Reserved_F1, NCG_Reserved_F2] -#endif -#if i386_TARGET_ARCH - = [{-certainly cannot afford any!-}] -#endif -#if sparc_TARGET_ARCH - = [NCG_Reserved_I1, NCG_Reserved_I2, - NCG_Reserved_F1, NCG_Reserved_F2, - NCG_Reserved_D1, NCG_Reserved_D2] -#endif - ------------------------------- freeRegs :: [Reg] freeRegs diff --git a/ghc/compiler/nativeGen/PprMach.lhs b/ghc/compiler/nativeGen/PprMach.lhs index e35e22c..6232f37 100644 --- a/ghc/compiler/nativeGen/PprMach.lhs +++ b/ghc/compiler/nativeGen/PprMach.lhs @@ -941,7 +941,7 @@ pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack #ifdef DEBUG (<>) (ptext SLIT("# warning: ")) (pprSizeOpOp SLIT("mov") size s d) #else - (ptext SLIT("")) + empty #endif pprInstr (MOV size src dst) = pprSizeOpOp SLIT("mov") size src dst @@ -977,9 +977,9 @@ pprInstr (XOR size src dst) = pprSizeOpOp SLIT("xor") size src dst pprInstr (NOT size op) = pprSizeOp SLIT("not") size op pprInstr (NEGI size op) = pprSizeOp SLIT("neg") size op -pprInstr (SHL size imm dst) = pprSizeByteOpOp SLIT("shl") size imm dst -pprInstr (SAR size imm dst) = pprSizeByteOpOp SLIT("sar") size imm dst -pprInstr (SHR size imm dst) = pprSizeByteOpOp SLIT("shr") size imm dst +pprInstr (SHL size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("shl") size imm dst +pprInstr (SAR size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("sar") size imm dst +pprInstr (SHR size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("shr") size imm dst pprInstr (CMP size src dst) = pprSizeOpOp SLIT("cmp") size src dst pprInstr (TEST size src dst) = pprSizeOpOp SLIT("test") size src dst @@ -989,6 +989,7 @@ pprInstr PUSHA = ptext SLIT("\tpushal") pprInstr POPA = ptext SLIT("\tpopal") pprInstr (NOP) = ptext SLIT("\tnop") +pprInstr (BT size imm src) = pprSizeImmOp SLIT("bt") size imm src pprInstr (CLTD) = ptext SLIT("\tcltd") pprInstr (SETCC cond op) = pprCondInstr SLIT("set") cond (pprOperand B op) @@ -1047,6 +1048,15 @@ pprInstr g@(GNEG sz src dst) = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1]) pprInstr g@(GSQRT sz src dst) = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt ; ", gpop dst 1]) +pprInstr g@(GSIN sz src dst) + = pprG g (hcat [gtab, gpush src 0, text " ; fsin ; ", gpop dst 1]) +pprInstr g@(GCOS sz src dst) + = pprG g (hcat [gtab, gpush src 0, text " ; fcos ; ", gpop dst 1]) + +pprInstr g@(GTAN sz src dst) + = pprG g (hcat [gtab, text "ffree %st(6) ; ", + gpush src 0, text " ; fptan ; ", + text " fstp %st(0) ; ", gpop dst 1]) pprInstr g@(GADD sz src1 src2 dst) = pprG g (hcat [gtab, gpush src1 0, @@ -1106,6 +1116,9 @@ pprGInstr (GCMP sz src dst) = pprSizeRegReg SLIT("gcmp") sz src dst pprGInstr (GABS sz src dst) = pprSizeRegReg SLIT("gabs") sz src dst pprGInstr (GNEG sz src dst) = pprSizeRegReg SLIT("gneg") sz src dst pprGInstr (GSQRT sz src dst) = pprSizeRegReg SLIT("gsqrt") sz src dst +pprGInstr (GSIN sz src dst) = pprSizeRegReg SLIT("gsin") sz src dst +pprGInstr (GCOS sz src dst) = pprSizeRegReg SLIT("gcos") sz src dst +pprGInstr (GTAN sz src dst) = pprSizeRegReg SLIT("gtan") sz src dst pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg SLIT("gadd") sz src1 src2 dst pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg SLIT("gsub") sz src1 src2 dst @@ -1124,6 +1137,19 @@ pprOperand s (OpReg r) = pprReg s r pprOperand s (OpImm i) = pprDollImm i pprOperand s (OpAddr ea) = pprAddr ea +pprSizeImmOp :: FAST_STRING -> Size -> Imm -> Operand -> SDoc +pprSizeImmOp name size imm op1 + = hcat [ + char '\t', + ptext name, + pprSize size, + space, + char '$', + pprImm imm, + comma, + pprOperand size op1 + ] + pprSizeOp :: FAST_STRING -> Size -> Operand -> SDoc pprSizeOp name size op1 = hcat [ diff --git a/ghc/compiler/nativeGen/RegAllocInfo.lhs b/ghc/compiler/nativeGen/RegAllocInfo.lhs index eab566c..c1bd50c 100644 --- a/ghc/compiler/nativeGen/RegAllocInfo.lhs +++ b/ghc/compiler/nativeGen/RegAllocInfo.lhs @@ -35,6 +35,7 @@ module RegAllocInfo ( patchRegs, regLiveness, spillReg, + IF_ARCH_i386(findReservedRegs COMMA,) RegSet, elementOfRegSet, @@ -64,7 +65,6 @@ import OrdList ( mkUnitList ) import PrimRep ( PrimRep(..) ) import UniqSet -- quite a bit of it import Outputable -import PprMach ( pprInstr ) \end{code} %************************************************************************ @@ -354,22 +354,24 @@ regUsage instr = case instr of #if i386_TARGET_ARCH regUsage instr = case instr of - MOV sz src dst -> usage2 src dst - MOVZxL sz src dst -> usage2 src dst - MOVSxL sz src dst -> usage2 src dst - LEA sz src dst -> usage2 src dst - ADD sz src dst -> usage2 src dst - SUB sz src dst -> usage2 src dst - IMUL sz src dst -> usage2 src dst + MOV sz src dst -> usage2 src dst + MOVZxL sz src dst -> usage2 src dst + MOVSxL sz src dst -> usage2 src dst + LEA sz src dst -> usage2 src dst + ADD sz src dst -> usage2s src dst + SUB sz src dst -> usage2s src dst + IMUL sz src dst -> usage2s src dst IDIV sz src -> usage (eax:edx:opToReg src) [eax,edx] - AND sz src dst -> usage2 src dst - OR sz src dst -> usage2 src dst - XOR sz src dst -> usage2 src dst + AND sz src dst -> usage2s src dst + OR sz src dst -> usage2s src dst + XOR sz src dst -> usage2s src dst NOT sz op -> usage1 op NEGI sz op -> usage1 op - SHL sz dst len -> usage2 dst len -- len is either an Imm or ecx. - SAR sz dst len -> usage2 dst len -- len is either an Imm or ecx. - SHR sz len dst -> usage2 dst len -- len is either an Imm or ecx. + SHL sz len dst -> usage2s len dst -- len is either an Imm or ecx. + SAR sz len dst -> usage2s len dst -- len is either an Imm or ecx. + SHR sz len dst -> usage2s len dst -- len is either an Imm or ecx. + BT sz imm src -> usage (opToReg src) [] + PUSH sz op -> usage (opToReg op) [] POP sz op -> usage [] (opToReg op) TEST sz src dst -> usage (opToReg src ++ opToReg dst) [] @@ -403,21 +405,35 @@ regUsage instr = case instr of GABS sz src dst -> usage [src] [dst] GNEG sz src dst -> usage [src] [dst] GSQRT sz src dst -> usage [src] [dst] + GSIN sz src dst -> usage [src] [dst] + GCOS sz src dst -> usage [src] [dst] + GTAN sz src dst -> usage [src] [dst] COMMENT _ -> noUsage SEGMENT _ -> noUsage LABEL _ -> noUsage ASCII _ _ -> noUsage DATA _ _ -> noUsage - _ -> error ("regUsage(x86): " ++ showSDoc (pprInstr instr)) + _ -> pprPanic "regUsage(x86) " empty + where + -- 2 operand form in which the second operand is purely a destination usage2 :: Operand -> Operand -> RegUsage usage2 op (OpReg reg) = usage (opToReg op) [reg] usage2 op (OpAddr ea) = usage (opToReg op ++ addrToRegs ea) [] usage2 op (OpImm imm) = usage (opToReg op) [] + + -- 2 operand form in which the second operand is also an input + usage2s :: Operand -> Operand -> RegUsage + usage2s op (OpReg reg) = usage (opToReg op ++ [reg]) [reg] + usage2s op (OpAddr ea) = usage (opToReg op ++ addrToRegs ea) [] + usage2s op (OpImm imm) = usage (opToReg op) [] + + -- 1 operand form in which the operand is both used and written usage1 :: Operand -> RegUsage usage1 (OpReg reg) = usage [reg] [reg] usage1 (OpAddr ea) = usage (addrToRegs ea) [] + allFPRegs = [fake0,fake1,fake2,fake3,fake4,fake5] --callClobberedRegs = [ eax, ecx, edx ] -- according to gcc, anyway. @@ -442,6 +458,14 @@ regUsage instr = case instr of interesting (FixedReg _) = False interesting _ = True + +-- Allow the spiller to decide whether or not it can use +-- %eax and %edx as spill temporaries. +hasFixedEAXorEDX instr = case instr of + IDIV _ _ -> True + CLTD -> True + other -> False + #endif {- i386_TARGET_ARCH -} -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if sparc_TARGET_ARCH @@ -495,6 +519,71 @@ regUsage instr = case instr of #endif {- sparc_TARGET_ARCH -} \end{code} + +%************************************************************************ +%* * +\subsection{Free, reserved, call-clobbered, and argument registers} +%* * +%************************************************************************ + +@freeRegs@ is the list of registers we can use in register allocation. +@freeReg@ (below) says if a particular register is free. + +With a per-instruction clobber list, we might be able to get some of +these back, but it's probably not worth the hassle. + +@callClobberedRegs@ ... the obvious. + +@argRegs@: assuming a call with N arguments, what registers will be +used to hold arguments? (NB: it doesn't know whether the arguments +are integer or floating-point...) + +findReservedRegs tells us which regs can be used as spill temporaries. +The list of instructions for which we are attempting allocation is +supplied. This is so that we can (at least for x86) examine it to +discover which registers are being used in a fixed way -- for example, +%eax and %edx are used by integer division, so they can't be used as +spill temporaries. However, most instruction lists don't do integer +division, so we don't want to rule them out altogether. + +findReservedRegs returns not a list of spill temporaries, but a list +of list of them. This is so that the allocator can attempt allocating +with at first no spill temps, then if that fails, increasing numbers. +For x86 it is important that we minimise the number of regs reserved +as spill temporaries, since there are so few. For Alpha and Sparc +this isn't a concern; we just ignore the supplied code list and return +a singleton list which we know will satisfy all spill demands. + +\begin{code} +findReservedRegs :: [Instr] -> [[RegNo]] +findReservedRegs instrs +#if alpha_TARGET_ARCH + = [[NCG_Reserved_I1, NCG_Reserved_I2, + NCG_Reserved_F1, NCG_Reserved_F2]] +#endif +#if sparc_TARGET_ARCH + = [[NCG_Reserved_I1, NCG_Reserved_I2, + NCG_Reserved_F1, NCG_Reserved_F2, + NCG_Reserved_D1, NCG_Reserved_D2]] +#endif +#if i386_TARGET_ARCH + -- Sigh. This is where it gets complicated. + = -- first of all, try without any at all. + map (map mappedRegNo) ( + [ [], + -- if that doesn't work, try one integer reg (which might fail) + -- and two float regs (which will always fix any float insns) + [ecx, fake4,fake5] + ] + -- dire straits (but still correct): see if we can bag %eax and %edx + ++ if any hasFixedEAXorEDX instrs + then [] -- bummer + else [ [ecx,edx,fake4,fake5], + [ecx,edx,eax,fake4,fake5] ] + ) +#endif +\end{code} + %************************************************************************ %* * \subsection{@RegLiveness@ type; @regLiveness@ function} @@ -655,6 +744,7 @@ patchRegs instr env = case instr of SHL sz imm dst -> patch2 (SHL sz) imm dst SAR sz imm dst -> patch2 (SAR sz) imm dst SHR sz imm dst -> patch2 (SHR sz) imm dst + BT sz imm src -> patch1 (BT sz imm) src TEST sz src dst -> patch2 (TEST sz) src dst CMP sz src dst -> patch2 (CMP sz) src dst PUSH sz op -> patch1 (PUSH sz) op @@ -684,6 +774,9 @@ patchRegs instr env = case instr of GABS sz src dst -> GABS sz (env src) (env dst) GNEG sz src dst -> GNEG sz (env src) (env dst) GSQRT sz src dst -> GSQRT sz (env src) (env dst) + GSIN sz src dst -> GSIN sz (env src) (env dst) + GCOS sz src dst -> GCOS sz (env src) (env dst) + GTAN sz src dst -> GTAN sz (env src) (env dst) COMMENT _ -> instr SEGMENT _ -> instr @@ -693,7 +786,8 @@ patchRegs instr env = case instr of JXX _ _ -> instr CALL _ -> instr CLTD -> instr - _ -> error ("patchInstr: " ++ showSDoc (pprInstr instr)) --instr + _ -> pprPanic "patchInstr(x86)" empty + where patch1 insn op = insn (patchOp op) patch2 insn src dst = insn (patchOp src) (patchOp dst) @@ -761,7 +855,7 @@ patchRegs instr env = case instr of Spill to memory, and load it back... -JRS, 000122: on x86, don't spill directly below the stack pointer, since +JRS, 000122: on x86, don't spill directly above the stack pointer, since some insn sequences (int <-> conversions) use this as a temp location. Leave 16 bytes of slop. @@ -769,36 +863,44 @@ Leave 16 bytes of slop. spillReg, loadReg :: Reg -> Reg -> InstrList spillReg dyn (MemoryReg i pk) - | i >= 0 -- JRS paranoia - = let - sz = primRepToSize pk + | i >= 0 -- JRS paranoia + = let sz = primRepToSize pk in mkUnitList ( {-Alpha: spill below the stack pointer (?)-} IF_ARCH_alpha( ST sz dyn (spRel i) {-I386: spill above stack pointer leaving 2 words/spill-} - ,IF_ARCH_i386 ( if pk == FloatRep || pk == DoubleRep - then GST DF dyn (spRel (16 + 2 * i)) - else MOV sz (OpReg dyn) (OpAddr (spRel (16 + 2 * i))) + ,IF_ARCH_i386 ( let loc | i < 60 = 4 + 2 * i + | otherwise = -2000 - 2 * i + in + if pk == FloatRep || pk == DoubleRep + then GST DF dyn (spRel loc) + else MOV sz (OpReg dyn) (OpAddr (spRel loc)) {-SPARC: spill below frame pointer leaving 2 words/spill-} ,IF_ARCH_sparc( ST sz dyn (fpRel (-2 * i)) ,))) ) - + | otherwise + = pprPanic "spillReg:" (text "invalid spill location: " <> int i) + ---------------------------- loadReg (MemoryReg i pk) dyn - | i >= 0 -- JRS paranoia - = let - sz = primRepToSize pk + | i >= 0 -- JRS paranoia + = let sz = primRepToSize pk in mkUnitList ( IF_ARCH_alpha( LD sz dyn (spRel i) - ,IF_ARCH_i386 ( if pk == FloatRep || pk == DoubleRep - then GLD DF (spRel (16 + 2 * i)) dyn - else MOV sz (OpAddr (spRel (16 + 2 * i))) (OpReg dyn) + ,IF_ARCH_i386 ( let loc | i < 60 = 4 + 2 * i + | otherwise = -2000 - 2 * i + in + if pk == FloatRep || pk == DoubleRep + then GLD DF (spRel loc) dyn + else MOV sz (OpAddr (spRel loc)) (OpReg dyn) ,IF_ARCH_sparc( LD sz (fpRel (-2 * i)) dyn ,))) ) + | otherwise + = pprPanic "loadReg:" (text "invalid spill location: " <> int i) \end{code} -- 1.7.10.4