X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FnativeGen%2FAsmRegAlloc.lhs;h=92f395a3a075f214af978d8a5d4e407d289b121b;hb=6254fd4ab7c5798599e58b48896c9e284222f26f;hp=00d5d79e568238328becb5f3783237528da6cf34;hpb=e7498a3ee1d0484d02a9e86633cc179c76ebf36e;p=ghc-hetmet.git diff --git a/ghc/compiler/nativeGen/AsmRegAlloc.lhs b/ghc/compiler/nativeGen/AsmRegAlloc.lhs index 00d5d79..92f395a 100644 --- a/ghc/compiler/nativeGen/AsmRegAlloc.lhs +++ b/ghc/compiler/nativeGen/AsmRegAlloc.lhs @@ -1,296 +1,925 @@ % -% (c) The AQUA Project, Glasgow University, 1993-1996 +% (c) The AQUA Project, Glasgow University, 1993-2000 % \section[AsmRegAlloc]{Register allocator} \begin{code} -#include "HsVersions.h" - -module AsmRegAlloc ( runRegAllocate, runHairyRegAllocate ) where +module AsmRegAlloc ( runRegAllocate ) where -IMP_Ubiq(){-uitous-} +#include "HsVersions.h" -import MachCode ( InstrList(..) ) -import MachMisc ( Instr ) +import MachCode ( InstrBlock ) +import MachMisc ( Instr(..) ) +import PprMach ( pprUserReg, pprInstr ) -- debugging import MachRegs import RegAllocInfo -import AbsCSyn ( MagicId ) -import BitSet ( BitSet ) -import FiniteMap ( emptyFM, addListToFM, delListFromFM, lookupFM, keysFM ) +import FiniteMap ( FiniteMap, emptyFM, addListToFM, delListFromFM, + lookupFM, keysFM, eltsFM, mapFM, addToFM_C, addToFM, + listToFM, fmToList, lookupWithDefaultFM ) import Maybes ( maybeToBool ) -import OrdList ( mkEmptyList, mkUnitList, mkSeqList, mkParList, - flattenOrdList, OrdList - ) -import Stix ( StixTree ) import Unique ( mkBuiltinUnique ) -import Util ( mapAccumB, panic ) +import Util ( mapAccumB ) +import OrdList ( unitOL, appOL, fromOL, concatOL ) +import Outputable +import Unique ( Unique, Uniquable(..), mkPseudoUnique3 ) +import CLabel ( CLabel, pprCLabel ) + +import List ( mapAccumL, nub, sort ) +import Array ( Array, array, (!), bounds ) \end{code} -This is the generic register allocator. +This is the generic register allocator. It does allocation for all +architectures. Details for specific architectures are given in +RegAllocInfo.lhs. In practice the allocator needs to know next to +nothing about an architecture to do its job: + +* It needs to be given a list of the registers it can allocate to. + +* It needs to be able to find out which registers each insn reads and + writes. + +* It needs be able to change registers in instructions into other + registers. + +* It needs to be able to find out where execution could go after an + in instruction. + +* It needs to be able to discover sets of registers which can be + used to attempt spilling. First we try something extremely simple. If that fails, we have to do things the hard way. \begin{code} runRegAllocate - :: MRegsState - -> [RegNo] - -> InstrList - -> [Instr] - -runRegAllocate regs reserve_regs instrs - = case simpleAlloc of - Just x -> x - Nothing -> hairyAlloc - where - flatInstrs = flattenOrdList instrs - simpleAlloc = simpleRegAlloc regs [] emptyFM flatInstrs - hairyAlloc = hairyRegAlloc regs reserve_regs flatInstrs - -runHairyRegAllocate -- use only hairy for i386! - :: MRegsState - -> [RegNo] - -> InstrList + :: [Reg] + -> ([Instr] -> [[Reg]]) + -> InstrBlock -> [Instr] -runHairyRegAllocate regs reserve_regs instrs - = hairyRegAlloc regs reserve_regs flatInstrs +runRegAllocate regs find_reserve_regs instrs + = --trace ("runRegAllocate: " ++ show regs) ( + case simpleAlloc of + Just simple -> --trace "SIMPLE" + simple + Nothing -> --trace "GENERAL" + (tryGeneral reserves) + --) where - flatInstrs = flattenOrdList instrs + tryGeneral [] + = error "nativeGen: spilling failed. Workaround: compile with -fvia-C.\n" + tryGeneral (resv:resvs) + = case generalAlloc resv of + Just success -> success + Nothing -> tryGeneral resvs + + reserves = find_reserve_regs flatInstrs + flatInstrs = fromOL instrs + simpleAlloc = doSimpleAlloc regs flatInstrs + generalAlloc resvd = doGeneralAlloc regs resvd flatInstrs \end{code} -Here is the simple register allocator. Just dole out registers until -we run out, or until one gets clobbered before its last use. Don't -do anything fancy with branches. Just pretend that you've got a block -of straight-line code and hope for the best. Experience indicates that -this approach will suffice for about 96 percent of the code blocks that -we generate. - -\begin{code} -simpleRegAlloc - :: MRegsState -- registers to select from - -> [Reg] -- live static registers - -> RegAssignment -- mapping of dynamics to statics - -> [Instr] -- code - -> Maybe [Instr] - -simpleRegAlloc _ _ _ [] = Just [] - -simpleRegAlloc free live env (instr:instrs) - = if null deadSrcs && maybeToBool newAlloc && maybeToBool instrs2 then - Just (instr3 : instrs3) - else - Nothing - where - instr3 = patchRegs instr (lookup env2) - - (srcs, dsts) = case regUsage instr of { RU s d -> (regSetToList s, regSetToList d) } - - lookup env x = case lookupFM env x of {Just y -> y; Nothing -> x} - - deadSrcs = [r | r@(UnmappedReg _ _) <- srcs, lookup env r `not_elem` live] - newDsts = [r | r@(UnmappedReg _ _) <- dsts, r `not_elem` keysFM env] - - newAlloc = foldr allocateNewReg (Just (free, [])) newDsts - (free2, new) = case newAlloc of Just x -> x +Rather than invoke the heavyweight machinery in @doGeneralAlloc@ for +each and every code block, we first try using this simple, fast and +utterly braindead allocator. In practice it handles about 60\% of the +code blocks really fast, even with only 3 integer registers available. +Since we can always give up and fall back to @doGeneralAlloc@, +@doSimpleAlloc@ is geared to handling the common case as fast as +possible. It will succeed only if: - env2 = env `addListToFM` new +* The code mentions registers only of integer class, not floating + class. - live2 = map snd new ++ [x | x <- live, x `not_elem` dsts] +* The code doesn't mention any real registers, so we don't have to + think about dodging and weaving to work around fixed register uses. - instrs2 = simpleRegAlloc free2 live2 env2 instrs - instrs3 = case instrs2 of Just x -> x +* The code mentions at most N virtual registers, where N is the number + of real registers for allocation. - allocateNewReg - :: Reg - -> Maybe (MRegsState, [(Reg, Reg)]) - -> Maybe (MRegsState, [(Reg, Reg)]) +If those conditions are satisfied, we simply trundle along the code, +doling out a real register every time we see mention of a new virtual +register. We either succeed at this, or give up when one of the above +three conditions is no longer satisfied. - allocateNewReg _ Nothing = Nothing - - allocateNewReg d@(UnmappedReg _ pk) (Just (free, prs)) = - if null choices then Nothing - else Just (free2, prs2) - where - choices = possibleMRegs pk free - reg = head choices - free2 = free `useMReg` (case reg of {IBOX(reg2) -> reg2} ) - prs2 = ((d, MappedReg (case reg of {IBOX(reg2) -> reg2})) : prs) +\begin{code} +doSimpleAlloc :: [Reg] -> [Instr] -> Maybe [Instr] +doSimpleAlloc available_real_regs instrs + = let available_iregs + = filter ((== RcInteger).regClass) available_real_regs + + trundle :: [( {-Virtual-}Reg, {-Real-}Reg )] + -> [ {-Real-}Reg ] + -> [Instr] + -> [Instr] + -> Maybe [Instr] + trundle vreg_map uncommitted_rregs ris_done [] + = Just (reverse ris_done) + trundle vreg_map uncommitted_rregs ris_done (i:is) + = case regUsage i of + RU rds wrs + + -- Mentions no regs? Move on quickly + | null rds_l && null wrs_l + -> trundle vreg_map uncommitted_rregs (i:ris_done) is + + -- A case we can't be bothered to handle? + | any isFloatingOrReal rds_l || any isFloatingOrReal wrs_l + -> Nothing + + -- Update the rreg commitments, and map the insn + | otherwise + -> case upd_commitment (wrs_l++rds_l) + vreg_map uncommitted_rregs of + Nothing -- out of rregs; give up + -> Nothing + Just (vreg_map2, uncommitted_rregs2) + -> let i2 = patchRegs i (subst_reg vreg_map2) + in trundle vreg_map2 uncommitted_rregs2 + (i2:ris_done) is + where + isFloatingOrReal reg + = isRealReg reg || regClass reg == RcFloat + || regClass reg == RcDouble + + rds_l = regSetToList rds + wrs_l = regSetToList wrs + + upd_commitment [] vr_map uncomm + = Just (vr_map, uncomm) + upd_commitment (reg:regs) vr_map uncomm + | isRealReg reg + = upd_commitment regs vr_map uncomm + | reg `elem` (map fst vr_map) + = upd_commitment regs vr_map uncomm + | null uncomm + = Nothing + | otherwise + = upd_commitment regs ((reg, head uncomm):vr_map) + (tail uncomm) + + subst_reg vreg_map r + -- If it's a RealReg, it must be STG-specific one + -- (Hp,Sp,BaseReg,etc), since regUsage filters them out, + -- so isFloatingOrReal would not have objected to it. + | isRealReg r + = r + | otherwise + = case [rr | (vr,rr) <- vreg_map, vr == r] of + [rr2] -> rr2 + other -> pprPanic + "doSimpleAlloc: unmapped VirtualReg" + (ppr r) + in + trundle [] available_iregs [] instrs \end{code} -Here is the ``clever'' bit. First go backward (i.e. left), looking for -the last use of dynamic registers. Then go forward (i.e. right), filling -registers with static placements. +From here onwards is the general register allocator and spiller. For +each flow edge (possible transition between instructions), we compute +which virtual and real registers are live on that edge. Then the +mapping is inverted, to give a mapping from register (virtual+real) to +sets of flow edges on which the register is live. Finally, we can use +those sets to decide whether a virtual reg v can be assigned to a real +reg r, by checking that v's live-edge-set does not intersect with r's +current live-edge-set. Having made that assignment, we then augment +r's current live-edge-set (its current commitment, you could say) with +v's live-edge-set. + +doGeneralAlloc takes reserve_regs as the regs to use as spill +temporaries. First it tries to allocate using all regs except +reserve_regs. If that fails, it inserts spill code and tries again to +allocate regs, but this time with the spill temporaries available. +Even this might not work if there are insufficient spill temporaries: +in the worst case on x86, we'd need 3 of them, for insns like addl +(%reg1,%reg2,4) %reg3, since this insn uses all 3 regs as input. \begin{code} -hairyRegAlloc - :: MRegsState - -> [RegNo] - -> [Instr] - -> [Instr] - -hairyRegAlloc regs reserve_regs instrs - = case mapAccumB (doRegAlloc reserve_regs) - (RH regs' 1 emptyFM) noFuture instrs - of (RH _ loc' _, _, instrs') -> - if loc' == 1 then instrs' else - case mapAccumB do_RegAlloc_Nil - (RH regs'' loc' emptyFM) noFuture (flattenOrdList (patchMem instrs')) - of ((RH _ loc'' _),_,instrs'') -> - if loc'' == loc' then instrs'' else panic "runRegAllocate" - where - regs' = regs `useMRegs` reserve_regs - regs'' = mkMRegsState reserve_regs - -do_RegAlloc_Nil = doRegAlloc [] -- out here to avoid CAF (sigh) -do_RegAlloc_Nil - :: RegHistory MRegsState - -> RegFuture - -> Instr - -> (RegHistory MRegsState, RegFuture, Instr) - -noFuture :: RegFuture -noFuture = RF emptyRegSet (FL emptyRegSet emptyFM) emptyFM +doGeneralAlloc + :: [Reg] -- all allocatable regs + -> [Reg] -- the reserve regs + -> [Instr] -- instrs in + -> Maybe [Instr] -- instrs out + +doGeneralAlloc all_regs reserve_regs instrs + -- succeeded without spilling + | prespill_ok = Just prespill_insns + -- failed, and no spill regs avail, so pointless to attempt spilling + | null reserve_regs = Nothing + -- success after spilling + | postspill_ok = maybetrace (spillMsg True) (Just postspill_insns) + -- still not enough reserves after spilling; we have to give up + | otherwise = maybetrace (spillMsg False) Nothing + where + prespill_regs + = filter (`notElem` reserve_regs) all_regs + (prespill_ok, prespill_insns) + = allocUsingTheseRegs instrs prespill_regs + instrs_with_spill_code + = insertSpillCode prespill_insns + (postspill_ok, postspill_insns) + = allocUsingTheseRegs instrs_with_spill_code all_regs + + spillMsg success + = "nativeGen: spilling " + ++ (if success then "succeeded" else "failed ") + ++ " using " + ++ showSDoc (hsep (map ppr reserve_regs)) + +# if 1 /* ifdef DEBUG */ + maybetrace msg x = trace msg x +# else + maybetrace msg x = x +# endif \end{code} -Here we patch instructions that reference ``registers'' which are really in -memory somewhere (the mapping is under the control of the machine-specific -code generator). We place the appropriate load sequences before any instructions -that use memory registers as sources, and we place the appropriate spill sequences -after any instructions that use memory registers as destinations. The offending -instructions are rewritten with new dynamic registers, so we have to run register -allocation again after all of this is said and done. +Here we patch instructions that reference ``registers'' which are +really in memory somewhere (the mapping is under the control of the +machine-specific code generator). We place the appropriate load +sequences before any instructions that use memory registers as +sources, and we place the appropriate spill sequences after any +instructions that use memory registers as destinations. The offending +instructions are rewritten with new dynamic registers, so generalAlloc +has to run register allocation again after all of this is said and +done. + +On some architectures (x86, currently), we do without a frame-pointer, +and instead spill relative to the stack pointer (%esp on x86). +Because the stack pointer may move, the patcher needs to keep track of +the current stack pointer "delta". That's easy, because all it needs +to do is spot the DELTA bogus-insns which will have been inserted by +the relevant insn selector precisely so as to notify the spiller of +stack-pointer movement. The delta is passed to loadReg and spillReg, +since they generate the actual spill code. We expect the final delta +to be the same as the starting one (zero), reflecting the fact that +changes to the stack pointer should not extend beyond a basic block. + +Finally, there is the issue of mapping an arbitrary set of unallocated +VirtualRegs into a contiguous sequence of spill slots. The failed +allocation will have left the code peppered with references to +VirtualRegs, each of which contains a unique. So we make an env which +maps these VirtualRegs to integers, starting from zero, and pass that +env through to loadReg and spillReg. There, they are used to look up +spill slot numbers for the uniques. \begin{code} -patchMem :: [Instr] -> InstrList - -patchMem cs = foldr (mkSeqList . patchMem') mkEmptyList cs - -patchMem' :: Instr -> InstrList - -patchMem' instr - = if null memSrcs && null memDsts then mkUnitList instr - else mkSeqList - (foldr mkParList mkEmptyList loadSrcs) - (mkSeqList instr' - (foldr mkParList mkEmptyList spillDsts)) - - where - (RU srcs dsts) = regUsage instr - - memToDyn (MemoryReg i pk) = UnmappedReg (mkBuiltinUnique i) pk - memToDyn other = other - - memSrcs = [ r | r@(MemoryReg _ _) <- regSetToList srcs] - memDsts = [ r | r@(MemoryReg _ _) <- regSetToList dsts] - - loadSrcs = map load memSrcs +insertSpillCode :: [Instr] -> [Instr] +insertSpillCode insns + = let uniques_in_insns + = map getUnique + (regSetToList + (foldl unionRegSets emptyRegSet + (map vregs_in_insn insns))) + vregs_in_insn i + = case regUsage i of + RU rds wrs -> filterRegSet isVirtualReg + (rds `unionRegSets` wrs) + vreg_to_slot_map :: FiniteMap Unique Int + vreg_to_slot_map + = listToFM (zip uniques_in_insns [0..]) + + ((final_stack_delta, final_ctr), insnss) + = mapAccumL (patchInstr vreg_to_slot_map) (0,0) insns + in + if final_stack_delta == 0 + then concat insnss + else pprPanic "patchMem: non-zero final delta" + (int final_stack_delta) + + +-- patchInstr has as a running state two Ints, one the current stack delta, +-- needed to figure out offsets to stack slots on archs where we spill relative +-- to the stack pointer, as opposed to the frame pointer. The other is a +-- counter, used to manufacture new temporary register names. + +patchInstr :: FiniteMap Unique Int -> (Int,Int) -> Instr -> ((Int,Int), [Instr]) +patchInstr vreg_to_slot_map (delta,ctr) instr + + | null memSrcs && null memDsts + = ((delta',ctr), [instr]) + + | otherwise + = ((delta',ctr'), loadSrcs ++ [instr'] ++ spillDsts) + where + delta' = case instr of DELTA d -> d ; _ -> delta + + (RU srcs dsts) = regUsage instr + + -- The instr being patched may mention several vregs -- those which + -- could not be assigned real registers. For each such vreg, we + -- invent a new vreg, used only around this instruction and nowhere + -- else. These new vregs replace the unallocatable vregs; they are + -- loaded from the spill area, the instruction is done with them, + -- and results if any are then written back to the spill area. + vregs_in_instr + = nub (filter isVirtualReg + (regSetToList srcs ++ regSetToList dsts)) + n_vregs_in_instr + = length vregs_in_instr + ctr' + = ctr + n_vregs_in_instr + vreg_env + = zip vregs_in_instr [ctr, ctr+1 ..] + + mkTmpReg vreg + | isVirtualReg vreg + = case [vi | (vreg', vi) <- vreg_env, vreg' == vreg] of + [i] -> if regClass vreg == RcInteger + then VirtualRegI (mkPseudoUnique3 i) + else VirtualRegF (mkPseudoUnique3 i) + _ -> pprPanic "patchInstr: unmapped VReg" (ppr vreg) + | otherwise + = vreg + + memSrcs = filter isVirtualReg (regSetToList srcs) + memDsts = filter isVirtualReg (regSetToList dsts) + + loadSrcs = map load memSrcs spillDsts = map spill memDsts - load mem = loadReg mem (memToDyn mem) - spill mem = spillReg (memToDyn mem) mem + load mem = loadReg vreg_to_slot_map delta mem (mkTmpReg mem) + spill mem = spillReg vreg_to_slot_map delta' (mkTmpReg mem) mem - instr' = mkUnitList (patchRegs instr memToDyn) + instr' = patchRegs instr mkTmpReg \end{code} +allocUsingTheseRegs is the register allocator proper. It attempts +to allocate dynamic regs to real regs, given a list of real regs +which it may use. If it fails due to lack of real regs, the returned +instructions use what real regs there are, but will retain uses of +dynamic regs for which a real reg could not be found. It is these +leftover dynamic reg references which insertSpillCode will later +assign to spill slots. + +Some implementation notes. +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Instructions are numbered sequentially, starting at zero. + +A flow edge (FE) is a pair of insn numbers (MkFE Int Int) denoting +a possible flow of control from the first insn to the second. + +The input to the register allocator is a list of instructions, which +mention Regs. A Reg can be a RealReg -- a real machine reg -- or a +VirtualReg, which carries a unique. After allocation, all the +VirtualReg references will have been converted into RealRegs, and +possible some spill code will have been inserted. + +The heart of the register allocator works in four phases. + +1. (find_flow_edges) Calculate all the FEs for the code list. + Return them not as a [FE], but implicitly, as a pair of + Array Int [Int], being the successor and predecessor maps + for instructions. + +2. (calc_liveness) Returns a FiniteMap FE RegSet. For each + FE, indicates the set of registers live on that FE. Note + that the set includes both RealRegs and VirtualRegs. The + former appear because the code could mention fixed register + usages, and we need to take them into account from the start. + +3. (calc_live_range_sets) Invert the above mapping, giving a + FiniteMap Reg FeSet, indicating, for each virtual and real + reg mentioned in the code, which FEs it is live on. + +4. (calc_vreg_to_rreg_mapping) For virtual reg, try and find + an allocatable real register for it. Each real register has + a "current commitment", indicating the set of FEs it is + currently live on. A virtual reg v can be assigned to + real reg r iff v's live-fe-set does not intersect with r's + current commitment fe-set. If the assignment is made, + v's live-fe-set is union'd into r's current commitment fe-set. + There is also the minor restriction that v and r must be of + the same register class (integer or floating). + + Once this mapping is established, we simply apply it to the + input insns, and that's it. + + If no suitable real register can be found, the vreg is mapped + to itself, and we deem allocation to have failed. The partially + allocated code is returned. The higher echelons of the allocator + (doGeneralAlloc and runRegAlloc) then cooperate to insert spill + code and re-run allocation, until a successful allocation is found. \begin{code} -doRegAlloc - :: [RegNo] - -> RegHistory MRegsState - -> RegFuture - -> Instr - -> (RegHistory MRegsState, RegFuture, Instr) - -doRegAlloc reserved_regs free_env in_use instr = (free_env', in_use', instr') - where - (free_env', instr') = doRegAlloc' reserved_regs free_env info instr - (in_use', info) = getUsage in_use instr -\end{code} -\begin{code} -getUsage - :: RegFuture - -> Instr - -> (RegFuture, RegInfo Instr) - -getUsage (RF next_in_use future reg_conflicts) instr - = (RF in_use' future' reg_conflicts', - RI in_use' srcs dsts last_used reg_conflicts') - where (RU srcs dsts) = regUsage instr - (RL in_use future') = regLiveness instr (RL next_in_use future) - live_through = in_use `minusRegSet` dsts - last_used = [ r | r <- regSetToList srcs, - not (r `elementOfRegSet` (fstFL future) || r `elementOfRegSet` in_use)] - in_use' = srcs `unionRegSets` live_through - reg_conflicts' = case new_conflicts of - [] -> reg_conflicts - _ -> addListToFM reg_conflicts new_conflicts - new_conflicts = if isEmptyRegSet live_dynamics then [] - else [ (r, merge_conflicts r) - | r <- extractMappedRegNos (regSetToList dsts) ] - merge_conflicts reg = case lookupFM reg_conflicts reg of - Nothing -> live_dynamics - Just conflicts -> conflicts `unionRegSets` live_dynamics - live_dynamics = mkRegSet - [ r | r@(UnmappedReg _ _) <- regSetToList live_through ] - -doRegAlloc' - :: [RegNo] - -> RegHistory MRegsState - -> RegInfo Instr - -> Instr - -> (RegHistory MRegsState, Instr) - -doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) instr = - - (RH frs'' loc' env'', patchRegs instr dynToStatic) - - where - - -- free up new registers - free :: [RegNo] - free = extractMappedRegNos (map dynToStatic lastu) - - -- (1) free registers that are used last as source operands in this instruction - frs_not_in_use = frs `useMRegs` (extractMappedRegNos (regSetToList in_use)) - frs' = (frs_not_in_use `freeMRegs` free) `useMRegs` reserved - - -- (2) allocate new registers for the destination operands - -- allocate registers for new dynamics - - new_dynamix = [ r | r@(UnmappedReg _ _) <- regSetToList dsts, r `not_elem` keysFM env ] - - (frs'', loc', new) = foldr allocateNewRegs (frs', loc, []) new_dynamix - - env' = addListToFM env new - - env'' = delListFromFM env' lastu - - dynToStatic :: Reg -> Reg - dynToStatic dyn@(UnmappedReg _ _) = - case lookupFM env' dyn of - Just r -> r - Nothing -> trace "Lost register; possibly a floating point type error in a _ccall_?" dyn - dynToStatic other = other - - allocateNewRegs - :: Reg -> (MRegsState, Int, [(Reg, Reg)]) -> (MRegsState, Int, [(Reg, Reg)]) - - allocateNewRegs d@(UnmappedReg _ pk) (fs, mem, lst) = (fs', mem', (d, f) : lst) - where (fs', f, mem') = case acceptable fs of - [] -> (fs, MemoryReg mem pk, mem + 1) - (IBOX(x2):_) -> (fs `useMReg` x2, MappedReg x2, mem) - - acceptable regs = filter no_conflict (possibleMRegs pk regs) - no_conflict reg = case lookupFM conflicts reg of - Nothing -> True - Just conflicts -> not (d `elementOfRegSet` conflicts) -\end{code} +allocUsingTheseRegs :: [Instr] -> [Reg] -> (Bool, [Instr]) +allocUsingTheseRegs instrs available_real_regs + = let (all_vregs_mapped, v_to_r_mapping) + = calc_vreg_to_rreg_mapping instrs available_real_regs + new_insns + = map (flip patchRegs sr) instrs + sr reg + | isRealReg reg + = reg + | otherwise + = case lookupFM v_to_r_mapping reg of + Just r -> r + Nothing -> pprPanic "allocateUsingTheseRegs: unmapped vreg: " + (ppr reg) + in + --trace ("allocUsingTheseRegs: " ++ show available_real_regs) ( + (all_vregs_mapped, new_insns) + --) + + +-- the heart of the matter. +calc_vreg_to_rreg_mapping :: [Instr] -> [Reg] -> (Bool, FiniteMap Reg Reg) +calc_vreg_to_rreg_mapping insns available_real_regs + = let + lr_sets :: FiniteMap Reg FeSet + lr_sets = calc_live_range_sets insns + + -- lr_sets maps: vregs mentioned in insns to sets of live FEs + -- and also: rregs mentioned in insns to sets of live FEs + -- We need to extract the rreg mapping, and use it as the + -- initial real-register-commitment. Also, add to the initial + -- commitment, empty commitments for any real regs not + -- mentioned in it. + + -- which real regs do we want to keep track of in the running + -- commitment mapping? Precisely the available_real_regs. + -- We don't care about real regs mentioned by insns which are + -- not in this list, since we're not allocating to them. + initial_rr_commitment :: FiniteMap Reg FeSet + initial_rr_commitment + = listToFM [(rreg, + case lookupFM lr_sets rreg of + Nothing -> emptyFeSet + Just fixed_use_fes -> fixed_use_fes + ) + | rreg <- available_real_regs] + + -- These are the vregs for which we actually have to (try to) + -- assign a real register. (ie, the whole reason we're here at all :) + vreg_liveness_list :: [(Reg, FeSet)] + vreg_liveness_list = filter (not.isRealReg.fst) + (fmToList lr_sets) + + -- A loop, which attempts to assign each vreg to a rreg. + loop rr_commitment v_to_r_map [] + = v_to_r_map + loop rr_commitment v_to_r_map ((vreg,vreg_live_fes):not_yet_done) + = let + -- find a real reg which is not live for any of vreg_live_fes + cand_reals + = [rreg + | (rreg,rreg_live_FEs) <- fmToList rr_commitment, + regClass vreg == regClass rreg, + isEmptyFeSet (intersectionFeSets rreg_live_FEs + vreg_live_fes) + ] + in + case cand_reals of + [] -> -- bummer. No register is available. Just go on to + -- the next vreg, mapping the vreg to itself. + loop rr_commitment (addToFM v_to_r_map vreg vreg) + not_yet_done + (r:_) + -> -- Hurrah! Found a free reg of the right class. + -- Now we need to update the RR commitment. + loop rr_commitment2 (addToFM v_to_r_map vreg r) + not_yet_done + where + rr_commitment2 + = addToFM_C unionFeSets rr_commitment r + vreg_live_fes + + -- the final vreg to rreg mapping + vreg_assignment + = loop initial_rr_commitment emptyFM vreg_liveness_list + -- did we succeed in mapping everyone to a real reg? + allocation_succeeded + = all isRealReg (eltsFM vreg_assignment) + in + (allocation_succeeded, vreg_assignment) + + + +-- calculate liveness, then produce the live range info +-- as a mapping of VRegs to the set of FEs on which they are live. +-- The difficult part is inverting the mapping of Reg -> FeSet +-- to produce a mapping FE -> RegSet. + +calc_live_range_sets :: [Instr] -> FiniteMap Reg FeSet +calc_live_range_sets insns + = let + -- this is the "original" (old) mapping + lis :: FiniteMap FE RegSet + lis = calc_liveness insns + + -- establish the totality of reg names mentioned by the + -- insns, by scanning over the insns. + all_mentioned_regs :: RegSet + all_mentioned_regs + = foldl unionRegSets emptyRegSet + (map (\i -> case regUsage i of + RU rds wrs -> unionRegSets rds wrs) + insns) + + -- Initial inverted mapping, from Reg to sets of FEs + initial_imap :: FiniteMap Reg FeSet + initial_imap + = listToFM [(reg, emptyFeSet) + | reg <- regSetToList all_mentioned_regs] + + -- Update the new map with one element of the old map + upd_imap :: FiniteMap Reg FeSet -> (FE, RegSet) + -> FiniteMap Reg FeSet + upd_imap imap (fe, regset) + = foldl upd_1_imap imap (regSetToList regset) + where + upd_1_imap curr reg + = addToFM_C unionFeSets curr reg (unitFeSet fe) + + -- the complete inverse mapping + final_imap :: FiniteMap Reg FeSet + final_imap + = foldl upd_imap initial_imap (fmToList lis) + in + final_imap + + + +-- Given the insns, calculate the FEs, and then doing fixpointing to +-- figure out the set of live regs (virtual regs AND real regs) live +-- on each FE. + +calc_liveness :: [Instr] -> FiniteMap FE RegSet +calc_liveness insns + = let (pred_map, succ_map) + = find_flow_edges insns + + -- We use the convention that if the current approximation + -- doesn't give a mapping for some FE, that FE maps to the + -- empty set. + initial_approx, fixpoint :: FiniteMap FE RegSet + initial_approx + = mk_initial_approx 0 insns succ_map emptyFM + fixpoint + = fix_set initial_approx 1 + -- If you want to live dangerously, and promise that the code + -- doesn't contain any loops (ie, there are no back edges in + -- the flow graph), you should be able to get away with this: + -- = upd_liveness_info pred_map succ_map insn_array initial_approx + -- But since I'm paranoid, and since it hardly makes any difference + -- to the compiler run-time (about 0.1%), I prefer to do the + -- the full fixpointing game. + + insn_array + = let n = length insns + in array (0, n-1) (zip [0..] insns) + + sameSets [] [] = True + sameSets (c:cs) (n:ns) = eqRegSets c n && sameSets cs ns + sameSets _ _ = False + + fix_set curr_approx iter_number + = let next_approx + = upd_liveness_info pred_map succ_map insn_array curr_approx + curr_sets + = eltsFM curr_approx + next_sets + = eltsFM next_approx + same + = sameSets curr_sets next_sets + final_approx + = if same then curr_approx + else fix_set next_approx (iter_number+1) + in + --trace (let qqq (fe, regset) + -- = show fe ++ " " ++ show (regSetToList regset) + -- in + -- "\n::iteration " ++ show iter_number ++ "\n" + -- ++ (unlines . map qqq . fmToList) + -- next_approx ++"\n" + -- ) + final_approx + in + fixpoint + + +-- Create a correct initial approximation. For each instruction that +-- writes a register, we deem that the register is live on the +-- flow edges leaving the instruction. Subsequent iterations of +-- the liveness AbI augment this based purely on reads of regs, not +-- writes. We need to start off with at least this minimal write- +-- based information in order that writes to vregs which are never +-- used have non-empty live ranges. If we don't do that, we eventually +-- wind up assigning such vregs to any old real reg, since they don't +-- apparently conflict -- you can't conflict with an empty live range. +-- This kludge is unfortunate, but we need to do it to cover not only +-- writes to vregs which are never used, but also to deal correctly +-- with the fact that calls to C will trash the callee saves registers. + +mk_initial_approx :: Int -> [Instr] -> Array Int [Int] + -> FiniteMap FE RegSet + -> FiniteMap FE RegSet +mk_initial_approx ino [] succ_map ia_so_far + = ia_so_far +mk_initial_approx ino (i:is) succ_map ia_so_far + = let wrs + = case regUsage i of RU rrr www -> www + new_fes + = [case ino of { I# inoh -> + case ino_succ of { I# ino_succh -> + MkFE inoh ino_succh + }} + | ino_succ <- succ_map ! ino] + + loop [] ia = ia + loop (fe:fes) ia + = loop fes (addToFM_C unionRegSets ia fe wrs) + + next_ia + = loop new_fes ia_so_far + in + mk_initial_approx (ino+1) is succ_map next_ia + + +-- Do one step in the liveness info calculation (AbI :). Given the +-- prior approximation (which tells you a subset of live VRegs+RRegs +-- for each flow edge), calculate new information for all FEs. +-- Rather than do this by iterating over FEs, it's easier to iterate +-- over insns, and update their incoming FEs. + +upd_liveness_info :: Array Int [Int] -- instruction pred map + -> Array Int [Int] -- instruction succ map + -> Array Int Instr -- array of instructions + -> FiniteMap FE RegSet -- previous approx + -> FiniteMap FE RegSet -- improved approx + +upd_liveness_info pred_map succ_map insn_array prev_approx + = do_insns hi prev_approx + where + (lo, hi) = bounds insn_array + + enquireMapFE :: FiniteMap FE RegSet -> FE + -> RegSet + enquireMapFE fm fe + = case lookupFM fm fe of + Just set -> set + Nothing -> emptyRegSet + + -- Work backwards, from the highest numbered insn to the lowest. + -- This is a heuristic which causes faster convergence to the + -- fixed point. In particular, for straight-line code with no + -- branches at all, arrives at the fixpoint in one iteration. + do_insns ino approx + | ino < lo + = approx + | otherwise + = let fes_to_futures + = [case ino of { I# inoh -> + case future_ino of { I# future_inoh -> + MkFE inoh future_inoh + }} + | future_ino <- succ_map ! ino] + future_lives + = map (enquireMapFE approx) fes_to_futures + future_live + = foldr unionRegSets emptyRegSet future_lives + + fes_from_histories + = [case history_ino of { I# history_inoh -> + case ino of { I# inoh -> + MkFE history_inoh inoh + }} + | history_ino <- pred_map ! ino] + new_approx + = foldl update_one_history approx fes_from_histories + + insn + = insn_array ! ino + history_independent_component + = case regUsage insn of + RU rds wrs + -> unionRegSets rds + (minusRegSets future_live wrs) + + update_one_history :: FiniteMap FE RegSet + -> FE + -> FiniteMap FE RegSet + update_one_history approx0 fe + = addToFM_C unionRegSets approx0 fe + history_independent_component + + rest_done + = do_insns (ino-1) new_approx + in + rest_done + + + +-- Extract the flow edges from a list of insns. Express the information +-- as two mappings, from insn number to insn numbers of predecessors, +-- and from insn number to insn numbers of successors. (Since that's +-- what we need to know when computing live ranges later). Instructions +-- are numbered starting at zero. This function is long and complex +-- in order to be efficient; it could equally well be shorter and slower. + +find_flow_edges :: [Instr] -> (Array Int [Int], + Array Int [Int]) +find_flow_edges insns + = let + -- First phase: make a temp env which maps labels + -- to insn numbers, so the second pass can know the insn + -- numbers for jump targets. + + label_env :: FiniteMap CLabel Int + + mk_label_env n env [] = env + mk_label_env n env ((LABEL clbl):is) + = mk_label_env (n+1) (addToFM env clbl n) is + mk_label_env n env (i:is) + = mk_label_env (n+1) env is + + label_env = mk_label_env 0 emptyFM insns + + find_label :: CLabel -> Int + find_label jmptarget + = case lookupFM label_env jmptarget of + Just ino -> ino + Nothing -> pprPanic "find_flow_edges: unmapped label" + (pprCLabel jmptarget) + + -- Second phase: traverse the insns, and make up the successor map. + + least_ino, greatest_ino :: Int + least_ino = 0 + greatest_ino = length insns - 1 + + mk_succ_map :: Int -> [(Int, [Int])] -> [Instr] -> [(Int, [Int])] + + mk_succ_map i_num rsucc_map [] + = reverse rsucc_map + + mk_succ_map i_num rsucc_map (i:is) + = let i_num_1 = i_num + 1 + in + case insnFuture i of + + NoFuture + -> -- A non-local jump. We can regard this insn as a terminal + -- insn in the graph, so we don't add any edges. + mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is + + Next + | null is -- this is the last insn, and it doesn't go anywhere + -- (a meaningless scenario); handle it anyway + -> mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is + + | otherwise -- flows to next insn; add fe i_num -> i_num+1 + -> mk_succ_map i_num_1 ((i_num, [i_num_1]): rsucc_map) + is + + Branch lab -- jmps to lab; add fe i_num -> i_target + -> let i_target = find_label lab + in + mk_succ_map i_num_1 ((i_num, [i_target]): rsucc_map) + is + NextOrBranch lab + | null is -- jmps to label, or falls through, and this is + -- the last insn (a meaningless scenario); + -- flag an error + -> error "find_flow_edges: NextOrBranch is last" + + | otherwise -- add fes i_num -> i_num+1 + -- and i_num -> i_target + -> let i_target = find_label lab + in + mk_succ_map i_num_1 ((i_num, [i_num_1, i_target]):rsucc_map) + is + + -- Third phase: invert the successor map to get the predecessor + -- map, using an algorithm which is quadratic in the worst case, + -- but runs in almost-linear time, because of the nature of our + -- inputs: most insns have a single successor, the next insn. + + invert :: [(Int, [Int])] -> [(Int, [Int])] + invert fmap + = let inverted_pairs + = concatMap ( \ (a, bs) -> [(b,a) | b <- bs] ) fmap + sorted_inverted_pairs + = isort inverted_pairs + + grp :: Int -> [Int] -> [(Int,Int)] -> [(Int,[Int])] + grp k vs [] = [(k, vs)] + grp k vs ((kk,vv):rest) + | k == kk = grp k (vv:vs) rest + | otherwise = (k,vs) : grp kk [vv] rest + + grp_start [] = [] + grp_start ((kk,vv):rest) = grp kk [vv] rest + + grouped + = grp_start sorted_inverted_pairs + + -- make sure that the reverse mapping maps all inos + add_empties ino [] + | ino > greatest_ino = [] + | otherwise = (ino,[]): add_empties (ino+1) [] + add_empties ino ((k,vs):rest) + | ino < k = (ino,[]): add_empties (ino+1) ((k,vs):rest) + | ino == k = (k,vs) : add_empties (ino+1) rest + + -- This is nearly linear provided that the fsts of the + -- list are nearly in order -- a critical assumption + -- for efficiency. + isort :: [(Int,Int)] -> [(Int,Int)] + isort [] = [] + isort (x:xs) = insert x (isort xs) + + insert :: (Int,Int) -> [(Int,Int)] -> [(Int,Int)] + insert y [] = [y] + insert y (z:zs) + -- specifically, this first test should almost always + -- be True in order for the near-linearity to happen + | fst y <= fst z = y:z:zs + | otherwise = z: insert y zs + in + add_empties least_ino grouped + + -- Finally ... + + succ_list + = mk_succ_map 0 [] insns + succ_map + = array (least_ino, greatest_ino) succ_list + pred_list + = invert succ_list + pred_map + = array (least_ino, greatest_ino) pred_list + in + (pred_map, succ_map) + + +-- That's all, folks! From here on is just some dull supporting stuff. + +-- A data type for flow edges +data FE + = MkFE Int# Int# deriving (Eq, Ord) + +-- deriving Show on types with unboxed fields doesn't work +instance Show FE where + showsPrec _ (MkFE s d) + = showString "MkFE" . shows (I# s) . shows ' ' . shows (I# d) + +-- Blargh. Use ghc stuff soon! Or: perhaps that's not such a good +-- idea. Most of these sets are either empty or very small, and it +-- might be that the overheads of the FiniteMap based set implementation +-- is a net loss. The same might be true of RegSets. + +newtype FeSet = MkFeSet [FE] + +feSetFromList xs + = MkFeSet (nukeDups (sort xs)) + where nukeDups :: [FE] -> [FE] + nukeDups [] = [] + nukeDups [x] = [x] + nukeDups (x:y:xys) + = if x == y then nukeDups (y:xys) + else x : nukeDups (y:xys) + +feSetToList (MkFeSet xs) = xs +isEmptyFeSet (MkFeSet xs) = null xs +emptyFeSet = MkFeSet [] +eqFeSet (MkFeSet xs1) (MkFeSet xs2) = xs1 == xs2 +unitFeSet x = MkFeSet [x] + +elemFeSet x (MkFeSet xs) + = f xs + where + f [] = False + f (y:ys) | x == y = True + | x < y = False + | otherwise = f ys + +unionFeSets (MkFeSet xs1) (MkFeSet xs2) + = MkFeSet (f xs1 xs2) + where + f [] bs = bs + f as [] = as + f (a:as) (b:bs) + | a < b = a : f as (b:bs) + | a > b = b : f (a:as) bs + | otherwise = a : f as bs + +minusFeSets (MkFeSet xs1) (MkFeSet xs2) + = MkFeSet (f xs1 xs2) + where + f [] bs = [] + f as [] = as + f (a:as) (b:bs) + | a < b = a : f as (b:bs) + | a > b = f (a:as) bs + | otherwise = f as bs + +intersectionFeSets (MkFeSet xs1) (MkFeSet xs2) + = MkFeSet (f xs1 xs2) + where + f [] bs = [] + f as [] = [] + f (a:as) (b:bs) + | a < b = f as (b:bs) + | a > b = f (a:as) bs + | otherwise = a : f as bs -We keep a local copy of the Prelude function \tr{notElem}, -so that it can be specialised. (Hack me gently. [WDP 94/11]) -\begin{code} -not_elem x [] = True -not_elem x (y:ys) = x /= y && not_elem x ys \end{code}