2 % (c) The AQUA Project, Glasgow University, 1993-2000
4 \section[AsmRegAlloc]{Register allocator}
7 module AsmRegAlloc ( runRegAllocate ) where
9 #include "HsVersions.h"
11 import MachCode ( InstrBlock )
12 import MachMisc ( Instr(..) )
13 import PprMach ( pprInstr ) -- Just for debugging
17 import FiniteMap ( FiniteMap, emptyFM, addListToFM, delListFromFM,
18 lookupFM, keysFM, eltsFM, mapFM, addToFM_C, addToFM,
19 listToFM, fmToList, lookupWithDefaultFM )
20 import Unique ( mkBuiltinUnique )
21 import OrdList ( unitOL, appOL, fromOL, concatOL )
23 import Unique ( Unique, Uniquable(..), mkPseudoUnique3 )
24 import CLabel ( CLabel, pprCLabel )
26 import List ( mapAccumL, nub, sort )
27 import Array ( Array, array, (!), bounds )
30 This is the generic register allocator. It does allocation for all
31 architectures. Details for specific architectures are given in
32 RegAllocInfo.lhs. In practice the allocator needs to know next to
33 nothing about an architecture to do its job:
35 * It needs to be given a list of the registers it can allocate to.
37 * It needs to be able to find out which registers each insn reads and
40 * It needs be able to change registers in instructions into other
43 * It needs to be able to find out where execution could go after an
46 * It needs to be able to discover sets of registers which can be
47 used to attempt spilling.
49 First we try something extremely simple. If that fails, we have to do
55 -> ([Instr] -> [[Reg]])
59 runRegAllocate regs find_reserve_regs instrs
60 = --trace ("runRegAllocate: " ++ show regs) (
62 Just simple -> --trace "SIMPLE"
64 Nothing -> --trace "GENERAL"
69 = pprPanic "nativeGen: spilling failed. Workaround: compile with -fvia-C.\n"
70 ( (text "reserves = " <> ppr reserves)
74 (vcat (map pprInstr flatInstrs))
76 tryGeneral (resv:resvs)
77 = case generalAlloc resv of
78 Just success -> success
79 Nothing -> tryGeneral resvs
81 reserves = find_reserve_regs flatInstrs
82 flatInstrs = fromOL instrs
83 simpleAlloc = doSimpleAlloc regs flatInstrs
84 generalAlloc resvd = doGeneralAlloc regs resvd flatInstrs
87 Rather than invoke the heavyweight machinery in @doGeneralAlloc@ for
88 each and every code block, we first try using this simple, fast and
89 utterly braindead allocator. In practice it handles about 60\% of the
90 code blocks really fast, even with only 3 integer registers available.
91 Since we can always give up and fall back to @doGeneralAlloc@,
92 @doSimpleAlloc@ is geared to handling the common case as fast as
93 possible. It will succeed only if:
95 * The code mentions registers only of integer class, not floating
98 * The code doesn't mention any real registers, so we don't have to
99 think about dodging and weaving to work around fixed register uses.
101 * The code mentions at most N virtual registers, where N is the number
102 of real registers for allocation.
104 If those conditions are satisfied, we simply trundle along the code,
105 doling out a real register every time we see mention of a new virtual
106 register. We either succeed at this, or give up when one of the above
107 three conditions is no longer satisfied.
110 doSimpleAlloc :: [Reg] -> [Instr] -> Maybe [Instr]
111 doSimpleAlloc available_real_regs instrs
112 = let available_iregs
113 = filter ((== RcInteger).regClass) available_real_regs
115 trundle :: [( {-Virtual-}Reg, {-Real-}Reg )]
120 trundle vreg_map uncommitted_rregs ris_done []
121 = Just (reverse ris_done)
122 trundle vreg_map uncommitted_rregs ris_done (i:is)
126 -- Mentions no regs? Move on quickly
127 | null rds_l && null wrs_l
128 -> trundle vreg_map uncommitted_rregs (i:ris_done) is
130 -- A case we can't be bothered to handle?
131 | any isFloatingOrReal rds_l || any isFloatingOrReal wrs_l
134 -- Update the rreg commitments, and map the insn
136 -> case upd_commitment (wrs_l++rds_l)
137 vreg_map uncommitted_rregs of
138 Nothing -- out of rregs; give up
140 Just (vreg_map2, uncommitted_rregs2)
141 -> let i2 = patchRegs i (subst_reg vreg_map2)
142 in trundle vreg_map2 uncommitted_rregs2
146 = isRealReg reg || regClass reg == RcFloat
147 || regClass reg == RcDouble
149 rds_l = regSetToList rds
150 wrs_l = regSetToList wrs
152 upd_commitment [] vr_map uncomm
153 = Just (vr_map, uncomm)
154 upd_commitment (reg:regs) vr_map uncomm
156 = upd_commitment regs vr_map uncomm
157 | reg `elem` (map fst vr_map)
158 = upd_commitment regs vr_map uncomm
162 = upd_commitment regs ((reg, head uncomm):vr_map)
166 -- If it's a RealReg, it must be STG-specific one
167 -- (Hp,Sp,BaseReg,etc), since regUsage filters them out,
168 -- so isFloatingOrReal would not have objected to it.
172 = case [rr | (vr,rr) <- vreg_map, vr == r] of
175 "doSimpleAlloc: unmapped VirtualReg"
178 trundle [] available_iregs [] instrs
181 From here onwards is the general register allocator and spiller. For
182 each flow edge (possible transition between instructions), we compute
183 which virtual and real registers are live on that edge. Then the
184 mapping is inverted, to give a mapping from register (virtual+real) to
185 sets of flow edges on which the register is live. Finally, we can use
186 those sets to decide whether a virtual reg v can be assigned to a real
187 reg r, by checking that v's live-edge-set does not intersect with r's
188 current live-edge-set. Having made that assignment, we then augment
189 r's current live-edge-set (its current commitment, you could say) with
192 doGeneralAlloc takes reserve_regs as the regs to use as spill
193 temporaries. First it tries to allocate using all regs except
194 reserve_regs. If that fails, it inserts spill code and tries again to
195 allocate regs, but this time with the spill temporaries available.
196 Even this might not work if there are insufficient spill temporaries:
197 in the worst case on x86, we'd need 3 of them, for insns like addl
198 (%reg1,%reg2,4) %reg3, since this insn uses all 3 regs as input.
202 :: [Reg] -- all allocatable regs
203 -> [Reg] -- the reserve regs
204 -> [Instr] -- instrs in
205 -> Maybe [Instr] -- instrs out
207 doGeneralAlloc all_regs reserve_regs instrs
208 -- succeeded without spilling
209 | --trace (showSDoc (
210 -- text "allocating with these regs" <+> ppr prespill_regs
212 -- text "giving code"
214 -- vcat (map pprInstr prespill_insns)
217 = Just prespill_insns
219 -- failed, and no spill regs avail, so pointless to attempt spilling
220 | null reserve_regs = Nothing
221 -- success after spilling
222 | postspill_ok = maybetrace (spillMsg True) (Just postspill_insns)
223 -- still not enough reserves after spilling; we have to give up
224 | otherwise = maybetrace (spillMsg False) Nothing
227 = filter (`notElem` reserve_regs) all_regs
228 (prespill_ok, prespill_insns)
229 = allocUsingTheseRegs instrs prespill_regs
230 instrs_with_spill_code
231 = insertSpillCode prespill_insns
232 (postspill_ok, postspill_insns)
233 = allocUsingTheseRegs instrs_with_spill_code all_regs
236 = "nativeGen: spilling "
237 ++ (if success then "succeeded" else "failed ")
239 ++ showSDoc (hsep (map ppr reserve_regs))
242 maybetrace msg x = trace msg x
248 Here we patch instructions that reference ``registers'' which are
249 really in memory somewhere (the mapping is under the control of the
250 machine-specific code generator). We place the appropriate load
251 sequences before any instructions that use memory registers as
252 sources, and we place the appropriate spill sequences after any
253 instructions that use memory registers as destinations. The offending
254 instructions are rewritten with new dynamic registers, so generalAlloc
255 has to run register allocation again after all of this is said and
258 On some architectures (x86, currently), we do without a frame-pointer,
259 and instead spill relative to the stack pointer (%esp on x86).
260 Because the stack pointer may move, the patcher needs to keep track of
261 the current stack pointer "delta". That's easy, because all it needs
262 to do is spot the DELTA bogus-insns which will have been inserted by
263 the relevant insn selector precisely so as to notify the spiller of
264 stack-pointer movement. The delta is passed to loadReg and spillReg,
265 since they generate the actual spill code. We expect the final delta
266 to be the same as the starting one (zero), reflecting the fact that
267 changes to the stack pointer should not extend beyond a basic block.
269 Finally, there is the issue of mapping an arbitrary set of unallocated
270 VirtualRegs into a contiguous sequence of spill slots. The failed
271 allocation will have left the code peppered with references to
272 VirtualRegs, each of which contains a unique. So we make an env which
273 maps these VirtualRegs to integers, starting from zero, and pass that
274 env through to loadReg and spillReg. There, they are used to look up
275 spill slot numbers for the uniques.
278 insertSpillCode :: [Instr] -> [Instr]
279 insertSpillCode insns
280 = let uniques_in_insns
283 (foldl unionRegSets emptyRegSet
284 (map vregs_in_insn insns)))
287 RU rds wrs -> filterRegSet isVirtualReg
288 (rds `unionRegSets` wrs)
289 vreg_to_slot_map :: FiniteMap Unique Int
291 = listToFM (zip uniques_in_insns [0..])
293 ((final_stack_delta, final_ctr), insnss)
294 = mapAccumL (patchInstr vreg_to_slot_map) (0,0) insns
296 if final_stack_delta == 0
298 else pprPanic "patchMem: non-zero final delta"
299 (int final_stack_delta)
302 -- patchInstr has as a running state two Ints, one the current stack delta,
303 -- needed to figure out offsets to stack slots on archs where we spill relative
304 -- to the stack pointer, as opposed to the frame pointer. The other is a
305 -- counter, used to manufacture new temporary register names.
307 patchInstr :: FiniteMap Unique Int -> (Int,Int) -> Instr -> ((Int,Int), [Instr])
308 patchInstr vreg_to_slot_map (delta,ctr) instr
310 | null memSrcs && null memDsts
311 = ((delta',ctr), [instr])
314 = ((delta',ctr'), loadSrcs ++ [instr'] ++ spillDsts)
316 delta' = case instr of DELTA d -> d ; _ -> delta
318 (RU srcs dsts) = regUsage instr
320 -- The instr being patched may mention several vregs -- those which
321 -- could not be assigned real registers. For each such vreg, we
322 -- invent a new vreg, used only around this instruction and nowhere
323 -- else. These new vregs replace the unallocatable vregs; they are
324 -- loaded from the spill area, the instruction is done with them,
325 -- and results if any are then written back to the spill area.
327 = nub (filter isVirtualReg
328 (regSetToList srcs ++ regSetToList dsts))
330 = length vregs_in_instr
332 = ctr + n_vregs_in_instr
334 = zip vregs_in_instr [ctr, ctr+1 ..]
338 = case [vi | (vreg', vi) <- vreg_env, vreg' == vreg] of
339 [i] -> case regClass vreg of
340 RcInteger -> VirtualRegI (mkPseudoUnique3 i)
341 RcFloat -> VirtualRegF (mkPseudoUnique3 i)
342 RcDouble -> VirtualRegD (mkPseudoUnique3 i)
343 _ -> pprPanic "patchInstr: unmapped VReg" (ppr vreg)
347 memSrcs = filter isVirtualReg (regSetToList srcs)
348 memDsts = filter isVirtualReg (regSetToList dsts)
350 loadSrcs = map load memSrcs
351 spillDsts = map spill memDsts
353 load mem = loadReg vreg_to_slot_map delta mem (mkTmpReg mem)
354 spill mem = spillReg vreg_to_slot_map delta' (mkTmpReg mem) mem
356 instr' = patchRegs instr mkTmpReg
359 allocUsingTheseRegs is the register allocator proper. It attempts
360 to allocate dynamic regs to real regs, given a list of real regs
361 which it may use. If it fails due to lack of real regs, the returned
362 instructions use what real regs there are, but will retain uses of
363 dynamic regs for which a real reg could not be found. It is these
364 leftover dynamic reg references which insertSpillCode will later
365 assign to spill slots.
367 Some implementation notes.
368 ~~~~~~~~~~~~~~~~~~~~~~~~~~
369 Instructions are numbered sequentially, starting at zero.
371 A flow edge (FE) is a pair of insn numbers (MkFE Int Int) denoting
372 a possible flow of control from the first insn to the second.
374 The input to the register allocator is a list of instructions, which
375 mention Regs. A Reg can be a RealReg -- a real machine reg -- or a
376 VirtualReg, which carries a unique. After allocation, all the
377 VirtualReg references will have been converted into RealRegs, and
378 possible some spill code will have been inserted.
380 The heart of the register allocator works in four phases.
382 1. (find_flow_edges) Calculate all the FEs for the code list.
383 Return them not as a [FE], but implicitly, as a pair of
384 Array Int [Int], being the successor and predecessor maps
387 2. (calc_liveness) Returns a FiniteMap FE RegSet. For each
388 FE, indicates the set of registers live on that FE. Note
389 that the set includes both RealRegs and VirtualRegs. The
390 former appear because the code could mention fixed register
391 usages, and we need to take them into account from the start.
393 3. (calc_live_range_sets) Invert the above mapping, giving a
394 FiniteMap Reg FeSet, indicating, for each virtual and real
395 reg mentioned in the code, which FEs it is live on.
397 4. (calc_vreg_to_rreg_mapping) For virtual reg, try and find
398 an allocatable real register for it. Each real register has
399 a "current commitment", indicating the set of FEs it is
400 currently live on. A virtual reg v can be assigned to
401 real reg r iff v's live-fe-set does not intersect with r's
402 current commitment fe-set. If the assignment is made,
403 v's live-fe-set is union'd into r's current commitment fe-set.
404 There is also the minor restriction that v and r must be of
405 the same register class (integer or floating).
407 Once this mapping is established, we simply apply it to the
408 input insns, and that's it.
410 If no suitable real register can be found, the vreg is mapped
411 to itself, and we deem allocation to have failed. The partially
412 allocated code is returned. The higher echelons of the allocator
413 (doGeneralAlloc and runRegAlloc) then cooperate to insert spill
414 code and re-run allocation, until a successful allocation is found.
417 allocUsingTheseRegs :: [Instr] -> [Reg] -> (Bool, [Instr])
418 allocUsingTheseRegs instrs available_real_regs
419 = let (all_vregs_mapped, v_to_r_mapping)
420 = calc_vreg_to_rreg_mapping instrs available_real_regs
422 = map (flip patchRegs sr) instrs
427 = case lookupFM v_to_r_mapping reg of
429 Nothing -> pprPanic "allocateUsingTheseRegs: unmapped vreg: "
432 --trace ("allocUsingTheseRegs: " ++ show available_real_regs) (
433 (all_vregs_mapped, new_insns)
437 -- the heart of the matter.
438 calc_vreg_to_rreg_mapping :: [Instr] -> [Reg] -> (Bool, FiniteMap Reg Reg)
439 calc_vreg_to_rreg_mapping insns available_real_regs
441 lr_sets :: FiniteMap Reg FeSet
442 lr_sets = calc_live_range_sets insns
444 -- lr_sets maps: vregs mentioned in insns to sets of live FEs
445 -- and also: rregs mentioned in insns to sets of live FEs
446 -- We need to extract the rreg mapping, and use it as the
447 -- initial real-register-commitment. Also, add to the initial
448 -- commitment, empty commitments for any real regs not
451 -- which real regs do we want to keep track of in the running
452 -- commitment mapping? Precisely the available_real_regs.
453 -- We don't care about real regs mentioned by insns which are
454 -- not in this list, since we're not allocating to them.
455 initial_rr_commitment :: FiniteMap Reg FeSet
456 initial_rr_commitment
458 case lookupFM lr_sets rreg of
459 Nothing -> emptyFeSet
460 Just fixed_use_fes -> fixed_use_fes
462 | rreg <- available_real_regs]
464 -- These are the vregs for which we actually have to (try to)
465 -- assign a real register. (ie, the whole reason we're here at all :)
466 vreg_liveness_list :: [(Reg, FeSet)]
467 vreg_liveness_list = filter (not.isRealReg.fst)
470 -- A loop, which attempts to assign each vreg to a rreg.
471 loop rr_commitment v_to_r_map []
473 loop rr_commitment v_to_r_map ((vreg,vreg_live_fes):not_yet_done)
475 -- find a real reg which is not live for any of vreg_live_fes
478 | (rreg,rreg_live_FEs) <- fmToList rr_commitment,
479 regClass vreg == regClass rreg,
480 isEmptyFeSet (intersectionFeSets rreg_live_FEs
485 [] -> -- bummer. No register is available. Just go on to
486 -- the next vreg, mapping the vreg to itself.
487 loop rr_commitment (addToFM v_to_r_map vreg vreg)
490 -> -- Hurrah! Found a free reg of the right class.
491 -- Now we need to update the RR commitment.
492 loop rr_commitment2 (addToFM v_to_r_map vreg r)
496 = addToFM_C unionFeSets rr_commitment r
499 -- the final vreg to rreg mapping
501 = loop initial_rr_commitment emptyFM vreg_liveness_list
502 -- did we succeed in mapping everyone to a real reg?
504 = all isRealReg (eltsFM vreg_assignment)
506 (allocation_succeeded, vreg_assignment)
510 -- calculate liveness, then produce the live range info
511 -- as a mapping of VRegs to the set of FEs on which they are live.
512 -- The difficult part is inverting the mapping of Reg -> FeSet
513 -- to produce a mapping FE -> RegSet.
515 calc_live_range_sets :: [Instr] -> FiniteMap Reg FeSet
516 calc_live_range_sets insns
518 -- this is the "original" (old) mapping
519 lis :: FiniteMap FE RegSet
520 lis = calc_liveness insns
522 -- establish the totality of reg names mentioned by the
523 -- insns, by scanning over the insns.
524 all_mentioned_regs :: RegSet
526 = foldl unionRegSets emptyRegSet
527 (map (\i -> case regUsage i of
528 RU rds wrs -> unionRegSets rds wrs)
531 -- Initial inverted mapping, from Reg to sets of FEs
532 initial_imap :: FiniteMap Reg FeSet
534 = listToFM [(reg, emptyFeSet)
535 | reg <- regSetToList all_mentioned_regs]
537 -- Update the new map with one element of the old map
538 upd_imap :: FiniteMap Reg FeSet -> (FE, RegSet)
539 -> FiniteMap Reg FeSet
540 upd_imap imap (fe, regset)
541 = foldl upd_1_imap imap (regSetToList regset)
544 = addToFM_C unionFeSets curr reg (unitFeSet fe)
546 -- the complete inverse mapping
547 final_imap :: FiniteMap Reg FeSet
549 = foldl upd_imap initial_imap (fmToList lis)
555 -- Given the insns, calculate the FEs, and then doing fixpointing to
556 -- figure out the set of live regs (virtual regs AND real regs) live
559 calc_liveness :: [Instr] -> FiniteMap FE RegSet
561 = let (pred_map, succ_map)
562 = find_flow_edges insns
564 -- We use the convention that if the current approximation
565 -- doesn't give a mapping for some FE, that FE maps to the
567 initial_approx, fixpoint :: FiniteMap FE RegSet
569 = mk_initial_approx 0 insns succ_map emptyFM
571 = fix_set initial_approx 1
572 -- If you want to live dangerously, and promise that the code
573 -- doesn't contain any loops (ie, there are no back edges in
574 -- the flow graph), you should be able to get away with this:
575 -- = upd_liveness_info pred_map succ_map insn_array initial_approx
576 -- But since I'm paranoid, and since it hardly makes any difference
577 -- to the compiler run-time (about 0.1%), I prefer to do the
578 -- the full fixpointing game.
581 = let n = length insns
582 in array (0, n-1) (zip [0..] insns)
584 sameSets [] [] = True
585 sameSets (c:cs) (n:ns) = eqRegSets c n && sameSets cs ns
588 fix_set curr_approx iter_number
590 = upd_liveness_info pred_map succ_map insn_array curr_approx
596 = sameSets curr_sets next_sets
598 = if same then curr_approx
599 else fix_set next_approx (iter_number+1)
601 --trace (let qqq (fe, regset)
602 -- = show fe ++ " " ++ show (regSetToList regset)
604 -- "\n::iteration " ++ show iter_number ++ "\n"
605 -- ++ (unlines . map qqq . fmToList)
606 -- next_approx ++"\n"
613 -- Create a correct initial approximation. For each instruction that
614 -- writes a register, we deem that the register is live on the
615 -- flow edges leaving the instruction. Subsequent iterations of
616 -- the liveness AbI augment this based purely on reads of regs, not
617 -- writes. We need to start off with at least this minimal write-
618 -- based information in order that writes to vregs which are never
619 -- used have non-empty live ranges. If we don't do that, we eventually
620 -- wind up assigning such vregs to any old real reg, since they don't
621 -- apparently conflict -- you can't conflict with an empty live range.
622 -- This kludge is unfortunate, but we need to do it to cover not only
623 -- writes to vregs which are never used, but also to deal correctly
624 -- with the fact that calls to C will trash the callee saves registers.
626 mk_initial_approx :: Int -> [Instr] -> Array Int [Int]
627 -> FiniteMap FE RegSet
628 -> FiniteMap FE RegSet
629 mk_initial_approx ino [] succ_map ia_so_far
631 mk_initial_approx ino (i:is) succ_map ia_so_far
633 = case regUsage i of RU rrr www -> www
635 = [case ino of { I# inoh ->
636 case ino_succ of { I# ino_succh ->
639 | ino_succ <- succ_map ! ino]
643 = loop fes (addToFM_C unionRegSets ia fe wrs)
646 = loop new_fes ia_so_far
648 mk_initial_approx (ino+1) is succ_map next_ia
651 -- Do one step in the liveness info calculation (AbI :). Given the
652 -- prior approximation (which tells you a subset of live VRegs+RRegs
653 -- for each flow edge), calculate new information for all FEs.
654 -- Rather than do this by iterating over FEs, it's easier to iterate
655 -- over insns, and update their incoming FEs.
657 upd_liveness_info :: Array Int [Int] -- instruction pred map
658 -> Array Int [Int] -- instruction succ map
659 -> Array Int Instr -- array of instructions
660 -> FiniteMap FE RegSet -- previous approx
661 -> FiniteMap FE RegSet -- improved approx
663 upd_liveness_info pred_map succ_map insn_array prev_approx
664 = do_insns hi prev_approx
666 (lo, hi) = bounds insn_array
668 enquireMapFE :: FiniteMap FE RegSet -> FE
671 = case lookupFM fm fe of
673 Nothing -> emptyRegSet
675 -- Work backwards, from the highest numbered insn to the lowest.
676 -- This is a heuristic which causes faster convergence to the
677 -- fixed point. In particular, for straight-line code with no
678 -- branches at all, arrives at the fixpoint in one iteration.
684 = [case ino of { I# inoh ->
685 case future_ino of { I# future_inoh ->
686 MkFE inoh future_inoh
688 | future_ino <- succ_map ! ino]
690 = map (enquireMapFE approx) fes_to_futures
692 = foldr unionRegSets emptyRegSet future_lives
695 = [case history_ino of { I# history_inoh ->
696 case ino of { I# inoh ->
697 MkFE history_inoh inoh
699 | history_ino <- pred_map ! ino]
701 = foldl update_one_history approx fes_from_histories
705 history_independent_component
706 = case regUsage insn of
709 (minusRegSets future_live wrs)
711 update_one_history :: FiniteMap FE RegSet
713 -> FiniteMap FE RegSet
714 update_one_history approx0 fe
715 = addToFM_C unionRegSets approx0 fe
716 history_independent_component
719 = do_insns (ino-1) new_approx
725 -- Extract the flow edges from a list of insns. Express the information
726 -- as two mappings, from insn number to insn numbers of predecessors,
727 -- and from insn number to insn numbers of successors. (Since that's
728 -- what we need to know when computing live ranges later). Instructions
729 -- are numbered starting at zero. This function is long and complex
730 -- in order to be efficient; it could equally well be shorter and slower.
732 find_flow_edges :: [Instr] -> (Array Int [Int],
734 find_flow_edges insns
736 -- First phase: make a temp env which maps labels
737 -- to insn numbers, so the second pass can know the insn
738 -- numbers for jump targets.
740 label_env :: FiniteMap CLabel Int
742 mk_label_env n env [] = env
743 mk_label_env n env ((LABEL clbl):is)
744 = mk_label_env (n+1) (addToFM env clbl n) is
745 mk_label_env n env (i:is)
746 = mk_label_env (n+1) env is
748 label_env = mk_label_env 0 emptyFM insns
750 find_label :: CLabel -> Int
752 = case lookupFM label_env jmptarget of
754 Nothing -> pprPanic "find_flow_edges: unmapped label"
755 (pprCLabel jmptarget)
757 -- Second phase: traverse the insns, and make up the successor map.
759 least_ino, greatest_ino :: Int
761 greatest_ino = length insns - 1
763 mk_succ_map :: Int -> [(Int, [Int])] -> [Instr] -> [(Int, [Int])]
765 mk_succ_map i_num rsucc_map []
768 mk_succ_map i_num rsucc_map (i:is)
769 = let i_num_1 = i_num + 1
774 -> -- A non-local jump. We can regard this insn as a terminal
775 -- insn in the graph, so we don't add any edges.
776 mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
779 | null is -- this is the last insn, and it doesn't go anywhere
780 -- (a meaningless scenario); handle it anyway
781 -> mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
783 | otherwise -- flows to next insn; add fe i_num -> i_num+1
784 -> mk_succ_map i_num_1 ((i_num, [i_num_1]): rsucc_map)
787 Branch lab -- jmps to lab; add fe i_num -> i_target
788 -> let i_target = find_label lab
790 mk_succ_map i_num_1 ((i_num, [i_target]): rsucc_map) is
793 | null is -- jmps to label, or falls through, and this is
794 -- the last insn (a meaningless scenario);
796 -> error "find_flow_edges: NextOrBranch is last"
798 | otherwise -- add fes i_num -> i_num+1
799 -- and i_num -> i_target
800 -> let i_target = find_label lab
802 mk_succ_map i_num_1 ((i_num, [i_num_1, i_target]):rsucc_map)
805 -> -- A jump, whose targets are listed explicitly.
806 -- (Generated from table-based switch translations).
807 -- Add fes i_num -> x for each x in labels
808 let is_target = nub (map find_label labels)
810 mk_succ_map i_num_1 ((i_num, is_target):rsucc_map) is
812 -- Third phase: invert the successor map to get the predecessor
813 -- map, using an algorithm which is quadratic in the worst case,
814 -- but runs in almost-linear time, because of the nature of our
815 -- inputs: most insns have a single successor, the next insn.
817 invert :: [(Int, [Int])] -> [(Int, [Int])]
820 = concatMap ( \ (a, bs) -> [(b,a) | b <- bs] ) fmap
821 sorted_inverted_pairs
822 = isort inverted_pairs
824 grp :: Int -> [Int] -> [(Int,Int)] -> [(Int,[Int])]
825 grp k vs [] = [(k, vs)]
826 grp k vs ((kk,vv):rest)
827 | k == kk = grp k (vv:vs) rest
828 | otherwise = (k,vs) : grp kk [vv] rest
831 grp_start ((kk,vv):rest) = grp kk [vv] rest
834 = grp_start sorted_inverted_pairs
836 -- make sure that the reverse mapping maps all inos
838 | ino > greatest_ino = []
839 | otherwise = (ino,[]): add_empties (ino+1) []
840 add_empties ino ((k,vs):rest)
841 | ino < k = (ino,[]): add_empties (ino+1) ((k,vs):rest)
842 | ino == k = (k,vs) : add_empties (ino+1) rest
844 -- This is nearly linear provided that the fsts of the
845 -- list are nearly in order -- a critical assumption
847 isort :: [(Int,Int)] -> [(Int,Int)]
849 isort (x:xs) = insert x (isort xs)
851 insert :: (Int,Int) -> [(Int,Int)] -> [(Int,Int)]
854 -- specifically, this first test should almost always
855 -- be True in order for the near-linearity to happen
856 | fst y <= fst z = y:z:zs
857 | otherwise = z: insert y zs
859 add_empties least_ino grouped
864 = mk_succ_map 0 [] insns
866 = array (least_ino, greatest_ino) succ_list
870 = array (least_ino, greatest_ino) pred_list
875 -- That's all, folks! From here on is just some dull supporting stuff.
877 -- A data type for flow edges
879 = MkFE Int# Int# deriving (Eq, Ord)
881 -- deriving Show on types with unboxed fields doesn't work
882 instance Show FE where
883 showsPrec _ (MkFE s d)
884 = showString "MkFE" . shows (I# s) . shows ' ' . shows (I# d)
886 -- Blargh. Use ghc stuff soon! Or: perhaps that's not such a good
887 -- idea. Most of these sets are either empty or very small, and it
888 -- might be that the overheads of the FiniteMap based set implementation
889 -- is a net loss. The same might be true of RegSets.
891 newtype FeSet = MkFeSet [FE]
894 = MkFeSet (nukeDups (sort xs))
895 where nukeDups :: [FE] -> [FE]
899 = if x == y then nukeDups (y:xys)
900 else x : nukeDups (y:xys)
902 feSetToList (MkFeSet xs) = xs
903 isEmptyFeSet (MkFeSet xs) = null xs
904 emptyFeSet = MkFeSet []
905 eqFeSet (MkFeSet xs1) (MkFeSet xs2) = xs1 == xs2
906 unitFeSet x = MkFeSet [x]
908 elemFeSet x (MkFeSet xs)
912 f (y:ys) | x == y = True
916 unionFeSets (MkFeSet xs1) (MkFeSet xs2)
917 = MkFeSet (f xs1 xs2)
922 | a < b = a : f as (b:bs)
923 | a > b = b : f (a:as) bs
924 | otherwise = a : f as bs
926 minusFeSets (MkFeSet xs1) (MkFeSet xs2)
927 = MkFeSet (f xs1 xs2)
932 | a < b = a : f as (b:bs)
933 | a > b = f (a:as) bs
934 | otherwise = f as bs
936 intersectionFeSets (MkFeSet xs1) (MkFeSet xs2)
937 = MkFeSet (f xs1 xs2)
942 | a < b = f as (b:bs)
943 | a > b = f (a:as) bs
944 | otherwise = a : f as bs