2 % (c) The AQUA Project, Glasgow University, 1993-2000
4 \section[AsmRegAlloc]{Register allocator}
7 module AsmRegAlloc ( runRegAllocate ) where
9 #include "HsVersions.h"
11 import MachCode ( InstrBlock )
12 import MachMisc ( Instr(..) )
13 import PprMach ( pprInstr ) -- Just for debugging
17 import FiniteMap ( FiniteMap, emptyFM, addListToFM, delListFromFM,
18 lookupFM, keysFM, eltsFM, mapFM, addToFM_C, addToFM,
19 listToFM, fmToList, lookupWithDefaultFM )
20 import Unique ( mkBuiltinUnique )
21 import OrdList ( unitOL, appOL, fromOL, concatOL )
23 import Unique ( Unique, Uniquable(..), mkPseudoUnique3 )
24 import CLabel ( CLabel, pprCLabel )
27 import List ( mapAccumL, nub, sort )
28 import Array ( Array, array, (!), bounds )
31 This is the generic register allocator. It does allocation for all
32 architectures. Details for specific architectures are given in
33 RegAllocInfo.lhs. In practice the allocator needs to know next to
34 nothing about an architecture to do its job:
36 * It needs to be given a list of the registers it can allocate to.
38 * It needs to be able to find out which registers each insn reads and
41 * It needs be able to change registers in instructions into other
44 * It needs to be able to find out where execution could go after an
47 * It needs to be able to discover sets of registers which can be
48 used to attempt spilling.
50 First we try something extremely simple. If that fails, we have to do
56 -> ([Instr] -> [[Reg]])
60 runRegAllocate regs find_reserve_regs instrs
61 = --trace ("runRegAllocate: " ++ show regs) (
63 Just simple -> --trace "SIMPLE"
65 Nothing -> --trace "GENERAL"
70 = pprPanic "nativeGen: spilling failed. Workaround: compile with -fvia-C.\n"
71 ( (text "reserves = " <> ppr reserves)
75 (vcat (map pprInstr flatInstrs))
77 tryGeneral (resv:resvs)
78 = case generalAlloc resv of
79 Just success -> success
80 Nothing -> tryGeneral resvs
82 reserves = find_reserve_regs flatInstrs
83 flatInstrs = fromOL instrs
84 simpleAlloc = doSimpleAlloc regs flatInstrs
85 generalAlloc resvd = doGeneralAlloc regs resvd flatInstrs
88 Rather than invoke the heavyweight machinery in @doGeneralAlloc@ for
89 each and every code block, we first try using this simple, fast and
90 utterly braindead allocator. In practice it handles about 60\% of the
91 code blocks really fast, even with only 3 integer registers available.
92 Since we can always give up and fall back to @doGeneralAlloc@,
93 @doSimpleAlloc@ is geared to handling the common case as fast as
94 possible. It will succeed only if:
96 * The code mentions registers only of integer class, not floating
99 * The code doesn't mention any real registers, so we don't have to
100 think about dodging and weaving to work around fixed register uses.
102 * The code mentions at most N virtual registers, where N is the number
103 of real registers for allocation.
105 If those conditions are satisfied, we simply trundle along the code,
106 doling out a real register every time we see mention of a new virtual
107 register. We either succeed at this, or give up when one of the above
108 three conditions is no longer satisfied.
111 doSimpleAlloc :: [Reg] -> [Instr] -> Maybe [Instr]
112 doSimpleAlloc available_real_regs instrs
113 = let available_iregs
114 = filter ((== RcInteger).regClass) available_real_regs
116 trundle :: [( {-Virtual-}Reg, {-Real-}Reg )]
121 trundle vreg_map uncommitted_rregs ris_done []
122 = Just (reverse ris_done)
123 trundle vreg_map uncommitted_rregs ris_done (i:is)
127 -- Mentions no regs? Move on quickly
128 | null rds_l && null wrs_l
129 -> trundle vreg_map uncommitted_rregs (i:ris_done) is
131 -- A case we can't be bothered to handle?
132 | any isFloatingOrReal rds_l || any isFloatingOrReal wrs_l
135 -- Update the rreg commitments, and map the insn
137 -> case upd_commitment (wrs_l++rds_l)
138 vreg_map uncommitted_rregs of
139 Nothing -- out of rregs; give up
141 Just (vreg_map2, uncommitted_rregs2)
142 -> let i2 = patchRegs i (subst_reg vreg_map2)
143 in trundle vreg_map2 uncommitted_rregs2
147 = isRealReg reg || regClass reg == RcFloat
148 || regClass reg == RcDouble
150 rds_l = regSetToList rds
151 wrs_l = regSetToList wrs
153 upd_commitment [] vr_map uncomm
154 = Just (vr_map, uncomm)
155 upd_commitment (reg:regs) vr_map uncomm
157 = upd_commitment regs vr_map uncomm
158 | reg `elem` (map fst vr_map)
159 = upd_commitment regs vr_map uncomm
163 = upd_commitment regs ((reg, head uncomm):vr_map)
167 -- If it's a RealReg, it must be STG-specific one
168 -- (Hp,Sp,BaseReg,etc), since regUsage filters them out,
169 -- so isFloatingOrReal would not have objected to it.
173 = case [rr | (vr,rr) <- vreg_map, vr == r] of
176 "doSimpleAlloc: unmapped VirtualReg"
179 trundle [] available_iregs [] instrs
182 From here onwards is the general register allocator and spiller. For
183 each flow edge (possible transition between instructions), we compute
184 which virtual and real registers are live on that edge. Then the
185 mapping is inverted, to give a mapping from register (virtual+real) to
186 sets of flow edges on which the register is live. Finally, we can use
187 those sets to decide whether a virtual reg v can be assigned to a real
188 reg r, by checking that v's live-edge-set does not intersect with r's
189 current live-edge-set. Having made that assignment, we then augment
190 r's current live-edge-set (its current commitment, you could say) with
193 doGeneralAlloc takes reserve_regs as the regs to use as spill
194 temporaries. First it tries to allocate using all regs except
195 reserve_regs. If that fails, it inserts spill code and tries again to
196 allocate regs, but this time with the spill temporaries available.
197 Even this might not work if there are insufficient spill temporaries:
198 in the worst case on x86, we'd need 3 of them, for insns like addl
199 (%reg1,%reg2,4) %reg3, since this insn uses all 3 regs as input.
203 :: [Reg] -- all allocatable regs
204 -> [Reg] -- the reserve regs
205 -> [Instr] -- instrs in
206 -> Maybe [Instr] -- instrs out
208 doGeneralAlloc all_regs reserve_regs instrs
209 -- succeeded without spilling
211 = Just prespill_insns
213 -- failed, and no spill regs avail, so pointless to attempt spilling
214 | null reserve_regs = Nothing
215 -- success after spilling
216 | postspill_ok = maybetrace (spillMsg True) (Just postspill_insns)
217 -- still not enough reserves after spilling; we have to give up
218 | otherwise = maybetrace (spillMsg False) Nothing
221 = filter (`notElem` reserve_regs) all_regs
222 (prespill_ok, prespill_insns)
223 = allocUsingTheseRegs instrs prespill_regs
224 instrs_with_spill_code
225 = insertSpillCode prespill_insns
226 (postspill_ok, postspill_insns)
227 = allocUsingTheseRegs instrs_with_spill_code all_regs
230 = "nativeGen: spilling "
231 ++ (if success then "succeeded" else "failed ")
233 ++ showSDoc (hsep (map ppr reserve_regs))
236 maybetrace msg x = trace msg x
242 Here we patch instructions that reference ``registers'' which are
243 really in memory somewhere (the mapping is under the control of the
244 machine-specific code generator). We place the appropriate load
245 sequences before any instructions that use memory registers as
246 sources, and we place the appropriate spill sequences after any
247 instructions that use memory registers as destinations. The offending
248 instructions are rewritten with new dynamic registers, so generalAlloc
249 has to run register allocation again after all of this is said and
252 On some architectures (x86, currently), we do without a frame-pointer,
253 and instead spill relative to the stack pointer (%esp on x86).
254 Because the stack pointer may move, the patcher needs to keep track of
255 the current stack pointer "delta". That's easy, because all it needs
256 to do is spot the DELTA bogus-insns which will have been inserted by
257 the relevant insn selector precisely so as to notify the spiller of
258 stack-pointer movement. The delta is passed to loadReg and spillReg,
259 since they generate the actual spill code. We expect the final delta
260 to be the same as the starting one (zero), reflecting the fact that
261 changes to the stack pointer should not extend beyond a basic block.
263 Finally, there is the issue of mapping an arbitrary set of unallocated
264 VirtualRegs into a contiguous sequence of spill slots. The failed
265 allocation will have left the code peppered with references to
266 VirtualRegs, each of which contains a unique. So we make an env which
267 maps these VirtualRegs to integers, starting from zero, and pass that
268 env through to loadReg and spillReg. There, they are used to look up
269 spill slot numbers for the uniques.
272 insertSpillCode :: [Instr] -> [Instr]
273 insertSpillCode insns
274 = let uniques_in_insns
277 (foldl unionRegSets emptyRegSet
278 (map vregs_in_insn insns)))
281 RU rds wrs -> filterRegSet isVirtualReg
282 (rds `unionRegSets` wrs)
283 vreg_to_slot_map :: FiniteMap Unique Int
285 = listToFM (zip uniques_in_insns [0..])
287 ((final_stack_delta, final_ctr), insnss)
288 = mapAccumL (patchInstr vreg_to_slot_map) (0,0) insns
290 if final_stack_delta == 0
292 else pprPanic "patchMem: non-zero final delta"
293 (int final_stack_delta)
296 -- patchInstr has as a running state two Ints, one the current stack delta,
297 -- needed to figure out offsets to stack slots on archs where we spill relative
298 -- to the stack pointer, as opposed to the frame pointer. The other is a
299 -- counter, used to manufacture new temporary register names.
301 patchInstr :: FiniteMap Unique Int -> (Int,Int) -> Instr -> ((Int,Int), [Instr])
302 patchInstr vreg_to_slot_map (delta,ctr) instr
304 | null memSrcs && null memDsts
305 = ((delta',ctr), [instr])
308 = ((delta',ctr'), loadSrcs ++ [instr'] ++ spillDsts)
310 delta' = case instr of DELTA d -> d ; _ -> delta
312 (RU srcs dsts) = regUsage instr
314 -- The instr being patched may mention several vregs -- those which
315 -- could not be assigned real registers. For each such vreg, we
316 -- invent a new vreg, used only around this instruction and nowhere
317 -- else. These new vregs replace the unallocatable vregs; they are
318 -- loaded from the spill area, the instruction is done with them,
319 -- and results if any are then written back to the spill area.
321 = nub (filter isVirtualReg
322 (regSetToList srcs ++ regSetToList dsts))
324 = length vregs_in_instr
326 = ctr + n_vregs_in_instr
328 = zip vregs_in_instr [ctr, ctr+1 ..]
332 = case [vi | (vreg', vi) <- vreg_env, vreg' == vreg] of
333 [i] -> case regClass vreg of
334 RcInteger -> VirtualRegI (mkPseudoUnique3 i)
335 RcFloat -> VirtualRegF (mkPseudoUnique3 i)
336 RcDouble -> VirtualRegD (mkPseudoUnique3 i)
337 _ -> pprPanic "patchInstr: unmapped VReg" (ppr vreg)
341 memSrcs = filter isVirtualReg (regSetToList srcs)
342 memDsts = filter isVirtualReg (regSetToList dsts)
344 loadSrcs = map load memSrcs
345 spillDsts = map spill memDsts
347 load mem = loadReg vreg_to_slot_map delta mem (mkTmpReg mem)
348 spill mem = spillReg vreg_to_slot_map delta' (mkTmpReg mem) mem
350 instr' = patchRegs instr mkTmpReg
353 allocUsingTheseRegs is the register allocator proper. It attempts
354 to allocate dynamic regs to real regs, given a list of real regs
355 which it may use. If it fails due to lack of real regs, the returned
356 instructions use what real regs there are, but will retain uses of
357 dynamic regs for which a real reg could not be found. It is these
358 leftover dynamic reg references which insertSpillCode will later
359 assign to spill slots.
361 Some implementation notes.
362 ~~~~~~~~~~~~~~~~~~~~~~~~~~
363 Instructions are numbered sequentially, starting at zero.
365 A flow edge (FE) is a pair of insn numbers (MkFE Int Int) denoting
366 a possible flow of control from the first insn to the second.
368 The input to the register allocator is a list of instructions, which
369 mention Regs. A Reg can be a RealReg -- a real machine reg -- or a
370 VirtualReg, which carries a unique. After allocation, all the
371 VirtualReg references will have been converted into RealRegs, and
372 possible some spill code will have been inserted.
374 The heart of the register allocator works in four phases.
376 1. (find_flow_edges) Calculate all the FEs for the code list.
377 Return them not as a [FE], but implicitly, as a pair of
378 Array Int [Int], being the successor and predecessor maps
381 2. (calc_liveness) Returns a FiniteMap FE RegSet. For each
382 FE, indicates the set of registers live on that FE. Note
383 that the set includes both RealRegs and VirtualRegs. The
384 former appear because the code could mention fixed register
385 usages, and we need to take them into account from the start.
387 3. (calc_live_range_sets) Invert the above mapping, giving a
388 FiniteMap Reg FeSet, indicating, for each virtual and real
389 reg mentioned in the code, which FEs it is live on.
391 4. (calc_vreg_to_rreg_mapping) For virtual reg, try and find
392 an allocatable real register for it. Each real register has
393 a "current commitment", indicating the set of FEs it is
394 currently live on. A virtual reg v can be assigned to
395 real reg r iff v's live-fe-set does not intersect with r's
396 current commitment fe-set. If the assignment is made,
397 v's live-fe-set is union'd into r's current commitment fe-set.
398 There is also the minor restriction that v and r must be of
399 the same register class (integer or floating).
401 Once this mapping is established, we simply apply it to the
402 input insns, and that's it.
404 If no suitable real register can be found, the vreg is mapped
405 to itself, and we deem allocation to have failed. The partially
406 allocated code is returned. The higher echelons of the allocator
407 (doGeneralAlloc and runRegAlloc) then cooperate to insert spill
408 code and re-run allocation, until a successful allocation is found.
411 allocUsingTheseRegs :: [Instr] -> [Reg] -> (Bool, [Instr])
412 allocUsingTheseRegs instrs available_real_regs
413 = let (all_vregs_mapped, v_to_r_mapping)
414 = calc_vreg_to_rreg_mapping instrs available_real_regs
416 = map (flip patchRegs sr) instrs
421 = case lookupFM v_to_r_mapping reg of
423 Nothing -> pprPanic "allocateUsingTheseRegs: unmapped vreg: "
426 --trace ("allocUsingTheseRegs: " ++ show available_real_regs) (
427 (all_vregs_mapped, new_insns)
431 -- the heart of the matter.
432 calc_vreg_to_rreg_mapping :: [Instr] -> [Reg] -> (Bool, FiniteMap Reg Reg)
433 calc_vreg_to_rreg_mapping insns available_real_regs
435 lr_sets :: FiniteMap Reg FeSet
436 lr_sets = calc_live_range_sets insns
438 -- lr_sets maps: vregs mentioned in insns to sets of live FEs
439 -- and also: rregs mentioned in insns to sets of live FEs
440 -- We need to extract the rreg mapping, and use it as the
441 -- initial real-register-commitment. Also, add to the initial
442 -- commitment, empty commitments for any real regs not
445 -- which real regs do we want to keep track of in the running
446 -- commitment mapping? Precisely the available_real_regs.
447 -- We don't care about real regs mentioned by insns which are
448 -- not in this list, since we're not allocating to them.
449 initial_rr_commitment :: FiniteMap Reg FeSet
450 initial_rr_commitment
452 case lookupFM lr_sets rreg of
453 Nothing -> emptyFeSet
454 Just fixed_use_fes -> fixed_use_fes
456 | rreg <- available_real_regs]
458 -- These are the vregs for which we actually have to (try to)
459 -- assign a real register. (ie, the whole reason we're here at all :)
460 vreg_liveness_list :: [(Reg, FeSet)]
461 vreg_liveness_list = filter (not.isRealReg.fst)
464 -- A loop, which attempts to assign each vreg to a rreg.
465 loop rr_commitment v_to_r_map []
467 loop rr_commitment v_to_r_map ((vreg,vreg_live_fes):not_yet_done)
469 -- find a real reg which is not live for any of vreg_live_fes
472 | (rreg,rreg_live_FEs) <- fmToList rr_commitment,
473 regClass vreg == regClass rreg,
474 isEmptyFeSet (intersectionFeSets rreg_live_FEs
479 [] -> -- bummer. No register is available. Just go on to
480 -- the next vreg, mapping the vreg to itself.
481 loop rr_commitment (addToFM v_to_r_map vreg vreg)
484 -> -- Hurrah! Found a free reg of the right class.
485 -- Now we need to update the RR commitment.
486 loop rr_commitment2 (addToFM v_to_r_map vreg r)
490 = addToFM_C unionFeSets rr_commitment r
493 -- the final vreg to rreg mapping
495 = loop initial_rr_commitment emptyFM vreg_liveness_list
496 -- did we succeed in mapping everyone to a real reg?
498 = all isRealReg (eltsFM vreg_assignment)
500 (allocation_succeeded, vreg_assignment)
504 -- calculate liveness, then produce the live range info
505 -- as a mapping of VRegs to the set of FEs on which they are live.
506 -- The difficult part is inverting the mapping of Reg -> FeSet
507 -- to produce a mapping FE -> RegSet.
509 calc_live_range_sets :: [Instr] -> FiniteMap Reg FeSet
510 calc_live_range_sets insns
512 -- this is the "original" (old) mapping
513 lis :: FiniteMap FE RegSet
514 lis = calc_liveness insns
516 -- establish the totality of reg names mentioned by the
517 -- insns, by scanning over the insns.
518 all_mentioned_regs :: RegSet
520 = foldl unionRegSets emptyRegSet
521 (map (\i -> case regUsage i of
522 RU rds wrs -> unionRegSets rds wrs)
525 -- Initial inverted mapping, from Reg to sets of FEs
526 initial_imap :: FiniteMap Reg FeSet
528 = listToFM [(reg, emptyFeSet)
529 | reg <- regSetToList all_mentioned_regs]
531 -- Update the new map with one element of the old map
532 upd_imap :: FiniteMap Reg FeSet -> (FE, RegSet)
533 -> FiniteMap Reg FeSet
534 upd_imap imap (fe, regset)
535 = foldl upd_1_imap imap (regSetToList regset)
538 = addToFM_C unionFeSets curr reg (unitFeSet fe)
540 -- the complete inverse mapping
541 final_imap :: FiniteMap Reg FeSet
543 = foldl upd_imap initial_imap (fmToList lis)
549 -- Given the insns, calculate the FEs, and then doing fixpointing to
550 -- figure out the set of live regs (virtual regs AND real regs) live
553 calc_liveness :: [Instr] -> FiniteMap FE RegSet
555 = let (pred_map, succ_map)
556 = find_flow_edges insns
558 -- We use the convention that if the current approximation
559 -- doesn't give a mapping for some FE, that FE maps to the
561 initial_approx, fixpoint :: FiniteMap FE RegSet
563 = mk_initial_approx 0 insns succ_map emptyFM
565 = fix_set initial_approx 1
566 -- If you want to live dangerously, and promise that the code
567 -- doesn't contain any loops (ie, there are no back edges in
568 -- the flow graph), you should be able to get away with this:
569 -- = upd_liveness_info pred_map succ_map insn_array initial_approx
570 -- But since I'm paranoid, and since it hardly makes any difference
571 -- to the compiler run-time (about 0.1%), I prefer to do the
572 -- the full fixpointing game.
575 = let n = length insns
576 in array (0, n-1) (zip [0..] insns)
578 sameSets [] [] = True
579 sameSets (c:cs) (n:ns) = eqRegSets c n && sameSets cs ns
582 fix_set curr_approx iter_number
584 = upd_liveness_info pred_map succ_map insn_array curr_approx
590 = sameSets curr_sets next_sets
592 = if same then curr_approx
593 else fix_set next_approx (iter_number+1)
595 --trace (let qqq (fe, regset)
596 -- = show fe ++ " " ++ show (regSetToList regset)
598 -- "\n::iteration " ++ show iter_number ++ "\n"
599 -- ++ (unlines . map qqq . fmToList)
600 -- next_approx ++"\n"
607 -- Create a correct initial approximation. For each instruction that
608 -- writes a register, we deem that the register is live on the
609 -- flow edges leaving the instruction. Subsequent iterations of
610 -- the liveness AbI augment this based purely on reads of regs, not
611 -- writes. We need to start off with at least this minimal write-
612 -- based information in order that writes to vregs which are never
613 -- used have non-empty live ranges. If we don't do that, we eventually
614 -- wind up assigning such vregs to any old real reg, since they don't
615 -- apparently conflict -- you can't conflict with an empty live range.
616 -- This kludge is unfortunate, but we need to do it to cover not only
617 -- writes to vregs which are never used, but also to deal correctly
618 -- with the fact that calls to C will trash the callee saves registers.
620 mk_initial_approx :: Int -> [Instr] -> Array Int [Int]
621 -> FiniteMap FE RegSet
622 -> FiniteMap FE RegSet
623 mk_initial_approx ino [] succ_map ia_so_far
625 mk_initial_approx ino (i:is) succ_map ia_so_far
627 = case regUsage i of RU rrr www -> www
629 = [case iUnbox ino of { inoh ->
630 case iUnbox ino_succ of { ino_succh ->
633 | ino_succ <- succ_map ! ino]
637 = loop fes (addToFM_C unionRegSets ia fe wrs)
640 = loop new_fes ia_so_far
642 mk_initial_approx (ino+1) is succ_map next_ia
645 -- Do one step in the liveness info calculation (AbI :). Given the
646 -- prior approximation (which tells you a subset of live VRegs+RRegs
647 -- for each flow edge), calculate new information for all FEs.
648 -- Rather than do this by iterating over FEs, it's easier to iterate
649 -- over insns, and update their incoming FEs.
651 upd_liveness_info :: Array Int [Int] -- instruction pred map
652 -> Array Int [Int] -- instruction succ map
653 -> Array Int Instr -- array of instructions
654 -> FiniteMap FE RegSet -- previous approx
655 -> FiniteMap FE RegSet -- improved approx
657 upd_liveness_info pred_map succ_map insn_array prev_approx
658 = do_insns hi prev_approx
660 (lo, hi) = bounds insn_array
662 enquireMapFE :: FiniteMap FE RegSet -> FE
665 = case lookupFM fm fe of
667 Nothing -> emptyRegSet
669 -- Work backwards, from the highest numbered insn to the lowest.
670 -- This is a heuristic which causes faster convergence to the
671 -- fixed point. In particular, for straight-line code with no
672 -- branches at all, arrives at the fixpoint in one iteration.
678 = [case iUnbox ino of { inoh ->
679 case iUnbox future_ino of { future_inoh ->
680 MkFE inoh future_inoh
682 | future_ino <- succ_map ! ino]
684 = map (enquireMapFE approx) fes_to_futures
686 = foldr unionRegSets emptyRegSet future_lives
689 = [case iUnbox history_ino of { history_inoh ->
690 case iUnbox ino of { inoh ->
691 MkFE history_inoh inoh
693 | history_ino <- pred_map ! ino]
695 = foldl update_one_history approx fes_from_histories
699 history_independent_component
700 = case regUsage insn of
703 (minusRegSets future_live wrs)
705 update_one_history :: FiniteMap FE RegSet
707 -> FiniteMap FE RegSet
708 update_one_history approx0 fe
709 = addToFM_C unionRegSets approx0 fe
710 history_independent_component
713 = do_insns (ino-1) new_approx
719 -- Extract the flow edges from a list of insns. Express the information
720 -- as two mappings, from insn number to insn numbers of predecessors,
721 -- and from insn number to insn numbers of successors. (Since that's
722 -- what we need to know when computing live ranges later). Instructions
723 -- are numbered starting at zero. This function is long and complex
724 -- in order to be efficient; it could equally well be shorter and slower.
726 find_flow_edges :: [Instr] -> (Array Int [Int],
728 find_flow_edges insns
730 -- First phase: make a temp env which maps labels
731 -- to insn numbers, so the second pass can know the insn
732 -- numbers for jump targets.
734 label_env :: FiniteMap CLabel Int
736 mk_label_env n env [] = env
737 mk_label_env n env ((LABEL clbl):is)
738 = mk_label_env (n+1) (addToFM env clbl n) is
739 mk_label_env n env (i:is)
740 = mk_label_env (n+1) env is
742 label_env = mk_label_env 0 emptyFM insns
744 find_label :: CLabel -> Int
746 = case lookupFM label_env jmptarget of
748 Nothing -> pprPanic "find_flow_edges: unmapped label"
749 (pprCLabel jmptarget)
751 -- Second phase: traverse the insns, and make up the successor map.
753 least_ino, greatest_ino :: Int
755 greatest_ino = length insns - 1
757 mk_succ_map :: Int -> [(Int, [Int])] -> [Instr] -> [(Int, [Int])]
759 mk_succ_map i_num rsucc_map []
762 mk_succ_map i_num rsucc_map (i:is)
763 = let i_num_1 = i_num + 1
768 -> -- A non-local jump. We can regard this insn as a terminal
769 -- insn in the graph, so we don't add any edges.
770 mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
773 | null is -- this is the last insn, and it doesn't go anywhere
774 -- (a meaningless scenario); handle it anyway
775 -> mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
777 | otherwise -- flows to next insn; add fe i_num -> i_num+1
778 -> mk_succ_map i_num_1 ((i_num, [i_num_1]): rsucc_map)
781 Branch lab -- jmps to lab; add fe i_num -> i_target
782 -> let i_target = find_label lab
784 mk_succ_map i_num_1 ((i_num, [i_target]): rsucc_map) is
787 | null is -- jmps to label, or falls through, and this is
788 -- the last insn (a meaningless scenario);
790 -> error "find_flow_edges: NextOrBranch is last"
792 | otherwise -- add fes i_num -> i_num+1
793 -- and i_num -> i_target
794 -> let i_target = find_label lab
796 mk_succ_map i_num_1 ((i_num, [i_num_1, i_target]):rsucc_map)
799 -> -- A jump, whose targets are listed explicitly.
800 -- (Generated from table-based switch translations).
801 -- Add fes i_num -> x for each x in labels
802 let is_target = nub (map find_label labels)
804 mk_succ_map i_num_1 ((i_num, is_target):rsucc_map) is
806 -- Third phase: invert the successor map to get the predecessor
807 -- map, using an algorithm which is quadratic in the worst case,
808 -- but runs in almost-linear time, because of the nature of our
809 -- inputs: most insns have a single successor, the next insn.
811 invert :: [(Int, [Int])] -> [(Int, [Int])]
814 = concatMap ( \ (a, bs) -> [(b,a) | b <- bs] ) fmap
815 sorted_inverted_pairs
816 = isort inverted_pairs
818 grp :: Int -> [Int] -> [(Int,Int)] -> [(Int,[Int])]
819 grp k vs [] = [(k, vs)]
820 grp k vs ((kk,vv):rest)
821 | k == kk = grp k (vv:vs) rest
822 | otherwise = (k,vs) : grp kk [vv] rest
825 grp_start ((kk,vv):rest) = grp kk [vv] rest
828 = grp_start sorted_inverted_pairs
830 -- make sure that the reverse mapping maps all inos
832 | ino > greatest_ino = []
833 | otherwise = (ino,[]): add_empties (ino+1) []
834 add_empties ino ((k,vs):rest)
835 | ino < k = (ino,[]): add_empties (ino+1) ((k,vs):rest)
836 | ino == k = (k,vs) : add_empties (ino+1) rest
838 -- This is nearly linear provided that the fsts of the
839 -- list are nearly in order -- a critical assumption
841 isort :: [(Int,Int)] -> [(Int,Int)]
843 isort (x:xs) = insert x (isort xs)
845 insert :: (Int,Int) -> [(Int,Int)] -> [(Int,Int)]
848 -- specifically, this first test should almost always
849 -- be True in order for the near-linearity to happen
850 | fst y <= fst z = y:z:zs
851 | otherwise = z: insert y zs
853 add_empties least_ino grouped
858 = mk_succ_map 0 [] insns
860 = array (least_ino, greatest_ino) succ_list
864 = array (least_ino, greatest_ino) pred_list
869 -- That's all, folks! From here on is just some dull supporting stuff.
871 -- A data type for flow edges
873 = MkFE FastInt FastInt deriving (Eq, Ord)
875 -- deriving Show on types with unboxed fields doesn't work
876 instance Show FE where
877 showsPrec _ (MkFE s d)
878 = showString "MkFE" . shows (iBox s) . shows ' ' . shows (iBox d)
880 -- Blargh. Use ghc stuff soon! Or: perhaps that's not such a good
881 -- idea. Most of these sets are either empty or very small, and it
882 -- might be that the overheads of the FiniteMap based set implementation
883 -- is a net loss. The same might be true of RegSets.
885 newtype FeSet = MkFeSet [FE]
888 = MkFeSet (nukeDups (sort xs))
889 where nukeDups :: [FE] -> [FE]
893 = if x == y then nukeDups (y:xys)
894 else x : nukeDups (y:xys)
896 feSetToList (MkFeSet xs) = xs
897 isEmptyFeSet (MkFeSet xs) = null xs
898 emptyFeSet = MkFeSet []
899 eqFeSet (MkFeSet xs1) (MkFeSet xs2) = xs1 == xs2
900 unitFeSet x = MkFeSet [x]
902 elemFeSet x (MkFeSet xs)
906 f (y:ys) | x == y = True
910 unionFeSets (MkFeSet xs1) (MkFeSet xs2)
911 = MkFeSet (f xs1 xs2)
916 | a < b = a : f as (b:bs)
917 | a > b = b : f (a:as) bs
918 | otherwise = a : f as bs
920 minusFeSets (MkFeSet xs1) (MkFeSet xs2)
921 = MkFeSet (f xs1 xs2)
926 | a < b = a : f as (b:bs)
927 | a > b = f (a:as) bs
928 | otherwise = f as bs
930 intersectionFeSets (MkFeSet xs1) (MkFeSet xs2)
931 = MkFeSet (f xs1 xs2)
936 | a < b = f as (b:bs)
937 | a > b = f (a:as) bs
938 | otherwise = a : f as bs