ghc/compiler/nativeGen/AsmRegAlloc.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-2000
   3 %
   4 \section[AsmRegAlloc]{Register allocator}
   5
   6 \begin{code}
   7 module AsmRegAlloc ( runRegAllocate ) where
   8
   9 #include "HsVersions.h"
  10
  11 import MachCode         ( InstrBlock )
  12 import MachMisc         ( Instr(..) )
  13 import MachRegs
  14 import RegAllocInfo
  15
  16 import FiniteMap        ( FiniteMap, emptyFM, addListToFM, delListFromFM,
  17                           lookupFM, keysFM, eltsFM, mapFM, addToFM_C, addToFM,
  18                           listToFM, fmToList, lookupWithDefaultFM )
  19 import Unique           ( mkBuiltinUnique )
  20 import OrdList          ( unitOL, appOL, fromOL, concatOL )
  21 import Outputable
  22 import Unique           ( Unique, Uniquable(..), mkPseudoUnique3 )
  23 import CLabel           ( CLabel, pprCLabel )
  24
  25 import List             ( mapAccumL, nub, sort )
  26 import Array            ( Array, array, (!), bounds )
  27 \end{code}
  28
  29 This is the generic register allocator.  It does allocation for all
  30 architectures.  Details for specific architectures are given in
  31 RegAllocInfo.lhs.  In practice the allocator needs to know next to
  32 nothing about an architecture to do its job:
  33
  34 * It needs to be given a list of the registers it can allocate to.
  35
  36 * It needs to be able to find out which registers each insn reads and
  37   writes.
  38
  39 * It needs be able to change registers in instructions into other
  40   registers.
  41
  42 * It needs to be able to find out where execution could go after an
  43   in instruction.
  44
  45 * It needs to be able to discover sets of registers which can be
  46   used to attempt spilling.
  47
  48 First we try something extremely simple.  If that fails, we have to do
  49 things the hard way.
  50
  51 \begin{code}
  52 runRegAllocate
  53     :: [Reg]
  54     -> ([Instr] -> [[Reg]])
  55     -> InstrBlock
  56     -> [Instr]
  57
  58 runRegAllocate regs find_reserve_regs instrs
  59   = --trace ("runRegAllocate: " ++ show regs) (
  60     case simpleAlloc of
  61        Just simple -> --trace "SIMPLE"
  62                       simple
  63        Nothing     -> --trace "GENERAL"
  64                       (tryGeneral reserves)
  65     --)
  66   where
  67     tryGeneral []
  68        = error "nativeGen: spilling failed.  Workaround: compile with -fvia-C.\n"
  69     tryGeneral (resv:resvs)
  70        = case generalAlloc resv of
  71             Just success -> success
  72             Nothing      -> tryGeneral resvs
  73
  74     reserves           = find_reserve_regs flatInstrs
  75     flatInstrs         = fromOL instrs
  76     simpleAlloc        = doSimpleAlloc regs flatInstrs
  77     generalAlloc resvd = doGeneralAlloc regs resvd flatInstrs
  78 \end{code}
  79
  80 Rather than invoke the heavyweight machinery in @doGeneralAlloc@ for
  81 each and every code block, we first try using this simple, fast and
  82 utterly braindead allocator.  In practice it handles about 60\% of the
  83 code blocks really fast, even with only 3 integer registers available.
  84 Since we can always give up and fall back to @doGeneralAlloc@,
  85 @doSimpleAlloc@ is geared to handling the common case as fast as
  86 possible.  It will succeed only if:
  87
  88 * The code mentions registers only of integer class, not floating
  89   class.
  90
  91 * The code doesn't mention any real registers, so we don't have to
  92   think about dodging and weaving to work around fixed register uses.
  93
  94 * The code mentions at most N virtual registers, where N is the number
  95   of real registers for allocation.
  96
  97 If those conditions are satisfied, we simply trundle along the code,
  98 doling out a real register every time we see mention of a new virtual
  99 register.  We either succeed at this, or give up when one of the above
 100 three conditions is no longer satisfied.
 101
 102 \begin{code}
 103 doSimpleAlloc :: [Reg] -> [Instr] -> Maybe [Instr]
 104 doSimpleAlloc available_real_regs instrs
 105    = let available_iregs
 106             = filter ((== RcInteger).regClass) available_real_regs
 107
 108          trundle :: [( {-Virtual-}Reg, {-Real-}Reg )]
 109                     -> [ {-Real-}Reg ]
 110                     -> [Instr]
 111                     -> [Instr]
 112                     -> Maybe [Instr]
 113          trundle vreg_map uncommitted_rregs ris_done []
 114             = Just (reverse ris_done)
 115          trundle vreg_map uncommitted_rregs ris_done (i:is)
 116             = case regUsage i of
 117                  RU rds wrs
 118
 119                     -- Mentions no regs?  Move on quickly
 120                     |  null rds_l && null wrs_l
 121                     -> trundle vreg_map uncommitted_rregs (i:ris_done) is
 122
 123                     -- A case we can't be bothered to handle?
 124                     |  any isFloatingOrReal rds_l || any isFloatingOrReal wrs_l
 125                     -> Nothing
 126
 127                     -- Update the rreg commitments, and map the insn
 128                     |  otherwise
 129                     -> case upd_commitment (wrs_l++rds_l)
 130                                            vreg_map uncommitted_rregs of
 131                           Nothing -- out of rregs; give up
 132                              -> Nothing
 133                           Just (vreg_map2, uncommitted_rregs2)
 134                              -> let i2 = patchRegs i (subst_reg vreg_map2)
 135                                 in  trundle vreg_map2 uncommitted_rregs2
 136                                             (i2:ris_done) is
 137                        where
 138                           isFloatingOrReal reg
 139                              = isRealReg reg || regClass reg == RcFloat
 140                                              || regClass reg == RcDouble
 141
 142                           rds_l = regSetToList rds
 143                           wrs_l = regSetToList wrs
 144
 145                           upd_commitment [] vr_map uncomm
 146                              = Just (vr_map, uncomm)
 147                           upd_commitment (reg:regs) vr_map uncomm
 148                              | isRealReg reg
 149                              = upd_commitment regs vr_map uncomm
 150                              | reg `elem` (map fst vr_map)
 151                              = upd_commitment regs vr_map uncomm
 152                              | null uncomm
 153                              = Nothing
 154                              | otherwise
 155                              = upd_commitment regs ((reg, head uncomm):vr_map)
 156                                                    (tail uncomm)
 157
 158                           subst_reg vreg_map r
 159                              -- If it's a RealReg, it must be STG-specific one
 160                              -- (Hp,Sp,BaseReg,etc), since regUsage filters them out,
 161                              -- so isFloatingOrReal would not have objected to it.
 162                              | isRealReg r
 163                              = r
 164                              | otherwise
 165                              = case [rr | (vr,rr) <- vreg_map, vr == r] of
 166                                   [rr2] -> rr2
 167                                   other -> pprPanic
 168                                               "doSimpleAlloc: unmapped VirtualReg"
 169                                               (ppr r)
 170      in
 171          trundle [] available_iregs [] instrs
 172 \end{code}
 173
 174 From here onwards is the general register allocator and spiller.  For
 175 each flow edge (possible transition between instructions), we compute
 176 which virtual and real registers are live on that edge.  Then the
 177 mapping is inverted, to give a mapping from register (virtual+real) to
 178 sets of flow edges on which the register is live.  Finally, we can use
 179 those sets to decide whether a virtual reg v can be assigned to a real
 180 reg r, by checking that v's live-edge-set does not intersect with r's
 181 current live-edge-set.  Having made that assignment, we then augment
 182 r's current live-edge-set (its current commitment, you could say) with
 183 v's live-edge-set.
 184
 185 doGeneralAlloc takes reserve_regs as the regs to use as spill
 186 temporaries.  First it tries to allocate using all regs except
 187 reserve_regs.  If that fails, it inserts spill code and tries again to
 188 allocate regs, but this time with the spill temporaries available.
 189 Even this might not work if there are insufficient spill temporaries:
 190 in the worst case on x86, we'd need 3 of them, for insns like addl
 191 (%reg1,%reg2,4) %reg3, since this insn uses all 3 regs as input.
 192
 193 \begin{code}
 194 doGeneralAlloc
 195     :: [Reg]            -- all allocatable regs
 196     -> [Reg]            -- the reserve regs
 197     -> [Instr]          -- instrs in
 198     -> Maybe [Instr]    -- instrs out
 199
 200 doGeneralAlloc all_regs reserve_regs instrs
 201    -- succeeded without spilling
 202    | prespill_ok        = Just prespill_insns
 203    -- failed, and no spill regs avail, so pointless to attempt spilling
 204    | null reserve_regs  = Nothing
 205    -- success after spilling
 206    | postspill_ok       = maybetrace (spillMsg True) (Just postspill_insns)
 207    -- still not enough reserves after spilling; we have to give up
 208    | otherwise          = maybetrace (spillMsg False) Nothing
 209      where
 210          prespill_regs
 211             = filter (`notElem` reserve_regs) all_regs
 212          (prespill_ok, prespill_insns)
 213             = allocUsingTheseRegs instrs prespill_regs
 214          instrs_with_spill_code
 215             = insertSpillCode prespill_insns
 216          (postspill_ok, postspill_insns)
 217             = allocUsingTheseRegs instrs_with_spill_code all_regs
 218
 219          spillMsg success
 220             = "nativeGen: spilling "
 221               ++ (if success then "succeeded" else "failed   ")
 222               ++ " using "
 223               ++ showSDoc (hsep (map ppr reserve_regs))
 224
 225 #        ifdef NCG_DEBUG
 226          maybetrace msg x = trace msg x
 227 #        else
 228          maybetrace msg x = x
 229 #        endif
 230 \end{code}
 231
 232 Here we patch instructions that reference ``registers'' which are
 233 really in memory somewhere (the mapping is under the control of the
 234 machine-specific code generator).  We place the appropriate load
 235 sequences before any instructions that use memory registers as
 236 sources, and we place the appropriate spill sequences after any
 237 instructions that use memory registers as destinations.  The offending
 238 instructions are rewritten with new dynamic registers, so generalAlloc
 239 has to run register allocation again after all of this is said and
 240 done.
 241
 242 On some architectures (x86, currently), we do without a frame-pointer,
 243 and instead spill relative to the stack pointer (%esp on x86).
 244 Because the stack pointer may move, the patcher needs to keep track of
 245 the current stack pointer "delta".  That's easy, because all it needs
 246 to do is spot the DELTA bogus-insns which will have been inserted by
 247 the relevant insn selector precisely so as to notify the spiller of
 248 stack-pointer movement.  The delta is passed to loadReg and spillReg,
 249 since they generate the actual spill code.  We expect the final delta
 250 to be the same as the starting one (zero), reflecting the fact that
 251 changes to the stack pointer should not extend beyond a basic block.
 252
 253 Finally, there is the issue of mapping an arbitrary set of unallocated
 254 VirtualRegs into a contiguous sequence of spill slots.  The failed
 255 allocation will have left the code peppered with references to
 256 VirtualRegs, each of which contains a unique.  So we make an env which
 257 maps these VirtualRegs to integers, starting from zero, and pass that
 258 env through to loadReg and spillReg.  There, they are used to look up
 259 spill slot numbers for the uniques.
 260
 261 \begin{code}
 262 insertSpillCode :: [Instr] -> [Instr]
 263 insertSpillCode insns
 264    = let uniques_in_insns
 265             = map getUnique
 266                   (regSetToList
 267                      (foldl unionRegSets emptyRegSet
 268                             (map vregs_in_insn insns)))
 269          vregs_in_insn i
 270             = case regUsage i of
 271                  RU rds wrs -> filterRegSet isVirtualReg
 272                                              (rds `unionRegSets` wrs)
 273          vreg_to_slot_map :: FiniteMap Unique Int
 274          vreg_to_slot_map
 275             = listToFM (zip uniques_in_insns [0..])
 276
 277          ((final_stack_delta, final_ctr), insnss)
 278             = mapAccumL (patchInstr vreg_to_slot_map) (0,0) insns
 279      in
 280          if   final_stack_delta == 0
 281          then concat insnss
 282          else pprPanic "patchMem: non-zero final delta"
 283                        (int final_stack_delta)
 284
 285
 286 -- patchInstr has as a running state two Ints, one the current stack delta,
 287 -- needed to figure out offsets to stack slots on archs where we spill relative
 288 -- to the stack pointer, as opposed to the frame pointer.  The other is a
 289 -- counter, used to manufacture new temporary register names.
 290
 291 patchInstr :: FiniteMap Unique Int -> (Int,Int) -> Instr -> ((Int,Int), [Instr])
 292 patchInstr vreg_to_slot_map (delta,ctr) instr
 293
 294  | null memSrcs && null memDsts
 295  = ((delta',ctr), [instr])
 296
 297  | otherwise
 298  = ((delta',ctr'), loadSrcs ++ [instr'] ++ spillDsts)
 299    where
 300         delta' = case instr of DELTA d -> d ; _ -> delta
 301
 302         (RU srcs dsts) = regUsage instr
 303
 304         -- The instr being patched may mention several vregs -- those which
 305         -- could not be assigned real registers.  For each such vreg, we
 306         -- invent a new vreg, used only around this instruction and nowhere
 307         -- else.  These new vregs replace the unallocatable vregs; they are
 308         -- loaded from the spill area, the instruction is done with them,
 309         -- and results if any are then written back to the spill area.
 310         vregs_in_instr
 311            = nub (filter isVirtualReg
 312                          (regSetToList srcs ++ regSetToList dsts))
 313         n_vregs_in_instr
 314            = length vregs_in_instr
 315         ctr'
 316            = ctr + n_vregs_in_instr
 317         vreg_env
 318            = zip vregs_in_instr [ctr, ctr+1 ..]
 319
 320         mkTmpReg vreg
 321            | isVirtualReg vreg
 322            = case [vi | (vreg', vi) <- vreg_env, vreg' == vreg] of
 323                 [i] -> case regClass vreg of
 324                           RcInteger -> VirtualRegI (mkPseudoUnique3 i)
 325                           RcFloat   -> VirtualRegF (mkPseudoUnique3 i)
 326                           RcDouble  -> VirtualRegD (mkPseudoUnique3 i)
 327                 _   -> pprPanic "patchInstr: unmapped VReg" (ppr vreg)
 328            | otherwise
 329            = vreg
 330
 331         memSrcs   = filter isVirtualReg (regSetToList srcs)
 332         memDsts   = filter isVirtualReg (regSetToList dsts)
 333
 334         loadSrcs  = map load  memSrcs
 335         spillDsts = map spill memDsts
 336
 337         load mem  = loadReg  vreg_to_slot_map delta  mem (mkTmpReg mem)
 338         spill mem = spillReg vreg_to_slot_map delta' (mkTmpReg mem) mem
 339
 340         instr'    = patchRegs instr mkTmpReg
 341 \end{code}
 342
 343 allocUsingTheseRegs is the register allocator proper.  It attempts
 344 to allocate dynamic regs to real regs, given a list of real regs
 345 which it may use.  If it fails due to lack of real regs, the returned
 346 instructions use what real regs there are, but will retain uses of
 347 dynamic regs for which a real reg could not be found.  It is these
 348 leftover dynamic reg references which insertSpillCode will later
 349 assign to spill slots.
 350
 351 Some implementation notes.
 352 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 353 Instructions are numbered sequentially, starting at zero.
 354
 355 A flow edge (FE) is a pair of insn numbers (MkFE Int Int) denoting
 356 a possible flow of control from the first insn to the second.
 357
 358 The input to the register allocator is a list of instructions, which
 359 mention Regs.  A Reg can be a RealReg -- a real machine reg -- or a
 360 VirtualReg, which carries a unique.  After allocation, all the
 361 VirtualReg references will have been converted into RealRegs, and
 362 possible some spill code will have been inserted.
 363
 364 The heart of the register allocator works in four phases.
 365
 366 1.  (find_flow_edges) Calculate all the FEs for the code list.
 367     Return them not as a [FE], but implicitly, as a pair of
 368     Array Int [Int], being the successor and predecessor maps
 369     for instructions.
 370
 371 2.  (calc_liveness) Returns a FiniteMap FE RegSet.  For each
 372     FE, indicates the set of registers live on that FE.  Note
 373     that the set includes both RealRegs and VirtualRegs.  The
 374     former appear because the code could mention fixed register
 375     usages, and we need to take them into account from the start.
 376
 377 3.  (calc_live_range_sets) Invert the above mapping, giving a
 378     FiniteMap Reg FeSet, indicating, for each virtual and real
 379     reg mentioned in the code, which FEs it is live on.
 380
 381 4.  (calc_vreg_to_rreg_mapping) For virtual reg, try and find
 382     an allocatable real register for it.  Each real register has
 383     a "current commitment", indicating the set of FEs it is
 384     currently live on.  A virtual reg v can be assigned to
 385     real reg r iff v's live-fe-set does not intersect with r's
 386     current commitment fe-set.  If the assignment is made,
 387     v's live-fe-set is union'd into r's current commitment fe-set.
 388     There is also the minor restriction that v and r must be of
 389     the same register class (integer or floating).
 390
 391     Once this mapping is established, we simply apply it to the
 392     input insns, and that's it.
 393
 394     If no suitable real register can be found, the vreg is mapped
 395     to itself, and we deem allocation to have failed.  The partially
 396     allocated code is returned.  The higher echelons of the allocator
 397     (doGeneralAlloc and runRegAlloc) then cooperate to insert spill
 398     code and re-run allocation, until a successful allocation is found.
 399 \begin{code}
 400
 401 allocUsingTheseRegs :: [Instr] -> [Reg] -> (Bool, [Instr])
 402 allocUsingTheseRegs instrs available_real_regs
 403    = let (all_vregs_mapped, v_to_r_mapping)
 404             = calc_vreg_to_rreg_mapping instrs available_real_regs
 405          new_insns
 406             = map (flip patchRegs sr) instrs
 407          sr reg
 408             | isRealReg reg
 409             = reg
 410             | otherwise
 411             = case lookupFM v_to_r_mapping reg of
 412                  Just r  -> r
 413                  Nothing -> pprPanic "allocateUsingTheseRegs: unmapped vreg: "
 414                                      (ppr reg)
 415      in
 416          --trace ("allocUsingTheseRegs: " ++ show available_real_regs) (
 417          (all_vregs_mapped, new_insns)
 418          --)
 419
 420
 421 -- the heart of the matter.
 422 calc_vreg_to_rreg_mapping :: [Instr] -> [Reg] -> (Bool, FiniteMap Reg Reg)
 423 calc_vreg_to_rreg_mapping insns available_real_regs
 424    = let
 425          lr_sets  :: FiniteMap Reg FeSet
 426          lr_sets = calc_live_range_sets insns
 427
 428          -- lr_sets maps: vregs mentioned in insns to sets of live FEs
 429          -- and also:     rregs mentioned in insns to sets of live FEs
 430          -- We need to extract the rreg mapping, and use it as the
 431          -- initial real-register-commitment.  Also, add to the initial
 432          -- commitment, empty commitments for any real regs not
 433          -- mentioned in it.
 434
 435          -- which real regs do we want to keep track of in the running
 436          -- commitment mapping?  Precisely the available_real_regs.
 437          -- We don't care about real regs mentioned by insns which are
 438          -- not in this list, since we're not allocating to them.
 439          initial_rr_commitment :: FiniteMap Reg FeSet
 440          initial_rr_commitment
 441             = listToFM [(rreg,
 442                          case lookupFM lr_sets rreg of
 443                             Nothing            -> emptyFeSet
 444                             Just fixed_use_fes -> fixed_use_fes
 445                         )
 446                         | rreg <- available_real_regs]
 447
 448          -- These are the vregs for which we actually have to (try to)
 449          -- assign a real register. (ie, the whole reason we're here at all :)
 450          vreg_liveness_list :: [(Reg, FeSet)]
 451          vreg_liveness_list = filter (not.isRealReg.fst)
 452                                      (fmToList lr_sets)
 453
 454          -- A loop, which attempts to assign each vreg to a rreg.
 455          loop rr_commitment v_to_r_map []
 456             = v_to_r_map
 457          loop rr_commitment v_to_r_map ((vreg,vreg_live_fes):not_yet_done)
 458             = let
 459                   -- find a real reg which is not live for any of vreg_live_fes
 460                   cand_reals
 461                      = [rreg
 462                            | (rreg,rreg_live_FEs) <- fmToList rr_commitment,
 463                               regClass vreg == regClass rreg,
 464                               isEmptyFeSet (intersectionFeSets rreg_live_FEs
 465                                                                vreg_live_fes)
 466                        ]
 467               in
 468                  case cand_reals of
 469                     [] -> -- bummer.  No register is available.  Just go on to
 470                           -- the next vreg, mapping the vreg to itself.
 471                           loop rr_commitment (addToFM v_to_r_map vreg vreg)
 472                                not_yet_done
 473                     (r:_)
 474                        -> -- Hurrah!  Found a free reg of the right class.
 475                           -- Now we need to update the RR commitment.
 476                           loop rr_commitment2 (addToFM v_to_r_map vreg r)
 477                                not_yet_done
 478                           where
 479                              rr_commitment2
 480                                 = addToFM_C unionFeSets rr_commitment r
 481                                             vreg_live_fes
 482
 483          -- the final vreg to rreg mapping
 484          vreg_assignment
 485             = loop initial_rr_commitment emptyFM vreg_liveness_list
 486          -- did we succeed in mapping everyone to a real reg?
 487          allocation_succeeded
 488             = all isRealReg (eltsFM vreg_assignment)
 489      in
 490          (allocation_succeeded, vreg_assignment)
 491
 492
 493
 494 -- calculate liveness, then produce the live range info
 495 -- as a mapping of VRegs to the set of FEs on which they are live.
 496 -- The difficult part is inverting the mapping of Reg -> FeSet
 497 -- to produce a mapping FE -> RegSet.
 498
 499 calc_live_range_sets :: [Instr] -> FiniteMap Reg FeSet
 500 calc_live_range_sets insns
 501    = let
 502          -- this is the "original" (old) mapping
 503          lis :: FiniteMap FE RegSet
 504          lis = calc_liveness insns
 505
 506          -- establish the totality of reg names mentioned by the
 507          -- insns, by scanning over the insns.
 508          all_mentioned_regs :: RegSet
 509          all_mentioned_regs
 510             = foldl unionRegSets emptyRegSet
 511                     (map (\i -> case regUsage i of
 512                                    RU rds wrs -> unionRegSets rds wrs)
 513                          insns)
 514
 515          -- Initial inverted mapping, from Reg to sets of FEs
 516          initial_imap :: FiniteMap Reg FeSet
 517          initial_imap
 518             = listToFM [(reg, emptyFeSet)
 519                         | reg <- regSetToList all_mentioned_regs]
 520
 521          -- Update the new map with one element of the old map
 522          upd_imap :: FiniteMap Reg FeSet -> (FE, RegSet)
 523                      -> FiniteMap Reg FeSet
 524          upd_imap imap (fe, regset)
 525              = foldl upd_1_imap imap (regSetToList regset)
 526                where
 527                   upd_1_imap curr reg
 528                      = addToFM_C unionFeSets curr reg (unitFeSet fe)
 529
 530          -- the complete inverse mapping
 531          final_imap :: FiniteMap Reg FeSet
 532          final_imap
 533              = foldl upd_imap initial_imap (fmToList lis)
 534      in
 535          final_imap
 536
 537
 538
 539 -- Given the insns, calculate the FEs, and then doing fixpointing to
 540 -- figure out the set of live regs (virtual regs AND real regs) live
 541 -- on each FE.
 542
 543 calc_liveness :: [Instr] -> FiniteMap FE RegSet
 544 calc_liveness insns
 545    = let (pred_map, succ_map)
 546             = find_flow_edges insns
 547
 548          -- We use the convention that if the current approximation
 549          -- doesn't give a mapping for some FE, that FE maps to the
 550          -- empty set.
 551          initial_approx, fixpoint :: FiniteMap FE RegSet
 552          initial_approx
 553             = mk_initial_approx 0 insns succ_map emptyFM
 554          fixpoint
 555             = fix_set initial_approx 1
 556               -- If you want to live dangerously, and promise that the code
 557               -- doesn't contain any loops (ie, there are no back edges in
 558               -- the flow graph), you should be able to get away with this:
 559               -- = upd_liveness_info pred_map succ_map insn_array initial_approx
 560               -- But since I'm paranoid, and since it hardly makes any difference
 561               -- to the compiler run-time (about 0.1%), I prefer to do the
 562               -- the full fixpointing game.
 563
 564          insn_array
 565             = let n = length insns
 566               in  array (0, n-1) (zip [0..] insns)
 567
 568          sameSets []     []       = True
 569          sameSets (c:cs) (n:ns)   = eqRegSets c n && sameSets cs ns
 570          sameSets _      _        = False
 571
 572          fix_set curr_approx iter_number
 573             = let next_approx
 574                      = upd_liveness_info pred_map succ_map insn_array curr_approx
 575                   curr_sets
 576                      = eltsFM curr_approx
 577                   next_sets
 578                      = eltsFM next_approx
 579                   same
 580                      = sameSets curr_sets next_sets
 581                   final_approx
 582                      = if same then curr_approx
 583                                else fix_set next_approx (iter_number+1)
 584               in
 585                   --trace (let qqq (fe, regset)
 586                   --             = show fe ++ "  " ++ show (regSetToList regset)
 587                   --       in
 588                   --          "\n::iteration " ++ show iter_number ++ "\n"
 589                   --          ++ (unlines . map qqq . fmToList)
 590                   --                               next_approx ++"\n"
 591                   --      )
 592                   final_approx
 593      in
 594          fixpoint
 595
 596
 597 -- Create a correct initial approximation.  For each instruction that
 598 -- writes a register, we deem that the register is live on the
 599 -- flow edges leaving the instruction.  Subsequent iterations of
 600 -- the liveness AbI augment this based purely on reads of regs, not
 601 -- writes.  We need to start off with at least this minimal write-
 602 -- based information in order that writes to vregs which are never
 603 -- used have non-empty live ranges.  If we don't do that, we eventually
 604 -- wind up assigning such vregs to any old real reg, since they don't
 605 -- apparently conflict -- you can't conflict with an empty live range.
 606 -- This kludge is unfortunate, but we need to do it to cover not only
 607 -- writes to vregs which are never used, but also to deal correctly
 608 -- with the fact that calls to C will trash the callee saves registers.
 609
 610 mk_initial_approx :: Int -> [Instr] -> Array Int [Int]
 611                      -> FiniteMap FE RegSet
 612                      -> FiniteMap FE RegSet
 613 mk_initial_approx ino [] succ_map ia_so_far
 614    = ia_so_far
 615 mk_initial_approx ino (i:is) succ_map ia_so_far
 616    = let wrs
 617             = case regUsage i of RU rrr www -> www
 618          new_fes
 619             = [case ino of      { I# inoh ->
 620                case ino_succ of { I# ino_succh ->
 621                MkFE inoh ino_succh
 622                }}
 623                   | ino_succ <- succ_map ! ino]
 624
 625          loop [] ia = ia
 626          loop (fe:fes) ia
 627             = loop fes (addToFM_C unionRegSets ia fe wrs)
 628
 629          next_ia
 630             = loop new_fes ia_so_far
 631      in
 632          mk_initial_approx (ino+1) is succ_map next_ia
 633
 634
 635 -- Do one step in the liveness info calculation (AbI :).  Given the
 636 -- prior approximation (which tells you a subset of live VRegs+RRegs
 637 -- for each flow edge), calculate new information for all FEs.
 638 -- Rather than do this by iterating over FEs, it's easier to iterate
 639 -- over insns, and update their incoming FEs.
 640
 641 upd_liveness_info :: Array Int [Int]         -- instruction pred map
 642                      -> Array Int [Int]      -- instruction succ map
 643                      -> Array Int Instr      -- array of instructions
 644                      -> FiniteMap FE RegSet  -- previous approx
 645                      -> FiniteMap FE RegSet  -- improved approx
 646
 647 upd_liveness_info pred_map succ_map insn_array prev_approx
 648    = do_insns hi prev_approx
 649      where
 650         (lo, hi) = bounds insn_array
 651
 652         enquireMapFE :: FiniteMap FE RegSet -> FE
 653                         -> RegSet
 654         enquireMapFE fm fe
 655            = case lookupFM fm fe of
 656                 Just set -> set
 657                 Nothing  -> emptyRegSet
 658
 659         -- Work backwards, from the highest numbered insn to the lowest.
 660         -- This is a heuristic which causes faster convergence to the
 661         -- fixed point.  In particular, for straight-line code with no
 662         -- branches at all, arrives at the fixpoint in one iteration.
 663         do_insns ino approx
 664            | ino < lo
 665            = approx
 666            | otherwise
 667            = let fes_to_futures
 668                     = [case ino of        { I# inoh ->
 669                        case future_ino of { I# future_inoh ->
 670                        MkFE inoh future_inoh
 671                        }}
 672                           | future_ino <- succ_map ! ino]
 673                  future_lives
 674                     = map (enquireMapFE approx) fes_to_futures
 675                  future_live
 676                     = foldr unionRegSets emptyRegSet future_lives
 677
 678                  fes_from_histories
 679                     = [case history_ino of { I# history_inoh ->
 680                        case ino of         { I# inoh ->
 681                        MkFE history_inoh inoh
 682                        }}
 683                           | history_ino <- pred_map ! ino]
 684                  new_approx
 685                     = foldl update_one_history approx fes_from_histories
 686
 687                  insn
 688                     = insn_array ! ino
 689                  history_independent_component
 690                     = case regUsage insn of
 691                          RU rds wrs
 692                             -> unionRegSets rds
 693                                   (minusRegSets future_live wrs)
 694
 695                  update_one_history :: FiniteMap FE RegSet
 696                                        -> FE
 697                                        -> FiniteMap FE RegSet
 698                  update_one_history approx0 fe
 699                       = addToFM_C unionRegSets approx0 fe
 700                                   history_independent_component
 701
 702                  rest_done
 703                     = do_insns (ino-1) new_approx
 704              in
 705                  rest_done
 706
 707
 708
 709 -- Extract the flow edges from a list of insns.  Express the information
 710 -- as two mappings, from insn number to insn numbers of predecessors,
 711 -- and from insn number to insn numbers of successors.  (Since that's
 712 -- what we need to know when computing live ranges later).  Instructions
 713 -- are numbered starting at zero.  This function is long and complex
 714 -- in order to be efficient; it could equally well be shorter and slower.
 715
 716 find_flow_edges :: [Instr] -> (Array Int [Int],
 717                                Array Int [Int])
 718 find_flow_edges insns
 719    = let
 720          -- First phase: make a temp env which maps labels
 721          -- to insn numbers, so the second pass can know the insn
 722          -- numbers for jump targets.
 723
 724          label_env :: FiniteMap CLabel Int
 725
 726          mk_label_env n env [] = env
 727          mk_label_env n env ((LABEL clbl):is)
 728             = mk_label_env (n+1) (addToFM env clbl n) is
 729          mk_label_env n env (i:is)
 730             = mk_label_env (n+1) env is
 731
 732          label_env = mk_label_env 0 emptyFM insns
 733
 734          find_label :: CLabel -> Int
 735          find_label jmptarget
 736             = case lookupFM label_env jmptarget of
 737                  Just ino -> ino
 738                  Nothing  -> pprPanic "find_flow_edges: unmapped label"
 739                                       (pprCLabel jmptarget)
 740
 741          -- Second phase: traverse the insns, and make up the successor map.
 742
 743          least_ino, greatest_ino :: Int
 744          least_ino    = 0
 745          greatest_ino = length insns - 1
 746
 747          mk_succ_map :: Int -> [(Int, [Int])] -> [Instr] -> [(Int, [Int])]
 748
 749          mk_succ_map i_num rsucc_map []
 750             = reverse rsucc_map
 751
 752          mk_succ_map i_num rsucc_map (i:is)
 753             = let i_num_1 = i_num + 1
 754               in
 755               case insnFuture i of
 756
 757                  NoFuture
 758                     -> -- A non-local jump.  We can regard this insn as a terminal
 759                        -- insn in the graph, so we don't add any edges.
 760                        mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 761
 762                  Next
 763                     |  null is -- this is the last insn, and it doesn't go anywhere
 764                                -- (a meaningless scenario); handle it anyway
 765                     -> mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 766
 767                     |  otherwise -- flows to next insn; add fe i_num -> i_num+1
 768                     -> mk_succ_map i_num_1 ((i_num, [i_num_1]): rsucc_map)
 769                                            is
 770
 771                  Branch lab -- jmps to lab; add fe i_num -> i_target
 772                     -> let i_target = find_label lab
 773                        in
 774                        mk_succ_map i_num_1 ((i_num, [i_target]): rsucc_map)
 775                                            is
 776                  NextOrBranch lab
 777                     |  null is   -- jmps to label, or falls through, and this is
 778                                  -- the last insn (a meaningless scenario);
 779                                  -- flag an error
 780                     -> error "find_flow_edges: NextOrBranch is last"
 781
 782                     |  otherwise -- add fes i_num -> i_num+1
 783                                  --     and i_num -> i_target
 784                     -> let i_target = find_label lab
 785                        in
 786                        mk_succ_map i_num_1 ((i_num, [i_num_1, i_target]):rsucc_map)
 787                                            is
 788
 789          -- Third phase: invert the successor map to get the predecessor
 790          -- map, using an algorithm which is quadratic in the worst case,
 791          -- but runs in almost-linear time, because of the nature of our
 792          -- inputs: most insns have a single successor, the next insn.
 793
 794          invert :: [(Int, [Int])] -> [(Int, [Int])]
 795          invert fmap
 796             = let inverted_pairs
 797                      = concatMap ( \ (a, bs) -> [(b,a) | b <- bs] ) fmap
 798                   sorted_inverted_pairs
 799                      = isort inverted_pairs
 800
 801                   grp :: Int -> [Int] -> [(Int,Int)] -> [(Int,[Int])]
 802                   grp k vs [] = [(k, vs)]
 803                   grp k vs ((kk,vv):rest)
 804                      | k == kk   = grp k (vv:vs) rest
 805                      | otherwise = (k,vs) : grp kk [vv] rest
 806
 807                   grp_start []             = []
 808                   grp_start ((kk,vv):rest) = grp kk [vv] rest
 809
 810                   grouped
 811                      = grp_start sorted_inverted_pairs
 812
 813                   -- make sure that the reverse mapping maps all inos
 814                   add_empties ino []
 815                      | ino > greatest_ino  = []
 816                      | otherwise           = (ino,[]): add_empties (ino+1) []
 817                   add_empties ino ((k,vs):rest)
 818                      | ino <  k   = (ino,[]): add_empties (ino+1) ((k,vs):rest)
 819                      | ino == k   = (k,vs) : add_empties (ino+1) rest
 820
 821                   -- This is nearly linear provided that the fsts of the
 822                   -- list are nearly in order -- a critical assumption
 823                   -- for efficiency.
 824                   isort :: [(Int,Int)] -> [(Int,Int)]
 825                   isort []     = []
 826                   isort (x:xs) = insert x (isort xs)
 827
 828                   insert :: (Int,Int) -> [(Int,Int)] -> [(Int,Int)]
 829                   insert y []     = [y]
 830                   insert y (z:zs)
 831                      -- specifically, this first test should almost always
 832                      -- be True in order for the near-linearity to happen
 833                      | fst y <= fst z  = y:z:zs
 834                      | otherwise       = z: insert y zs
 835               in
 836                  add_empties least_ino grouped
 837
 838          -- Finally ...
 839
 840          succ_list
 841             = mk_succ_map 0 [] insns
 842          succ_map
 843             = array (least_ino, greatest_ino) succ_list
 844          pred_list
 845             = invert succ_list
 846          pred_map
 847             = array (least_ino, greatest_ino) pred_list
 848      in
 849          (pred_map, succ_map)
 850
 851
 852 -- That's all, folks!  From here on is just some dull supporting stuff.
 853
 854 -- A data type for flow edges
 855 data FE
 856    = MkFE Int# Int# deriving (Eq, Ord)
 857
 858 -- deriving Show on types with unboxed fields doesn't work
 859 instance Show FE where
 860     showsPrec _ (MkFE s d)
 861        = showString "MkFE" . shows (I# s) . shows ' ' . shows (I# d)
 862
 863 -- Blargh.  Use ghc stuff soon!  Or: perhaps that's not such a good
 864 -- idea.  Most of these sets are either empty or very small, and it
 865 -- might be that the overheads of the FiniteMap based set implementation
 866 -- is a net loss.  The same might be true of RegSets.
 867
 868 newtype FeSet = MkFeSet [FE]
 869
 870 feSetFromList xs
 871    = MkFeSet (nukeDups (sort xs))
 872      where nukeDups :: [FE] -> [FE]
 873            nukeDups []  = []
 874            nukeDups [x] = [x]
 875            nukeDups (x:y:xys)
 876               = if x == y then nukeDups (y:xys)
 877                           else x : nukeDups (y:xys)
 878
 879 feSetToList (MkFeSet xs)            = xs
 880 isEmptyFeSet (MkFeSet xs)           = null xs
 881 emptyFeSet                          = MkFeSet []
 882 eqFeSet (MkFeSet xs1) (MkFeSet xs2) = xs1 == xs2
 883 unitFeSet x                         = MkFeSet [x]
 884
 885 elemFeSet x (MkFeSet xs)
 886    = f xs
 887      where
 888         f []     = False
 889         f (y:ys) | x == y    = True
 890                  | x < y     = False
 891                  | otherwise = f ys
 892
 893 unionFeSets (MkFeSet xs1) (MkFeSet xs2)
 894    = MkFeSet (f xs1 xs2)
 895      where
 896         f [] bs = bs
 897         f as [] = as
 898         f (a:as) (b:bs)
 899            | a < b      = a : f as (b:bs)
 900            | a > b      = b : f (a:as) bs
 901            | otherwise  = a : f as bs
 902
 903 minusFeSets (MkFeSet xs1) (MkFeSet xs2)
 904    = MkFeSet (f xs1 xs2)
 905      where
 906         f [] bs = []
 907         f as [] = as
 908         f (a:as) (b:bs)
 909            | a < b      = a : f as (b:bs)
 910            | a > b      = f (a:as) bs
 911            | otherwise  = f as bs
 912
 913 intersectionFeSets (MkFeSet xs1) (MkFeSet xs2)
 914    = MkFeSet (f xs1 xs2)
 915      where
 916         f [] bs = []
 917         f as [] = []
 918         f (a:as) (b:bs)
 919            | a < b      = f as (b:bs)
 920            | a > b      = f (a:as) bs
 921            | otherwise  = a : f as bs
 922
 923 \end{code}