ghc/compiler/nativeGen/AsmRegAlloc.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-2000
   3 %
   4 \section[AsmRegAlloc]{Register allocator}
   5
   6 \begin{code}
   7 module AsmRegAlloc ( runRegAllocate ) where
   8
   9 #include "HsVersions.h"
  10
  11 import MachCode         ( InstrBlock )
  12 import MachMisc         ( Instr(..) )
  13 import PprMach          ( pprInstr )    -- Just for debugging
  14 import MachRegs
  15 import RegAllocInfo
  16
  17 import FiniteMap        ( FiniteMap, emptyFM, addListToFM, delListFromFM,
  18                           lookupFM, keysFM, eltsFM, mapFM, addToFM_C, addToFM,
  19                           listToFM, fmToList, lookupWithDefaultFM )
  20 import Unique           ( mkBuiltinUnique )
  21 import OrdList          ( unitOL, appOL, fromOL, concatOL )
  22 import Outputable
  23 import Unique           ( Unique, Uniquable(..), mkPseudoUnique3 )
  24 import CLabel           ( CLabel, pprCLabel )
  25
  26 import List             ( mapAccumL, nub, sort )
  27 import Array            ( Array, array, (!), bounds )
  28 \end{code}
  29
  30 This is the generic register allocator.  It does allocation for all
  31 architectures.  Details for specific architectures are given in
  32 RegAllocInfo.lhs.  In practice the allocator needs to know next to
  33 nothing about an architecture to do its job:
  34
  35 * It needs to be given a list of the registers it can allocate to.
  36
  37 * It needs to be able to find out which registers each insn reads and
  38   writes.
  39
  40 * It needs be able to change registers in instructions into other
  41   registers.
  42
  43 * It needs to be able to find out where execution could go after an
  44   in instruction.
  45
  46 * It needs to be able to discover sets of registers which can be
  47   used to attempt spilling.
  48
  49 First we try something extremely simple.  If that fails, we have to do
  50 things the hard way.
  51
  52 \begin{code}
  53 runRegAllocate
  54     :: [Reg]
  55     -> ([Instr] -> [[Reg]])
  56     -> InstrBlock
  57     -> [Instr]
  58
  59 runRegAllocate regs find_reserve_regs instrs
  60   = --trace ("runRegAllocate: " ++ show regs) (
  61     case simpleAlloc of
  62        Just simple -> --trace "SIMPLE"
  63                       simple
  64        Nothing     -> --trace "GENERAL"
  65                       (tryGeneral reserves)
  66     --)
  67   where
  68     tryGeneral []
  69        = pprPanic "nativeGen: spilling failed.  Workaround: compile with -fvia-C.\n"
  70             ( (text "reserves = " <> ppr reserves)
  71               $$
  72               (text "code = ")
  73               $$
  74               (vcat (map pprInstr flatInstrs))
  75             )
  76     tryGeneral (resv:resvs)
  77        = case generalAlloc resv of
  78             Just success -> success
  79             Nothing      -> tryGeneral resvs
  80
  81     reserves           = find_reserve_regs flatInstrs
  82     flatInstrs         = fromOL instrs
  83     simpleAlloc        = doSimpleAlloc regs flatInstrs
  84     generalAlloc resvd = doGeneralAlloc regs resvd flatInstrs
  85 \end{code}
  86
  87 Rather than invoke the heavyweight machinery in @doGeneralAlloc@ for
  88 each and every code block, we first try using this simple, fast and
  89 utterly braindead allocator.  In practice it handles about 60\% of the
  90 code blocks really fast, even with only 3 integer registers available.
  91 Since we can always give up and fall back to @doGeneralAlloc@,
  92 @doSimpleAlloc@ is geared to handling the common case as fast as
  93 possible.  It will succeed only if:
  94
  95 * The code mentions registers only of integer class, not floating
  96   class.
  97
  98 * The code doesn't mention any real registers, so we don't have to
  99   think about dodging and weaving to work around fixed register uses.
 100
 101 * The code mentions at most N virtual registers, where N is the number
 102   of real registers for allocation.
 103
 104 If those conditions are satisfied, we simply trundle along the code,
 105 doling out a real register every time we see mention of a new virtual
 106 register.  We either succeed at this, or give up when one of the above
 107 three conditions is no longer satisfied.
 108
 109 \begin{code}
 110 doSimpleAlloc :: [Reg] -> [Instr] -> Maybe [Instr]
 111 doSimpleAlloc available_real_regs instrs
 112    = let available_iregs
 113             = filter ((== RcInteger).regClass) available_real_regs
 114
 115          trundle :: [( {-Virtual-}Reg, {-Real-}Reg )]
 116                     -> [ {-Real-}Reg ]
 117                     -> [Instr]
 118                     -> [Instr]
 119                     -> Maybe [Instr]
 120          trundle vreg_map uncommitted_rregs ris_done []
 121             = Just (reverse ris_done)
 122          trundle vreg_map uncommitted_rregs ris_done (i:is)
 123             = case regUsage i of
 124                  RU rds wrs
 125
 126                     -- Mentions no regs?  Move on quickly
 127                     |  null rds_l && null wrs_l
 128                     -> trundle vreg_map uncommitted_rregs (i:ris_done) is
 129
 130                     -- A case we can't be bothered to handle?
 131                     |  any isFloatingOrReal rds_l || any isFloatingOrReal wrs_l
 132                     -> Nothing
 133
 134                     -- Update the rreg commitments, and map the insn
 135                     |  otherwise
 136                     -> case upd_commitment (wrs_l++rds_l)
 137                                            vreg_map uncommitted_rregs of
 138                           Nothing -- out of rregs; give up
 139                              -> Nothing
 140                           Just (vreg_map2, uncommitted_rregs2)
 141                              -> let i2 = patchRegs i (subst_reg vreg_map2)
 142                                 in  trundle vreg_map2 uncommitted_rregs2
 143                                             (i2:ris_done) is
 144                        where
 145                           isFloatingOrReal reg
 146                              = isRealReg reg || regClass reg == RcFloat
 147                                              || regClass reg == RcDouble
 148
 149                           rds_l = regSetToList rds
 150                           wrs_l = regSetToList wrs
 151
 152                           upd_commitment [] vr_map uncomm
 153                              = Just (vr_map, uncomm)
 154                           upd_commitment (reg:regs) vr_map uncomm
 155                              | isRealReg reg
 156                              = upd_commitment regs vr_map uncomm
 157                              | reg `elem` (map fst vr_map)
 158                              = upd_commitment regs vr_map uncomm
 159                              | null uncomm
 160                              = Nothing
 161                              | otherwise
 162                              = upd_commitment regs ((reg, head uncomm):vr_map)
 163                                                    (tail uncomm)
 164
 165                           subst_reg vreg_map r
 166                              -- If it's a RealReg, it must be STG-specific one
 167                              -- (Hp,Sp,BaseReg,etc), since regUsage filters them out,
 168                              -- so isFloatingOrReal would not have objected to it.
 169                              | isRealReg r
 170                              = r
 171                              | otherwise
 172                              = case [rr | (vr,rr) <- vreg_map, vr == r] of
 173                                   [rr2] -> rr2
 174                                   other -> pprPanic
 175                                               "doSimpleAlloc: unmapped VirtualReg"
 176                                               (ppr r)
 177      in
 178          trundle [] available_iregs [] instrs
 179 \end{code}
 180
 181 From here onwards is the general register allocator and spiller.  For
 182 each flow edge (possible transition between instructions), we compute
 183 which virtual and real registers are live on that edge.  Then the
 184 mapping is inverted, to give a mapping from register (virtual+real) to
 185 sets of flow edges on which the register is live.  Finally, we can use
 186 those sets to decide whether a virtual reg v can be assigned to a real
 187 reg r, by checking that v's live-edge-set does not intersect with r's
 188 current live-edge-set.  Having made that assignment, we then augment
 189 r's current live-edge-set (its current commitment, you could say) with
 190 v's live-edge-set.
 191
 192 doGeneralAlloc takes reserve_regs as the regs to use as spill
 193 temporaries.  First it tries to allocate using all regs except
 194 reserve_regs.  If that fails, it inserts spill code and tries again to
 195 allocate regs, but this time with the spill temporaries available.
 196 Even this might not work if there are insufficient spill temporaries:
 197 in the worst case on x86, we'd need 3 of them, for insns like addl
 198 (%reg1,%reg2,4) %reg3, since this insn uses all 3 regs as input.
 199
 200 \begin{code}
 201 doGeneralAlloc
 202     :: [Reg]            -- all allocatable regs
 203     -> [Reg]            -- the reserve regs
 204     -> [Instr]          -- instrs in
 205     -> Maybe [Instr]    -- instrs out
 206
 207 doGeneralAlloc all_regs reserve_regs instrs
 208    -- succeeded without spilling
 209    | --trace (showSDoc (
 210      --   text "allocating with these regs" <+> ppr prespill_regs
 211      --   $$
 212      --   text "giving code"
 213      --   $$
 214      --   vcat (map pprInstr prespill_insns)
 215      --))
 216      prespill_ok
 217    = Just prespill_insns
 218
 219    -- failed, and no spill regs avail, so pointless to attempt spilling
 220    | null reserve_regs  = Nothing
 221    -- success after spilling
 222    | postspill_ok       = maybetrace (spillMsg True) (Just postspill_insns)
 223    -- still not enough reserves after spilling; we have to give up
 224    | otherwise          = maybetrace (spillMsg False) Nothing
 225      where
 226          prespill_regs
 227             = filter (`notElem` reserve_regs) all_regs
 228          (prespill_ok, prespill_insns)
 229             = allocUsingTheseRegs instrs prespill_regs
 230          instrs_with_spill_code
 231             = insertSpillCode prespill_insns
 232          (postspill_ok, postspill_insns)
 233             = allocUsingTheseRegs instrs_with_spill_code all_regs
 234
 235          spillMsg success
 236             = "nativeGen: spilling "
 237               ++ (if success then "succeeded" else "failed   ")
 238               ++ " using "
 239               ++ showSDoc (hsep (map ppr reserve_regs))
 240
 241 #        ifdef NCG_DEBUG
 242          maybetrace msg x = trace msg x
 243 #        else
 244          maybetrace msg x = x
 245 #        endif
 246 \end{code}
 247
 248 Here we patch instructions that reference ``registers'' which are
 249 really in memory somewhere (the mapping is under the control of the
 250 machine-specific code generator).  We place the appropriate load
 251 sequences before any instructions that use memory registers as
 252 sources, and we place the appropriate spill sequences after any
 253 instructions that use memory registers as destinations.  The offending
 254 instructions are rewritten with new dynamic registers, so generalAlloc
 255 has to run register allocation again after all of this is said and
 256 done.
 257
 258 On some architectures (x86, currently), we do without a frame-pointer,
 259 and instead spill relative to the stack pointer (%esp on x86).
 260 Because the stack pointer may move, the patcher needs to keep track of
 261 the current stack pointer "delta".  That's easy, because all it needs
 262 to do is spot the DELTA bogus-insns which will have been inserted by
 263 the relevant insn selector precisely so as to notify the spiller of
 264 stack-pointer movement.  The delta is passed to loadReg and spillReg,
 265 since they generate the actual spill code.  We expect the final delta
 266 to be the same as the starting one (zero), reflecting the fact that
 267 changes to the stack pointer should not extend beyond a basic block.
 268
 269 Finally, there is the issue of mapping an arbitrary set of unallocated
 270 VirtualRegs into a contiguous sequence of spill slots.  The failed
 271 allocation will have left the code peppered with references to
 272 VirtualRegs, each of which contains a unique.  So we make an env which
 273 maps these VirtualRegs to integers, starting from zero, and pass that
 274 env through to loadReg and spillReg.  There, they are used to look up
 275 spill slot numbers for the uniques.
 276
 277 \begin{code}
 278 insertSpillCode :: [Instr] -> [Instr]
 279 insertSpillCode insns
 280    = let uniques_in_insns
 281             = map getUnique
 282                   (regSetToList
 283                      (foldl unionRegSets emptyRegSet
 284                             (map vregs_in_insn insns)))
 285          vregs_in_insn i
 286             = case regUsage i of
 287                  RU rds wrs -> filterRegSet isVirtualReg
 288                                              (rds `unionRegSets` wrs)
 289          vreg_to_slot_map :: FiniteMap Unique Int
 290          vreg_to_slot_map
 291             = listToFM (zip uniques_in_insns [0..])
 292
 293          ((final_stack_delta, final_ctr), insnss)
 294             = mapAccumL (patchInstr vreg_to_slot_map) (0,0) insns
 295      in
 296          if   final_stack_delta == 0
 297          then concat insnss
 298          else pprPanic "patchMem: non-zero final delta"
 299                        (int final_stack_delta)
 300
 301
 302 -- patchInstr has as a running state two Ints, one the current stack delta,
 303 -- needed to figure out offsets to stack slots on archs where we spill relative
 304 -- to the stack pointer, as opposed to the frame pointer.  The other is a
 305 -- counter, used to manufacture new temporary register names.
 306
 307 patchInstr :: FiniteMap Unique Int -> (Int,Int) -> Instr -> ((Int,Int), [Instr])
 308 patchInstr vreg_to_slot_map (delta,ctr) instr
 309
 310  | null memSrcs && null memDsts
 311  = ((delta',ctr), [instr])
 312
 313  | otherwise
 314  = ((delta',ctr'), loadSrcs ++ [instr'] ++ spillDsts)
 315    where
 316         delta' = case instr of DELTA d -> d ; _ -> delta
 317
 318         (RU srcs dsts) = regUsage instr
 319
 320         -- The instr being patched may mention several vregs -- those which
 321         -- could not be assigned real registers.  For each such vreg, we
 322         -- invent a new vreg, used only around this instruction and nowhere
 323         -- else.  These new vregs replace the unallocatable vregs; they are
 324         -- loaded from the spill area, the instruction is done with them,
 325         -- and results if any are then written back to the spill area.
 326         vregs_in_instr
 327            = nub (filter isVirtualReg
 328                          (regSetToList srcs ++ regSetToList dsts))
 329         n_vregs_in_instr
 330            = length vregs_in_instr
 331         ctr'
 332            = ctr + n_vregs_in_instr
 333         vreg_env
 334            = zip vregs_in_instr [ctr, ctr+1 ..]
 335
 336         mkTmpReg vreg
 337            | isVirtualReg vreg
 338            = case [vi | (vreg', vi) <- vreg_env, vreg' == vreg] of
 339                 [i] -> case regClass vreg of
 340                           RcInteger -> VirtualRegI (mkPseudoUnique3 i)
 341                           RcFloat   -> VirtualRegF (mkPseudoUnique3 i)
 342                           RcDouble  -> VirtualRegD (mkPseudoUnique3 i)
 343                 _   -> pprPanic "patchInstr: unmapped VReg" (ppr vreg)
 344            | otherwise
 345            = vreg
 346
 347         memSrcs   = filter isVirtualReg (regSetToList srcs)
 348         memDsts   = filter isVirtualReg (regSetToList dsts)
 349
 350         loadSrcs  = map load  memSrcs
 351         spillDsts = map spill memDsts
 352
 353         load mem  = loadReg  vreg_to_slot_map delta  mem (mkTmpReg mem)
 354         spill mem = spillReg vreg_to_slot_map delta' (mkTmpReg mem) mem
 355
 356         instr'    = patchRegs instr mkTmpReg
 357 \end{code}
 358
 359 allocUsingTheseRegs is the register allocator proper.  It attempts
 360 to allocate dynamic regs to real regs, given a list of real regs
 361 which it may use.  If it fails due to lack of real regs, the returned
 362 instructions use what real regs there are, but will retain uses of
 363 dynamic regs for which a real reg could not be found.  It is these
 364 leftover dynamic reg references which insertSpillCode will later
 365 assign to spill slots.
 366
 367 Some implementation notes.
 368 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 369 Instructions are numbered sequentially, starting at zero.
 370
 371 A flow edge (FE) is a pair of insn numbers (MkFE Int Int) denoting
 372 a possible flow of control from the first insn to the second.
 373
 374 The input to the register allocator is a list of instructions, which
 375 mention Regs.  A Reg can be a RealReg -- a real machine reg -- or a
 376 VirtualReg, which carries a unique.  After allocation, all the
 377 VirtualReg references will have been converted into RealRegs, and
 378 possible some spill code will have been inserted.
 379
 380 The heart of the register allocator works in four phases.
 381
 382 1.  (find_flow_edges) Calculate all the FEs for the code list.
 383     Return them not as a [FE], but implicitly, as a pair of
 384     Array Int [Int], being the successor and predecessor maps
 385     for instructions.
 386
 387 2.  (calc_liveness) Returns a FiniteMap FE RegSet.  For each
 388     FE, indicates the set of registers live on that FE.  Note
 389     that the set includes both RealRegs and VirtualRegs.  The
 390     former appear because the code could mention fixed register
 391     usages, and we need to take them into account from the start.
 392
 393 3.  (calc_live_range_sets) Invert the above mapping, giving a
 394     FiniteMap Reg FeSet, indicating, for each virtual and real
 395     reg mentioned in the code, which FEs it is live on.
 396
 397 4.  (calc_vreg_to_rreg_mapping) For virtual reg, try and find
 398     an allocatable real register for it.  Each real register has
 399     a "current commitment", indicating the set of FEs it is
 400     currently live on.  A virtual reg v can be assigned to
 401     real reg r iff v's live-fe-set does not intersect with r's
 402     current commitment fe-set.  If the assignment is made,
 403     v's live-fe-set is union'd into r's current commitment fe-set.
 404     There is also the minor restriction that v and r must be of
 405     the same register class (integer or floating).
 406
 407     Once this mapping is established, we simply apply it to the
 408     input insns, and that's it.
 409
 410     If no suitable real register can be found, the vreg is mapped
 411     to itself, and we deem allocation to have failed.  The partially
 412     allocated code is returned.  The higher echelons of the allocator
 413     (doGeneralAlloc and runRegAlloc) then cooperate to insert spill
 414     code and re-run allocation, until a successful allocation is found.
 415 \begin{code}
 416
 417 allocUsingTheseRegs :: [Instr] -> [Reg] -> (Bool, [Instr])
 418 allocUsingTheseRegs instrs available_real_regs
 419    = let (all_vregs_mapped, v_to_r_mapping)
 420             = calc_vreg_to_rreg_mapping instrs available_real_regs
 421          new_insns
 422             = map (flip patchRegs sr) instrs
 423          sr reg
 424             | isRealReg reg
 425             = reg
 426             | otherwise
 427             = case lookupFM v_to_r_mapping reg of
 428                  Just r  -> r
 429                  Nothing -> pprPanic "allocateUsingTheseRegs: unmapped vreg: "
 430                                      (ppr reg)
 431      in
 432          --trace ("allocUsingTheseRegs: " ++ show available_real_regs) (
 433          (all_vregs_mapped, new_insns)
 434          --)
 435
 436
 437 -- the heart of the matter.
 438 calc_vreg_to_rreg_mapping :: [Instr] -> [Reg] -> (Bool, FiniteMap Reg Reg)
 439 calc_vreg_to_rreg_mapping insns available_real_regs
 440    = let
 441          lr_sets  :: FiniteMap Reg FeSet
 442          lr_sets = calc_live_range_sets insns
 443
 444          -- lr_sets maps: vregs mentioned in insns to sets of live FEs
 445          -- and also:     rregs mentioned in insns to sets of live FEs
 446          -- We need to extract the rreg mapping, and use it as the
 447          -- initial real-register-commitment.  Also, add to the initial
 448          -- commitment, empty commitments for any real regs not
 449          -- mentioned in it.
 450
 451          -- which real regs do we want to keep track of in the running
 452          -- commitment mapping?  Precisely the available_real_regs.
 453          -- We don't care about real regs mentioned by insns which are
 454          -- not in this list, since we're not allocating to them.
 455          initial_rr_commitment :: FiniteMap Reg FeSet
 456          initial_rr_commitment
 457             = listToFM [(rreg,
 458                          case lookupFM lr_sets rreg of
 459                             Nothing            -> emptyFeSet
 460                             Just fixed_use_fes -> fixed_use_fes
 461                         )
 462                         | rreg <- available_real_regs]
 463
 464          -- These are the vregs for which we actually have to (try to)
 465          -- assign a real register. (ie, the whole reason we're here at all :)
 466          vreg_liveness_list :: [(Reg, FeSet)]
 467          vreg_liveness_list = filter (not.isRealReg.fst)
 468                                      (fmToList lr_sets)
 469
 470          -- A loop, which attempts to assign each vreg to a rreg.
 471          loop rr_commitment v_to_r_map []
 472             = v_to_r_map
 473          loop rr_commitment v_to_r_map ((vreg,vreg_live_fes):not_yet_done)
 474             = let
 475                   -- find a real reg which is not live for any of vreg_live_fes
 476                   cand_reals
 477                      = [rreg
 478                            | (rreg,rreg_live_FEs) <- fmToList rr_commitment,
 479                               regClass vreg == regClass rreg,
 480                               isEmptyFeSet (intersectionFeSets rreg_live_FEs
 481                                                                vreg_live_fes)
 482                        ]
 483               in
 484                  case cand_reals of
 485                     [] -> -- bummer.  No register is available.  Just go on to
 486                           -- the next vreg, mapping the vreg to itself.
 487                           loop rr_commitment (addToFM v_to_r_map vreg vreg)
 488                                not_yet_done
 489                     (r:_)
 490                        -> -- Hurrah!  Found a free reg of the right class.
 491                           -- Now we need to update the RR commitment.
 492                           loop rr_commitment2 (addToFM v_to_r_map vreg r)
 493                                not_yet_done
 494                           where
 495                              rr_commitment2
 496                                 = addToFM_C unionFeSets rr_commitment r
 497                                             vreg_live_fes
 498
 499          -- the final vreg to rreg mapping
 500          vreg_assignment
 501             = loop initial_rr_commitment emptyFM vreg_liveness_list
 502          -- did we succeed in mapping everyone to a real reg?
 503          allocation_succeeded
 504             = all isRealReg (eltsFM vreg_assignment)
 505      in
 506          (allocation_succeeded, vreg_assignment)
 507
 508
 509
 510 -- calculate liveness, then produce the live range info
 511 -- as a mapping of VRegs to the set of FEs on which they are live.
 512 -- The difficult part is inverting the mapping of Reg -> FeSet
 513 -- to produce a mapping FE -> RegSet.
 514
 515 calc_live_range_sets :: [Instr] -> FiniteMap Reg FeSet
 516 calc_live_range_sets insns
 517    = let
 518          -- this is the "original" (old) mapping
 519          lis :: FiniteMap FE RegSet
 520          lis = calc_liveness insns
 521
 522          -- establish the totality of reg names mentioned by the
 523          -- insns, by scanning over the insns.
 524          all_mentioned_regs :: RegSet
 525          all_mentioned_regs
 526             = foldl unionRegSets emptyRegSet
 527                     (map (\i -> case regUsage i of
 528                                    RU rds wrs -> unionRegSets rds wrs)
 529                          insns)
 530
 531          -- Initial inverted mapping, from Reg to sets of FEs
 532          initial_imap :: FiniteMap Reg FeSet
 533          initial_imap
 534             = listToFM [(reg, emptyFeSet)
 535                         | reg <- regSetToList all_mentioned_regs]
 536
 537          -- Update the new map with one element of the old map
 538          upd_imap :: FiniteMap Reg FeSet -> (FE, RegSet)
 539                      -> FiniteMap Reg FeSet
 540          upd_imap imap (fe, regset)
 541              = foldl upd_1_imap imap (regSetToList regset)
 542                where
 543                   upd_1_imap curr reg
 544                      = addToFM_C unionFeSets curr reg (unitFeSet fe)
 545
 546          -- the complete inverse mapping
 547          final_imap :: FiniteMap Reg FeSet
 548          final_imap
 549              = foldl upd_imap initial_imap (fmToList lis)
 550      in
 551          final_imap
 552
 553
 554
 555 -- Given the insns, calculate the FEs, and then doing fixpointing to
 556 -- figure out the set of live regs (virtual regs AND real regs) live
 557 -- on each FE.
 558
 559 calc_liveness :: [Instr] -> FiniteMap FE RegSet
 560 calc_liveness insns
 561    = let (pred_map, succ_map)
 562             = find_flow_edges insns
 563
 564          -- We use the convention that if the current approximation
 565          -- doesn't give a mapping for some FE, that FE maps to the
 566          -- empty set.
 567          initial_approx, fixpoint :: FiniteMap FE RegSet
 568          initial_approx
 569             = mk_initial_approx 0 insns succ_map emptyFM
 570          fixpoint
 571             = fix_set initial_approx 1
 572               -- If you want to live dangerously, and promise that the code
 573               -- doesn't contain any loops (ie, there are no back edges in
 574               -- the flow graph), you should be able to get away with this:
 575               -- = upd_liveness_info pred_map succ_map insn_array initial_approx
 576               -- But since I'm paranoid, and since it hardly makes any difference
 577               -- to the compiler run-time (about 0.1%), I prefer to do the
 578               -- the full fixpointing game.
 579
 580          insn_array
 581             = let n = length insns
 582               in  array (0, n-1) (zip [0..] insns)
 583
 584          sameSets []     []       = True
 585          sameSets (c:cs) (n:ns)   = eqRegSets c n && sameSets cs ns
 586          sameSets _      _        = False
 587
 588          fix_set curr_approx iter_number
 589             = let next_approx
 590                      = upd_liveness_info pred_map succ_map insn_array curr_approx
 591                   curr_sets
 592                      = eltsFM curr_approx
 593                   next_sets
 594                      = eltsFM next_approx
 595                   same
 596                      = sameSets curr_sets next_sets
 597                   final_approx
 598                      = if same then curr_approx
 599                                else fix_set next_approx (iter_number+1)
 600               in
 601                   --trace (let qqq (fe, regset)
 602                   --             = show fe ++ "  " ++ show (regSetToList regset)
 603                   --       in
 604                   --          "\n::iteration " ++ show iter_number ++ "\n"
 605                   --          ++ (unlines . map qqq . fmToList)
 606                   --                               next_approx ++"\n"
 607                   --      )
 608                   final_approx
 609      in
 610          fixpoint
 611
 612
 613 -- Create a correct initial approximation.  For each instruction that
 614 -- writes a register, we deem that the register is live on the
 615 -- flow edges leaving the instruction.  Subsequent iterations of
 616 -- the liveness AbI augment this based purely on reads of regs, not
 617 -- writes.  We need to start off with at least this minimal write-
 618 -- based information in order that writes to vregs which are never
 619 -- used have non-empty live ranges.  If we don't do that, we eventually
 620 -- wind up assigning such vregs to any old real reg, since they don't
 621 -- apparently conflict -- you can't conflict with an empty live range.
 622 -- This kludge is unfortunate, but we need to do it to cover not only
 623 -- writes to vregs which are never used, but also to deal correctly
 624 -- with the fact that calls to C will trash the callee saves registers.
 625
 626 mk_initial_approx :: Int -> [Instr] -> Array Int [Int]
 627                      -> FiniteMap FE RegSet
 628                      -> FiniteMap FE RegSet
 629 mk_initial_approx ino [] succ_map ia_so_far
 630    = ia_so_far
 631 mk_initial_approx ino (i:is) succ_map ia_so_far
 632    = let wrs
 633             = case regUsage i of RU rrr www -> www
 634          new_fes
 635             = [case ino of      { I# inoh ->
 636                case ino_succ of { I# ino_succh ->
 637                MkFE inoh ino_succh
 638                }}
 639                   | ino_succ <- succ_map ! ino]
 640
 641          loop [] ia = ia
 642          loop (fe:fes) ia
 643             = loop fes (addToFM_C unionRegSets ia fe wrs)
 644
 645          next_ia
 646             = loop new_fes ia_so_far
 647      in
 648          mk_initial_approx (ino+1) is succ_map next_ia
 649
 650
 651 -- Do one step in the liveness info calculation (AbI :).  Given the
 652 -- prior approximation (which tells you a subset of live VRegs+RRegs
 653 -- for each flow edge), calculate new information for all FEs.
 654 -- Rather than do this by iterating over FEs, it's easier to iterate
 655 -- over insns, and update their incoming FEs.
 656
 657 upd_liveness_info :: Array Int [Int]         -- instruction pred map
 658                      -> Array Int [Int]      -- instruction succ map
 659                      -> Array Int Instr      -- array of instructions
 660                      -> FiniteMap FE RegSet  -- previous approx
 661                      -> FiniteMap FE RegSet  -- improved approx
 662
 663 upd_liveness_info pred_map succ_map insn_array prev_approx
 664    = do_insns hi prev_approx
 665      where
 666         (lo, hi) = bounds insn_array
 667
 668         enquireMapFE :: FiniteMap FE RegSet -> FE
 669                         -> RegSet
 670         enquireMapFE fm fe
 671            = case lookupFM fm fe of
 672                 Just set -> set
 673                 Nothing  -> emptyRegSet
 674
 675         -- Work backwards, from the highest numbered insn to the lowest.
 676         -- This is a heuristic which causes faster convergence to the
 677         -- fixed point.  In particular, for straight-line code with no
 678         -- branches at all, arrives at the fixpoint in one iteration.
 679         do_insns ino approx
 680            | ino < lo
 681            = approx
 682            | otherwise
 683            = let fes_to_futures
 684                     = [case ino of        { I# inoh ->
 685                        case future_ino of { I# future_inoh ->
 686                        MkFE inoh future_inoh
 687                        }}
 688                           | future_ino <- succ_map ! ino]
 689                  future_lives
 690                     = map (enquireMapFE approx) fes_to_futures
 691                  future_live
 692                     = foldr unionRegSets emptyRegSet future_lives
 693
 694                  fes_from_histories
 695                     = [case history_ino of { I# history_inoh ->
 696                        case ino of         { I# inoh ->
 697                        MkFE history_inoh inoh
 698                        }}
 699                           | history_ino <- pred_map ! ino]
 700                  new_approx
 701                     = foldl update_one_history approx fes_from_histories
 702
 703                  insn
 704                     = insn_array ! ino
 705                  history_independent_component
 706                     = case regUsage insn of
 707                          RU rds wrs
 708                             -> unionRegSets rds
 709                                   (minusRegSets future_live wrs)
 710
 711                  update_one_history :: FiniteMap FE RegSet
 712                                        -> FE
 713                                        -> FiniteMap FE RegSet
 714                  update_one_history approx0 fe
 715                       = addToFM_C unionRegSets approx0 fe
 716                                   history_independent_component
 717
 718                  rest_done
 719                     = do_insns (ino-1) new_approx
 720              in
 721                  rest_done
 722
 723
 724
 725 -- Extract the flow edges from a list of insns.  Express the information
 726 -- as two mappings, from insn number to insn numbers of predecessors,
 727 -- and from insn number to insn numbers of successors.  (Since that's
 728 -- what we need to know when computing live ranges later).  Instructions
 729 -- are numbered starting at zero.  This function is long and complex
 730 -- in order to be efficient; it could equally well be shorter and slower.
 731
 732 find_flow_edges :: [Instr] -> (Array Int [Int],
 733                                Array Int [Int])
 734 find_flow_edges insns
 735    = let
 736          -- First phase: make a temp env which maps labels
 737          -- to insn numbers, so the second pass can know the insn
 738          -- numbers for jump targets.
 739
 740          label_env :: FiniteMap CLabel Int
 741
 742          mk_label_env n env [] = env
 743          mk_label_env n env ((LABEL clbl):is)
 744             = mk_label_env (n+1) (addToFM env clbl n) is
 745          mk_label_env n env (i:is)
 746             = mk_label_env (n+1) env is
 747
 748          label_env = mk_label_env 0 emptyFM insns
 749
 750          find_label :: CLabel -> Int
 751          find_label jmptarget
 752             = case lookupFM label_env jmptarget of
 753                  Just ino -> ino
 754                  Nothing  -> pprPanic "find_flow_edges: unmapped label"
 755                                       (pprCLabel jmptarget)
 756
 757          -- Second phase: traverse the insns, and make up the successor map.
 758
 759          least_ino, greatest_ino :: Int
 760          least_ino    = 0
 761          greatest_ino = length insns - 1
 762
 763          mk_succ_map :: Int -> [(Int, [Int])] -> [Instr] -> [(Int, [Int])]
 764
 765          mk_succ_map i_num rsucc_map []
 766             = reverse rsucc_map
 767
 768          mk_succ_map i_num rsucc_map (i:is)
 769             = let i_num_1 = i_num + 1
 770               in
 771               case insnFuture i of
 772
 773                  NoFuture
 774                     -> -- A non-local jump.  We can regard this insn as a terminal
 775                        -- insn in the graph, so we don't add any edges.
 776                        mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 777
 778                  Next
 779                     |  null is -- this is the last insn, and it doesn't go anywhere
 780                                -- (a meaningless scenario); handle it anyway
 781                     -> mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 782
 783                     |  otherwise -- flows to next insn; add fe i_num -> i_num+1
 784                     -> mk_succ_map i_num_1 ((i_num, [i_num_1]): rsucc_map)
 785                                            is
 786
 787                  Branch lab -- jmps to lab; add fe i_num -> i_target
 788                     -> let i_target = find_label lab
 789                        in
 790                        mk_succ_map i_num_1 ((i_num, [i_target]): rsucc_map) is
 791
 792                  NextOrBranch lab
 793                     |  null is   -- jmps to label, or falls through, and this is
 794                                  -- the last insn (a meaningless scenario);
 795                                  -- flag an error
 796                     -> error "find_flow_edges: NextOrBranch is last"
 797
 798                     |  otherwise -- add fes i_num -> i_num+1
 799                                  --     and i_num -> i_target
 800                     -> let i_target = find_label lab
 801                        in
 802                        mk_succ_map i_num_1 ((i_num, [i_num_1, i_target]):rsucc_map)
 803                                            is
 804                  MultiFuture labels
 805                     -> -- A jump, whose targets are listed explicitly.
 806                        -- (Generated from table-based switch translations).
 807                        -- Add fes  i_num -> x  for each x in labels
 808                        let is_target = nub (map find_label labels)
 809                        in
 810                        mk_succ_map i_num_1 ((i_num, is_target):rsucc_map) is
 811
 812          -- Third phase: invert the successor map to get the predecessor
 813          -- map, using an algorithm which is quadratic in the worst case,
 814          -- but runs in almost-linear time, because of the nature of our
 815          -- inputs: most insns have a single successor, the next insn.
 816
 817          invert :: [(Int, [Int])] -> [(Int, [Int])]
 818          invert fmap
 819             = let inverted_pairs
 820                      = concatMap ( \ (a, bs) -> [(b,a) | b <- bs] ) fmap
 821                   sorted_inverted_pairs
 822                      = isort inverted_pairs
 823
 824                   grp :: Int -> [Int] -> [(Int,Int)] -> [(Int,[Int])]
 825                   grp k vs [] = [(k, vs)]
 826                   grp k vs ((kk,vv):rest)
 827                      | k == kk   = grp k (vv:vs) rest
 828                      | otherwise = (k,vs) : grp kk [vv] rest
 829
 830                   grp_start []             = []
 831                   grp_start ((kk,vv):rest) = grp kk [vv] rest
 832
 833                   grouped
 834                      = grp_start sorted_inverted_pairs
 835
 836                   -- make sure that the reverse mapping maps all inos
 837                   add_empties ino []
 838                      | ino > greatest_ino  = []
 839                      | otherwise           = (ino,[]): add_empties (ino+1) []
 840                   add_empties ino ((k,vs):rest)
 841                      | ino <  k   = (ino,[]): add_empties (ino+1) ((k,vs):rest)
 842                      | ino == k   = (k,vs) : add_empties (ino+1) rest
 843
 844                   -- This is nearly linear provided that the fsts of the
 845                   -- list are nearly in order -- a critical assumption
 846                   -- for efficiency.
 847                   isort :: [(Int,Int)] -> [(Int,Int)]
 848                   isort []     = []
 849                   isort (x:xs) = insert x (isort xs)
 850
 851                   insert :: (Int,Int) -> [(Int,Int)] -> [(Int,Int)]
 852                   insert y []     = [y]
 853                   insert y (z:zs)
 854                      -- specifically, this first test should almost always
 855                      -- be True in order for the near-linearity to happen
 856                      | fst y <= fst z  = y:z:zs
 857                      | otherwise       = z: insert y zs
 858               in
 859                  add_empties least_ino grouped
 860
 861          -- Finally ...
 862
 863          succ_list
 864             = mk_succ_map 0 [] insns
 865          succ_map
 866             = array (least_ino, greatest_ino) succ_list
 867          pred_list
 868             = invert succ_list
 869          pred_map
 870             = array (least_ino, greatest_ino) pred_list
 871      in
 872          (pred_map, succ_map)
 873
 874
 875 -- That's all, folks!  From here on is just some dull supporting stuff.
 876
 877 -- A data type for flow edges
 878 data FE
 879    = MkFE Int# Int# deriving (Eq, Ord)
 880
 881 -- deriving Show on types with unboxed fields doesn't work
 882 instance Show FE where
 883     showsPrec _ (MkFE s d)
 884        = showString "MkFE" . shows (I# s) . shows ' ' . shows (I# d)
 885
 886 -- Blargh.  Use ghc stuff soon!  Or: perhaps that's not such a good
 887 -- idea.  Most of these sets are either empty or very small, and it
 888 -- might be that the overheads of the FiniteMap based set implementation
 889 -- is a net loss.  The same might be true of RegSets.
 890
 891 newtype FeSet = MkFeSet [FE]
 892
 893 feSetFromList xs
 894    = MkFeSet (nukeDups (sort xs))
 895      where nukeDups :: [FE] -> [FE]
 896            nukeDups []  = []
 897            nukeDups [x] = [x]
 898            nukeDups (x:y:xys)
 899               = if x == y then nukeDups (y:xys)
 900                           else x : nukeDups (y:xys)
 901
 902 feSetToList (MkFeSet xs)            = xs
 903 isEmptyFeSet (MkFeSet xs)           = null xs
 904 emptyFeSet                          = MkFeSet []
 905 eqFeSet (MkFeSet xs1) (MkFeSet xs2) = xs1 == xs2
 906 unitFeSet x                         = MkFeSet [x]
 907
 908 elemFeSet x (MkFeSet xs)
 909    = f xs
 910      where
 911         f []     = False
 912         f (y:ys) | x == y    = True
 913                  | x < y     = False
 914                  | otherwise = f ys
 915
 916 unionFeSets (MkFeSet xs1) (MkFeSet xs2)
 917    = MkFeSet (f xs1 xs2)
 918      where
 919         f [] bs = bs
 920         f as [] = as
 921         f (a:as) (b:bs)
 922            | a < b      = a : f as (b:bs)
 923            | a > b      = b : f (a:as) bs
 924            | otherwise  = a : f as bs
 925
 926 minusFeSets (MkFeSet xs1) (MkFeSet xs2)
 927    = MkFeSet (f xs1 xs2)
 928      where
 929         f [] bs = []
 930         f as [] = as
 931         f (a:as) (b:bs)
 932            | a < b      = a : f as (b:bs)
 933            | a > b      = f (a:as) bs
 934            | otherwise  = f as bs
 935
 936 intersectionFeSets (MkFeSet xs1) (MkFeSet xs2)
 937    = MkFeSet (f xs1 xs2)
 938      where
 939         f [] bs = []
 940         f as [] = []
 941         f (a:as) (b:bs)
 942            | a < b      = f as (b:bs)
 943            | a > b      = f (a:as) bs
 944            | otherwise  = a : f as bs
 945
 946 \end{code}