ghc/compiler/nativeGen/AsmRegAlloc.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-2000
   3 %
   4 \section[AsmRegAlloc]{Register allocator}
   5
   6 \begin{code}
   7 module AsmRegAlloc ( runRegAllocate ) where
   8
   9 #include "HsVersions.h"
  10
  11 import MachCode         ( InstrBlock )
  12 import MachMisc         ( Instr(..) )
  13 import PprMach          ( pprUserReg, pprInstr ) -- debugging
  14 import MachRegs
  15 import RegAllocInfo
  16
  17 import FiniteMap        ( FiniteMap, emptyFM, addListToFM, delListFromFM,
  18                           lookupFM, keysFM, eltsFM, mapFM, addToFM_C, addToFM,
  19                           listToFM, fmToList, lookupWithDefaultFM )
  20 import Maybes           ( maybeToBool )
  21 import Unique           ( mkBuiltinUnique )
  22 import Util             ( mapAccumB )
  23 import OrdList          ( unitOL, appOL, fromOL, concatOL )
  24 import Outputable
  25 import Unique           ( Unique, Uniquable(..), mkPseudoUnique3 )
  26 import CLabel           ( CLabel, pprCLabel )
  27
  28 import List             ( mapAccumL, nub, sort )
  29 import Array            ( Array, array, (!), bounds )
  30 \end{code}
  31
  32 This is the generic register allocator.  It does allocation for all
  33 architectures.  Details for specific architectures are given in
  34 RegAllocInfo.lhs.  In practice the allocator needs to know next to
  35 nothing about an architecture to do its job:
  36
  37 * It needs to be given a list of the registers it can allocate to.
  38
  39 * It needs to be able to find out which registers each insn reads and
  40   writes.
  41
  42 * It needs be able to change registers in instructions into other
  43   registers.
  44
  45 * It needs to be able to find out where execution could go after an
  46   in instruction.
  47
  48 * It needs to be able to discover sets of registers which can be
  49   used to attempt spilling.
  50
  51 First we try something extremely simple.  If that fails, we have to do
  52 things the hard way.
  53
  54 \begin{code}
  55 runRegAllocate
  56     :: [Reg]
  57     -> ([Instr] -> [[Reg]])
  58     -> InstrBlock
  59     -> [Instr]
  60
  61 runRegAllocate regs find_reserve_regs instrs
  62   = --trace ("runRegAllocate: " ++ show regs) (
  63     case simpleAlloc of
  64        Just simple -> --trace "SIMPLE"
  65                       simple
  66        Nothing     -> --trace "GENERAL"
  67                       (tryGeneral reserves)
  68     --)
  69   where
  70     tryGeneral []
  71        = error "nativeGen: spilling failed.  Workaround: compile with -fvia-C.\n"
  72     tryGeneral (resv:resvs)
  73        = case generalAlloc resv of
  74             Just success -> success
  75             Nothing      -> tryGeneral resvs
  76
  77     reserves           = find_reserve_regs flatInstrs
  78     flatInstrs         = fromOL instrs
  79     simpleAlloc        = doSimpleAlloc regs flatInstrs
  80     generalAlloc resvd = doGeneralAlloc regs resvd flatInstrs
  81 \end{code}
  82
  83 Rather than invoke the heavyweight machinery in @doGeneralAlloc@ for
  84 each and every code block, we first try using this simple, fast and
  85 utterly braindead allocator.  In practice it handles about 60\% of the
  86 code blocks really fast, even with only 3 integer registers available.
  87 Since we can always give up and fall back to @doGeneralAlloc@,
  88 @doSimpleAlloc@ is geared to handling the common case as fast as
  89 possible.  It will succeed only if:
  90
  91 * The code mentions registers only of integer class, not floating
  92   class.
  93
  94 * The code doesn't mention any real registers, so we don't have to
  95   think about dodging and weaving to work around fixed register uses.
  96
  97 * The code mentions at most N virtual registers, where N is the number
  98   of real registers for allocation.
  99
 100 If those conditions are satisfied, we simply trundle along the code,
 101 doling out a real register every time we see mention of a new virtual
 102 register.  We either succeed at this, or give up when one of the above
 103 three conditions is no longer satisfied.
 104
 105 \begin{code}
 106 doSimpleAlloc :: [Reg] -> [Instr] -> Maybe [Instr]
 107 doSimpleAlloc available_real_regs instrs
 108    = let available_iregs
 109             = filter ((== RcInteger).regClass) available_real_regs
 110
 111          trundle :: [( {-Virtual-}Reg, {-Real-}Reg )]
 112                     -> [ {-Real-}Reg ]
 113                     -> [Instr]
 114                     -> [Instr]
 115                     -> Maybe [Instr]
 116          trundle vreg_map uncommitted_rregs ris_done []
 117             = Just (reverse ris_done)
 118          trundle vreg_map uncommitted_rregs ris_done (i:is)
 119             = case regUsage i of
 120                  RU rds wrs
 121
 122                     -- Mentions no regs?  Move on quickly
 123                     |  null rds_l && null wrs_l
 124                     -> trundle vreg_map uncommitted_rregs (i:ris_done) is
 125
 126                     -- A case we can't be bothered to handle?
 127                     |  any isFloatingOrReal rds_l || any isFloatingOrReal wrs_l
 128                     -> Nothing
 129
 130                     -- Update the rreg commitments, and map the insn
 131                     |  otherwise
 132                     -> case upd_commitment (wrs_l++rds_l)
 133                                            vreg_map uncommitted_rregs of
 134                           Nothing -- out of rregs; give up
 135                              -> Nothing
 136                           Just (vreg_map2, uncommitted_rregs2)
 137                              -> let i2 = patchRegs i (subst_reg vreg_map2)
 138                                 in  trundle vreg_map2 uncommitted_rregs2
 139                                             (i2:ris_done) is
 140                        where
 141                           isFloatingOrReal reg
 142                              = isRealReg reg || regClass reg == RcFloat
 143                                              || regClass reg == RcDouble
 144
 145                           rds_l = regSetToList rds
 146                           wrs_l = regSetToList wrs
 147
 148                           upd_commitment [] vr_map uncomm
 149                              = Just (vr_map, uncomm)
 150                           upd_commitment (reg:regs) vr_map uncomm
 151                              | isRealReg reg
 152                              = upd_commitment regs vr_map uncomm
 153                              | reg `elem` (map fst vr_map)
 154                              = upd_commitment regs vr_map uncomm
 155                              | null uncomm
 156                              = Nothing
 157                              | otherwise
 158                              = upd_commitment regs ((reg, head uncomm):vr_map)
 159                                                    (tail uncomm)
 160
 161                           subst_reg vreg_map r
 162                              -- If it's a RealReg, it must be STG-specific one
 163                              -- (Hp,Sp,BaseReg,etc), since regUsage filters them out,
 164                              -- so isFloatingOrReal would not have objected to it.
 165                              | isRealReg r
 166                              = r
 167                              | otherwise
 168                              = case [rr | (vr,rr) <- vreg_map, vr == r] of
 169                                   [rr2] -> rr2
 170                                   other -> pprPanic
 171                                               "doSimpleAlloc: unmapped VirtualReg"
 172                                               (ppr r)
 173      in
 174          trundle [] available_iregs [] instrs
 175 \end{code}
 176
 177 From here onwards is the general register allocator and spiller.  For
 178 each flow edge (possible transition between instructions), we compute
 179 which virtual and real registers are live on that edge.  Then the
 180 mapping is inverted, to give a mapping from register (virtual+real) to
 181 sets of flow edges on which the register is live.  Finally, we can use
 182 those sets to decide whether a virtual reg v can be assigned to a real
 183 reg r, by checking that v's live-edge-set does not intersect with r's
 184 current live-edge-set.  Having made that assignment, we then augment
 185 r's current live-edge-set (its current commitment, you could say) with
 186 v's live-edge-set.
 187
 188 doGeneralAlloc takes reserve_regs as the regs to use as spill
 189 temporaries.  First it tries to allocate using all regs except
 190 reserve_regs.  If that fails, it inserts spill code and tries again to
 191 allocate regs, but this time with the spill temporaries available.
 192 Even this might not work if there are insufficient spill temporaries:
 193 in the worst case on x86, we'd need 3 of them, for insns like addl
 194 (%reg1,%reg2,4) %reg3, since this insn uses all 3 regs as input.
 195
 196 \begin{code}
 197 doGeneralAlloc
 198     :: [Reg]            -- all allocatable regs
 199     -> [Reg]            -- the reserve regs
 200     -> [Instr]          -- instrs in
 201     -> Maybe [Instr]    -- instrs out
 202
 203 doGeneralAlloc all_regs reserve_regs instrs
 204    -- succeeded without spilling
 205    | prespill_ok        = Just prespill_insns
 206    -- failed, and no spill regs avail, so pointless to attempt spilling
 207    | null reserve_regs  = Nothing
 208    -- success after spilling
 209    | postspill_ok       = maybetrace (spillMsg True) (Just postspill_insns)
 210    -- still not enough reserves after spilling; we have to give up
 211    | otherwise          = maybetrace (spillMsg False) Nothing
 212      where
 213          prespill_regs
 214             = filter (`notElem` reserve_regs) all_regs
 215          (prespill_ok, prespill_insns)
 216             = allocUsingTheseRegs instrs prespill_regs
 217          instrs_with_spill_code
 218             = insertSpillCode prespill_insns
 219          (postspill_ok, postspill_insns)
 220             = allocUsingTheseRegs instrs_with_spill_code all_regs
 221
 222          spillMsg success
 223             = "nativeGen: spilling "
 224               ++ (if success then "succeeded" else "failed   ")
 225               ++ " using "
 226               ++ showSDoc (hsep (map ppr reserve_regs))
 227
 228 #        if 1 /* ifdef DEBUG */
 229          maybetrace msg x = trace msg x
 230 #        else
 231          maybetrace msg x = x
 232 #        endif
 233 \end{code}
 234
 235 Here we patch instructions that reference ``registers'' which are
 236 really in memory somewhere (the mapping is under the control of the
 237 machine-specific code generator).  We place the appropriate load
 238 sequences before any instructions that use memory registers as
 239 sources, and we place the appropriate spill sequences after any
 240 instructions that use memory registers as destinations.  The offending
 241 instructions are rewritten with new dynamic registers, so generalAlloc
 242 has to run register allocation again after all of this is said and
 243 done.
 244
 245 On some architectures (x86, currently), we do without a frame-pointer,
 246 and instead spill relative to the stack pointer (%esp on x86).
 247 Because the stack pointer may move, the patcher needs to keep track of
 248 the current stack pointer "delta".  That's easy, because all it needs
 249 to do is spot the DELTA bogus-insns which will have been inserted by
 250 the relevant insn selector precisely so as to notify the spiller of
 251 stack-pointer movement.  The delta is passed to loadReg and spillReg,
 252 since they generate the actual spill code.  We expect the final delta
 253 to be the same as the starting one (zero), reflecting the fact that
 254 changes to the stack pointer should not extend beyond a basic block.
 255
 256 Finally, there is the issue of mapping an arbitrary set of unallocated
 257 VirtualRegs into a contiguous sequence of spill slots.  The failed
 258 allocation will have left the code peppered with references to
 259 VirtualRegs, each of which contains a unique.  So we make an env which
 260 maps these VirtualRegs to integers, starting from zero, and pass that
 261 env through to loadReg and spillReg.  There, they are used to look up
 262 spill slot numbers for the uniques.
 263
 264 \begin{code}
 265 insertSpillCode :: [Instr] -> [Instr]
 266 insertSpillCode insns
 267    = let uniques_in_insns
 268             = map getUnique
 269                   (regSetToList
 270                      (foldl unionRegSets emptyRegSet
 271                             (map vregs_in_insn insns)))
 272          vregs_in_insn i
 273             = case regUsage i of
 274                  RU rds wrs -> filterRegSet isVirtualReg
 275                                              (rds `unionRegSets` wrs)
 276          vreg_to_slot_map :: FiniteMap Unique Int
 277          vreg_to_slot_map
 278             = listToFM (zip uniques_in_insns [0..])
 279
 280          ((final_stack_delta, final_ctr), insnss)
 281             = mapAccumL (patchInstr vreg_to_slot_map) (0,0) insns
 282      in
 283          if   final_stack_delta == 0
 284          then concat insnss
 285          else pprPanic "patchMem: non-zero final delta"
 286                        (int final_stack_delta)
 287
 288
 289 -- patchInstr has as a running state two Ints, one the current stack delta,
 290 -- needed to figure out offsets to stack slots on archs where we spill relative
 291 -- to the stack pointer, as opposed to the frame pointer.  The other is a
 292 -- counter, used to manufacture new temporary register names.
 293
 294 patchInstr :: FiniteMap Unique Int -> (Int,Int) -> Instr -> ((Int,Int), [Instr])
 295 patchInstr vreg_to_slot_map (delta,ctr) instr
 296
 297  | null memSrcs && null memDsts
 298  = ((delta',ctr), [instr])
 299
 300  | otherwise
 301  = ((delta',ctr'), loadSrcs ++ [instr'] ++ spillDsts)
 302    where
 303         delta' = case instr of DELTA d -> d ; _ -> delta
 304
 305         (RU srcs dsts) = regUsage instr
 306
 307         -- The instr being patched may mention several vregs -- those which
 308         -- could not be assigned real registers.  For each such vreg, we
 309         -- invent a new vreg, used only around this instruction and nowhere
 310         -- else.  These new vregs replace the unallocatable vregs; they are
 311         -- loaded from the spill area, the instruction is done with them,
 312         -- and results if any are then written back to the spill area.
 313         vregs_in_instr
 314            = nub (filter isVirtualReg
 315                          (regSetToList srcs ++ regSetToList dsts))
 316         n_vregs_in_instr
 317            = length vregs_in_instr
 318         ctr'
 319            = ctr + n_vregs_in_instr
 320         vreg_env
 321            = zip vregs_in_instr [ctr, ctr+1 ..]
 322
 323         mkTmpReg vreg
 324            | isVirtualReg vreg
 325            = case [vi | (vreg', vi) <- vreg_env, vreg' == vreg] of
 326                 [i] -> if   regClass vreg == RcInteger
 327                        then VirtualRegI (mkPseudoUnique3 i)
 328                        else VirtualRegF (mkPseudoUnique3 i)
 329                 _   -> pprPanic "patchInstr: unmapped VReg" (ppr vreg)
 330            | otherwise
 331            = vreg
 332
 333         memSrcs   = filter isVirtualReg (regSetToList srcs)
 334         memDsts   = filter isVirtualReg (regSetToList dsts)
 335
 336         loadSrcs  = map load  memSrcs
 337         spillDsts = map spill memDsts
 338
 339         load mem  = loadReg  vreg_to_slot_map delta  mem (mkTmpReg mem)
 340         spill mem = spillReg vreg_to_slot_map delta' (mkTmpReg mem) mem
 341
 342         instr'    = patchRegs instr mkTmpReg
 343 \end{code}
 344
 345 allocUsingTheseRegs is the register allocator proper.  It attempts
 346 to allocate dynamic regs to real regs, given a list of real regs
 347 which it may use.  If it fails due to lack of real regs, the returned
 348 instructions use what real regs there are, but will retain uses of
 349 dynamic regs for which a real reg could not be found.  It is these
 350 leftover dynamic reg references which insertSpillCode will later
 351 assign to spill slots.
 352
 353 Some implementation notes.
 354 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 355 Instructions are numbered sequentially, starting at zero.
 356
 357 A flow edge (FE) is a pair of insn numbers (MkFE Int Int) denoting
 358 a possible flow of control from the first insn to the second.
 359
 360 The input to the register allocator is a list of instructions, which
 361 mention Regs.  A Reg can be a RealReg -- a real machine reg -- or a
 362 VirtualReg, which carries a unique.  After allocation, all the
 363 VirtualReg references will have been converted into RealRegs, and
 364 possible some spill code will have been inserted.
 365
 366 The heart of the register allocator works in four phases.
 367
 368 1.  (find_flow_edges) Calculate all the FEs for the code list.
 369     Return them not as a [FE], but implicitly, as a pair of
 370     Array Int [Int], being the successor and predecessor maps
 371     for instructions.
 372
 373 2.  (calc_liveness) Returns a FiniteMap FE RegSet.  For each
 374     FE, indicates the set of registers live on that FE.  Note
 375     that the set includes both RealRegs and VirtualRegs.  The
 376     former appear because the code could mention fixed register
 377     usages, and we need to take them into account from the start.
 378
 379 3.  (calc_live_range_sets) Invert the above mapping, giving a
 380     FiniteMap Reg FeSet, indicating, for each virtual and real
 381     reg mentioned in the code, which FEs it is live on.
 382
 383 4.  (calc_vreg_to_rreg_mapping) For virtual reg, try and find
 384     an allocatable real register for it.  Each real register has
 385     a "current commitment", indicating the set of FEs it is
 386     currently live on.  A virtual reg v can be assigned to
 387     real reg r iff v's live-fe-set does not intersect with r's
 388     current commitment fe-set.  If the assignment is made,
 389     v's live-fe-set is union'd into r's current commitment fe-set.
 390     There is also the minor restriction that v and r must be of
 391     the same register class (integer or floating).
 392
 393     Once this mapping is established, we simply apply it to the
 394     input insns, and that's it.
 395
 396     If no suitable real register can be found, the vreg is mapped
 397     to itself, and we deem allocation to have failed.  The partially
 398     allocated code is returned.  The higher echelons of the allocator
 399     (doGeneralAlloc and runRegAlloc) then cooperate to insert spill
 400     code and re-run allocation, until a successful allocation is found.
 401 \begin{code}
 402
 403 allocUsingTheseRegs :: [Instr] -> [Reg] -> (Bool, [Instr])
 404 allocUsingTheseRegs instrs available_real_regs
 405    = let (all_vregs_mapped, v_to_r_mapping)
 406             = calc_vreg_to_rreg_mapping instrs available_real_regs
 407          new_insns
 408             = map (flip patchRegs sr) instrs
 409          sr reg
 410             | isRealReg reg
 411             = reg
 412             | otherwise
 413             = case lookupFM v_to_r_mapping reg of
 414                  Just r  -> r
 415                  Nothing -> pprPanic "allocateUsingTheseRegs: unmapped vreg: "
 416                                      (ppr reg)
 417      in
 418          --trace ("allocUsingTheseRegs: " ++ show available_real_regs) (
 419          (all_vregs_mapped, new_insns)
 420          --)
 421
 422
 423 -- the heart of the matter.
 424 calc_vreg_to_rreg_mapping :: [Instr] -> [Reg] -> (Bool, FiniteMap Reg Reg)
 425 calc_vreg_to_rreg_mapping insns available_real_regs
 426    = let
 427          lr_sets  :: FiniteMap Reg FeSet
 428          lr_sets = calc_live_range_sets insns
 429
 430          -- lr_sets maps: vregs mentioned in insns to sets of live FEs
 431          -- and also:     rregs mentioned in insns to sets of live FEs
 432          -- We need to extract the rreg mapping, and use it as the
 433          -- initial real-register-commitment.  Also, add to the initial
 434          -- commitment, empty commitments for any real regs not
 435          -- mentioned in it.
 436
 437          -- which real regs do we want to keep track of in the running
 438          -- commitment mapping?  Precisely the available_real_regs.
 439          -- We don't care about real regs mentioned by insns which are
 440          -- not in this list, since we're not allocating to them.
 441          initial_rr_commitment :: FiniteMap Reg FeSet
 442          initial_rr_commitment
 443             = listToFM [(rreg,
 444                          case lookupFM lr_sets rreg of
 445                             Nothing            -> emptyFeSet
 446                             Just fixed_use_fes -> fixed_use_fes
 447                         )
 448                         | rreg <- available_real_regs]
 449
 450          -- These are the vregs for which we actually have to (try to)
 451          -- assign a real register. (ie, the whole reason we're here at all :)
 452          vreg_liveness_list :: [(Reg, FeSet)]
 453          vreg_liveness_list = filter (not.isRealReg.fst)
 454                                      (fmToList lr_sets)
 455
 456          -- A loop, which attempts to assign each vreg to a rreg.
 457          loop rr_commitment v_to_r_map []
 458             = v_to_r_map
 459          loop rr_commitment v_to_r_map ((vreg,vreg_live_fes):not_yet_done)
 460             = let
 461                   -- find a real reg which is not live for any of vreg_live_fes
 462                   cand_reals
 463                      = [rreg
 464                            | (rreg,rreg_live_FEs) <- fmToList rr_commitment,
 465                               regClass vreg == regClass rreg,
 466                               isEmptyFeSet (intersectionFeSets rreg_live_FEs
 467                                                                vreg_live_fes)
 468                        ]
 469               in
 470                  case cand_reals of
 471                     [] -> -- bummer.  No register is available.  Just go on to
 472                           -- the next vreg, mapping the vreg to itself.
 473                           loop rr_commitment (addToFM v_to_r_map vreg vreg)
 474                                not_yet_done
 475                     (r:_)
 476                        -> -- Hurrah!  Found a free reg of the right class.
 477                           -- Now we need to update the RR commitment.
 478                           loop rr_commitment2 (addToFM v_to_r_map vreg r)
 479                                not_yet_done
 480                           where
 481                              rr_commitment2
 482                                 = addToFM_C unionFeSets rr_commitment r
 483                                             vreg_live_fes
 484
 485          -- the final vreg to rreg mapping
 486          vreg_assignment
 487             = loop initial_rr_commitment emptyFM vreg_liveness_list
 488          -- did we succeed in mapping everyone to a real reg?
 489          allocation_succeeded
 490             = all isRealReg (eltsFM vreg_assignment)
 491      in
 492          (allocation_succeeded, vreg_assignment)
 493
 494
 495
 496 -- calculate liveness, then produce the live range info
 497 -- as a mapping of VRegs to the set of FEs on which they are live.
 498 -- The difficult part is inverting the mapping of Reg -> FeSet
 499 -- to produce a mapping FE -> RegSet.
 500
 501 calc_live_range_sets :: [Instr] -> FiniteMap Reg FeSet
 502 calc_live_range_sets insns
 503    = let
 504          -- this is the "original" (old) mapping
 505          lis :: FiniteMap FE RegSet
 506          lis = calc_liveness insns
 507
 508          -- establish the totality of reg names mentioned by the
 509          -- insns, by scanning over the insns.
 510          all_mentioned_regs :: RegSet
 511          all_mentioned_regs
 512             = foldl unionRegSets emptyRegSet
 513                     (map (\i -> case regUsage i of
 514                                    RU rds wrs -> unionRegSets rds wrs)
 515                          insns)
 516
 517          -- Initial inverted mapping, from Reg to sets of FEs
 518          initial_imap :: FiniteMap Reg FeSet
 519          initial_imap
 520             = listToFM [(reg, emptyFeSet)
 521                         | reg <- regSetToList all_mentioned_regs]
 522
 523          -- Update the new map with one element of the old map
 524          upd_imap :: FiniteMap Reg FeSet -> (FE, RegSet)
 525                      -> FiniteMap Reg FeSet
 526          upd_imap imap (fe, regset)
 527              = foldl upd_1_imap imap (regSetToList regset)
 528                where
 529                   upd_1_imap curr reg
 530                      = addToFM_C unionFeSets curr reg (unitFeSet fe)
 531
 532          -- the complete inverse mapping
 533          final_imap :: FiniteMap Reg FeSet
 534          final_imap
 535              = foldl upd_imap initial_imap (fmToList lis)
 536      in
 537          final_imap
 538
 539
 540
 541 -- Given the insns, calculate the FEs, and then doing fixpointing to
 542 -- figure out the set of live regs (virtual regs AND real regs) live
 543 -- on each FE.
 544
 545 calc_liveness :: [Instr] -> FiniteMap FE RegSet
 546 calc_liveness insns
 547    = let (pred_map, succ_map)
 548             = find_flow_edges insns
 549
 550          -- We use the convention that if the current approximation
 551          -- doesn't give a mapping for some FE, that FE maps to the
 552          -- empty set.
 553          initial_approx, fixpoint :: FiniteMap FE RegSet
 554          initial_approx
 555             = mk_initial_approx 0 insns succ_map emptyFM
 556          fixpoint
 557             = fix_set initial_approx 1
 558               -- If you want to live dangerously, and promise that the code
 559               -- doesn't contain any loops (ie, there are no back edges in
 560               -- the flow graph), you should be able to get away with this:
 561               -- = upd_liveness_info pred_map succ_map insn_array initial_approx
 562               -- But since I'm paranoid, and since it hardly makes any difference
 563               -- to the compiler run-time (about 0.1%), I prefer to do the
 564               -- the full fixpointing game.
 565
 566          insn_array
 567             = let n = length insns
 568               in  array (0, n-1) (zip [0..] insns)
 569
 570          sameSets []     []       = True
 571          sameSets (c:cs) (n:ns)   = eqRegSets c n && sameSets cs ns
 572          sameSets _      _        = False
 573
 574          fix_set curr_approx iter_number
 575             = let next_approx
 576                      = upd_liveness_info pred_map succ_map insn_array curr_approx
 577                   curr_sets
 578                      = eltsFM curr_approx
 579                   next_sets
 580                      = eltsFM next_approx
 581                   same
 582                      = sameSets curr_sets next_sets
 583                   final_approx
 584                      = if same then curr_approx
 585                                else fix_set next_approx (iter_number+1)
 586               in
 587                   --trace (let qqq (fe, regset)
 588                   --             = show fe ++ "  " ++ show (regSetToList regset)
 589                   --       in
 590                   --          "\n::iteration " ++ show iter_number ++ "\n"
 591                   --          ++ (unlines . map qqq . fmToList)
 592                   --                               next_approx ++"\n"
 593                   --      )
 594                   final_approx
 595      in
 596          fixpoint
 597
 598
 599 -- Create a correct initial approximation.  For each instruction that
 600 -- writes a register, we deem that the register is live on the
 601 -- flow edges leaving the instruction.  Subsequent iterations of
 602 -- the liveness AbI augment this based purely on reads of regs, not
 603 -- writes.  We need to start off with at least this minimal write-
 604 -- based information in order that writes to vregs which are never
 605 -- used have non-empty live ranges.  If we don't do that, we eventually
 606 -- wind up assigning such vregs to any old real reg, since they don't
 607 -- apparently conflict -- you can't conflict with an empty live range.
 608 -- This kludge is unfortunate, but we need to do it to cover not only
 609 -- writes to vregs which are never used, but also to deal correctly
 610 -- with the fact that calls to C will trash the callee saves registers.
 611
 612 mk_initial_approx :: Int -> [Instr] -> Array Int [Int]
 613                      -> FiniteMap FE RegSet
 614                      -> FiniteMap FE RegSet
 615 mk_initial_approx ino [] succ_map ia_so_far
 616    = ia_so_far
 617 mk_initial_approx ino (i:is) succ_map ia_so_far
 618    = let wrs
 619             = case regUsage i of RU rrr www -> www
 620          new_fes
 621             = [case ino of      { I# inoh ->
 622                case ino_succ of { I# ino_succh ->
 623                MkFE inoh ino_succh
 624                }}
 625                   | ino_succ <- succ_map ! ino]
 626
 627          loop [] ia = ia
 628          loop (fe:fes) ia
 629             = loop fes (addToFM_C unionRegSets ia fe wrs)
 630
 631          next_ia
 632             = loop new_fes ia_so_far
 633      in
 634          mk_initial_approx (ino+1) is succ_map next_ia
 635
 636
 637 -- Do one step in the liveness info calculation (AbI :).  Given the
 638 -- prior approximation (which tells you a subset of live VRegs+RRegs
 639 -- for each flow edge), calculate new information for all FEs.
 640 -- Rather than do this by iterating over FEs, it's easier to iterate
 641 -- over insns, and update their incoming FEs.
 642
 643 upd_liveness_info :: Array Int [Int]         -- instruction pred map
 644                      -> Array Int [Int]      -- instruction succ map
 645                      -> Array Int Instr      -- array of instructions
 646                      -> FiniteMap FE RegSet  -- previous approx
 647                      -> FiniteMap FE RegSet  -- improved approx
 648
 649 upd_liveness_info pred_map succ_map insn_array prev_approx
 650    = do_insns hi prev_approx
 651      where
 652         (lo, hi) = bounds insn_array
 653
 654         enquireMapFE :: FiniteMap FE RegSet -> FE
 655                         -> RegSet
 656         enquireMapFE fm fe
 657            = case lookupFM fm fe of
 658                 Just set -> set
 659                 Nothing  -> emptyRegSet
 660
 661         -- Work backwards, from the highest numbered insn to the lowest.
 662         -- This is a heuristic which causes faster convergence to the
 663         -- fixed point.  In particular, for straight-line code with no
 664         -- branches at all, arrives at the fixpoint in one iteration.
 665         do_insns ino approx
 666            | ino < lo
 667            = approx
 668            | otherwise
 669            = let fes_to_futures
 670                     = [case ino of        { I# inoh ->
 671                        case future_ino of { I# future_inoh ->
 672                        MkFE inoh future_inoh
 673                        }}
 674                           | future_ino <- succ_map ! ino]
 675                  future_lives
 676                     = map (enquireMapFE approx) fes_to_futures
 677                  future_live
 678                     = foldr unionRegSets emptyRegSet future_lives
 679
 680                  fes_from_histories
 681                     = [case history_ino of { I# history_inoh ->
 682                        case ino of         { I# inoh ->
 683                        MkFE history_inoh inoh
 684                        }}
 685                           | history_ino <- pred_map ! ino]
 686                  new_approx
 687                     = foldl update_one_history approx fes_from_histories
 688
 689                  insn
 690                     = insn_array ! ino
 691                  history_independent_component
 692                     = case regUsage insn of
 693                          RU rds wrs
 694                             -> unionRegSets rds
 695                                   (minusRegSets future_live wrs)
 696
 697                  update_one_history :: FiniteMap FE RegSet
 698                                        -> FE
 699                                        -> FiniteMap FE RegSet
 700                  update_one_history approx0 fe
 701                       = addToFM_C unionRegSets approx0 fe
 702                                   history_independent_component
 703
 704                  rest_done
 705                     = do_insns (ino-1) new_approx
 706              in
 707                  rest_done
 708
 709
 710
 711 -- Extract the flow edges from a list of insns.  Express the information
 712 -- as two mappings, from insn number to insn numbers of predecessors,
 713 -- and from insn number to insn numbers of successors.  (Since that's
 714 -- what we need to know when computing live ranges later).  Instructions
 715 -- are numbered starting at zero.  This function is long and complex
 716 -- in order to be efficient; it could equally well be shorter and slower.
 717
 718 find_flow_edges :: [Instr] -> (Array Int [Int],
 719                                Array Int [Int])
 720 find_flow_edges insns
 721    = let
 722          -- First phase: make a temp env which maps labels
 723          -- to insn numbers, so the second pass can know the insn
 724          -- numbers for jump targets.
 725
 726          label_env :: FiniteMap CLabel Int
 727
 728          mk_label_env n env [] = env
 729          mk_label_env n env ((LABEL clbl):is)
 730             = mk_label_env (n+1) (addToFM env clbl n) is
 731          mk_label_env n env (i:is)
 732             = mk_label_env (n+1) env is
 733
 734          label_env = mk_label_env 0 emptyFM insns
 735
 736          find_label :: CLabel -> Int
 737          find_label jmptarget
 738             = case lookupFM label_env jmptarget of
 739                  Just ino -> ino
 740                  Nothing  -> pprPanic "find_flow_edges: unmapped label"
 741                                       (pprCLabel jmptarget)
 742
 743          -- Second phase: traverse the insns, and make up the successor map.
 744
 745          least_ino, greatest_ino :: Int
 746          least_ino    = 0
 747          greatest_ino = length insns - 1
 748
 749          mk_succ_map :: Int -> [(Int, [Int])] -> [Instr] -> [(Int, [Int])]
 750
 751          mk_succ_map i_num rsucc_map []
 752             = reverse rsucc_map
 753
 754          mk_succ_map i_num rsucc_map (i:is)
 755             = let i_num_1 = i_num + 1
 756               in
 757               case insnFuture i of
 758
 759                  NoFuture
 760                     -> -- A non-local jump.  We can regard this insn as a terminal
 761                        -- insn in the graph, so we don't add any edges.
 762                        mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 763
 764                  Next
 765                     |  null is -- this is the last insn, and it doesn't go anywhere
 766                                -- (a meaningless scenario); handle it anyway
 767                     -> mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 768
 769                     |  otherwise -- flows to next insn; add fe i_num -> i_num+1
 770                     -> mk_succ_map i_num_1 ((i_num, [i_num_1]): rsucc_map)
 771                                            is
 772
 773                  Branch lab -- jmps to lab; add fe i_num -> i_target
 774                     -> let i_target = find_label lab
 775                        in
 776                        mk_succ_map i_num_1 ((i_num, [i_target]): rsucc_map)
 777                                            is
 778                  NextOrBranch lab
 779                     |  null is   -- jmps to label, or falls through, and this is
 780                                  -- the last insn (a meaningless scenario);
 781                                  -- flag an error
 782                     -> error "find_flow_edges: NextOrBranch is last"
 783
 784                     |  otherwise -- add fes i_num -> i_num+1
 785                                  --     and i_num -> i_target
 786                     -> let i_target = find_label lab
 787                        in
 788                        mk_succ_map i_num_1 ((i_num, [i_num_1, i_target]):rsucc_map)
 789                                            is
 790
 791          -- Third phase: invert the successor map to get the predecessor
 792          -- map, using an algorithm which is quadratic in the worst case,
 793          -- but runs in almost-linear time, because of the nature of our
 794          -- inputs: most insns have a single successor, the next insn.
 795
 796          invert :: [(Int, [Int])] -> [(Int, [Int])]
 797          invert fmap
 798             = let inverted_pairs
 799                      = concatMap ( \ (a, bs) -> [(b,a) | b <- bs] ) fmap
 800                   sorted_inverted_pairs
 801                      = isort inverted_pairs
 802
 803                   grp :: Int -> [Int] -> [(Int,Int)] -> [(Int,[Int])]
 804                   grp k vs [] = [(k, vs)]
 805                   grp k vs ((kk,vv):rest)
 806                      | k == kk   = grp k (vv:vs) rest
 807                      | otherwise = (k,vs) : grp kk [vv] rest
 808
 809                   grp_start []             = []
 810                   grp_start ((kk,vv):rest) = grp kk [vv] rest
 811
 812                   grouped
 813                      = grp_start sorted_inverted_pairs
 814
 815                   -- make sure that the reverse mapping maps all inos
 816                   add_empties ino []
 817                      | ino > greatest_ino  = []
 818                      | otherwise           = (ino,[]): add_empties (ino+1) []
 819                   add_empties ino ((k,vs):rest)
 820                      | ino <  k   = (ino,[]): add_empties (ino+1) ((k,vs):rest)
 821                      | ino == k   = (k,vs) : add_empties (ino+1) rest
 822
 823                   -- This is nearly linear provided that the fsts of the
 824                   -- list are nearly in order -- a critical assumption
 825                   -- for efficiency.
 826                   isort :: [(Int,Int)] -> [(Int,Int)]
 827                   isort []     = []
 828                   isort (x:xs) = insert x (isort xs)
 829
 830                   insert :: (Int,Int) -> [(Int,Int)] -> [(Int,Int)]
 831                   insert y []     = [y]
 832                   insert y (z:zs)
 833                      -- specifically, this first test should almost always
 834                      -- be True in order for the near-linearity to happen
 835                      | fst y <= fst z  = y:z:zs
 836                      | otherwise       = z: insert y zs
 837               in
 838                  add_empties least_ino grouped
 839
 840          -- Finally ...
 841
 842          succ_list
 843             = mk_succ_map 0 [] insns
 844          succ_map
 845             = array (least_ino, greatest_ino) succ_list
 846          pred_list
 847             = invert succ_list
 848          pred_map
 849             = array (least_ino, greatest_ino) pred_list
 850      in
 851          (pred_map, succ_map)
 852
 853
 854 -- That's all, folks!  From here on is just some dull supporting stuff.
 855
 856 -- A data type for flow edges
 857 data FE
 858    = MkFE Int# Int# deriving (Eq, Ord)
 859
 860 -- deriving Show on types with unboxed fields doesn't work
 861 instance Show FE where
 862     showsPrec _ (MkFE s d)
 863        = showString "MkFE" . shows (I# s) . shows ' ' . shows (I# d)
 864
 865 -- Blargh.  Use ghc stuff soon!  Or: perhaps that's not such a good
 866 -- idea.  Most of these sets are either empty or very small, and it
 867 -- might be that the overheads of the FiniteMap based set implementation
 868 -- is a net loss.  The same might be true of RegSets.
 869
 870 newtype FeSet = MkFeSet [FE]
 871
 872 feSetFromList xs
 873    = MkFeSet (nukeDups (sort xs))
 874      where nukeDups :: [FE] -> [FE]
 875            nukeDups []  = []
 876            nukeDups [x] = [x]
 877            nukeDups (x:y:xys)
 878               = if x == y then nukeDups (y:xys)
 879                           else x : nukeDups (y:xys)
 880
 881 feSetToList (MkFeSet xs)            = xs
 882 isEmptyFeSet (MkFeSet xs)           = null xs
 883 emptyFeSet                          = MkFeSet []
 884 eqFeSet (MkFeSet xs1) (MkFeSet xs2) = xs1 == xs2
 885 unitFeSet x                         = MkFeSet [x]
 886
 887 elemFeSet x (MkFeSet xs)
 888    = f xs
 889      where
 890         f []     = False
 891         f (y:ys) | x == y    = True
 892                  | x < y     = False
 893                  | otherwise = f ys
 894
 895 unionFeSets (MkFeSet xs1) (MkFeSet xs2)
 896    = MkFeSet (f xs1 xs2)
 897      where
 898         f [] bs = bs
 899         f as [] = as
 900         f (a:as) (b:bs)
 901            | a < b      = a : f as (b:bs)
 902            | a > b      = b : f (a:as) bs
 903            | otherwise  = a : f as bs
 904
 905 minusFeSets (MkFeSet xs1) (MkFeSet xs2)
 906    = MkFeSet (f xs1 xs2)
 907      where
 908         f [] bs = []
 909         f as [] = as
 910         f (a:as) (b:bs)
 911            | a < b      = a : f as (b:bs)
 912            | a > b      = f (a:as) bs
 913            | otherwise  = f as bs
 914
 915 intersectionFeSets (MkFeSet xs1) (MkFeSet xs2)
 916    = MkFeSet (f xs1 xs2)
 917      where
 918         f [] bs = []
 919         f as [] = []
 920         f (a:as) (b:bs)
 921            | a < b      = f as (b:bs)
 922            | a > b      = f (a:as) bs
 923            | otherwise  = a : f as bs
 924
 925 \end{code}