ghc/compiler/nativeGen/AsmRegAlloc.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-2000
   3 %
   4 \section[AsmRegAlloc]{Register allocator}
   5
   6 \begin{code}
   7 module AsmRegAlloc ( runRegAllocate ) where
   8
   9 #include "HsVersions.h"
  10
  11 import MachCode         ( InstrBlock )
  12 import MachMisc         ( Instr(..) )
  13 import PprMach          ( pprInstr )    -- Just for debugging
  14 import MachRegs
  15 import RegAllocInfo
  16
  17 import FiniteMap        ( FiniteMap, emptyFM, addListToFM, delListFromFM,
  18                           lookupFM, keysFM, eltsFM, mapFM, addToFM_C, addToFM,
  19                           listToFM, fmToList, lookupWithDefaultFM )
  20 import Unique           ( mkBuiltinUnique )
  21 import OrdList          ( unitOL, appOL, fromOL, concatOL )
  22 import Outputable
  23 import Unique           ( Unique, Uniquable(..), mkPseudoUnique3 )
  24 import CLabel           ( CLabel, pprCLabel )
  25 import FastTypes
  26
  27 import List             ( mapAccumL, nub, sort )
  28 import Array            ( Array, array, (!), bounds )
  29 \end{code}
  30
  31 This is the generic register allocator.  It does allocation for all
  32 architectures.  Details for specific architectures are given in
  33 RegAllocInfo.lhs.  In practice the allocator needs to know next to
  34 nothing about an architecture to do its job:
  35
  36 * It needs to be given a list of the registers it can allocate to.
  37
  38 * It needs to be able to find out which registers each insn reads and
  39   writes.
  40
  41 * It needs be able to change registers in instructions into other
  42   registers.
  43
  44 * It needs to be able to find out where execution could go after an
  45   in instruction.
  46
  47 * It needs to be able to discover sets of registers which can be
  48   used to attempt spilling.
  49
  50 First we try something extremely simple.  If that fails, we have to do
  51 things the hard way.
  52
  53 \begin{code}
  54 runRegAllocate
  55     :: [Reg]
  56     -> ([Instr] -> [[Reg]])
  57     -> InstrBlock
  58     -> [Instr]
  59
  60 runRegAllocate regs find_reserve_regs instrs
  61   = --trace ("runRegAllocate: " ++ show regs) (
  62     case simpleAlloc of
  63        Just simple -> --trace "SIMPLE"
  64                       simple
  65        Nothing     -> --trace "GENERAL"
  66                       (tryGeneral reserves)
  67     --)
  68   where
  69     tryGeneral []
  70        = pprPanic "nativeGen: spilling failed.  Workaround: compile with -fvia-C.\n"
  71             ( (text "reserves = " <> ppr reserves)
  72               $$
  73               (text "code = ")
  74               $$
  75               (vcat (map pprInstr flatInstrs))
  76             )
  77     tryGeneral (resv:resvs)
  78        = case generalAlloc resv of
  79             Just success -> success
  80             Nothing      -> tryGeneral resvs
  81
  82     reserves           = find_reserve_regs flatInstrs
  83     flatInstrs         = fromOL instrs
  84     simpleAlloc        = doSimpleAlloc regs flatInstrs
  85     generalAlloc resvd = doGeneralAlloc regs resvd flatInstrs
  86 \end{code}
  87
  88 Rather than invoke the heavyweight machinery in @doGeneralAlloc@ for
  89 each and every code block, we first try using this simple, fast and
  90 utterly braindead allocator.  In practice it handles about 60\% of the
  91 code blocks really fast, even with only 3 integer registers available.
  92 Since we can always give up and fall back to @doGeneralAlloc@,
  93 @doSimpleAlloc@ is geared to handling the common case as fast as
  94 possible.  It will succeed only if:
  95
  96 * The code mentions registers only of integer class, not floating
  97   class.
  98
  99 * The code doesn't mention any real registers, so we don't have to
 100   think about dodging and weaving to work around fixed register uses.
 101
 102 * The code mentions at most N virtual registers, where N is the number
 103   of real registers for allocation.
 104
 105 If those conditions are satisfied, we simply trundle along the code,
 106 doling out a real register every time we see mention of a new virtual
 107 register.  We either succeed at this, or give up when one of the above
 108 three conditions is no longer satisfied.
 109
 110 \begin{code}
 111 doSimpleAlloc :: [Reg] -> [Instr] -> Maybe [Instr]
 112 doSimpleAlloc available_real_regs instrs
 113    = let available_iregs
 114             = filter ((== RcInteger).regClass) available_real_regs
 115
 116          trundle :: [( {-Virtual-}Reg, {-Real-}Reg )]
 117                     -> [ {-Real-}Reg ]
 118                     -> [Instr]
 119                     -> [Instr]
 120                     -> Maybe [Instr]
 121          trundle vreg_map uncommitted_rregs ris_done []
 122             = Just (reverse ris_done)
 123          trundle vreg_map uncommitted_rregs ris_done (i:is)
 124             = case regUsage i of
 125                  RU rds wrs
 126
 127                     -- Mentions no regs?  Move on quickly
 128                     |  null rds_l && null wrs_l
 129                     -> trundle vreg_map uncommitted_rregs (i:ris_done) is
 130
 131                     -- A case we can't be bothered to handle?
 132                     |  any isFloatingOrReal rds_l || any isFloatingOrReal wrs_l
 133                     -> Nothing
 134
 135                     -- Update the rreg commitments, and map the insn
 136                     |  otherwise
 137                     -> case upd_commitment (wrs_l++rds_l)
 138                                            vreg_map uncommitted_rregs of
 139                           Nothing -- out of rregs; give up
 140                              -> Nothing
 141                           Just (vreg_map2, uncommitted_rregs2)
 142                              -> let i2 = patchRegs i (subst_reg vreg_map2)
 143                                 in  trundle vreg_map2 uncommitted_rregs2
 144                                             (i2:ris_done) is
 145                        where
 146                           isFloatingOrReal reg
 147                              = isRealReg reg || regClass reg == RcFloat
 148                                              || regClass reg == RcDouble
 149
 150                           rds_l = regSetToList rds
 151                           wrs_l = regSetToList wrs
 152
 153                           upd_commitment [] vr_map uncomm
 154                              = Just (vr_map, uncomm)
 155                           upd_commitment (reg:regs) vr_map uncomm
 156                              | isRealReg reg
 157                              = upd_commitment regs vr_map uncomm
 158                              | reg `elem` (map fst vr_map)
 159                              = upd_commitment regs vr_map uncomm
 160                              | null uncomm
 161                              = Nothing
 162                              | otherwise
 163                              = upd_commitment regs ((reg, head uncomm):vr_map)
 164                                                    (tail uncomm)
 165
 166                           subst_reg vreg_map r
 167                              -- If it's a RealReg, it must be STG-specific one
 168                              -- (Hp,Sp,BaseReg,etc), since regUsage filters them out,
 169                              -- so isFloatingOrReal would not have objected to it.
 170                              | isRealReg r
 171                              = r
 172                              | otherwise
 173                              = case [rr | (vr,rr) <- vreg_map, vr == r] of
 174                                   [rr2] -> rr2
 175                                   other -> pprPanic
 176                                               "doSimpleAlloc: unmapped VirtualReg"
 177                                               (ppr r)
 178      in
 179          trundle [] available_iregs [] instrs
 180 \end{code}
 181
 182 From here onwards is the general register allocator and spiller.  For
 183 each flow edge (possible transition between instructions), we compute
 184 which virtual and real registers are live on that edge.  Then the
 185 mapping is inverted, to give a mapping from register (virtual+real) to
 186 sets of flow edges on which the register is live.  Finally, we can use
 187 those sets to decide whether a virtual reg v can be assigned to a real
 188 reg r, by checking that v's live-edge-set does not intersect with r's
 189 current live-edge-set.  Having made that assignment, we then augment
 190 r's current live-edge-set (its current commitment, you could say) with
 191 v's live-edge-set.
 192
 193 doGeneralAlloc takes reserve_regs as the regs to use as spill
 194 temporaries.  First it tries to allocate using all regs except
 195 reserve_regs.  If that fails, it inserts spill code and tries again to
 196 allocate regs, but this time with the spill temporaries available.
 197 Even this might not work if there are insufficient spill temporaries:
 198 in the worst case on x86, we'd need 3 of them, for insns like addl
 199 (%reg1,%reg2,4) %reg3, since this insn uses all 3 regs as input.
 200
 201 \begin{code}
 202 doGeneralAlloc
 203     :: [Reg]            -- all allocatable regs
 204     -> [Reg]            -- the reserve regs
 205     -> [Instr]          -- instrs in
 206     -> Maybe [Instr]    -- instrs out
 207
 208 doGeneralAlloc all_regs reserve_regs instrs
 209    -- succeeded without spilling
 210    | prespill_ok
 211    = Just prespill_insns
 212
 213    -- failed, and no spill regs avail, so pointless to attempt spilling
 214    | null reserve_regs  = Nothing
 215    -- success after spilling
 216    | postspill_ok       = maybetrace (spillMsg True) (Just postspill_insns)
 217    -- still not enough reserves after spilling; we have to give up
 218    | otherwise          = maybetrace (spillMsg False) Nothing
 219      where
 220          prespill_regs
 221             = filter (`notElem` reserve_regs) all_regs
 222          (prespill_ok, prespill_insns)
 223             = allocUsingTheseRegs instrs prespill_regs
 224          instrs_with_spill_code
 225             = insertSpillCode prespill_insns
 226          (postspill_ok, postspill_insns)
 227             = allocUsingTheseRegs instrs_with_spill_code all_regs
 228
 229          spillMsg success
 230             = "nativeGen: spilling "
 231               ++ (if success then "succeeded" else "failed   ")
 232               ++ " using "
 233               ++ showSDoc (hsep (map ppr reserve_regs))
 234
 235 #        ifdef NCG_DEBUG
 236          maybetrace msg x = trace msg x
 237 #        else
 238          maybetrace msg x = x
 239 #        endif
 240 \end{code}
 241
 242 Here we patch instructions that reference ``registers'' which are
 243 really in memory somewhere (the mapping is under the control of the
 244 machine-specific code generator).  We place the appropriate load
 245 sequences before any instructions that use memory registers as
 246 sources, and we place the appropriate spill sequences after any
 247 instructions that use memory registers as destinations.  The offending
 248 instructions are rewritten with new dynamic registers, so generalAlloc
 249 has to run register allocation again after all of this is said and
 250 done.
 251
 252 On some architectures (x86, currently), we do without a frame-pointer,
 253 and instead spill relative to the stack pointer (%esp on x86).
 254 Because the stack pointer may move, the patcher needs to keep track of
 255 the current stack pointer "delta".  That's easy, because all it needs
 256 to do is spot the DELTA bogus-insns which will have been inserted by
 257 the relevant insn selector precisely so as to notify the spiller of
 258 stack-pointer movement.  The delta is passed to loadReg and spillReg,
 259 since they generate the actual spill code.  We expect the final delta
 260 to be the same as the starting one (zero), reflecting the fact that
 261 changes to the stack pointer should not extend beyond a basic block.
 262
 263 Finally, there is the issue of mapping an arbitrary set of unallocated
 264 VirtualRegs into a contiguous sequence of spill slots.  The failed
 265 allocation will have left the code peppered with references to
 266 VirtualRegs, each of which contains a unique.  So we make an env which
 267 maps these VirtualRegs to integers, starting from zero, and pass that
 268 env through to loadReg and spillReg.  There, they are used to look up
 269 spill slot numbers for the uniques.
 270
 271 \begin{code}
 272 insertSpillCode :: [Instr] -> [Instr]
 273 insertSpillCode insns
 274    = let uniques_in_insns
 275             = map getUnique
 276                   (regSetToList
 277                      (foldl unionRegSets emptyRegSet
 278                             (map vregs_in_insn insns)))
 279          vregs_in_insn i
 280             = case regUsage i of
 281                  RU rds wrs -> filterRegSet isVirtualReg
 282                                              (rds `unionRegSets` wrs)
 283          vreg_to_slot_map :: FiniteMap Unique Int
 284          vreg_to_slot_map
 285             = listToFM (zip uniques_in_insns [0..])
 286
 287          ((final_stack_delta, final_ctr), insnss)
 288             = mapAccumL (patchInstr vreg_to_slot_map) (0,0) insns
 289      in
 290          if   final_stack_delta == 0
 291          then concat insnss
 292          else pprPanic "patchMem: non-zero final delta"
 293                        (int final_stack_delta)
 294
 295
 296 -- patchInstr has as a running state two Ints, one the current stack delta,
 297 -- needed to figure out offsets to stack slots on archs where we spill relative
 298 -- to the stack pointer, as opposed to the frame pointer.  The other is a
 299 -- counter, used to manufacture new temporary register names.
 300
 301 patchInstr :: FiniteMap Unique Int -> (Int,Int) -> Instr -> ((Int,Int), [Instr])
 302 patchInstr vreg_to_slot_map (delta,ctr) instr
 303
 304  | null memSrcs && null memDsts
 305  = ((delta',ctr), [instr])
 306
 307  | otherwise
 308  = ((delta',ctr'), loadSrcs ++ [instr'] ++ spillDsts)
 309    where
 310         delta' = case instr of DELTA d -> d ; _ -> delta
 311
 312         (RU srcs dsts) = regUsage instr
 313
 314         -- The instr being patched may mention several vregs -- those which
 315         -- could not be assigned real registers.  For each such vreg, we
 316         -- invent a new vreg, used only around this instruction and nowhere
 317         -- else.  These new vregs replace the unallocatable vregs; they are
 318         -- loaded from the spill area, the instruction is done with them,
 319         -- and results if any are then written back to the spill area.
 320         vregs_in_instr
 321            = nub (filter isVirtualReg
 322                          (regSetToList srcs ++ regSetToList dsts))
 323         n_vregs_in_instr
 324            = length vregs_in_instr
 325         ctr'
 326            = ctr + n_vregs_in_instr
 327         vreg_env
 328            = zip vregs_in_instr [ctr, ctr+1 ..]
 329
 330         mkTmpReg vreg
 331            | isVirtualReg vreg
 332            = case [vi | (vreg', vi) <- vreg_env, vreg' == vreg] of
 333                 [i] -> case regClass vreg of
 334                           RcInteger -> VirtualRegI (mkPseudoUnique3 i)
 335                           RcFloat   -> VirtualRegF (mkPseudoUnique3 i)
 336                           RcDouble  -> VirtualRegD (mkPseudoUnique3 i)
 337                 _   -> pprPanic "patchInstr: unmapped VReg" (ppr vreg)
 338            | otherwise
 339            = vreg
 340
 341         memSrcs   = filter isVirtualReg (regSetToList srcs)
 342         memDsts   = filter isVirtualReg (regSetToList dsts)
 343
 344         loadSrcs  = map load  memSrcs
 345         spillDsts = map spill memDsts
 346
 347         load mem  = loadReg  vreg_to_slot_map delta  mem (mkTmpReg mem)
 348         spill mem = spillReg vreg_to_slot_map delta' (mkTmpReg mem) mem
 349
 350         instr'    = patchRegs instr mkTmpReg
 351 \end{code}
 352
 353 allocUsingTheseRegs is the register allocator proper.  It attempts
 354 to allocate dynamic regs to real regs, given a list of real regs
 355 which it may use.  If it fails due to lack of real regs, the returned
 356 instructions use what real regs there are, but will retain uses of
 357 dynamic regs for which a real reg could not be found.  It is these
 358 leftover dynamic reg references which insertSpillCode will later
 359 assign to spill slots.
 360
 361 Some implementation notes.
 362 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 363 Instructions are numbered sequentially, starting at zero.
 364
 365 A flow edge (FE) is a pair of insn numbers (MkFE Int Int) denoting
 366 a possible flow of control from the first insn to the second.
 367
 368 The input to the register allocator is a list of instructions, which
 369 mention Regs.  A Reg can be a RealReg -- a real machine reg -- or a
 370 VirtualReg, which carries a unique.  After allocation, all the
 371 VirtualReg references will have been converted into RealRegs, and
 372 possible some spill code will have been inserted.
 373
 374 The heart of the register allocator works in four phases.
 375
 376 1.  (find_flow_edges) Calculate all the FEs for the code list.
 377     Return them not as a [FE], but implicitly, as a pair of
 378     Array Int [Int], being the successor and predecessor maps
 379     for instructions.
 380
 381 2.  (calc_liveness) Returns a FiniteMap FE RegSet.  For each
 382     FE, indicates the set of registers live on that FE.  Note
 383     that the set includes both RealRegs and VirtualRegs.  The
 384     former appear because the code could mention fixed register
 385     usages, and we need to take them into account from the start.
 386
 387 3.  (calc_live_range_sets) Invert the above mapping, giving a
 388     FiniteMap Reg FeSet, indicating, for each virtual and real
 389     reg mentioned in the code, which FEs it is live on.
 390
 391 4.  (calc_vreg_to_rreg_mapping) For virtual reg, try and find
 392     an allocatable real register for it.  Each real register has
 393     a "current commitment", indicating the set of FEs it is
 394     currently live on.  A virtual reg v can be assigned to
 395     real reg r iff v's live-fe-set does not intersect with r's
 396     current commitment fe-set.  If the assignment is made,
 397     v's live-fe-set is union'd into r's current commitment fe-set.
 398     There is also the minor restriction that v and r must be of
 399     the same register class (integer or floating).
 400
 401     Once this mapping is established, we simply apply it to the
 402     input insns, and that's it.
 403
 404     If no suitable real register can be found, the vreg is mapped
 405     to itself, and we deem allocation to have failed.  The partially
 406     allocated code is returned.  The higher echelons of the allocator
 407     (doGeneralAlloc and runRegAlloc) then cooperate to insert spill
 408     code and re-run allocation, until a successful allocation is found.
 409 \begin{code}
 410
 411 allocUsingTheseRegs :: [Instr] -> [Reg] -> (Bool, [Instr])
 412 allocUsingTheseRegs instrs available_real_regs
 413    = let (all_vregs_mapped, v_to_r_mapping)
 414             = calc_vreg_to_rreg_mapping instrs available_real_regs
 415          new_insns
 416             = map (flip patchRegs sr) instrs
 417          sr reg
 418             | isRealReg reg
 419             = reg
 420             | otherwise
 421             = case lookupFM v_to_r_mapping reg of
 422                  Just r  -> r
 423                  Nothing -> pprPanic "allocateUsingTheseRegs: unmapped vreg: "
 424                                      (ppr reg)
 425      in
 426          --trace ("allocUsingTheseRegs: " ++ show available_real_regs) (
 427          (all_vregs_mapped, new_insns)
 428          --)
 429
 430
 431 -- the heart of the matter.
 432 calc_vreg_to_rreg_mapping :: [Instr] -> [Reg] -> (Bool, FiniteMap Reg Reg)
 433 calc_vreg_to_rreg_mapping insns available_real_regs
 434    = let
 435          lr_sets  :: FiniteMap Reg FeSet
 436          lr_sets = calc_live_range_sets insns
 437
 438          -- lr_sets maps: vregs mentioned in insns to sets of live FEs
 439          -- and also:     rregs mentioned in insns to sets of live FEs
 440          -- We need to extract the rreg mapping, and use it as the
 441          -- initial real-register-commitment.  Also, add to the initial
 442          -- commitment, empty commitments for any real regs not
 443          -- mentioned in it.
 444
 445          -- which real regs do we want to keep track of in the running
 446          -- commitment mapping?  Precisely the available_real_regs.
 447          -- We don't care about real regs mentioned by insns which are
 448          -- not in this list, since we're not allocating to them.
 449          initial_rr_commitment :: FiniteMap Reg FeSet
 450          initial_rr_commitment
 451             = listToFM [(rreg,
 452                          case lookupFM lr_sets rreg of
 453                             Nothing            -> emptyFeSet
 454                             Just fixed_use_fes -> fixed_use_fes
 455                         )
 456                         | rreg <- available_real_regs]
 457
 458          -- These are the vregs for which we actually have to (try to)
 459          -- assign a real register. (ie, the whole reason we're here at all :)
 460          vreg_liveness_list :: [(Reg, FeSet)]
 461          vreg_liveness_list = filter (not.isRealReg.fst)
 462                                      (fmToList lr_sets)
 463
 464          -- A loop, which attempts to assign each vreg to a rreg.
 465          loop rr_commitment v_to_r_map []
 466             = v_to_r_map
 467          loop rr_commitment v_to_r_map ((vreg,vreg_live_fes):not_yet_done)
 468             = let
 469                   -- find a real reg which is not live for any of vreg_live_fes
 470                   cand_reals
 471                      = [rreg
 472                            | (rreg,rreg_live_FEs) <- fmToList rr_commitment,
 473                               regClass vreg == regClass rreg,
 474                               isEmptyFeSet (intersectionFeSets rreg_live_FEs
 475                                                                vreg_live_fes)
 476                        ]
 477               in
 478                  case cand_reals of
 479                     [] -> -- bummer.  No register is available.  Just go on to
 480                           -- the next vreg, mapping the vreg to itself.
 481                           loop rr_commitment (addToFM v_to_r_map vreg vreg)
 482                                not_yet_done
 483                     (r:_)
 484                        -> -- Hurrah!  Found a free reg of the right class.
 485                           -- Now we need to update the RR commitment.
 486                           loop rr_commitment2 (addToFM v_to_r_map vreg r)
 487                                not_yet_done
 488                           where
 489                              rr_commitment2
 490                                 = addToFM_C unionFeSets rr_commitment r
 491                                             vreg_live_fes
 492
 493          -- the final vreg to rreg mapping
 494          vreg_assignment
 495             = loop initial_rr_commitment emptyFM vreg_liveness_list
 496          -- did we succeed in mapping everyone to a real reg?
 497          allocation_succeeded
 498             = all isRealReg (eltsFM vreg_assignment)
 499      in
 500          (allocation_succeeded, vreg_assignment)
 501
 502
 503
 504 -- calculate liveness, then produce the live range info
 505 -- as a mapping of VRegs to the set of FEs on which they are live.
 506 -- The difficult part is inverting the mapping of Reg -> FeSet
 507 -- to produce a mapping FE -> RegSet.
 508
 509 calc_live_range_sets :: [Instr] -> FiniteMap Reg FeSet
 510 calc_live_range_sets insns
 511    = let
 512          -- this is the "original" (old) mapping
 513          lis :: FiniteMap FE RegSet
 514          lis = calc_liveness insns
 515
 516          -- establish the totality of reg names mentioned by the
 517          -- insns, by scanning over the insns.
 518          all_mentioned_regs :: RegSet
 519          all_mentioned_regs
 520             = foldl unionRegSets emptyRegSet
 521                     (map (\i -> case regUsage i of
 522                                    RU rds wrs -> unionRegSets rds wrs)
 523                          insns)
 524
 525          -- Initial inverted mapping, from Reg to sets of FEs
 526          initial_imap :: FiniteMap Reg FeSet
 527          initial_imap
 528             = listToFM [(reg, emptyFeSet)
 529                         | reg <- regSetToList all_mentioned_regs]
 530
 531          -- Update the new map with one element of the old map
 532          upd_imap :: FiniteMap Reg FeSet -> (FE, RegSet)
 533                      -> FiniteMap Reg FeSet
 534          upd_imap imap (fe, regset)
 535              = foldl upd_1_imap imap (regSetToList regset)
 536                where
 537                   upd_1_imap curr reg
 538                      = addToFM_C unionFeSets curr reg (unitFeSet fe)
 539
 540          -- the complete inverse mapping
 541          final_imap :: FiniteMap Reg FeSet
 542          final_imap
 543              = foldl upd_imap initial_imap (fmToList lis)
 544      in
 545          final_imap
 546
 547
 548
 549 -- Given the insns, calculate the FEs, and then doing fixpointing to
 550 -- figure out the set of live regs (virtual regs AND real regs) live
 551 -- on each FE.
 552
 553 calc_liveness :: [Instr] -> FiniteMap FE RegSet
 554 calc_liveness insns
 555    = let (pred_map, succ_map)
 556             = find_flow_edges insns
 557
 558          -- We use the convention that if the current approximation
 559          -- doesn't give a mapping for some FE, that FE maps to the
 560          -- empty set.
 561          initial_approx, fixpoint :: FiniteMap FE RegSet
 562          initial_approx
 563             = mk_initial_approx 0 insns succ_map emptyFM
 564          fixpoint
 565             = fix_set initial_approx 1
 566               -- If you want to live dangerously, and promise that the code
 567               -- doesn't contain any loops (ie, there are no back edges in
 568               -- the flow graph), you should be able to get away with this:
 569               -- = upd_liveness_info pred_map succ_map insn_array initial_approx
 570               -- But since I'm paranoid, and since it hardly makes any difference
 571               -- to the compiler run-time (about 0.1%), I prefer to do the
 572               -- the full fixpointing game.
 573
 574          insn_array
 575             = let n = length insns
 576               in  array (0, n-1) (zip [0..] insns)
 577
 578          sameSets []     []       = True
 579          sameSets (c:cs) (n:ns)   = eqRegSets c n && sameSets cs ns
 580          sameSets _      _        = False
 581
 582          fix_set curr_approx iter_number
 583             = let next_approx
 584                      = upd_liveness_info pred_map succ_map insn_array curr_approx
 585                   curr_sets
 586                      = eltsFM curr_approx
 587                   next_sets
 588                      = eltsFM next_approx
 589                   same
 590                      = sameSets curr_sets next_sets
 591                   final_approx
 592                      = if same then curr_approx
 593                                else fix_set next_approx (iter_number+1)
 594               in
 595                   --trace (let qqq (fe, regset)
 596                   --             = show fe ++ "  " ++ show (regSetToList regset)
 597                   --       in
 598                   --          "\n::iteration " ++ show iter_number ++ "\n"
 599                   --          ++ (unlines . map qqq . fmToList)
 600                   --                               next_approx ++"\n"
 601                   --      )
 602                   final_approx
 603      in
 604          fixpoint
 605
 606
 607 -- Create a correct initial approximation.  For each instruction that
 608 -- writes a register, we deem that the register is live on the
 609 -- flow edges leaving the instruction.  Subsequent iterations of
 610 -- the liveness AbI augment this based purely on reads of regs, not
 611 -- writes.  We need to start off with at least this minimal write-
 612 -- based information in order that writes to vregs which are never
 613 -- used have non-empty live ranges.  If we don't do that, we eventually
 614 -- wind up assigning such vregs to any old real reg, since they don't
 615 -- apparently conflict -- you can't conflict with an empty live range.
 616 -- This kludge is unfortunate, but we need to do it to cover not only
 617 -- writes to vregs which are never used, but also to deal correctly
 618 -- with the fact that calls to C will trash the callee saves registers.
 619
 620 mk_initial_approx :: Int -> [Instr] -> Array Int [Int]
 621                      -> FiniteMap FE RegSet
 622                      -> FiniteMap FE RegSet
 623 mk_initial_approx ino [] succ_map ia_so_far
 624    = ia_so_far
 625 mk_initial_approx ino (i:is) succ_map ia_so_far
 626    = let wrs
 627             = case regUsage i of RU rrr www -> www
 628          new_fes
 629             = [case iUnbox ino of      { inoh ->
 630                case iUnbox ino_succ of { ino_succh ->
 631                MkFE inoh ino_succh
 632                }}
 633                   | ino_succ <- succ_map ! ino]
 634
 635          loop [] ia = ia
 636          loop (fe:fes) ia
 637             = loop fes (addToFM_C unionRegSets ia fe wrs)
 638
 639          next_ia
 640             = loop new_fes ia_so_far
 641      in
 642          mk_initial_approx (ino+1) is succ_map next_ia
 643
 644
 645 -- Do one step in the liveness info calculation (AbI :).  Given the
 646 -- prior approximation (which tells you a subset of live VRegs+RRegs
 647 -- for each flow edge), calculate new information for all FEs.
 648 -- Rather than do this by iterating over FEs, it's easier to iterate
 649 -- over insns, and update their incoming FEs.
 650
 651 upd_liveness_info :: Array Int [Int]         -- instruction pred map
 652                      -> Array Int [Int]      -- instruction succ map
 653                      -> Array Int Instr      -- array of instructions
 654                      -> FiniteMap FE RegSet  -- previous approx
 655                      -> FiniteMap FE RegSet  -- improved approx
 656
 657 upd_liveness_info pred_map succ_map insn_array prev_approx
 658    = do_insns hi prev_approx
 659      where
 660         (lo, hi) = bounds insn_array
 661
 662         enquireMapFE :: FiniteMap FE RegSet -> FE
 663                         -> RegSet
 664         enquireMapFE fm fe
 665            = case lookupFM fm fe of
 666                 Just set -> set
 667                 Nothing  -> emptyRegSet
 668
 669         -- Work backwards, from the highest numbered insn to the lowest.
 670         -- This is a heuristic which causes faster convergence to the
 671         -- fixed point.  In particular, for straight-line code with no
 672         -- branches at all, arrives at the fixpoint in one iteration.
 673         do_insns ino approx
 674            | ino < lo
 675            = approx
 676            | otherwise
 677            = let fes_to_futures
 678                     = [case iUnbox ino of        { inoh ->
 679                        case iUnbox future_ino of { future_inoh ->
 680                        MkFE inoh future_inoh
 681                        }}
 682                           | future_ino <- succ_map ! ino]
 683                  future_lives
 684                     = map (enquireMapFE approx) fes_to_futures
 685                  future_live
 686                     = foldr unionRegSets emptyRegSet future_lives
 687
 688                  fes_from_histories
 689                     = [case iUnbox history_ino of { history_inoh ->
 690                        case iUnbox ino of         { inoh ->
 691                        MkFE history_inoh inoh
 692                        }}
 693                           | history_ino <- pred_map ! ino]
 694                  new_approx
 695                     = foldl update_one_history approx fes_from_histories
 696
 697                  insn
 698                     = insn_array ! ino
 699                  history_independent_component
 700                     = case regUsage insn of
 701                          RU rds wrs
 702                             -> unionRegSets rds
 703                                   (minusRegSets future_live wrs)
 704
 705                  update_one_history :: FiniteMap FE RegSet
 706                                        -> FE
 707                                        -> FiniteMap FE RegSet
 708                  update_one_history approx0 fe
 709                       = addToFM_C unionRegSets approx0 fe
 710                                   history_independent_component
 711
 712                  rest_done
 713                     = do_insns (ino-1) new_approx
 714              in
 715                  rest_done
 716
 717
 718
 719 -- Extract the flow edges from a list of insns.  Express the information
 720 -- as two mappings, from insn number to insn numbers of predecessors,
 721 -- and from insn number to insn numbers of successors.  (Since that's
 722 -- what we need to know when computing live ranges later).  Instructions
 723 -- are numbered starting at zero.  This function is long and complex
 724 -- in order to be efficient; it could equally well be shorter and slower.
 725
 726 find_flow_edges :: [Instr] -> (Array Int [Int],
 727                                Array Int [Int])
 728 find_flow_edges insns
 729    = let
 730          -- First phase: make a temp env which maps labels
 731          -- to insn numbers, so the second pass can know the insn
 732          -- numbers for jump targets.
 733
 734          label_env :: FiniteMap CLabel Int
 735
 736          mk_label_env n env [] = env
 737          mk_label_env n env ((LABEL clbl):is)
 738             = mk_label_env (n+1) (addToFM env clbl n) is
 739          mk_label_env n env (i:is)
 740             = mk_label_env (n+1) env is
 741
 742          label_env = mk_label_env 0 emptyFM insns
 743
 744          find_label :: CLabel -> Int
 745          find_label jmptarget
 746             = case lookupFM label_env jmptarget of
 747                  Just ino -> ino
 748                  Nothing  -> pprPanic "find_flow_edges: unmapped label"
 749                                       (pprCLabel jmptarget)
 750
 751          -- Second phase: traverse the insns, and make up the successor map.
 752
 753          least_ino, greatest_ino :: Int
 754          least_ino    = 0
 755          greatest_ino = length insns - 1
 756
 757          mk_succ_map :: Int -> [(Int, [Int])] -> [Instr] -> [(Int, [Int])]
 758
 759          mk_succ_map i_num rsucc_map []
 760             = reverse rsucc_map
 761
 762          mk_succ_map i_num rsucc_map (i:is)
 763             = let i_num_1 = i_num + 1
 764               in
 765               case insnFuture i of
 766
 767                  NoFuture
 768                     -> -- A non-local jump.  We can regard this insn as a terminal
 769                        -- insn in the graph, so we don't add any edges.
 770                        mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 771
 772                  Next
 773                     |  null is -- this is the last insn, and it doesn't go anywhere
 774                                -- (a meaningless scenario); handle it anyway
 775                     -> mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 776
 777                     |  otherwise -- flows to next insn; add fe i_num -> i_num+1
 778                     -> mk_succ_map i_num_1 ((i_num, [i_num_1]): rsucc_map)
 779                                            is
 780
 781                  Branch lab -- jmps to lab; add fe i_num -> i_target
 782                     -> let i_target = find_label lab
 783                        in
 784                        mk_succ_map i_num_1 ((i_num, [i_target]): rsucc_map) is
 785
 786                  NextOrBranch lab
 787                     |  null is   -- jmps to label, or falls through, and this is
 788                                  -- the last insn (a meaningless scenario);
 789                                  -- flag an error
 790                     -> error "find_flow_edges: NextOrBranch is last"
 791
 792                     |  otherwise -- add fes i_num -> i_num+1
 793                                  --     and i_num -> i_target
 794                     -> let i_target = find_label lab
 795                        in
 796                        mk_succ_map i_num_1 ((i_num, [i_num_1, i_target]):rsucc_map)
 797                                            is
 798                  MultiFuture labels
 799                     -> -- A jump, whose targets are listed explicitly.
 800                        -- (Generated from table-based switch translations).
 801                        -- Add fes  i_num -> x  for each x in labels
 802                        let is_target = nub (map find_label labels)
 803                        in
 804                        mk_succ_map i_num_1 ((i_num, is_target):rsucc_map) is
 805
 806          -- Third phase: invert the successor map to get the predecessor
 807          -- map, using an algorithm which is quadratic in the worst case,
 808          -- but runs in almost-linear time, because of the nature of our
 809          -- inputs: most insns have a single successor, the next insn.
 810
 811          invert :: [(Int, [Int])] -> [(Int, [Int])]
 812          invert fmap
 813             = let inverted_pairs
 814                      = concatMap ( \ (a, bs) -> [(b,a) | b <- bs] ) fmap
 815                   sorted_inverted_pairs
 816                      = isort inverted_pairs
 817
 818                   grp :: Int -> [Int] -> [(Int,Int)] -> [(Int,[Int])]
 819                   grp k vs [] = [(k, vs)]
 820                   grp k vs ((kk,vv):rest)
 821                      | k == kk   = grp k (vv:vs) rest
 822                      | otherwise = (k,vs) : grp kk [vv] rest
 823
 824                   grp_start []             = []
 825                   grp_start ((kk,vv):rest) = grp kk [vv] rest
 826
 827                   grouped
 828                      = grp_start sorted_inverted_pairs
 829
 830                   -- make sure that the reverse mapping maps all inos
 831                   add_empties ino []
 832                      | ino > greatest_ino  = []
 833                      | otherwise           = (ino,[]): add_empties (ino+1) []
 834                   add_empties ino ((k,vs):rest)
 835                      | ino <  k   = (ino,[]): add_empties (ino+1) ((k,vs):rest)
 836                      | ino == k   = (k,vs) : add_empties (ino+1) rest
 837
 838                   -- This is nearly linear provided that the fsts of the
 839                   -- list are nearly in order -- a critical assumption
 840                   -- for efficiency.
 841                   isort :: [(Int,Int)] -> [(Int,Int)]
 842                   isort []     = []
 843                   isort (x:xs) = insert x (isort xs)
 844
 845                   insert :: (Int,Int) -> [(Int,Int)] -> [(Int,Int)]
 846                   insert y []     = [y]
 847                   insert y (z:zs)
 848                      -- specifically, this first test should almost always
 849                      -- be True in order for the near-linearity to happen
 850                      | fst y <= fst z  = y:z:zs
 851                      | otherwise       = z: insert y zs
 852               in
 853                  add_empties least_ino grouped
 854
 855          -- Finally ...
 856
 857          succ_list
 858             = mk_succ_map 0 [] insns
 859          succ_map
 860             = array (least_ino, greatest_ino) succ_list
 861          pred_list
 862             = invert succ_list
 863          pred_map
 864             = array (least_ino, greatest_ino) pred_list
 865      in
 866          (pred_map, succ_map)
 867
 868
 869 -- That's all, folks!  From here on is just some dull supporting stuff.
 870
 871 -- A data type for flow edges
 872 data FE
 873    = MkFE FastInt FastInt deriving (Eq, Ord)
 874
 875 -- deriving Show on types with unboxed fields doesn't work
 876 instance Show FE where
 877     showsPrec _ (MkFE s d)
 878        = showString "MkFE" . shows (iBox s) . shows ' ' . shows (iBox d)
 879
 880 -- Blargh.  Use ghc stuff soon!  Or: perhaps that's not such a good
 881 -- idea.  Most of these sets are either empty or very small, and it
 882 -- might be that the overheads of the FiniteMap based set implementation
 883 -- is a net loss.  The same might be true of RegSets.
 884
 885 newtype FeSet = MkFeSet [FE]
 886
 887 feSetFromList xs
 888    = MkFeSet (nukeDups (sort xs))
 889      where nukeDups :: [FE] -> [FE]
 890            nukeDups []  = []
 891            nukeDups [x] = [x]
 892            nukeDups (x:y:xys)
 893               = if x == y then nukeDups (y:xys)
 894                           else x : nukeDups (y:xys)
 895
 896 feSetToList (MkFeSet xs)            = xs
 897 isEmptyFeSet (MkFeSet xs)           = null xs
 898 emptyFeSet                          = MkFeSet []
 899 eqFeSet (MkFeSet xs1) (MkFeSet xs2) = xs1 == xs2
 900 unitFeSet x                         = MkFeSet [x]
 901
 902 elemFeSet x (MkFeSet xs)
 903    = f xs
 904      where
 905         f []     = False
 906         f (y:ys) | x == y    = True
 907                  | x < y     = False
 908                  | otherwise = f ys
 909
 910 unionFeSets (MkFeSet xs1) (MkFeSet xs2)
 911    = MkFeSet (f xs1 xs2)
 912      where
 913         f [] bs = bs
 914         f as [] = as
 915         f (a:as) (b:bs)
 916            | a < b      = a : f as (b:bs)
 917            | a > b      = b : f (a:as) bs
 918            | otherwise  = a : f as bs
 919
 920 minusFeSets (MkFeSet xs1) (MkFeSet xs2)
 921    = MkFeSet (f xs1 xs2)
 922      where
 923         f [] bs = []
 924         f as [] = as
 925         f (a:as) (b:bs)
 926            | a < b      = a : f as (b:bs)
 927            | a > b      = f (a:as) bs
 928            | otherwise  = f as bs
 929
 930 intersectionFeSets (MkFeSet xs1) (MkFeSet xs2)
 931    = MkFeSet (f xs1 xs2)
 932      where
 933         f [] bs = []
 934         f as [] = []
 935         f (a:as) (b:bs)
 936            | a < b      = f as (b:bs)
 937            | a > b      = f (a:as) bs
 938            | otherwise  = a : f as bs
 939
 940 \end{code}