ghc/compiler/nativeGen/AsmRegAlloc.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-2000
   3 %
   4 \section[AsmRegAlloc]{Register allocator}
   5
   6 \begin{code}
   7 module AsmRegAlloc ( runRegAllocate ) where
   8
   9 #include "HsVersions.h"
  10
  11 import MachCode         ( InstrBlock )
  12 import MachMisc         ( Instr(..) )
  13 import PprMach          ( pprInstr )    -- Just for debugging
  14 import MachRegs
  15 import RegAllocInfo
  16
  17 import FiniteMap        ( FiniteMap, emptyFM,
  18                           lookupFM, eltsFM, addToFM_C, addToFM,
  19                           listToFM, fmToList )
  20 import OrdList          ( fromOL )
  21 import Outputable
  22 import Unique           ( mkPseudoUnique3 )
  23 import CLabel           ( CLabel, pprCLabel )
  24 import FastTypes
  25
  26 import List             ( mapAccumL, nub, sort )
  27 import Array            ( Array, array, (!), bounds )
  28 \end{code}
  29
  30 This is the generic register allocator.  It does allocation for all
  31 architectures.  Details for specific architectures are given in
  32 RegAllocInfo.lhs.  In practice the allocator needs to know next to
  33 nothing about an architecture to do its job:
  34
  35 * It needs to be given a list of the registers it can allocate to.
  36
  37 * It needs to be able to find out which registers each insn reads and
  38   writes.
  39
  40 * It needs be able to change registers in instructions into other
  41   registers.
  42
  43 * It needs to be able to find out where execution could go after an
  44   in instruction.
  45
  46 * It needs to be able to discover sets of registers which can be
  47   used to attempt spilling.
  48
  49 First we try something extremely simple.  If that fails, we have to do
  50 things the hard way.
  51
  52 \begin{code}
  53 runRegAllocate
  54     :: [Reg]
  55     -> ([Instr] -> [[Reg]])
  56     -> InstrBlock
  57     -> [Instr]
  58
  59 runRegAllocate regs find_reserve_regs instrs
  60   = --trace ("runRegAllocate: " ++ show regs) (
  61     case simpleAlloc of
  62        Just simple -> --trace "SIMPLE"
  63                       simple
  64        Nothing     -> --trace "GENERAL"
  65                       (tryGeneral reserves)
  66     --)
  67   where
  68     tryGeneral []
  69        = pprPanic "nativeGen: spilling failed.  Workaround: compile with -fvia-C.\n"
  70             ( (text "reserves = " <> ppr reserves)
  71               $$
  72               (text "code = ")
  73               $$
  74               (vcat (map (docToSDoc.pprInstr) flatInstrs))
  75             )
  76     tryGeneral (resv:resvs)
  77        = case generalAlloc resv of
  78             Just success -> success
  79             Nothing      -> tryGeneral resvs
  80
  81     reserves           = find_reserve_regs flatInstrs
  82     flatInstrs         = fromOL instrs
  83     simpleAlloc        = doSimpleAlloc regs flatInstrs
  84     generalAlloc resvd = doGeneralAlloc regs resvd flatInstrs
  85 \end{code}
  86
  87 Rather than invoke the heavyweight machinery in @doGeneralAlloc@ for
  88 each and every code block, we first try using this simple, fast and
  89 utterly braindead allocator.  In practice it handles about 60\% of the
  90 code blocks really fast, even with only 3 integer registers available.
  91 Since we can always give up and fall back to @doGeneralAlloc@,
  92 @doSimpleAlloc@ is geared to handling the common case as fast as
  93 possible.  It will succeed only if:
  94
  95 * The code mentions registers only of integer class, not floating
  96   class.
  97
  98 * The code doesn't mention any real registers, so we don't have to
  99   think about dodging and weaving to work around fixed register uses.
 100
 101 * The code mentions at most N virtual registers, where N is the number
 102   of real registers for allocation.
 103
 104 If those conditions are satisfied, we simply trundle along the code,
 105 doling out a real register every time we see mention of a new virtual
 106 register.  We either succeed at this, or give up when one of the above
 107 three conditions is no longer satisfied.
 108
 109 \begin{code}
 110 doSimpleAlloc :: [Reg] -> [Instr] -> Maybe [Instr]
 111 doSimpleAlloc available_real_regs instrs
 112    = let available_iregs
 113             = filter ((== RcInteger).regClass) available_real_regs
 114
 115          trundle :: [( {-Virtual-}Reg, {-Real-}Reg )]
 116                     -> [ {-Real-}Reg ]
 117                     -> [Instr]
 118                     -> [Instr]
 119                     -> Maybe [Instr]
 120          trundle vreg_map uncommitted_rregs ris_done []
 121             = Just (reverse ris_done)
 122          trundle vreg_map uncommitted_rregs ris_done (i:is)
 123             = case regUsage i of
 124                  RU rds wrs
 125
 126                     -- Mentions no regs?  Move on quickly
 127                     |  null rds_l && null wrs_l
 128                     -> trundle vreg_map uncommitted_rregs (i:ris_done) is
 129
 130                     -- A case we can't be bothered to handle?
 131                     |  any isFloatingOrReal rds_l || any isFloatingOrReal wrs_l
 132                     -> Nothing
 133
 134                     -- Update the rreg commitments, and map the insn
 135                     |  otherwise
 136                     -> case upd_commitment (wrs_l++rds_l)
 137                                            vreg_map uncommitted_rregs of
 138                           Nothing -- out of rregs; give up
 139                              -> Nothing
 140                           Just (vreg_map2, uncommitted_rregs2)
 141                              -> let i2 = patchRegs i (subst_reg vreg_map2)
 142                                 in  trundle vreg_map2 uncommitted_rregs2
 143                                             (i2:ris_done) is
 144                        where
 145                           isFloatingOrReal reg
 146                              = isRealReg reg || regClass reg == RcFloat
 147                                              || regClass reg == RcDouble
 148
 149                           rds_l = regSetToList rds
 150                           wrs_l = regSetToList wrs
 151
 152                           upd_commitment [] vr_map uncomm
 153                              = Just (vr_map, uncomm)
 154                           upd_commitment (reg:regs) vr_map uncomm
 155                              | isRealReg reg
 156                              = upd_commitment regs vr_map uncomm
 157                              | reg `elem` (map fst vr_map)
 158                              = upd_commitment regs vr_map uncomm
 159                              | null uncomm
 160                              = Nothing
 161                              | otherwise
 162                              = upd_commitment regs ((reg, head uncomm):vr_map)
 163                                                    (tail uncomm)
 164
 165                           subst_reg vreg_map r
 166                              -- If it's a RealReg, it must be STG-specific one
 167                              -- (Hp,Sp,BaseReg,etc), since regUsage filters them out,
 168                              -- so isFloatingOrReal would not have objected to it.
 169                              | isRealReg r
 170                              = r
 171                              | otherwise
 172                              = case [rr | (vr,rr) <- vreg_map, vr == r] of
 173                                   [rr2] -> rr2
 174                                   other -> pprPanic
 175                                               "doSimpleAlloc: unmapped VirtualReg"
 176                                               (ppr r)
 177      in
 178          trundle [] available_iregs [] instrs
 179 \end{code}
 180
 181 From here onwards is the general register allocator and spiller.  For
 182 each flow edge (possible transition between instructions), we compute
 183 which virtual and real registers are live on that edge.  Then the
 184 mapping is inverted, to give a mapping from register (virtual+real) to
 185 sets of flow edges on which the register is live.  Finally, we can use
 186 those sets to decide whether a virtual reg v can be assigned to a real
 187 reg r, by checking that v's live-edge-set does not intersect with r's
 188 current live-edge-set.  Having made that assignment, we then augment
 189 r's current live-edge-set (its current commitment, you could say) with
 190 v's live-edge-set.
 191
 192 doGeneralAlloc takes reserve_regs as the regs to use as spill
 193 temporaries.  First it tries to allocate using all regs except
 194 reserve_regs.  If that fails, it inserts spill code and tries again to
 195 allocate regs, but this time with the spill temporaries available.
 196 Even this might not work if there are insufficient spill temporaries:
 197 in the worst case on x86, we'd need 3 of them, for insns like addl
 198 (%reg1,%reg2,4) %reg3, since this insn uses all 3 regs as input.
 199
 200 \begin{code}
 201 doGeneralAlloc
 202     :: [Reg]            -- all allocatable regs
 203     -> [Reg]            -- the reserve regs
 204     -> [Instr]          -- instrs in
 205     -> Maybe [Instr]    -- instrs out
 206
 207 doGeneralAlloc all_regs reserve_regs instrs
 208    -- succeeded without spilling
 209    | prespill_ok
 210    = Just prespill_insns
 211
 212    -- failed, and no spill regs avail, so pointless to attempt spilling
 213    | null reserve_regs  = Nothing
 214    -- success after spilling
 215    | postspill_ok       = maybetrace (spillMsg True) (Just postspill_insns)
 216    -- still not enough reserves after spilling; we have to give up
 217    | otherwise          = maybetrace (spillMsg False) Nothing
 218      where
 219          prespill_regs
 220             = filter (`notElem` reserve_regs) all_regs
 221          (prespill_ok, prespill_insns)
 222             = allocUsingTheseRegs instrs prespill_regs
 223          instrs_with_spill_code
 224             = insertSpillCode prespill_insns
 225          (postspill_ok, postspill_insns)
 226             = allocUsingTheseRegs instrs_with_spill_code all_regs
 227
 228          spillMsg success
 229             = "nativeGen: spilling "
 230               ++ (if success then "succeeded" else "failed   ")
 231               ++ " using "
 232               ++ showSDoc (hsep (map ppr reserve_regs))
 233
 234 #        ifdef NCG_DEBUG
 235          maybetrace msg x = trace msg x
 236 #        else
 237          maybetrace msg x = x
 238 #        endif
 239 \end{code}
 240
 241 Here we patch instructions that reference ``registers'' which are
 242 really in memory somewhere (the mapping is under the control of the
 243 machine-specific code generator).  We place the appropriate load
 244 sequences before any instructions that use memory registers as
 245 sources, and we place the appropriate spill sequences after any
 246 instructions that use memory registers as destinations.  The offending
 247 instructions are rewritten with new dynamic registers, so generalAlloc
 248 has to run register allocation again after all of this is said and
 249 done.
 250
 251 On some architectures (x86, currently), we do without a frame-pointer,
 252 and instead spill relative to the stack pointer (%esp on x86).
 253 Because the stack pointer may move, the patcher needs to keep track of
 254 the current stack pointer "delta".  That's easy, because all it needs
 255 to do is spot the DELTA bogus-insns which will have been inserted by
 256 the relevant insn selector precisely so as to notify the spiller of
 257 stack-pointer movement.  The delta is passed to loadReg and spillReg,
 258 since they generate the actual spill code.  We expect the final delta
 259 to be the same as the starting one (zero), reflecting the fact that
 260 changes to the stack pointer should not extend beyond a basic block.
 261
 262 Finally, there is the issue of mapping an arbitrary set of unallocated
 263 VirtualRegs into a contiguous sequence of spill slots.  The failed
 264 allocation will have left the code peppered with references to
 265 VirtualRegs, each of which contains a unique.  So we make an env which
 266 maps these VirtualRegs to integers, starting from zero, and pass that
 267 env through to loadReg and spillReg.  There, they are used to look up
 268 spill slot numbers for the uniques.
 269
 270 \begin{code}
 271 insertSpillCode :: [Instr] -> [Instr]
 272 insertSpillCode insns
 273    = let uniques_in_insns
 274             = map getVRegUnique
 275                   (regSetToList
 276                      (foldl unionRegSets emptyRegSet
 277                             (map vregs_in_insn insns)))
 278          vregs_in_insn i
 279             = case regUsage i of
 280                  RU rds wrs -> filterRegSet isVirtualReg
 281                                              (rds `unionRegSets` wrs)
 282          vreg_to_slot_map :: FiniteMap VRegUnique Int
 283          vreg_to_slot_map
 284             = listToFM (zip uniques_in_insns [0..])
 285
 286          ((final_stack_delta, final_ctr), insnss)
 287             = mapAccumL (patchInstr vreg_to_slot_map) (0,0) insns
 288      in
 289          if   final_stack_delta == 0
 290          then concat insnss
 291          else pprPanic "patchMem: non-zero final delta"
 292                        (int final_stack_delta)
 293
 294
 295 -- patchInstr has as a running state two Ints, one the current stack delta,
 296 -- needed to figure out offsets to stack slots on archs where we spill relative
 297 -- to the stack pointer, as opposed to the frame pointer.  The other is a
 298 -- counter, used to manufacture new temporary register names.
 299
 300 patchInstr :: FiniteMap VRegUnique Int -> (Int,Int) -> Instr -> ((Int,Int), [Instr])
 301 patchInstr vreg_to_slot_map (delta,ctr) instr
 302
 303  | null memSrcs && null memDsts
 304  = ((delta',ctr), [instr])
 305
 306  | otherwise
 307  = ((delta',ctr'), loadSrcs ++ [instr'] ++ spillDsts)
 308    where
 309         delta' = case instr of DELTA d -> d ; _ -> delta
 310
 311         (RU srcs dsts) = regUsage instr
 312
 313         -- The instr being patched may mention several vregs -- those which
 314         -- could not be assigned real registers.  For each such vreg, we
 315         -- invent a new vreg, used only around this instruction and nowhere
 316         -- else.  These new vregs replace the unallocatable vregs; they are
 317         -- loaded from the spill area, the instruction is done with them,
 318         -- and results if any are then written back to the spill area.
 319         vregs_in_instr
 320            = nub (filter isVirtualReg
 321                          (regSetToList srcs ++ regSetToList dsts))
 322         n_vregs_in_instr
 323            = length vregs_in_instr
 324         ctr'
 325            = ctr + n_vregs_in_instr
 326         vreg_env
 327            = zip vregs_in_instr [ctr, ctr+1 ..]
 328
 329         mkTmpReg vreg
 330            | isVirtualReg vreg
 331            = case [vi | (vreg', vi) <- vreg_env, vreg' == vreg] of
 332                 [i] -> case regClass vreg of
 333                           RcInteger -> VirtualRegI (pseudoVReg i)
 334                           RcFloat   -> VirtualRegF (pseudoVReg i)
 335                           RcDouble  -> VirtualRegD (pseudoVReg i)
 336                 _   -> pprPanic "patchInstr: unmapped VReg" (ppr vreg)
 337            | otherwise
 338            = vreg
 339
 340         pseudoVReg i = VRegUniqueLo (mkPseudoUnique3 i)
 341
 342         memSrcs   = filter isVirtualReg (regSetToList srcs)
 343         memDsts   = filter isVirtualReg (regSetToList dsts)
 344
 345         loadSrcs  = map load  memSrcs
 346         spillDsts = map spill memDsts
 347
 348         load mem  = loadReg  vreg_to_slot_map delta  mem (mkTmpReg mem)
 349         spill mem = spillReg vreg_to_slot_map delta' (mkTmpReg mem) mem
 350
 351         instr'    = patchRegs instr mkTmpReg
 352 \end{code}
 353
 354 allocUsingTheseRegs is the register allocator proper.  It attempts
 355 to allocate dynamic regs to real regs, given a list of real regs
 356 which it may use.  If it fails due to lack of real regs, the returned
 357 instructions use what real regs there are, but will retain uses of
 358 dynamic regs for which a real reg could not be found.  It is these
 359 leftover dynamic reg references which insertSpillCode will later
 360 assign to spill slots.
 361
 362 Some implementation notes.
 363 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 364 Instructions are numbered sequentially, starting at zero.
 365
 366 A flow edge (FE) is a pair of insn numbers (MkFE Int Int) denoting
 367 a possible flow of control from the first insn to the second.
 368
 369 The input to the register allocator is a list of instructions, which
 370 mention Regs.  A Reg can be a RealReg -- a real machine reg -- or a
 371 VirtualReg, which carries a unique.  After allocation, all the
 372 VirtualReg references will have been converted into RealRegs, and
 373 possible some spill code will have been inserted.
 374
 375 The heart of the register allocator works in four phases.
 376
 377 1.  (find_flow_edges) Calculate all the FEs for the code list.
 378     Return them not as a [FE], but implicitly, as a pair of
 379     Array Int [Int], being the successor and predecessor maps
 380     for instructions.
 381
 382 2.  (calc_liveness) Returns a FiniteMap FE RegSet.  For each
 383     FE, indicates the set of registers live on that FE.  Note
 384     that the set includes both RealRegs and VirtualRegs.  The
 385     former appear because the code could mention fixed register
 386     usages, and we need to take them into account from the start.
 387
 388 3.  (calc_live_range_sets) Invert the above mapping, giving a
 389     FiniteMap Reg FeSet, indicating, for each virtual and real
 390     reg mentioned in the code, which FEs it is live on.
 391
 392 4.  (calc_vreg_to_rreg_mapping) For virtual reg, try and find
 393     an allocatable real register for it.  Each real register has
 394     a "current commitment", indicating the set of FEs it is
 395     currently live on.  A virtual reg v can be assigned to
 396     real reg r iff v's live-fe-set does not intersect with r's
 397     current commitment fe-set.  If the assignment is made,
 398     v's live-fe-set is union'd into r's current commitment fe-set.
 399     There is also the minor restriction that v and r must be of
 400     the same register class (integer or floating).
 401
 402     Once this mapping is established, we simply apply it to the
 403     input insns, and that's it.
 404
 405     If no suitable real register can be found, the vreg is mapped
 406     to itself, and we deem allocation to have failed.  The partially
 407     allocated code is returned.  The higher echelons of the allocator
 408     (doGeneralAlloc and runRegAlloc) then cooperate to insert spill
 409     code and re-run allocation, until a successful allocation is found.
 410 \begin{code}
 411
 412 allocUsingTheseRegs :: [Instr] -> [Reg] -> (Bool, [Instr])
 413 allocUsingTheseRegs instrs available_real_regs
 414    = let (all_vregs_mapped, v_to_r_mapping)
 415             = calc_vreg_to_rreg_mapping instrs available_real_regs
 416          new_insns
 417             = map (flip patchRegs sr) instrs
 418          sr reg
 419             | isRealReg reg
 420             = reg
 421             | otherwise
 422             = case lookupFM v_to_r_mapping reg of
 423                  Just r  -> r
 424                  Nothing -> pprPanic "allocateUsingTheseRegs: unmapped vreg: "
 425                                      (ppr reg)
 426      in
 427          --trace ("allocUsingTheseRegs: " ++ show available_real_regs) (
 428          (all_vregs_mapped, new_insns)
 429          --)
 430
 431
 432 -- the heart of the matter.
 433 calc_vreg_to_rreg_mapping :: [Instr] -> [Reg] -> (Bool, FiniteMap Reg Reg)
 434 calc_vreg_to_rreg_mapping insns available_real_regs
 435    = let
 436          lr_sets  :: FiniteMap Reg FeSet
 437          lr_sets = calc_live_range_sets insns
 438
 439          -- lr_sets maps: vregs mentioned in insns to sets of live FEs
 440          -- and also:     rregs mentioned in insns to sets of live FEs
 441          -- We need to extract the rreg mapping, and use it as the
 442          -- initial real-register-commitment.  Also, add to the initial
 443          -- commitment, empty commitments for any real regs not
 444          -- mentioned in it.
 445
 446          -- which real regs do we want to keep track of in the running
 447          -- commitment mapping?  Precisely the available_real_regs.
 448          -- We don't care about real regs mentioned by insns which are
 449          -- not in this list, since we're not allocating to them.
 450          initial_rr_commitment :: FiniteMap Reg FeSet
 451          initial_rr_commitment
 452             = listToFM [(rreg,
 453                          case lookupFM lr_sets rreg of
 454                             Nothing            -> emptyFeSet
 455                             Just fixed_use_fes -> fixed_use_fes
 456                         )
 457                         | rreg <- available_real_regs]
 458
 459          -- These are the vregs for which we actually have to (try to)
 460          -- assign a real register. (ie, the whole reason we're here at all :)
 461          vreg_liveness_list :: [(Reg, FeSet)]
 462          vreg_liveness_list = filter (not.isRealReg.fst)
 463                                      (fmToList lr_sets)
 464
 465          -- A loop, which attempts to assign each vreg to a rreg.
 466          loop rr_commitment v_to_r_map []
 467             = v_to_r_map
 468          loop rr_commitment v_to_r_map ((vreg,vreg_live_fes):not_yet_done)
 469             = let
 470                   -- find a real reg which is not live for any of vreg_live_fes
 471                   cand_reals
 472                      = [rreg
 473                            | (rreg,rreg_live_FEs) <- fmToList rr_commitment,
 474                               regClass vreg == regClass rreg,
 475                               isEmptyFeSet (intersectionFeSets rreg_live_FEs
 476                                                                vreg_live_fes)
 477                        ]
 478               in
 479                  case cand_reals of
 480                     [] -> -- bummer.  No register is available.  Just go on to
 481                           -- the next vreg, mapping the vreg to itself.
 482                           loop rr_commitment (addToFM v_to_r_map vreg vreg)
 483                                not_yet_done
 484                     (r:_)
 485                        -> -- Hurrah!  Found a free reg of the right class.
 486                           -- Now we need to update the RR commitment.
 487                           loop rr_commitment2 (addToFM v_to_r_map vreg r)
 488                                not_yet_done
 489                           where
 490                              rr_commitment2
 491                                 = addToFM_C unionFeSets rr_commitment r
 492                                             vreg_live_fes
 493
 494          -- the final vreg to rreg mapping
 495          vreg_assignment
 496             = loop initial_rr_commitment emptyFM vreg_liveness_list
 497          -- did we succeed in mapping everyone to a real reg?
 498          allocation_succeeded
 499             = all isRealReg (eltsFM vreg_assignment)
 500      in
 501          (allocation_succeeded, vreg_assignment)
 502
 503
 504
 505 -- calculate liveness, then produce the live range info
 506 -- as a mapping of VRegs to the set of FEs on which they are live.
 507 -- The difficult part is inverting the mapping of Reg -> FeSet
 508 -- to produce a mapping FE -> RegSet.
 509
 510 calc_live_range_sets :: [Instr] -> FiniteMap Reg FeSet
 511 calc_live_range_sets insns
 512    = let
 513          -- this is the "original" (old) mapping
 514          lis :: FiniteMap FE RegSet
 515          lis = calc_liveness insns
 516
 517          -- establish the totality of reg names mentioned by the
 518          -- insns, by scanning over the insns.
 519          all_mentioned_regs :: RegSet
 520          all_mentioned_regs
 521             = foldl unionRegSets emptyRegSet
 522                     (map (\i -> case regUsage i of
 523                                    RU rds wrs -> unionRegSets rds wrs)
 524                          insns)
 525
 526          -- Initial inverted mapping, from Reg to sets of FEs
 527          initial_imap :: FiniteMap Reg FeSet
 528          initial_imap
 529             = listToFM [(reg, emptyFeSet)
 530                         | reg <- regSetToList all_mentioned_regs]
 531
 532          -- Update the new map with one element of the old map
 533          upd_imap :: FiniteMap Reg FeSet -> (FE, RegSet)
 534                      -> FiniteMap Reg FeSet
 535          upd_imap imap (fe, regset)
 536              = foldl upd_1_imap imap (regSetToList regset)
 537                where
 538                   upd_1_imap curr reg
 539                      = addToFM_C unionFeSets curr reg (unitFeSet fe)
 540
 541          -- the complete inverse mapping
 542          final_imap :: FiniteMap Reg FeSet
 543          final_imap
 544              = foldl upd_imap initial_imap (fmToList lis)
 545      in
 546          final_imap
 547
 548
 549
 550 -- Given the insns, calculate the FEs, and then doing fixpointing to
 551 -- figure out the set of live regs (virtual regs AND real regs) live
 552 -- on each FE.
 553
 554 calc_liveness :: [Instr] -> FiniteMap FE RegSet
 555 calc_liveness insns
 556    = let (pred_map, succ_map)
 557             = find_flow_edges insns
 558
 559          -- We use the convention that if the current approximation
 560          -- doesn't give a mapping for some FE, that FE maps to the
 561          -- empty set.
 562          initial_approx, fixpoint :: FiniteMap FE RegSet
 563          initial_approx
 564             = mk_initial_approx 0 insns succ_map emptyFM
 565          fixpoint
 566             = fix_set initial_approx 1
 567               -- If you want to live dangerously, and promise that the code
 568               -- doesn't contain any loops (ie, there are no back edges in
 569               -- the flow graph), you should be able to get away with this:
 570               -- = upd_liveness_info pred_map succ_map insn_array initial_approx
 571               -- But since I'm paranoid, and since it hardly makes any difference
 572               -- to the compiler run-time (about 0.1%), I prefer to do the
 573               -- the full fixpointing game.
 574
 575          insn_array
 576             = let n = length insns
 577               in  array (0, n-1) (zip [0..] insns)
 578
 579          sameSets []     []       = True
 580          sameSets (c:cs) (n:ns)   = eqRegSets c n && sameSets cs ns
 581          sameSets _      _        = False
 582
 583          fix_set curr_approx iter_number
 584             = let next_approx
 585                      = upd_liveness_info pred_map succ_map insn_array curr_approx
 586                   curr_sets
 587                      = eltsFM curr_approx
 588                   next_sets
 589                      = eltsFM next_approx
 590                   same
 591                      = sameSets curr_sets next_sets
 592                   final_approx
 593                      = if same then curr_approx
 594                                else fix_set next_approx (iter_number+1)
 595               in
 596                   --trace (let qqq (fe, regset)
 597                   --             = show fe ++ "  " ++ show (regSetToList regset)
 598                   --       in
 599                   --          "\n::iteration " ++ show iter_number ++ "\n"
 600                   --          ++ (unlines . map qqq . fmToList)
 601                   --                               next_approx ++"\n"
 602                   --      )
 603                   final_approx
 604      in
 605          fixpoint
 606
 607
 608 -- Create a correct initial approximation.  For each instruction that
 609 -- writes a register, we deem that the register is live on the
 610 -- flow edges leaving the instruction.  Subsequent iterations of
 611 -- the liveness AbI augment this based purely on reads of regs, not
 612 -- writes.  We need to start off with at least this minimal write-
 613 -- based information in order that writes to vregs which are never
 614 -- used have non-empty live ranges.  If we don't do that, we eventually
 615 -- wind up assigning such vregs to any old real reg, since they don't
 616 -- apparently conflict -- you can't conflict with an empty live range.
 617 -- This kludge is unfortunate, but we need to do it to cover not only
 618 -- writes to vregs which are never used, but also to deal correctly
 619 -- with the fact that calls to C will trash the callee saves registers.
 620
 621 mk_initial_approx :: Int -> [Instr] -> Array Int [Int]
 622                      -> FiniteMap FE RegSet
 623                      -> FiniteMap FE RegSet
 624 mk_initial_approx ino [] succ_map ia_so_far
 625    = ia_so_far
 626 mk_initial_approx ino (i:is) succ_map ia_so_far
 627    = let wrs
 628             = case regUsage i of RU rrr www -> www
 629          new_fes
 630             = [case iUnbox ino of      { inoh ->
 631                case iUnbox ino_succ of { ino_succh ->
 632                MkFE inoh ino_succh
 633                }}
 634                   | ino_succ <- succ_map ! ino]
 635
 636          loop [] ia = ia
 637          loop (fe:fes) ia
 638             = loop fes (addToFM_C unionRegSets ia fe wrs)
 639
 640          next_ia
 641             = loop new_fes ia_so_far
 642      in
 643          mk_initial_approx (ino+1) is succ_map next_ia
 644
 645
 646 -- Do one step in the liveness info calculation (AbI :).  Given the
 647 -- prior approximation (which tells you a subset of live VRegs+RRegs
 648 -- for each flow edge), calculate new information for all FEs.
 649 -- Rather than do this by iterating over FEs, it's easier to iterate
 650 -- over insns, and update their incoming FEs.
 651
 652 upd_liveness_info :: Array Int [Int]         -- instruction pred map
 653                      -> Array Int [Int]      -- instruction succ map
 654                      -> Array Int Instr      -- array of instructions
 655                      -> FiniteMap FE RegSet  -- previous approx
 656                      -> FiniteMap FE RegSet  -- improved approx
 657
 658 upd_liveness_info pred_map succ_map insn_array prev_approx
 659    = do_insns hi prev_approx
 660      where
 661         (lo, hi) = bounds insn_array
 662
 663         enquireMapFE :: FiniteMap FE RegSet -> FE
 664                         -> RegSet
 665         enquireMapFE fm fe
 666            = case lookupFM fm fe of
 667                 Just set -> set
 668                 Nothing  -> emptyRegSet
 669
 670         -- Work backwards, from the highest numbered insn to the lowest.
 671         -- This is a heuristic which causes faster convergence to the
 672         -- fixed point.  In particular, for straight-line code with no
 673         -- branches at all, arrives at the fixpoint in one iteration.
 674         do_insns ino approx
 675            | ino < lo
 676            = approx
 677            | otherwise
 678            = let fes_to_futures
 679                     = [case iUnbox ino of        { inoh ->
 680                        case iUnbox future_ino of { future_inoh ->
 681                        MkFE inoh future_inoh
 682                        }}
 683                           | future_ino <- succ_map ! ino]
 684                  future_lives
 685                     = map (enquireMapFE approx) fes_to_futures
 686                  future_live
 687                     = foldr unionRegSets emptyRegSet future_lives
 688
 689                  fes_from_histories
 690                     = [case iUnbox history_ino of { history_inoh ->
 691                        case iUnbox ino of         { inoh ->
 692                        MkFE history_inoh inoh
 693                        }}
 694                           | history_ino <- pred_map ! ino]
 695                  new_approx
 696                     = foldl update_one_history approx fes_from_histories
 697
 698                  insn
 699                     = insn_array ! ino
 700                  history_independent_component
 701                     = case regUsage insn of
 702                          RU rds wrs
 703                             -> unionRegSets rds
 704                                   (minusRegSets future_live wrs)
 705
 706                  update_one_history :: FiniteMap FE RegSet
 707                                        -> FE
 708                                        -> FiniteMap FE RegSet
 709                  update_one_history approx0 fe
 710                       = addToFM_C unionRegSets approx0 fe
 711                                   history_independent_component
 712
 713                  rest_done
 714                     = do_insns (ino-1) new_approx
 715              in
 716                  rest_done
 717
 718
 719
 720 -- Extract the flow edges from a list of insns.  Express the information
 721 -- as two mappings, from insn number to insn numbers of predecessors,
 722 -- and from insn number to insn numbers of successors.  (Since that's
 723 -- what we need to know when computing live ranges later).  Instructions
 724 -- are numbered starting at zero.  This function is long and complex
 725 -- in order to be efficient; it could equally well be shorter and slower.
 726
 727 find_flow_edges :: [Instr] -> (Array Int [Int],
 728                                Array Int [Int])
 729 find_flow_edges insns
 730    = let
 731          -- First phase: make a temp env which maps labels
 732          -- to insn numbers, so the second pass can know the insn
 733          -- numbers for jump targets.
 734
 735          label_env :: FiniteMap CLabel Int
 736
 737          mk_label_env n env [] = env
 738          mk_label_env n env ((LABEL clbl):is)
 739             = mk_label_env (n+1) (addToFM env clbl n) is
 740          mk_label_env n env (i:is)
 741             = mk_label_env (n+1) env is
 742
 743          label_env = mk_label_env 0 emptyFM insns
 744
 745          find_label :: CLabel -> Int
 746          find_label jmptarget
 747             = case lookupFM label_env jmptarget of
 748                  Just ino -> ino
 749                  Nothing  -> pprPanic "find_flow_edges: unmapped label"
 750                                       (pprCLabel jmptarget)
 751
 752          -- Second phase: traverse the insns, and make up the successor map.
 753
 754          least_ino, greatest_ino :: Int
 755          least_ino    = 0
 756          greatest_ino = length insns - 1
 757
 758          mk_succ_map :: Int -> [(Int, [Int])] -> [Instr] -> [(Int, [Int])]
 759
 760          mk_succ_map i_num rsucc_map []
 761             = reverse rsucc_map
 762
 763          mk_succ_map i_num rsucc_map (i:is)
 764             = let i_num_1 = i_num + 1
 765               in
 766               case insnFuture i of
 767
 768                  NoFuture
 769                     -> -- A non-local jump.  We can regard this insn as a terminal
 770                        -- insn in the graph, so we don't add any edges.
 771                        mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 772
 773                  Next
 774                     |  null is -- this is the last insn, and it doesn't go anywhere
 775                                -- (a meaningless scenario); handle it anyway
 776                     -> mk_succ_map i_num_1 ((i_num,[]):rsucc_map) is
 777
 778                     |  otherwise -- flows to next insn; add fe i_num -> i_num+1
 779                     -> mk_succ_map i_num_1 ((i_num, [i_num_1]): rsucc_map)
 780                                            is
 781
 782                  Branch lab -- jmps to lab; add fe i_num -> i_target
 783                     -> let i_target = find_label lab
 784                        in
 785                        mk_succ_map i_num_1 ((i_num, [i_target]): rsucc_map) is
 786
 787                  NextOrBranch lab
 788                     |  null is   -- jmps to label, or falls through, and this is
 789                                  -- the last insn (a meaningless scenario);
 790                                  -- flag an error
 791                     -> error "find_flow_edges: NextOrBranch is last"
 792
 793                     |  otherwise -- add fes i_num -> i_num+1
 794                                  --     and i_num -> i_target
 795                     -> let i_target = find_label lab
 796                        in
 797                        mk_succ_map i_num_1 ((i_num, [i_num_1, i_target]):rsucc_map)
 798                                            is
 799                  MultiFuture labels
 800                     -> -- A jump, whose targets are listed explicitly.
 801                        -- (Generated from table-based switch translations).
 802                        -- Add fes  i_num -> x  for each x in labels
 803                        let is_target = nub (map find_label labels)
 804                        in
 805                        mk_succ_map i_num_1 ((i_num, is_target):rsucc_map) is
 806
 807          -- Third phase: invert the successor map to get the predecessor
 808          -- map, using an algorithm which is quadratic in the worst case,
 809          -- but runs in almost-linear time, because of the nature of our
 810          -- inputs: most insns have a single successor, the next insn.
 811
 812          invert :: [(Int, [Int])] -> [(Int, [Int])]
 813          invert fmap
 814             = let inverted_pairs
 815                      = concatMap ( \ (a, bs) -> [(b,a) | b <- bs] ) fmap
 816                   sorted_inverted_pairs
 817                      = isort inverted_pairs
 818
 819                   grp :: Int -> [Int] -> [(Int,Int)] -> [(Int,[Int])]
 820                   grp k vs [] = [(k, vs)]
 821                   grp k vs ((kk,vv):rest)
 822                      | k == kk   = grp k (vv:vs) rest
 823                      | otherwise = (k,vs) : grp kk [vv] rest
 824
 825                   grp_start []             = []
 826                   grp_start ((kk,vv):rest) = grp kk [vv] rest
 827
 828                   grouped
 829                      = grp_start sorted_inverted_pairs
 830
 831                   -- make sure that the reverse mapping maps all inos
 832                   add_empties ino []
 833                      | ino > greatest_ino  = []
 834                      | otherwise           = (ino,[]): add_empties (ino+1) []
 835                   add_empties ino ((k,vs):rest)
 836                      | ino <  k   = (ino,[]): add_empties (ino+1) ((k,vs):rest)
 837                      | ino == k   = (k,vs) : add_empties (ino+1) rest
 838
 839                   -- This is nearly linear provided that the fsts of the
 840                   -- list are nearly in order -- a critical assumption
 841                   -- for efficiency.
 842                   isort :: [(Int,Int)] -> [(Int,Int)]
 843                   isort []     = []
 844                   isort (x:xs) = insert x (isort xs)
 845
 846                   insert :: (Int,Int) -> [(Int,Int)] -> [(Int,Int)]
 847                   insert y []     = [y]
 848                   insert y (z:zs)
 849                      -- specifically, this first test should almost always
 850                      -- be True in order for the near-linearity to happen
 851                      | fst y <= fst z  = y:z:zs
 852                      | otherwise       = z: insert y zs
 853               in
 854                  add_empties least_ino grouped
 855
 856          -- Finally ...
 857
 858          succ_list
 859             = mk_succ_map 0 [] insns
 860          succ_map
 861             = array (least_ino, greatest_ino) succ_list
 862          pred_list
 863             = invert succ_list
 864          pred_map
 865             = array (least_ino, greatest_ino) pred_list
 866      in
 867          (pred_map, succ_map)
 868
 869
 870 -- That's all, folks!  From here on is just some dull supporting stuff.
 871
 872 -- A data type for flow edges
 873 data FE
 874    = MkFE FastInt FastInt deriving (Eq, Ord)
 875
 876 -- deriving Show on types with unboxed fields doesn't work
 877 instance Show FE where
 878     showsPrec _ (MkFE s d)
 879        = showString "MkFE" . shows (iBox s) . shows ' ' . shows (iBox d)
 880
 881 -- Blargh.  Use ghc stuff soon!  Or: perhaps that's not such a good
 882 -- idea.  Most of these sets are either empty or very small, and it
 883 -- might be that the overheads of the FiniteMap based set implementation
 884 -- is a net loss.  The same might be true of RegSets.
 885
 886 newtype FeSet = MkFeSet [FE]
 887
 888 feSetFromList xs
 889    = MkFeSet (nukeDups (sort xs))
 890      where nukeDups :: [FE] -> [FE]
 891            nukeDups []  = []
 892            nukeDups [x] = [x]
 893            nukeDups (x:y:xys)
 894               = if x == y then nukeDups (y:xys)
 895                           else x : nukeDups (y:xys)
 896
 897 feSetToList (MkFeSet xs)            = xs
 898 isEmptyFeSet (MkFeSet xs)           = null xs
 899 emptyFeSet                          = MkFeSet []
 900 eqFeSet (MkFeSet xs1) (MkFeSet xs2) = xs1 == xs2
 901 unitFeSet x                         = MkFeSet [x]
 902
 903 elemFeSet x (MkFeSet xs)
 904    = f xs
 905      where
 906         f []     = False
 907         f (y:ys) | x == y    = True
 908                  | x < y     = False
 909                  | otherwise = f ys
 910
 911 unionFeSets (MkFeSet xs1) (MkFeSet xs2)
 912    = MkFeSet (f xs1 xs2)
 913      where
 914         f [] bs = bs
 915         f as [] = as
 916         f (a:as) (b:bs)
 917            | a < b      = a : f as (b:bs)
 918            | a > b      = b : f (a:as) bs
 919            | otherwise  = a : f as bs
 920
 921 minusFeSets (MkFeSet xs1) (MkFeSet xs2)
 922    = MkFeSet (f xs1 xs2)
 923      where
 924         f [] bs = []
 925         f as [] = as
 926         f (a:as) (b:bs)
 927            | a < b      = a : f as (b:bs)
 928            | a > b      = f (a:as) bs
 929            | otherwise  = f as bs
 930
 931 intersectionFeSets (MkFeSet xs1) (MkFeSet xs2)
 932    = MkFeSet (f xs1 xs2)
 933      where
 934         f [] bs = []
 935         f as [] = []
 936         f (a:as) (b:bs)
 937            | a < b      = f as (b:bs)
 938            | a > b      = f (a:as) bs
 939            | otherwise  = a : f as bs
 940
 941 \end{code}