1 {-# OPTIONS -fno-warn-missing-signatures #-}
2 -----------------------------------------------------------------------------
4 -- The register allocator
6 -- (c) The University of Glasgow 2004
8 -----------------------------------------------------------------------------
11 The algorithm is roughly:
13 1) Compute strongly connected components of the basic block list.
15 2) Compute liveness (mapping from pseudo register to
18 3) Walk instructions in each basic block. We keep track of
19 (a) Free real registers (a bitmap?)
20 (b) Current assignment of temporaries to machine registers and/or
21 spill slots (call this the "assignment").
22 (c) Partial mapping from basic block ids to a virt-to-loc mapping.
23 When we first encounter a branch to a basic block,
24 we fill in its entry in this table with the current mapping.
27 (a) For each real register clobbered by this instruction:
28 If a temporary resides in it,
29 If the temporary is live after this instruction,
30 Move the temporary to another (non-clobbered & free) reg,
31 or spill it to memory. Mark the temporary as residing
32 in both memory and a register if it was spilled (it might
33 need to be read by this instruction).
34 (ToDo: this is wrong for jump instructions?)
36 (b) For each temporary *read* by the instruction:
37 If the temporary does not have a real register allocation:
38 - Allocate a real register from the free list. If
40 - Find a temporary to spill. Pick one that is
41 not used in this instruction (ToDo: not
43 - generate a spill instruction
44 - If the temporary was previously spilled,
45 generate an instruction to read the temp from its spill loc.
46 (optimisation: if we can see that a real register is going to
47 be used soon, then don't use it for allocation).
49 (c) Update the current assignment
51 (d) If the intstruction is a branch:
52 if the destination block already has a register assignment,
53 Generate a new block with fixup code and redirect the
54 jump to the new block.
56 Update the block id->assignment mapping with the current
59 (e) Delete all register assignments for temps which are read
60 (only) and die here. Update the free register list.
62 (f) Mark all registers clobbered by this instruction as not free,
63 and mark temporaries which have been spilled due to clobbering
64 as in memory (step (a) marks then as in both mem & reg).
66 (g) For each temporary *written* by this instruction:
67 Allocate a real register as for (b), spilling something
69 - except when updating the assignment, drop any memory
70 locations that the temporary was previously in, since
71 they will be no longer valid after this instruction.
73 (h) Delete all register assignments for temps which are
74 written and die here (there should rarely be any). Update
75 the free register list.
77 (i) Rewrite the instruction with the new mapping.
79 (j) For each spilled reg known to be now dead, re-add its stack slot
84 module RegAllocLinear (
86 RegAllocStats, pprStats
89 #include "HsVersions.h"
96 import Cmm hiding (RegSet)
100 import Unique ( Uniquable(getUnique), Unique )
117 #include "../includes/MachRegs.h"
119 -- -----------------------------------------------------------------------------
120 -- The free register set
122 -- This needs to be *efficient*
124 {- Here's an inefficient 'executable specification' of the FreeRegs data type:
125 type FreeRegs = [RegNo]
128 releaseReg n f = if n `elem` f then f else (n : f)
129 initFreeRegs = allocatableRegs
130 getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f
131 allocateReg f r = filter (/= r) f
134 #if defined(powerpc_TARGET_ARCH)
136 -- The PowerPC has 32 integer and 32 floating point registers.
137 -- This is 32bit PowerPC, so Word64 is inefficient - two Word32s are much
139 -- Note that when getFreeRegs scans for free registers, it starts at register
140 -- 31 and counts down. This is a hack for the PowerPC - the higher-numbered
141 -- registers are callee-saves, while the lower regs are caller-saves, so it
142 -- makes sense to start at the high end.
143 -- Apart from that, the code does nothing PowerPC-specific, so feel free to
144 -- add your favourite platform to the #if (if you have 64 registers but only
147 data FreeRegs = FreeRegs !Word32 !Word32
148 deriving( Show ) -- The Show is used in an ASSERT
150 noFreeRegs :: FreeRegs
151 noFreeRegs = FreeRegs 0 0
153 releaseReg :: RegNo -> FreeRegs -> FreeRegs
154 releaseReg r (FreeRegs g f)
155 | r > 31 = FreeRegs g (f .|. (1 `shiftL` (fromIntegral r - 32)))
156 | otherwise = FreeRegs (g .|. (1 `shiftL` fromIntegral r)) f
158 initFreeRegs :: FreeRegs
159 initFreeRegs = foldr releaseReg noFreeRegs allocatableRegs
161 getFreeRegs :: RegClass -> FreeRegs -> [RegNo] -- lazilly
162 getFreeRegs cls (FreeRegs g f)
163 | RcDouble <- cls = go f (0x80000000) 63
164 | RcInteger <- cls = go g (0x80000000) 31
165 | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class" (ppr cls)
168 go x m i | x .&. m /= 0 = i : (go x (m `shiftR` 1) $! i-1)
169 | otherwise = go x (m `shiftR` 1) $! i-1
171 allocateReg :: RegNo -> FreeRegs -> FreeRegs
172 allocateReg r (FreeRegs g f)
173 | r > 31 = FreeRegs g (f .&. complement (1 `shiftL` (fromIntegral r - 32)))
174 | otherwise = FreeRegs (g .&. complement (1 `shiftL` fromIntegral r)) f
177 #elif defined(sparc_TARGET_ARCH)
178 --------------------------------------------------------------------------------
179 -- SPARC is like PPC, except for twinning of floating point regs.
180 -- When we allocate a double reg we must take an even numbered
181 -- float reg, as well as the one after it.
184 -- Holds bitmaps showing what registers are currently allocated.
185 -- The float and double reg bitmaps overlap, but we only alloc
186 -- float regs into the float map, and double regs into the double map.
188 -- Free regs have a bit set in the corresponding bitmap.
192 !Word32 -- int reg bitmap regs 0..31
193 !Word32 -- float reg bitmap regs 32..63
194 !Word32 -- double reg bitmap regs 32..63
198 -- | A reg map where no regs are free to be allocated.
199 noFreeRegs :: FreeRegs
200 noFreeRegs = FreeRegs 0 0 0
203 -- | The initial set of free regs.
204 -- Don't treat the top half of reg pairs we're using as doubles as being free.
205 initFreeRegs :: FreeRegs
207 -- = trace (show allocable ++ "\n" ++ show freeDouble)
211 freeDouble = getFreeRegs RcDouble regs
212 regs = foldr releaseReg noFreeRegs allocable
213 allocable = allocatableRegs \\ doublePairs
214 doublePairs = [43, 45, 47, 49, 51, 53]
217 -- | Get all the free registers of this class.
218 getFreeRegs :: RegClass -> FreeRegs -> [RegNo] -- lazilly
219 getFreeRegs cls (FreeRegs g f d)
220 | RcInteger <- cls = go g 1 0
221 | RcFloat <- cls = go f 1 32
222 | RcDouble <- cls = go d 1 32
223 | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class " (ppr cls)
226 go x m i | x .&. m /= 0 = i : (go x (m `shiftL` 1) $! i+1)
227 | otherwise = go x (m `shiftL` 1) $! i+1
229 showFreeRegs :: FreeRegs -> String
232 ++ " integer: " ++ (show $ getFreeRegs RcInteger regs) ++ "\n"
233 ++ " float: " ++ (show $ getFreeRegs RcFloat regs) ++ "\n"
234 ++ " double: " ++ (show $ getFreeRegs RcDouble regs) ++ "\n"
237 -- | Check whether a reg is free
238 regIsFree :: RegNo -> FreeRegs -> Bool
239 regIsFree r (FreeRegs g f d)
241 -- a general purpose reg
243 , mask <- 1 `shiftL` fromIntegral r
246 -- use the first 22 float regs as double precision
249 , mask <- 1 `shiftL` (fromIntegral r - 32)
252 -- use the last 10 float regs as single precision
254 , mask <- 1 `shiftL` (fromIntegral r - 32)
258 -- | Grab a register.
259 grabReg :: RegNo -> FreeRegs -> FreeRegs
260 grabReg r (FreeRegs g f d)
262 -- a general purpose reg
264 , mask <- complement (1 `shiftL` fromIntegral r)
265 = FreeRegs (g .&. mask) f d
267 -- use the first 22 float regs as double precision
270 , mask <- complement (1 `shiftL` (fromIntegral r - 32))
271 = FreeRegs g f (d .&. mask)
273 -- use the last 10 float regs as single precision
275 , mask <- complement (1 `shiftL` (fromIntegral r - 32))
276 = FreeRegs g (f .&. mask) d
280 -- | Release a register from allocation.
281 -- The register liveness information says that most regs die after a C call,
282 -- but we still don't want to allocate to some of them.
284 releaseReg :: RegNo -> FreeRegs -> FreeRegs
285 releaseReg r regs@(FreeRegs g f d)
287 -- used by STG machine, or otherwise unavailable
288 | r >= 0 && r <= 15 = regs
289 | r >= 17 && r <= 21 = regs
290 | r >= 24 && r <= 31 = regs
291 | r >= 32 && r <= 41 = regs
292 | r >= 54 && r <= 59 = regs
294 -- never release the high part of double regs.
302 -- a general purpose reg
304 , mask <- 1 `shiftL` fromIntegral r
305 = FreeRegs (g .|. mask) f d
307 -- use the first 22 float regs as double precision
310 , mask <- 1 `shiftL` (fromIntegral r - 32)
311 = FreeRegs g f (d .|. mask)
313 -- use the last 10 float regs as single precision
315 , mask <- 1 `shiftL` (fromIntegral r - 32)
316 = FreeRegs g (f .|. mask) d
319 -- | Allocate a register in the map.
320 allocateReg :: RegNo -> FreeRegs -> FreeRegs
321 allocateReg r regs@(FreeRegs g f d)
323 -- if the reg isn't actually free then we're in trouble
324 {- | not $ regIsFree r regs
326 "RegAllocLinear.allocateReg"
327 (text "reg " <> ppr r <> text " is not free")
334 --------------------------------------------------------------------------------
336 -- If we have less than 32 registers, or if we have efficient 64-bit words,
337 -- we will just use a single bitfield.
341 # if defined(alpha_TARGET_ARCH)
342 type FreeRegs = Word64
344 type FreeRegs = Word32
347 noFreeRegs :: FreeRegs
350 releaseReg :: RegNo -> FreeRegs -> FreeRegs
351 releaseReg n f = f .|. (1 `shiftL` n)
353 initFreeRegs :: FreeRegs
354 initFreeRegs = foldr releaseReg noFreeRegs allocatableRegs
356 getFreeRegs :: RegClass -> FreeRegs -> [RegNo] -- lazilly
357 getFreeRegs cls f = go f 0
360 | n .&. 1 /= 0 && regClass (RealReg m) == cls
361 = m : (go (n `shiftR` 1) $! (m+1))
363 = go (n `shiftR` 1) $! (m+1)
364 -- ToDo: there's no point looking through all the integer registers
365 -- in order to find a floating-point one.
367 allocateReg :: RegNo -> FreeRegs -> FreeRegs
368 allocateReg r f = f .&. complement (1 `shiftL` fromIntegral r)
372 -- -----------------------------------------------------------------------------
373 -- The assignment of virtual registers to stack slots
375 -- We have lots of stack slots. Memory-to-memory moves are a pain on most
376 -- architectures. Therefore, we avoid having to generate memory-to-memory moves
377 -- by simply giving every virtual register its own stack slot.
379 -- The StackMap stack map keeps track of virtual register - stack slot
380 -- associations and of which stack slots are still free. Once it has been
381 -- associated, a stack slot is never "freed" or removed from the StackMap again,
382 -- it remains associated until we are done with the current CmmProc.
385 data StackMap = StackMap [StackSlot] (UniqFM StackSlot)
387 emptyStackMap :: StackMap
388 emptyStackMap = StackMap [0..maxSpillSlots] emptyUFM
390 getStackSlotFor :: StackMap -> Unique -> (StackMap,Int)
391 getStackSlotFor (StackMap [] _) _
392 = panic "RegAllocLinear.getStackSlotFor: out of stack slots, try -fregs-graph"
393 -- This happens with darcs' SHA1.hs, see #1993
395 getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg =
396 case lookupUFM reserved reg of
397 Just slot -> (fs,slot)
398 Nothing -> (StackMap stack' (addToUFM reserved reg freeSlot), freeSlot)
400 -- -----------------------------------------------------------------------------
401 -- Top level of the register allocator
403 -- Allocate registers
406 -> UniqSM (NatCmmTop, Maybe RegAllocStats)
408 regAlloc (CmmData sec d)
413 regAlloc (CmmProc (LiveInfo info _ _) lbl params (ListGraph []))
414 = return ( CmmProc info lbl params (ListGraph [])
417 regAlloc (CmmProc static lbl params (ListGraph comps))
418 | LiveInfo info (Just first_id) block_live <- static
420 -- do register allocation on each component.
421 (final_blocks, stats)
422 <- linearRegAlloc first_id block_live
423 $ map (\b -> case b of
424 BasicBlock _ [b] -> AcyclicSCC b
425 BasicBlock _ bs -> CyclicSCC bs)
428 -- make sure the block that was first in the input list
429 -- stays at the front of the output
430 let ((first':_), rest')
431 = partition ((== first_id) . blockId) final_blocks
433 return ( CmmProc info lbl params (ListGraph (first' : rest'))
436 -- bogus. to make non-exhaustive match warning go away.
437 regAlloc (CmmProc _ _ _ _)
438 = panic "RegAllocLinear.regAlloc: no match"
441 -- -----------------------------------------------------------------------------
442 -- Linear sweep to allocate registers
444 data Loc = InReg {-# UNPACK #-} !RegNo
445 | InMem {-# UNPACK #-} !Int -- stack slot
446 | InBoth {-# UNPACK #-} !RegNo
447 {-# UNPACK #-} !Int -- stack slot
448 deriving (Eq, Show, Ord)
451 A temporary can be marked as living in both a register and memory
452 (InBoth), for example if it was recently loaded from a spill location.
453 This makes it cheap to spill (no save instruction required), but we
454 have to be careful to turn this into InReg if the value in the
457 This is also useful when a temporary is about to be clobbered. We
458 save it in a spill location, but mark it as InBoth because the current
459 instruction might still want to read it.
462 instance Outputable Loc where
463 ppr l = text (show l)
466 -- | Do register allocation on some basic blocks.
467 -- But be careful to allocate a block in an SCC only if it has
468 -- an entry in the block map or it is the first block.
471 :: BlockId -- ^ the first block
472 -> BlockMap RegSet -- ^ live regs on entry to each basic block
473 -> [SCC LiveBasicBlock] -- ^ instructions annotated with "deaths"
474 -> UniqSM ([NatBasicBlock], RegAllocStats)
476 linearRegAlloc first_id block_live sccs
478 let (_, _, stats, blocks) =
479 runR emptyBlockMap initFreeRegs emptyRegMap emptyStackMap us
480 $ linearRA_SCCs first_id block_live [] sccs
482 return (blocks, stats)
484 linearRA_SCCs _ _ blocksAcc []
485 = return $ reverse blocksAcc
487 linearRA_SCCs first_id block_live blocksAcc (AcyclicSCC block : sccs)
488 = do blocks' <- processBlock block_live block
489 linearRA_SCCs first_id block_live
490 ((reverse blocks') ++ blocksAcc)
493 linearRA_SCCs first_id block_live blocksAcc (CyclicSCC blocks : sccs)
494 = do let process [] [] accum = return $ reverse accum
495 process [] next_round accum = process next_round [] accum
496 process (b@(BasicBlock id _) : blocks) next_round accum =
497 do block_assig <- getBlockAssigR
498 if isJust (lookupBlockEnv block_assig id) || id == first_id
499 then do b' <- processBlock block_live b
500 process blocks next_round (b' : accum)
501 else process blocks (b : next_round) accum
502 blockss' <- process blocks [] (return [])
503 linearRA_SCCs first_id block_live
504 (reverse (concat blockss') ++ blocksAcc)
508 -- | Do register allocation on this basic block
511 :: BlockMap RegSet -- ^ live regs on entry to each basic block
512 -> LiveBasicBlock -- ^ block to do register allocation on
513 -> RegM [NatBasicBlock] -- ^ block with registers allocated
515 processBlock block_live (BasicBlock id instrs)
518 <- linearRA block_live [] [] instrs
520 return $ BasicBlock id instrs' : fixups
523 -- | Load the freeregs and current reg assignment into the RegM state
524 -- for the basic block with this BlockId.
525 initBlock :: BlockId -> RegM ()
527 = do block_assig <- getBlockAssigR
528 case lookupBlockEnv block_assig id of
529 -- no prior info about this block: assume everything is
530 -- free and the assignment is empty.
532 -> do setFreeRegsR initFreeRegs
533 setAssigR emptyRegMap
535 -- load info about register assignments leading into this block.
536 Just (freeregs, assig)
537 -> do setFreeRegsR freeregs
543 -> [Instr] -> [NatBasicBlock] -> [LiveInstr]
544 -> RegM ([Instr], [NatBasicBlock])
546 linearRA _ instr_acc fixups []
547 = return (reverse instr_acc, fixups)
549 linearRA block_live instr_acc fixups (instr:instrs)
550 = do (instr_acc', new_fixups) <- raInsn block_live instr_acc instr
551 linearRA block_live instr_acc' (new_fixups++fixups) instrs
553 -- -----------------------------------------------------------------------------
554 -- Register allocation for a single instruction
556 type BlockAssignment = BlockMap (FreeRegs, RegMap Loc)
558 raInsn :: BlockMap RegSet -- Live temporaries at each basic block
559 -> [Instr] -- new instructions (accum.)
560 -> LiveInstr -- the instruction (with "deaths")
562 [Instr], -- new instructions
563 [NatBasicBlock] -- extra fixup blocks
566 raInsn _ new_instrs (Instr (COMMENT _) Nothing)
567 = return (new_instrs, [])
569 raInsn _ new_instrs (Instr (DELTA n) Nothing)
572 return (new_instrs, [])
574 raInsn block_live new_instrs (Instr instr (Just live))
578 -- If we have a reg->reg move between virtual registers, where the
579 -- src register is not live after this instruction, and the dst
580 -- register does not already have an assignment,
581 -- and the source register is assigned to a register, not to a spill slot,
582 -- then we can eliminate the instruction.
583 -- (we can't eliminitate it if the source register is on the stack, because
584 -- we do not want to use one spill slot for different virtual registers)
585 case isRegRegMove instr of
586 Just (src,dst) | src `elementOfUniqSet` (liveDieRead live),
588 not (dst `elemUFM` assig),
589 Just (InReg _) <- (lookupUFM assig src) -> do
591 RealReg i -> setAssigR (addToUFM assig dst (InReg i))
592 -- if src is a fixed reg, then we just map dest to this
593 -- reg in the assignment. src must be an allocatable reg,
594 -- otherwise it wouldn't be in r_dying.
595 _virt -> case lookupUFM assig src of
596 Nothing -> panic "raInsn"
598 setAssigR (addToUFM (delFromUFM assig src) dst loc)
600 -- we have eliminated this instruction
602 freeregs <- getFreeRegsR
604 pprTrace "raInsn" (text "ELIMINATED: " <> docToSDoc (pprInstr instr) $$ ppr r_dying <+> ppr w_dying $$ text (show freeregs) $$ ppr assig) $ do
606 return (new_instrs, [])
608 _ -> genRaInsn block_live new_instrs instr
609 (uniqSetToList $ liveDieRead live)
610 (uniqSetToList $ liveDieWrite live)
614 = pprPanic "raInsn" (text "no match for:" <> ppr li)
617 genRaInsn block_live new_instrs instr r_dying w_dying =
618 case regUsage instr of { RU read written ->
619 case partition isRealReg written of { (real_written1,virt_written) ->
622 real_written = [ r | RealReg r <- real_written1 ]
624 -- we don't need to do anything with real registers that are
625 -- only read by this instr. (the list is typically ~2 elements,
626 -- so using nub isn't a problem).
627 virt_read = nub (filter isVirtualReg read)
630 -- (a) save any temporaries which will be clobbered by this instruction
631 clobber_saves <- saveClobberedTemps real_written r_dying
634 {- freeregs <- getFreeRegsR
637 (docToSDoc (pprInstr instr) $$ ppr r_dying <+> ppr w_dying $$ ppr virt_read <+> ppr virt_written
638 $$ text (show freeregs) $$ ppr assig)
642 -- (b), (c) allocate real regs for all regs read by this instruction.
643 (r_spills, r_allocd) <-
644 allocateRegsAndSpill True{-reading-} virt_read [] [] virt_read
646 -- (d) Update block map for new destinations
647 -- NB. do this before removing dead regs from the assignment, because
648 -- these dead regs might in fact be live in the jump targets (they're
649 -- only dead in the code that follows in the current basic block).
650 (fixup_blocks, adjusted_instr)
651 <- joinToTargets block_live [] instr (jumpDests instr [])
653 -- (e) Delete all register assignments for temps which are read
654 -- (only) and die here. Update the free register list.
657 -- (f) Mark regs which are clobbered as unallocatable
658 clobberRegs real_written
660 -- (g) Allocate registers for temporaries *written* (only)
661 (w_spills, w_allocd) <-
662 allocateRegsAndSpill False{-writing-} virt_written [] [] virt_written
664 -- (h) Release registers for temps which are written here and not
669 -- (i) Patch the instruction
670 patch_map = listToUFM [ (t,RealReg r) |
671 (t,r) <- zip virt_read r_allocd
672 ++ zip virt_written w_allocd ]
674 patched_instr = patchRegs adjusted_instr patchLookup
675 patchLookup x = case lookupUFM patch_map x of
680 -- pprTrace "patched" (docToSDoc (pprInstr patched_instr)) $ do
682 -- (j) free up stack slots for dead spilled regs
683 -- TODO (can't be bothered right now)
685 -- erase reg->reg moves where the source and destination are the same.
686 -- If the src temp didn't die in this instr but happened to be allocated
687 -- to the same real reg as the destination, then we can erase the move anyway.
688 squashed_instr = case isRegRegMove patched_instr of
693 return (squashed_instr ++ w_spills ++ reverse r_spills
694 ++ clobber_saves ++ new_instrs,
698 -- -----------------------------------------------------------------------------
701 releaseRegs regs = do
706 loop _ free _ | free `seq` False = undefined
707 loop assig free [] = do setAssigR assig; setFreeRegsR free; return ()
708 loop assig free (RealReg r : rs) = loop assig (releaseReg r free) rs
709 loop assig free (r:rs) =
710 case lookupUFM assig r of
711 Just (InBoth real _) -> loop (delFromUFM assig r) (releaseReg real free) rs
712 Just (InReg real) -> loop (delFromUFM assig r) (releaseReg real free) rs
713 _other -> loop (delFromUFM assig r) free rs
715 -- -----------------------------------------------------------------------------
716 -- Clobber real registers
719 For each temp in a register that is going to be clobbered:
720 - if the temp dies after this instruction, do nothing
721 - otherwise, put it somewhere safe (another reg if possible,
722 otherwise spill and record InBoth in the assignment).
724 for allocateRegs on the temps *read*,
725 - clobbered regs are allocatable.
727 for allocateRegs on the temps *written*,
728 - clobbered regs are not allocatable.
732 :: [RegNo] -- real registers clobbered by this instruction
733 -> [Reg] -- registers which are no longer live after this insn
734 -> RegM [Instr] -- return: instructions to spill any temps that will
737 saveClobberedTemps [] _ = return [] -- common case
738 saveClobberedTemps clobbered dying = do
741 to_spill = [ (temp,reg) | (temp, InReg reg) <- ufmToList assig,
742 reg `elem` clobbered,
743 temp `notElem` map getUnique dying ]
745 (instrs,assig') <- clobber assig [] to_spill
749 clobber assig instrs [] = return (instrs,assig)
750 clobber assig instrs ((temp,reg):rest)
752 --ToDo: copy it to another register if possible
753 (spill,slot) <- spillR (RealReg reg) temp
754 recordSpill (SpillClobber temp)
756 let new_assign = addToUFM assig temp (InBoth reg slot)
757 clobber new_assign (spill : COMMENT (fsLit "spill clobber") : instrs) rest
759 clobberRegs :: [RegNo] -> RegM ()
760 clobberRegs [] = return () -- common case
761 clobberRegs clobbered = do
762 freeregs <- getFreeRegsR
763 -- setFreeRegsR $! foldr grabReg freeregs clobbered
764 setFreeRegsR $! foldr allocateReg freeregs clobbered
767 setAssigR $! clobber assig (ufmToList assig)
769 -- if the temp was InReg and clobbered, then we will have
770 -- saved it in saveClobberedTemps above. So the only case
771 -- we have to worry about here is InBoth. Note that this
772 -- also catches temps which were loaded up during allocation
773 -- of read registers, not just those saved in saveClobberedTemps.
774 clobber assig [] = assig
775 clobber assig ((temp, InBoth reg slot) : rest)
776 | reg `elem` clobbered
777 = clobber (addToUFM assig temp (InMem slot)) rest
778 clobber assig (_:rest)
781 -- -----------------------------------------------------------------------------
782 -- allocateRegsAndSpill
784 -- This function does several things:
785 -- For each temporary referred to by this instruction,
786 -- we allocate a real register (spilling another temporary if necessary).
787 -- We load the temporary up from memory if necessary.
788 -- We also update the register assignment in the process, and
789 -- the list of free registers and free stack slots.
792 :: Bool -- True <=> reading (load up spilled regs)
793 -> [Reg] -- don't push these out
794 -> [Instr] -- spill insns
795 -> [RegNo] -- real registers allocated (accum.)
796 -> [Reg] -- temps to allocate
797 -> RegM ([Instr], [RegNo])
799 allocateRegsAndSpill _ _ spills alloc []
800 = return (spills,reverse alloc)
802 allocateRegsAndSpill reading keep spills alloc (r:rs) = do
804 case lookupUFM assig r of
805 -- case (1a): already in a register
806 Just (InReg my_reg) ->
807 allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
809 -- case (1b): already in a register (and memory)
810 -- NB1. if we're writing this register, update its assignemnt to be
811 -- InReg, because the memory value is no longer valid.
812 -- NB2. This is why we must process written registers here, even if they
813 -- are also read by the same instruction.
814 Just (InBoth my_reg _) -> do
815 when (not reading) (setAssigR (addToUFM assig r (InReg my_reg)))
816 allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
818 -- Not already in a register, so we need to find a free one...
820 freeregs <- getFreeRegsR
822 case getFreeRegs (regClass r) freeregs of
824 -- case (2): we have a free register
825 freeClass@(my_reg:_) -> {- pprTrace "alloc" (ppr r <+> ppr my_reg <+> ppr freeClass) $ -}
827 spills' <- loadTemp reading r loc my_reg spills
829 | Just (InMem slot) <- loc, reading = InBoth my_reg slot
830 | otherwise = InReg my_reg
831 setAssigR (addToUFM assig r $! new_loc)
832 setFreeRegsR $ allocateReg my_reg freeregs
833 allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
835 -- case (3): we need to push something out to free up a register
838 keep' = map getUnique keep
839 candidates1 = [ (temp,reg,mem)
840 | (temp, InBoth reg mem) <- ufmToList assig,
841 temp `notElem` keep', regClass (RealReg reg) == regClass r ]
842 candidates2 = [ (temp,reg)
843 | (temp, InReg reg) <- ufmToList assig,
844 temp `notElem` keep', regClass (RealReg reg) == regClass r ]
846 ASSERT2(not (null candidates1 && null candidates2),
847 text (show freeregs) <+> ppr r <+> ppr assig) do
851 -- we have a temporary that is in both register and mem,
852 -- just free up its register for use.
854 (temp,my_reg,slot):_ -> do
855 spills' <- loadTemp reading r loc my_reg spills
857 assig1 = addToUFM assig temp (InMem slot)
858 assig2 = addToUFM assig1 r (InReg my_reg)
861 allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
863 -- otherwise, we need to spill a temporary that currently
864 -- resides in a register.
869 -- TODO: plenty of room for optimisation in choosing which temp
870 -- to spill. We just pick the first one that isn't used in
871 -- the current instruction for now.
873 let (temp_to_push_out, my_reg)
874 = case candidates2 of
875 [] -> panic $ "RegAllocLinear.allocRegsAndSpill: no spill candidates"
876 ++ "assignment: " ++ show (ufmToList assig) ++ "\n"
879 (spill_insn, slot) <- spillR (RealReg my_reg) temp_to_push_out
880 let spill_store = (if reading then id else reverse)
881 [ COMMENT (fsLit "spill alloc")
884 -- record that this temp was spilled
885 recordSpill (SpillAlloc temp_to_push_out)
887 -- update the register assignment
888 let assig1 = addToUFM assig temp_to_push_out (InMem slot)
889 let assig2 = addToUFM assig1 r (InReg my_reg)
892 -- if need be, load up a spilled temp into the reg we've just freed up.
893 spills' <- loadTemp reading r loc my_reg spills
895 allocateRegsAndSpill reading keep
896 (spill_store ++ spills')
900 -- | Load up a spilled temporary if we need to.
903 -> Reg -- the temp being loaded
904 -> Maybe Loc -- the current location of this temp
905 -> RegNo -- the hreg to load the temp into
909 loadTemp True vreg (Just (InMem slot)) hreg spills
911 insn <- loadR (RealReg hreg) slot
912 recordSpill (SpillLoad $ getUnique vreg)
913 return $ COMMENT (fsLit "spill load") : insn : spills
915 loadTemp _ _ _ _ spills =
919 -- -----------------------------------------------------------------------------
920 -- Joining a jump instruction to its targets
922 -- The first time we encounter a jump to a particular basic block, we
923 -- record the assignment of temporaries. The next time we encounter a
924 -- jump to the same block, we compare our current assignment to the
925 -- stored one. They might be different if spilling has occrred in one
926 -- branch; so some fixup code will be required to match up the
934 -> RegM ([NatBasicBlock], Instr)
936 joinToTargets _ new_blocks instr []
937 = return (new_blocks, instr)
939 joinToTargets block_live new_blocks instr (dest:dests) = do
940 block_assig <- getBlockAssigR
943 -- adjust the assignment to remove any registers which are not
944 -- live on entry to the destination block.
945 adjusted_assig = filterUFM_Directly still_live assig
947 live_set = lookItUp "joinToTargets" block_live dest
948 still_live uniq _ = uniq `elemUniqSet_Directly` live_set
950 -- and free up those registers which are now free.
952 [ r | (reg, loc) <- ufmToList assig,
953 not (elemUniqSet_Directly reg live_set),
956 regsOfLoc (InReg r) = [r]
957 regsOfLoc (InBoth r _) = [r]
958 regsOfLoc (InMem _) = []
960 case lookupBlockEnv block_assig dest of
961 -- Nothing <=> this is the first time we jumped to this
964 freeregs <- getFreeRegsR
965 let freeregs' = foldr releaseReg freeregs to_free
966 setBlockAssigR (extendBlockEnv block_assig dest
967 (freeregs',adjusted_assig))
968 joinToTargets block_live new_blocks instr dests
972 -- the assignments match
973 | ufmToList dest_assig == ufmToList adjusted_assig
974 -> joinToTargets block_live new_blocks instr dests
981 let graph = makeRegMovementGraph adjusted_assig dest_assig
982 let sccs = stronglyConnCompFromEdgedVerticesR graph
983 fixUpInstrs <- mapM (handleComponent delta instr) sccs
985 block_id <- getUniqueR
986 let block = BasicBlock (BlockId block_id) $
987 concat fixUpInstrs ++ mkBranchInstr dest
989 let instr' = patchJump instr dest (BlockId block_id)
991 joinToTargets block_live (block : new_blocks) instr' dests
994 -- | Construct a graph of register\/spill movements.
996 -- We cut some corners by
997 -- a) not handling cyclic components
998 -- b) not handling memory-to-memory moves.
1000 -- Cyclic components seem to occur only very rarely,
1001 -- and we don't need memory-to-memory moves because we
1002 -- make sure that every temporary always gets its own
1005 makeRegMovementGraph :: RegMap Loc -> RegMap Loc -> [(Unique, Loc, [Loc])]
1006 makeRegMovementGraph adjusted_assig dest_assig
1009 = expandNode vreg src
1010 $ lookupWithDefaultUFM_Directly
1012 (panic "RegAllocLinear.makeRegMovementGraph")
1015 in [ node | (vreg, src) <- ufmToList adjusted_assig
1016 , node <- mkNodes src vreg ]
1018 -- The InBoth handling is a little tricky here. If
1019 -- the destination is InBoth, then we must ensure that
1020 -- the value ends up in both locations. An InBoth
1021 -- destination must conflict with an InReg or InMem
1022 -- source, so we expand an InBoth destination as
1023 -- necessary. An InBoth source is slightly different:
1024 -- we only care about the register that the source value
1025 -- is in, so that we can move it to the destinations.
1027 expandNode vreg loc@(InReg src) (InBoth dst mem)
1028 | src == dst = [(vreg, loc, [InMem mem])]
1029 | otherwise = [(vreg, loc, [InReg dst, InMem mem])]
1031 expandNode vreg loc@(InMem src) (InBoth dst mem)
1032 | src == mem = [(vreg, loc, [InReg dst])]
1033 | otherwise = [(vreg, loc, [InReg dst, InMem mem])]
1035 expandNode _ (InBoth _ src) (InMem dst)
1036 | src == dst = [] -- guaranteed to be true
1038 expandNode _ (InBoth src _) (InReg dst)
1041 expandNode vreg (InBoth src _) dst
1042 = expandNode vreg (InReg src) dst
1044 expandNode vreg src dst
1046 | otherwise = [(vreg, src, [dst])]
1049 -- | Make a move instruction between these two locations so we
1050 -- can join together allocations for different basic blocks.
1052 makeMove :: Int -> Unique -> Loc -> Loc -> RegM Instr
1053 makeMove _ vreg (InReg src) (InReg dst)
1054 = do recordSpill (SpillJoinRR vreg)
1055 return $ mkRegRegMoveInstr (RealReg src) (RealReg dst)
1057 makeMove delta vreg (InMem src) (InReg dst)
1058 = do recordSpill (SpillJoinRM vreg)
1059 return $ mkLoadInstr (RealReg dst) delta src
1061 makeMove delta vreg (InReg src) (InMem dst)
1062 = do recordSpill (SpillJoinRM vreg)
1063 return $ mkSpillInstr (RealReg src) delta dst
1065 makeMove _ vreg src dst
1066 = panic $ "makeMove " ++ show vreg ++ " (" ++ show src ++ ") ("
1068 ++ " (workaround: use -fviaC)"
1071 -- we have eliminated any possibility of single-node cylces
1072 -- in expandNode above.
1073 handleComponent :: Int -> Instr -> SCC (Unique, Loc, [Loc]) -> RegM [Instr]
1074 handleComponent delta _ (AcyclicSCC (vreg,src,dsts))
1075 = mapM (makeMove delta vreg src) dsts
1077 -- we can not have cycles that involve memory
1078 -- locations as source nor as single destination
1079 -- because memory locations (stack slots) are
1080 -- allocated exclusively for a virtual register and
1081 -- therefore can not require a fixup
1082 handleComponent delta instr (CyclicSCC ((vreg, (InReg sreg),dsts):rest))
1084 spill_id <- getUniqueR
1085 (_, slot) <- spillR (RealReg sreg) spill_id
1086 remainingFixUps <- mapM (handleComponent delta instr) (stronglyConnCompFromEdgedVerticesR rest)
1087 restoreAndFixInstr <- getRestoreMoves dsts slot
1088 return ([instr] ++ concat remainingFixUps ++ restoreAndFixInstr)
1091 getRestoreMoves [r@(InReg reg), mem@(InMem _)] slot
1093 restoreToReg <- loadR (RealReg reg) slot
1094 moveInstr <- makeMove delta vreg r mem
1095 return $ [COMMENT (fsLit "spill join move"), restoreToReg, moveInstr]
1097 getRestoreMoves [InReg reg] slot
1098 = loadR (RealReg reg) slot >>= return . (:[])
1100 getRestoreMoves [InMem _] _ = panic "getRestoreMoves can not handle memory only restores"
1101 getRestoreMoves _ _ = panic "getRestoreMoves unknown case"
1104 handleComponent _ _ (CyclicSCC _)
1105 = panic "Register Allocator: handleComponent cyclic"
1109 -- -----------------------------------------------------------------------------
1110 -- The register allocator's monad.
1112 -- Here we keep all the state that the register allocator keeps track
1113 -- of as it walks the instructions in a basic block.
1117 ra_blockassig :: BlockAssignment,
1118 -- The current mapping from basic blocks to
1119 -- the register assignments at the beginning of that block.
1120 ra_freeregs :: {-#UNPACK#-}!FreeRegs, -- free machine registers
1121 ra_assig :: RegMap Loc, -- assignment of temps to locations
1122 ra_delta :: Int, -- current stack delta
1123 ra_stack :: StackMap, -- free stack slots for spilling
1124 ra_us :: UniqSupply, -- unique supply for generating names
1125 -- for fixup blocks.
1127 -- Record why things were spilled, for -ddrop-asm-stats.
1128 -- Just keep a list here instead of a map of regs -> reasons.
1129 -- We don't want to slow down the allocator if we're not going to emit the stats.
1130 ra_spills :: [SpillReason]
1133 newtype RegM a = RegM { unReg :: RA_State -> (# RA_State, a #) }
1136 instance Monad RegM where
1137 m >>= k = RegM $ \s -> case unReg m s of { (# s, a #) -> unReg (k a) s }
1138 return a = RegM $ \s -> (# s, a #)
1140 runR :: BlockAssignment -> FreeRegs -> RegMap Loc -> StackMap -> UniqSupply
1141 -> RegM a -> (BlockAssignment, StackMap, RegAllocStats, a)
1142 runR block_assig freeregs assig stack us thing =
1143 case unReg thing (RA_State{ ra_blockassig=block_assig, ra_freeregs=freeregs,
1144 ra_assig=assig, ra_delta=0{-???-}, ra_stack=stack,
1145 ra_us = us, ra_spills = [] }) of
1146 (# state'@RA_State{ ra_blockassig=block_assig, ra_stack=stack' }, returned_thing #)
1147 -> (block_assig, stack', makeRAStats state', returned_thing)
1149 spillR :: Reg -> Unique -> RegM (Instr, Int)
1150 spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} ->
1151 let (stack',slot) = getStackSlotFor stack temp
1152 instr = mkSpillInstr reg delta slot
1154 (# s{ra_stack=stack'}, (instr,slot) #)
1156 loadR :: Reg -> Int -> RegM Instr
1157 loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} ->
1158 (# s, mkLoadInstr reg delta slot #)
1160 getFreeRegsR :: RegM FreeRegs
1161 getFreeRegsR = RegM $ \ s@RA_State{ra_freeregs = freeregs} ->
1164 setFreeRegsR :: FreeRegs -> RegM ()
1165 setFreeRegsR regs = RegM $ \ s ->
1166 (# s{ra_freeregs = regs}, () #)
1168 getAssigR :: RegM (RegMap Loc)
1169 getAssigR = RegM $ \ s@RA_State{ra_assig = assig} ->
1172 setAssigR :: RegMap Loc -> RegM ()
1173 setAssigR assig = RegM $ \ s ->
1174 (# s{ra_assig=assig}, () #)
1176 getBlockAssigR :: RegM BlockAssignment
1177 getBlockAssigR = RegM $ \ s@RA_State{ra_blockassig = assig} ->
1180 setBlockAssigR :: BlockAssignment -> RegM ()
1181 setBlockAssigR assig = RegM $ \ s ->
1182 (# s{ra_blockassig = assig}, () #)
1184 setDeltaR :: Int -> RegM ()
1185 setDeltaR n = RegM $ \ s ->
1186 (# s{ra_delta = n}, () #)
1188 getDeltaR :: RegM Int
1189 getDeltaR = RegM $ \s -> (# s, ra_delta s #)
1191 getUniqueR :: RegM Unique
1192 getUniqueR = RegM $ \s ->
1193 case splitUniqSupply (ra_us s) of
1194 (us1, us2) -> (# s{ra_us = us2}, uniqFromSupply us1 #)
1196 -- | Record that a spill instruction was inserted, for profiling.
1197 recordSpill :: SpillReason -> RegM ()
1199 = RegM $ \s -> (# s { ra_spills = spill : ra_spills s}, () #)
1201 -- -----------------------------------------------------------------------------
1203 -- | Reasons why instructions might be inserted by the spiller.
1204 -- Used when generating stats for -ddrop-asm-stats.
1207 = SpillAlloc !Unique -- ^ vreg was spilled to a slot so we could use its
1208 -- current hreg for another vreg
1209 | SpillClobber !Unique -- ^ vreg was moved because its hreg was clobbered
1210 | SpillLoad !Unique -- ^ vreg was loaded from a spill slot
1212 | SpillJoinRR !Unique -- ^ reg-reg move inserted during join to targets
1213 | SpillJoinRM !Unique -- ^ reg-mem move inserted during join to targets
1216 -- | Used to carry interesting stats out of the register allocator.
1219 { ra_spillInstrs :: UniqFM [Int] }
1222 -- | Make register allocator stats from its final state.
1223 makeRAStats :: RA_State -> RegAllocStats
1226 { ra_spillInstrs = binSpillReasons (ra_spills state) }
1229 -- | Build a map of how many times each reg was alloced, clobbered, loaded etc.
1231 :: [SpillReason] -> UniqFM [Int]
1233 binSpillReasons reasons
1237 (map (\reason -> case reason of
1238 SpillAlloc r -> (r, [1, 0, 0, 0, 0])
1239 SpillClobber r -> (r, [0, 1, 0, 0, 0])
1240 SpillLoad r -> (r, [0, 0, 1, 0, 0])
1241 SpillJoinRR r -> (r, [0, 0, 0, 1, 0])
1242 SpillJoinRM r -> (r, [0, 0, 0, 0, 1])) reasons)
1245 -- | Count reg-reg moves remaining in this code.
1246 countRegRegMovesNat :: NatCmmTop -> Int
1247 countRegRegMovesNat cmm
1248 = execState (mapGenBlockTopM countBlock cmm) 0
1250 countBlock b@(BasicBlock _ instrs)
1251 = do mapM_ countInstr instrs
1255 | Just _ <- isRegRegMove instr
1263 -- | Pretty print some RegAllocStats
1264 pprStats :: [NatCmmTop] -> [RegAllocStats] -> SDoc
1265 pprStats code statss
1266 = let -- sum up all the instrs inserted by the spiller
1267 spills = foldl' (plusUFM_C (zipWith (+)))
1269 $ map ra_spillInstrs statss
1271 spillTotals = foldl' (zipWith (+))
1275 -- count how many reg-reg-moves remain in the code
1276 moves = sum $ map countRegRegMovesNat code
1278 pprSpill (reg, spills)
1279 = parens $ (hcat $ punctuate (text ", ") (doubleQuotes (ppr reg) : map ppr spills))
1281 in ( text "-- spills-added-total"
1282 $$ text "-- (allocs, clobbers, loads, joinRR, joinRM, reg_reg_moves_remaining)"
1283 $$ (parens $ (hcat $ punctuate (text ", ") (map ppr spillTotals ++ [ppr moves])))
1285 $$ text "-- spills-added"
1286 $$ text "-- (reg_name, allocs, clobbers, loads, joinRR, joinRM)"
1287 $$ (vcat $ map pprSpill
1292 -- -----------------------------------------------------------------------------
1295 my_fromJust :: String -> SDoc -> Maybe a -> a
1296 my_fromJust _ _ (Just x) = x
1297 my_fromJust s p Nothing = pprPanic ("fromJust: " ++ s) p
1299 lookItUp :: String -> BlockMap a -> BlockId -> a
1300 lookItUp str fm x = my_fromJust str (ppr x) (lookupBlockEnv fm x)