From c62b824e9e8808eb3845ddb1614494b0575eaafd Mon Sep 17 00:00:00 2001
From: "dias@eecs.harvard.edu"
Date: Thu, 16 Oct 2008 10:42:18 +0000
Subject: [PATCH] Fixed linear regalloc bug, dropped some tracing code

o The linear-scan register allocator sometimes allocated a block before
  allocating one of its predecessors, which could lead to inconsistent
  allocations. Now, we allocate a block only if a predecessor has set the
  "incoming" assignments for the block (or if it's the procedure's entry
  block).

o Also commented out some tracing code on the new codegen path.
---
 compiler/cmm/CmmBuildInfoTables.hs   | 14 +++++--------
 compiler/cmm/CmmLiveZ.hs             |  2 +-
 compiler/cmm/CmmProcPointZ.hs        | 16 +++++++--------
 compiler/cmm/CmmSpillReload.hs       |  4 ++--
 compiler/cmm/CmmStackLayout.hs       | 25 +++++++++++++----------
 compiler/cmm/ZipDataflow.hs          |  2 +-
 compiler/main/HscMain.lhs            |  6 ++++--
 compiler/nativeGen/RegAllocLinear.hs | 37 ++++++++++++++++++++++------------
 8 files changed, 59 insertions(+), 47 deletions(-)

diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs
index fa3d920..173b799 100644
--- a/compiler/cmm/CmmBuildInfoTables.hs
+++ b/compiler/cmm/CmmBuildInfoTables.hs
@@ -78,7 +78,7 @@ import ZipDataflow
 -- which may differ depending on whether there is an update frame.
 live_ptrs :: ByteOff -> BlockEnv SubAreaSet -> AreaMap -> BlockId -> [Maybe LocalReg]
 live_ptrs oldByte slotEnv areaMap bid =
-  pprTrace "live_ptrs for" (ppr bid <+> ppr youngByte <+> ppr liveSlots) $
+  -- pprTrace "live_ptrs for" (ppr bid <+> ppr youngByte <+> ppr liveSlots) $
   reverse $ slotsToList youngByte liveSlots []
   where slotsToList n [] results | n == oldByte = results -- at old end of stack frame
         slotsToList n (s : _) _ | n == oldByte =
@@ -181,24 +181,21 @@ type CAFEnv = BlockEnv CAFSet

 -- First, an analysis to find live CAFs.
 cafLattice :: DataflowLattice CAFSet
-cafLattice = DataflowLattice "live cafs" emptyFM add True
+cafLattice = DataflowLattice "live cafs" emptyFM add False
   where add new old = if sizeFM new' > sizeFM old then aTx new' else noTx new'
           where new' = new `plusFM` old

 cafTransfers :: BackwardTransfers Middle Last CAFSet
 cafTransfers = BackwardTransfers first middle last
   where first live _ = live
-        middle live m = pprTrace "cafmiddle" (ppr m) $ foldExpDeepMiddle addCaf m live
+        middle live m = foldExpDeepMiddle addCaf m live
         last env l = foldExpDeepLast addCaf l (joinOuts cafLattice env l)
         addCaf e set = case e of
               CmmLit (CmmLabel c) -> add c set
               CmmLit (CmmLabelOff c _) -> add c set
               CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set
               _ -> set
-        add l s = pprTrace "CAF analysis saw label" (ppr l) $
-                   if hasCAF l then
-                     pprTrace "has caf" (ppr l) $ addToFM s (cvtToClosureLbl l) ()
-                   else (pprTrace "no cafs" (ppr l) $ s)
+        add l s = if hasCAF l then addToFM s (cvtToClosureLbl l) () else s

 type CafFix a = FuelMonad (BackwardFixedPoint Middle Last CAFSet a)
 cafAnal :: LGraph Middle Last -> FuelMonad CAFEnv
@@ -263,8 +260,7 @@ buildSRTs topSRT topCAFMap cafs =
       mkSRT topSRT =
        do localSRTs <- procpointSRT (lbl topSRT) (elt_map topSRT) cafs
           return (topSRT, localSRTs)
-  in pprTrace "cafs" (ppr cafs) $
-       if length cafs > maxBmpSize then
+  in if length cafs > maxBmpSize then
        mkSRT (foldl add_if_missing topSRT cafs)
      else -- make sure all the cafs are near the bottom of the srt
        mkSRT (add_if_too_far topSRT cafs)
diff --git a/compiler/cmm/CmmLiveZ.hs b/compiler/cmm/CmmLiveZ.hs
index 7bafc91..70bd51b 100644
--- a/compiler/cmm/CmmLiveZ.hs
+++ b/compiler/cmm/CmmLiveZ.hs
@@ -31,7 +31,7 @@ type CmmLive = RegSet

 -- | The dataflow lattice
 liveLattice :: DataflowLattice CmmLive
-liveLattice = DataflowLattice "live LocalReg's" emptyUniqSet add True
+liveLattice = DataflowLattice "live LocalReg's" emptyUniqSet add False
     where add new old =
             let join = unionUniqSets new old in
             (if sizeUniqSet join > sizeUniqSet old then aTx else noTx) join
diff --git a/compiler/cmm/CmmProcPointZ.hs b/compiler/cmm/CmmProcPointZ.hs
index 58c63cb..5eaac74 100644
--- a/compiler/cmm/CmmProcPointZ.hs
+++ b/compiler/cmm/CmmProcPointZ.hs
@@ -366,8 +366,8 @@ add_CopyOuts protos procPoints g = fold_blocks mb_copy_out (return emptyBlockEnv
          insert z succId m =
            do (b, bmap) <- z
               (b, bs) <- insertBetween b m succId
-              pprTrace "insert for succ" (ppr succId <> ppr m) $
-                return $ (b, foldl (flip insertBlock) bmap bs)
+              -- pprTrace "insert for succ" (ppr succId <> ppr m) $ do
+              return $ (b, foldl (flip insertBlock) bmap bs)
        finish (b@(Block bid _ _), bmap) =
          return $ (extendBlockEnv bmap bid b)
        skip b@(Block bid _ _) bs =
@@ -402,7 +402,7 @@ splitAtProcPoints entry_label callPPs procPoints procMap areaMap
          where graph = lookupBlockEnv graphEnv procId `orElse` emptyBlockEnv
                graph' = extendBlockEnv graph bid b
     graphEnv_pre <- return $ fold_blocks addBlock emptyBlockEnv g
-     graphEnv <- return $ pprTrace "graphEnv" (ppr graphEnv_pre) graphEnv_pre
+     graphEnv <- return {- $ pprTrace "graphEnv" (ppr graphEnv_pre) -} graphEnv_pre
     -- Build a map from proc point BlockId to labels for their new procedures
     let add_label map pp = return $ addToFM map pp lbl
           where lbl = if pp == entry then entry_label else blockLbl pp
@@ -454,10 +454,10 @@ splitAtProcPoints entry_label callPPs procPoints procMap areaMap
              -- add the jump blocks to the graph
              blockEnv''' = foldl (flip insertBlock) blockEnv'' jumpBlocks
          let g' = LGraph ppId off blockEnv'''
-          pprTrace "g' pre jumps" (ppr g') $
-            return (extendBlockEnv newGraphEnv ppId g')
+          -- pprTrace "g' pre jumps" (ppr g') $ do
+          return (extendBlockEnv newGraphEnv ppId g')
     graphEnv_pre <- foldM add_jumps emptyBlockEnv $ blockEnvToList graphEnv
-    graphEnv <- return $ pprTrace "graphEnv with jump blocks" (ppr graphEnv_pre)
+    graphEnv <- return $ -- pprTrace "graphEnv with jump blocks" (ppr graphEnv_pre)
                          graphEnv_pre
     let to_proc (bid, g@(LGraph g_entry _ blocks)) | elemBlockSet bid callPPs =
          if bid == entry then
@@ -476,8 +476,8 @@ splitAtProcPoints entry_label callPPs procPoints procMap areaMap
          compare (expectJust "block_order" $ lookupBlockEnv block_order bid)
                  (expectJust "block_order" $ lookupBlockEnv block_order bid')
     procs <- return $ map to_proc $ sortBy sort_fn $ blockEnvToList graphEnv
-    return $ pprTrace "procLabels" (ppr procLabels)
-           $ pprTrace "splitting graphs" (ppr procs)
+    return -- $ pprTrace "procLabels" (ppr procLabels)
+           -- $ pprTrace "splitting graphs" (ppr procs)
           $ procs

 splitAtProcPoints _ _ _ _ _ t@(CmmData _ _) = return [t]
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
index be043fe..dcbde33 100644
--- a/compiler/cmm/CmmSpillReload.hs
+++ b/compiler/cmm/CmmSpillReload.hs
@@ -66,7 +66,7 @@ changeRegs f live = live { in_regs = f (in_regs live) }

 dualLiveLattice :: DataflowLattice DualLive
 dualLiveLattice =
-      DataflowLattice "variables live in registers and on stack" empty add True
+      DataflowLattice "variables live in registers and on stack" empty add False
     where empty = DualLive emptyRegSet emptyRegSet
           -- | compute in the Tx monad to track whether anything has changed
           add new old = do stack <- add1 (on_stack new) (on_stack old)
@@ -195,7 +195,7 @@ data AvailRegs = UniverseMinus RegSet

 availRegsLattice :: DataflowLattice AvailRegs
-availRegsLattice = DataflowLattice "register gotten from reloads" empty add True
+availRegsLattice = DataflowLattice "register gotten from reloads" empty add False
                            -- last True <==> debugging on
     where empty = UniverseMinus emptyRegSet
           -- | compute in the Tx monad to track whether anything has changed
diff --git a/compiler/cmm/CmmStackLayout.hs b/compiler/cmm/CmmStackLayout.hs
index a2ba3f3..3518df8 100644
--- a/compiler/cmm/CmmStackLayout.hs
+++ b/compiler/cmm/CmmStackLayout.hs
@@ -57,7 +57,7 @@ import ZipDataflow

 -- a single slot, on insertion.
 slotLattice :: DataflowLattice SubAreaSet
-slotLattice = DataflowLattice "live slots" emptyFM add True
+slotLattice = DataflowLattice "live slots" emptyFM add False
   where add new old = case foldFM addArea (False, old) new of
                         (True, x) -> aTx x
                         (False, x) -> noTx x
@@ -94,7 +94,8 @@ liveGen s set = liveGen' s set []
               a == a' && hi >= hi' && hi - w <= hi' - w'

 liveKill :: SubArea -> [SubArea] -> [SubArea]
-liveKill (a, hi, w) set = pprTrace "killing slots in area" (ppr a) $ liveKill' set []
+liveKill (a, hi, w) set = -- pprTrace "killing slots in area" (ppr a) $
+                          liveKill' set []
   where liveKill' [] z = z
         liveKill' (s'@(a', hi', w') : rst) z =
           if a /= a' || hi < lo' || lo > hi' then -- no overlap
@@ -309,7 +310,8 @@ layout procPoints env g@(LGraph _ entrySp _) =
              start = case returnOff stackInfo of Just b -> max b young
                                                  Nothing -> young
              z = allocSlotFrom ig areaSize start areaMap (CallArea (Young id))
-          in pprTrace "allocCallSlot for" (ppr id <+> ppr young <+> ppr (live_in t) <+> ppr z) z
+          in -- pprTrace "allocCallSlot for" (ppr id <+> ppr young <+> ppr (live_in t) <+> ppr z)
+             z
        allocCallSlot areaMap _ = areaMap
        -- mid foreign calls need to have info tables placed on the stack
        allocMidCall m@(MidForeignCall (Safe bid _) _ _ _) t areaMap =
@@ -326,10 +328,11 @@ layout procPoints env g@(LGraph _ entrySp _) =
          where layout areaMap (ZTail m t) = layout (alloc m t areaMap) t
                layout areaMap (ZLast _) = allocCallSlot areaMap b
      areaMap = foldl layoutAreas (addToFM emptyFM (CallArea Old) 0) (postorder_dfs g)
-  in pprTrace "ProcPoints" (ppr procPoints) $
-       pprTrace "Area SizeMap" (ppr areaSize) $
-         pprTrace "Entry SP" (ppr entrySp) $
-           pprTrace "Area Map" (ppr areaMap) $ areaMap
+  in -- pprTrace "ProcPoints" (ppr procPoints) $
+     -- pprTrace "Area SizeMap" (ppr areaSize) $
+     -- pprTrace "Entry SP" (ppr entrySp) $
+     -- pprTrace "Area Map" (ppr areaMap) $
+     areaMap

 -- After determining the stack layout, we can:
 -- 1. Replace references to stack Areas with addresses relative to the stack
@@ -345,7 +348,7 @@ manifestSP :: ProcPointSet -> BlockEnv Status -> AreaMap ->
 manifestSP procPoints procMap areaMap g@(LGraph entry args blocks) =
   liftM (LGraph entry args) blocks'
   where blocks' = foldl replB (return emptyBlockEnv) (postorder_dfs g)
-        slot a = pprTrace "slot" (ppr a) $
+        slot a = -- pprTrace "slot" (ppr a) $
                  lookupFM areaMap a `orElse` panic "unallocated Area"
        slot' (Just id) = slot $ CallArea (Young id)
        slot' Nothing = slot $ CallArea Old
@@ -369,8 +372,8 @@ manifestSP procPoints procMap areaMap g@(LGraph entry args blocks) =
        replB :: FuelMonad (BlockEnv CmmBlock) -> CmmBlock -> FuelMonad (BlockEnv CmmBlock)
        replB blocks (Block id o t) =
          do bs <- replTail (Block id o) spIn t
-            pprTrace "spIn" (ppr id <+> ppr spIn)$
-              liftM (flip (foldr insertBlock) bs) blocks
+            -- pprTrace "spIn" (ppr id <+> ppr spIn) $ do
+            liftM (flip (foldr insertBlock) bs) blocks
          where spIn = sp_on_entry id
                replTail :: (ZTail Middle Last -> CmmBlock) -> Int -> (ZTail Middle Last) ->
                            FuelMonad ([CmmBlock])
@@ -392,7 +395,7 @@ manifestSP procPoints procMap areaMap g@(LGraph entry args blocks) =
        fixSp h spOff l@(LastBranch k) =
          let succSp = sp_on_entry k in
          if succSp /= spOff then
-           pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $
+           -- pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $
            updSp h spOff succSp l
          else return $ [h (ZLast (LastOther (last spOff l)))]
        fixSp h spOff l = liftM (uncurry (:)) $ fold_succs succ l $ return (b, [])
diff --git a/compiler/cmm/ZipDataflow.hs b/compiler/cmm/ZipDataflow.hs
index 2d50165..8811755 100644
--- a/compiler/cmm/ZipDataflow.hs
+++ b/compiler/cmm/ZipDataflow.hs
@@ -1008,7 +1008,7 @@ instance FixedPoint ForwardFixedPoint where


 dump_things :: Bool
-dump_things = True
+dump_things = False

 my_trace :: String -> SDoc -> a -> a
 my_trace = if dump_things then pprTrace else \_ _ a -> a
diff --git a/compiler/main/HscMain.lhs b/compiler/main/HscMain.lhs
index c4e8ae7..bc2747a 100644
--- a/compiler/main/HscMain.lhs
+++ b/compiler/main/HscMain.lhs
@@ -773,9 +773,11 @@ tryNewCodeGen hsc_env this_mod data_tycons imported_mods
        ; prog <- return $ map (runTx $ runCmmOpts cmmCfgOptsZ) (srtToData topSRT : prog)
                -- Control flow optimisation, again

-       ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "New Cmm" (pprCmms prog)
+       ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" (pprCmms prog)

-       ; return $ map cmmOfZgraph prog }
+       ; let prog' = map cmmOfZgraph prog
+       ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "Raw Cmm" (ppr prog')
+       ; return prog' }


 optionallyConvertAndOrCPS :: HscEnv -> [Cmm] -> IO [Cmm]
diff --git a/compiler/nativeGen/RegAllocLinear.hs b/compiler/nativeGen/RegAllocLinear.hs
index 2e6e37c..323e1ff 100644
--- a/compiler/nativeGen/RegAllocLinear.hs
+++ b/compiler/nativeGen/RegAllocLinear.hs
@@ -254,7 +254,7 @@ regAlloc (CmmProc static lbl params (ListGraph comps)) = do

        -- do register allocation on each component.
        (final_blocks, stats)
-               <- linearRegAlloc block_live
+               <- linearRegAlloc first_id block_live
                $ map (\b -> case b of
                                BasicBlock _ [b] -> AcyclicSCC b
                                BasicBlock _ bs -> CyclicSCC bs)
@@ -299,32 +299,43 @@ instance Outputable Loc where

 -- | Do register allocation on some basic blocks.
+-- But be careful to allocate a block in an SCC only if it has
+-- an entry in the block map or it is the first block.
 --
 linearRegAlloc
-       :: BlockMap RegSet              -- ^ live regs on entry to each basic block
+       :: BlockId                      -- ^ the first block
+       -> BlockMap RegSet              -- ^ live regs on entry to each basic block
        -> [SCC LiveBasicBlock]         -- ^ instructions annotated with "deaths"
        -> UniqSM ([NatBasicBlock], RegAllocStats)

-linearRegAlloc block_live sccs
+linearRegAlloc first_id block_live sccs
 = do   us      <- getUs
        let (_, _, stats, blocks) =
                runR emptyBlockMap initFreeRegs emptyRegMap emptyStackMap us
-                       $ linearRA_SCCs block_live [] sccs
+                       $ linearRA_SCCs first_id block_live [] sccs
        return (blocks, stats)

-linearRA_SCCs _ blocksAcc []
+linearRA_SCCs _ _ blocksAcc []
        = return $ reverse blocksAcc

-linearRA_SCCs block_live blocksAcc (AcyclicSCC block : sccs)
+linearRA_SCCs first_id block_live blocksAcc (AcyclicSCC block : sccs)
 = do   blocks' <- processBlock block_live block
-       linearRA_SCCs block_live
+       linearRA_SCCs first_id block_live
                ((reverse blocks') ++ blocksAcc)
                sccs

-linearRA_SCCs block_live blocksAcc (CyclicSCC blocks : sccs)
- = do  blockss' <- mapM (processBlock block_live) blocks
-       linearRA_SCCs block_live
+linearRA_SCCs first_id block_live blocksAcc (CyclicSCC blocks : sccs)
+ = do  let process [] [] accum = return $ reverse accum
+           process [] next_round accum = process next_round [] accum
+           process (b@(BasicBlock id _) : blocks) next_round accum =
+             do block_assig <- getBlockAssigR
+                if isJust (lookupBlockEnv block_assig id) || id == first_id
+                 then do b' <- processBlock block_live b
+                         process blocks next_round (b' : accum)
+                 else process blocks (b : next_round) accum
+       blockss' <- process blocks [] (return [])
+       linearRA_SCCs first_id block_live
                (reverse (concat blockss') ++ blocksAcc)
                sccs

@@ -422,11 +433,11 @@ raInsn block_live new_instrs (Instr instr (Just live))
                        setAssigR (addToUFM (delFromUFM assig src) dst loc)

                        -- we have elimianted this instruction
-                       {-
                        freeregs <- getFreeRegsR
                        assig <- getAssigR
-                       pprTrace "raInsn" (text "ELIMINATED: " <> docToSDoc (pprInstr instr) $$ ppr r_dying <+> ppr w_dying $$ text (show freeregs) $$ ppr assig) $ do
-                       -}
+                       {-
+                       pprTrace "raInsn" (text "ELIMINATED: " <> docToSDoc (pprInstr instr) $$ ppr r_dying <+> ppr w_dying $$ text (show freeregs) $$ ppr assig) $ do
+                       -}
                        return (new_instrs, [])

                _ -> genRaInsn block_live new_instrs instr
--
1.7.10.4
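
The CyclicSCC case above is the heart of the fix: within a cyclic component, a
block is allocated only once some predecessor has recorded its incoming
register assignments, or when the block is the procedure's entry block, and
everything else is deferred to a later round. The standalone sketch below is
not GHC code; cfg, allocate, processSCC, and the String-valued "assignment"
are toy stand-ins for the real successor relation, processBlock, linearRA_SCCs,
and the block assignment map, but the round-based deferral is the same.

module DeferralSketch where

import qualified Data.Map as M

type BlockId = Int

-- Successor map of a tiny cyclic control-flow graph: 1 -> 2 <-> 3.
cfg :: M.Map BlockId [BlockId]
cfg = M.fromList [(1, [2]), (2, [3]), (3, [2])]

-- "Incoming assignment" recorded for a block by one of its predecessors;
-- in the real allocator this is a register/stack assignment, here a String.
type BlockAssig = M.Map BlockId String

-- Toy stand-in for processBlock: allocate a block and record an incoming
-- entry for each successor, making it eligible in a later round.
-- The first predecessor to reach a successor wins; later ones keep the old entry.
allocate :: BlockId -> BlockAssig -> BlockAssig
allocate b assig =
  foldr (\s -> M.insertWith (\_new old -> old) s ("incoming from " ++ show b))
        assig
        (M.findWithDefault [] b cfg)

-- Round-based processing of one SCC: allocate a block only if it is the
-- entry block or already has an incoming entry; otherwise defer it to the
-- next round. As in the patch, this assumes every block in the SCC is
-- reachable from the entry, so each block eventually becomes processable.
processSCC :: BlockId -> [BlockId] -> ([BlockId], BlockAssig)
processSCC entry blocks = go blocks [] M.empty []
  where
    go []     []   assig done = (reverse done, assig)
    go []     next assig done = go (reverse next) [] assig done
    go (b:bs) next assig done
      | b == entry || M.member b assig = go bs next (allocate b assig) (b : done)
      | otherwise                      = go bs (b : next) assig done

-- ghci> processSCC 1 [3, 2, 1]   -- blocks deliberately out of order
-- ([1,2,3],fromList [(2,"incoming from 1"),(3,"incoming from 2")])

Even though block 3 appears first in the input, it is deferred twice and only
allocated after block 2 has recorded its incoming entry, which is exactly the
ordering guarantee the commit message describes.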