From 8b7eaa404043294bd4cb4a0322ac1f7115bad6a0 Mon Sep 17 00:00:00 2001
From: Norman Ramsey
Date: Fri, 7 Sep 2007 07:57:54 +0000
Subject: [PATCH] adding new files to do with new cmm functionality

---
 compiler/cmm/CmmCPSZ.hs         |   51 +++
 compiler/cmm/CmmContFlowOpt.hs  |  116 ++++++
 compiler/cmm/CmmCvt.hs          |  151 +++++++
 compiler/cmm/CmmExpr.hs         |  230 +++++++++++
 compiler/cmm/CmmLiveZ.hs        |   76 ++++
 compiler/cmm/CmmProcPointZ.hs   |  374 ++++++++++++++++++
 compiler/cmm/CmmSpillReload.hs  |  231 +++++++++++
 compiler/cmm/CmmTx.hs           |   59 +++
 compiler/cmm/CmmZipUtil.hs      |   17 +
 compiler/cmm/DFMonad.hs         |  287 ++++++++++++++
 compiler/cmm/MkZipCfg.hs        |  300 ++++++++++++++
 compiler/cmm/PprCmmZ.hs         |  112 ++++++
 compiler/cmm/README             |   97 +++++
 compiler/cmm/StackColor.hs      |  120 ++++++
 compiler/cmm/StackPlacements.hs |  248 ++++++++++++
 compiler/cmm/ZipCfg.hs          |  575 +++++++++++++++++++++++++++
 compiler/cmm/ZipCfgCmm.hs       |  302 ++++++++++++++
 compiler/cmm/ZipDataflow.hs     |  836 +++++++++++++++++++++++++++++++++++++++
 18 files changed, 4182 insertions(+)
 create mode 100644 compiler/cmm/CmmCPSZ.hs
 create mode 100644 compiler/cmm/CmmContFlowOpt.hs
 create mode 100644 compiler/cmm/CmmCvt.hs
 create mode 100644 compiler/cmm/CmmExpr.hs
 create mode 100644 compiler/cmm/CmmLiveZ.hs
 create mode 100644 compiler/cmm/CmmProcPointZ.hs
 create mode 100644 compiler/cmm/CmmSpillReload.hs
 create mode 100644 compiler/cmm/CmmTx.hs
 create mode 100644 compiler/cmm/CmmZipUtil.hs
 create mode 100644 compiler/cmm/DFMonad.hs
 create mode 100644 compiler/cmm/MkZipCfg.hs
 create mode 100644 compiler/cmm/PprCmmZ.hs
 create mode 100644 compiler/cmm/README
 create mode 100644 compiler/cmm/StackColor.hs
 create mode 100644 compiler/cmm/StackPlacements.hs
 create mode 100644 compiler/cmm/ZipCfg.hs
 create mode 100644 compiler/cmm/ZipCfgCmm.hs
 create mode 100644 compiler/cmm/ZipDataflow.hs

diff --git a/compiler/cmm/CmmCPSZ.hs b/compiler/cmm/CmmCPSZ.hs
new file mode 100644
index 0000000..afa1533
--- /dev/null
+++ b/compiler/cmm/CmmCPSZ.hs
@@ -0,0 +1,51 @@
+{-# OPTIONS -Wall -fno-warn-name-shadowing #-}
+
+module CmmCPSZ (
+  -- | Converts C-- with full procedures and parameters
+  -- to a CPS transformed C-- with the stack made manifest.
+  -- Well, sort of.
+ protoCmmCPSZ +) where + +import Cmm +import CmmContFlowOpt +import CmmProcPointZ +import CmmSpillReload +import CmmTx +import DFMonad +import DynFlags +import ErrUtils +import Outputable +import PprCmmZ() +import UniqSupply +import ZipCfg hiding (zip, unzip) +import ZipCfgCmm +import ZipDataflow + +----------------------------------------------------------------------------- +-- |Top level driver for the CPS pass +----------------------------------------------------------------------------- +protoCmmCPSZ :: DynFlags -- ^ Dynamic flags: -dcmm-lint -ddump-cps-cmm + -> CmmZ -- ^ Input C-- with Proceedures + -> IO CmmZ -- ^ Output CPS transformed C-- +protoCmmCPSZ dflags (Cmm tops) + = do { showPass dflags "CPSZ" + ; u <- mkSplitUniqSupply 'p' + ; let txtops = initUs_ u $ mapM cpsTop tops + ; let pgm = Cmm $ runDFTx maxBound $ sequence txtops + --- XXX calling runDFTx is totally bogus + ; dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "CPS Cmm" (ppr pgm) + ; return pgm + } + +cpsTop :: CmmTopZ -> UniqSM (DFTx CmmTopZ) +cpsTop p@(CmmData {}) = return $ return p +cpsTop (CmmProc h l args g) = + let procPoints = minimalProcPointSet (runTx cmmCfgOptsZ g) + g' = addProcPointProtocols procPoints args g + g'' = map_nodes id NotSpillOrReload id g' + in do us <- getUs + let g = runDFM us dualLiveLattice $ b_rewrite dualLivenessWithInsertion g'' + -- let igraph = buildIGraph + return $ do g' <- g >>= return . map_nodes id spillAndReloadComments id + return $ CmmProc h l args g' diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs new file mode 100644 index 0000000..149d33e --- /dev/null +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -0,0 +1,116 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module CmmContFlowOpt + ( runCmmOpts, cmmCfgOpts, cmmCfgOptsZ + , branchChainElimZ, removeUnreachableBlocksZ + ) +where + +import Cmm +import CmmTx +import qualified ZipCfg as G +import ZipCfgCmm +import Maybes +import Util +import UniqFM + +------------------------------------ +mapProcs :: Tx (GenCmmTop d h s) -> Tx (GenCmm d h s) +mapProcs f (Cmm tops) = fmap Cmm (mapTx f tops) + + +------------------------------------ +cmmCfgOpts :: Tx (ListGraph CmmStmt) +cmmCfgOptsZ :: Tx CmmGraph + +cmmCfgOpts = branchChainElim -- boring, but will get more exciting later +cmmCfgOptsZ = branchChainElimZ `seqTx` removeUnreachableBlocksZ + -- Here branchChainElim can ultimately be replaced + -- with a more exciting combination of optimisations + +runCmmOpts :: Tx g -> Tx (GenCmm d h g) +runCmmOpts opt = mapProcs (optGraph opt) + +optGraph :: Tx g -> Tx (GenCmmTop d h g) +optGraph _ top@(CmmData {}) = noTx top +optGraph opt (CmmProc info lbl formals g) = fmap (CmmProc info lbl formals) (opt g) + +---------------------------------------------------------------- +branchChainElim :: Tx (ListGraph CmmStmt) +-- Remove any basic block of the form L: goto L', +-- and replace L with L' everywhere else +branchChainElim (ListGraph blocks) + | null lone_branch_blocks -- No blocks to remove + = noTx (ListGraph blocks) + | otherwise + = aTx (ListGraph new_blocks) + where + (lone_branch_blocks, others) = partitionWith isLoneBranch blocks + new_blocks = map (replaceLabels env) others + env = mkClosureBlockEnv lone_branch_blocks + +isLoneBranch :: CmmBasicBlock -> Either (BlockId, BlockId) CmmBasicBlock +isLoneBranch (BasicBlock id [CmmBranch target]) | id /= target = Left (id, target) +isLoneBranch other_block = Right other_block + -- ^ An infinite loop is not a link in a branch chain! 
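[Editorial illustration, not part of this patch.] The chain-following idea behind branchChainElim and the mkClosureBlockEnv helper defined further down in this file can be sketched on a toy block-id type, with Data.Map standing in for BlockEnv; all names in the sketch are invented.

import qualified Data.Map as M

type Id = Int

-- Follow a chain of lone branches (block -> branch target) to its end;
-- the 'orig' argument guards against chasing an infinite loop forever.
endChain :: M.Map Id Id -> Id -> Id -> Id
endChain env orig id = case M.lookup id env of
                         Just id' | id /= orig -> endChain env orig id'
                         _ -> id

-- A block branching to 2, which merely branches to 3, is redirected to 3.
demo :: [(Id, Id)]
demo = let chains = M.fromList [(1, 2), (2, 3)]
       in [ (i, endChain chains i tgt) | (i, tgt) <- M.toList chains ]
-- demo == [(1,3),(2,3)]

Once every chain is resolved to its final target, replacing labels throughout the graph (as replaceLabels does below) makes the intermediate lone-branch blocks unreachable so they can be dropped.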
+ +replaceLabels :: BlockEnv BlockId -> CmmBasicBlock -> CmmBasicBlock +replaceLabels env (BasicBlock id stmts) + = BasicBlock id (map replace stmts) + where + replace (CmmBranch id) = CmmBranch (lookup id) + replace (CmmCondBranch e id) = CmmCondBranch e (lookup id) + replace (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl) + replace other_stmt = other_stmt + + lookup id = lookupBlockEnv env id `orElse` id +---------------------------------------------------------------- +branchChainElimZ :: Tx CmmGraph +-- Remove any basic block of the form L: goto L', +-- and replace L with L' everywhere else +branchChainElimZ g@(G.LGraph eid _) + | null lone_branch_blocks -- No blocks to remove + = noTx g + | otherwise + = aTx $ replaceLabelsZ env $ G.of_block_list eid (self_branches ++ others) + where + (lone_branch_blocks, others) = partitionWith isLoneBranchZ (G.to_block_list g) + env = mkClosureBlockEnv lone_branch_blocks + self_branches = + let loop_to (id, _) = + if lookup id == id then + Just (G.Block id (G.ZLast (G.mkBranchNode id))) + else + Nothing + in mapMaybe loop_to lone_branch_blocks + lookup id = G.lookupBlockEnv env id `orElse` id + +isLoneBranchZ :: CmmBlock -> Either (G.BlockId, G.BlockId) CmmBlock +isLoneBranchZ (G.Block id (G.ZLast (G.LastOther (LastBranch target [])))) + | id /= target = Left (id,target) +isLoneBranchZ other = Right other + -- ^ An infinite loop is not a link in a branch chain! + +replaceLabelsZ :: BlockEnv G.BlockId -> CmmGraph -> CmmGraph +replaceLabelsZ env = replace_eid . G.map_nodes id id last + where + replace_eid (G.LGraph eid blocks) = G.LGraph (lookup eid) blocks + last (LastBranch id args) = LastBranch (lookup id) args + last (LastCondBranch e ti fi) = LastCondBranch e (lookup ti) (lookup fi) + last (LastSwitch e tbl) = LastSwitch e (map (fmap lookup) tbl) + last (LastCall tgt args (Just id)) = LastCall tgt args (Just $ lookup id) + last exit_jump_return = exit_jump_return + lookup id = G.lookupBlockEnv env id `orElse` id +---------------------------------------------------------------- +mkClosureBlockEnv :: [(BlockId, BlockId)] -> BlockEnv BlockId +mkClosureBlockEnv blocks = mkBlockEnv $ map follow blocks + where singleEnv = mkBlockEnv blocks + follow (id, next) = (id, endChain id next) + endChain orig id = case lookupBlockEnv singleEnv id of + Just id' | id /= orig -> endChain orig id' + _ -> id +---------------------------------------------------------------- +removeUnreachableBlocksZ :: Tx CmmGraph +removeUnreachableBlocksZ g@(G.LGraph id blocks) = + if length blocks' < sizeUFM blocks then aTx $ G.of_block_list id blocks' + else noTx g + where blocks' = G.postorder_dfs g diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs new file mode 100644 index 0000000..35ebb4f --- /dev/null +++ b/compiler/cmm/CmmCvt.hs @@ -0,0 +1,151 @@ +{-# LANGUAGE PatternGuards #-} +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} + +module CmmCvt + ( cmmToZgraph, cmmOfZgraph ) +where +import Cmm +import CmmExpr +import ZipCfgCmm +import MkZipCfg +import CmmZipUtil +import FastString +import Outputable +import Panic +import PprCmm() +import PprCmmZ() +import UniqSet +import UniqSupply +import qualified ZipCfg as G + +cmmToZgraph :: GenCmm d h (ListGraph CmmStmt) -> UniqSM (GenCmm d h CmmGraph) +cmmOfZgraph :: GenCmm d h (CmmGraph) -> GenCmm d h (ListGraph CmmStmt) + +cmmToZgraph = cmmMapGraphM toZgraph +cmmOfZgraph = cmmMapGraph ofZgraph + + +toZgraph :: String -> ListGraph CmmStmt -> UniqSM CmmGraph +toZgraph _ (ListGraph []) = lgraphOfAGraph emptyAGraph 
+toZgraph fun_name (ListGraph (BasicBlock id ss : other_blocks)) = + labelAGraph id $ mkStmts ss <*> foldr addBlock emptyAGraph other_blocks + where addBlock (BasicBlock id ss) g = mkLabel id <*> mkStmts ss <*> g + mkStmts (CmmNop : ss) = mkNop <*> mkStmts ss + mkStmts (CmmComment s : ss) = mkComment s <*> mkStmts ss + mkStmts (CmmAssign l r : ss) = mkAssign l r <*> mkStmts ss + mkStmts (CmmStore l r : ss) = mkStore l r <*> mkStmts ss + mkStmts (CmmCall f res args (CmmSafe srt) CmmMayReturn : ss) = + mkCall f res args srt <*> mkStmts ss + mkStmts (CmmCall f res args CmmUnsafe CmmMayReturn : ss) = + mkUnsafeCall f res args <*> mkStmts ss + mkStmts (CmmCondBranch e l : fbranch) = + mkIfThenElse (mkCbranch e) (mkBranch l) (mkStmts fbranch) + mkStmts (last : []) = mkLast last + mkStmts [] = bad "fell off end" + mkStmts (_ : _ : _) = bad "last node not at end" + bad msg = panic (msg {- ++ " in block " ++ showSDoc (ppr b) -} + ++ " in function " ++ fun_name) + mkLast (CmmCall f [] args _ CmmNeverReturns) = mkFinalCall f args + mkLast (CmmSwitch scrutinee table) = mkSwitch scrutinee table + mkLast (CmmJump tgt args) = mkJump tgt args + mkLast (CmmReturn ress) = mkReturn ress + mkLast (CmmBranch tgt) = mkBranch tgt + mkLast (CmmCall _f (_:_) _args _ CmmNeverReturns) = + panic "Call never returns but has results?!" + mkLast _ = panic "fell off end of block" + +ofZgraph :: CmmGraph -> ListGraph CmmStmt +ofZgraph g = ListGraph $ swallow blocks + where blocks = G.postorder_dfs g + -- | the next two functions are hooks on which to hang debugging info + extend_entry stmts = stmts + extend_block _id stmts = stmts + _extend_entry stmts = scomment showblocks : scomment cscomm : stmts + showblocks = "LGraph has " ++ show (length blocks) ++ " blocks:" ++ + concat (map (\(G.Block id _) -> " " ++ show id) blocks) + cscomm = "Call successors are" ++ + (concat $ map (\id -> " " ++ show id) $ uniqSetToList call_succs) + swallow [] = [] + swallow (G.Block id t : rest) = tail id [] t rest + tail id prev' (G.ZTail m t) rest = tail id (mid m : prev') t rest + tail id prev' (G.ZLast G.LastExit) rest = exit id prev' rest + tail id prev' (G.ZLast (G.LastOther l))rest = last id prev' l rest + mid (MidNop) = CmmNop + mid (MidComment s) = CmmComment s + mid (MidAssign l r) = CmmAssign l r + mid (MidStore l r) = CmmStore l r + mid (MidUnsafeCall f ress args) = CmmCall f ress args CmmUnsafe CmmMayReturn + mid m@(CopyOut {}) = pcomment (ppr m) + mid m@(CopyIn {}) = pcomment (ppr m <+> text "(proc point)") + pcomment p = scomment $ showSDoc p + block' id prev' + | id == G.gr_entry g = BasicBlock id $ extend_entry (reverse prev') + | otherwise = BasicBlock id $ extend_block id (reverse prev') + last id prev' l n = + let endblock stmt = block' id (stmt : prev') : swallow n in + case l of + LastBranch _ (_:_) -> panic "unrepresentable branch" + LastBranch tgt [] -> + case n of + G.Block id' t : bs + | tgt == id', unique_pred id' + -> tail id prev' t bs -- optimize out redundant labels + _ -> endblock (CmmBranch tgt) + LastCondBranch expr tid fid -> + case n of + G.Block id' t : bs + | id' == fid, unique_pred id' -> + tail id (CmmCondBranch expr tid : prev') t bs + | id' == tid, unique_pred id', + Just e' <- maybeInvertCmmExpr expr -> + tail id (CmmCondBranch e' fid : prev') t bs + _ -> let instrs' = CmmBranch fid : CmmCondBranch expr tid : prev' + in block' id instrs' : swallow n + LastJump expr params -> endblock $ CmmJump expr params + LastReturn params -> endblock $ CmmReturn params + LastSwitch arg ids -> endblock $ CmmSwitch 
arg $ ids + LastCall tgt args Nothing -> + endblock $ CmmCall tgt [] args CmmUnsafe CmmNeverReturns + LastCall tgt args (Just k) + | G.Block id' (G.ZTail (CopyIn _ ress srt) t) : bs <- n, + id' == k, unique_pred k -> + let call = CmmCall tgt ress args (CmmSafe srt) CmmMayReturn + in tail id (call : prev') t bs + | G.Block id' t : bs <- n, id' == k, unique_pred k -> + let (ress, srt) = findCopyIn t + call = CmmCall tgt ress args (CmmSafe srt) CmmMayReturn + delayed = scomment "delayed CopyIn follows previous call" + in tail id (delayed : call : prev') t bs + | otherwise -> panic "unrepairable call" + findCopyIn (G.ZTail (CopyIn _ ress srt) _) = (ress, srt) + findCopyIn (G.ZTail _ t) = findCopyIn t + findCopyIn (G.ZLast _) = panic "missing CopyIn after call" + exit id prev' n = -- highly irregular (assertion violation?) + let endblock stmt = block' id (stmt : prev') : swallow n in + case n of [] -> endblock (scomment "procedure falls off end") + G.Block id' t : bs -> + if unique_pred id' then + tail id (scomment "went thru exit" : prev') t bs + else + endblock (CmmBranch id') + preds = zipPreds g + single_preds = + let add b single = + let id = G.blockId b + in case G.lookupBlockEnv preds id of + Nothing -> single + Just s -> if sizeUniqSet s == 1 then + G.extendBlockSet single id + else single + in G.fold_blocks add G.emptyBlockSet g + unique_pred id = G.elemBlockSet id single_preds + call_succs = + let add b succs = + case G.last (G.unzip b) of + G.LastOther (LastCall _ _ (Just id)) -> extendBlockSet succs id + _ -> succs + in G.fold_blocks add emptyBlockSet g + _is_call_succ id = elemBlockSet id call_succs + +scomment :: String -> CmmStmt +scomment s = CmmComment $ mkFastString s diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs new file mode 100644 index 0000000..b0a7468 --- /dev/null +++ b/compiler/cmm/CmmExpr.hs @@ -0,0 +1,230 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} + +module CmmExpr + ( CmmExpr(..), cmmExprRep, maybeInvertCmmExpr + , CmmReg(..), cmmRegRep + , CmmLit(..), cmmLitRep + , LocalReg(..), localRegRep, localRegGCFollow, Kind(..) + , GlobalReg(..), globalRegRep, spReg, hpReg, spLimReg, nodeReg, node + , UserOfLocalRegs, foldRegsUsed + , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet + , plusRegSet, minusRegSet + ) +where + +import CLabel +import MachOp +import Unique +import UniqSet + +----------------------------------------------------------------------------- +-- CmmExpr +-- An expression. Expressions have no side effects. +----------------------------------------------------------------------------- + +data CmmExpr + = CmmLit CmmLit -- Literal + | CmmLoad CmmExpr MachRep -- Read memory location + | CmmReg CmmReg -- Contents of register + | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) + | CmmRegOff CmmReg Int + -- CmmRegOff reg i + -- ** is shorthand only, meaning ** + -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep))) + -- where rep = cmmRegRep reg + deriving Eq + +data CmmReg + = CmmLocal LocalReg + | CmmGlobal GlobalReg + deriving( Eq ) + +data CmmLit + = CmmInt Integer MachRep + -- Interpretation: the 2's complement representation of the value + -- is truncated to the specified size. This is easier than trying + -- to keep the value within range, because we don't know whether + -- it will be used as a signed or unsigned value (the MachRep doesn't + -- distinguish between signed & unsigned). 
+ | CmmFloat Rational MachRep + | CmmLabel CLabel -- Address of label + | CmmLabelOff CLabel Int -- Address of label + byte offset + + -- Due to limitations in the C backend, the following + -- MUST ONLY be used inside the info table indicated by label2 + -- (label2 must be the info label), and label1 must be an + -- SRT, a slow entrypoint or a large bitmap (see the Mangler) + -- Don't use it at all unless tablesNextToCode. + -- It is also used inside the NCG during when generating + -- position-independent code. + | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset + deriving Eq + +instance Eq LocalReg where + (LocalReg u1 _ _) == (LocalReg u2 _ _) = u1 == u2 + +instance Uniquable LocalReg where + getUnique (LocalReg uniq _ _) = uniq + +-------- +--- Negation for conditional branches + +maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr +maybeInvertCmmExpr (CmmMachOp op args) = do op' <- maybeInvertComparison op + return (CmmMachOp op' args) +maybeInvertCmmExpr _ = Nothing + +----------------------------------------------------------------------------- +-- Local registers +----------------------------------------------------------------------------- + +-- | Whether a 'LocalReg' is a GC followable pointer +data Kind = KindPtr | KindNonPtr deriving (Eq) + +data LocalReg + = LocalReg + !Unique -- ^ Identifier + MachRep -- ^ Type + Kind -- ^ Should the GC follow as a pointer + +-- | Sets of local registers + +type RegSet = UniqSet LocalReg +emptyRegSet :: RegSet +elemRegSet :: LocalReg -> RegSet -> Bool +extendRegSet :: RegSet -> LocalReg -> RegSet +deleteFromRegSet :: RegSet -> LocalReg -> RegSet +mkRegSet :: [LocalReg] -> RegSet +minusRegSet, plusRegSet :: RegSet -> RegSet -> RegSet + +emptyRegSet = emptyUniqSet +elemRegSet = elementOfUniqSet +extendRegSet = addOneToUniqSet +deleteFromRegSet = delOneFromUniqSet +mkRegSet = mkUniqSet +minusRegSet = minusUniqSet +plusRegSet = unionUniqSets + +----------------------------------------------------------------------------- +-- Register-use information for expressions and other types +----------------------------------------------------------------------------- + +class UserOfLocalRegs a where + foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b + +instance UserOfLocalRegs CmmReg where + foldRegsUsed f z (CmmLocal reg) = f z reg + foldRegsUsed _ z (CmmGlobal _) = z + +instance UserOfLocalRegs LocalReg where + foldRegsUsed f z r = f z r + +instance UserOfLocalRegs CmmExpr where + foldRegsUsed f z e = expr z e + where expr z (CmmLit _) = z + expr z (CmmLoad addr _) = foldRegsUsed f z addr + expr z (CmmReg r) = foldRegsUsed f z r + expr z (CmmMachOp _ exprs) = foldRegsUsed f z exprs + expr z (CmmRegOff r _) = foldRegsUsed f z r + +instance UserOfLocalRegs a => UserOfLocalRegs [a] where + foldRegsUsed _ set [] = set + foldRegsUsed f set (x:xs) = foldRegsUsed f (foldRegsUsed f set x) xs + +----------------------------------------------------------------------------- +-- MachRep +----------------------------------------------------------------------------- + + + +cmmExprRep :: CmmExpr -> MachRep +cmmExprRep (CmmLit lit) = cmmLitRep lit +cmmExprRep (CmmLoad _ rep) = rep +cmmExprRep (CmmReg reg) = cmmRegRep reg +cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op +cmmExprRep (CmmRegOff reg _) = cmmRegRep reg + +cmmRegRep :: CmmReg -> MachRep +cmmRegRep (CmmLocal reg) = localRegRep reg +cmmRegRep (CmmGlobal reg) = globalRegRep reg + +localRegRep :: LocalReg -> MachRep +localRegRep (LocalReg _ rep _) = rep + + +localRegGCFollow :: LocalReg -> Kind 
+localRegGCFollow (LocalReg _ _ p) = p + +cmmLitRep :: CmmLit -> MachRep +cmmLitRep (CmmInt _ rep) = rep +cmmLitRep (CmmFloat _ rep) = rep +cmmLitRep (CmmLabel _) = wordRep +cmmLitRep (CmmLabelOff _ _) = wordRep +cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep + +----------------------------------------------------------------------------- +-- Global STG registers +----------------------------------------------------------------------------- + +data GlobalReg + -- Argument and return registers + = VanillaReg -- pointers, unboxed ints and chars + {-# UNPACK #-} !Int -- its number + + | FloatReg -- single-precision floating-point registers + {-# UNPACK #-} !Int -- its number + + | DoubleReg -- double-precision floating-point registers + {-# UNPACK #-} !Int -- its number + + | LongReg -- long int registers (64-bit, really) + {-# UNPACK #-} !Int -- its number + + -- STG registers + | Sp -- Stack ptr; points to last occupied stack location. + | SpLim -- Stack limit + | Hp -- Heap ptr; points to last occupied heap location. + | HpLim -- Heap limit register + | CurrentTSO -- pointer to current thread's TSO + | CurrentNursery -- pointer to allocation area + | HpAlloc -- allocation count for heap check failure + + -- We keep the address of some commonly-called + -- functions in the register table, to keep code + -- size down: + | GCEnter1 -- stg_gc_enter_1 + | GCFun -- stg_gc_fun + + -- Base offset for the register table, used for accessing registers + -- which do not have real registers assigned to them. This register + -- will only appear after we have expanded GlobalReg into memory accesses + -- (where necessary) in the native code generator. + | BaseReg + + -- Base Register for PIC (position-independent code) calculations + -- Only used inside the native code generator. It's exact meaning differs + -- from platform to platform (see module PositionIndependentCode). 
+ | PicBaseReg + + deriving( Eq +#ifdef DEBUG + , Show +#endif + ) + +-- convenient aliases +spReg, hpReg, spLimReg, nodeReg :: CmmReg +spReg = CmmGlobal Sp +hpReg = CmmGlobal Hp +spLimReg = CmmGlobal SpLim +nodeReg = CmmGlobal node + +node :: GlobalReg +node = VanillaReg 1 + +globalRegRep :: GlobalReg -> MachRep +globalRegRep (VanillaReg _) = wordRep +globalRegRep (FloatReg _) = F32 +globalRegRep (DoubleReg _) = F64 +globalRegRep (LongReg _) = I64 +globalRegRep _ = wordRep diff --git a/compiler/cmm/CmmLiveZ.hs b/compiler/cmm/CmmLiveZ.hs new file mode 100644 index 0000000..87f6c38 --- /dev/null +++ b/compiler/cmm/CmmLiveZ.hs @@ -0,0 +1,76 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module CmmLiveZ + ( CmmLive + , cmmLivenessZ + , liveLattice + , middleLiveness, lastLiveness + ) +where + +import Cmm +import CmmExpr +import CmmTx +import DFMonad +import Maybes +import PprCmm() +import PprCmmZ() +import UniqSet +import ZipDataflow +import ZipCfgCmm + +----------------------------------------------------------------------------- +-- Calculating what variables are live on entry to a basic block +----------------------------------------------------------------------------- + +-- | The variables live on entry to a block +type CmmLive = RegSet + +-- | The dataflow lattice +liveLattice :: DataflowLattice CmmLive +liveLattice = DataflowLattice "live LocalReg's" emptyUniqSet add False + where add new old = + let join = unionUniqSets new old in + (if sizeUniqSet join > sizeUniqSet old then aTx else noTx) join + +-- | A mapping from block labels to the variables live on entry +type BlockEntryLiveness = BlockEnv CmmLive + +----------------------------------------------------------------------------- +-- | Calculated liveness info for a list of 'CmmBasicBlock' +----------------------------------------------------------------------------- +cmmLivenessZ :: CmmGraph -> BlockEntryLiveness +cmmLivenessZ g = env + where env = runDFA liveLattice $ + do run_b_anal transfer g + allFacts + transfer = BComp "liveness analysis" exit last middle first + exit = emptyUniqSet + first live _ = live + middle = flip middleLiveness + last = flip lastLiveness + +-- | The transfer equations use the traditional 'gen' and 'kill' +-- notations, which should be familiar from the dragon book. +gen, kill :: UserOfLocalRegs a => a -> RegSet -> RegSet +gen a live = foldRegsUsed extendRegSet live a +kill a live = foldRegsUsed delOneFromUniqSet live a + +middleLiveness :: Middle -> CmmLive -> CmmLive +middleLiveness m = middle m + where middle (MidNop) = id + middle (MidComment {}) = id + middle (MidAssign lhs expr) = gen expr . kill lhs + middle (MidStore addr rval) = gen addr . gen rval + middle (MidUnsafeCall tgt ress args) = gen tgt . gen args . 
kill ress + middle (CopyIn _ formals _) = kill formals + middle (CopyOut _ formals) = gen formals + +lastLiveness :: Last -> (BlockId -> CmmLive) -> CmmLive +lastLiveness l env = last l + where last (LastReturn ress) = gen ress emptyUniqSet + last (LastJump e args) = gen e $ gen args emptyUniqSet + last (LastBranch id args) = gen args $ env id + last (LastCall tgt args (Just k)) = gen tgt $ gen args $ env k + last (LastCall tgt args Nothing) = gen tgt $ gen args $ emptyUniqSet + last (LastCondBranch e t f) = gen e $ unionUniqSets (env t) (env f) + last (LastSwitch e tbl) = gen e $ unionManyUniqSets $ map env (catMaybes tbl) diff --git a/compiler/cmm/CmmProcPointZ.hs b/compiler/cmm/CmmProcPointZ.hs new file mode 100644 index 0000000..279c730 --- /dev/null +++ b/compiler/cmm/CmmProcPointZ.hs @@ -0,0 +1,374 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module CmmProcPointZ + ( minimalProcPointSet + , addProcPointProtocols + ) +where + +import Prelude hiding (zip, unzip) + +import ClosureInfo +import Cmm hiding (blockId) +import CmmExpr +import CmmContFlowOpt +import CmmLiveZ +import CmmTx +import DFMonad +import ForeignCall -- used in protocol for the entry point +import MachOp (MachHint(NoHint)) +import Maybes +import Outputable +import Panic +import UniqFM +import UniqSet +import ZipCfg +import ZipCfgCmm +import ZipDataflow + +-- Compute a minimal set of proc points for a control-flow graph. + +-- Determine a protocol for each proc point (which live variables will +-- be passed as arguments and which will be on the stack). + +{- +A proc point is a basic block that, after CPS transformation, will +start a new function. The entry block of the original function is a +proc point, as is the continuation of each function call. +A third kind of proc point arises if we want to avoid copying code. +Suppose we have code like the following: + + f() { + if (...) { ..1..; call foo(); ..2..} + else { ..3..; call bar(); ..4..} + x = y + z; + return x; + } + +The statement 'x = y + z' can be reached from two different proc +points: the continuations of foo() and bar(). We would prefer not to +put a copy in each continuation; instead we would like 'x = y + z' to +be the start of a new procedure to which the continuations can jump: + + f_cps () { + if (...) { ..1..; push k_foo; jump foo_cps(); } + else { ..3..; push k_bar; jump bar_cps(); } + } + k_foo() { ..2..; jump k_join(y, z); } + k_bar() { ..4..; jump k_join(y, z); } + k_join(y, z) { x = y + z; return x; } + +You might think then that a criterion to make a node a proc point is +that it is directly reached by two distinct proc points. (Note +[Direct reachability].) But this criterion is a bit two simple; for +example, 'return x' is also reached by two proc points, yet there is +no point in pulling it out of k_join. A good criterion would be to +say that a node should be made a proc point if it is reached by a set +of proc points that is different than its immediate dominator. NR +believes this criterion can be shown to produce a minimum set of proc +points, and given a dominator tree, the proc points can be chosen in +time linear in the number of blocks. Lacking a dominator analysis, +however, we turn instead to an iterative solution, starting with no +proc points and adding them according to these rules: + + 1. The entry block is a proc point. + 2. The continuation of a call is a proc point. + 3. A node is a proc point if it is directly reached by more proc + points than one of its predecessors. 
+ +Because we don't understand the problem very well, we apply rule 3 at +most once per iteration, then recompute the reachability information. +(See Note [No simple dataflow].) The choice of the new proc point is +arbitrary, and I don't know if the choice affects the final solution, +so I don't know if the number of proc points chosen is the +minimum---but the set will be minimal. +-} + +type ProcPointSet = BlockSet + +data Status + = ReachedBy ProcPointSet -- set of proc points that directly reach the block + | ProcPoint -- this block is itself a proc point + +instance Outputable Status where + ppr (ReachedBy ps) + | isEmptyUniqSet ps = text "" + | otherwise = text "reached by" <+> + (hsep $ punctuate comma $ map ppr $ uniqSetToList ps) + ppr ProcPoint = text "" + + +lattice :: DataflowLattice Status +lattice = DataflowLattice "direct proc-point reachability" unreached add_to False + where unreached = ReachedBy emptyBlockSet + add_to _ ProcPoint = noTx ProcPoint + add_to ProcPoint _ = aTx ProcPoint -- aTx because of previous case again + add_to (ReachedBy p) (ReachedBy p') = + let union = unionUniqSets p p' + in if sizeUniqSet union > sizeUniqSet p' then + aTx (ReachedBy union) + else + noTx (ReachedBy p') +-------------------------------------------------- +-- transfer equations + +forward :: FAnalysis Middle Last Status +forward = FComp "proc-point reachability" first middle last exit + where first ProcPoint id = ReachedBy $ unitUniqSet id + first x _ = x + middle x _ = x + last _ (LastCall _ _ (Just id)) = LastOutFacts [(id, ProcPoint)] + last x l = LastOutFacts $ map (\id -> (id, x)) (succs l) + exit _ = LastOutFacts [] + +minimalProcPointSet :: CmmGraph -> ProcPointSet +minimalProcPointSet g = extendPPSet g (postorder_dfs g) entryPoint + where entryPoint = unitUniqSet (gr_entry g) + +extendPPSet :: CmmGraph -> [CmmBlock] -> ProcPointSet -> ProcPointSet +extendPPSet g blocks procPoints = + case newPoint of Just id -> + if elemBlockSet id procPoints' then panic "added old proc pt" + else extendPPSet g blocks (extendBlockSet procPoints' id) + Nothing -> procPoints' + where env = runDFA lattice $ + do refine_f_anal forward g set_init_points + allFacts + set_init_points = mapM_ (\id -> setFact id ProcPoint) + (uniqSetToList procPoints) + procPoints' = fold_blocks add emptyBlockSet g + add block pps = let id = blockId block + in case lookupBlockEnv env id of + Just ProcPoint -> extendBlockSet pps id + _ -> pps + + newPoint = listToMaybe (mapMaybe ppSuccessor blocks) + ppSuccessor b@(Block id _) = + let nreached id = case lookupBlockEnv env id `orElse` panic "no ppt" of + ProcPoint -> 1 + ReachedBy ps -> sizeUniqSet ps + my_nreached = nreached id + -- | Looking for a successor of b that is reached by + -- more proc points than b and is not already a proc + -- point. If found, it can become a proc point. + newId succ_id = not (elemBlockSet succ_id procPoints') && + nreached id > my_nreached + in listToMaybe $ filter newId $ succs b + + +------------------------------------------------------------------------ +-- Computing Proc-Point Protocols -- +------------------------------------------------------------------------ + +{- + +There is one major trick, discovered by Michael Adams, which is that +we want to choose protocols in a way that enables us to optimize away +some continuations. The optimization is very much like branch-chain +elimination, except that it involves passing results as well as +control. 
The idea is that if a call's continuation k does nothing but +CopyIn its results and then goto proc point P, the call's continuation +may be changed to P, *provided* P's protocol is identical to the +protocol for the CopyIn. We choose protocols to make this so. + +Here's an explanatory example; we begin with the source code (lines +separate basic blocks): + + ..1..; + x, y = g(); + goto P; + ------- + P: ..2..; + +Zipperization converts this code as follows: + + ..1..; + call g() returns to k; + ------- + k: CopyIn(x, y); + goto P; + ------- + P: ..2..; + +What we'd like to do is assign P the same CopyIn protocol as k, so we +can eliminate k: + + ..1..; + call g() returns to P; + ------- + P: CopyIn(x, y); ..2..; + +Of course, P may be the target of more than one continuation, and +different continuations may have different protocols. Michael Adams +implemented a voting mechanism, but he thinks a simple greedy +algorithm would be just as good, so that's what we do. + +-} + +data Protocol = Protocol Convention CmmHintFormals + deriving Eq + +-- | Function 'optimize_calls' chooses protocols only for those proc +-- points that are relevant to the optimization explained above. +-- The others are assigned by 'add_unassigned', which is not yet clever. + +addProcPointProtocols :: ProcPointSet -> CmmFormals -> CmmGraph -> CmmGraph +addProcPointProtocols procPoints formals g = + snd $ add_unassigned procPoints $ optimize_calls g + where optimize_calls g = -- see Note [Separate Adams optimization] + let (protos, blocks') = + fold_blocks maybe_add_call (init_protocols, emptyBlockEnv) g + g' = LGraph (gr_entry g) (add_CopyIns protos blocks') + in (protos, runTx removeUnreachableBlocksZ g') + maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock) + -> (BlockEnv Protocol, BlockEnv CmmBlock) + -- ^ If the block is a call whose continuation goes to a proc point + -- whose protocol either matches the continuation's or is not yet set, + -- redirect the call (cf 'newblock') and set the protocol if necessary + maybe_add_call block (protos, blocks) = + case goto_end $ unzip block of + (h, LastOther (LastCall tgt args (Just k))) + | Just proto <- lookupBlockEnv protos k, + Just pee <- jumpsToProcPoint k + -> let newblock = + zipht h (tailOfLast (LastCall tgt args (Just pee))) + changed_blocks = insertBlock newblock blocks + unchanged_blocks = insertBlock block blocks + in case lookupBlockEnv protos pee of + Nothing -> (extendBlockEnv protos pee proto,changed_blocks) + Just proto' -> + if proto == proto' then (protos, changed_blocks) + else (protos, unchanged_blocks) + _ -> (protos, insertBlock block blocks) + + jumpsToProcPoint :: BlockId -> Maybe BlockId + -- ^ Tells whether the named block is just a jump to a proc point + jumpsToProcPoint id = + let (Block _ t) = lookupBlockEnv (gr_blocks g) id `orElse` + panic "jump out of graph" + in case t of + ZTail (CopyOut {}) (ZLast (LastOther (LastBranch pee []))) + | elemBlockSet pee procPoints -> Just pee + _ -> Nothing + init_protocols = fold_blocks maybe_add_proto emptyBlockEnv g + maybe_add_proto :: CmmBlock -> BlockEnv Protocol -> BlockEnv Protocol + maybe_add_proto (Block id (ZTail (CopyIn c fs _srt) _)) env = + extendBlockEnv env id (Protocol c fs) + maybe_add_proto (Block id _) env | id == gr_entry g = + extendBlockEnv env id (Protocol (Argument CmmCallConv) hinted_formals) + maybe_add_proto _ env = env + hinted_formals = map (\x -> (x, NoHint)) formals + +-- | For now, following a suggestion by Ben Lippmeier, we pass all +-- live variables as 
arguments, hoping that a clever register +-- allocator might help. + +add_unassigned + :: ProcPointSet -> (BlockEnv Protocol, CmmGraph) -> (BlockEnv Protocol, CmmGraph) +add_unassigned = pass_live_vars_as_args + +pass_live_vars_as_args + :: ProcPointSet -> (BlockEnv Protocol, CmmGraph) -> (BlockEnv Protocol, CmmGraph) +pass_live_vars_as_args procPoints (protos, g) = (protos', g') + where liveness = cmmLivenessZ g + protos' = foldUniqSet addLiveVars protos procPoints + addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol + addLiveVars id protos = + case lookupBlockEnv protos id of + Just _ -> protos + Nothing -> let live = lookupBlockEnv liveness id `orElse` + emptyRegSet -- XXX there's a bug lurking! + -- panic ("no liveness at block " ++ show id) + formals = map (\x->(x,NoHint)) $ uniqSetToList live + in extendBlockEnv protos id (Protocol Local formals) + g' = g { gr_blocks = add_CopyIns protos' (gr_blocks g) } + + +-- | Add a CopyIn node to each block that has a protocol but lacks the +-- appropriate CopyIn node. + +add_CopyIns :: BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock +add_CopyIns protos = mapUFM (maybe_insert_CopyIn protos) + where maybe_insert_CopyIn :: BlockEnv Protocol -> CmmBlock -> CmmBlock + maybe_insert_CopyIn protos b@(Block id t) = + case lookupBlockEnv protos id of + Nothing -> b + Just (Protocol c fs) -> + case t of + ZTail (CopyIn c' fs' _) _ -> + if c == c' && fs == fs' then b + else panic ("mismatched protocols for block " ++ show id) + _ -> Block id (ZTail (CopyIn c fs NoC_SRT) t) + +-- XXX also need to add the relevant CopyOut nodes!!! + +---------------------------------------------------------------- + +{- +Note [Direct reachability] + +Block B is directly reachable from proc point P iff control can flow +from P to B without passing through an intervening proc point. +-} + +---------------------------------------------------------------- + +{- +Note [No simple dataflow] + +Sadly, it seems impossible to compute the proc points using a single +dataflow pass. One might attempt to use this simple lattice: + + data Location = Unknown + | InProc BlockId -- node is in procedure headed by the named proc point + | ProcPoint -- node is itself a proc point + +At a join, a node in two different blocks becomes a proc point. +The difficulty is that the change of information during iterative +computation may promote a node prematurely. Here's a program that +illustrates the difficulty: + + f () { + entry: + .... + L1: + if (...) { ... } + else { ... } + + L2: if (...) { g(); goto L1; } + return x + y; + } + +The only proc-point needed (besides the entry) is L1. But in an +iterative analysis, consider what happens to L2. On the first pass +through, it rises from Unknown to 'InProc entry', but when L1 is +promoted to a proc point (because it's the successor of g()), L1's +successors will be promoted to 'InProc L1'. The problem hits when the +new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'. +The join operation makes it a proc point when in fact it needn't be, +because its immediate dominator L1 is already a proc point and there +are no other proc points that directly reach L2. +-} + + + +{- Note [Separate Adams optimization] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It may be worthwhile to attempt the Adams optimization by rewriting +the graph before the assignment of proc-point protocols. Here are a +couple of rules: + + g() returns to k; g() returns to L; + k: CopyIn c ress; goto L: + ... ==> ... 
+ L: // no CopyIn node here L: CopyIn c ress; + + +And when c == c' and ress == ress', this also: + + g() returns to k; g() returns to L; + k: CopyIn c ress; goto L: + ... ==> ... + L: CopyIn c' ress' L: CopyIn c' ress' ; + +In both cases the goal is to eliminate k. +-} diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs new file mode 100644 index 0000000..bef6080 --- /dev/null +++ b/compiler/cmm/CmmSpillReload.hs @@ -0,0 +1,231 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} + +module CmmSpillReload + ( ExtendWithSpills(..) + , DualLive(..) + , dualLiveLattice, dualLiveness + , insertSpillsAndReloads --- XXX todo check live-in at entry against formals + , dualLivenessWithInsertion + , spillAndReloadComments + ) +where +import CmmExpr +import CmmTx() +import CmmLiveZ +import DFMonad +import FastString +import Maybe +import MkZipCfg +import Outputable hiding (empty) +import qualified Outputable as PP +import Panic +import PprCmm() +import UniqSet +import ZipCfg +import ZipCfgCmm +import ZipDataflow + +-- The point of this module is to insert spills and reloads to +-- establish the invariant that at a call (or at any proc point with +-- an established protocol) all live variables not expected in +-- registers are sitting on the stack. We use a backward analysis to +-- insert spills and reloads. It should some day be followed by a +-- forward transformation to sink reloads as deeply as possible, so as +-- to reduce register pressure. + +data ExtendWithSpills m + = NotSpillOrReload m + | Spill RegSet + | Reload RegSet + +type M = ExtendWithSpills Middle + +-- A variable can be expected to be live in a register, live on the +-- stack, or both. This analysis ensures that spills and reloads are +-- inserted as needed to make sure that every live variable needed +-- after a call is available on the stack. Spills are pushed back to +-- their reaching definitions, but reloads are dropped wherever needed +-- and will have to be sunk by a later forward transformation. 
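[Editorial illustration, not part of this patch.] On a fragment like the one below, where x is live across a call, the pass would spill x right after its defining assignment and reload it just after the CopyIn of the call's continuation; the block and variable names are invented, and Spill{x}/Reload{x} stand for the Spill and Reload constructors defined just below:

     x = e;                         x = e; Spill{x};
     ...                    ==>     ...
     call f() returns to k;         call f() returns to k;
  k: CopyIn(...);                k: CopyIn(...); Reload{x};
     ... use of x ...               ... use of x ...

A later forward pass would then be free to sink the Reload closer to the use of x.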
+ +data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet } + +dualUnion :: DualLive -> DualLive -> DualLive +dualUnion (DualLive s r) (DualLive s' r') = + DualLive (s `unionUniqSets` s') (r `unionUniqSets` r') + +dualUnionList :: [DualLive] -> DualLive +dualUnionList ls = DualLive ss rs + where ss = unionManyUniqSets $ map on_stack ls + rs = unionManyUniqSets $ map in_regs ls + +_changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive +_changeStack f live = live { on_stack = f (on_stack live) } +changeRegs f live = live { in_regs = f (in_regs live) } + + +dualLiveLattice :: DataflowLattice DualLive +dualLiveLattice = + DataflowLattice "variables live in registers and on stack" empty add False + where empty = DualLive emptyRegSet emptyRegSet + -- | compute in the Tx monad to track whether anything has changed + add new old = do stack <- add1 (on_stack new) (on_stack old) + regs <- add1 (in_regs new) (in_regs old) + return $ DualLive stack regs + add1 = fact_add_to liveLattice + +dualLivenessWithInsertion :: BPass M Last DualLive +dualLivenessWithInsertion = a_ft_b_unlimited dualLiveness insertSpillsAndReloads + + +dualLiveness :: BAnalysis M Last DualLive +dualLiveness = BComp "dual liveness" exit last middle first + where exit = empty + last = lastDualLiveness + middle = middleDualLiveness + first live _id = live + empty = fact_bot dualLiveLattice + + -- ^ could take a proc-point set and choose to spill here, + -- but it's probably better to run this pass, choose + -- proc-point protocols, insert more CopyIn nodes, and run + -- this pass again + +middleDualLiveness :: DualLive -> M -> DualLive +middleDualLiveness live m@(Spill regs) = + -- live-in on-stack requirements are satisfied; + -- live-out in-regs obligations are created + my_trace "before" (f4sep [ppr m, text "liveness is", ppr live']) $ + live' + where live' = DualLive { on_stack = on_stack live `minusRegSet` regs + , in_regs = in_regs live `plusRegSet` regs } + +middleDualLiveness live m@(Reload regs) = + -- live-in in-regs requirements are satisfied; + -- live-out on-stack obligations are created + my_trace "before" (f4sep [ppr m, text "liveness is", ppr live']) $ + live' + where live' = DualLive { on_stack = on_stack live `plusRegSet` regs + , in_regs = in_regs live `minusRegSet` regs } + +middleDualLiveness live (NotSpillOrReload m) = middle m live + where middle (MidNop) = id + middle (MidComment {}) = id + middle (MidAssign (CmmLocal reg') expr) = changeRegs (gen expr . kill reg') + middle (MidAssign (CmmGlobal _) expr) = changeRegs (gen expr) + middle (MidStore addr rval) = changeRegs (gen addr . gen rval) + middle (MidUnsafeCall _ ress args) = changeRegs (gen args . kill ress) + middle (CopyIn _ formals _) = changeRegs (kill formals) + middle (CopyOut _ formals) = changeRegs (gen formals) + +lastDualLiveness :: (BlockId -> DualLive) -> Last -> DualLive +lastDualLiveness env l = last l + where last (LastReturn ress) = changeRegs (gen ress) empty + last (LastJump e args) = changeRegs (gen e . gen args) empty + last (LastBranch id args) = changeRegs (gen args) $ env id + last (LastCall tgt args Nothing) = changeRegs (gen tgt. 
gen args) empty + last (LastCall tgt args (Just k)) = + -- nothing can be live in registers at this point + -- only 'formals' can be in regs at this point + let live = env k in + if isEmptyUniqSet (in_regs live) then + DualLive (on_stack live) (gen tgt $ gen args emptyRegSet) + else + panic "live values in registers at call continuation" + last (LastCondBranch e t f) = changeRegs (gen e) $ dualUnion (env t) (env f) + last (LastSwitch e tbl) = changeRegs (gen e) $ dualUnionList $ + map env (catMaybes tbl) + empty = fact_bot dualLiveLattice + +gen, kill :: UserOfLocalRegs a => a -> RegSet -> RegSet +gen a live = foldRegsUsed extendRegSet live a +kill a live = foldRegsUsed delOneFromUniqSet live a + +insertSpillsAndReloads :: BFunctionalTransformation M Last DualLive +insertSpillsAndReloads = BComp "CPS spiller" exit last middle first + where exit = Nothing + last = \_ _ -> Nothing + middle = middleInsertSpillsAndReloads + first _ _ = Nothing + -- ^ could take a proc-point set and choose to spill here, + -- but it's probably better to run this pass, choose + -- proc-point protocols, insert more CopyIn nodes, and run + -- this pass again + + +middleInsertSpillsAndReloads :: DualLive -> M -> Maybe (Graph M Last) +middleInsertSpillsAndReloads _ (Spill _) = Nothing +middleInsertSpillsAndReloads _ (Reload _) = Nothing +middleInsertSpillsAndReloads live (NotSpillOrReload m) = middle m + where middle (MidAssign (CmmLocal reg') _) = + if reg' `elemRegSet` on_stack live then -- must spill + my_trace "Spilling" (f4sep [text "spill" <+> ppr reg', + text "after", ppr m]) $ + Just $ graphOfMiddles [NotSpillOrReload m, Spill $ mkRegSet [reg']] + else + Nothing + middle (CopyIn _ formals _) = + -- only 'formals' can be in regs at this point + let regs' = kill formals (in_regs live) -- live in regs; must reload + is_stack_var r = elemRegSet r (on_stack live) + needs_spilling = -- a formal that is expected on the stack; must spill + foldRegsUsed (\rs r -> if is_stack_var r then extendRegSet rs r + else rs) emptyRegSet formals + in if isEmptyUniqSet regs' && isEmptyUniqSet needs_spilling then + Nothing + else + let reload = if isEmptyUniqSet regs' then [] + else [Reload regs'] + spill_reload = if isEmptyUniqSet needs_spilling then reload + else Spill needs_spilling : reload + middles = NotSpillOrReload m : spill_reload + in + my_trace "At CopyIn" (f4sep [text "Triggered by ", ppr live, + ppr (Reload regs' :: M), + ppr (Spill needs_spilling :: M), + text "after", ppr m]) $ + Just $ graphOfMiddles middles + middle _ = Nothing + +-- | For conversion back to vanilla C-- +spillAndReloadComments :: M -> Middle +spillAndReloadComments (NotSpillOrReload m) = m +spillAndReloadComments (Spill regs) = show_regs "Spill" regs +spillAndReloadComments (Reload regs) = show_regs "Reload" regs + +show_regs :: String -> RegSet -> Middle +show_regs s regs = MidComment $ mkFastString $ showSDoc $ ppr_regs s regs + + +--------------------- +-- prettyprinting + +instance Outputable m => Outputable (ExtendWithSpills m) where + ppr (Spill regs) = ppr_regs "Spill" regs + ppr (Reload regs) = ppr_regs "Reload" regs + ppr (NotSpillOrReload m) = ppr m + +instance Outputable (LGraph M Last) where + ppr = pprLgraph + +instance DebugNodes M Last + +ppr_regs :: String -> RegSet -> SDoc +ppr_regs s regs = text s <+> commafy (map ppr $ uniqSetToList regs) + where commafy xs = hsep $ punctuate comma xs + +instance Outputable DualLive where + ppr (DualLive {in_regs = regs, on_stack = stack}) = + if isEmptyUniqSet regs && isEmptyUniqSet stack then 
+ text "" + else + nest 2 $ fsep [if isEmptyUniqSet regs then PP.empty + else (ppr_regs "live in regs =" regs), + if isEmptyUniqSet stack then PP.empty + else (ppr_regs "live on stack =" stack)] + +my_trace :: String -> SDoc -> a -> a +my_trace = if False then pprTrace else \_ _ a -> a + +f4sep :: [SDoc] -> SDoc +f4sep [] = fsep [] +f4sep (d:ds) = fsep (d : map (nest 4) ds) diff --git a/compiler/cmm/CmmTx.hs b/compiler/cmm/CmmTx.hs new file mode 100644 index 0000000..ef3e8e7 --- /dev/null +++ b/compiler/cmm/CmmTx.hs @@ -0,0 +1,59 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module CmmTx where + +data ChangeFlag = NoChange | SomeChange + +type Tx a = a -> TxRes a +data TxRes a = TxRes ChangeFlag a + +seqTx :: Tx a -> Tx a -> Tx a +iterateTx :: Tx a -> Tx a +runTx :: Tx a -> a -> a + +noTx, aTx :: a -> TxRes a +noTx x = TxRes NoChange x +aTx x = TxRes SomeChange x + +replaceTx :: a -> TxRes b -> TxRes a +replaceTx a (TxRes change _) = TxRes change a + +txVal :: TxRes a -> a +txVal (TxRes _ a) = a + +txHasChanged :: TxRes a -> Bool +txHasChanged (TxRes NoChange _) = False +txHasChanged (TxRes SomeChange _) = True + +plusTx :: (a -> b -> c) -> TxRes a -> TxRes b -> TxRes c +plusTx f (TxRes c1 a) (TxRes c2 b) = TxRes (c1 `orChange` c2) (f a b) + +mapTx :: Tx a -> Tx [a] +mapTx _ [] = noTx [] +mapTx f (x:xs) = plusTx (:) (f x) (mapTx f xs) + +runTx f = txVal . f + +seqTx f1 f2 a = + let TxRes c1 a1 = f1 a + TxRes c2 a2 = f2 a1 + in TxRes (c1 `orChange` c2) a2 + +iterateTx f a + = case f a of + TxRes NoChange a' -> TxRes NoChange a' + TxRes SomeChange a' -> let TxRes _ a'' = iterateTx f a' + in TxRes SomeChange a'' + +orChange :: ChangeFlag -> ChangeFlag -> ChangeFlag +orChange NoChange c = c +orChange SomeChange _ = SomeChange + + + +instance Functor TxRes where + fmap f (TxRes ch a) = TxRes ch (f a) + +instance Monad TxRes where + return = TxRes NoChange + (TxRes NoChange a) >>= k = k a + (TxRes SomeChange a) >>= k = let (TxRes _ a') = k a in TxRes SomeChange a' diff --git a/compiler/cmm/CmmZipUtil.hs b/compiler/cmm/CmmZipUtil.hs new file mode 100644 index 0000000..2dcb55f --- /dev/null +++ b/compiler/cmm/CmmZipUtil.hs @@ -0,0 +1,17 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module CmmZipUtil + ( zipPreds + ) +where +import Prelude hiding (last, unzip) +import ZipCfg +import Maybes + +-- | Compute the predecessors of each *reachable* block +zipPreds :: LastNode l => LGraph m l -> BlockEnv BlockSet +zipPreds g = foldl add emptyBlockEnv (postorder_dfs g) + where add env block@(Block id _) = + foldl (\env sid -> + let preds = lookupBlockEnv env sid `orElse` emptyBlockSet + in extendBlockEnv env sid (extendBlockSet preds id)) + env (succs block) diff --git a/compiler/cmm/DFMonad.hs b/compiler/cmm/DFMonad.hs new file mode 100644 index 0000000..789b401 --- /dev/null +++ b/compiler/cmm/DFMonad.hs @@ -0,0 +1,287 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module DFMonad + ( Txlimit + , DFTx, runDFTx, lastTxPass, txDecrement, txRemaining, txExhausted + + , DataflowLattice(..) 
+ , DataflowAnalysis + , markFactsUnchanged, factsStatus, getFact, setFact, botFact + , forgetFact, allFacts, factsEnv, checkFactMatch + , addLastOutFact, lastOutFacts, forgetLastOutFacts + , subAnalysis + + , DFA, runDFA + , DFM, runDFM, liftTx, liftAnal + , markGraphRewritten + , freshBlockId + , liftUSM + ) +where + +import CmmTx +import Control.Monad +import Maybes +import PprCmm() +import UniqFM +import UniqSupply +import ZipCfg hiding (freshBlockId) +import qualified ZipCfg as G + +import Outputable + +{- + +A dataflow monad maintains a mapping from BlockIds to dataflow facts, +where a dataflow fact is a value of type [[a]]. Values of type [[a]] +must form a lattice, as described by type [[Fact a]]. + +The dataflow engine uses the lattice structure to compute a least +solution to a set of dataflow equations. To compute a greatest +solution, flip the lattice over. + +The engine works by starting at the bottom and iterating to a fixed +point, so in principle we require the bottom element, a join (least +upper bound) operation, and a comparison to find out if a value has +changed (grown). In practice, the comparison is only ever used in +conjunction with the join, so we have [[fact_add_to]]: + + fact_add_to new old = + let j = join new old in + if j <= old then noTx old -- nothing changed + else aTx j -- the fact changed + +-} + +data DataflowLattice a = DataflowLattice { + fact_name :: String, -- documentation + fact_bot :: a, -- lattice bottom element + fact_add_to :: a -> a -> TxRes a, -- lattice join and compare + -- ^ compute join of two args; something changed iff join is greater than 2nd arg + fact_do_logging :: Bool -- log changes +} + + +-- There are three monads here: +-- 1. DFTx, the monad of transactions, to be carried through all +-- graph-changing computations in the program +-- 2. DFA, the monad of analysis, which never changes anything +-- 3. DFM, the monad of combined analysis and transformation, +-- which needs a UniqSupply and may consume transactions + +data DFAState f = DFAState { df_facts :: BlockEnv f + , df_facts_change :: ChangeFlag + } + +data DFTxState = DFTxState { df_txlimit :: Txlimit, df_lastpass :: String } + +data DFState f = DFState { df_uniqs :: UniqSupply + , df_rewritten :: ChangeFlag + , df_astate :: DFAState f + , df_txstate :: DFTxState + , df_last_outs :: [(BlockId, f)] + } + +newtype DFTx a = DFTx (DFTxState -> (a, DFTxState)) +newtype DFA fact a = DFA (DataflowLattice fact -> DFAState fact -> (a, DFAState fact)) +newtype DFM fact a = DFM (DataflowLattice fact -> DFState fact -> (a, DFState fact)) + + +liftAnal :: DFA f a -> DFM f a +liftAnal (DFA f) = DFM f' + where f' l s = let (a, anal) = f l (df_astate s) + in (a, s {df_astate = anal}) + +liftTx :: DFTx a -> DFM f a +liftTx (DFTx f) = DFM f' + where f' _ s = let (a, txs) = f (df_txstate s) + in (a, s {df_txstate = txs}) + +newtype Txlimit = Txlimit Int + deriving (Ord, Eq, Num, Show, Bounded) + +initDFAState :: DFAState f +initDFAState = DFAState emptyBlockEnv NoChange + +runDFA :: DataflowLattice f -> DFA f a -> a +runDFA lattice (DFA f) = fst $ f lattice initDFAState + +-- XXX DFTx really needs to be in IO, so we can dump programs in +-- intermediate states of optimization ---NR + +runDFTx :: Txlimit -> DFTx a -> a --- should only be called once per program! 
+runDFTx lim (DFTx f) = fst $ f $ DFTxState lim "" + +lastTxPass :: DFTx String +lastTxPass = DFTx f + where f s = (df_lastpass s, s) + +runDFM :: UniqSupply -> DataflowLattice f -> DFM f a -> DFTx a +runDFM uniqs lattice (DFM f) = DFTx f' + where f' txs = + let (a, s) = f lattice $ DFState uniqs NoChange initDFAState txs [] in + (a, df_txstate s) + +txExhausted :: DFTx Bool +txExhausted = DFTx f + where f s = (df_txlimit s <= 0, s) + +txRemaining :: DFTx Txlimit +txRemaining = DFTx f + where f s = (df_txlimit s, s) + +txDecrement :: String -> Txlimit -> Txlimit -> DFTx () +txDecrement optimizer old new = DFTx f + where f s = ((), s { df_txlimit = lim s, df_lastpass = optimizer }) + lim s = if old == df_txlimit s then new + else panic $ concat ["lost track of ", optimizer, "'s transactions"] + + +class DataflowAnalysis m where + markFactsUnchanged :: m f () -- ^ Useful for starting a new iteration + factsStatus :: m f ChangeFlag + subAnalysis :: m f a -> m f a -- ^ Do a new analysis and then throw away + -- *all* the related state. Even the Uniques + -- will be reused. + + getFact :: BlockId -> m f f + setFact :: Outputable f => BlockId -> f -> m f () + checkFactMatch :: Outputable f => + BlockId -> f -> m f () -- ^ assert fact already at this val + botFact :: m f f + forgetFact :: BlockId -> m f () + forgetLastOutFacts :: m f () + allFacts :: m f (BlockEnv f) + factsEnv :: Monad (m f) => m f (BlockId -> f) + + lattice :: m f (DataflowLattice f) + factsEnv = do { map <- allFacts + ; bot <- botFact + ; return $ \id -> lookupBlockEnv map id `orElse` bot } + +instance DataflowAnalysis DFA where + markFactsUnchanged = DFA f + where f _ s = ((), s {df_facts_change = NoChange}) + factsStatus = DFA f' + where f' _ s = (df_facts_change s, s) + subAnalysis (DFA f) = DFA f' + where f' l s = let (a, _) = f l (subAnalysisState s) in (a, s) + getFact id = DFA get + where get lattice s = (lookupBlockEnv (df_facts s) id `orElse` fact_bot lattice, s) + setFact id a = + do old <- getFact id + DataflowLattice { fact_add_to = add_fact + , fact_name = name, fact_do_logging = log } <- lattice + case add_fact a old of + TxRes NoChange _ -> return () + TxRes SomeChange join -> DFA $ \_ s -> + let facts' = extendBlockEnv (df_facts s) id join + debug = if log then pprTrace else \_ _ a -> a + in debug name (pprSetFact id old a join) $ + ((), s { df_facts = facts', df_facts_change = SomeChange }) + botFact = DFA f + where f lattice s = (fact_bot lattice, s) + forgetFact id = DFA f + where f _ s = ((), s { df_facts = delFromUFM (df_facts s) id }) + forgetLastOutFacts = return () + allFacts = DFA f + where f _ s = (df_facts s, s) + checkFactMatch id a = + do { fact <- lattice + ; old_a <- getFact id + ; case fact_add_to fact a old_a of + TxRes NoChange _ -> return () + TxRes SomeChange new -> + do { facts <- allFacts + ; pprPanic "checkFactMatch" + (f4sep [text (fact_name fact), text "at id" <+> ppr id, + text "changed from", nest 4 (ppr old_a), text "to", + nest 4 (ppr new), + text "after supposedly reaching fixed point;", + text "env is", pprFacts facts]) + ; setFact id a } + } + where pprFacts env = vcat (map pprFact (ufmToList env)) + pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) + + lattice = DFA f + where f l s = (l, s) + +subAnalysisState :: DFAState f -> DFAState f +subAnalysisState s = s {df_facts_change = NoChange} + + +instance DataflowAnalysis DFM where + markFactsUnchanged = liftAnal $ markFactsUnchanged + factsStatus = liftAnal $ factsStatus + subAnalysis = dfmSubAnalysis + getFact id = liftAnal $ 
getFact id + setFact id new = liftAnal $ setFact id new + botFact = liftAnal $ botFact + forgetFact id = liftAnal $ forgetFact id + forgetLastOutFacts = dfmForgetLastOutFacts + allFacts = liftAnal $ allFacts + checkFactMatch id a = liftAnal $ checkFactMatch id a + + lattice = liftAnal $ lattice + +dfmSubAnalysis :: DFM f a -> DFM f a +dfmSubAnalysis (DFM f) = DFM f' + where f' l s = let s' = s { df_astate = subAnalysisState (df_astate s) } + (a, _) = f l s' + in (a, s) + +dfmForgetLastOutFacts :: DFM f () +dfmForgetLastOutFacts = DFM f + where f _ s = ((), s { df_last_outs = [] }) + +addLastOutFact :: (BlockId, f) -> DFM f () +addLastOutFact pair = DFM f + where f _ s = ((), s { df_last_outs = pair : df_last_outs s }) + +lastOutFacts :: DFM f [(BlockId, f)] +lastOutFacts = DFM f + where f _ s = (df_last_outs s, s) + +markGraphRewritten :: DFM f () +markGraphRewritten = DFM f + where f _ s = ((), s {df_rewritten = SomeChange}) + +freshBlockId :: String -> DFM f BlockId +freshBlockId s = liftUSM $ G.freshBlockId s + +liftUSM :: UniqSM a -> DFM f a +liftUSM uc = DFM f + where f _ s = let (a, us') = initUs (df_uniqs s) uc + in (a, s {df_uniqs = us'}) + +instance Monad (DFA f) where + DFA f >>= k = DFA (\l s -> let (a, s') = f l s + DFA f' = k a + in f' l s') + return a = DFA (\_ s -> (a, s)) + +instance Monad (DFM f) where + DFM f >>= k = DFM (\l s -> let (a, s') = f l s + DFM f' = k a + in f' l s') + return a = DFM (\_ s -> (a, s)) + +instance Monad (DFTx) where + DFTx f >>= k = DFTx (\s -> let (a, s') = f s + DFTx f' = k a + in f' s') + return a = DFTx (\s -> (a, s)) + +pprSetFact :: Outputable f => BlockId -> f -> f -> f -> SDoc +pprSetFact id old a join = + f4sep [text "at" <+> text (show id), + text "added" <+> ppr a, text "to" <+> ppr old, + text "yielding" <+> ppr join] + +f4sep :: [SDoc] -> SDoc +f4sep [] = fsep [] +f4sep (d:ds) = fsep (d : map (nest 4) ds) + + +_I_am_abstract :: Int -> Txlimit +_I_am_abstract = Txlimit -- prevents a warning about Txlimit being unused diff --git a/compiler/cmm/MkZipCfg.hs b/compiler/cmm/MkZipCfg.hs new file mode 100644 index 0000000..33fd6cb --- /dev/null +++ b/compiler/cmm/MkZipCfg.hs @@ -0,0 +1,300 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module MkZipCfg + ( AGraph, (<*>), emptyAGraph, withFreshLabel + , mkMiddle, mkMiddles, mkLast, mkZTail, mkBranch, mkLabel, mkIfThenElse, mkWhileDo + , emptyGraph, graphOfMiddles, graphOfZTail + , lgraphOfAGraph, graphOfAGraph, labelAGraph + ) +where + +import Outputable +import Prelude hiding (zip, unzip, last) +import UniqSupply +import ZipCfg + +------------------------------------------------------------------------- +-- GENERIC ZIPPER-BASED CONTROL-FLOW GRAPH (CONSTRUCTOR VIEW) -- +------------------------------------------------------------------------- + +{- + +You can think of an AGraph like this: it is the program built by +composing in sequence three kinds of nodes: + * Label nodes (e.g. L2:) + * Middle nodes (e.g. x = y*3) + * Last nodes (e.g. if b then goto L1 else goto L2) + +The constructors mkLabel, mkMiddle, and mkLast build single-node +AGraphs of the indicated type. The composition operator <*> glues +AGraphs together in sequence (in constant time). + +For example: + x = 0 + L1: + x = x+1 + if x<10 then goto L1 else goto L2 + L2: + y = y*x + x = 0 + +Notice that the AGraph may begin without a label, and may end without +a control transfer. Control *always* falls through a label and middle +node, and *never* falls through a Last node. 
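+
+As a rough illustrative sketch (not code from this module), a client
+with hypothetical middle and last node constructors 'assign' and 'cond'
+(and placeholder operands x, y, zero, one, ten) could assemble the
+example above like this:
+
+  example = withFreshLabel "L1" $ \l1 ->
+            withFreshLabel "L2" $ \l2 ->
+            mkMiddle (assign x zero) <*>
+            mkLabel l1 <*>
+            mkMiddle (assign x (add x one)) <*>
+            mkLast (cond (lessThan x ten) l1 l2) <*>
+            mkLabel l2 <*>
+            mkMiddle (assign y (mul y x)) <*>
+            mkMiddle (assign x zero)
+
+Every constructor application and every <*> costs constant time; the
+work of building actual blocks is deferred until the AGraph is
+converted to a concrete graph.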
+ +A 'AGraph m l' is simply an abstract version of a 'Graph m l' from +module 'ZipCfg'. The only difference is that the 'AGraph m l' +supports a constant-time splicing operation, written infix <*>. +That splicing operation, together with the constructor functions in +this module (and with 'labelAGraph'), is the recommended way to build +large graphs. Each construction or splice has constant cost, and to +turn an AGraph into a Graph requires time linear in the number of +nodes and N log N in the number of basic blocks. + +The splicing operation warrants careful explanation. Like a Graph, an +AGraph is a control-flow graph which begins with a distinguished, +unlabelled sequence of middle nodes called the *entry*. An unlabelled +graph may also end with a sequence of middle nodes called the *exit*. +The entry may fall straight through to the exit, or it may fall into +the rest of the graph, which may include arbitrary control flow. + +Using ASCII art, here are examples of the two kinds of graph. On the +left, the entry and exit sequences are labelled A and B, where the +control flow in the middle is labelled X. On the right, there is no +exit sequence: + + | | + | A | C + | | + / \ / \ + / \ / \ + | X | | Y | + \ / \ / + \ / \_/ + | + | B + | + + +The AGraph has these properties: + + * A AGraph is opaque; nothing about its structure can be observed. + + * A AGraph may be turned into a LGraph in time linear in the number + of nodes and O(N log N) in the number of basic blocks. + + * Two AGraphs may be spliced in constant time by writing g1 <*> g2 + +There are two rules for splicing, depending on whether the left-hand +graph falls through. If it does, the rule is as follows: + + | | | + | A | C | A + | | | + / \ / \ / \ + / \ / \ / \ + | X | <*> | Y | = | X | + \ / \ / \ / + \ / \_/ \ / + | | | + | B | D | B + | | | + | + | C + | + / \ + / \ + | Y | + \ / + \ / + | + | D + | + +And in the case where the left-hand graph does not fall through, the +rule is + + + | | | + | A | C | A + | | | + / \ / \ / \ + / \ / \ / \ + | X | <*> | Y | = | X | + \ / \ / \ / + \_/ \_/ \_/ + | + | D _ + | / \ + / \ + | Y | + \ / + \ / + | + | D + | + +In this case C will become unreachable and is lost; when such a graph +is converted into a data structure, the system will bleat about +unreachable code. Also it must be assumed that there are branches +from somewhere in X to labelled blocks in Y; otherwise Y and D are +unreachable as well. (However, it may be the case that X branches +into some third AGraph, which in turn branches into D; the +representation is agnostic on this point.) + +-} + +infixr 3 <*> +(<*>) :: AGraph m l -> AGraph m l -> AGraph m l + +-- | A graph is built up by splicing together graphs each containing a +-- single node (where a label is considered a 'first' node. The empty +-- graph is a left and right unit for splicing. All of the AGraph +-- constructors (even complex ones like 'mkIfThenElse', as well as the +-- splicing operation <*>, are constant-time operations. + +emptyAGraph :: AGraph m l +mkLabel :: LastNode l => + BlockId -> AGraph m l -- graph contains the label +mkMiddle :: m -> AGraph m l -- graph contains the node +mkLast :: (Outputable m, Outputable l, LastNode l) => + l -> AGraph m l -- graph contains the node + +-- | This function provides access to fresh labels without requiring +-- clients to be programmed monadically. 
+withFreshLabel :: String -> (BlockId -> AGraph m l) -> AGraph m l + +-- below for convenience +mkMiddles :: [m] -> AGraph m l +mkZTail :: (Outputable m, Outputable l, LastNode l) => ZTail m l -> AGraph m l +mkBranch :: (Outputable m, Outputable l, LastNode l) => BlockId -> AGraph m l + +-- | For the structured control-flow constructs, a condition is +-- represented as a function that takes as arguments the labels to +-- goto on truth or falsehood. + +mkIfThenElse :: (Outputable m, Outputable l, LastNode l) + => (BlockId -> BlockId -> AGraph m l) -- branch condition + -> AGraph m l -- code in the 'then' branch + -> AGraph m l -- code in the 'else' branch + -> AGraph m l -- resulting if-then-else construct + +mkWhileDo :: (Outputable m, Outputable l, LastNode l) + => (BlockId -> BlockId -> AGraph m l) -- loop condition + -> AGraph m l -- body of the bloop + -> AGraph m l -- the final while loop + +-- | Converting an abstract graph to a concrete form is expensive: the +-- cost is linear in the number of nodes in the answer, plus N log N +-- in the number of basic blocks. The conversion is also monadic +-- because it may require the allocation of fresh, unique labels. + +graphOfAGraph :: AGraph m l -> UniqSM (Graph m l) +lgraphOfAGraph :: AGraph m l -> UniqSM (LGraph m l) + -- ^ allocate a fresh label for the entry point +labelAGraph :: BlockId -> AGraph m l -> UniqSM (LGraph m l) + -- ^ use the given BlockId as the label of the entry point + + +-- | The functions below build Graphs directly; for convenience, they +-- are included here with the rest of the constructor functions. + +emptyGraph :: Graph m l +graphOfMiddles :: [m] -> Graph m l +graphOfZTail :: ZTail m l -> Graph m l + + +-- ================================================================ +-- IMPLEMENTATION +-- ================================================================ + +newtype AGraph m l = AGraph (Graph m l -> UniqSM (Graph m l)) + -- an AGraph is a monadic function from a successor Graph to a new Graph + +AGraph f1 <*> AGraph f2 = AGraph f + where f g = f2 g >>= f1 -- note right associativity + +emptyAGraph = AGraph return + +graphOfAGraph (AGraph f) = f emptyGraph +emptyGraph = Graph (ZLast LastExit) emptyBlockEnv + +labelAGraph id g = + do Graph tail blocks <- graphOfAGraph g + return $ LGraph id $ insertBlock (Block id tail) blocks + +lgraphOfAGraph g = do id <- freshBlockId "graph entry" + labelAGraph id g + +------------------------------------- +-- constructors + +mkLabel id = AGraph f + where f (Graph tail blocks) = + return $ Graph (ZLast (mkBranchNode id)) + (insertBlock (Block id tail) blocks) + +mkBranch target = mkLast $ mkBranchNode target + +mkMiddle m = AGraph f + where f (Graph tail blocks) = return $ Graph (ZTail m tail) blocks + +mkMiddles ms = AGraph f + where f (Graph tail blocks) = return $ Graph (foldr ZTail tail ms) blocks + +graphOfMiddles ms = Graph (foldr ZTail (ZLast LastExit) ms) emptyBlockEnv +graphOfZTail t = Graph t emptyBlockEnv + + +mkLast l = AGraph f + where f (Graph tail blocks) = + do note_this_code_becomes_unreachable tail + return $ Graph (ZLast (LastOther l)) blocks + +mkZTail tail = AGraph f + where f (Graph utail blocks) = + do note_this_code_becomes_unreachable utail + return $ Graph tail blocks + +withFreshLabel name ofId = AGraph f + where f g = do id <- freshBlockId name + let AGraph f' = ofId id + f' g + +mkIfThenElse cbranch tbranch fbranch = + withFreshLabel "end of if" $ \endif -> + withFreshLabel "start of then" $ \tid -> + withFreshLabel "start of else" $ \fid -> + 
cbranch tid fid <*> + mkLabel tid <*> tbranch <*> mkBranch endif <*> + mkLabel fid <*> fbranch <*> mkLabel endif + + +mkWhileDo cbranch body = + withFreshLabel "loop test" $ \test -> + withFreshLabel "loop head" $ \head -> + withFreshLabel "end while" $ \endwhile -> + -- Forrest Baskett's while-loop layout + mkBranch test <*> mkLabel head <*> body <*> mkLabel test + <*> cbranch head endwhile <*> mkLabel endwhile + + +-- | Bleat if the insertion of a last node will create unreachable code +note_this_code_becomes_unreachable :: + (Monad m, LastNode l, Outputable middle, Outputable l) => ZTail middle l -> m () + +note_this_code_becomes_unreachable = u + where u (ZLast LastExit) = return () + u (ZLast (LastOther l)) | isBranchNode l = return () + -- Note [Branch follows branch] + u tail = fail ("unreachable code: " ++ showSDoc (ppr tail)) +{- +Note [Branch follows branch] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Why do we say it's ok for a Branch to follow a Branch? +Because the standard constructor mkLabel-- has fall-through +semantics. So if you do a mkLabel, you finish the current block, +giving it a label, and start a new one that branches to that label. +Emitting a Branch at this point is fine: + goto L1; L2: ...stuff... +-} diff --git a/compiler/cmm/PprCmmZ.hs b/compiler/cmm/PprCmmZ.hs new file mode 100644 index 0000000..390bca6 --- /dev/null +++ b/compiler/cmm/PprCmmZ.hs @@ -0,0 +1,112 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing -fno-warn-orphans #-} + +module PprCmmZ + ( pprCmmGraph + ) +where + +#include "HsVersions.h" + +import Cmm +import CmmExpr +import PprCmm() +import Outputable +import qualified ZipCfgCmm as G +import qualified ZipCfg as Z +import qualified ZipDataflow as DF +import CmmZipUtil + +import UniqSet +import FastString + +---------------------------------------------------------------- +instance DF.DebugNodes G.Middle G.Last + + +instance Outputable G.CmmGraph where + ppr = pprCmmGraph + +pprCmmGraph :: G.CmmGraph -> SDoc +pprCmmGraph g = vcat (swallow blocks) + where blocks = Z.postorder_dfs g + swallow :: [G.CmmBlock] -> [SDoc] + swallow [] = [] + swallow (Z.Block id t : rest) = tail id [] t rest + tail id prev' (Z.ZTail m t) rest = tail id (mid m : prev') t rest + tail id prev' (Z.ZLast Z.LastExit) rest = exit id prev' rest + tail id prev' (Z.ZLast (Z.LastOther l))rest = last id prev' l rest + mid (G.CopyIn _ [] _) = text "// proc point (no parameters)" + mid m@(G.CopyIn {}) = ppr m <+> text "(proc point)" + mid m = ppr m + block' id prev' + | id == Z.gr_entry g, entry_has_no_pred = + vcat (text "" : reverse prev') + | otherwise = hang (ppr id <> colon) 4 (vcat (reverse prev')) + last id prev' l n = + let endblock stmt = block' id (stmt : prev') : swallow n in + case l of + G.LastBranch tgt [] -> + case n of + Z.Block id' t : bs + | tgt == id', unique_pred id' + -> tail id prev' t bs -- optimize out redundant labels + _ -> endblock (ppr $ CmmBranch tgt) + l@(G.LastBranch {}) -> endblock (ppr l) + l@(G.LastCondBranch expr tid fid) -> + let ft id = text "// fall through to " <> ppr id in + case n of + Z.Block id' t : bs + | id' == fid, False -> + tail id (ft fid : ppr (CmmCondBranch expr tid) : prev') t bs + | id' == tid, Just e' <- maybeInvertCmmExpr expr, False -> + tail id (ft tid : ppr (CmmCondBranch e' fid) : prev') t bs + _ -> endblock (ppr l) + l@(G.LastJump {}) -> endblock $ ppr l + l@(G.LastReturn {}) -> endblock $ ppr l + l@(G.LastSwitch {}) -> endblock $ ppr l + l@(G.LastCall _ _ Nothing) -> endblock $ ppr l + l@(G.LastCall tgt args (Just k)) + | Z.Block id' 
(Z.ZTail (G.CopyIn _ ress srt) t) : bs <- n, + id' == k -> + let call = CmmCall tgt ress args (CmmSafe srt) CmmMayReturn + ppcall = ppr call <+> parens (text "ret to" <+> ppr k) + in if unique_pred k then + tail id (ppcall : prev') t bs + else + endblock (ppcall) + | Z.Block id' t : bs <- n, id' == k, unique_pred k, + Just (ress, srt) <- findCopyIn t -> + let call = CmmCall tgt ress args (CmmSafe srt) CmmMayReturn + delayed = + ptext SLIT("// delayed CopyIn follows previous call") + in tail id (delayed : ppr call : prev') t bs + | otherwise -> endblock $ ppr l + findCopyIn (Z.ZTail (G.CopyIn _ ress srt) _) = Just (ress, srt) + findCopyIn (Z.ZTail _ t) = findCopyIn t + findCopyIn (Z.ZLast _) = Nothing + exit id prev' n = -- highly irregular (assertion violation?) + let endblock stmt = block' id (stmt : prev') : swallow n in + endblock (text "// ") +{- + case n of [] -> [text ""] + Z.Block id' t : bs -> + if unique_pred id' then + tail id (ptext SLIT("went thru exit") : prev') t bs + else + endblock (ppr $ CmmBranch id') +-} + preds = zipPreds g + entry_has_no_pred = case Z.lookupBlockEnv preds (Z.gr_entry g) of + Nothing -> True + Just s -> isEmptyUniqSet s + single_preds = + let add b single = + let id = Z.blockId b + in case Z.lookupBlockEnv preds id of + Nothing -> single + Just s -> if sizeUniqSet s == 1 then + Z.extendBlockSet single id + else single + in Z.fold_blocks add Z.emptyBlockSet g + unique_pred id = Z.elemBlockSet id single_preds + diff --git a/compiler/cmm/README b/compiler/cmm/README new file mode 100644 index 0000000..c0d1c68 --- /dev/null +++ b/compiler/cmm/README @@ -0,0 +1,97 @@ +Sketch of the new arrivals: + + MkZipCfg Constructor functions for control-flow graphs. + Not understandable in its entirety without reference + to ZipCfg, but nevertheless a worthy starting point, + as it is a good deal simpler than full ZipCfg. + MkZipCfg is polymorphic in the types of middle and last + nodes. + + ZipCfg Describes a zipper-like representation for true basic-block + control-flow graphs. A block has a single entry point, + which is a always a label, followed by zero or mode 'middle + nodes', each of which represents an uninterruptible + single-entry, single-exit computation, then finally a 'last + node', which may have zero or more successors. A special + 'exit node' is used for splicing together graphs. + + In addition to three representations of flow graphs, the + module provides a surfeit of functions for observing and + modifying graphs and related data: + - Block IDs, sets and environments thereof + - supply of fresh block IDS (as String -> UniqSM BlockId) + - myriad functions for splicing graphs + - postorder_dfs layout of blocks + - folding, mapping, and translation functions + + ZipCFG is polymorphic in the type of middle and last nodes. + + CmmExpr Code for C-- expressions, which is shared among old and new + representations of flow graphs. Of primary interest is the + type class UserOfLocalRegs and its method foldRegsUsed, + which is sufficiently overloaded to be used against + expressions, statements, formals, hinted formals, and so + on. This overloading greatly clarifies the computation of + liveness as well as some other analyses. + + ZipCfgCmm Types to instantiate ZipCfg for C--: middle and last nodes, + and a bunch of abbreviations of types in ZipCfg and Cmm. + Also provides suitable constructor functions for building + graphs from Cmm statements. + + CmmLiveZ A good example of a very simple dataflow analysis. 
It + computes the set of live local registers at each point. + + DFMonad Support for dataflow analysis and dataflow-based + transformation. This module needs work. Includes + DataflowLattice - for tracking dataflow facts (good) + DFA - monad for iterative dataflow analysis (OK) + DFM - monad for iterative dataflow analysis and rewriting (OK) + DFTx - monad to track Whalley/Davidson transactions (ugly) + type class DataflowAnalysis - operations common to DFA, DFM + Some dodgy bits are + subAnalysis, which may not be right + + ZipDataflow Iteratively solve forward and backward dataflow problems over + flow graphs. Polymorphic in the type of graph and in the + lattice of dataflow facts. Supports the incremental + rewriting technique described by Lerner, Grove, and Chambers + in POPL 2002. The code is a mess and is still being + sorted out. + + + CmmTx A simple monad for tracking when a transformation has + occurred (something has changed). + + CmmCvt Converts between Cmm and ZipCfgCmm representations. + + CmmProcPointZ One module that performs three analyses and + transformations: + + 1. Using Michael Adams's iterative algorithm, computes a + minimal set of proc points that enable code to be + generated without copying any basic blocks. + + 2. Assigns a protocol to each proc point. The assigner + is rigged to enable the 'Adams optimization' whereby + we attempt to eliminate return continuations by + making procedures return directly to join points. + Arguably this could be done by a separate rewriting + pass to perform earlier. + + 3. Insert CopyIn and CopyOut nodes where needed + according to the protocols. + + CmmSpillReload Inserts spills and reloads to establish the invariant that + at a safe call, there are no live variables in registers. + + CmmCPSZ The CPS transformation so far. + + CmmContFlowOpt Branch-chain elimination and elimination of unreachable code. + + CmmCvt Conversion to and from the new format. + + CmmOpt Changed optimization to use 'foldRegsUsed'; eliminated + significant duplication of code. 
+ + PprCmmZ Prettyprinting functions related to ZipCfg and ZipCfgCmm diff --git a/compiler/cmm/StackColor.hs b/compiler/cmm/StackColor.hs new file mode 100644 index 0000000..e3b6ba8 --- /dev/null +++ b/compiler/cmm/StackColor.hs @@ -0,0 +1,120 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module StackColor where + +import StackPlacements +import qualified GraphColor as Color +import CmmExpr +import CmmSpillReload +import DFMonad +import qualified GraphOps +import MachOp +import ZipCfg +import ZipCfgCmm +import ZipDataflow + +import Maybes +import Panic +import UniqSet + +import Data.List + +type M = ExtendWithSpills Middle + + +foldConflicts :: (RegSet -> a -> a) -> a -> LGraph M Last -> a +foldConflicts f z g = + let env = runDFA dualLiveLattice (run_b_anal dualLiveness g >> allFacts) + lookup id = lookupBlockEnv env id `orElse` fact_bot dualLiveLattice + f' dual z = f (on_stack dual) z + in fold_edge_facts_b f' dualLiveness g lookup z + + +type IGraph = Color.Graph LocalReg SlotClass StackPlacement +type ClassCount = [(SlotClass, Int)] + +buildIGraphAndCounts :: LGraph M Last -> (IGraph, ClassCount) +buildIGraphAndCounts g = igraph_and_counts + where igraph_and_counts = foldConflicts add (Color.initGraph, zero) g + zero = map (\c -> (c, 0)) allSlotClasses + add live (igraph, counts) = (graphAddConflictSet live igraph, + addSimulCounts (classCounts live) counts) + addSimulCounts = + zipWith (\(c, n) (c', n') -> if c == c' then (c, max n n') + else panic "slot classes out of order") + classCounts regs = foldUniqSet addReg zero regs + addReg reg counts = + let cls = slotClass reg in + map (\(c, n) -> (c, if c == cls then n + 1 else n)) counts + + +-- | Add some conflict edges to the graph. +-- Conflicts between virtual and real regs are recorded as exclusions. 
+-- + +graphAddConflictSet :: RegSet -> IGraph -> IGraph +graphAddConflictSet set graph = GraphOps.addConflicts set slotClass graph + +slotClass :: LocalReg -> SlotClass +slotClass (LocalReg _ machRep _) = + case machRep of -- the horror, the horror + I8 -> SlotClass32 + I16 -> SlotClass32 + I32 -> SlotClass32 + I64 -> SlotClass64 + I128 -> SlotClass128 + F32 -> SlotClass32 + F64 -> SlotClass64 + F80 -> SlotClass64 + +{- +colorMe :: (IGraph, ClassCount) -> (IGraph, UniqSet LocalReg) +colorMe (igraph, counts) = Color.colorGraph starter_colors triv spill_max_degree igraph + where starter_colors = allocate [] counts allStackSlots + allocate prev [] colors = insert prev colors + allocate prev ((c, n) : counts) colors = + let go prev 0 colors = allocate prev counts colors + go prev n colors = let (p, colors') = getStackSlot c colors in + go (p:prev) (n-1) colors' + in go prev n colors + insert :: [StackPlacement] -> SlotSet -> SlotSet + insert [] colors = colors + insert (p:ps) colors = insert ps (extendSlotSet colors p) + triv :: Color.Triv LocalReg SlotClass StackPlacement + triv = trivColorable (mkSizeOf counts) + +spill_max_degree :: IGraph -> LocalReg +spill_max_degree igraph = Color.nodeId node + where node = maximumBy (\n1 n2 -> compare + (sizeUniqSet $ Color.nodeConflicts n1) + (sizeUniqSet $ Color.nodeConflicts n2)) $ + eltsUFM $ Color.graphMap igraph + + +type Worst = SlotClass -> (Int, Int, Int) -> Int + +trivColorable :: (SlotClass -> Int) -> + SlotClass -> UniqSet LocalReg -> UniqSet StackPlacement -> Bool +trivColorable sizeOf classN conflicts exclusions = squeeze < sizeOf classN + where squeeze = worst classN counts + counts = if isEmptyUniqSet exclusions then foldUniqSet acc zero conflicts + else panic "exclusions in stack slots?!" + zero = (0, 0, 0) + acc r (word, dbl, quad) = + case slotClass r of + SlotClass32 -> (word+1, dbl, quad) + SlotClass64 -> (word, dbl+1, quad) + SlotClass128 -> (word, dbl, quad+1) + worst SlotClass128 (_, _, q) = q + worst SlotClass64 (_, d, q) = d + 2 * q + worst SlotClass32 (w, d, q) = w + 2 * d + 4 * q +-} + +-- | number of placements available is from class and all larger classes +mkSizeOf :: ClassCount -> (SlotClass -> Int) +mkSizeOf counts = sizeOf + where sizeOf SlotClass32 = n32 + sizeOf SlotClass64 = n64 + sizeOf SlotClass128 = n128 + n128 = (lookup SlotClass128 counts `orElse` 0) + n64 = (lookup SlotClass64 counts `orElse` 0) + 2 * n128 + n32 = (lookup SlotClass32 counts `orElse` 0) + 2 * n32 diff --git a/compiler/cmm/StackPlacements.hs b/compiler/cmm/StackPlacements.hs new file mode 100644 index 0000000..31a5198 --- /dev/null +++ b/compiler/cmm/StackPlacements.hs @@ -0,0 +1,248 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module StackPlacements + ( SlotSet, allStackSlots -- the infinite set of stack slots + , SlotClass(..), slotClassBits, stackSlot32, stackSlot64, stackSlot128 + , allSlotClasses + , getStackSlot, extendSlotSet, deleteFromSlotSet, elemSlotSet, chooseSlot + , StackPlacement(..) + ) +where + +import Maybes +import Outputable +import Unique + +import Prelude hiding (pi) +import Data.List + +{- + +The goal here is to provide placements on the stack that will allow, +for example, two 32-bit words to spill to a slot previously used by a +64-bit floating-point value. I use a simple buddy-system allocator +that splits large slots in half as needed; this will work fine until +the day when somebody wants to spill an 80-bit Intel floating-point +register into the Intel standard 96-bit stack slot. 
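+
+To make the splitting concrete, here is how the operations below behave
+(read off the definitions in this module; an illustration, not a
+specification).  Suppose the only reusable slot is a previously used
+64-bit slot s.  Then a request for a 32-bit placement splits it:
+
+  get32 slots == (OldHalf s, slots')
+
+where slots' records (YoungHalf s) as a free 32-bit fragment, so the
+next 32-bit request is satisfied from that fragment instead of
+consuming a fresh slot.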
+ +-} + +data SlotClass = SlotClass32 | SlotClass64 | SlotClass128 + deriving (Eq) + +instance Uniquable SlotClass where + getUnique = getUnique . slotClassBits + +instance Outputable SlotClass where + ppr cls = text "class of" <+> int (slotClassBits cls) <> text "-bit stack slots" + +slotClassBits :: SlotClass -> Int +slotClassBits SlotClass32 = 32 +slotClassBits SlotClass64 = 64 +slotClassBits SlotClass128 = 128 + +data StackPlacement = FullSlot SlotClass Int + | YoungHalf StackPlacement + | OldHalf StackPlacement + deriving (Eq) + +data OneSize = OneSize { full_slots :: [StackPlacement], fragments :: [StackPlacement] } + -- ^ Always used for slots that have been previously used + +data SlotSet = SlotSet { s32, s64, s128 :: OneSize, next_unused :: Int } + +allStackSlots :: SlotSet +allStackSlots = SlotSet empty empty empty 0 + where empty = OneSize [] [] + + +psize :: StackPlacement -> Int +psize (FullSlot cls _) = slotClassBits cls +psize (YoungHalf p) = psize p `div` 2 +psize (OldHalf p) = psize p `div` 2 + + + + +-- | Get a slot no matter what +get32, get64, get128 :: SlotSet -> (StackPlacement, SlotSet) + +-- | Get a previously used slot if one exists +getu32, getu64, getu128 :: SlotSet -> Maybe (StackPlacement, SlotSet) + +-- | Only supported slot classes + +stackSlot32, stackSlot64, stackSlot128 :: SlotClass +stackSlot32 = SlotClass32 +stackSlot64 = SlotClass64 +stackSlot128 = SlotClass128 + +allSlotClasses :: [SlotClass] +allSlotClasses = [stackSlot32, stackSlot64, stackSlot128] + +-- | Get a fresh slot, never before used +getFull :: SlotClass -> SlotSet -> (StackPlacement, SlotSet) + +infixr 4 ||| + +(|||) :: (SlotSet -> Maybe (StackPlacement, SlotSet)) -> + (SlotSet -> (StackPlacement, SlotSet)) -> + (SlotSet -> (StackPlacement, SlotSet)) + +f1 ||| f2 = \slots -> f1 slots `orElse` f2 slots + +getFull cls slots = (FullSlot cls n, slots { next_unused = n + 1 }) + where n = next_unused slots + +get32 = getu32 ||| (fmap split64 . getu64) ||| getFull stackSlot32 +get64 = getu64 ||| (fmap split128 . getu128) ||| getFull stackSlot64 +get128 = getu128 ||| getFull stackSlot128 + +type SizeGetter = SlotSet -> OneSize +type SizeSetter = OneSize -> SlotSet -> SlotSet + +upd32, upd64, upd128 :: SizeSetter +upd32 this_size slots = slots { s32 = this_size } +upd64 this_size slots = slots { s64 = this_size } +upd128 this_size slots = slots { s128 = this_size } + +with_size :: Int -> (SizeGetter -> SizeSetter -> a) -> a +with_size 32 = with_32 +with_size 64 = with_64 +with_size 128 = with_128 +with_size _ = panic "non-standard slot size -- error in size computation?" + +with_32, with_64, with_128 :: (SizeGetter -> SizeSetter -> a) -> a +with_32 f = f s32 upd32 +with_64 f = f s64 upd64 +with_128 f = f s128 upd128 + +getu32 = with_32 getUsed +getu64 = with_64 getUsed +getu128 = with_128 getUsed + +getUsed :: SizeGetter -> SizeSetter -> SlotSet -> Maybe (StackPlacement, SlotSet) +getUsed get set slots = + let this_size = get slots in + case full_slots this_size of + p : ps -> Just (p, set (this_size { full_slots = ps }) slots) + [] -> case fragments this_size of + p : ps -> Just (p, set (this_size { fragments = ps }) slots) + [] -> Nothing + +-- | When splitting, allocate the old half first in case it makes the +-- stack smaller at a call site. 
+split64, split128 :: (StackPlacement, SlotSet) -> (StackPlacement, SlotSet) +split64 (p, slots) = (OldHalf p, slots { s32 = cons_frag (YoungHalf p) (s32 slots) }) +split128 (p, slots) = (OldHalf p, slots { s64 = cons_frag (YoungHalf p) (s64 slots) }) + +cons_frag :: StackPlacement -> OneSize -> OneSize +cons_frag p this_size = this_size { fragments = p : fragments this_size } + + +---------------------------- +instance Outputable StackPlacement where + ppr (FullSlot cls n) = int (slotClassBits cls) <> text "-bit slot " <> int n + ppr (YoungHalf p) = text "young half of" <+> ppr p + ppr (OldHalf p) = text "old half of" <+> ppr p + +instance Outputable SlotSet where + ppr slots = fsep $ punctuate comma + (pprSlots (s32 slots) ++ pprSlots (s64 slots) ++ pprSlots (s128 slots) ++ + [text "and slots numbered" <+> int (next_unused slots) + <+> text "and up"]) + where pprSlots (OneSize w fs) = map ppr w ++ map ppr fs + +{- +instance ColorSet SlotSet SlotClass StackPlacement where + emptyColorSet = panic "The set of stack slots is never empty" + deleteFromColorSet = deleteFromSlotSet + extendColorSet slots (cls, p@(FullSlot {})) = + with_size (slotClassBits cls) add_full p (pi slots) + extendColorSet slots (cls, p) = with_size (slotClassBits cls) add_frag p (pi slots) + chooseColor = chooseSlot +-} + +deleteFromSlotSet :: StackPlacement -> SlotSet -> SlotSet +deleteFromSlotSet p@(FullSlot {}) slots = with_size (psize p) remove_full p (pi slots) +deleteFromSlotSet p slots = with_size (psize p) remove_frag p (pi slots) + +extendSlotSet :: SlotSet -> StackPlacement -> SlotSet +extendSlotSet slots p@(FullSlot {}) = with_size (psize p) add_full p (pi slots) +extendSlotSet slots p = with_size (psize p) add_frag p (pi slots) + +elemSlotSet :: StackPlacement -> SlotSet -> Bool +elemSlotSet p@(FullSlot {}) slots = with_size (psize p) elem_full p slots +elemSlotSet p slots = with_size (psize p) elem_frag p slots + +remove_full, remove_frag, add_full, add_frag + :: SizeGetter -> SizeSetter -> StackPlacement -> SlotSet -> SlotSet + +remove_full get set p slots = set p' slots + where this_size = get slots + p' = this_size { full_slots = delete p $ full_slots this_size } + +remove_frag get set p slots = set p' slots + where this_size = get slots + p' = this_size { full_slots = delete p $ full_slots this_size } + +add_full get set p slots = set p' slots + where this_size = get slots + p' = this_size { full_slots = add p $ full_slots this_size } + +add_frag get set p slots = set p' slots + where this_size = get slots + p' = this_size { full_slots = add p $ full_slots this_size } + +add :: Eq a => a -> [a] -> [a] +add x xs = if notElem x xs then x : xs else xs + +elem_full, elem_frag :: SizeGetter -> SizeSetter -> StackPlacement -> SlotSet -> Bool +elem_full get _set p slots = elem p (full_slots $ get slots) +elem_frag get _set p slots = elem p (fragments $ get slots) + + + + +getStackSlot :: SlotClass -> SlotSet -> (StackPlacement, SlotSet) +getStackSlot cls slots = + case cls of + SlotClass32 -> get32 (pi slots) + SlotClass64 -> get64 (pi slots) + SlotClass128 -> get128 (pi slots) + + +chooseSlot :: SlotClass -> [StackPlacement] -> SlotSet -> Maybe (StackPlacement, SlotSet) +chooseSlot cls prefs slots = + case filter (flip elemSlotSet slots) prefs of + placement : _ -> Just (placement, deleteFromSlotSet placement (pi slots)) + [] -> Just (getStackSlot cls slots) + +check_invariant :: Bool +check_invariant = True + +pi :: SlotSet -> SlotSet +pi = if check_invariant then panic_on_invariant_violation else id + 
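+-- A small usage sketch (illustration only; 'prefs' is a hypothetical
+-- preference list): a spiller that wants a 64-bit placement, preferring
+-- placements it has used before, might write
+--
+--   pick :: [StackPlacement] -> SlotSet -> (StackPlacement, SlotSet)
+--   pick prefs slots =
+--     chooseSlot stackSlot64 prefs slots `orElse` getStackSlot stackSlot64 slots
+--
+-- 'chooseSlot' already falls back to a fresh slot, so the `orElse` here
+-- only discharges the Maybe.
+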
+panic_on_invariant_violation :: SlotSet -> SlotSet +panic_on_invariant_violation slots = + check 32 (s32 slots) $ check 64 (s64 slots) $ check 128 (s128 slots) $ slots + where n = next_unused slots + check bits this_size = (check_full bits $ full_slots this_size) . + (check_frag bits $ fragments this_size) + check_full _ [] = id + check_full bits (FullSlot cls k : ps) = + if slotClassBits cls /= bits then panic "slot in bin of wrong size" + else if k >= n then panic "slot number is unreasonably fresh" + else check_full bits ps + check_full _ _ = panic "a fragment is in a bin reserved for full slots" + check_frag _ [] = id + check_frag _ (FullSlot {} : _) = + panic "a full slot is in a bin reserved for fragments" + check_frag bits (p : ps) = + if bits /= psize p then panic "slot in bin of wrong size" + else if pnumber p >= n then panic "slot number is unreasonably fresh" + else check_frag bits ps + pnumber (FullSlot _ k) = k + pnumber (YoungHalf p) = pnumber p + pnumber (OldHalf p) = pnumber p + diff --git a/compiler/cmm/ZipCfg.hs b/compiler/cmm/ZipCfg.hs new file mode 100644 index 0000000..e8fc5ed --- /dev/null +++ b/compiler/cmm/ZipCfg.hs @@ -0,0 +1,575 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module ZipCfg + ( BlockId(..), freshBlockId + , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, insertBlock, mkBlockEnv + , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, mkBlockSet + , Graph(..), LGraph(..), FGraph(..) + , Block(..), ZBlock(..), ZHead(..), ZTail(..), ZLast(..) + , HavingSuccessors, succs, fold_succs + , LastNode, mkBranchNode, isBranchNode, branchNodeTarget + + -- Observers and transformers + , entry, exit, focus, focusp, unfocus + , blockId, zip, unzip, last, goto_end, ht_to_first, ht_to_last, zipht + , tailOfLast + , splice_head, splice_tail, splice_head_only, splice_focus_entry + , splice_focus_exit, remove_entry_label + , of_block_list, to_block_list + , postorder_dfs + , fold_layout, fold_blocks + , fold_fwd_block, foldM_fwd_block + , map_nodes, translate + + , pprLgraph + ) +where + +import Maybes +import Outputable hiding (empty) +import Panic +import Prelude hiding (zip, unzip, last) +import Unique +import UniqFM +import UniqSet +import UniqSupply + +------------------------------------------------------------------------- +-- GENERIC ZIPPER-BASED CONTROL-FLOW GRAPH -- +------------------------------------------------------------------------- +{- + +This module defines datatypes used to represent control-flow graphs, +along with some functions for analyzing and splicing graphs. +Functions for building graphs are found in a separate module 'MkZipCfg'. + +Every graph has a distinguished entry point. A graph has at least one +exit; most exits are instructions (or statements) like 'jump' or +'return', which transfer control to other procedures, but a graph may +have up to one 'fall through' exit. (A graph that represents an +entire Haskell or C-- procedure does not have a 'fall through' exit.) + +A graph is a collection of basic blocks. A basic block begins with a +label (unique id; see Note [Unique BlockId]) which is followed by a +sequence of zero or more 'middle' nodes; the basic block ends with a +'last' node. Each 'middle' node is a single-entry, single-exit, +uninterruptible computation. A 'last' node is a single-entry, +multiple-exit computation. A last node may have zero or more successors, +which are identified by their unique ids. 
+ +A special case of last node is the ``default exit,'' which represents +'falling off the end' of the graph. Such a node is always represented by +the data constructor 'LastExit'. A graph may contain at most one +'LastExit' node, and a graph representing a full procedure should not +contain any 'LastExit' nodes. 'LastExit' nodes are used only to splice +graphs together, either during graph construction (see module 'MkZipCfg') +or during optimization (see module 'ZipDataflow'). + +A graph is parameterized over the types of middle and last nodes. Each of +these types will typically be instantiated with a subset of C-- statements +(see module 'ZipCfgCmm') or a subset of machine instructions (yet to be +implemented as of August 2007). + + + +This module exposes three representations of graphs. In order of +increasing complexity, they are: + + Graph m l The basic graph with its distinguished entry point + + LGraph m l A graph with a *labelled* entry point + + FGraph m l A labelled graph with the *focus* on a particular edge + +There are three types because each type offers a slightly different +invariant or cost model. + + * The distinguished entry of a Graph has no label. Because labels must + be unique, acquiring one requires a monadic operation ('freshBlockId'). + The primary advantage of the Graph representation is that we can build + a small Graph purely functionally, without entering a monad. For + example, during optimization we can easily rewrite a single middle + node into a Graph containing a sequence of two middle nodes followed by + LastExit. + + * In an LGraph, every basic block is labelled. The primary advantage of + this representation is its simplicity: each basic block can be treated + like any other. This representation is used for mapping, folding, and + translation, as well as layout. + + Like any graph, an LGraph still has a distinguished entry point, + which you can discover using 'gr_entry'. + + * An FGraph is an LGraph with the *focus* on one particular edge. The + primary advantage of this representation is that it provides + constant-time access to the nodes connected by that edge, and it also + allows constant-time, functional *replacement* of those nodes---in the + style of Huet's 'zipper'. + +None of these representations is ideally suited to the incremental +construction of large graphs. A separate module, 'MkZipCfg', provides a +fourth representation that is asymptotically optimal for such construction. + +-} + +entry :: LGraph m l -> FGraph m l -- focus on edge out of entry node +exit :: LGraph m l -> FGraph m l -- focus on edge into default exit node + -- (fails if there isn't one) +focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id +focusp :: (Block m l -> Bool) -> LGraph m l -> Maybe (FGraph m l) + -- focus on start of block satisfying predicate +unfocus :: FGraph m l -> LGraph m l -- lose focus + +-- | We can insert a single-entry, single-exit subgraph at +-- the current focus. +-- The new focus can be at either the entry edge or the exit edge. + +splice_focus_entry :: FGraph m l -> LGraph m l -> FGraph m l +splice_focus_exit :: FGraph m l -> LGraph m l -> FGraph m l + +--------------- Representation -------------------- + +-- | A basic block is a [[first]] node, followed by zero or more [[middle]] +-- nodes, followed by a [[last]] node. 
+ +-- eventually this module should probably replace the original Cmm, but for +-- now we leave it to dynamic invariants what can be found where + +data ZLast l + = LastExit -- fall through; used for the block that has no last node + -- LastExit is a device used only for graphs under + -- construction, or framgments of graph under optimisation, + -- so we don't want to pollute the 'l' type parameter with it + | LastOther l + +data ZHead m = ZFirst BlockId | ZHead (ZHead m) m + -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId +data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l) + -- ZTail is a sequence of middle nodes followed by a last node + +-- | Blocks and flow graphs +data Block m l = Block BlockId (ZTail m l) + +data Graph m l = Graph (ZTail m l) (BlockEnv (Block m l)) + +data LGraph m l = LGraph { gr_entry :: BlockId + , gr_blocks :: BlockEnv (Block m l) } + +-- | And now the zipper. The focus is between the head and tail. +-- Notice we cannot ever focus on an inter-block edge. +data ZBlock m l = ZBlock (ZHead m) (ZTail m l) +data FGraph m l = FGraph { zg_entry :: BlockId + , zg_focus :: ZBlock m l + , zg_others :: BlockEnv (Block m l) } + -- Invariant: the block represented by 'zg_focus' is *not* + -- in the map 'zg_others' + +---- Utility functions --- + +blockId :: Block m l -> BlockId +zip :: ZBlock m l -> Block m l +unzip :: Block m l -> ZBlock m l + +last :: ZBlock m l -> ZLast l +goto_end :: ZBlock m l -> (ZHead m, ZLast l) + +tailOfLast :: l -> ZTail m l + +-- | Some ways to combine parts: +ht_to_first :: ZHead m -> ZTail m l -> Block m l -- was (ZFirst, ZTail) +ht_to_last :: ZHead m -> ZTail m l -> (ZHead m, ZLast l) + +zipht :: ZHead m -> ZTail m l -> Block m l + +-- | We can splice a single-entry, single-exit LGraph onto a head or a tail. +-- For a head, we have a head~[[h]] followed by a LGraph~[[g]]. +-- The entry node of~[[g]] gets joined to~[[h]], forming the entry into +-- the new LGraph. The exit of~[[g]] becomes the new head. +-- For both arguments and results, the order of values is the order of +-- control flow: before splicing, the head flows into the LGraph; after +-- splicing, the LGraph flows into the head. +-- Splicing a tail is the dual operation. +-- (In order to maintain the order-means-control-flow convention, the +-- orders are reversed.) + +splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m) +splice_tail :: LGraph m l -> ZTail m l -> (ZTail m l, LGraph m l) + +-- | We can also splice a single-entry, no-exit LGraph into a head. +splice_head_only :: ZHead m -> LGraph m l -> LGraph m l + +-- | Finally, we can remove the entry label of an LGraph and remove +-- it, leaving a Graph: +remove_entry_label :: LGraph m l -> Graph m l + +of_block_list :: BlockId -> [Block m l] -> LGraph m l -- N log N +to_block_list :: LGraph m l -> [Block m l] -- N log N + +-- | Traversal: [[postorder_dfs]] returns a list of blocks reachable from +-- the entry node. +-- The postorder depth-first-search order means the list is in roughly +-- first-to-last order, as suitable for use in a forward dataflow problem. + +postorder_dfs :: forall m l . LastNode l => LGraph m l -> [Block m l] + +-- | For layout, we fold over pairs of [[Block m l]] and [[Maybe BlockId]] +-- in layout order. The [[BlockId]], if any, identifies the block that +-- will be the layout successor of the current block. This may be +-- useful to help an emitter omit the final [[goto]] of a block that +-- flows directly to its layout successor. 
+fold_layout :: + LastNode l => (Block m l -> Maybe BlockId -> a -> a) -> a -> LGraph m l-> a + +-- | We can also fold and iterate over blocks. +fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a + +map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l' + -- mapping includes the entry id! +translate :: forall m l m' l' . + (m -> UniqSM (LGraph m' l')) -> (l -> UniqSM (LGraph m' l')) -> + LGraph m l -> UniqSM (LGraph m' l') + +{- +translateA :: forall m l m' l' . + (m -> Agraph m' l') -> (l -> AGraph m' l') -> LGraph m l -> LGraph m' l' +-} + +------------------- Last nodes + +-- | We can't make a graph out of just any old 'last node' type. A +-- last node has to be able to find its successors, and we need to +-- be able to create and identify unconditional branches. We put +-- these capabilities in a type class. + +class HavingSuccessors b where + succs :: b -> [BlockId] + fold_succs :: (BlockId -> a -> a) -> b -> a -> a + + fold_succs add l z = foldr add z $ succs l + +class HavingSuccessors l => LastNode l where + mkBranchNode :: BlockId -> l + isBranchNode :: l -> Bool + branchNodeTarget :: l -> BlockId -- panics if not branch node + +instance HavingSuccessors l => HavingSuccessors (ZLast l) where + succs LastExit = [] + succs (LastOther l) = succs l + fold_succs _ LastExit z = z + fold_succs f (LastOther l) z = fold_succs f l z + +instance LastNode l => LastNode (ZLast l) where + mkBranchNode id = LastOther $ mkBranchNode id + isBranchNode LastExit = False + isBranchNode (LastOther l) = isBranchNode l + branchNodeTarget LastExit = panic "branchNodeTarget LastExit" + branchNodeTarget (LastOther l) = branchNodeTarget l + +instance LastNode l => HavingSuccessors (ZBlock m l) where + succs b = succs (last b) + +instance LastNode l => HavingSuccessors (Block m l) where + succs b = succs (unzip b) + + +------------------- Observing nodes + +-- | Fold from first to last +fold_fwd_block :: + (BlockId -> a -> a) -> (m -> a -> a) -> (ZLast l -> a -> a) -> + Block m l -> a -> a + +-- | iterate from first to last +foldM_fwd_block :: + Monad m => (BlockId -> a -> m a) -> (mid -> a -> m a) -> (ZLast l -> a -> m a) -> + Block mid l -> a -> m a + +-- ================ IMPLEMENTATION ================-- + +blockId (Block id _) = id + +-- | Convert block between forms. +-- These functions are tail-recursive, so we can go as deep as we like +-- without fear of stack overflow. 
+ +ht_to_first head tail = case head of + ZFirst id -> Block id tail + ZHead h m -> ht_to_first h (ZTail m tail) + +head_id :: ZHead m -> BlockId +head_id (ZFirst id) = id +head_id (ZHead h _) = head_id h + +zip (ZBlock h t) = ht_to_first h t + +ht_to_last head (ZLast l) = (head, l) +ht_to_last head (ZTail m t) = ht_to_last (ZHead head m) t + +goto_end (ZBlock h t) = ht_to_last h t + +tailOfLast l = ZLast (LastOther l) + +zipht = ht_to_first +unzip (Block id t) = ZBlock (ZFirst id) t + +last (ZBlock _ t) = lastt t + where lastt (ZLast l) = l + lastt (ZTail _ t) = lastt t + +focus id (LGraph entry blocks) = + case lookupBlockEnv blocks id of + Just b -> FGraph entry (unzip b) (delFromUFM blocks id) + Nothing -> panic "asked for nonexistent block in flow graph" + +focusp p (LGraph entry blocks) = + fmap (\(b, bs) -> FGraph entry (unzip b) bs) (splitp_blocks p blocks) + +splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) -> + Maybe (Block m l, BlockEnv (Block m l)) +splitp_blocks p blocks = lift $ foldUFM scan (Nothing, emptyBlockEnv) blocks + where scan b (yes, no) = + case yes of + Nothing | p b -> (Just b, no) + | otherwise -> (yes, insertBlock b no) + Just _ -> (yes, insertBlock b no) + lift (Nothing, _) = Nothing + lift (Just b, bs) = Just (b, bs) + +entry g@(LGraph eid _) = focus eid g + +exit g@(LGraph eid _) = FGraph eid (ZBlock h (ZLast l)) others + where FGraph _ b others = focusp is_exit g `orElse` panic "no exit in flow graph" + (h, l) = goto_end b + +is_exit :: Block m l -> Bool +is_exit b = case last (unzip b) of { LastExit -> True; _ -> False } + +-- | 'insertBlock' should not be used to *replace* an existing block +-- but only to insert a new one +insertBlock :: Block m l -> BlockEnv (Block m l) -> BlockEnv (Block m l) +insertBlock b bs = + case lookupBlockEnv bs id of + Nothing -> extendBlockEnv bs id b + Just _ -> panic ("duplicate labels " ++ show id ++ " in ZipCfg graph") + where id = blockId b + +unfocus (FGraph e bz bs) = LGraph e (insertBlock (zip bz) bs) + +check_single_exit :: LGraph l m -> a -> a +check_single_exit g = + let check block found = case last (unzip block) of + LastExit -> if found then panic "graph has multiple exits" + else True + _ -> found + in if not (foldUFM check False (gr_blocks g)) then + panic "graph does not have an exit" + else + \a -> a + +freshBlockId :: String -> UniqSM BlockId +freshBlockId _ = do { u <- getUniqueUs; return $ BlockId u } + +postorder_dfs g@(LGraph _ blocks) = + let FGraph _ eblock _ = entry g + in vnode (zip eblock) (\acc _visited -> acc) [] emptyBlockSet + where + vnode :: Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet ->a + vnode block@(Block id _) cont acc visited = + if elemBlockSet id visited then + cont acc visited + else + vchildren block (get_children block) cont acc (extendBlockSet visited id) + vchildren block bs cont acc visited = + let next children acc visited = + case children of [] -> cont (block : acc) visited + (b:bs) -> vnode b (next bs) acc visited + in next bs acc visited + get_children block = foldl add_id [] (succs block) + add_id rst id = case lookupBlockEnv blocks id of + Just b -> b : rst + Nothing -> rst + +fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z + where fold blocks z = + case blocks of [] -> z + [b] -> f b Nothing z + b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z) + nextlabel (Block id _) = + if id == eid then panic "entry as successor" + else Just id + +fold_fwd_block first middle last (Block id t) z = tail t (first id z) + where tail 
(ZTail m t) z = tail t (middle m z) + tail (ZLast l) z = last l z + +foldM_fwd_block first middle last (Block id t) z = do { z <- first id z; tail t z } + where tail (ZTail m t) z = do { z <- middle m z; tail t z } + tail (ZLast l) z = last l z + +fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks + +map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks) + where block (Block id t) = Block (idm id) (tail t) + tail (ZTail m t) = ZTail (middle m) (tail t) + tail (ZLast LastExit) = ZLast LastExit + tail (ZLast (LastOther l)) = ZLast (LastOther (last l)) + +of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks +to_block_list (LGraph _ blocks) = eltsUFM blocks + +{- +\paragraph{Splicing support} + +We want to be able to scrutinize a single-entry, single-exit LGraph for +splicing purposes. +There are two useful cases: the LGraph is a single block or it isn't. +We use continuation-passing style. +-} + +prepare_for_splicing :: + LGraph m l -> (ZTail m l -> a) -> (ZTail m l -> ZHead m -> BlockEnv (Block m l) -> a) + -> a +prepare_for_splicing g single multi = + let FGraph _ gentry gblocks = entry g + ZBlock _ etail = gentry + in if isNullUFM gblocks then + case last gentry of + LastExit -> single etail + _ -> panic "bad single block" + else + case splitp_blocks is_exit gblocks of + Nothing -> panic "Can't find an exit block" + Just (gexit, gblocks) -> + let (gh, gl) = goto_end $ unzip gexit in + case gl of LastExit -> multi etail gh gblocks + _ -> panic "exit is not exit?!" + +splice_head head g = + check_single_exit g $ + let eid = head_id head + splice_one_block tail' = + case ht_to_last head tail' of + (head, LastExit) -> (LGraph eid emptyBlockEnv, head) + _ -> panic "spliced LGraph without exit" + splice_many_blocks entry exit others = + (LGraph eid (insertBlock (zipht head entry) others), exit) + in prepare_for_splicing g splice_one_block splice_many_blocks + +splice_tail g tail = + check_single_exit g $ + let splice_one_block tail' = -- return tail' .. tail + case ht_to_last (ZFirst (gr_entry g)) tail' of + (head', LastExit) -> + case ht_to_first head' tail of + Block id t | id == gr_entry g -> (t, LGraph id emptyBlockEnv) + _ -> panic "entry in; garbage out" + _ -> panic "spliced single block without Exit" + splice_many_blocks entry exit others = + (entry, LGraph (gr_entry g) (insertBlock (zipht exit tail) others)) + in prepare_for_splicing g splice_one_block splice_many_blocks + +splice_focus_entry (FGraph eid (ZBlock head tail) blocks) g = + let (tail', g') = splice_tail g tail in + FGraph eid (ZBlock head tail') (plusUFM (gr_blocks g') blocks) + +splice_focus_exit (FGraph eid (ZBlock head tail) blocks) g = + let (g', head') = splice_head head g in + FGraph eid (ZBlock head' tail) (plusUFM (gr_blocks g') blocks) + +splice_head_only head g = + let FGraph eid gentry gblocks = entry g + in case gentry of + ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks) + _ -> panic "entry not at start of block?!" 
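+
+-- A throwaway example of the folding interface above (sketch only):
+-- counting the blocks of a graph with 'fold_blocks'.
+--
+--   countBlocks :: LGraph m l -> Int
+--   countBlocks = fold_blocks (\_ n -> n + 1) 0
+--
+-- 'fold_layout' is used the same way, except that it presents blocks in
+-- layout (postorder depth-first) order and pairs each block with its
+-- layout successor.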
+ +remove_entry_label g = + let FGraph e eblock others = entry g + in case eblock of + ZBlock (ZFirst id) tail + | id == e -> Graph tail others + _ -> panic "id doesn't match on entry block" + +--- Translation + +translate txm txl (LGraph eid blocks) = + do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks + return $ LGraph eid blocks' + where + txblock :: + Block m l -> UniqSM (BlockEnv (Block m' l')) -> UniqSM (BlockEnv (Block m' l')) + txblock (Block id t) expanded = + do blocks' <- expanded + txtail (ZFirst id) t blocks' + txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') -> + UniqSM (BlockEnv (Block m' l')) + txtail h (ZTail m t) blocks' = + do m' <- txm m + let (g, h') = splice_head h m' + txtail h' t (plusUFM (gr_blocks g) blocks') + txtail h (ZLast (LastOther l)) blocks' = + do l' <- txl l + return $ plusUFM (gr_blocks (splice_head_only h l')) blocks' + txtail h (ZLast LastExit) blocks' = + return $ insertBlock (zipht h (ZLast LastExit)) blocks' + +---------------------------------------------------------------- +--- Block Ids, their environments, and their sets + +{- Note [Unique BlockId] +~~~~~~~~~~~~~~~~~~~~~~~~ +Although a 'BlockId' is a local label, for reasons of implementation, +'BlockId's must be unique within an entire compilation unit. The reason +is that each local label is mapped to an assembly-language label, and in +most assembly languages allow, a label is visible throughout the enitre +compilation unit in which it appears. +-} + +newtype BlockId = BlockId Unique + deriving (Eq,Ord) + +instance Uniquable BlockId where + getUnique (BlockId u) = u + +instance Show BlockId where + show (BlockId u) = show u + +instance Outputable BlockId where + ppr = ppr . getUnique + + +type BlockEnv a = UniqFM {- BlockId -} a +emptyBlockEnv :: BlockEnv a +emptyBlockEnv = emptyUFM +lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a +lookupBlockEnv = lookupUFM +extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a +extendBlockEnv = addToUFM +mkBlockEnv :: [(BlockId,a)] -> BlockEnv a +mkBlockEnv = listToUFM + +type BlockSet = UniqSet BlockId +emptyBlockSet :: BlockSet +emptyBlockSet = emptyUniqSet +elemBlockSet :: BlockId -> BlockSet -> Bool +elemBlockSet = elementOfUniqSet +extendBlockSet :: BlockSet -> BlockId -> BlockSet +extendBlockSet = addOneToUniqSet +mkBlockSet :: [BlockId] -> BlockSet +mkBlockSet = mkUniqSet + +---------------------------------------------------------------- +-- putting this code in PprCmmZ leads to circular imports :-( + +instance (Outputable m, Outputable l) => Outputable (ZTail m l) where + ppr = pprTail + +-- | 'pprTail' is used for debugging only +pprTail :: (Outputable m, Outputable l) => ZTail m l -> SDoc +pprTail (ZTail m t) = ppr m $$ ppr t +pprTail (ZLast LastExit) = text "" +pprTail (ZLast (LastOther l)) = ppr l + +pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc +pprLgraph g = text "{" $$ nest 2 (vcat $ map pprBlock blocks) $$ text "}" + where pprBlock (Block id tail) = ppr id <> colon $$ ppr tail + blocks = postorder_dfs g diff --git a/compiler/cmm/ZipCfgCmm.hs b/compiler/cmm/ZipCfgCmm.hs new file mode 100644 index 0000000..367d952 --- /dev/null +++ b/compiler/cmm/ZipCfgCmm.hs @@ -0,0 +1,302 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +module ZipCfgCmm + ( mkNop, mkAssign, mkStore, mkCall, mkUnsafeCall, mkFinalCall + , mkJump, mkCbranch, mkSwitch, mkReturn, mkComment, mkCmmIfThenElse + , mkCmmWhileDo + , mkCopyIn, mkCopyOut + , CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph, Middle(..), Last(..), 
Convention(..) + ) +where + +#include "HsVersions.h" + +import CmmExpr +import Cmm ( GenCmm(..), GenCmmTop(..), CmmStatic, CmmInfo + , CmmCallTarget(..), CmmActuals, CmmFormals, CmmHintFormals + , CmmStmt(CmmJump, CmmSwitch, CmmReturn) -- imported in order to call ppr + ) +import PprCmm() + +import CLabel +import ClosureInfo +import FastString +import ForeignCall +import Maybes +import Outputable hiding (empty) +import qualified Outputable as PP +import Prelude hiding (zip, unzip, last) +import ZipCfg +import MkZipCfg + +type CmmGraph = LGraph Middle Last +type CmmAGraph = AGraph Middle Last +type CmmBlock = Block Middle Last +type CmmZ = GenCmm CmmStatic CmmInfo CmmGraph +type CmmTopZ = GenCmmTop CmmStatic CmmInfo CmmGraph + +mkNop :: CmmAGraph +mkAssign :: CmmReg -> CmmExpr -> CmmAGraph +mkStore :: CmmExpr -> CmmExpr -> CmmAGraph +mkCall :: CmmCallTarget -> CmmHintFormals -> CmmActuals -> C_SRT -> CmmAGraph +mkUnsafeCall :: CmmCallTarget -> CmmHintFormals -> CmmActuals -> CmmAGraph +mkFinalCall :: CmmCallTarget -> CmmActuals -> CmmAGraph -- never returns +mkJump :: CmmExpr -> CmmActuals -> CmmAGraph +mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph +mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph +mkReturn :: CmmActuals -> CmmAGraph +mkComment :: FastString -> CmmAGraph + +-- Not to be forgotten, but exported by MkZipCfg: +--mkBranch :: BlockId -> CmmAGraph +--mkLabel :: BlockId -> CmmAGraph +mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph +mkCmmWhileDo :: CmmExpr -> CmmAGraph -> CmmAGraph + +-------------------------------------------------------------------------- + +mkCmmIfThenElse e = mkIfThenElse (mkCbranch e) +mkCmmWhileDo e = mkWhileDo (mkCbranch e) + +mkCopyIn :: Convention -> CmmHintFormals -> C_SRT -> CmmAGraph +mkCopyOut :: Convention -> CmmHintFormals -> CmmAGraph + + -- ^ XXX: Simon or Simon thinks maybe the hints are being abused and + -- we should have CmmFormals here, but for now it is CmmHintFormals + -- for consistency with the rest of the back end ---NR + +mkComment fs = mkMiddle (MidComment fs) + +data Middle + = MidNop + | MidComment FastString + + | MidAssign CmmReg CmmExpr -- Assign to register + + | MidStore CmmExpr CmmExpr -- Assign to memory location. Size is + -- given by cmmExprRep of the rhs. + + | MidUnsafeCall -- An "unsafe" foreign call; + CmmCallTarget -- just a fat machine instructoin + CmmHintFormals -- zero or more results + CmmActuals -- zero or more arguments + + | CopyIn -- Move parameters or results from conventional locations to registers + -- Note [CopyIn invariant] + Convention + CmmHintFormals + C_SRT -- Static things kept alive by this block + | CopyOut Convention CmmHintFormals + +data Last + = LastReturn CmmActuals -- Return from a function, + -- with these return values. + + | LastJump CmmExpr CmmActuals + -- Tail call to another procedure + + | LastBranch BlockId CmmFormals + -- To another block in the same procedure + -- The parameters are unused at present. 
+ + | LastCall { -- A call (native or safe foreign) + cml_target :: CmmCallTarget, + cml_actual :: CmmActuals, -- Zero or more arguments + cml_next :: Maybe BlockId } -- BlockId of continuation, if call returns + + | LastCondBranch { -- conditional branch + cml_pred :: CmmExpr, + cml_true, cml_false :: BlockId + } + + | LastSwitch CmmExpr [Maybe BlockId] -- Table branch + -- The scrutinee is zero-based; + -- zero -> first block + -- one -> second block etc + -- Undefined outside range, and when there's a Nothing + +data Convention + = Argument CCallConv -- Used for function formal params + | Result CCallConv -- Used for function results + + | Local -- Used for control transfers within a (pre-CPS) procedure + -- All jump sites known, never pushed on the stack (hence no SRT) + -- You can choose whatever calling convention + -- you please (provided you make sure + -- all the call sites agree)! + deriving Eq + +-- ^ In a complete LGraph for a procedure, the [[Exit]] node should not +-- appear, but it is useful in a subgraph (e.g., replacement for a node). + +{- +Note [CopyIn invariant] +~~~~~~~~~~~~~~~~~~~~~~~ +In principle, CopyIn ought to be a First node, but in practice, the +possibility raises all sorts of hairy issues with graph splicing, +rewriting, and so on. In the end, NR finds it better to make the +placement of CopyIn a dynamic invariant. This change will complicate +the dataflow fact for the proc-point calculation, but it should make +things easier in many other respects. +-} + + +-- ================ IMPLEMENTATION ================-- + +mkNop = mkMiddle $ MidNop +mkAssign l r = mkMiddle $ MidAssign l r +mkStore l r = mkMiddle $ MidStore l r +mkCopyIn conv args srt = mkMiddle $ CopyIn conv args srt +mkCopyOut conv args = mkMiddle $ CopyOut conv args + +mkJump e args = mkLast $ LastJump e args +mkCbranch pred ifso ifnot = mkLast $ LastCondBranch pred ifso ifnot +mkReturn actuals = mkLast $ LastReturn actuals +mkSwitch e tbl = mkLast $ LastSwitch e tbl + +mkUnsafeCall tgt results actuals = mkMiddle $ MidUnsafeCall tgt results actuals +mkFinalCall tgt actuals = mkLast $ LastCall tgt actuals Nothing + +mkCall tgt results actuals srt = + withFreshLabel "call successor" $ \k -> + mkLast (LastCall tgt actuals (Just k)) <*> + mkLabel k <*> + mkCopyIn (Result CmmCallConv) results srt + +instance HavingSuccessors Last where + succs = cmmSuccs + fold_succs = fold_cmm_succs + +instance LastNode Last where + mkBranchNode id = LastBranch id [] + isBranchNode (LastBranch _ []) = True + isBranchNode _ = False + branchNodeTarget (LastBranch id []) = id + branchNodeTarget _ = panic "asked for target of non-branch" + +cmmSuccs :: Last -> [BlockId] +cmmSuccs (LastReturn {}) = [] +cmmSuccs (LastJump {}) = [] +cmmSuccs (LastBranch id _) = [id] +cmmSuccs (LastCall _ _ (Just id)) = [id] +cmmSuccs (LastCall _ _ Nothing) = [] +cmmSuccs (LastCondBranch _ t f) = [f, t] -- meets layout constraint +cmmSuccs (LastSwitch _ edges) = catMaybes edges + +fold_cmm_succs :: (BlockId -> a -> a) -> Last -> a -> a +fold_cmm_succs _f (LastReturn {}) z = z +fold_cmm_succs _f (LastJump {}) z = z +fold_cmm_succs f (LastBranch id _) z = f id z +fold_cmm_succs f (LastCall _ _ (Just id)) z = f id z +fold_cmm_succs _f (LastCall _ _ Nothing) z = z +fold_cmm_succs f (LastCondBranch _ te fe) z = f te (f fe z) +fold_cmm_succs f (LastSwitch _ edges) z = foldl (flip f) z $ catMaybes edges + + +---------------------------------------------------------------- +-- prettyprinting (avoids recursive imports) + +instance Outputable Middle 
where + ppr s = pprMiddle s + +instance Outputable Last where + ppr s = pprLast s + +instance Outputable Convention where + ppr = pprConvention + +pprMiddle :: Middle -> SDoc +pprMiddle stmt = case stmt of + + MidNop -> semi + + CopyIn conv args _ -> + if null args then ptext SLIT("empty CopyIn") + else commafy (map ppr args) <+> equals <+> + ptext SLIT("foreign") <+> doubleQuotes(ppr conv) <+> ptext SLIT("...") + + CopyOut conv args -> + if null args then PP.empty + else ptext SLIT("CopyOut") <+> doubleQuotes(ppr conv) <+> + parens (commafy (map ppr args)) + + -- // text + MidComment s -> text "//" <+> ftext s + + -- reg = expr; + MidAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi + + -- rep[lv] = expr; + MidStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi + where + rep = ppr ( cmmExprRep expr ) + + -- call "ccall" foo(x, y)[r1, r2]; + -- ToDo ppr volatile + MidUnsafeCall (CmmCallee fn cconv) results args -> + hcat [ if null results + then PP.empty + else parens (commafy $ map ppr results) <> + ptext SLIT(" = "), + ptext SLIT("call"), space, + doubleQuotes(ppr cconv), space, + target fn, parens ( commafy $ map ppr args ), + semi ] + where + target t@(CmmLit _) = ppr t + target fn' = parens (ppr fn') + + MidUnsafeCall (CmmPrim op) results args -> + pprMiddle (MidUnsafeCall (CmmCallee (CmmLit lbl) CCallConv) results args) + where + lbl = CmmLabel (mkForeignLabel (mkFastString (show op)) Nothing False) + + +pprLast :: Last -> SDoc +pprLast stmt = case stmt of + + LastBranch ident args -> genBranchWithArgs ident args + LastCondBranch expr t f -> genFullCondBranch expr t f + LastJump expr params -> ppr $ CmmJump expr params + LastReturn params -> ppr $ CmmReturn params + LastSwitch arg ids -> ppr $ CmmSwitch arg ids + LastCall tgt params k -> genCall tgt params k + +genCall :: CmmCallTarget -> CmmActuals -> Maybe BlockId -> SDoc +genCall (CmmCallee fn cconv) args k = + hcat [ ptext SLIT("foreign"), space, + doubleQuotes(ppr cconv), space, + target fn, parens ( commafy $ map ppr args ), + case k of Nothing -> ptext SLIT("never returns") + Just k -> ptext SLIT("returns to") <+> ppr k, + semi ] + where + target t@(CmmLit _) = ppr t + target fn' = parens (ppr fn') + +genCall (CmmPrim op) args k = + hcat [ text "%", text (show op), parens ( commafy $ map ppr args ), + ptext SLIT("returns to"), space, ppr k, + semi ] + +genBranchWithArgs :: (Outputable id, Outputable arg) => id -> [arg] -> SDoc +genBranchWithArgs ident [] = ptext SLIT("goto") <+> ppr ident <> semi +genBranchWithArgs ident args = ptext SLIT("goto") <+> ppr ident <+> + parens (commafy (map ppr args)) <> semi + +genFullCondBranch :: Outputable id => CmmExpr -> id -> id -> SDoc +genFullCondBranch expr t f = + hsep [ ptext SLIT("if") + , parens(ppr expr) + , ptext SLIT("goto") + , ppr t <> semi + , ptext SLIT("else goto") + , ppr f <> semi + ] + +pprConvention :: Convention -> SDoc +pprConvention (Argument c) = ppr c +pprConvention (Result c) = ppr c +pprConvention Local = text "" + +commafy :: [SDoc] -> SDoc +commafy xs = hsep $ punctuate comma xs diff --git a/compiler/cmm/ZipDataflow.hs b/compiler/cmm/ZipDataflow.hs new file mode 100644 index 0000000..290faa2 --- /dev/null +++ b/compiler/cmm/ZipDataflow.hs @@ -0,0 +1,836 @@ +{-# OPTIONS -Wall -fno-warn-name-shadowing #-} +{-# LANGUAGE ScopedTypeVariables, MultiParamTypeClasses #-} +module ZipDataflow + ( Answer(..) 
+ , BComputation(..), BAnalysis, BTransformation, BFunctionalTransformation + , BPass, BUnlimitedPass + , FComputation(..), FAnalysis, FTransformation, FPass, FUnlimitedPass + , LastOutFacts(..) + , DebugNodes + , anal_b, a_t_b, a_ft_b, a_ft_b_unlimited, ignore_transactions_b + , anal_f, a_t_f + , run_b_anal, run_f_anal + , refine_f_anal, refine_b_anal, fold_edge_facts_b, fold_edge_facts_with_nodes_b + , b_rewrite, f_rewrite + , solve_graph_b, solve_graph_f + ) +where + +import CmmTx +import DFMonad +import ZipCfg hiding (freshBlockId) -- use version from DFMonad +import qualified ZipCfg as G + +import Outputable +import Panic +import UniqFM +import UniqSupply + +import Control.Monad +import Maybe + +{- + +\section{A very polymorphic infrastructure for dataflow problems} + +This module presents a framework for solving iterative dataflow +problems. +There are two major submodules: one for forward problems and another +for backward problems. +Both modules incorporate the composition framework developed by +Lerner, Grove, and Chambers. +They also support a \emph{transaction limit}, which enables the +binary-search debugging technique developed by Whalley and Davidson +under the name \emph{vpoiso}. +Transactions may either be known to the individual dataflow solvers or +may be managed by the framework. +-} + +-- | In the composition framework, a pass either produces a dataflow +-- fact or proposes to rewrite the graph. To make life easy for the +-- clients, the rewrite is given in unlabelled form, but we use +-- labelled form internally throughout, because it greatly simplifies +-- the implementation not to have the first block be a special case +-- edverywhere. + +data Answer m l a = Dataflow a | Rewrite (Graph m l) + + +{- + +\subsection {Descriptions of dataflow passes} + +\paragraph{Passes for backward dataflow problems} + +The computation of a fact is the basis of a dataflow pass. +A~computation takes not one but two type parameters: +\begin{itemize} +\item +Type parameter [['i]] is an input, from which it should be possible to +derived a dataflow fact of interest. +For example, [['i]] might be equal to a fact, or it might be a tuple +of which one element is a fact. +\item +Type parameter [['o]] is an output, or possibly a function from +[[txlimit]] to an output +\end{itemize} +Backward analyses compute [[in]] facts (facts on inedges). +<>= + +-} + +data BComputation middle last input output = BComp + { bc_name :: String + , bc_exit_in :: output + , bc_last_in :: (BlockId -> input) -> last -> output + , bc_middle_in :: input -> middle -> output + , bc_first_in :: input -> BlockId -> output + } + +-- | From these elements we build several kinds of passes: +-- * A pure analysis computes a fact, using that fact as input and output. +-- * A pure transformation computes no facts but only changes the graph. +-- * A fully general pass both computes a fact and rewrites the graph, +-- respecting the current transaction limit. + +type BAnalysis m l a = BComputation m l a a +type BTransformation m l a = BComputation m l a (Maybe (UniqSM (Graph m l))) +type BFunctionalTransformation m l a = BComputation m l a (Maybe (Graph m l)) + +type BPass m l a = BComputation m l a (Txlimit -> DFM a (Answer m l a)) +type BUnlimitedPass m l a = BComputation m l a ( DFM a (Answer m l a)) + +{- +\paragraph{Passes for forward dataflow problems} + +A forward dataflow pass has a similar structure, but the details are +different. 
In particular, the output fact from a [[last]] node has a +higher-order representation: it takes a function that mutates a +[[uid]] to account for the new fact, then performs the necessary +mutation on every successor of the last node. We therefore have two +kinds of type parameter for outputs: output from a [[middle]] node +is~[[outmid]], and output from a [[last]] node is~[[outlast]]. +-} + +data FComputation middle last input outmid outlast = FComp + { fc_name :: String + , fc_first_out :: input -> BlockId -> outmid + , fc_middle_out :: input -> middle -> outmid + , fc_last_outs :: input -> last -> outlast + , fc_exit_outs :: input -> outlast + } + +-- | The notions of analysis, pass, and transformation are analogous to the +-- backward case. + +newtype LastOutFacts a = LastOutFacts [(BlockId, a)] + -- ^ These are facts flowing out of a last node to the node's successors. + -- They are either to be set (if they pertain to the graph currently + -- under analysis) or propagated out of a sub-analysis + +type FAnalysis m l a = FComputation m l a a (LastOutFacts a) +type FTransformation m l a = FComputation m l a (Maybe (UniqSM (Graph m l))) + (Maybe (UniqSM (Graph m l))) +type FPass m l a = FComputation m l a + (Txlimit -> DFM a (Answer m l a)) + (Txlimit -> DFM a (Answer m l (LastOutFacts a))) + +type FUnlimitedPass m l a = FComputation m l a + (DFM a (Answer m l a)) + (DFM a (Answer m l (LastOutFacts a))) + +{- +\paragraph{Composing passes} + +Both forward and backward engines share a handful of functions for +composing analyses, transformations, and passes. + +We can make an analysis pass, or we can +combine a related analysis and transformation into a full pass. +-} + +anal_b :: BAnalysis m l a -> BPass m l a +a_t_b :: BAnalysis m l a -> BTransformation m l a -> BPass m l a +a_ft_b :: BAnalysis m l a -> BFunctionalTransformation m l a -> BPass m l a +a_ft_b_unlimited + :: BAnalysis m l a -> BFunctionalTransformation m l a -> BPass m l a + -- ^ Ignores transaction limits. Could produce a BUnlimitedPass statically, + -- but that would cost too much code in the implementation for a + -- static distinction that is not worth so much. +ignore_transactions_b :: BUnlimitedPass m l a -> BPass m l a + + + +anal_f :: FAnalysis m l a -> FPass m l a +a_t_f :: FAnalysis m l a -> FTransformation m l a -> FPass m l a + + +{- +\paragraph {Running the dataflow engine} + +Every function for running analyses has two forms, because for a +forward analysis, we supply an entry fact, whereas for a backward +analysis, we don't need to supply an exit fact (because a graph for a +procedure doesn't have an exit node). +It's possible we could make these things more regular. +-} + +-- | The analysis functions set properties on unique IDs. + +run_b_anal :: forall m l a . (DebugNodes m l, LastNode l, Outputable a) => + BAnalysis m l a -> LGraph m l -> DFA a () +run_f_anal :: forall m l a . (DebugNodes m l, LastNode l, Outputable a) => + FAnalysis m l a -> a -> LGraph m l -> DFA a () + -- ^ extra parameter is the entry fact + +-- | Rematerialize results of analysis for use elsewhere. Simply applies a +-- fold function to every edge fact, in reverse postorder dfs. The facts +-- should already have been computed into the monady by run_b_anal or b_rewrite. 
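+-- For instance, every inedge fact can be collected into a list just by
+-- folding with (:).  A sketch (the name 'allInFacts' is purely
+-- illustrative, and 'solution' stands for the BlockId-indexed result of a
+-- prior run):
+--
+--   allInFacts :: LastNode l => BAnalysis m l a -> LGraph m l -> (BlockId -> a) -> [a]
+--   allInFacts anal g solution = fold_edge_facts_b (:) anal g solution []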
+fold_edge_facts_b + :: LastNode l => + (a -> b -> b) -> BAnalysis m l a -> LGraph m l -> (BlockId -> a) -> b -> b + +fold_edge_facts_with_nodes_b :: LastNode l + => (l -> a -> b -> b) -- ^ inedge to last node + -> (m -> a -> b -> b) -- ^ inedge to middle node + -> (BlockId -> a -> b -> b) -- ^ fact at label + -> BAnalysis m l a -- ^ backwards analysis + -> LGraph m l -- ^ graph + -> (BlockId -> a) -- ^ solution to bwd anal + -> b -> b + + +-- | It can be useful to refine the results of an existing analysis, +-- or for example to use the outcome of a forward analsysis in a +-- backward analysis. These functions can also be used to compute a +-- fixed point iteratively starting from somewhere other than bottom +-- (as in the reachability analysis done for proc points). + +class (Outputable m, Outputable l, LastNode l, Outputable (LGraph m l)) => DebugNodes m l + +refine_f_anal :: forall m l a . (DebugNodes m l, LastNode l, Outputable a) => + FAnalysis m l a -> LGraph m l -> DFA a () -> DFA a () + +refine_b_anal :: forall m l a . (DebugNodes m l, LastNode l, Outputable a) => + BAnalysis m l a -> LGraph m l -> DFA a () -> DFA a () + +b_rewrite :: (DebugNodes m l, Outputable a) => + BPass m l a -> LGraph m l -> DFM a (LGraph m l) +f_rewrite :: (DebugNodes m l, LastNode l, Outputable m, Outputable a) => + FPass m l a -> a -> LGraph m l -> DFM a (LGraph m l) + -- ^ extra parameter is the entry fact + +-- | If the solution to a problem is already sitting in a monad, we +-- should be able to take a short cut and just rewrite it in one pass. +-- But not yet implemented. + +{- +f_rewrite_solved :: (LastNode l, Outputable m, Outputable a) => + FPass m l a -> DFM a () -> LGraph m l -> DFM a (LGraph m l) +b_rewrite_solved :: (LastNode l, Outputable m, Outputable a) => + BPass m l a -> DFM a () -> LGraph m l -> DFM a (LGraph m l) +-} + +-- ===================== IMPLEMENTATION ======================-- + +-- | Here's a function to run an action on blocks until we reach a fixed point. +run :: (DataflowAnalysis anal, Monad (anal a), Outputable a, DebugNodes m l) => + String -> String -> anal a () -> (b -> Block m l -> anal a b) -> + b -> [Block m l] -> anal a b +run dir name set_entry do_block b blocks = + do { set_entry; show_blocks $ iterate (1::Int) } + where + -- N.B. 
Each iteration starts with the same transaction limit; + -- only the rewrites in the final iteration actually count + trace_block b block = my_trace "about to do" (text name <+> text "on" <+> ppr (blockId block)) $ + do_block b block + iterate n = + do { markFactsUnchanged + ; b <- foldM trace_block b blocks + ; changed <- factsStatus + ; facts <- allFacts + ; let depth = 0 -- was nesting depth + ; ppIter depth n $ + case changed of + NoChange -> unchanged depth $ return b + SomeChange -> + pprFacts depth n facts $ + if n < 1000 then iterate (n+1) + else panic $ msg n + } + msg n = concat [name, " didn't converge in ", show n, " " , dir, + " iterations"] + my_nest depth sdoc = my_trace "" $ nest (3*depth) sdoc + ppIter depth n = my_nest depth (empty $$ text "*************** iteration" <+> pp_i n) + pp_i n = int n <+> text "of" <+> text name <+> text "on" <+> graphId + unchanged depth = my_nest depth (text "facts are unchanged") + + pprFacts depth n env = + my_nest depth (text "facts for iteration" <+> pp_i n <+> text "are:" $$ + (nest 2 $ vcat $ map pprFact $ ufmToList env)) + pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) + graphId = case blocks of { Block id _ : _ -> ppr id ; [] -> text "" } + show_blocks = my_trace "Blocks:" (vcat (map pprBlock blocks)) + pprBlock (Block id t) = nest 2 (pprFact (id, t)) + +{- +\subsection{Backward problems} + +In a backward problem, we compute \emph{in} facts from \emph{out} +facts. The analysis gives us [[exit_in]], [[last_in]], [[middle_in]], +and [[first_in]], each of which computes an \emph{in} fact for one +kind of node. We provide [[head_in]], which computes the \emph{in} +fact for a first node followed by zero or more middle nodes. + +We don't compute and return the \emph{in} fact for block; instead, we +use [[setFact]] to attach that fact to the block's unique~ID. +We iterate until no more facts have changed. 
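+
+As an illustration, a backward liveness-style analysis over ZipCfgCmm's
+[[Middle]] and [[Last]] nodes could be packaged as a [[BComp]] along these
+lines, where [[RegSet]], [[emptyRegSet]], [[joinOuts]], [[useLast]],
+[[useMid]], and [[defMid]] stand for assumed helpers rather than
+definitions from this module:
+
+  liveness :: BAnalysis Middle Last RegSet
+  liveness = BComp
+    { bc_name      = "register liveness (sketch)"
+    , bc_exit_in   = emptyRegSet                          -- nothing live past the exit
+    , bc_last_in   = \env l -> useLast l (joinOuts env l) -- join successors' facts, add uses
+    , bc_middle_in = \live m -> useMid m (defMid m live)  -- kill definitions, add uses
+    , bc_first_in  = \live _id -> live                    -- a label changes nothing
+    }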
+-} +run_b_anal comp graph = + refine_b_anal comp graph (return ()) + -- for a backward analysis, everything is initially bottom + +refine_b_anal comp graph initial = + run "backward" (bc_name comp) initial set_block_fact () blocks + where + blocks = reverse (postorder_dfs graph) + set_block_fact () b@(G.Block id _) = + let (h, l) = G.goto_end (G.unzip b) in + do env <- factsEnv + let block_in = head_in h (last_in comp env l) -- 'in' fact for the block + setFact id block_in + head_in (G.ZHead h m) out = head_in h (bc_middle_in comp out m) + head_in (G.ZFirst id) out = bc_first_in comp out id + +last_in :: BComputation m l i o -> (BlockId -> i) -> G.ZLast l -> o +last_in comp env (G.LastOther l) = bc_last_in comp env l +last_in comp _ (G.LastExit) = bc_exit_in comp + +------ we can now pass those facts elsewhere +fold_edge_facts_b f comp graph env z = + foldl fold_block_facts z (postorder_dfs graph) + where + fold_block_facts z b = + let (h, l) = G.goto_end (G.unzip b) + in head_fold h (last_in comp env l) z + head_fold (G.ZHead h m) out z = head_fold h (bc_middle_in comp out m) (f out z) + head_fold (G.ZFirst id) out z = f (bc_first_in comp out id) (f out z) + +fold_edge_facts_with_nodes_b fl fm ff comp graph env z = + foldl fold_block_facts z (postorder_dfs graph) + where + fold_block_facts z b = + let (h, l) = G.goto_end (G.unzip b) + in' = last_in comp env l + z' = case l of { G.LastExit -> z ; G.LastOther l -> fl l in' z } + in head_fold h in' z' + head_fold (G.ZHead h m) out z = + let a = bc_middle_in comp out m + z' = fm m a z + in head_fold h a z' + head_fold (G.ZFirst id) out z = + let a = bc_first_in comp out id + z' = ff id a z + in z' + + +-- | In the general case we solve a graph in the context of a larger subgraph. +-- To do this, we need a locally modified computation that allows an +-- ``exit fact'' to flow into the exit node. + +comp_with_exit_b :: BComputation m l i (Txlimit -> DFM f (Answer m l o)) -> o -> + BComputation m l i (Txlimit -> DFM f (Answer m l o)) +comp_with_exit_b comp exit_fact = + comp { bc_exit_in = \_txlim -> return $ Dataflow $ exit_fact } + +-- | Given this function, we can now solve a graph simply by doing a +-- backward analysis on the modified computation. Note we have to be +-- very careful with 'Rewrite'. Either a rewrite is going to +-- participate, in which case we mark the graph rerewritten, or we're +-- going to analysis the proposed rewrite and then throw away +-- everything but the answer, in which case it's a 'subAnalysis'. A +-- Rewrite should always use exactly one of these monadic operations. + +solve_graph_b :: + forall m l a . 
(DebugNodes m l, Outputable a) => + BPass m l a -> Txlimit -> G.LGraph m l -> a -> DFM a (Txlimit, a) +solve_graph_b comp txlim graph exit_fact = + general_backward (comp_with_exit_b comp exit_fact) txlim graph + where + general_backward :: BPass m l a -> Txlimit -> G.LGraph m l -> DFM a (Txlimit, a) + general_backward comp txlim graph = + let set_block_fact :: Txlimit -> G.Block m l -> DFM a Txlimit + set_block_fact txlim b = + do { (txlim, block_in) <- + let (h, l) = G.goto_end (G.unzip b) in + factsEnv >>= \env -> last_in comp env l txlim >>= \x -> + case x of + Dataflow a -> head_in txlim h a + Rewrite g -> + do { bot <- botFact + ; g <- lgraphOfGraph g + ; (txlim, a) <- subAnalysis' $ + solve_graph_b comp (txlim-1) g bot + ; head_in txlim h a } + ; my_trace "result of" (text (bc_name comp) <+> + text "on" <+> ppr (G.blockId b) <+> text "is" <+> ppr block_in) $ + setFact (G.blockId b) block_in + ; return txlim + } + head_in txlim (G.ZHead h m) out = + bc_middle_in comp out m txlim >>= \x -> case x of + Dataflow a -> head_in txlim h a + Rewrite g -> + do { g <- lgraphOfGraph g + ; (txlim, a) <- subAnalysis' $ solve_graph_b comp (txlim-1) g out + ; my_trace "Rewrote middle node" (f4sep [ppr m, text "to", ppr g]) $ + head_in txlim h a } + head_in txlim (G.ZFirst id) out = + bc_first_in comp out id txlim >>= \x -> case x of + Dataflow a -> return (txlim, a) + Rewrite g -> do { g <- lgraphOfGraph g + ; subAnalysis' $ solve_graph_b comp (txlim-1) g out } + + in do { txlim <- + run "backward" (bc_name comp) (return ()) set_block_fact txlim blocks + ; a <- getFact (G.gr_entry graph) + ; facts <- allFacts + ; my_trace "Solution to graph after pass 1 is" (pprFacts graph facts a) $ + return (txlim, a) } + + blocks = reverse (G.postorder_dfs graph) + pprFacts g env a = (ppr a <+> text "with") $$ vcat (pprLgraph g : map pprFact (ufmToList env)) + pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) + + +lgraphOfGraph :: G.Graph m l -> DFM f (G.LGraph m l) +lgraphOfGraph g = + do id <- freshBlockId "temporary id for dataflow analysis" + return $ labelGraph id g + +labelGraph :: BlockId -> G.Graph m l -> G.LGraph m l +labelGraph id (Graph tail blocks) = LGraph id (insertBlock (Block id tail) blocks) + +{- +We solve and rewrite in two passes: the first pass iterates to a fixed +point to reach a dataflow solution, and the second pass uses that +solution to rewrite the graph. + +The +key job is done by [[propagate]], which propagates a fact of type~[[a]] +between a head and tail. +The tail is in final form; the head is still to be rewritten. +-} + +solve_and_rewrite_b :: + forall m l a. 
(DebugNodes m l, Outputable a) => + BPass m l a -> Txlimit -> LGraph m l -> a -> DFM a (Txlimit, a, LGraph m l) + +solve_and_rewrite_b comp txlim graph exit_fact = + do { (_, a) <- solve_graph_b comp txlim graph exit_fact -- pass 1 + ; facts <- allFacts + ; (txlim, g) <- -- pass 2 + my_trace "Solution to graph after pass 1 is" (pprFacts graph facts) $ + backward_rewrite (comp_with_exit_b comp exit_fact) txlim graph + ; facts <- allFacts + ; my_trace "Rewritten graph after pass 2 is" (pprFacts g facts) $ + return (txlim, a, g) } + where + pprFacts g env = vcat (pprLgraph g : map pprFact (ufmToList env)) + pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) + eid = G.gr_entry graph + backward_rewrite comp txlim graph = + rewrite_blocks comp txlim emptyBlockEnv $ reverse (G.postorder_dfs graph) + rewrite_blocks :: + BPass m l a -> Txlimit -> + BlockEnv (Block m l) -> [Block m l] -> DFM a (Txlimit,G.LGraph m l) + rewrite_blocks _comp txlim rewritten [] = return (txlim, G.LGraph eid rewritten) + rewrite_blocks comp txlim rewritten (b:bs) = + let rewrite_next_block txlim = + let (h, l) = G.goto_end (G.unzip b) in + factsEnv >>= \env -> last_in comp env l txlim >>= \x -> case x of + Dataflow a -> propagate txlim h a (G.ZLast l) rewritten + Rewrite g -> -- see Note [Rewriting labelled LGraphs] + do { bot <- botFact + ; g <- lgraphOfGraph g + ; (txlim, a, g') <- solve_and_rewrite_b comp (txlim-1) g bot + ; let G.Graph t new_blocks = G.remove_entry_label g' + ; markGraphRewritten + ; let rewritten' = plusUFM new_blocks rewritten + ; -- continue at entry of g + propagate txlim h a t rewritten' + } + propagate :: Txlimit -> G.ZHead m -> a -> G.ZTail m l -> + BlockEnv (Block m l) -> DFM a (Txlimit, G.LGraph m l) + propagate txlim (G.ZHead h m) out tail rewritten = + bc_middle_in comp out m txlim >>= \x -> case x of + Dataflow a -> propagate txlim h a (G.ZTail m tail) rewritten + Rewrite g -> + do { g <- lgraphOfGraph g + ; (txlim, a, g') <- solve_and_rewrite_b comp (txlim-1) g out + ; markGraphRewritten + ; let (t, g'') = G.splice_tail g' tail + ; let rewritten' = plusUFM (G.gr_blocks g'') rewritten + ; my_trace "Rewrote middle node" (f4sep [ppr m, text "to", ppr g]) $ + propagate txlim h a t rewritten' } + propagate txlim h@(G.ZFirst id) out tail rewritten = + bc_first_in comp out id txlim >>= \x -> case x of + Dataflow a -> + let b = G.Block id tail in + do { checkFactMatch id a + ; rewrite_blocks comp txlim (extendBlockEnv rewritten id b) bs } + Rewrite fg -> + do { g <- lgraphOfGraph fg + ; (txlim, a, g') <- solve_and_rewrite_b comp (txlim-1) g out + ; markGraphRewritten + ; let (t, g'') = G.splice_tail g' tail + ; let rewritten' = plusUFM (G.gr_blocks g'') rewritten + ; my_trace "Rewrote label " (f4sep [ppr id, text "to", ppr g]) $ + propagate txlim h a t rewritten' } + in rewrite_next_block txlim + +b_rewrite comp g = + do { txlim <- liftTx txRemaining + ; bot <- botFact + ; (txlim', _, gc) <- solve_and_rewrite_b comp txlim g bot + ; liftTx $ txDecrement (bc_name comp) txlim txlim' + ; return gc + } + +{- +This debugging stuff is left over from imperative-land. +It might be useful one day if I learn how to cheat the IO monad! 
+ +debug_b :: (Outputable m, Outputable l, Outputable a) => BPass m l a -> BPass m l a + +let debug s (f, comp) = + let pr = Printf.eprintf in + let fact dir node a = pr "%s %s for %s = %s\n" f.fact_name dir node (s a) in + let rewr node g = pr "%s rewrites %s to \n" comp.name node in + let wrap f nodestring node txlim = + let answer = f node txlim in + let () = match answer with + | Dataflow a -> fact "in " (nodestring node) a + | Rewrite g -> rewr (nodestring node) g in + answer in + let wrapout f nodestring out node txlim = + fact "out" (nodestring node) out; + wrap (f out) nodestring node txlim in + let last_in = wrap comp.last_in (RS.rtl << G.last_instr) in + let middle_in = wrapout comp.middle_in (RS.rtl << G.mid_instr) in + let first_in = + let first = function G.Entry -> "" | G.Label ((u, l), _, _) -> l in + wrapout comp.first_in first in + f, { comp with last_in = last_in; middle_in = middle_in; first_in = first_in; } +-} + +anal_b comp = comp { bc_last_in = wrap2 $ bc_last_in comp + , bc_exit_in = wrap0 $ bc_exit_in comp + , bc_middle_in = wrap2 $ bc_middle_in comp + , bc_first_in = wrap2 $ bc_first_in comp } + where wrap2 f out node _txlim = return $ Dataflow (f out node) + wrap0 fact _txlim = return $ Dataflow fact + +ignore_transactions_b comp = + comp { bc_last_in = wrap2 $ bc_last_in comp + , bc_exit_in = wrap0 $ bc_exit_in comp + , bc_middle_in = wrap2 $ bc_middle_in comp + , bc_first_in = wrap2 $ bc_first_in comp } + where wrap2 f out node _txlim = f out node + wrap0 fact _txlim = fact + +answer' :: (b -> DFM f (Graph m l)) -> Txlimit -> Maybe b -> a -> DFM f (Answer m l a) +answer' lift txlim r a = + case r of Just gc | txlim > 0 -> do { g <- lift gc; return $ Rewrite g } + _ -> return $ Dataflow a + +unlimited_answer' + :: (b -> DFM f (Graph m l)) -> Txlimit -> Maybe b -> a -> DFM f (Answer m l a) +unlimited_answer' lift _txlim r a = + case r of Just gc -> do { g <- lift gc; return $ Rewrite g } + _ -> return $ Dataflow a + +combine_a_t_with :: (Txlimit -> Maybe b -> a -> DFM a (Answer m l a)) -> + BAnalysis m l a -> BComputation m l a (Maybe b) -> + BPass m l a +combine_a_t_with answer anal tx = + let last_in env l txlim = + answer txlim (bc_last_in tx env l) (bc_last_in anal env l) + exit_in txlim = answer txlim (bc_exit_in tx) (bc_exit_in anal) + middle_in out m txlim = + answer txlim (bc_middle_in tx out m) (bc_middle_in anal out m) + first_in out f txlim = + answer txlim (bc_first_in tx out f) (bc_first_in anal out f) + in BComp { bc_name = concat [bc_name anal, " and ", bc_name tx] + , bc_last_in = last_in, bc_middle_in = middle_in + , bc_first_in = first_in, bc_exit_in = exit_in } + +a_t_b = combine_a_t_with (answer' liftUSM) +a_ft_b = combine_a_t_with (answer' return) +a_ft_b_unlimited = combine_a_t_with (unlimited_answer' return) + + +-- =============== FORWARD ================ + +-- | We don't compute and return the \emph{in} fact for block; instead, we +-- use [[P.set]] to attach that fact to the block's unique~ID. +-- We iterate until no more facts have changed. 
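+
+-- A sketch of how the forward interface might be instantiated.  The node
+-- types are ZipCfgCmm's Middle and Last (not imported by this module), and
+-- 'ConstFacts', 'stepMiddle', and 'factsForSuccs' are assumed helpers, so
+-- the sketch is left in a comment rather than compiled:
+
+{-
+constAnal :: FAnalysis Middle Last ConstFacts
+constAnal = FComp
+  { fc_name       = "constant facts (sketch)"
+  , fc_first_out  = \facts _id -> facts              -- labels are transparent
+  , fc_middle_out = \facts m   -> stepMiddle m facts -- per-node transfer function
+  , fc_last_outs  = \facts l   -> LastOutFacts (factsForSuccs l facts)
+  , fc_exit_outs  = \_facts    -> LastOutFacts []    -- nothing flows past the exit
+  }
+
+Such an analysis would be solved with run_f_anal constAnal entry_fact graph,
+or lifted to a full pass with anal_f.
+-}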
+ +dump_things :: Bool +dump_things = False + +my_trace :: String -> SDoc -> a -> a +my_trace = if dump_things then pprTrace else \_ _ a -> a + +run_f_anal comp entry_fact graph = refine_f_anal comp graph set_entry + where set_entry = setFact (G.gr_entry graph) entry_fact + +refine_f_anal comp graph initial = + run "forward" (fc_name comp) initial set_successor_facts () blocks + where blocks = G.postorder_dfs graph + set_successor_facts () (G.Block id t) = + let forward in' (G.ZTail m t) = forward (fc_middle_out comp in' m) t + forward in' (G.ZLast l) = setEdgeFacts (last_outs comp in' l) + _blockname = if id == G.gr_entry graph then "" else show id + in getFact id >>= \a -> forward (fc_first_out comp a id) t + setEdgeFacts (LastOutFacts fs) = mapM_ setEdgeFact fs + setEdgeFact (id, a) = setFact id a + +last_outs :: FComputation m l i om ol -> i -> G.ZLast l -> ol +last_outs comp i (G.LastExit) = fc_exit_outs comp i +last_outs comp i (G.LastOther l) = fc_last_outs comp i l + +-- | In the general case we solve a graph in the context of a larger subgraph. +-- To do this, we need a locally modified computation that allows an +-- ``exit fact'' to flow out of the exit node. We pass in a fresh BlockId +-- to which the exit fact can flow + +comp_with_exit_f :: FPass m l a -> BlockId -> FPass m l a +comp_with_exit_f comp exit_fact_id = comp { fc_exit_outs = exit_outs } + where exit_outs in' _txlimit = + return $ Dataflow $ LastOutFacts [(exit_fact_id, in')] + +-- | Given [[comp_with_exit_f]], we can now solve a graph simply by doing a +-- forward analysis on the modified computation. +solve_graph_f :: + forall m l a . (DebugNodes m l, Outputable a) => + FPass m l a -> Txlimit -> G.LGraph m l -> a -> + DFM a (Txlimit, a, LastOutFacts a) +solve_graph_f comp txlim g in_fact = + do { exit_fact_id <- freshBlockId "proxy for exit node" + ; txlim <- general_forward (comp_with_exit_f comp exit_fact_id) txlim in_fact g + ; a <- getFact exit_fact_id + ; outs <- lastOutFacts + ; forgetFact exit_fact_id -- close space leak + ; return (txlim, a, LastOutFacts outs) } + where + general_forward :: FPass m l a -> Txlimit -> a -> G.LGraph m l -> DFM a Txlimit + general_forward comp txlim entry_fact graph = + let blocks = G.postorder_dfs g + is_local id = isJust $ lookupBlockEnv (G.gr_blocks g) id + set_or_save :: LastOutFacts a -> DFM a () + set_or_save (LastOutFacts l) = mapM_ set_or_save_one l + set_or_save_one (id, a) = + if is_local id then setFact id a else addLastOutFact (id, a) + set_entry = setFact (G.gr_entry graph) entry_fact + + set_successor_facts txlim b = + let set_tail_facts txlim in' (G.ZTail m t) = + my_trace "Solving middle node" (ppr m) $ + fc_middle_out comp in' m txlim >>= \ x -> case x of + Dataflow a -> set_tail_facts txlim a t + Rewrite g -> + do g <- lgraphOfGraph g + (txlim, out, last_outs) <- subAnalysis' $ + solve_graph_f comp (txlim-1) g in' + set_or_save last_outs + set_tail_facts txlim out t + set_tail_facts txlim in' (G.ZLast l) = + last_outs comp in' l txlim >>= \x -> case x of + Dataflow outs -> do { set_or_save outs; return txlim } + Rewrite g -> + do g <- lgraphOfGraph g + (txlim, _, last_outs) <- subAnalysis' $ + solve_graph_f comp (txlim-1) g in' + set_or_save last_outs + return txlim + G.Block id t = b + in do idfact <- getFact id + infact <- fc_first_out comp idfact id txlim + case infact of Dataflow a -> set_tail_facts txlim a t + Rewrite g -> + do g <- lgraphOfGraph g + (txlim, out, last_outs) <- subAnalysis' $ + solve_graph_f comp (txlim-1) g idfact + set_or_save last_outs 
+ set_tail_facts txlim out t + in run "forward" (fc_name comp) set_entry set_successor_facts txlim blocks + + + +{- +We solve and rewrite in two passes: the first pass iterates to a fixed +point to reach a dataflow solution, and the second pass uses that +solution to rewrite the graph. + +The key job is done by [[propagate]], which propagates a fact of type~[[a]] +between a head and tail. +The tail is in final form; the head is still to be rewritten. +-} +solve_and_rewrite_f :: + forall m l a . (DebugNodes m l, Outputable a) => + FPass m l a -> Txlimit -> LGraph m l -> a -> DFM a (Txlimit, a, LGraph m l) +solve_and_rewrite_f comp txlim graph in_fact = + do solve_graph_f comp txlim graph in_fact -- pass 1 + exit_id <- freshBlockId "proxy for exit node" + (txlim, g) <- forward_rewrite (comp_with_exit_f comp exit_id) txlim graph in_fact + exit_fact <- getFact exit_id + return (txlim, exit_fact, g) + +forward_rewrite :: + forall m l a . (DebugNodes m l, Outputable a) => + FPass m l a -> Txlimit -> G.LGraph m l -> a -> DFM a (Txlimit, G.LGraph m l) +forward_rewrite comp txlim graph entry_fact = + do setFact eid entry_fact + rewrite_blocks txlim emptyBlockEnv (G.postorder_dfs graph) + where + eid = G.gr_entry graph + is_local id = isJust $ lookupBlockEnv (G.gr_blocks graph) id + set_or_save :: LastOutFacts a -> DFM a () + set_or_save (LastOutFacts l) = mapM_ set_or_save_one l + set_or_save_one (id, a) = + if is_local id then checkFactMatch id a + else panic "set fact outside graph during rewriting pass?!" + + rewrite_blocks :: + Txlimit -> BlockEnv (Block m l) -> [Block m l] -> DFM a (Txlimit, LGraph m l) + rewrite_blocks txlim rewritten [] = return (txlim, G.LGraph eid rewritten) + rewrite_blocks txlim rewritten (G.Block id t : bs) = + do id_fact <- getFact id + first_out <- fc_first_out comp id_fact id txlim + case first_out of + Dataflow a -> propagate txlim (G.ZFirst id) a t rewritten bs + Rewrite fg -> do { markGraphRewritten + ; rewrite_blocks (txlim-1) rewritten + (G.postorder_dfs (labelGraph id fg) ++ bs) } + propagate :: Txlimit -> G.ZHead m -> a -> G.ZTail m l -> BlockEnv (G.Block m l) -> + [G.Block m l] -> DFM a (Txlimit, G.LGraph m l) + propagate txlim h in' (G.ZTail m t) rewritten bs = + my_trace "Rewriting middle node" (ppr m) $ + do fc_middle_out comp in' m txlim >>= \x -> case x of + Dataflow a -> propagate txlim (G.ZHead h m) a t rewritten bs + Rewrite g -> + my_trace "Rewriting middle node...\n" empty $ + do g <- lgraphOfGraph g + (txlim, a, g) <- solve_and_rewrite_f comp (txlim-1) g in' + markGraphRewritten + my_trace "Rewrite of middle node completed\n" empty $ + let (g', h') = G.splice_head h g in + propagate txlim h' a t (plusUFM (G.gr_blocks g') rewritten) bs + propagate txlim h in' (G.ZLast l) rewritten bs = + do last_outs comp in' l txlim >>= \x -> case x of + Dataflow outs -> + do set_or_save outs + let b = G.zip (G.ZBlock h (G.ZLast l)) + rewrite_blocks txlim (G.insertBlock b rewritten) bs + Rewrite g -> + -- could test here that [[exits g = exits (G.Entry, G.ZLast l)]] + {- if Debug.on "rewrite-last" then + Printf.eprintf "ZLast node %s rewritten to:\n" + (RS.rtl (G.last_instr l)); -} + do g <- lgraphOfGraph g + (txlim, _, g) <- solve_and_rewrite_f comp (txlim-1) g in' + markGraphRewritten + let g' = G.splice_head_only h g + rewrite_blocks txlim (plusUFM (G.gr_blocks g') rewritten) bs + +f_rewrite comp entry_fact g = + do { txlim <- liftTx txRemaining + ; (txlim', _, gc) <- solve_and_rewrite_f comp txlim g entry_fact + ; liftTx $ txDecrement (fc_name comp) txlim 
txlim' + ; return gc + } + + +{- +debug_f :: (Outputable m, Outputable l, Outputable a) => FPass m l a -> FPass m l a + +let debug s (f, comp) = + let pr = Printf.eprintf in + let fact dir node a = pr "%s %s for %s = %s\n" f.fact_name dir node (s a) in + let setter dir node run_sets set = + run_sets (fun u a -> pr "%s %s for %s = %s\n" f.fact_name dir node (s a); set u a) in + let rewr node g = pr "%s rewrites %s to \n" comp.name node in + let wrap f nodestring wrap_answer in' node txlim = + fact "in " (nodestring node) in'; + wrap_answer (nodestring node) (f in' node txlim) + and wrap_fact n answer = + let () = match answer with + | Dataflow a -> fact "out" n a + | Rewrite g -> rewr n g in + answer + and wrap_setter n answer = + match answer with + | Dataflow set -> Dataflow (setter "out" n set) + | Rewrite g -> (rewr n g; Rewrite g) in + let middle_out = wrap comp.middle_out (RS.rtl << G.mid_instr) wrap_fact in + let last_outs = wrap comp.last_outs (RS.rtl << G.last_instr) wrap_setter in + f, { comp with last_outs = last_outs; middle_out = middle_out; } +-} + +anal_f comp = comp { fc_first_out = wrap2 $ fc_first_out comp + , fc_middle_out = wrap2 $ fc_middle_out comp + , fc_last_outs = wrap2 $ fc_last_outs comp + , fc_exit_outs = wrap1 $ fc_exit_outs comp + } + where wrap2 f out node _txlim = return $ Dataflow (f out node) + wrap1 f fact _txlim = return $ Dataflow (f fact) + + +a_t_f anal tx = + let answer = answer' liftUSM + first_out in' id txlim = + answer txlim (fc_first_out tx in' id) (fc_first_out anal in' id) + middle_out in' m txlim = + answer txlim (fc_middle_out tx in' m) (fc_middle_out anal in' m) + last_outs in' l txlim = + answer txlim (fc_last_outs tx in' l) (fc_last_outs anal in' l) + exit_outs in' txlim = undefined + answer txlim (fc_exit_outs tx in') (fc_exit_outs anal in') + in FComp { fc_name = concat [fc_name anal, " and ", fc_name tx] + , fc_last_outs = last_outs, fc_middle_out = middle_out + , fc_first_out = first_out, fc_exit_outs = exit_outs } + + +{- Note [Rewriting labelled LGraphs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's hugely annoying that we get in an LGraph and in order to solve it +we have to slap on a new label which we then immediately strip off. +But the alternative is to have all the iterative solvers work on +Graphs, and then suddenly instead of a single case (ZBlock) every +solver has to deal with two cases (ZBlock and ZTail). So until +somebody comes along who is smart enough to do this and still leave +the code understandable for mortals, it stays as it is. + +(A good place to start changing things would be to figure out what is +the analogue of postorder_dfs for Graphs, and to figure out what +higher-order functions would do for dealing with the resulting +sequences of *things*.) +-} + +f4sep :: [SDoc] -> SDoc +f4sep [] = fsep [] +f4sep (d:ds) = fsep (d : map (nest 4) ds) + +subAnalysis' :: (Monad (m f), DataflowAnalysis m, Outputable f) => + m f a -> m f a +subAnalysis' m = + do { a <- subAnalysis $ + do { a <- m; facts <- allFacts + ; my_trace "after sub-analysis facts are" (pprFacts facts) $ + return a } + ; facts <- allFacts + ; my_trace "in parent analysis facts are" (pprFacts facts) $ + return a } + where pprFacts env = nest 2 $ vcat $ map pprFact $ ufmToList env + pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) -- 1.7.10.4