------------------------------------------------------------------------------
---
--- Cmm data types
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module Cmm (
- GenCmm(..), Cmm,
- GenCmmTop(..), CmmTop,
- GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts,
- CmmStmt(..), CmmActuals, CmmFormals,
- CmmCallTarget(..),
- CmmStatic(..), Section(..),
- CmmExpr(..), cmmExprRep,
- CmmReg(..), cmmRegRep,
- CmmLit(..), cmmLitRep,
- LocalReg(..), localRegRep,
- BlockId(..),
- GlobalReg(..), globalRegRep,
-
- node, nodeReg, spReg, hpReg, spLimReg
- ) where
-
-#include "HsVersions.h"
-
-import MachOp
-import CLabel
-import ForeignCall
-import Unique
-import FastString
-
-import Data.Word
-
------------------------------------------------------------------------------
--- Cmm, CmmTop, CmmBasicBlock
------------------------------------------------------------------------------
-
--- A file is a list of top-level chunks. These may be arbitrarily
--- re-orderd during code generation.
-
--- GenCmm is abstracted over
--- (a) the type of static data elements
--- (b) the contents of a basic block.
--- We expect there to be two main instances of this type:
--- (a) Plain C--, i.e. populated with CmmLit and CmmExpr respectively,
--- (b) Native code, populated with instructions
---
-newtype GenCmm d i = Cmm [GenCmmTop d i]
-
-type Cmm = GenCmm CmmStatic CmmStmt
-
--- A top-level chunk, abstracted over the type of the contents of
--- the basic blocks (Cmm or instructions are the likely instantiations).
-data GenCmmTop d i
- = CmmProc
- [d] -- Info table, may be empty
- CLabel -- Used to generate both info & entry labels
- [LocalReg] -- Argument locals live on entry (C-- procedure params)
- [GenBasicBlock i] -- Code, may be empty. The first block is
- -- the entry point. The order is otherwise initially
- -- unimportant, but at some point the code gen will
- -- fix the order.
-
- -- the BlockId of the first block does not give rise
- -- to a label. To jump to the first block in a Proc,
- -- use the appropriate CLabel.
-
- -- some static data.
- | CmmData Section [d] -- constant values only
-
-type CmmTop = GenCmmTop CmmStatic CmmStmt
+-- Cmm representations using Hoopl's Graph CmmNode e x.
+{-# LANGUAGE GADTs #-}
+{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
+
+{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
+#if __GLASGOW_HASKELL__ >= 701
+-- GHC 7.0.1 improved incomplete pattern warnings with GADTs
+{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
+#endif
--- A basic block containing a single label, at the beginning.
--- The list of basic blocks in a top-level code block may be re-ordered.
--- Fall-through is not allowed: there must be an explicit jump at the
--- end of each basic block, but the code generator might rearrange basic
--- blocks in order to turn some jumps into fallthroughs.
+module Cmm
+ ( CmmGraph, GenCmmGraph(..), CmmBlock
+ , CmmStackInfo(..), CmmTopInfo(..), Cmm, CmmTop
+ , CmmReplGraph, CmmFwdRewrite, CmmBwdRewrite
+
+ , modifyGraph
+ , lastNode, replaceLastNode, insertBetween
+ , ofBlockMap, toBlockMap, insertBlock
+ , ofBlockList, toBlockList, bodyToBlockList
+ , foldGraphBlocks, mapGraphNodes, postorderDfs
+
+ , analFwd, analBwd, analRewFwd, analRewBwd
+ , dataflowPassFwd, dataflowPassBwd
+ , module CmmNode
+ )
+where
+
+import BlockId
+import CmmDecl
+import CmmNode
+import OptimizationFuel as F
+import SMRep
+import UniqSupply
+
+import Compiler.Hoopl
+import Control.Monad
+import Data.Maybe
+import Panic
-data GenBasicBlock i = BasicBlock BlockId [i]
- -- ToDo: Julian suggests that we might need to annotate this type
- -- with the out & in edges in the graph, i.e. two * [BlockId]. This
- -- information can be derived from the contents, but it might be
- -- helpful to cache it here.
-
-type CmmBasicBlock = GenBasicBlock CmmStmt
-
-blockId :: GenBasicBlock i -> BlockId
--- The branch block id is that of the first block in
--- the branch, which is that branch's entry point
-blockId (BasicBlock blk_id _ ) = blk_id
+#include "HsVersions.h"
-blockStmts :: GenBasicBlock i -> [i]
-blockStmts (BasicBlock _ stmts) = stmts
-
-
------------------------------------------------------------------------------
--- CmmStmt
--- A "statement". Note that all branches are explicit: there are no
--- control transfers to computed addresses, except when transfering
--- control to a new function.
------------------------------------------------------------------------------
-
-data CmmStmt
- = CmmNop
- | CmmComment FastString
-
- | CmmAssign CmmReg CmmExpr -- Assign to register
-
- | CmmStore CmmExpr CmmExpr -- Assign to memory location. Size is
- -- given by cmmExprRep of the rhs.
-
- | CmmCall -- A foreign call, with
- CmmCallTarget
- CmmFormals -- zero or more results
- CmmActuals -- zero or more arguments
- (Maybe [GlobalReg]) -- Global regs that may need to be saved
- -- if they will be clobbered by the call.
- -- Nothing <=> save *all* globals that
- -- might be clobbered.
-
- | CmmBranch BlockId -- branch to another BB in this fn
-
- | CmmCondBranch CmmExpr BlockId -- conditional branch
-
- | CmmSwitch CmmExpr [Maybe BlockId] -- Table branch
- -- The scrutinee is zero-based;
- -- zero -> first block
- -- one -> second block etc
- -- Undefined outside range, and when there's a Nothing
-
- | CmmJump CmmExpr -- Jump to another function,
- CmmActuals -- with these parameters.
-
- | CmmReturn -- Return from a function,
- CmmActuals -- with these return values.
-
-type CmmActuals = [(CmmExpr,MachHint)]
-type CmmFormals = [(CmmReg,MachHint)]
-
-{-
-Discussion
-~~~~~~~~~~
-
-One possible problem with the above type is that the only way to do a
-non-local conditional jump is to encode it as a branch to a block that
-contains a single jump. This leads to inefficient code in the back end.
-
-One possible way to fix this would be:
-
-data CmmStat =
- ...
- | CmmJump CmmBranchDest
- | CmmCondJump CmmExpr CmmBranchDest
- ...
-
-data CmmBranchDest
- = Local BlockId
- | NonLocal CmmExpr [LocalReg]
-
-In favour:
-
-+ one fewer constructors in CmmStmt
-+ allows both cond branch and switch to jump to non-local destinations
-
-Against:
-
-- not strictly necessary: can already encode as branch+jump
-- not always possible to implement any better in the back end
-- could do the optimisation in the back end (but then plat-specific?)
-- C-- doesn't have it
-- back-end optimisation might be more general (jump shortcutting)
-
-So we'll stick with the way it is, and add the optimisation to the NCG.
--}
-
------------------------------------------------------------------------------
--- CmmCallTarget
---
--- The target of a CmmCall.
------------------------------------------------------------------------------
-
-data CmmCallTarget
- = CmmForeignCall -- Call to a foreign function
- CmmExpr -- literal label <=> static call
- -- other expression <=> dynamic call
- CCallConv -- The calling convention
-
- | CmmPrim -- Call to a "primitive" (eg. sin, cos)
- CallishMachOp -- These might be implemented as inline
- -- code by the backend.
-
------------------------------------------------------------------------------
--- CmmExpr
--- An expression. Expressions have no side effects.
------------------------------------------------------------------------------
-
-data CmmExpr
- = CmmLit CmmLit -- Literal
- | CmmLoad CmmExpr MachRep -- Read memory location
- | CmmReg CmmReg -- Contents of register
- | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
- | CmmRegOff CmmReg Int
- -- CmmRegOff reg i
- -- ** is shorthand only, meaning **
- -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep)))
- -- where rep = cmmRegRep reg
- deriving Eq
-
-cmmExprRep :: CmmExpr -> MachRep
-cmmExprRep (CmmLit lit) = cmmLitRep lit
-cmmExprRep (CmmLoad _ rep) = rep
-cmmExprRep (CmmReg reg) = cmmRegRep reg
-cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op
-cmmExprRep (CmmRegOff reg _) = cmmRegRep reg
-
-data CmmReg
- = CmmLocal LocalReg
- | CmmGlobal GlobalReg
- deriving( Eq )
-
-cmmRegRep :: CmmReg -> MachRep
-cmmRegRep (CmmLocal reg) = localRegRep reg
-cmmRegRep (CmmGlobal reg) = globalRegRep reg
-
-data LocalReg
- = LocalReg !Unique MachRep
-
-instance Eq LocalReg where
- (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2
-
-instance Uniquable LocalReg where
- getUnique (LocalReg uniq _) = uniq
-
-localRegRep :: LocalReg -> MachRep
-localRegRep (LocalReg _ rep) = rep
-
-data CmmLit
- = CmmInt Integer MachRep
- -- Interpretation: the 2's complement representation of the value
- -- is truncated to the specified size. This is easier than trying
- -- to keep the value within range, because we don't know whether
- -- it will be used as a signed or unsigned value (the MachRep doesn't
- -- distinguish between signed & unsigned).
- | CmmFloat Rational MachRep
- | CmmLabel CLabel -- Address of label
- | CmmLabelOff CLabel Int -- Address of label + byte offset
-
- -- Due to limitations in the C backend, the following
- -- MUST ONLY be used inside the info table indicated by label2
- -- (label2 must be the info label), and label1 must be an
- -- SRT, a slow entrypoint or a large bitmap (see the Mangler)
- -- Don't use it at all unless tablesNextToCode.
- -- It is also used inside the NCG during when generating
- -- position-independent code.
- | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset
- deriving Eq
-
-cmmLitRep :: CmmLit -> MachRep
-cmmLitRep (CmmInt _ rep) = rep
-cmmLitRep (CmmFloat _ rep) = rep
-cmmLitRep (CmmLabel _) = wordRep
-cmmLitRep (CmmLabelOff _ _) = wordRep
-cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep
-
------------------------------------------------------------------------------
--- A local label.
-
--- Local labels must be unique within a single compilation unit.
-
-newtype BlockId = BlockId Unique
- deriving (Eq,Ord)
-
-instance Uniquable BlockId where
- getUnique (BlockId u) = u
-
-type BlockEnv a = UniqFM {- BlockId -} a
-
------------------------------------------------------------------------------
--- Static Data
------------------------------------------------------------------------------
-
-data Section
- = Text
- | Data
- | ReadOnlyData
- | RelocatableReadOnlyData
- | UninitialisedData
- | ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned
- | OtherSection String
-
-data CmmStatic
- = CmmStaticLit CmmLit
- -- a literal value, size given by cmmLitRep of the literal.
- | CmmUninitialised Int
- -- uninitialised data, N bytes long
- | CmmAlign Int
- -- align to next N-byte boundary (N must be a power of 2).
- | CmmDataLabel CLabel
- -- label the current position in this section.
- | CmmString [Word8]
- -- string of 8-bit values only, not zero terminated.
-
------------------------------------------------------------------------------
--- Global STG registers
------------------------------------------------------------------------------
-
-data GlobalReg
- -- Argument and return registers
- = VanillaReg -- pointers, unboxed ints and chars
- {-# UNPACK #-} !Int -- its number
-
- | FloatReg -- single-precision floating-point registers
- {-# UNPACK #-} !Int -- its number
-
- | DoubleReg -- double-precision floating-point registers
- {-# UNPACK #-} !Int -- its number
-
- | LongReg -- long int registers (64-bit, really)
- {-# UNPACK #-} !Int -- its number
-
- -- STG registers
- | Sp -- Stack ptr; points to last occupied stack location.
- | SpLim -- Stack limit
- | Hp -- Heap ptr; points to last occupied heap location.
- | HpLim -- Heap limit register
- | CurrentTSO -- pointer to current thread's TSO
- | CurrentNursery -- pointer to allocation area
- | HpAlloc -- allocation count for heap check failure
-
- -- We keep the address of some commonly-called
- -- functions in the register table, to keep code
- -- size down:
- | GCEnter1 -- stg_gc_enter_1
- | GCFun -- stg_gc_fun
-
- -- Base offset for the register table, used for accessing registers
- -- which do not have real registers assigned to them. This register
- -- will only appear after we have expanded GlobalReg into memory accesses
- -- (where necessary) in the native code generator.
- | BaseReg
-
- -- Base Register for PIC (position-independent code) calculations
- -- Only used inside the native code generator. It's exact meaning differs
- -- from platform to platform (see module PositionIndependentCode).
- | PicBaseReg
-
- deriving( Eq
-#ifdef DEBUG
- , Show
-#endif
- )
-
--- convenient aliases
-spReg, hpReg, spLimReg, nodeReg :: CmmReg
-spReg = CmmGlobal Sp
-hpReg = CmmGlobal Hp
-spLimReg = CmmGlobal SpLim
-nodeReg = CmmGlobal node
-
-node :: GlobalReg
-node = VanillaReg 1
-
-globalRegRep :: GlobalReg -> MachRep
-globalRegRep (VanillaReg _) = wordRep
-globalRegRep (FloatReg _) = F32
-globalRegRep (DoubleReg _) = F64
-globalRegRep (LongReg _) = I64
-globalRegRep _ = wordRep
+-------------------------------------------------
+-- CmmBlock, CmmGraph and Cmm
+
+type CmmGraph = GenCmmGraph CmmNode
+data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C }
+type CmmBlock = Block CmmNode C C
+
+type CmmReplGraph e x = FuelUniqSM (Maybe (Graph CmmNode e x))
+type CmmFwdRewrite f = FwdRewrite FuelUniqSM CmmNode f
+type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f
+
+data CmmStackInfo = StackInfo {arg_space :: ByteOff, updfr_space :: Maybe ByteOff}
+data CmmTopInfo = TopInfo {info_tbl :: CmmInfoTable, stack_info :: CmmStackInfo}
+type Cmm = GenCmm CmmStatic CmmTopInfo CmmGraph
+type CmmTop = GenCmmTop CmmStatic CmmTopInfo CmmGraph
+
+-------------------------------------------------
+-- Manipulating CmmGraphs
+
+modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n'
+modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)}
+
+toBlockMap :: CmmGraph -> LabelMap CmmBlock
+toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body
+
+ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph
+ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO}
+
+insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock
+insertBlock block map =
+ ASSERT (isNothing $ mapLookup id map)
+ mapInsert id block map
+ where id = entryLabel block
+
+toBlockList :: CmmGraph -> [CmmBlock]
+toBlockList g = mapElems $ toBlockMap g
+
+ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph
+ofBlockList entry blocks = CmmGraph {g_entry=entry, g_graph=GMany NothingO body NothingO}
+ where body = foldr addBlock emptyBody blocks
+
+bodyToBlockList :: Body CmmNode -> [CmmBlock]
+bodyToBlockList body = mapElems body
+
+mapGraphNodes :: ( CmmNode C O -> CmmNode C O
+ , CmmNode O O -> CmmNode O O
+ , CmmNode O C -> CmmNode O C)
+ -> CmmGraph -> CmmGraph
+mapGraphNodes funs@(mf,_,_) g =
+ ofBlockMap (entryLabel $ mf $ CmmEntry $ g_entry g) $ mapMap (blockMapNodes3 funs) $ toBlockMap g
+
+foldGraphBlocks :: (CmmBlock -> a -> a) -> a -> CmmGraph -> a
+foldGraphBlocks k z g = mapFold k z $ toBlockMap g
+
+postorderDfs :: CmmGraph -> [CmmBlock]
+postorderDfs g = postorder_dfs_from (toBlockMap g) (g_entry g)
+
+-------------------------------------------------
+-- Manipulating CmmBlocks
+
+lastNode :: CmmBlock -> CmmNode O C
+lastNode block = foldBlockNodesF3 (nothing, nothing, const) block ()
+ where nothing :: a -> b -> ()
+ nothing _ _ = ()
+
+replaceLastNode :: Block CmmNode e C -> CmmNode O C -> Block CmmNode e C
+replaceLastNode block last = blockOfNodeList (first, middle, JustC last)
+ where (first, middle, _) = blockToNodeList block
+
+----------------------------------------------------------------------
+----- Splicing between blocks
+-- Given a middle node, a block, and a successor BlockId,
+-- we can insert the middle node between the block and the successor.
+-- We return the updated block and a list of new blocks that must be added
+-- to the graph.
+-- The semantics is a bit tricky. We consider cases on the last node:
+-- o For a branch, we can just insert before the branch,
+-- but sometimes the optimizer does better if we actually insert
+-- a fresh basic block, enabling some common blockification.
+-- o For a conditional branch, switch statement, or call, we must insert
+-- a new basic block.
+-- o For a jump or return, this operation is impossible.
+
+insertBetween :: MonadUnique m => CmmBlock -> [CmmNode O O] -> BlockId -> m (CmmBlock, [CmmBlock])
+insertBetween b ms succId = insert $ lastNode b
+ where insert :: MonadUnique m => CmmNode O C -> m (CmmBlock, [CmmBlock])
+ insert (CmmBranch bid) =
+ if bid == succId then
+ do (bid', bs) <- newBlocks
+ return (replaceLastNode b (CmmBranch bid'), bs)
+ else panic "tried invalid block insertBetween"
+ insert (CmmCondBranch c t f) =
+ do (t', tbs) <- if t == succId then newBlocks else return $ (t, [])
+ (f', fbs) <- if f == succId then newBlocks else return $ (f, [])
+ return (replaceLastNode b (CmmCondBranch c t' f'), tbs ++ fbs)
+ insert (CmmSwitch e ks) =
+ do (ids, bs) <- mapAndUnzipM mbNewBlocks ks
+ return (replaceLastNode b (CmmSwitch e ids), join bs)
+ insert (CmmCall {}) =
+ panic "unimp: insertBetween after a call -- probably not a good idea"
+ insert (CmmForeignCall {}) =
+ panic "unimp: insertBetween after a foreign call -- probably not a good idea"
+
+ newBlocks :: MonadUnique m => m (BlockId, [CmmBlock])
+ newBlocks = do id <- liftM mkBlockId $ getUniqueM
+ return $ (id, [blockOfNodeList (JustC (CmmEntry id), ms, JustC (CmmBranch succId))])
+ mbNewBlocks :: MonadUnique m => Maybe BlockId -> m (Maybe BlockId, [CmmBlock])
+ mbNewBlocks (Just k) = if k == succId then liftM fstJust newBlocks
+ else return (Just k, [])
+ mbNewBlocks Nothing = return (Nothing, [])
+ fstJust (id, bs) = (Just id, bs)
+
+-------------------------------------------------
+-- Running dataflow analysis and/or rewrites
+
+-- Constructing forward and backward analysis-only pass
+analFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdPass m n f
+analBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdPass m n f
+
+analFwd lat xfer = analRewFwd lat xfer noFwdRewrite
+analBwd lat xfer = analRewBwd lat xfer noBwdRewrite
+
+-- Constructing forward and backward analysis + rewrite pass
+analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdRewrite m n f -> FwdPass m n f
+analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdRewrite m n f -> BwdPass m n f
+
+analRewFwd lat xfer rew = FwdPass {fp_lattice = lat, fp_transfer = xfer, fp_rewrite = rew}
+analRewBwd lat xfer rew = BwdPass {bp_lattice = lat, bp_transfer = xfer, bp_rewrite = rew}
+
+-- Running forward and backward dataflow analysis + optional rewrite
+dataflowPassFwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> FwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
+dataflowPassFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do
+ (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)
+ return (CmmGraph {g_entry=entry, g_graph=graph}, facts)
+
+dataflowPassBwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> BwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
+dataflowPassBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = do
+ (graph, facts, NothingO) <- analyzeAndRewriteBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts)
+ return (CmmGraph {g_entry=entry, g_graph=graph}, facts)