X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fcmm%2FCmm.hs;h=24542e1020763fccbd06fbeedbcc022120183fdf;hp=d3bdeada1e2890404cb0baeccec8d1351d044926;hb=3ed54797c2824e1ad5cbe8015401e9e648bf122a;hpb=c614ed092823906e3e71421f3767facd7a83a6cc diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index d3bdead..24542e1 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -1,3 +1,7 @@ +{-# OPTIONS -fno-warn-name-shadowing -w #-} +-- We'd like to use -fno-warn-orphans rather than -w, but old compilers +-- don't understand it so building stage1 fails. + ----------------------------------------------------------------------------- -- -- Cmm data types @@ -7,32 +11,58 @@ ----------------------------------------------------------------------------- module Cmm ( - GenCmm(..), Cmm, - GenCmmTop(..), CmmTop, - GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, - CmmStmt(..), CmmActuals, CmmFormals, + GenCmm(..), Cmm, RawCmm, + GenCmmTop(..), CmmTop, RawCmmTop, + ListGraph(..), + cmmMapGraph, cmmTopMapGraph, + cmmMapGraphM, cmmTopMapGraphM, + CmmInfo(..), UpdateFrame(..), + CmmInfoTable(..), ClosureTypeInfo(..), ProfilingInfo(..), ClosureTypeTag, + GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, mapBlockStmts, + CmmReturnInfo(..), + CmmStmt(..), CmmActuals, CmmFormal, CmmFormals, CmmHintFormals, + CmmSafety(..), CmmCallTarget(..), CmmStatic(..), Section(..), - CmmExpr(..), cmmExprRep, + CmmExpr(..), cmmExprRep, maybeInvertCmmExpr, CmmReg(..), cmmRegRep, CmmLit(..), cmmLitRep, - LocalReg(..), localRegRep, - BlockId(..), + LocalReg(..), localRegRep, localRegGCFollow, Kind(..), + BlockId(..), freshBlockId, + BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, mkBlockEnv, + BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, GlobalReg(..), globalRegRep, - node, nodeReg, spReg, hpReg, + node, nodeReg, spReg, hpReg, spLimReg ) where +-- ^ In order not to do violence to the import structure of the rest +-- of the compiler, module Cmm re-exports a number of identifiers +-- defined in 'CmmExpr' + #include "HsVersions.h" +import CmmExpr import MachOp import CLabel import ForeignCall -import Unique +import SMRep +import ClosureInfo +import Outputable import FastString import Data.Word +import ZipCfg ( BlockId(..), freshBlockId + , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, mkBlockEnv + , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet + ) + +-- A [[BlockId]] is a local label. +-- Local labels must be unique within an entire compilation unit, not +-- just a single top-level item, because local labels map one-to-one +-- with assembly-language labels. + ----------------------------------------------------------------------------- -- Cmm, CmmTop, CmmBasicBlock ----------------------------------------------------------------------------- @@ -41,36 +71,50 @@ import Data.Word -- re-orderd during code generation. -- GenCmm is abstracted over --- (a) the type of static data elements --- (b) the contents of a basic block. +-- d, the type of static data elements in CmmData +-- h, the static info preceding the code of a CmmProc +-- g, the control-flow graph of a CmmProc +-- -- We expect there to be two main instances of this type: --- (a) Plain C--, i.e. populated with CmmLit and CmmExpr respectively, --- (b) Native code, populated with instructions +-- (a) C--, i.e. populated with various C-- constructs +-- (Cmm and RawCmm below) +-- (b) Native code, populated with data/instructions -- -newtype GenCmm d i = Cmm [GenCmmTop d i] - -type Cmm = GenCmm CmmStatic CmmStmt +-- A second family of instances based on ZipCfg is work in progress. +-- +newtype GenCmm d h g = Cmm [GenCmmTop d h g] --- A top-level chunk, abstracted over the type of the contents of +-- | A top-level chunk, abstracted over the type of the contents of -- the basic blocks (Cmm or instructions are the likely instantiations). -data GenCmmTop d i - = CmmProc - [d] -- Info table, may be empty +data GenCmmTop d h g + = CmmProc -- A procedure + h -- Extra header such as the info table CLabel -- Used to generate both info & entry labels - [LocalReg] -- Argument locals live on entry (C-- procedure params) - [GenBasicBlock i] -- Code, may be empty. The first block is - -- the entry point. The order is otherwise initially - -- unimportant, but at some point the code gen will - -- fix the order. + CmmFormals -- Argument locals live on entry (C-- procedure params) + g -- Control-flow graph for the procedure's code + + | CmmData -- Static data + Section + [d] + +-- | A control-flow graph represented as a list of extended basic blocks. +newtype ListGraph i = ListGraph [GenBasicBlock i] + -- ^ Code, may be empty. The first block is the entry point. The + -- order is otherwise initially unimportant, but at some point the + -- code gen will fix the order. - -- the BlockId of the first block does not give rise - -- to a label. To jump to the first block in a Proc, - -- use the appropriate CLabel. + -- BlockIds must be unique across an entire compilation unit, since + -- they are translated to assembly-language labels, which scope + -- across a whole compilation unit. - -- some static data. - | CmmData Section [d] -- constant values only +-- | Cmm with the info table as a data type +type Cmm = GenCmm CmmStatic CmmInfo (ListGraph CmmStmt) +type CmmTop = GenCmmTop CmmStatic CmmInfo (ListGraph CmmStmt) + +-- | Cmm with the info tables converted to a list of 'CmmStatic' +type RawCmm = GenCmm CmmStatic [CmmStatic] (ListGraph CmmStmt) +type RawCmmTop = GenCmmTop CmmStatic [CmmStatic] (ListGraph CmmStmt) -type CmmTop = GenCmmTop CmmStatic CmmStmt -- A basic block containing a single label, at the beginning. -- The list of basic blocks in a top-level code block may be re-ordered. @@ -79,12 +123,10 @@ type CmmTop = GenCmmTop CmmStatic CmmStmt -- blocks in order to turn some jumps into fallthroughs. data GenBasicBlock i = BasicBlock BlockId [i] - -- ToDo: Julian suggests that we might need to annotate this type - -- with the out & in edges in the graph, i.e. two * [BlockId]. This - -- information can be derived from the contents, but it might be - -- helpful to cache it here. +type CmmBasicBlock = GenBasicBlock CmmStmt -type CmmBasicBlock = GenBasicBlock CmmStmt +instance UserOfLocalRegs i => UserOfLocalRegs (GenBasicBlock i) where + foldRegsUsed f set (BasicBlock _ l) = foldRegsUsed f set l blockId :: GenBasicBlock i -> BlockId -- The branch block id is that of the first block in @@ -95,6 +137,80 @@ blockStmts :: GenBasicBlock i -> [i] blockStmts (BasicBlock _ stmts) = stmts +mapBlockStmts :: (i -> i') -> GenBasicBlock i -> GenBasicBlock i' +mapBlockStmts f (BasicBlock id bs) = BasicBlock id (map f bs) +---------------------------------------------------------------- +-- graph maps +---------------------------------------------------------------- + +cmmMapGraph :: (g -> g') -> GenCmm d h g -> GenCmm d h g' +cmmTopMapGraph :: (g -> g') -> GenCmmTop d h g -> GenCmmTop d h g' + +cmmMapGraphM :: Monad m => (String -> g -> m g') -> GenCmm d h g -> m (GenCmm d h g') +cmmTopMapGraphM :: Monad m => (String -> g -> m g') -> GenCmmTop d h g -> m (GenCmmTop d h g') + +cmmMapGraph f (Cmm tops) = Cmm $ map (cmmTopMapGraph f) tops +cmmTopMapGraph f (CmmProc h l args g) = CmmProc h l args (f g) +cmmTopMapGraph _ (CmmData s ds) = CmmData s ds + +cmmMapGraphM f (Cmm tops) = mapM (cmmTopMapGraphM f) tops >>= return . Cmm +cmmTopMapGraphM f (CmmProc h l args g) = f (showSDoc $ ppr l) g >>= return . CmmProc h l args +cmmTopMapGraphM _ (CmmData s ds) = return $ CmmData s ds + +----------------------------------------------------------------------------- +-- Info Tables +----------------------------------------------------------------------------- + +data CmmInfo + = CmmInfo + (Maybe BlockId) -- GC target. Nothing <=> CPS won't do stack check + (Maybe UpdateFrame) -- Update frame + CmmInfoTable -- Info table + +-- Info table as a haskell data type +data CmmInfoTable + = CmmInfoTable + ProfilingInfo + ClosureTypeTag -- Int + ClosureTypeInfo + | CmmNonInfoTable -- Procedure doesn't need an info table + +-- TODO: The GC target shouldn't really be part of CmmInfo +-- as it doesn't appear in the resulting info table. +-- It should be factored out. + +data ClosureTypeInfo + = ConstrInfo ClosureLayout ConstrTag ConstrDescription + | FunInfo ClosureLayout C_SRT FunType FunArity ArgDescr SlowEntry + | ThunkInfo ClosureLayout C_SRT + | ThunkSelectorInfo SelectorOffset C_SRT + | ContInfo + [Maybe LocalReg] -- Forced stack parameters + C_SRT + +data CmmReturnInfo = CmmMayReturn + | CmmNeverReturns + +-- TODO: These types may need refinement +data ProfilingInfo = ProfilingInfo CmmLit CmmLit -- closure_type, closure_desc +type ClosureTypeTag = StgHalfWord +type ClosureLayout = (StgHalfWord, StgHalfWord) -- ptrs, nptrs +type ConstrTag = StgHalfWord +type ConstrDescription = CmmLit +type FunType = StgHalfWord +type FunArity = StgHalfWord +type SlowEntry = CmmLit + -- ^We would like this to be a CLabel but + -- for now the parser sets this to zero on an INFO_TABLE_FUN. +type SelectorOffset = StgWord + +-- | A frame that is to be pushed before entry to the function. +-- Used to handle 'update' frames. +data UpdateFrame = + UpdateFrame + CmmExpr -- Frame header. Behaves like the target of a 'jump'. + [CmmExpr] -- Frame remainder. Behaves like the arguments of a 'jump'. + ----------------------------------------------------------------------------- -- CmmStmt -- A "statement". Note that all branches are explicit: there are no @@ -111,14 +227,12 @@ data CmmStmt | CmmStore CmmExpr CmmExpr -- Assign to memory location. Size is -- given by cmmExprRep of the rhs. - | CmmCall -- A foreign call, with + | CmmCall -- A call (forign, native or primitive), with CmmCallTarget - CmmFormals -- zero or more results + CmmHintFormals -- zero or more results CmmActuals -- zero or more arguments - (Maybe [GlobalReg]) -- Global regs that may need to be saved - -- if they will be clobbered by the call. - -- Nothing <=> save *all* globals that - -- might be clobbered. + CmmSafety -- whether to build a continuation + CmmReturnInfo | CmmBranch BlockId -- branch to another BB in this fn @@ -130,14 +244,40 @@ data CmmStmt -- one -> second block etc -- Undefined outside range, and when there's a Nothing - | CmmJump CmmExpr -- Jump to another function, - CmmActuals -- with these parameters. - - | CmmReturn -- Return from a function, - CmmActuals -- with these return values. - -type CmmActuals = [(CmmExpr,MachHint)] -type CmmFormals = [(CmmReg,MachHint)] + | CmmJump CmmExpr -- Jump to another C-- function, + CmmActuals -- with these parameters. + + | CmmReturn -- Return from a native C-- function, + CmmActuals -- with these return values. + +type CmmActual = CmmExpr +type CmmActuals = [(CmmActual,MachHint)] +type CmmFormal = LocalReg +type CmmHintFormals = [(CmmFormal,MachHint)] +type CmmFormals = [CmmFormal] +data CmmSafety = CmmUnsafe | CmmSafe C_SRT + +-- | enable us to fold used registers over 'CmmActuals' and 'CmmHintFormals' +instance UserOfLocalRegs a => UserOfLocalRegs (a, MachHint) where + foldRegsUsed f set (a, _) = foldRegsUsed f set a + +instance UserOfLocalRegs CmmStmt where + foldRegsUsed f set s = stmt s set + where stmt (CmmNop) = id + stmt (CmmComment {}) = id + stmt (CmmAssign _ e) = gen e + stmt (CmmStore e1 e2) = gen e1 . gen e2 + stmt (CmmCall target _ es _ _) = gen target . gen es + stmt (CmmBranch _) = id + stmt (CmmCondBranch e _) = gen e + stmt (CmmSwitch e _) = gen e + stmt (CmmJump e es) = gen e . gen es + stmt (CmmReturn es) = gen es + gen a set = foldRegsUsed f set a + +instance UserOfLocalRegs CmmCallTarget where + foldRegsUsed f set (CmmCallee e _) = foldRegsUsed f set e + foldRegsUsed _ set (CmmPrim {}) = set {- Discussion @@ -147,6 +287,10 @@ One possible problem with the above type is that the only way to do a non-local conditional jump is to encode it as a branch to a block that contains a single jump. This leads to inefficient code in the back end. +[N.B. This problem will go away when we make the transition to the +'zipper' form of control-flow graph, in which both targets of a +conditional jump are explicit. ---NR] + One possible way to fix this would be: data CmmStat = @@ -182,100 +326,16 @@ So we'll stick with the way it is, and add the optimisation to the NCG. ----------------------------------------------------------------------------- data CmmCallTarget - = CmmForeignCall -- Call to a foreign function + = CmmCallee -- Call a function (foreign or native) CmmExpr -- literal label <=> static call -- other expression <=> dynamic call CCallConv -- The calling convention - | CmmPrim -- Call to a "primitive" (eg. sin, cos) + | CmmPrim -- Call a "primitive" (eg. sin, cos) CallishMachOp -- These might be implemented as inline -- code by the backend. ----------------------------------------------------------------------------- --- CmmExpr --- An expression. Expressions have no side effects. ------------------------------------------------------------------------------ - -data CmmExpr - = CmmLit CmmLit -- Literal - | CmmLoad CmmExpr MachRep -- Read memory location - | CmmReg CmmReg -- Contents of register - | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) - | CmmRegOff CmmReg Int - -- CmmRegOff reg i - -- ** is shorthand only, meaning ** - -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep))) - -- where rep = cmmRegRep reg - deriving Eq - -cmmExprRep :: CmmExpr -> MachRep -cmmExprRep (CmmLit lit) = cmmLitRep lit -cmmExprRep (CmmLoad _ rep) = rep -cmmExprRep (CmmReg reg) = cmmRegRep reg -cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op -cmmExprRep (CmmRegOff reg _) = cmmRegRep reg - -data CmmReg - = CmmLocal LocalReg - | CmmGlobal GlobalReg - deriving( Eq ) - -cmmRegRep :: CmmReg -> MachRep -cmmRegRep (CmmLocal reg) = localRegRep reg -cmmRegRep (CmmGlobal reg) = globalRegRep reg - -data LocalReg - = LocalReg !Unique MachRep - -instance Eq LocalReg where - (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 - -instance Uniquable LocalReg where - getUnique (LocalReg uniq _) = uniq - -localRegRep :: LocalReg -> MachRep -localRegRep (LocalReg _ rep) = rep - -data CmmLit - = CmmInt Integer MachRep - -- Interpretation: the 2's complement representation of the value - -- is truncated to the specified size. This is easier than trying - -- to keep the value within range, because we don't know whether - -- it will be used as a signed or unsigned value (the MachRep doesn't - -- distinguish between signed & unsigned). - | CmmFloat Rational MachRep - | CmmLabel CLabel -- Address of label - | CmmLabelOff CLabel Int -- Address of label + byte offset - - -- Due to limitations in the C backend, the following - -- MUST ONLY be used inside the info table indicated by label2 - -- (label2 must be the info label), and label1 must be an - -- SRT, a slow entrypoint or a large bitmap (see the Mangler) - -- Don't use it at all unless tablesNextToCode. - -- It is also used inside the NCG during when generating - -- position-independent code. - | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset - deriving Eq - -cmmLitRep :: CmmLit -> MachRep -cmmLitRep (CmmInt _ rep) = rep -cmmLitRep (CmmFloat _ rep) = rep -cmmLitRep (CmmLabel _) = wordRep -cmmLitRep (CmmLabelOff _ _) = wordRep -cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep - ------------------------------------------------------------------------------ --- A local label. - --- Local labels must be unique within a single compilation unit. - -newtype BlockId = BlockId Unique - deriving (Eq,Ord) - -instance Uniquable BlockId where - getUnique (BlockId u) = u - ------------------------------------------------------------------------------ -- Static Data ----------------------------------------------------------------------------- @@ -300,68 +360,3 @@ data CmmStatic | CmmString [Word8] -- string of 8-bit values only, not zero terminated. ------------------------------------------------------------------------------ --- Global STG registers ------------------------------------------------------------------------------ - -data GlobalReg - -- Argument and return registers - = VanillaReg -- pointers, unboxed ints and chars - {-# UNPACK #-} !Int -- its number - - | FloatReg -- single-precision floating-point registers - {-# UNPACK #-} !Int -- its number - - | DoubleReg -- double-precision floating-point registers - {-# UNPACK #-} !Int -- its number - - | LongReg -- long int registers (64-bit, really) - {-# UNPACK #-} !Int -- its number - - -- STG registers - | Sp -- Stack ptr; points to last occupied stack location. - | SpLim -- Stack limit - | Hp -- Heap ptr; points to last occupied heap location. - | HpLim -- Heap limit register - | CurrentTSO -- pointer to current thread's TSO - | CurrentNursery -- pointer to allocation area - | HpAlloc -- allocation count for heap check failure - - -- We keep the address of some commonly-called - -- functions in the register table, to keep code - -- size down: - | GCEnter1 -- stg_gc_enter_1 - | GCFun -- stg_gc_fun - - -- Base offset for the register table, used for accessing registers - -- which do not have real registers assigned to them. This register - -- will only appear after we have expanded GlobalReg into memory accesses - -- (where necessary) in the native code generator. - | BaseReg - - -- Base Register for PIC (position-independent code) calculations - -- Only used inside the native code generator. It's exact meaning differs - -- from platform to platform (see module PositionIndependentCode). - | PicBaseReg - - deriving( Eq -#ifdef DEBUG - , Show -#endif - ) - --- convenient aliases -spReg, hpReg, nodeReg :: CmmReg -spReg = CmmGlobal Sp -hpReg = CmmGlobal Hp -nodeReg = CmmGlobal node - -node :: GlobalReg -node = VanillaReg 1 - -globalRegRep :: GlobalReg -> MachRep -globalRegRep (VanillaReg _) = wordRep -globalRegRep (FloatReg _) = F32 -globalRegRep (DoubleReg _) = F64 -globalRegRep (LongReg _) = I64 -globalRegRep _ = wordRep