1 -----------------------------------------------------------------------------
5 -- (c) The University of Glasgow 2004-2006
7 -----------------------------------------------------------------------------
10 GenCmm(..), Cmm, RawCmm,
11 GenCmmTop(..), CmmTop, RawCmmTop,
13 CmmInfo(..), UpdateFrame(..),
14 CmmInfoTable(..), ClosureTypeInfo(..), ProfilingInfo(..), ClosureTypeTag,
15 GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, mapBlockStmts,
17 CmmStmt(..), CmmActuals, CmmFormal, CmmFormals, CmmHintFormals,
20 CmmStatic(..), Section(..),
21 CmmExpr(..), cmmExprRep,
22 CmmReg(..), cmmRegRep,
23 CmmLit(..), cmmLitRep,
24 LocalReg(..), localRegRep, localRegGCFollow, Kind(..),
25 BlockId(..), BlockEnv,
26 GlobalReg(..), globalRegRep,
28 node, nodeReg, spReg, hpReg, spLimReg
31 #include "HsVersions.h"
44 -----------------------------------------------------------------------------
45 -- Cmm, CmmTop, CmmBasicBlock
46 -----------------------------------------------------------------------------
48 -- A file is a list of top-level chunks. These may be arbitrarily
49 -- re-orderd during code generation.
51 -- GenCmm is abstracted over
52 -- d, the type of static data elements in CmmData
53 -- h, the static info preceding the code of a CmmProc
54 -- g, the control-flow graph of a CmmProc
56 -- We expect there to be two main instances of this type:
57 -- (a) C--, i.e. populated with various C-- constructs
58 -- (Cmm and RawCmm below)
59 -- (b) Native code, populated with data/instructions
61 newtype GenCmm d h g = Cmm [GenCmmTop d h g]
63 -- | A top-level chunk, abstracted over the type of the contents of
64 -- the basic blocks (Cmm or instructions are the likely instantiations).
66 = CmmProc -- A procedure
67 h -- Extra header such as the info table
68 CLabel -- Used to generate both info & entry labels
69 CmmFormals -- Argument locals live on entry (C-- procedure params)
70 g -- Control-flow graph for the procedure's code
72 | CmmData -- Static data
76 -- | A control-flow graph represented as a list of extended basic blocks.
77 newtype ListGraph i = ListGraph [GenBasicBlock i]
78 -- ^ Code, may be empty. The first block is the entry point. The
79 -- order is otherwise initially unimportant, but at some point the
80 -- code gen will fix the order.
82 -- BlockIds must be unique across an entire compilation unit, since
83 -- they are translated to assembly-language labels, which scope
84 -- across a whole compilation unit.
86 -- | Cmm with the info table as a data type
87 type Cmm = GenCmm CmmStatic CmmInfo (ListGraph CmmStmt)
88 type CmmTop = GenCmmTop CmmStatic CmmInfo (ListGraph CmmStmt)
90 -- | Cmm with the info tables converted to a list of 'CmmStatic'
91 type RawCmm = GenCmm CmmStatic [CmmStatic] (ListGraph CmmStmt)
92 type RawCmmTop = GenCmmTop CmmStatic [CmmStatic] (ListGraph CmmStmt)
95 -- A basic block containing a single label, at the beginning.
96 -- The list of basic blocks in a top-level code block may be re-ordered.
97 -- Fall-through is not allowed: there must be an explicit jump at the
98 -- end of each basic block, but the code generator might rearrange basic
99 -- blocks in order to turn some jumps into fallthroughs.
101 data GenBasicBlock i = BasicBlock BlockId [i]
102 type CmmBasicBlock = GenBasicBlock CmmStmt
104 blockId :: GenBasicBlock i -> BlockId
105 -- The branch block id is that of the first block in
106 -- the branch, which is that branch's entry point
107 blockId (BasicBlock blk_id _ ) = blk_id
109 blockStmts :: GenBasicBlock i -> [i]
110 blockStmts (BasicBlock _ stmts) = stmts
112 mapBlockStmts :: (i -> i') -> GenBasicBlock i -> GenBasicBlock i'
113 mapBlockStmts f (BasicBlock id bs) = BasicBlock id (map f bs)
115 -----------------------------------------------------------------------------
117 -----------------------------------------------------------------------------
121 (Maybe BlockId) -- GC target. Nothing <=> CPS won't do stack check
122 (Maybe UpdateFrame) -- Update frame
123 CmmInfoTable -- Info table
125 -- Info table as a haskell data type
129 ClosureTypeTag -- Int
131 | CmmNonInfoTable -- Procedure doesn't need an info table
133 -- TODO: The GC target shouldn't really be part of CmmInfo
134 -- as it doesn't appear in the resulting info table.
135 -- It should be factored out.
138 = ConstrInfo ClosureLayout ConstrTag ConstrDescription
139 | FunInfo ClosureLayout C_SRT FunType FunArity ArgDescr SlowEntry
140 | ThunkInfo ClosureLayout C_SRT
141 | ThunkSelectorInfo SelectorOffset C_SRT
143 [Maybe LocalReg] -- Forced stack parameters
146 data CmmReturnInfo = CmmMayReturn
149 -- TODO: These types may need refinement
150 data ProfilingInfo = ProfilingInfo CmmLit CmmLit -- closure_type, closure_desc
151 type ClosureTypeTag = StgHalfWord
152 type ClosureLayout = (StgHalfWord, StgHalfWord) -- ptrs, nptrs
153 type ConstrTag = StgHalfWord
154 type ConstrDescription = CmmLit
155 type FunType = StgHalfWord
156 type FunArity = StgHalfWord
157 type SlowEntry = CmmLit
158 -- ^We would like this to be a CLabel but
159 -- for now the parser sets this to zero on an INFO_TABLE_FUN.
160 type SelectorOffset = StgWord
162 -- | A frame that is to be pushed before entry to the function.
163 -- Used to handle 'update' frames.
166 CmmExpr -- Frame header. Behaves like the target of a 'jump'.
167 [CmmExpr] -- Frame remainder. Behaves like the arguments of a 'jump'.
169 -----------------------------------------------------------------------------
171 -- A "statement". Note that all branches are explicit: there are no
172 -- control transfers to computed addresses, except when transfering
173 -- control to a new function.
174 -----------------------------------------------------------------------------
178 | CmmComment FastString
180 | CmmAssign CmmReg CmmExpr -- Assign to register
182 | CmmStore CmmExpr CmmExpr -- Assign to memory location. Size is
183 -- given by cmmExprRep of the rhs.
185 | CmmCall -- A call (forign, native or primitive), with
187 CmmHintFormals -- zero or more results
188 CmmActuals -- zero or more arguments
189 CmmSafety -- whether to build a continuation
192 | CmmBranch BlockId -- branch to another BB in this fn
194 | CmmCondBranch CmmExpr BlockId -- conditional branch
196 | CmmSwitch CmmExpr [Maybe BlockId] -- Table branch
197 -- The scrutinee is zero-based;
198 -- zero -> first block
199 -- one -> second block etc
200 -- Undefined outside range, and when there's a Nothing
202 | CmmJump CmmExpr -- Jump to another C-- function,
203 CmmActuals -- with these parameters.
205 | CmmReturn -- Return from a native C-- function,
206 CmmActuals -- with these return values.
208 type CmmActual = CmmExpr
209 type CmmActuals = [(CmmActual,MachHint)]
210 type CmmFormal = LocalReg
211 type CmmHintFormals = [(CmmFormal,MachHint)]
212 type CmmFormals = [CmmFormal]
213 data CmmSafety = CmmUnsafe | CmmSafe C_SRT
219 One possible problem with the above type is that the only way to do a
220 non-local conditional jump is to encode it as a branch to a block that
221 contains a single jump. This leads to inefficient code in the back end.
223 One possible way to fix this would be:
227 | CmmJump CmmBranchDest
228 | CmmCondJump CmmExpr CmmBranchDest
233 | NonLocal CmmExpr [LocalReg]
237 + one fewer constructors in CmmStmt
238 + allows both cond branch and switch to jump to non-local destinations
242 - not strictly necessary: can already encode as branch+jump
243 - not always possible to implement any better in the back end
244 - could do the optimisation in the back end (but then plat-specific?)
245 - C-- doesn't have it
246 - back-end optimisation might be more general (jump shortcutting)
248 So we'll stick with the way it is, and add the optimisation to the NCG.
251 -----------------------------------------------------------------------------
254 -- The target of a CmmCall.
255 -----------------------------------------------------------------------------
258 = CmmCallee -- Call a function (foreign or native)
259 CmmExpr -- literal label <=> static call
260 -- other expression <=> dynamic call
261 CCallConv -- The calling convention
263 | CmmPrim -- Call a "primitive" (eg. sin, cos)
264 CallishMachOp -- These might be implemented as inline
265 -- code by the backend.
267 -----------------------------------------------------------------------------
269 -- An expression. Expressions have no side effects.
270 -----------------------------------------------------------------------------
273 = CmmLit CmmLit -- Literal
274 | CmmLoad CmmExpr MachRep -- Read memory location
275 | CmmReg CmmReg -- Contents of register
276 | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
277 | CmmRegOff CmmReg Int
279 -- ** is shorthand only, meaning **
280 -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep)))
281 -- where rep = cmmRegRep reg
286 | CmmGlobal GlobalReg
289 -- | Whether a 'LocalReg' is a GC followable pointer
290 data Kind = KindPtr | KindNonPtr deriving (Eq)
294 !Unique -- ^ Identifier
296 Kind -- ^ Should the GC follow as a pointer
299 = CmmInt Integer MachRep
300 -- Interpretation: the 2's complement representation of the value
301 -- is truncated to the specified size. This is easier than trying
302 -- to keep the value within range, because we don't know whether
303 -- it will be used as a signed or unsigned value (the MachRep doesn't
304 -- distinguish between signed & unsigned).
305 | CmmFloat Rational MachRep
306 | CmmLabel CLabel -- Address of label
307 | CmmLabelOff CLabel Int -- Address of label + byte offset
309 -- Due to limitations in the C backend, the following
310 -- MUST ONLY be used inside the info table indicated by label2
311 -- (label2 must be the info label), and label1 must be an
312 -- SRT, a slow entrypoint or a large bitmap (see the Mangler)
313 -- Don't use it at all unless tablesNextToCode.
314 -- It is also used inside the NCG during when generating
315 -- position-independent code.
316 | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset
319 instance Eq LocalReg where
320 (LocalReg u1 _ _) == (LocalReg u2 _ _) = u1 == u2
322 instance Uniquable LocalReg where
323 getUnique (LocalReg uniq _ _) = uniq
325 -----------------------------------------------------------------------------
327 -----------------------------------------------------------------------------
328 cmmExprRep :: CmmExpr -> MachRep
329 cmmExprRep (CmmLit lit) = cmmLitRep lit
330 cmmExprRep (CmmLoad _ rep) = rep
331 cmmExprRep (CmmReg reg) = cmmRegRep reg
332 cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op
333 cmmExprRep (CmmRegOff reg _) = cmmRegRep reg
335 cmmRegRep :: CmmReg -> MachRep
336 cmmRegRep (CmmLocal reg) = localRegRep reg
337 cmmRegRep (CmmGlobal reg) = globalRegRep reg
339 localRegRep :: LocalReg -> MachRep
340 localRegRep (LocalReg _ rep _) = rep
342 localRegGCFollow :: LocalReg -> Kind
343 localRegGCFollow (LocalReg _ _ p) = p
345 cmmLitRep :: CmmLit -> MachRep
346 cmmLitRep (CmmInt _ rep) = rep
347 cmmLitRep (CmmFloat _ rep) = rep
348 cmmLitRep (CmmLabel _) = wordRep
349 cmmLitRep (CmmLabelOff _ _) = wordRep
350 cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep
352 -----------------------------------------------------------------------------
355 -- Local labels must be unique within a single compilation unit.
357 newtype BlockId = BlockId Unique
360 instance Uniquable BlockId where
361 getUnique (BlockId u) = u
363 type BlockEnv a = UniqFM {- BlockId -} a
365 -----------------------------------------------------------------------------
367 -----------------------------------------------------------------------------
373 | RelocatableReadOnlyData
375 | ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned
376 | OtherSection String
379 = CmmStaticLit CmmLit
380 -- a literal value, size given by cmmLitRep of the literal.
381 | CmmUninitialised Int
382 -- uninitialised data, N bytes long
384 -- align to next N-byte boundary (N must be a power of 2).
385 | CmmDataLabel CLabel
386 -- label the current position in this section.
388 -- string of 8-bit values only, not zero terminated.
390 -----------------------------------------------------------------------------
391 -- Global STG registers
392 -----------------------------------------------------------------------------
395 -- Argument and return registers
396 = VanillaReg -- pointers, unboxed ints and chars
397 {-# UNPACK #-} !Int -- its number
399 | FloatReg -- single-precision floating-point registers
400 {-# UNPACK #-} !Int -- its number
402 | DoubleReg -- double-precision floating-point registers
403 {-# UNPACK #-} !Int -- its number
405 | LongReg -- long int registers (64-bit, really)
406 {-# UNPACK #-} !Int -- its number
409 | Sp -- Stack ptr; points to last occupied stack location.
410 | SpLim -- Stack limit
411 | Hp -- Heap ptr; points to last occupied heap location.
412 | HpLim -- Heap limit register
413 | CurrentTSO -- pointer to current thread's TSO
414 | CurrentNursery -- pointer to allocation area
415 | HpAlloc -- allocation count for heap check failure
417 -- We keep the address of some commonly-called
418 -- functions in the register table, to keep code
420 | GCEnter1 -- stg_gc_enter_1
421 | GCFun -- stg_gc_fun
423 -- Base offset for the register table, used for accessing registers
424 -- which do not have real registers assigned to them. This register
425 -- will only appear after we have expanded GlobalReg into memory accesses
426 -- (where necessary) in the native code generator.
429 -- Base Register for PIC (position-independent code) calculations
430 -- Only used inside the native code generator. It's exact meaning differs
431 -- from platform to platform (see module PositionIndependentCode).
440 -- convenient aliases
441 spReg, hpReg, spLimReg, nodeReg :: CmmReg
444 spLimReg = CmmGlobal SpLim
445 nodeReg = CmmGlobal node
450 globalRegRep :: GlobalReg -> MachRep
451 globalRegRep (VanillaReg _) = wordRep
452 globalRegRep (FloatReg _) = F32
453 globalRegRep (DoubleReg _) = F64
454 globalRegRep (LongReg _) = I64
455 globalRegRep _ = wordRep