1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
20 import RegAllocInfo ( jumpDests )
22 import PositionIndependentCode
25 import CmmOpt ( cmmMiniInline, cmmMachOpFold )
26 import PprCmm ( pprStmt, pprCmms )
28 import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
29 #if powerpc_TARGET_ARCH
30 import CLabel ( mkRtsCodeLabel )
34 import Unique ( Unique, getUnique )
37 import List ( groupBy, sortBy )
38 import CLabel ( pprCLabel )
39 import ErrUtils ( dumpIfSet_dyn )
40 import DynFlags ( DynFlags, DynFlag(..), dopt )
41 import StaticFlags ( opt_Static, opt_PIC )
42 import Config ( cProjectVersion )
45 import qualified Pretty
53 import List ( intersperse )
62 The native-code generator has machine-independent and
63 machine-dependent modules.
65 This module ("AsmCodeGen") is the top-level machine-independent
66 module. Before entering machine-dependent land, we do some
67 machine-independent optimisations (defined below) on the
70 We convert to the machine-specific 'Instr' datatype with
71 'cmmCodeGen', assuming an infinite supply of registers. We then use
72 a machine-independent register allocator ('regAlloc') to rejoin
73 reality. Obviously, 'regAlloc' has machine-specific helper
74 functions (see about "RegAllocInfo" below).
76 Finally, we order the basic blocks of the function so as to minimise
77 the number of jumps between blocks, by utilising fallthrough wherever
80 The machine-dependent bits break down as follows:
82 * ["MachRegs"] Everything about the target platform's machine
83 registers (and immediate operands, and addresses, which tend to
84 intermingle/interact with registers).
86 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
87 have a module of its own), plus a miscellany of other things
88 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
90 * ["MachCodeGen"] is where 'Cmm' stuff turns into
93 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
96 * ["RegAllocInfo"] In the register allocator, we manipulate
97 'MRegsState's, which are 'BitSet's, one bit per machine register.
98 When we want to say something about a specific machine register
99 (e.g., ``it gets clobbered by this instruction''), we set/unset
100 its bit. Obviously, we do this 'BitSet' thing for efficiency
103 The 'RegAllocInfo' module collects together the machine-specific
104 info needed to do register allocation.
106 * ["RegisterAlloc"] The (machine-independent) register allocator.
109 -- -----------------------------------------------------------------------------
110 -- Top-level of the native codegen
112 -- NB. We *lazily* compile each block of code for space reasons.
-- | Top-level entry point of the native code generator.  Runs the
-- per-chunk pipeline ('cmmNativeGen') over every top-level Cmm chunk,
-- concatenates the assembly output, and appends target-specific trailer
-- directives (import stubs, .subsections_via_symbols, the GNU-stack
-- note, and an .ident line).
114 nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
115 nativeCodeGen dflags cmms us
116 = let (res, _) = initUs us $
117 cgCmm (concat (map add_split cmms))
-- cgCmm: run the pipeline over each chunk (lazily, via lazyMapUs -- see
-- the space-leak note above) and gather the optimised Cmm, the assembly
-- Doc, and the imported symbols from each.
119 cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
121 lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
122 case unzip3 results of { (cmms,docs,imps) ->
123 returnUs (Cmm cmms, my_vcat docs, concat imps)
-- Dump the optimised Cmm when -ddump-opt-cmm is on, then assemble the
-- final document: generated code, dynamic-linking stubs, and trailers.
126 case res of { (ppr_cmms, insn_sdoc, imports) -> do
127 dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
128 return (insn_sdoc Pretty.$$ dyld_stubs imports
129 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
130 -- On recent versions of Darwin, the linker supports
131 -- dead-stripping of code and data on a per-symbol basis.
132 -- There's a hack to make this work in PprMach.pprNatCmmTop.
133 Pretty.$$ Pretty.text ".subsections_via_symbols"
135 #if HAVE_GNU_NONEXEC_STACK
136 -- On recent GNU ELF systems one can mark an object file
137 -- as not requiring an executable stack. If all objects
138 -- linked into a program have this note then the program
139 -- will not use an executable stack, which is good for
140 -- security. GHC generated code does not need an executable
141 -- stack so add the note in:
142 Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
144 -- And just because every other compiler does, let's stick in
145 -- an identifier directive: .ident "GHC x.y.z"
146 Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
147 Pretty.text cProjectVersion
148 in Pretty.text ".ident" Pretty.<+>
149 Pretty.doubleQuotes compilerIdent
-- add_split: with -split-objs, put a split marker proc in front of the
-- chunk.  NOTE(review): the equation head and the fall-through guard
-- are on lines not visible in this listing.
156 | dopt Opt_SplitObjs dflags = split_marker : tops
159 split_marker = CmmProc [] mkSplitMarkerLabel [] []
161 -- Generate "symbol stubs" for all external symbols that might
162 -- come from a dynamic library.
-- Old implementation, kept commented out for reference:
163 {- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
164 map head $ group $ sort imps-}
166 -- (Hack) sometimes two Labels pretty-print the same, but have
167 -- different uniques; so we compare their text versions...
-- dyld_stubs: emit the GOT declaration plus one imported-symbol stub
-- per distinct label text (dedup by sorting/grouping on the rendered
-- name).  NOTE(review): the equation head and the list being sorted
-- are on elided lines.
169 | needImportedSymbols
171 (pprGotDeclaration :) $
172 map (pprImportedSymbol . fst . head) $
173 groupBy (\(_,a) (_,b) -> a == b) $
174 sortBy (\(_,a) (_,b) -> compare a b) $
180 where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
181 astyle = mkCodeStyle AsmStyle
-- my_vcat: two definitions appear below; presumably one is selected by
-- an NCG debug CPP flag (the second interleaves ___ncg_debug_marker
-- lines) -- TODO confirm against the full file.
184 my_vcat sds = Pretty.vcat sds
186 my_vcat sds = Pretty.vcat (
189 Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
190 Pretty.$$ Pretty.char ' '
197 -- Complete native code generation phase for a single top-level chunk
-- | Complete native code generation for a single top-level Cmm chunk.
-- Pipeline (each stage wrapped in an SCC cost centre):
--   fixAssignsTop  -> rewrite assignments to global regs (see below)
--   cmmToCmm       -> generic Cmm optimiser; also collects imports
--   genMachCode    -> instruction selection into NatCmmTop
--   regAlloc       -> register allocation
--   sequenceTop    -> basic-block ordering for fallthrough
--   x86fp_kludge   -> x87 ffree insertion
-- and finally pretty-printing via pprNatCmmTop.
200 cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
201 cmmNativeGen dflags cmm
202 = {-# SCC "fixAssigns" #-}
203 fixAssignsTop cmm `thenUs` \ fixed_cmm ->
204 {-# SCC "genericOpt" #-}
205 cmmToCmm fixed_cmm `bind` \ (cmm, imports) ->
-- Only keep a pretty-printable copy of the optimised Cmm when it will
-- actually be dumped; otherwise substitute an empty data section so the
-- real thing is not retained.  NOTE(review): the 'then' branch of this
-- conditional is on a line not visible in this listing.
206 (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
208 else CmmData Text []) `bind` \ ppr_cmm ->
209 {-# SCC "genMachCode" #-}
210 genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
211 {-# SCC "regAlloc" #-}
212 mapUs regAlloc pre_regalloc `thenUs` \ with_regs ->
213 {-# SCC "sequenceBlocks" #-}
214 map sequenceTop with_regs `bind` \ sequenced ->
215 {-# SCC "x86fp_kludge" #-}
216 map x86fp_kludge sequenced `bind` \ final_mach_code ->
-- Render the final machine code; imports found during instruction
-- selection are appended to those from the generic optimiser.
218 Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
220 returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
-- | Run 'i386_insert_ffrees' over every basic block of a procedure to
-- insert x87 ffree fixups.  Data sections are untouched.
222 x86fp_kludge :: NatCmmTop -> NatCmmTop
223 x86fp_kludge top@(CmmData _ _) = top
225 x86fp_kludge top@(CmmProc info lbl params code) =
226 CmmProc info lbl params (map bb_i386_insert_ffrees code)
-- Per-block worker: rewrite the instruction stream of one basic block.
228 bb_i386_insert_ffrees (BasicBlock id instrs) =
229 BasicBlock id (i386_insert_ffrees instrs)
-- NOTE(review): this catch-all would be unreachable given the two
-- equations above; presumably the alternatives are separated by a
-- target-architecture #if in the full file -- confirm before editing.
231 x86fp_kludge top = top
234 -- -----------------------------------------------------------------------------
235 -- Sequencing the basic blocks
237 -- Cmm BasicBlocks are self-contained entities: they always end in a
238 -- jump, either non-local or to another basic block in the same proc.
239 -- In this phase, we attempt to place the basic blocks in a sequence
240 -- such that as many of the local jumps as possible turn into
-- | Reorder the basic blocks of a native-code procedure so that as many
-- local jumps as possible become fallthroughs (see 'sequenceBlocks').
-- Data sections pass through unchanged.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop (CmmProc info lbl params blocks)
  = CmmProc info lbl params (sequenceBlocks blocks)
sequenceTop top = top
248 -- The algorithm is very simple (and stupid): we make a graph out of
249 -- the blocks where there is an edge from one block to another iff the
250 -- first block ends by jumping to the second. Then we topologically
251 -- sort this graph. Then traverse the list: for each block, we first
252 -- output the block, then if it has an out edge, we move the
253 -- destination of the out edge to the front of the list, and continue.
-- | Arrange the basic blocks of a procedure so that local jumps can
-- become fallthroughs (algorithm described in the comment above): build
-- (block, unique, out-edges) nodes, flatten the SCCs of the remaining
-- blocks (reversed -- presumably to recover a usable topological order
-- for seqBlocks; confirm), and keep the entry block pinned in front.
255 sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
256 sequenceBlocks [] = []
257 sequenceBlocks (entry:blocks) =
258 seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
259 -- the first block is the entry point ==> it must remain at the start.
-- Strongly-connected components of the block graph, built from
-- (block, unique, out-edges) triples.
261 sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
262 sccBlocks blocks = stronglyConnCompR (map mkNode blocks)
-- The out-edges of a block, for graph construction.  NOTE(review):
-- 'last' is partial -- this relies on the invariant stated above that
-- every block ends in a jump, i.e. instruction lists are non-empty.
-- Only the single-destination alternative is visible here; the
-- alternative for other shapes is on an elided line.
264 getOutEdges :: [Instr] -> [Unique]
265 getOutEdges instrs = case jumpDests (last instrs) [] of
266 [one] -> [getUnique one]
268 -- we're only interested in the last instruction of
269 -- the block, and only if it has a single destination.
-- Package a block as a graph node keyed by its label's unique.
271 mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
-- Walk the ordered node list.  A block with no out-edge is emitted
-- as-is.  A block with a single out-edge tries to pull its target to
-- the front of the remaining list ('reorder'); if that succeeds we can
-- drop the block's final jump ('init instrs') and fall through.
-- NOTE(review): the 'where' introducing the binding below is on an
-- elided line.
274 seqBlocks ((block,_,[]) : rest)
275 = block : seqBlocks rest
276 seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
277 | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
278 | otherwise = block : seqBlocks rest'
280 (can_fallthrough, rest') = reorder next [] rest
281 -- TODO: we should do a better job for cycles; try to maximise the
282 -- fallthroughs within a loop.
283 seqBlocks _ = panic "AsmCodegen:seqBlocks"
-- | Look for the node whose unique is @wanted@.  On success, return
-- True together with the list rearranged so the wanted node comes
-- first, followed by the nodes already skipped (in their original
-- order) and then the untouched remainder.  On failure, return False
-- and the list in its original order.
reorder wanted skipped [] = (False, reverse skipped)
reorder wanted skipped (node@(blk, u, outs) : more)
  | wanted == u = (True, (blk, wanted, outs) : reverse skipped ++ more)
  | otherwise   = reorder wanted (node : skipped) more
290 -- -----------------------------------------------------------------------------
291 -- Instruction selection
293 -- Native code instruction selection for a chunk of stix code. For
294 -- this part of the computation, we switch from the UniqSM monad to
295 -- the NatM monad. The latter carries not only a Unique, but also an
296 -- Int denoting the current C stack pointer offset in the generated
297 -- code; this is needed for creating correct spill offsets on
298 -- architectures which don't offer, or for which it would be
299 -- prohibitively expensive to employ, a frame pointer register. Viz,
302 -- The offset is measured in bytes, and indicates the difference
303 -- between the current (simulated) C stack-ptr and the value it was at
304 -- the beginning of the block. For stacks which grow down, this value
305 -- should be either zero or negative.
307 -- Switching between the two monads whilst carrying along the same
308 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection: run 'cmmTopCodeGen' in the NatM monad (which
-- threads a unique supply, the simulated C stack delta, and the list of
-- imported symbols).  The delta must be back to zero at the end of a
-- top-level chunk; anything else is a code-generator bug.
310 genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
-- NOTE(review): the equation head binding 'cmm_top' is on an elided
-- line, as is whatever consumes 'final_us' (presumably the final unique
-- supply is fed back into UniqSM -- confirm in the full file).
313 = do { initial_us <- getUs
314 ; let initial_st = mkNatM_State initial_us 0
315 (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
316 final_us = natm_us final_st
317 final_delta = natm_delta final_st
318 final_imports = natm_imports final_st
319 ; if final_delta == 0
320 then return (new_tops, final_imports)
321 else pprPanic "genMachCode: nonzero final delta" (int final_delta)
324 -- -----------------------------------------------------------------------------
325 -- Fixup assignments to global registers so that they assign to
326 -- locations within the RegTable, if appropriate.
328 -- Note that we currently don't fixup reads here: they're done by
329 -- the generic optimiser below, to avoid having two separate passes
-- | Rewrite assignments to global registers throughout a top-level
-- procedure (see 'fixAssign'); data sections come back unchanged.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop (CmmProc info lbl params blocks)
  = mapUs fixAssignsBlock blocks `thenUs` \ fixed ->
    returnUs (CmmProc info lbl params fixed)
fixAssignsTop top = returnUs top
-- | Apply 'fixAssigns' to the statements of one basic block, keeping
-- the block's label.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock blk_id stmts)
  = fixAssigns stmts `thenUs` \ fixed ->
    returnUs (BasicBlock blk_id fixed)
-- | Apply 'fixAssign' to every statement; each statement may expand to
-- several, so the results are concatenated.  NOTE(review): the equation
-- head binding 'stmts' is on an elided line.
343 fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
345 mapUs fixAssign stmts `thenUs` \ stmtss ->
346 returnUs (concat stmtss)
-- | Rewrite one statement so that writes to global registers with no
-- machine register on this target become stores into the register
-- table.  A statement may expand into several (calls grow save/restore
-- code around them).
348 fixAssign :: CmmStmt -> UniqSM [CmmStmt]
-- Assigning to BaseReg is always illegal (see comment below).
349 fixAssign (CmmAssign (CmmGlobal BaseReg) src)
350 = panic "cmmStmtConFold: assignment to BaseReg";
-- Global register: keep the assignment if the register really exists on
-- this target, otherwise store to its slot in the register table.
352 fixAssign (CmmAssign (CmmGlobal reg) src)
353 | Left realreg <- reg_or_addr
354 = returnUs [CmmAssign (CmmGlobal reg) src]
355 | Right baseRegAddr <- reg_or_addr
356 = returnUs [CmmStore baseRegAddr src]
357 -- Replace register leaves with appropriate StixTrees for
358 -- the given target. GlobalRegs which map to a reg on this
359 -- arch are left unchanged. Assigning to BaseReg is always
360 -- illegal, so we check for that.
-- NOTE(review): the 'where' introducing this binding is elided.
362 reg_or_addr = get_GlobalReg_reg_or_addr reg
-- Calls: results targeting table-resident globals are redirected into
-- fresh local temporaries, with stores back to the table afterwards;
-- caller-saved STG registers are saved/restored around the call.
-- NOTE(review): the lines appending 'stores'/'caller_restore' and the
-- 'where' keyword are elided.
364 fixAssign (CmmCall target results args vols)
365 = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
366 returnUs (caller_save ++
367 CmmCall target results' args vols :
371 -- we also save/restore any caller-saves STG registers here
372 (caller_save, caller_restore) = callerSaveVolatileRegs vols
-- fixResult: a result register living in the register table gets a
-- fresh LocalReg plus a store of that local back into the table.
-- NOTE(review): the 'Right baseRegAddr ->' alternative head is elided.
374 fixResult g@(CmmGlobal reg,hint) =
375 case get_GlobalReg_reg_or_addr reg of
376 Left realreg -> returnUs (g, [])
378 getUniqueUs `thenUs` \ uq ->
379 let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
380 returnUs ((local,hint),
381 [CmmStore baseRegAddr (CmmReg local)])
-- Everything else passes through unchanged.
385 fixAssign other_stmt = returnUs [other_stmt]
387 -- -----------------------------------------------------------------------------
388 -- Generic Cmm optimiser
394 (b) Simple inlining: a temporary which is assigned to and then
395 used, once, can be shorted (i.e. the temporary is inlined away).
396 (c) Replacement of references to GlobalRegs which do not have
397 machine registers by the appropriate memory load (eg.
398 Hp ==> *(BaseReg + 34) ).
399 (d) Position independent code and dynamic linking
400 (i) introduce the appropriate indirections
401 and position independent refs
402 (ii) compile a list of imported symbols
404 Ideas for other things we could do (ToDo):
406 - shortcut jumps-to-jumps
407 - eliminate dead code blocks
408 - simple CSE: if an expr is assigned to a temp, then replace later occs of
409 that expr with the temp, until the expr is no longer valid (can push through
410 temp assignments, and certain assigns to mem...)
-- | Run the generic Cmm optimiser over one top-level chunk: mini-inline
-- single-use temporaries, then constant-fold every block, collecting
-- the imported symbols encountered along the way.  Data sections need
-- no work and produce no imports.
cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $
  mapM cmmBlockConFold (cmmMiniInline blocks) >>= \folded ->
  return (CmmProc info lbl params folded)
cmmToCmm top = (top, [])
-- | The monad of the generic Cmm optimiser: a state monad threading the
-- list of imported labels collected so far (unboxed-tuple result to
-- keep the plumbing cheap).
419 newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))
-- NOTE(review): the body of (>>=) is only partially visible here -- its
-- head and the case scrutinee binding 'imports'' are on elided lines.
421 instance Monad CmmOptM where
422 return x = CmmOptM $ \imports -> (# x,imports #)
424 CmmOptM $ \imports ->
428 CmmOptM g' -> g' imports'
-- | Record one imported symbol by consing it onto the accumulated list.
addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl = CmmOptM (\imps -> (# (), lbl : imps #))
-- | Run a 'CmmOptM' computation starting from an empty import list,
-- returning its result together with the imports it accumulated.
runCmmOpt :: CmmOptM a -> (a, [CLabel])
runCmmOpt (CmmOptM body)
  = case body [] of
      (# res, imps #) -> (res, imps)
-- | Constant-fold every statement of a basic block, keeping its label.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock blk_id stmts)
  = mapM cmmStmtConFold stmts >>= \stmts' ->
    return (BasicBlock blk_id stmts')
-- NOTE(review): this is the interior of cmmStmtConFold's case analysis;
-- the function head and several alternative heads (CmmAssign, CmmStore,
-- CmmJump, CmmSwitch, and the binding of 'stmt') are on elided lines.
-- Assignment: fold the RHS; a self-assignment collapses to CmmNop.
445 -> do src' <- cmmExprConFold False src
446 return $ case src' of
447 CmmReg reg' | reg == reg' -> CmmNop
448 new_src -> CmmAssign reg new_src
-- Store: fold both the address and the value.
451 -> do addr' <- cmmExprConFold False addr
452 src' <- cmmExprConFold False src
453 return $ CmmStore addr' src'
-- Jump: the address is folded as a jump target (flag True).
456 -> do addr' <- cmmExprConFold True addr
457 return $ CmmJump addr' regs
-- Call: fold the callee (as a jump target) and every argument.
459 CmmCall target regs args vols
460 -> do target' <- case target of
461 CmmForeignCall e conv -> do
462 e' <- cmmExprConFold True e
463 return $ CmmForeignCall e' conv
464 other -> return other
465 args' <- mapM (\(arg, hint) -> do
466 arg' <- cmmExprConFold False arg
467 return (arg', hint)) args
468 return $ CmmCall target' regs args' vols
-- Conditional branch: a condition folding to 0 deletes the branch
-- (leaving a CmmComment recording the deleted statement); any other
-- constant makes the branch unconditional.  NOTE(review): one line of
-- this alternative (original 476) is elided.
470 CmmCondBranch test dest
471 -> do test' <- cmmExprConFold False test
472 return $ case test' of
473 CmmLit (CmmInt 0 _) ->
474 CmmComment (mkFastString ("deleted: " ++
475 showSDoc (pprStmt stmt)))
477 CmmLit (CmmInt n _) -> CmmBranch dest
478 other -> CmmCondBranch test' dest
-- Switch: fold the scrutinee.
481 -> do expr' <- cmmExprConFold False expr
482 return $ CmmSwitch expr' ids
-- | Constant-fold one expression; 'isJumpTarget' tells the
-- dynamic-reference machinery whether the label being rewritten is a
-- jump destination.  NOTE(review): the 'case expr of' line and several
-- alternative heads (CmmLoad, CmmMachOp, the 'Right' branches, and the
-- zero-offset CmmRegOff case) are on elided lines.
488 cmmExprConFold isJumpTarget expr
-- Load: fold the address.
491 -> do addr' <- cmmExprConFold False addr
492 return $ CmmLoad addr' rep
495 -- For MachOps, we first optimize the children, and then we try
496 -- our hand at some constant-folding.
497 -> do args' <- mapM (cmmExprConFold False) args
498 return $ cmmMachOpFold mop args'
-- Labels may need PIC/dynamic-linking indirections; any imported
-- symbols are recorded via addImportCmmOpt.
500 CmmLit (CmmLabel lbl)
501 -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
502 CmmLit (CmmLabelOff lbl off)
503 -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
504 return $ cmmMachOpFold (MO_Add wordRep) [
506 (CmmLit $ CmmInt (fromIntegral off) wordRep)
509 #if powerpc_TARGET_ARCH
510 -- On powerpc (non-PIC), it's easier to jump directly to a label than
511 -- to use the register table, so we replace these registers
512 -- with the corresponding labels:
513 CmmReg (CmmGlobal GCEnter1)
515 -> cmmExprConFold isJumpTarget $
516 CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
517 CmmReg (CmmGlobal GCFun)
519 -> cmmExprConFold isJumpTarget $
520 CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
523 CmmReg (CmmGlobal mid)
524 -- Replace register leaves with appropriate StixTrees for
525 -- the given target. MagicIds which map to a reg on this
526 -- arch are left unchanged. For the rest, BaseReg is taken
527 -- to mean the address of the reg table in MainCapability,
528 -- and for all others we generate an indirection to its
529 -- location in the register table.
530 -> case get_GlobalReg_reg_or_addr mid of
531 Left realreg -> return expr
534 BaseReg -> cmmExprConFold False baseRegAddr
535 other -> cmmExprConFold False (CmmLoad baseRegAddr
537 -- eliminate zero offsets
539 -> cmmExprConFold False (CmmReg reg)
541 CmmRegOff (CmmGlobal mid) offset
542 -- RegOff leaves are just a shorthand form. If the reg maps
543 -- to a real reg, we keep the shorthand, otherwise, we just
544 -- expand it and defer to the above code.
545 -> case get_GlobalReg_reg_or_addr mid of
546 Left realreg -> return expr
548 -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [
549 CmmReg (CmmGlobal mid),
550 CmmLit (CmmInt (fromIntegral offset)
555 -- -----------------------------------------------------------------------------