1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
20 import RegAllocInfo ( jumpDests )
22 import PositionIndependentCode
25 import CmmOpt ( cmmMiniInline, cmmMachOpFold )
26 import PprCmm ( pprStmt, pprCmms )
28 import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
29 #if powerpc_TARGET_ARCH
30 import CLabel ( mkRtsCodeLabel )
34 import Unique ( Unique, getUnique )
37 import List ( groupBy, sortBy )
38 import CLabel ( pprCLabel )
39 import ErrUtils ( dumpIfSet_dyn )
40 import DynFlags ( DynFlags, DynFlag(..), dopt )
41 import StaticFlags ( opt_Static, opt_PIC )
44 import qualified Pretty
52 import List ( intersperse )
61 The native-code generator has machine-independent and
62 machine-dependent modules.
64 This module ("AsmCodeGen") is the top-level machine-independent
65 module. Before entering machine-dependent land, we do some
66 machine-independent optimisations (defined below) on the
69 We convert to the machine-specific 'Instr' datatype with
70 'cmmCodeGen', assuming an infinite supply of registers. We then use
71 a machine-independent register allocator ('regAlloc') to rejoin
72 reality. Obviously, 'regAlloc' has machine-specific helper
73 functions (see about "RegAllocInfo" below).
75 Finally, we order the basic blocks of the function so as to minimise
76 the number of jumps between blocks, by utilising fallthrough wherever
79 The machine-dependent bits break down as follows:
81 * ["MachRegs"] Everything about the target platform's machine
82 registers (and immediate operands, and addresses, which tend to
83 intermingle/interact with registers).
85 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
86 have a module of its own), plus a miscellany of other things
87 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
89 * ["MachCodeGen"] is where 'Cmm' stuff turns into
92 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
95 * ["RegAllocInfo"] In the register allocator, we manipulate
96 'MRegsState's, which are 'BitSet's, one bit per machine register.
97 When we want to say something about a specific machine register
98 (e.g., ``it gets clobbered by this instruction''), we set/unset
99 its bit. Obviously, we do this 'BitSet' thing for efficiency
102 The 'RegAllocInfo' module collects together the machine-specific
103 info needed to do register allocation.
105 * ["RegisterAlloc"] The (machine-independent) register allocator.
108 -- -----------------------------------------------------------------------------
109 -- Top-level of the native codegen
111 -- NB. We *lazily* compile each block of code for space reasons.
-- | Top level of the native code generator: for each top-level Cmm chunk,
-- run the per-chunk pipeline ('cmmNativeGen'), then glue the resulting
-- assembly documents together and append dynamic-linking stubs.
-- NOTE(review): this view of the file is missing intermediate lines
-- (the embedded original line numbers are non-contiguous), so several
-- local definitions below appear truncated; code left byte-identical.
113 nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
114 nativeCodeGen dflags cmms us
115 = let (res, _) = initUs us $
116 cgCmm (concat (map add_split cmms))
-- cgCmm compiles each top-level chunk (lazily, via lazyMapUs — see the
-- space-leak note above) and unzips the (Cmm, doc, imports) triples.
118 cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
120 lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
121 case unzip3 results of { (cmms,docs,imps) ->
122 returnUs (Cmm cmms, my_vcat docs, concat imps)
125 case res of { (ppr_cmms, insn_sdoc, imports) -> do
-- dump the optimised Cmm when -ddump-opt-cmm is on
126 dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
127 return (insn_sdoc Pretty.$$ dyld_stubs imports
128 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
129 -- On recent versions of Darwin, the linker supports
130 -- dead-stripping of code and data on a per-symbol basis.
131 -- There's a hack to make this work in PprMach.pprNatCmmTop.
132 Pretty.$$ Pretty.text ".subsections_via_symbols"
-- add_split: when -split-objs is on, prefix each chunk with a split marker
140 | dopt Opt_SplitObjs dflags = split_marker : tops
143 split_marker = CmmProc [] mkSplitMarkerLabel [] []
145 -- Generate "symbol stubs" for all external symbols that might
146 -- come from a dynamic library.
147 {- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
148 map head $ group $ sort imps-}
150 -- (Hack) sometimes two Labels pretty-print the same, but have
151 -- different uniques; so we compare their text versions...
153 | needImportedSymbols
155 (pprGotDeclaration :) $
-- dedupe by rendered label text: sort on the text, group equal texts,
-- emit one import stub per group
156 map (pprImportedSymbol . fst . head) $
157 groupBy (\(_,a) (_,b) -> a == b) $
158 sortBy (\(_,a) (_,b) -> compare a b) $
164 where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
165 astyle = mkCodeStyle AsmStyle
-- my_vcat: plain vcat normally; the variant below brackets each chunk
-- with debug markers (presumably under a debug CPP flag — the guard
-- lines are not visible here; TODO confirm)
168 my_vcat sds = Pretty.vcat sds
170 my_vcat sds = Pretty.vcat (
173 Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
174 Pretty.$$ Pretty.char ' '
181 -- Complete native code generation phase for a single top-level chunk
-- | Complete native-code-generation pipeline for ONE top-level Cmm chunk:
--   1. fixAssignsTop   — redirect global-register writes to the RegTable
--   2. cmmToCmm        — generic Cmm optimiser (also collects imports)
--   3. genMachCode     — instruction selection ('Instr' with virtual regs)
--   4. regAlloc        — register allocation
--   5. sequenceTop     — block ordering to maximise fallthroughs
--   6. x86fp_kludge    — x87 FP-stack fixups
--   7. pretty-print to a 'Pretty.Doc'
-- Returns (optimised Cmm for dumping, assembly doc, imported labels).
184 cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
185 cmmNativeGen dflags cmm
186 = {-# SCC "fixAssigns" #-}
187 fixAssignsTop cmm `thenUs` \ fixed_cmm ->
188 {-# SCC "genericOpt" #-}
189 cmmToCmm fixed_cmm `bind` \ (cmm, imports) ->
-- keep the optimised Cmm only if it will actually be dumped, otherwise
-- substitute an empty data section so it can be GC'd (space-leak fix)
190 (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
192 else CmmData Text []) `bind` \ ppr_cmm ->
193 {-# SCC "genMachCode" #-}
194 genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
195 {-# SCC "regAlloc" #-}
196 mapUs regAlloc pre_regalloc `thenUs` \ with_regs ->
197 {-# SCC "sequenceBlocks" #-}
198 map sequenceTop with_regs `bind` \ sequenced ->
199 {-# SCC "x86fp_kludge" #-}
200 map x86fp_kludge sequenced `bind` \ final_mach_code ->
202 Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
204 returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
-- | Insert x87 'ffree' instructions into each basic block of a procedure
-- (via 'i386_insert_ffrees'); data sections pass through unchanged.
-- NOTE(review): an unguarded catch-all equation follows the CmmProc
-- equation below — presumably the two are separated by i386 CPP
-- conditionals on the lines missing from this view; TODO confirm.
206 x86fp_kludge :: NatCmmTop -> NatCmmTop
207 x86fp_kludge top@(CmmData _ _) = top
209 x86fp_kludge top@(CmmProc info lbl params code) =
210 CmmProc info lbl params (map bb_i386_insert_ffrees code)
-- per-block helper: rewrite the instruction stream, keep the block id
212 bb_i386_insert_ffrees (BasicBlock id instrs) =
213 BasicBlock id (i386_insert_ffrees instrs)
-- non-i386 case: identity
215 x86fp_kludge top = top
218 -- -----------------------------------------------------------------------------
219 -- Sequencing the basic blocks
221 -- Cmm BasicBlocks are self-contained entities: they always end in a
222 -- jump, either non-local or to another basic block in the same proc.
223 -- In this phase, we attempt to place the basic blocks in a sequence
224 -- such that as many of the local jumps as possible turn into
-- | Reorder the basic blocks of one top-level chunk so that as many local
-- jumps as possible become fallthroughs.  Data sections carry no blocks
-- and pass through untouched.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop top@(CmmData _ _)           = top
sequenceTop (CmmProc info lbl params bbs) =
    CmmProc info lbl params resequenced
  where
    resequenced = sequenceBlocks bbs
232 -- The algorithm is very simple (and stupid): we make a graph out of
233 -- the blocks where there is an edge from one block to another iff the
234 -- first block ends by jumping to the second. Then we topologically
235 -- sort this graph. Then traverse the list: for each block, we first
236 -- output the block, then if it has an out edge, we move the
237 -- destination of the out edge to the front of the list, and continue.
-- | Arrange a procedure's basic blocks for emission.  The entry block
-- must stay first; the remaining blocks are topologically sorted by
-- their jump graph before 'seqBlocks' threads fallthroughs through them.
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks []             = []
sequenceBlocks (entry : rest) = seqBlocks (mkNode entry : ordered)
  where
    -- reverse the flattened SCCs so the nodes come out in emission order;
    -- the entry node is prepended unconditionally so it remains the start
    ordered = reverse (flattenSCCs (sccBlocks rest))
-- | Build the jump graph — one node per block, keyed by the block id's
-- 'Unique', with an edge for each direct single-destination jump — and
-- compute its strongly-connected components.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks = stronglyConnCompR . map mkNode
-- | Out-edges of a basic block for the sequencing graph: the jump
-- destination of the final instruction, but only when it is a single
-- destination.  NOTE(review): the alternative for the zero/many-
-- destination case (original line 251) is missing from this view —
-- presumably it returns []; TODO confirm.
248 getOutEdges :: [Instr] -> [Unique]
249 getOutEdges instrs = case jumpDests (last instrs) [] of
250 [one] -> [getUnique one]
252 -- we're only interested in the last instruction of
253 -- the block, and only if it has a single destination.
255 mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
-- | Emit blocks in order, converting a jump into a fallthrough whenever
-- the jump's single destination can be hoisted to come next (in which
-- case the trailing jump instruction is dropped via 'init').
-- NOTE(review): the base equation for [] and the 'where' keyword
-- (original lines around 257 and 263) are missing from this view.
258 seqBlocks ((block,_,[]) : rest)
259 = block : seqBlocks rest
260 seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
261 | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
262 | otherwise = block : seqBlocks rest'
-- reorder hoists the destination block (if present) to the front of rest
264 (can_fallthrough, rest') = reorder next [] rest
265 -- TODO: we should do a better job for cycles; try to maximise the
266 -- fallthroughs within a loop.
267 seqBlocks _ = panic "AsmCodegen:seqBlocks"
-- | Search the node list for the block keyed by the wanted id.  On a hit,
-- return True together with the list rearranged so that node comes first
-- (nodes already skipped keep their relative order); on a miss, return
-- False with the list unchanged.
reorder wanted skipped nodes =
  case nodes of
    [] -> (False, reverse skipped)
    node@(blk, key, outs) : more
      | key == wanted -> (True, (blk, wanted, outs) : reverse skipped ++ more)
      | otherwise     -> reorder wanted (node : skipped) more
274 -- -----------------------------------------------------------------------------
275 -- Instruction selection
277 -- Native code instruction selection for a chunk of stix code. For
278 -- this part of the computation, we switch from the UniqSM monad to
279 -- the NatM monad. The latter carries not only a Unique, but also an
280 -- Int denoting the current C stack pointer offset in the generated
281 -- code; this is needed for creating correct spill offsets on
282 -- architectures which don't offer, or for which it would be
283 -- prohibitively expensive to employ, a frame pointer register. Viz,
286 -- The offset is measured in bytes, and indicates the difference
287 -- between the current (simulated) C stack-ptr and the value it was at
288 -- the beginning of the block. For stacks which grow down, this value
289 -- should be either zero or negative.
291 -- Switching between the two monads whilst carrying along the same
292 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection for one top-level chunk.  Runs the NatM monad
-- (unique supply + simulated C stack-pointer delta + import list) over
-- 'cmmTopCodeGen', starting with delta 0, and checks on the way out that
-- the net stack delta is zero again — a nonzero delta means the code
-- generator unbalanced the C stack, which is a bug.
-- NOTE(review): the explicit unique-supply parameter works because UniqSM
-- is (here) a function of the supply; lines between the signature and the
-- equation are missing from this view.
294 genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
296 genMachCode cmm_top initial_us
297 = let initial_st = mkNatM_State initial_us 0
298 (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
299 final_us = natm_us final_st
300 final_delta = natm_delta final_st
301 final_imports = natm_imports final_st
-- (condition line missing from view — presumably final_delta == 0)
304 then ((new_tops, final_imports), final_us)
305 else pprPanic "genMachCode: nonzero final delta"
308 -- -----------------------------------------------------------------------------
309 -- Fixup assignments to global registers so that they assign to
310 -- locations within the RegTable, if appropriate.
312 -- Note that we currently don't fixup reads here: they're done by
313 -- the generic optimiser below, to avoid having two separate passes
-- | Rewrite assignments to global registers throughout a procedure so
-- that registers without a machine home are stored into the RegTable
-- instead.  Data sections contain no statements and are returned as-is.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop top@(CmmData _ _) = returnUs top
fixAssignsTop (CmmProc info lbl params bbs) =
  mapUs fixAssignsBlock bbs `thenUs` \ fixed ->
  returnUs (CmmProc info lbl params fixed)
-- | Apply 'fixAssigns' to the statements of one basic block, preserving
-- the block's label.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock blockId body) =
  fixAssigns body `thenUs` \ body' ->
  returnUs (BasicBlock blockId body')
-- | Fix up a statement list: each statement may expand to several
-- (e.g. a call grows caller-save/restore code), so map and concatenate.
-- NOTE(review): the defining equation's head (original line 328) is
-- missing from this view.
327 fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
329 mapUs fixAssign stmts `thenUs` \ stmtss ->
330 returnUs (concat stmtss)
-- | Fix up one statement.  Writes to a global register that has a real
-- machine register are kept; writes to one that lives in the register
-- table become stores to its table slot.  Writing BaseReg itself is
-- always illegal.  Calls additionally get caller-save/restore code and
-- have memory-resident result registers routed through fresh locals.
-- NOTE(review): several lines (including 'where' keywords and the
-- Right/store alternatives) are missing from this view; code kept
-- byte-identical.
332 fixAssign :: CmmStmt -> UniqSM [CmmStmt]
333 fixAssign (CmmAssign (CmmGlobal BaseReg) src)
334 = panic "cmmStmtConFold: assignment to BaseReg";
336 fixAssign (CmmAssign (CmmGlobal reg) src)
337 | Left realreg <- reg_or_addr
338 = returnUs [CmmAssign (CmmGlobal reg) src]
339 | Right baseRegAddr <- reg_or_addr
340 = returnUs [CmmStore baseRegAddr src]
341 -- Replace register leaves with appropriate StixTrees for
342 -- the given target. GlobalRegs which map to a reg on this
343 -- arch are left unchanged. Assigning to BaseReg is always
344 -- illegal, so we check for that.
346 reg_or_addr = get_GlobalReg_reg_or_addr reg
348 fixAssign (CmmCall target results args vols)
349 = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
350 returnUs (caller_save ++
351 CmmCall target results' args vols :
355 -- we also save/restore any caller-saves STG registers here
356 (caller_save, caller_restore) = callerSaveVolatileRegs vols
-- fixResult: a result register with a real machine home needs no store;
-- otherwise results go via a fresh local, then a store into the RegTable
358 fixResult g@(CmmGlobal reg,hint) =
359 case get_GlobalReg_reg_or_addr reg of
360 Left realreg -> returnUs (g, [])
362 getUniqueUs `thenUs` \ uq ->
363 let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
364 returnUs ((local,hint),
365 [CmmStore baseRegAddr (CmmReg local)])
-- any other statement is left untouched
369 fixAssign other_stmt = returnUs [other_stmt]
371 -- -----------------------------------------------------------------------------
372 -- Generic Cmm optimiser
378 (b) Simple inlining: a temporary which is assigned to and then
379 used, once, can be shorted.
380 (c) Replacement of references to GlobalRegs which do not have
381 machine registers by the appropriate memory load (eg.
382 Hp ==> *(BaseReg + 34) ).
383 (d) Position independent code and dynamic linking
384 (i) introduce the appropriate indirections
385 and position independent refs
386 (ii) compile a list of imported symbols
388 Ideas for other things we could do (ToDo):
390 - shortcut jumps-to-jumps
391 - eliminate dead code blocks
392 - simple CSE: if an expr is assigned to a temp, then replace later occs of
393 that expr with the temp, until the expr is no longer valid (can push through
394 temp assignments, and certain assigns to mem...)
-- | Generic Cmm-to-Cmm optimiser: mini-inline single-use temporaries,
-- then constant-fold each block, collecting any imported labels that the
-- folding discovers (for dynamic linking).  Data sections are unchanged
-- and contribute no imports.
cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
cmmToCmm top@(CmmData _ _) = (top, [])
cmmToCmm (CmmProc info lbl params bbs) =
  runCmmOpt (mapM cmmBlockConFold (cmmMiniInline bbs)
               >>= \ folded -> return (CmmProc info lbl params folded))
403 newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))
-- | Standard state-threading Monad instance for 'CmmOptM': 'return'
-- passes the import list through unchanged; bind threads it from the
-- first action into the second.  NOTE(review): the (>>=) equation's head
-- and intermediate lines are missing from this view.
405 instance Monad CmmOptM where
406 return x = CmmOptM $ \imports -> (# x,imports #)
408 CmmOptM $ \imports ->
412 CmmOptM g' -> g' imports'
-- | Record one imported symbol in the optimiser's accumulated import list.
addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl = CmmOptM (\ imps -> (# (), lbl : imps #))
-- | Run a 'CmmOptM' action starting from an empty import list, returning
-- its result together with every import it recorded.
runCmmOpt :: CmmOptM a -> (a, [CLabel])
runCmmOpt (CmmOptM f) =
  case f [] of
    (# x, imps #) -> (x, imps)
-- | Constant-fold every statement of one basic block, preserving its label.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock blockId body) =
  mapM cmmStmtConFold body >>= \ body' ->
  return (BasicBlock blockId body')
-- | Constant-fold one statement by folding its sub-expressions and then
-- simplifying: self-assignments become CmmNop, and conditional branches
-- on constant tests become a comment (never taken) or an unconditional
-- branch (always taken).  NOTE(review): the equation head and the
-- statement patterns for the first cases (original lines 425-428 etc.)
-- are missing from this view; code kept byte-identical.
-- assignment: fold the source; drop it entirely if it assigns a
-- register to itself
429 -> do src' <- cmmExprConFold False src
430 return $ case src' of
431 CmmReg reg' | reg == reg' -> CmmNop
432 new_src -> CmmAssign reg new_src
-- store: fold both address and value
435 -> do addr' <- cmmExprConFold False addr
436 src' <- cmmExprConFold False src
437 return $ CmmStore addr' src'
-- jump: the target is folded as a jump target (True)
440 -> do addr' <- cmmExprConFold True addr
441 return $ CmmJump addr' regs
443 CmmCall target regs args vols
444 -> do target' <- case target of
445 CmmForeignCall e conv -> do
446 e' <- cmmExprConFold True e
447 return $ CmmForeignCall e' conv
448 other -> return other
449 args' <- mapM (\(arg, hint) -> do
450 arg' <- cmmExprConFold False arg
451 return (arg', hint)) args
452 return $ CmmCall target' regs args' vols
454 CmmCondBranch test dest
455 -> do test' <- cmmExprConFold False test
456 return $ case test' of
-- test folded to 0: branch never taken; keep a comment for the dump
457 CmmLit (CmmInt 0 _) ->
458 CmmComment (mkFastString ("deleted: " ++
459 showSDoc (pprStmt stmt)))
-- test folded to any other constant: branch always taken
461 CmmLit (CmmInt n _) -> CmmBranch dest
462 other -> CmmCondBranch test' dest
-- switch: fold the scrutinee only
465 -> do expr' <- cmmExprConFold False expr
466 return $ CmmSwitch expr' ids
-- | Constant-fold one expression.  The Bool says whether the expression
-- is a jump target, which affects how dynamic references to labels are
-- materialised (PIC).  Label literals are routed through
-- 'cmmMakeDynamicReference', which also records imported symbols via
-- 'addImportCmmOpt'.  NOTE(review): the 'case expr of' line and several
-- patterns are missing from this view; code kept byte-identical.
472 cmmExprConFold isJumpTarget expr
-- load: fold the address (a load address is never a jump target)
475 -> do addr' <- cmmExprConFold False addr
476 return $ CmmLoad addr' rep
479 -- For MachOps, we first optimize the children, and then we try
480 -- our hand at some constant-folding.
481 -> do args' <- mapM (cmmExprConFold False) args
482 return $ cmmMachOpFold mop args'
484 CmmLit (CmmLabel lbl)
485 -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
486 CmmLit (CmmLabelOff lbl off)
487 -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
488 return $ cmmMachOpFold (MO_Add wordRep) [
490 (CmmLit $ CmmInt (fromIntegral off) wordRep)
493 #if powerpc_TARGET_ARCH
494 -- On powerpc (non-PIC), it's easier to jump directly to a label than
495 -- to use the register table, so we replace these registers
496 -- with the corresponding labels:
497 CmmReg (CmmGlobal GCEnter1)
499 -> cmmExprConFold isJumpTarget $
500 CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
501 CmmReg (CmmGlobal GCFun)
503 -> cmmExprConFold isJumpTarget $
504 CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
507 CmmReg (CmmGlobal mid)
508 -- Replace register leaves with appropriate StixTrees for
509 -- the given target. MagicIds which map to a reg on this
510 -- arch are left unchanged. For the rest, BaseReg is taken
511 -- to mean the address of the reg table in MainCapability,
512 -- and for all others we generate an indirection to its
513 -- location in the register table.
514 -> case get_GlobalReg_reg_or_addr mid of
515 Left realreg -> return expr
518 BaseReg -> cmmExprConFold False baseRegAddr
519 other -> cmmExprConFold False (CmmLoad baseRegAddr
521 -- eliminate zero offsets
523 -> cmmExprConFold False (CmmReg reg)
525 CmmRegOff (CmmGlobal mid) offset
526 -- RegOff leaves are just a shorthand form. If the reg maps
527 -- to a real reg, we keep the shorthand, otherwise, we just
528 -- expand it and defer to the above code.
529 -> case get_GlobalReg_reg_or_addr mid of
530 Left realreg -> return expr
532 -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [
533 CmmReg (CmmGlobal mid),
534 CmmLit (CmmInt (fromIntegral offset)
539 -- -----------------------------------------------------------------------------