1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
20 import RegAllocInfo ( jumpDests )
22 import PositionIndependentCode
25 import CmmOpt ( cmmMiniInline, cmmMachOpFold )
26 import PprCmm ( pprStmt, pprCmms )
28 import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
29 #if powerpc_TARGET_ARCH
30 import CLabel ( mkRtsCodeLabel )
34 import Unique ( Unique, getUnique )
37 import List ( groupBy, sortBy )
38 import CLabel ( pprCLabel )
39 import ErrUtils ( dumpIfSet_dyn )
40 import DynFlags ( DynFlags, DynFlag(..), dopt )
41 import StaticFlags ( opt_Static, opt_PIC )
44 import qualified Pretty
52 import List ( intersperse )
61 The native-code generator has machine-independent and
62 machine-dependent modules.
64 This module ("AsmCodeGen") is the top-level machine-independent
65 module. Before entering machine-dependent land, we do some
66 machine-independent optimisations (defined below) on the
69 We convert to the machine-specific 'Instr' datatype with
70 'cmmCodeGen', assuming an infinite supply of registers. We then use
71 a machine-independent register allocator ('regAlloc') to rejoin
72 reality. Obviously, 'regAlloc' has machine-specific helper
73 functions (see about "RegAllocInfo" below).
75 Finally, we order the basic blocks of the function so as to minimise
76 the number of jumps between blocks, by utilising fallthrough wherever
79 The machine-dependent bits break down as follows:
81 * ["MachRegs"] Everything about the target platform's machine
82 registers (and immediate operands, and addresses, which tend to
83 intermingle/interact with registers).
85 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
86 have a module of its own), plus a miscellany of other things
87 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
89 * ["MachCodeGen"] is where 'Cmm' stuff turns into
92 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
95 * ["RegAllocInfo"] In the register allocator, we manipulate
96 'MRegsState's, which are 'BitSet's, one bit per machine register.
97 When we want to say something about a specific machine register
98 (e.g., ``it gets clobbered by this instruction''), we set/unset
99 its bit. Obviously, we do this 'BitSet' thing for efficiency
102 The 'RegAllocInfo' module collects together the machine-specific
103 info needed to do register allocation.
105 * ["RegisterAlloc"] The (machine-independent) register allocator.
108 -- -----------------------------------------------------------------------------
109 -- Top-level of the native codegen
-- NB. We *lazily* compile each block of code for space reasons.
-- | Top level of the native code generator: run the full pipeline
-- (generic Cmm optimisation, instruction selection, register
-- allocation, block sequencing) over every top-level Cmm chunk and
-- pretty-print the resulting assembly into a single document.
-- NOTE(review): this view of the file is elided — several lines of
-- the original definition (where-clauses, #endif markers, the
-- non-split branch of add_split) are not shown here.
nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
nativeCodeGen dflags cmms us
  = let (res, _) = initUs us $
           cgCmm (concat (map add_split cmms))
	-- Compile each top-level chunk (lazily, for space — see the NB
	-- above) and collect the optimised Cmm, the assembly document,
	-- and the imported symbols from each.
	cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
	   lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
	   case unzip3 results of { (cmms,docs,imps) ->
	   returnUs (Cmm cmms, my_vcat docs, concat imps)
    case res of { (ppr_cmms, insn_sdoc, imports) -> do
    -- optionally dump the optimised Cmm for -ddump-opt-cmm
    dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
    return (insn_sdoc Pretty.$$ dyld_stubs imports
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmTop.
            Pretty.$$ Pretty.text ".subsections_via_symbols"
#if HAVE_GNU_NONEXEC_STACK
            -- On recent GNU ELF systems one can mark an object file
            -- as not requiring an executable stack. If all objects
            -- linked into a program have this note then the program
            -- will not use an executable stack, which is good for
            -- security. GHC generated code does not need an executable
            -- stack so add the note in:
            Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
	-- With -split-objs, precede each chunk with a split marker so
	-- the assembly can later be cut into separate object files.
	| dopt Opt_SplitObjs dflags = split_marker : tops
	split_marker = CmmProc [] mkSplitMarkerLabel [] []
	-- Generate "symbol stubs" for all external symbols that might
	-- come from a dynamic library.
{-	dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
				    map head $ group $ sort imps-}
	-- (Hack) sometimes two Labels pretty-print the same, but have
	-- different uniques; so we compare their text versions...
	| needImportedSymbols
	    (pprGotDeclaration :) $
	    map (pprImportedSymbol . fst . head) $
	    groupBy (\(_,a) (_,b) -> a == b) $
	    sortBy (\(_,a) (_,b) -> compare a b) $
	  -- render each label once, then group/sort on the rendered text
	  where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
	        astyle = mkCodeStyle AsmStyle
	-- plain concatenation in the normal case; the second (debug)
	-- equation below interleaves ncg debug markers — presumably
	-- selected by CPP in the elided lines. TODO confirm
	my_vcat sds = Pretty.vcat sds
	my_vcat sds = Pretty.vcat (
		     Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
		     Pretty.$$ Pretty.char ' '
190 -- Complete native code generation phase for a single top-level chunk
-- | Complete native code generation phase for a single top-level chunk
-- of Cmm: fix global-register assignments, run the generic Cmm
-- optimiser, select instructions, allocate registers, sequence the
-- basic blocks and apply the x86 FP kludge, then pretty-print.
-- Returns the optimised Cmm (for dumping), the assembly document, and
-- all imported symbols discovered along the way.
cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
cmmNativeGen dflags cmm
   = {-# SCC "fixAssigns" #-}
     fixAssignsTop cmm `thenUs` \ fixed_cmm ->
     {-# SCC "genericOpt" #-}
     cmmToCmm fixed_cmm `bind` \ (cmm, imports) ->
     -- only retain the optimised Cmm when it will actually be dumped;
     -- otherwise substitute an empty data section to avoid a space leak
     (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
        -- NOTE(review): the 'then' branch is elided from this view
	else CmmData Text []) `bind` \ ppr_cmm ->
     {-# SCC "genMachCode" #-}
     genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
     {-# SCC "regAlloc" #-}
     mapUs regAlloc pre_regalloc `thenUs` \ with_regs ->
     {-# SCC "sequenceBlocks" #-}
     map sequenceTop with_regs `bind` \ sequenced ->
     {-# SCC "x86fp_kludge" #-}
     map x86fp_kludge sequenced `bind` \ final_mach_code ->
     -- render all tops into one document
     Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
     returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
-- | Insert ffree instructions into each basic block (via
-- 'i386_insert_ffrees') — presumably to leave the x87 FP stack empty
-- at block boundaries on i386; TODO confirm against MachInstrs.
-- Data sections pass through unchanged.
-- NOTE(review): the CPP lines selecting the i386 vs. other-arch
-- equations are elided from this view; the final catch-all equation
-- is the non-i386 identity case.
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge top@(CmmProc info lbl params code) = 
	CmmProc info lbl params (map bb_i386_insert_ffrees code)
	-- per-block worker: rewrite the instruction stream, keep the id
	bb_i386_insert_ffrees (BasicBlock id instrs) =
		BasicBlock id (i386_insert_ffrees instrs)
x86fp_kludge top = top
227 -- -----------------------------------------------------------------------------
228 -- Sequencing the basic blocks
230 -- Cmm BasicBlocks are self-contained entities: they always end in a
231 -- jump, either non-local or to another basic block in the same proc.
232 -- In this phase, we attempt to place the basic blocks in a sequence
233 -- such that as many of the local jumps as possible turn into
-- | Reorder the basic blocks of one top-level chunk so that local
-- jumps can become fallthroughs where possible.  Data sections pass
-- through unchanged.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop top =
  case top of
    CmmData _ _ -> top
    CmmProc info lbl params blocks ->
      CmmProc info lbl params (sequenceBlocks blocks)
241 -- The algorithm is very simple (and stupid): we make a graph out of
242 -- the blocks where there is an edge from one block to another iff the
243 -- first block ends by jumping to the second. Then we topologically
244 -- sort this graph. Then traverse the list: for each block, we first
245 -- output the block, then if it has an out edge, we move the
246 -- destination of the out edge to the front of the list, and continue.
-- | Arrange a procedure's blocks.  The entry block must stay first;
-- the remaining blocks are topologically sorted on their jump edges
-- and then threaded through 'seqBlocks' to exploit fallthroughs.
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks [] = []
sequenceBlocks (entry : rest) =
  let sorted = reverse (flattenSCCs (sccBlocks rest))
  in  seqBlocks (mkNode entry : sorted)
  -- the first block is the entry point ==> it must remain at the start.
-- | Build the jump graph over the given blocks (one node per block,
-- keyed by the block label's unique) and return its strongly
-- connected components with keys and edges attached.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks = stronglyConnCompR . map mkNode
-- | The out-edges of a block: the unique of the jump target of the
-- block's final instruction, when that instruction has exactly one
-- destination.
-- NOTE(review): the catch-all alternative (zero or multiple
-- destinations, which yields no edges) is elided from this view.
getOutEdges :: [Instr] -> [Unique]
getOutEdges instrs = case jumpDests (last instrs) [] of
			[one] -> [getUnique one]
	-- we're only interested in the last instruction of
	-- the block, and only if it has a single destination.
-- | Graph node for one basic block: the block itself, keyed by the
-- unique of its label, with out-edges taken from its final jump.
mkNode block@(BasicBlock bid body) = (block, getUnique bid, getOutEdges body)
-- | Emit blocks in order, dropping a block's final jump whenever its
-- (single) jump target can be placed immediately after it, so control
-- falls through instead of jumping.
seqBlocks ((block,_,[]) : rest)
  -- no out-edge: emit the block and continue
  = block : seqBlocks rest
seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
  -- single out-edge: if the target can be moved to the front of the
  -- remaining list, drop the final jump ('init instrs') and fall through
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
  | otherwise	    = block : seqBlocks rest'
	(can_fallthrough, rest') = reorder next [] rest
	-- TODO: we should do a better job for cycles; try to maximise the
	-- fallthroughs within a loop.
seqBlocks _ = panic "AsmCodegen:seqBlocks"
	-- NOTE(review): panic tag spells "AsmCodegen" while the module is
	-- "AsmCodeGen" — harmless, but worth normalising someday.
-- | Search @nodes@ for the node labelled @want@; if found, move it to
-- the front so the caller can fall through into it.  @skipped@ holds
-- already-inspected nodes in reverse order.  Returns whether the node
-- was found, together with the reordered remainder.
reorder want skipped nodes =
  case break (\(_, nid, _) -> nid == want) nodes of
    (before, (blk, _, edges) : after)
        -> (True,  (blk, want, edges) : reverse skipped ++ before ++ after)
    (before, [])
        -> (False, reverse skipped ++ before)
283 -- -----------------------------------------------------------------------------
284 -- Instruction selection
286 -- Native code instruction selection for a chunk of stix code. For
287 -- this part of the computation, we switch from the UniqSM monad to
288 -- the NatM monad. The latter carries not only a Unique, but also an
289 -- Int denoting the current C stack pointer offset in the generated
290 -- code; this is needed for creating correct spill offsets on
291 -- architectures which don't offer, or for which it would be
292 -- prohibitively expensive to employ, a frame pointer register. Viz,
295 -- The offset is measured in bytes, and indicates the difference
296 -- between the current (simulated) C stack-ptr and the value it was at
297 -- the beginning of the block. For stacks which grow down, this value
298 -- should be either zero or negative.
300 -- Switching between the two monads whilst carrying along the same
301 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection for one top-level Cmm chunk: run the
-- machine-specific 'cmmTopCodeGen' inside the NatM state (which
-- threads the unique supply, the simulated C-stack delta, and the
-- list of imported symbols).  The final stack delta must be zero,
-- otherwise spill offsets in the generated code would be wrong —
-- hence the pprPanic below.
genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
genMachCode cmm_top initial_us
  = let initial_st = mkNatM_State initial_us 0
        (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
        final_us = natm_us final_st
        final_delta = natm_delta final_st
        final_imports = natm_imports final_st
        -- NOTE(review): the 'in if final_delta == 0' line is elided
        -- from this view
        then ((new_tops, final_imports), final_us)
        else pprPanic "genMachCode: nonzero final delta"
317 -- -----------------------------------------------------------------------------
318 -- Fixup assignments to global registers so that they assign to
319 -- locations within the RegTable, if appropriate.
321 -- Note that we currently don't fixup reads here: they're done by
322 -- the generic optimiser below, to avoid having two separate passes
-- | Rewrite assignments to global registers throughout one top-level
-- chunk so that registers without a machine mapping go through the
-- register table.  Data sections need no fixing.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop top@(CmmData _ _) = returnUs top
fixAssignsTop (CmmProc info lbl params blocks)
  = mapUs fixAssignsBlock blocks `thenUs` \ fixed_blocks ->
    returnUs (CmmProc info lbl params fixed_blocks)
-- | Apply the global-register assignment fixup to every statement of
-- one basic block, keeping the block's label.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock blk_id body)
  = fixAssigns body `thenUs` \ fixed_body ->
    returnUs (BasicBlock blk_id fixed_body)
-- | Fix up a statement list.  A single statement may expand into
-- several (e.g. caller-save/restore around a call), so the per-
-- statement results are concatenated.
-- NOTE(review): the equation's left-hand side ('fixAssigns stmts =')
-- is elided from this view.
fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
	mapUs fixAssign stmts `thenUs` \ stmtss ->
	returnUs (concat stmtss)
-- | Fix up a single statement.  Assignments to global registers that
-- live only in the register table become stores through BaseReg;
-- calls get caller-save/restore wrapping, with table-resident result
-- registers redirected through fresh local registers.
fixAssign :: CmmStmt -> UniqSM [CmmStmt]
fixAssign (CmmAssign (CmmGlobal BaseReg) src)
   -- assigning to BaseReg is always illegal (see comment below)
   = panic "cmmStmtConFold: assignment to BaseReg";
fixAssign (CmmAssign (CmmGlobal reg) src)
  -- register has a real machine register: keep the assignment
  | Left realreg <- reg_or_addr
  = returnUs [CmmAssign (CmmGlobal reg) src]
  -- register lives in the register table: turn it into a store
  | Right baseRegAddr <- reg_or_addr
  = returnUs [CmmStore baseRegAddr src]
  -- Replace register leaves with appropriate StixTrees for
  -- the given target. GlobalRegs which map to a reg on this
  -- arch are left unchanged. Assigning to BaseReg is always
  -- illegal, so we check for that.
	reg_or_addr = get_GlobalReg_reg_or_addr reg
fixAssign (CmmCall target results args vols)
  = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
    returnUs (caller_save ++
	      CmmCall target results' args vols :
	-- we also save/restore any caller-saves STG registers here
	(caller_save, caller_restore) = callerSaveVolatileRegs vols
	-- a result going to a real register is left alone; one going to
	-- the register table is received into a fresh local register and
	-- then stored into the table afterwards
	fixResult g@(CmmGlobal reg,hint) =
	  case get_GlobalReg_reg_or_addr reg of
	     Left realreg -> returnUs (g, [])
		getUniqueUs `thenUs` \ uq ->
		let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
		returnUs ((local,hint),
			  [CmmStore baseRegAddr (CmmReg local)])
-- anything else passes through unchanged
fixAssign other_stmt = returnUs [other_stmt]
380 -- -----------------------------------------------------------------------------
381 -- Generic Cmm optimiser
387 (b) Simple inlining: a temporary which is assigned to and then
388 used, once, can be shorted.
389 (c) Replacement of references to GlobalRegs which do not have
390 machine registers by the appropriate memory load (eg.
391 Hp ==> *(BaseReg + 34) ).
392 (d) Position independent code and dynamic linking
393 (i) introduce the appropriate indirections
394 and position independent refs
395 (ii) compile a list of imported symbols
397 Ideas for other things we could do (ToDo):
399 - shortcut jumps-to-jumps
400 - eliminate dead code blocks
401 - simple CSE: if an expr is assigned to a temp, then replace later occs of
402 that expr with the temp, until the expr is no longer valid (can push through
403 temp assignments, and certain assigns to mem...)
-- | Run the generic Cmm optimiser over one top-level chunk:
-- mini-inline single-use temporaries, then constant-fold every block,
-- collecting any imported symbols recorded while rewriting references
-- for position-independent code / dynamic linking.
cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
cmmToCmm top@(CmmData _ _) = (top, [])
cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $
  do let inlined = cmmMiniInline blocks
     folded <- mapM cmmBlockConFold inlined
     return (CmmProc info lbl params folded)
-- | A tiny state monad for the Cmm optimiser: threads the list of
-- imported labels (symbols needing dynamic-linking stubs) through the
-- optimisation, using an unboxed pair to avoid allocation.
newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))
instance Monad CmmOptM where
  return x = CmmOptM $ \imports -> (# x,imports #)
  -- NOTE(review): the (>>=) equation is partially elided from this
  -- view; it runs the first action, then feeds the updated import
  -- list to the continuation's action.
	CmmOptM $ \imports ->
              CmmOptM g' -> g' imports'
-- | Record that @lbl@ is imported (e.g. from a dynamic library) so a
-- symbol stub can be emitted for it later.
addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl = CmmOptM (\imps -> (# (), lbl : imps #))
-- | Run a Cmm optimisation starting with no imported symbols; return
-- its result together with every import recorded along the way.
runCmmOpt :: CmmOptM a -> (a, [CLabel])
runCmmOpt (CmmOptM act) =
  case act [] of
    (# result, imports #) -> (result, imports)
-- | Constant-fold every statement of one basic block, preserving the
-- block's label.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock blk_id body) = do
  folded <- mapM cmmStmtConFold body
  return (BasicBlock blk_id folded)
-- NOTE(review): the head of 'cmmStmtConFold' (its type signature and
-- the opening 'case stmt of' with the CmmAssign/CmmStore/CmmJump
-- patterns) is elided from this view; the alternatives below fold
-- each statement form.
	-- assignment: fold the source; if it folds to a read of the same
	-- register, the whole statement is a no-op
        -> do src' <- cmmExprConFold False src
              return $ case src' of
                CmmReg reg' | reg == reg' -> CmmNop
                new_src -> CmmAssign reg new_src
	-- store: fold both the address and the stored value
        -> do addr' <- cmmExprConFold False addr
              src' <- cmmExprConFold False src
              return $ CmmStore addr' src'
	-- jump: fold the target as a jump target (True), which may route
	-- it through dynamic-linking indirections
        -> do addr' <- cmmExprConFold True addr
              return $ CmmJump addr' regs
        CmmCall target regs args vols
	   -- call: fold the target expression (foreign calls only) and
	   -- every argument; hints are preserved
           -> do target' <- case target of
                              CmmForeignCall e conv -> do
                                e' <- cmmExprConFold True e
                                return $ CmmForeignCall e' conv
                              other -> return other
                 args' <- mapM (\(arg, hint) -> do
                                  arg' <- cmmExprConFold False arg
                                  return (arg', hint)) args
                 return $ CmmCall target' regs args' vols
        CmmCondBranch test dest
	   -- conditional branch: a condition folding to 0 deletes the
	   -- branch (kept as a comment for the dump); any other constant
	   -- makes it unconditional
           -> do test' <- cmmExprConFold False test
                 return $ case test' of
                   CmmLit (CmmInt 0 _) ->
                     CmmComment (mkFastString ("deleted: " ++
                                        showSDoc (pprStmt stmt)))
                   CmmLit (CmmInt n _) -> CmmBranch dest
                   other -> CmmCondBranch test' dest
	-- switch: fold the scrutinee
        -> do expr' <- cmmExprConFold False expr
              return $ CmmSwitch expr' ids
-- | Constant-fold one Cmm expression, rewriting label references for
-- PIC/dynamic linking (collecting imports as a side effect in
-- CmmOptM) and expanding global registers that live in the register
-- table into loads through BaseReg.  @isJumpTarget@ tells the
-- dynamic-reference machinery whether the label is being jumped to.
-- NOTE(review): the type signature, the opening 'case expr of', and
-- several connective lines are elided from this view.
cmmExprConFold isJumpTarget expr
	-- load: fold the address
        -> do addr' <- cmmExprConFold False addr
              return $ CmmLoad addr' rep
        -- For MachOps, we first optimize the children, and then we try
        -- our hand at some constant-folding.
        -> do args' <- mapM (cmmExprConFold False) args
              return $ cmmMachOpFold mop args'
	-- label reference: may become an indirection through the GOT /
	-- symbol stub; records the import as a side effect
        CmmLit (CmmLabel lbl)
           -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
	-- label+offset: rewrite the label as above, then re-add the
	-- offset with a foldable MachOp
        CmmLit (CmmLabelOff lbl off)
           -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
                 return $ cmmMachOpFold (MO_Add wordRep) [
                     (CmmLit $ CmmInt (fromIntegral off) wordRep)
#if powerpc_TARGET_ARCH
        -- On powerpc (non-PIC), it's easier to jump directly to a label than
        -- to use the register table, so we replace these registers
        -- with the corresponding labels:
        CmmReg (CmmGlobal GCEnter1)
           -> cmmExprConFold isJumpTarget $
              CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
        CmmReg (CmmGlobal GCFun)
           -> cmmExprConFold isJumpTarget $
              CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
        CmmReg (CmmGlobal mid)
           -- Replace register leaves with appropriate StixTrees for
           -- the given target. MagicIds which map to a reg on this
           -- arch are left unchanged. For the rest, BaseReg is taken
           -- to mean the address of the reg table in MainCapability,
           -- and for all others we generate an indirection to its
           -- location in the register table.
           -> case get_GlobalReg_reg_or_addr mid of
                Left realreg -> return expr
                   BaseReg -> cmmExprConFold False baseRegAddr
                   other   -> cmmExprConFold False (CmmLoad baseRegAddr
        -- eliminate zero offsets
           -> cmmExprConFold False (CmmReg reg)
        CmmRegOff (CmmGlobal mid) offset
           -- RegOff leaves are just a shorthand form. If the reg maps
           -- to a real reg, we keep the shorthand, otherwise, we just
           -- expand it and defer to the above code.
           -> case get_GlobalReg_reg_or_addr mid of
                Left realreg -> return expr
                   -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [
                          CmmReg (CmmGlobal mid),
                          CmmLit (CmmInt (fromIntegral offset)