1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
13 #include "nativeGen/NCG.h"
22 import PositionIndependentCode
25 import CmmOpt ( cmmMiniInline, cmmMachOpFold )
26 import PprCmm ( pprStmt, pprCmms )
29 #if powerpc_TARGET_ARCH
30 import CLabel ( mkRtsCodeLabel )
34 import Unique ( Unique, getUnique )
37 import List ( groupBy, sortBy )
38 import CLabel ( pprCLabel )
39 import ErrUtils ( dumpIfSet_dyn )
41 import StaticFlags ( opt_Static, opt_PIC )
43 import Config ( cProjectVersion )
46 import qualified Pretty
54 import List ( intersperse )
63 The native-code generator has machine-independent and
64 machine-dependent modules.
66 This module ("AsmCodeGen") is the top-level machine-independent
67 module. Before entering machine-dependent land, we do some
68 machine-independent optimisations (defined below) on the
71 We convert to the machine-specific 'Instr' datatype with
72 'cmmCodeGen', assuming an infinite supply of registers. We then use
73 a machine-independent register allocator ('regAlloc') to rejoin
74 reality. Obviously, 'regAlloc' has machine-specific helper
75 functions (see about "RegAllocInfo" below).
77 Finally, we order the basic blocks of the function so as to minimise
78 the number of jumps between blocks, by utilising fallthrough wherever
81 The machine-dependent bits break down as follows:
83 * ["MachRegs"] Everything about the target platform's machine
84 registers (and immediate operands, and addresses, which tend to
85 intermingle/interact with registers).
87 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
88 have a module of its own), plus a miscellany of other things
89 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
91 * ["MachCodeGen"] is where 'Cmm' stuff turns into
94 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
97 * ["RegAllocInfo"] In the register allocator, we manipulate
98 'MRegsState's, which are 'BitSet's, one bit per machine register.
99 When we want to say something about a specific machine register
100 (e.g., ``it gets clobbered by this instruction''), we set/unset
101 its bit. Obviously, we do this 'BitSet' thing for efficiency
104 The 'RegAllocInfo' module collects together the machine-specific
105 info needed to do register allocation.
107 * ["RegisterAlloc"] The (machine-independent) register allocator.
110 -- -----------------------------------------------------------------------------
111 -- Top-level of the native codegen
-- NB. We *lazily* compile each block of code for space reasons.
-- | Top-level entry of the native code generator.  Splits the input
-- Cmm groups (for -split-objs), compiles each top-level chunk lazily
-- via 'cmmNativeGen', and glues the resulting documents together with
-- the platform-specific trailer directives and dynamic-linking stubs.
-- NOTE(review): this is a sampled view of the original file; several
-- intervening source lines (including some closing braces, CPP
-- @#endif@s and equation heads) are not visible here.
nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
nativeCodeGen dflags cmms us
 = let (res, _) = initUs us $
           cgCmm (concat (map add_split cmms))

       -- Compile every top-level chunk, collecting the optimised Cmm
       -- (kept only for -ddump-opt-cmm), the instruction document,
       -- and the list of imported symbols.
       cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
           lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
           case unzip3 results of { (cmms,docs,imps) ->
           returnUs (Cmm cmms, my_vcat docs, concat imps)

   case res of { (ppr_cmms, insn_sdoc, imports) -> do
   dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
   return (insn_sdoc Pretty.$$ dyld_stubs imports
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmTop.
            Pretty.$$ Pretty.text ".subsections_via_symbols"
#if HAVE_GNU_NONEXEC_STACK
            -- On recent GNU ELF systems one can mark an object file
            -- as not requiring an executable stack. If all objects
            -- linked into a program have this note then the program
            -- will not use an executable stack, which is good for
            -- security. GHC generated code does not need an executable
            -- stack so add the note in:
            Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
#if !defined(darwin_TARGET_OS)
            -- And just because every other compiler does, let's stick in
            -- an identifier directive: .ident "GHC x.y.z"
            Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
                                          Pretty.text cProjectVersion
                       in Pretty.text ".ident" Pretty.<+>
                          Pretty.doubleQuotes compilerIdent

        -- add_split: with -split-objs, precede each chunk with a split
        -- marker proc so the assembly post-processor can cut the file.
        -- NOTE(review): the other guard of add_split is not visible.
        | dopt Opt_SplitObjs dflags = split_marker : tops

        split_marker = CmmProc [] mkSplitMarkerLabel [] []

        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
{-      dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
                                    map head $ group $ sort imps-}

        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        | needImportedSymbols
          (pprGotDeclaration :) $
          map (pprImportedSymbol . fst . head) $
          groupBy (\(_,a) (_,b) -> a == b) $
          sortBy (\(_,a) (_,b) -> compare a b) $
          where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
                astyle = mkCodeStyle AsmStyle

        -- my_vcat: plain vcat normally; in the debug variant each doc
        -- is bracketed with ___ncg_debug_marker comments.
        my_vcat sds = Pretty.vcat sds
        my_vcat sds = Pretty.vcat (
                      Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
                      Pretty.$$ Pretty.char ' '
200 -- Complete native code generation phase for a single top-level chunk
-- | Complete native code generation for a single top-level chunk:
-- fix global-register assignments, run the generic Cmm optimiser,
-- select instructions, allocate registers, shortcut branches,
-- sequence blocks for fallthrough, apply the x86 FP kludge, and
-- finally pretty-print.  Returns the (possibly elided) optimised Cmm,
-- the rendered code, and the imported symbols discovered on the way.
-- NOTE(review): the 'then' branch of the dump test (returning the real
-- optimised Cmm) is not visible in this sampled view.
cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
cmmNativeGen dflags cmm
   = {-# SCC "fixAssigns"       #-}
     fixAssignsTop cmm          `thenUs` \ fixed_cmm ->
     {-# SCC "genericOpt"       #-}
     cmmToCmm fixed_cmm         `bind`   \ (cmm, imports) ->
     (if dopt Opt_D_dump_opt_cmm dflags  -- space leak avoidance
        -- when not dumping, keep an empty stand-in instead of the
        -- optimised Cmm so it can be garbage collected
        else CmmData Text [])   `bind`   \ ppr_cmm ->
     {-# SCC "genMachCode" #-}
     genMachCode cmm            `thenUs` \ (pre_regalloc, lastMinuteImports) ->
     {-# SCC "regAlloc" #-}
     mapUs regAlloc pre_regalloc `thenUs` \ with_regs ->
     {-# SCC "shortcutBranches" #-}
     shortcutBranches dflags with_regs `bind` \ shorted ->
     {-# SCC "sequenceBlocks" #-}
     map sequenceTop shorted     `bind` \ sequenced ->
     {-# SCC "x86fp_kludge" #-}
     map x86fp_kludge sequenced  `bind` \ final_mach_code ->
     Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
     returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
-- | x86-only pass: insert @ffree@s to keep the x87 FP register stack
-- balanced in each basic block; identity on data sections (and, per
-- the final equation, on other architectures).
-- NOTE(review): the CPP conditionals and the 'where' line between
-- these equations are missing from this sampled view.
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge top@(CmmProc info lbl params code) =
        CmmProc info lbl params (map bb_i386_insert_ffrees code)
        -- rewrite the instruction stream of one basic block
        bb_i386_insert_ffrees (BasicBlock id instrs) =
           BasicBlock id (i386_insert_ffrees instrs)
x86fp_kludge top = top
239 -- -----------------------------------------------------------------------------
240 -- Sequencing the basic blocks
242 -- Cmm BasicBlocks are self-contained entities: they always end in a
243 -- jump, either non-local or to another basic block in the same proc.
244 -- In this phase, we attempt to place the basic blocks in a sequence
245 -- such that as many of the local jumps as possible turn into
-- | Reorder the basic blocks of one top-level chunk so that as many
-- local jumps as possible become fallthroughs, then (on PowerPC)
-- rewrite out-of-range conditional branches.  Data sections are
-- returned untouched.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop top
  = case top of
      CmmData _ _ -> top
      CmmProc info lbl params blks ->
        CmmProc info lbl params (makeFarBranches (sequenceBlocks blks))
253 -- The algorithm is very simple (and stupid): we make a graph out of
254 -- the blocks where there is an edge from one block to another iff the
255 -- first block ends by jumping to the second. Then we topologically
256 -- sort this graph. Then traverse the list: for each block, we first
257 -- output the block, then if it has an out edge, we move the
258 -- destination of the out edge to the front of the list, and continue.
-- | Order the blocks of a procedure for fallthrough.  The entry block
-- must remain first (external code jumps to it); the remaining blocks
-- are topologically sorted via their jump graph and then greedily
-- sequenced by 'seqBlocks'.
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks [] = []
sequenceBlocks (entry : rest)
  = seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks rest)))
-- | Strongly-connected components of the jump graph over the given
-- blocks, keeping node and edge information in each SCC.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks = stronglyConnCompR . map mkNode
-- | Out-edges of a basic block: the unique of the jump destination,
-- but only when the block's final instruction has exactly one target.
-- NOTE(review): the catch-all alternative (presumably returning [])
-- is missing from this sampled view; 'last' assumes the block is
-- non-empty.
getOutEdges :: [Instr] -> [Unique]
getOutEdges instrs = case jumpDests (last instrs) [] of
                       [one] -> [getUnique one]
        -- we're only interested in the last instruction of
        -- the block, and only if it has a single destination.
-- | Graph node for a block: the block itself, keyed by the unique of
-- its block id, with edges to its (single) local jump destination.
mkNode blk@(BasicBlock blkId is) = (blk, getUnique blkId, getOutEdges is)
-- Greedy sequencing: emit each block in turn; when a block has a
-- single out-edge whose target is still pending, move the target to
-- the front and drop the block's final jump so control falls through.
-- NOTE(review): the 'where' line between the guarded equation and its
-- local binding is missing from this sampled view.
seqBlocks ((block,_,[]) : rest)
  = block : seqBlocks rest
seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
  | otherwise       = block : seqBlocks rest'
        (can_fallthrough, rest') = reorder next [] rest
        -- TODO: we should do a better job for cycles; try to maximise the
        -- fallthroughs within a loop.
seqBlocks _ = panic "AsmCodegen:seqBlocks"
-- | Search the pending list for the node labelled @want@.  On success,
-- bring that node to the front (so the preceding block can fall
-- through to it) and report True; otherwise report False with the
-- list unchanged.  Skipped nodes keep their relative order.
reorder want seen [] = (False, reverse seen)
reorder want seen (candidate@(blk, lbl, outs) : remaining)
  | want == lbl = (True, (blk, want, outs) : reverse seen ++ remaining)
  | otherwise   = reorder want (candidate : seen) remaining
296 -- -----------------------------------------------------------------------------
297 -- Making far branches
299 -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
300 -- big, we have to work around this limitation.
-- | PowerPC only: conditional branches reach just +/-32KB, so when a
-- proc grows large enough that a branch might be out of range, rewrite
-- the offending conditional branches.  Identity when the whole proc
-- fits within the near limit.
-- NOTE(review): the 'where' line, the far-branch rewrite RHS, and the
-- non-PowerPC CPP branch are missing from this sampled view.
makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock]
#if powerpc_TARGET_ARCH
makeFarBranches blocks
    | last blockAddresses < nearLimit = blocks
    | otherwise = zipWith handleBlock blockAddresses blocks
    -- running start address of each block, measured in instructions
    blockAddresses = scanl (+) 0 $ map blockLen blocks
    blockLen (BasicBlock _ instrs) = length instrs
    handleBlock addr (BasicBlock id instrs)
            = BasicBlock id (zipWith makeFar [addr..] instrs)
    -- unconditional branches already have unlimited range
    makeFar addr (BCC ALWAYS tgt) = BCC ALWAYS tgt
    makeFar addr (BCC cond tgt)
        | abs (addr - targetAddr) >= nearLimit
        where Just targetAddr = lookupUFM blockAddressMap tgt
    makeFar addr other = other
    nearLimit = 7000 -- 8192 instructions are allowed; let's keep some
                     -- distance, as we have a few pseudo-insns that are
                     -- pretty-printed as multiple instructions,
                     -- and it's just not worth the effort to calculate
    blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses
335 -- -----------------------------------------------------------------------------
-- | Shortcut jumps-to-jumps: build a mapping from blocks that consist
-- of nothing but a shortcuttable jump, then rewrite all branches (and
-- jump-table entries) through that mapping.  Skipped below -O.
-- NOTE(review): the 'where' line is missing from this sampled view.
shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
shortcutBranches dflags tops
  | optLevel dflags < 1 = tops    -- only with -O or higher
  | otherwise           = map (apply_mapping mapping) tops'
    (tops', mappings) = mapAndUnzip build_mapping tops
    -- union of the per-proc shortcut mappings
    mapping = foldr plusUFM emptyUFM mappings
-- Build, for one top-level chunk, the BlockId -> JumpDest mapping of
-- blocks that consist solely of a shortcuttable jump, and drop those
-- blocks from the proc (the branches into them get rewritten later).
-- NOTE(review): several lines (including the 'where' and the Left
-- result of 'split') are missing from this sampled view.
build_mapping top@(CmmData _ _) = (top, emptyUFM)
build_mapping (CmmProc info lbl params [])
  = (CmmProc info lbl params [], emptyUFM)
build_mapping (CmmProc info lbl params (head:blocks))
  = (CmmProc info lbl params (head:others), mapping)
        -- drop the shorted blocks, but don't ever drop the first one,
        -- because it is pointed to by a global label.

        -- find all the blocks that just consist of a jump that can be
        -- shortcut
        (shortcut_blocks, others) = partitionWith split blocks
        split (BasicBlock id [insn]) | Just dest <- canShortcut insn
        split other = Right other

        -- build a mapping from BlockId to JumpDest for shorting branches
        mapping = foldl add emptyUFM shortcut_blocks
        add ufm (id,dest) = addToUFM ufm id dest
-- Rewrite one top-level chunk through the shortcut mapping: statics
-- (jump tables) in data sections, and every instruction in procs.
-- NOTE(review): blank/'where' lines between these equations are
-- missing from this sampled view.
apply_mapping ufm (CmmData sec statics)
  = CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics)
  -- we need to get the jump tables, so apply the mapping to the entries
apply_mapping ufm (CmmProc info lbl params blocks)
  = CmmProc info lbl params (map short_bb blocks)
        -- $! forces each rewritten block so we don't retain the mapping
        short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
        short_insn i = shortcutJump (lookupUFM ufm) i
        -- shortcutJump should apply the mapping repeatedly,
        -- just in case we can short multiple branches.
377 -- -----------------------------------------------------------------------------
378 -- Instruction selection
380 -- Native code instruction selection for a chunk of stix code. For
381 -- this part of the computation, we switch from the UniqSM monad to
382 -- the NatM monad. The latter carries not only a Unique, but also an
383 -- Int denoting the current C stack pointer offset in the generated
384 -- code; this is needed for creating correct spill offsets on
385 -- architectures which don't offer, or for which it would be
386 -- prohibitively expensive to employ, a frame pointer register. Viz,
389 -- The offset is measured in bytes, and indicates the difference
390 -- between the current (simulated) C stack-ptr and the value it was at
391 -- the beginning of the block. For stacks which grow down, this value
392 -- should be either zero or negative.
394 -- Switching between the two monads whilst carrying along the same
395 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection for one top-level chunk: run
-- 'cmmTopCodeGen' in the NatM monad, threading the unique supply and
-- a simulated C-stack delta that must return to zero by the end of
-- the chunk (otherwise we panic -- spills would be misaligned).
-- NOTE(review): the equation head binding @cmm_top@ is missing from
-- this sampled view.
genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
 = do { initial_us <- getUs
      ; let initial_st           = mkNatM_State initial_us 0
            (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
            final_us             = natm_us final_st
            final_delta          = natm_delta final_st
            final_imports        = natm_imports final_st
      ; if final_delta == 0
        then return (new_tops, final_imports)
        else pprPanic "genMachCode: nonzero final delta" (int final_delta)
411 -- -----------------------------------------------------------------------------
412 -- Fixup assignments to global registers so that they assign to
413 -- locations within the RegTable, if appropriate.
415 -- Note that we currently don't fixup reads here: they're done by
416 -- the generic optimiser below, to avoid having two separate passes
-- | Rewrite assignments to global registers throughout one top-level
-- chunk; data sections pass through unchanged.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop top = case top of
  CmmData _ _ -> returnUs top
  CmmProc info lbl params blocks ->
    mapUs fixAssignsBlock blocks `thenUs` \ fixed ->
    returnUs (CmmProc info lbl params fixed)
-- | Apply the global-register assignment fixup to every statement in
-- a single basic block, preserving the block id.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock blockId body) =
  fixAssigns body `thenUs` \ body' ->
  returnUs (BasicBlock blockId body')
-- | Fix up a statement list.  Each statement may expand to several
-- statements (e.g. a call grows caller-save/restore code), hence the
-- concat.
-- NOTE(review): the equation head binding @stmts@ is missing from
-- this sampled view.
fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
        mapUs fixAssign stmts `thenUs` \ stmtss ->
        returnUs (concat stmtss)
-- | Fix one statement: assignments to global registers that live in
-- the register table become stores; calls get caller-save/restore
-- wrapping and their global-register results redirected through fresh
-- local temporaries.  Everything else passes through.
-- NOTE(review): several lines are missing from this sampled view
-- (the 'where' lines, the restore/stores tail of the call case, the
-- Right alternative head inside fixResult, and its catch-all).
fixAssign :: CmmStmt -> UniqSM [CmmStmt]
fixAssign (CmmAssign (CmmGlobal BaseReg) src)
   = panic "cmmStmtConFold: assignment to BaseReg";

fixAssign (CmmAssign (CmmGlobal reg) src)
  | Left  realreg <- reg_or_addr
  = returnUs [CmmAssign (CmmGlobal reg) src]
  | Right baseRegAddr <- reg_or_addr
  = returnUs [CmmStore baseRegAddr src]
        -- Replace register leaves with appropriate StixTrees for
        -- the given target.  GlobalRegs which map to a reg on this
        -- arch are left unchanged.  Assigning to BaseReg is always
        -- illegal, so we check for that.
        reg_or_addr = get_GlobalReg_reg_or_addr reg

fixAssign (CmmCall target results args vols)
  = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
    returnUs (caller_save ++
              CmmCall target results' args vols :
        -- we also save/restore any caller-saves STG registers here
        (caller_save, caller_restore) = callerSaveVolatileRegs vols
        -- a result landing in a memory-mapped global register goes via
        -- a fresh local temporary plus an explicit store
        fixResult g@(CmmGlobal reg,hint) =
          case get_GlobalReg_reg_or_addr reg of
            Left realreg -> returnUs (g, [])
              getUniqueUs `thenUs` \ uq ->
              let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
              returnUs ((local,hint),
                        [CmmStore baseRegAddr (CmmReg local)])

fixAssign other_stmt = returnUs [other_stmt]
474 -- -----------------------------------------------------------------------------
475 -- Generic Cmm optimiser
481 (b) Simple inlining: a temporary which is assigned to and then
482 used, once, can be shorted.
483 (c) Replacement of references to GlobalRegs which do not have
484 machine registers by the appropriate memory load (eg.
485 Hp ==> *(BaseReg + 34) ).
486 (d) Position independent code and dynamic linking
487 (i) introduce the appropriate indirections
488 and position independent refs
489 (ii) compile a list of imported symbols
491 Ideas for other things we could do (ToDo):
493 - shortcut jumps-to-jumps
494 - eliminate dead code blocks
495 - simple CSE: if an expr is assigned to a temp, then replace later occs of
496 that expr with the temp, until the expr is no longer valid (can push through
497 temp assignments, and certain assigns to mem...)
-- | Machine-independent Cmm-to-Cmm optimiser: mini-inline single-use
-- temporaries, then constant-fold every block, collecting any
-- imported symbols discovered while building dynamic references.
cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
cmmToCmm top@(CmmData _ _) = (top, [])
cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $
  mapM cmmBlockConFold (cmmMiniInline blocks) >>= \ blocks' ->
  return (CmmProc info lbl params blocks')
-- | A tiny state monad threading the list of imported labels
-- collected so far; the unboxed result pair avoids allocation.
newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))

instance Monad CmmOptM where
  return x = CmmOptM $ \imports -> (# x,imports #)
  -- NOTE(review): the (>>=) definition is only partially visible in
  -- this sampled view; the head and middle lines are missing.
    CmmOptM $ \imports ->
            CmmOptM g' -> g' imports'
-- | Record an imported symbol; all recorded labels are handed back by
-- 'runCmmOpt'.
addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl = CmmOptM (\ imps -> (# (), lbl : imps #))
-- | Run a 'CmmOptM' computation starting from an empty import list,
-- returning its result together with every import it recorded.
runCmmOpt :: CmmOptM a -> (a, [CLabel])
runCmmOpt (CmmOptM f) =
  case f [] of
    (# res, imps #) -> (res, imps)
-- | Constant-fold every statement in one basic block, keeping the
-- block id.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock blockId body) = do
  body' <- mapM cmmStmtConFold body
  return (BasicBlock blockId body')
-- NOTE(review): fragment of cmmStmtConFold -- its signature, the
-- opening 'case stmt of', and several alternative heads are missing
-- from this sampled view.
        -- CmmAssign: fold the RHS; a resulting self-assignment
        -- (reg := reg) becomes a no-op.
        -> do src' <- cmmExprConFold DataReference src
              return $ case src' of
                        CmmReg reg' | reg == reg' -> CmmNop
                        new_src -> CmmAssign reg new_src

        -- CmmStore: fold both address and stored value.
        -> do addr' <- cmmExprConFold DataReference addr
              src'  <- cmmExprConFold DataReference src
              return $ CmmStore addr' src'

        -- CmmJump: fold the target as a jump reference.
        -> do addr' <- cmmExprConFold JumpReference addr
              return $ CmmJump addr' regs

        CmmCall target regs args vols
          -- fold a foreign-call target as a call reference; other
          -- target forms pass through unchanged
          -> do target' <- case target of
                             CmmForeignCall e conv -> do
                               e' <- cmmExprConFold CallReference e
                               return $ CmmForeignCall e' conv
                             other -> return other
                args' <- mapM (\(arg, hint) -> do
                                 arg' <- cmmExprConFold DataReference arg
                                 return (arg', hint)) args
                return $ CmmCall target' regs args' vols

        CmmCondBranch test dest
          -- a condition that folds to a constant kills the branch:
          -- 0 deletes it (leaving a comment), non-zero makes it
          -- unconditional
          -> do test' <- cmmExprConFold DataReference test
                return $ case test' of
                  CmmLit (CmmInt 0 _) ->
                    CmmComment (mkFastString ("deleted: " ++
                                        showSDoc (pprStmt stmt)))
                  CmmLit (CmmInt n _) -> CmmBranch dest
                  other -> CmmCondBranch test' dest

        -- CmmSwitch: fold the scrutinee only.
        -> do expr' <- cmmExprConFold DataReference expr
              return $ CmmSwitch expr' ids
-- NOTE(review): fragment of cmmExprConFold -- its type signature, the
-- opening 'case expr of', and several alternative heads / closing
-- brackets are missing from this sampled view.  It folds constants,
-- introduces PIC/dynamic-linking indirections for labels, and expands
-- global registers that are not mapped to machine registers into
-- loads from the register table.
cmmExprConFold referenceKind expr
        -- CmmLoad: fold the address.
        -> do addr' <- cmmExprConFold DataReference addr
              return $ CmmLoad addr' rep

        -- For MachOps, we first optimize the children, and then we try
        -- our hand at some constant-folding.
        -> do args' <- mapM (cmmExprConFold DataReference) args
              return $ cmmMachOpFold mop args'

        CmmLit (CmmLabel lbl)
          -- labels may need a dynamic reference (PIC / dynamic
          -- linking); this also records any imported symbol
          -> cmmMakeDynamicReference addImportCmmOpt referenceKind lbl
        CmmLit (CmmLabelOff lbl off)
          -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt referenceKind lbl
                return $ cmmMachOpFold (MO_Add wordRep) [
                    (CmmLit $ CmmInt (fromIntegral off) wordRep)

#if powerpc_TARGET_ARCH
        -- On powerpc (non-PIC), it's easier to jump directly to a label than
        -- to use the register table, so we replace these registers
        -- with the corresponding labels:
        CmmReg (CmmGlobal GCEnter1)
          -> cmmExprConFold referenceKind $
             CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
        CmmReg (CmmGlobal GCFun)
          -> cmmExprConFold referenceKind $
             CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))

        CmmReg (CmmGlobal mid)
          -- Replace register leaves with appropriate StixTrees for
          -- the given target.  MagicIds which map to a reg on this
          -- arch are left unchanged.  For the rest, BaseReg is taken
          -- to mean the address of the reg table in MainCapability,
          -- and for all others we generate an indirection to its
          -- location in the register table.
          -> case get_GlobalReg_reg_or_addr mid of
               Left  realreg -> return expr
                   BaseReg -> cmmExprConFold DataReference baseRegAddr
                   other   -> cmmExprConFold DataReference
                                (CmmLoad baseRegAddr (globalRegRep mid))

        -- eliminate zero offsets
          -> cmmExprConFold referenceKind (CmmReg reg)

        CmmRegOff (CmmGlobal mid) offset
          -- CmmRegOff leaves are just a shorthand form.  If the reg maps
          -- to a real reg, we keep the shorthand, otherwise, we just
          -- expand it and defer to the above code.
          -> case get_GlobalReg_reg_or_addr mid of
               Left realreg -> return expr
                 -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordRep) [
                        CmmReg (CmmGlobal mid),
                        CmmLit (CmmInt (fromIntegral offset)
642 -- -----------------------------------------------------------------------------