1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
13 #include "nativeGen/NCG.h"
16 import qualified X86.CodeGen
17 import qualified X86.Regs
18 import qualified X86.Instr
19 import qualified X86.Ppr
21 import qualified SPARC.CodeGen
22 import qualified SPARC.Regs
23 import qualified SPARC.Instr
24 import qualified SPARC.Ppr
25 import qualified SPARC.ShortcutJump
26 import qualified SPARC.CodeGen.Expand
28 import qualified PPC.CodeGen
29 import qualified PPC.Cond
30 import qualified PPC.Regs
31 import qualified PPC.RegInfo
32 import qualified PPC.Instr
33 import qualified PPC.Ppr
35 import RegAlloc.Liveness
36 import qualified RegAlloc.Linear.Main as Linear
38 import qualified GraphColor as Color
39 import qualified RegAlloc.Graph.Main as Color
40 import qualified RegAlloc.Graph.Stats as Color
41 import qualified RegAlloc.Graph.TrivColorable as Color
52 import CgUtils ( fixStgRegisters )
54 import CmmOpt ( cmmEliminateDeadBlocks, cmmMiniInline, cmmMachOpFold )
59 import Unique ( Unique, getUnique )
67 import qualified Pretty
84 The native-code generator has machine-independent and
85 machine-dependent modules.
87 This module ("AsmCodeGen") is the top-level machine-independent
88 module. Before entering machine-dependent land, we do some
89 machine-independent optimisations (defined below) on the
92 We convert to the machine-specific 'Instr' datatype with
93 'cmmCodeGen', assuming an infinite supply of registers. We then use
94 a machine-independent register allocator ('regAlloc') to rejoin
95 reality. Obviously, 'regAlloc' has machine-specific helper
96 functions (see about "RegAllocInfo" below).
98 Finally, we order the basic blocks of the function so as to minimise
99 the number of jumps between blocks, by utilising fallthrough wherever
102 The machine-dependent bits break down as follows:
104 * ["MachRegs"] Everything about the target platform's machine
105 registers (and immediate operands, and addresses, which tend to
106 intermingle/interact with registers).
108 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
109 have a module of its own), plus a miscellany of other things
110 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
112 * ["MachCodeGen"] is where 'Cmm' stuff turns into
113 machine instructions.
115 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
118 * ["RegAllocInfo"] In the register allocator, we manipulate
119 'MRegsState's, which are 'BitSet's, one bit per machine register.
120 When we want to say something about a specific machine register
121 (e.g., ``it gets clobbered by this instruction''), we set/unset
122 its bit. Obviously, we do this 'BitSet' thing for efficiency
125 The 'RegAllocInfo' module collects together the machine-specific
126 info needed to do register allocation.
128 * ["RegisterAlloc"] The (machine-independent) register allocator.
131 -- -----------------------------------------------------------------------------
132 -- Top-level of the native codegen
134 data NcgImpl instr jumpDest = NcgImpl {
135 cmmTopCodeGen :: DynFlags -> RawCmmTop -> NatM [NatCmmTop instr],
136 generateJumpTableForInstr :: instr -> Maybe (NatCmmTop instr),
137 getJumpDestBlockId :: jumpDest -> Maybe BlockId,
138 canShortcut :: instr -> Maybe jumpDest,
139 shortcutStatic :: (BlockId -> Maybe jumpDest) -> CmmStatic -> CmmStatic,
140 shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
141 pprNatCmmTop :: NatCmmTop instr -> Doc,
142 maxSpillSlots :: Int,
143 allocatableRegs :: [RealReg],
144 ncg_x86fp_kludge :: [NatCmmTop instr] -> [NatCmmTop instr],
145 ncgExpandTop :: [NatCmmTop instr] -> [NatCmmTop instr],
146 ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr]
150 nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO ()
151 nativeCodeGen dflags h us cmms
152 = let nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
153 x86NcgImpl = NcgImpl {
154 cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
155 ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr
156 ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
157 ,canShortcut = X86.Instr.canShortcut
158 ,shortcutStatic = X86.Instr.shortcutStatic
159 ,shortcutJump = X86.Instr.shortcutJump
160 ,pprNatCmmTop = X86.Ppr.pprNatCmmTop
161 ,maxSpillSlots = X86.Instr.maxSpillSlots
162 ,allocatableRegs = X86.Regs.allocatableRegs
163 ,ncg_x86fp_kludge = id
165 ,ncgMakeFarBranches = id
167 in case platformArch $ targetPlatform dflags of
168 ArchX86 -> nCG' (x86NcgImpl { ncg_x86fp_kludge = map x86fp_kludge })
169 ArchX86_64 -> nCG' x86NcgImpl
172 cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
173 ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr
174 ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
175 ,canShortcut = PPC.RegInfo.canShortcut
176 ,shortcutStatic = PPC.RegInfo.shortcutStatic
177 ,shortcutJump = PPC.RegInfo.shortcutJump
178 ,pprNatCmmTop = PPC.Ppr.pprNatCmmTop
179 ,maxSpillSlots = PPC.Instr.maxSpillSlots
180 ,allocatableRegs = PPC.Regs.allocatableRegs
181 ,ncg_x86fp_kludge = id
183 ,ncgMakeFarBranches = makeFarBranches
187 cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
188 ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr
189 ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
190 ,canShortcut = SPARC.ShortcutJump.canShortcut
191 ,shortcutStatic = SPARC.ShortcutJump.shortcutStatic
192 ,shortcutJump = SPARC.ShortcutJump.shortcutJump
193 ,pprNatCmmTop = SPARC.Ppr.pprNatCmmTop
194 ,maxSpillSlots = SPARC.Instr.maxSpillSlots
195 ,allocatableRegs = SPARC.Regs.allocatableRegs
196 ,ncg_x86fp_kludge = id
197 ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
198 ,ncgMakeFarBranches = id
201 panic "nativeCodeGen: No NCG for PPC 64"
203 panic "nativeCodeGen: No NCG for unknown arch"
205 nativeCodeGen' :: (Instruction instr, Outputable instr)
207 -> NcgImpl instr jumpDest
208 -> Handle -> UniqSupply -> [RawCmm] -> IO ()
209 nativeCodeGen' dflags ncgImpl h us cmms
211 let split_cmms = concat $ map add_split cmms
212 -- BufHandle is a performance hack. We could hide it inside
213 -- Pretty if it weren't for the fact that we do lots of little
214 -- printDocs here (in order to do codegen in constant space).
215 bufh <- newBufHandle h
216 (imports, prof) <- cmmNativeGens dflags ncgImpl bufh us split_cmms [] [] 0
219 let (native, colorStats, linearStats)
224 Opt_D_dump_asm "Asm code"
225 (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) $ concat native)
227 -- dump global NCG stats for graph coloring allocator
228 (case concat $ catMaybes colorStats of
231 -- build the global register conflict graph
233 = foldl Color.union Color.initGraph
234 $ [ Color.raGraph stat
235 | stat@Color.RegAllocStatsStart{} <- stats]
237 dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
238 $ Color.pprStats stats graphGlobal
241 Opt_D_dump_asm_conflicts "Register conflict graph"
245 targetVirtualRegSqueeze
246 targetRealRegSqueeze)
250 -- dump global NCG stats for linear allocator
251 (case concat $ catMaybes linearStats of
253 stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
254 $ Linear.pprStats (concat native) stats)
256 -- write out the imports
257 Pretty.printDoc Pretty.LeftMode h
258 $ makeImportsDoc dflags (concat imports)
262 where add_split (Cmm tops)
263 | dopt Opt_SplitObjs dflags = split_marker : tops
266 split_marker = CmmProc [] mkSplitMarkerLabel (ListGraph [])
269 -- | Do native code generation on all these cmms.
271 cmmNativeGens :: (Instruction instr, Outputable instr)
273 -> NcgImpl instr jumpDest
278 -> [ ([NatCmmTop instr],
279 Maybe [Color.RegAllocStats instr],
280 Maybe [Linear.RegAllocStats]) ]
284 Maybe [Color.RegAllocStats instr],
285 Maybe [Linear.RegAllocStats])] )
287 cmmNativeGens _ _ _ _ [] impAcc profAcc _
288 = return (reverse impAcc, reverse profAcc)
290 cmmNativeGens dflags ncgImpl h us (cmm : cmms) impAcc profAcc count
292 (us', native, imports, colorStats, linearStats)
293 <- cmmNativeGen dflags ncgImpl us cmm count
295 Pretty.bufLeftRender h
296 $ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map (pprNatCmmTop ncgImpl) native
298 -- carefully evaluate this strictly. Binding it with 'let'
299 -- and then using 'seq' doesn't work, because the let
300 -- apparently gets inlined first.
301 lsPprNative <- return $!
302 if dopt Opt_D_dump_asm dflags
303 || dopt Opt_D_dump_asm_stats dflags
307 count' <- return $! count + 1;
309 -- force evaulation all this stuff to avoid space leaks
310 seqString (showSDoc $ vcat $ map ppr imports) `seq` return ()
312 cmmNativeGens dflags ncgImpl
315 ((lsPprNative, colorStats, linearStats) : profAcc)
318 where seqString [] = ()
319 seqString (x:xs) = x `seq` seqString xs `seq` ()
322 -- | Complete native code generation phase for a single top-level chunk of Cmm.
323 -- Dumping the output of each stage along the way.
324 -- Global conflict graph and NGC stats
326 :: (Instruction instr, Outputable instr)
328 -> NcgImpl instr jumpDest
330 -> RawCmmTop -- ^ the cmm to generate code for
331 -> Int -- ^ sequence number of this top thing
333 , [NatCmmTop instr] -- native code
334 , [CLabel] -- things imported by this cmm
335 , Maybe [Color.RegAllocStats instr] -- stats for the coloring register allocator
336 , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
338 cmmNativeGen dflags ncgImpl us cmm count
341 -- rewrite assignments to global regs
343 {-# SCC "fixStgRegisters" #-}
346 -- cmm to cmm optimisations
347 let (opt_cmm, imports) =
348 {-# SCC "cmmToCmm" #-}
349 cmmToCmm dflags fixed_cmm
352 Opt_D_dump_opt_cmm "Optimised Cmm"
353 (pprCmm $ Cmm [opt_cmm])
355 -- generate native code from cmm
356 let ((native, lastMinuteImports), usGen) =
357 {-# SCC "genMachCode" #-}
358 initUs us $ genMachCode dflags (cmmTopCodeGen ncgImpl) opt_cmm
361 Opt_D_dump_asm_native "Native code"
362 (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) native)
364 -- tag instructions with register liveness information
365 let (withLiveness, usLive) =
366 {-# SCC "regLiveness" #-}
369 $ map natCmmTopToLive native
372 Opt_D_dump_asm_liveness "Liveness annotations added"
373 (vcat $ map ppr withLiveness)
375 -- allocate registers
376 (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
377 if ( dopt Opt_RegsGraph dflags
378 || dopt Opt_RegsIterative dflags)
380 -- the regs usable for allocation
381 let (alloc_regs :: UniqFM (UniqSet RealReg))
382 = foldr (\r -> plusUFM_C unionUniqSets
383 $ unitUFM (targetClassOfRealReg r) (unitUniqSet r))
385 $ allocatableRegs ncgImpl
387 -- do the graph coloring register allocation
388 let ((alloced, regAllocStats), usAlloc)
389 = {-# SCC "RegAlloc" #-}
394 (mkUniqSet [0 .. maxSpillSlots ncgImpl])
397 -- dump out what happened during register allocation
399 Opt_D_dump_asm_regalloc "Registers allocated"
400 (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) alloced)
403 Opt_D_dump_asm_regalloc_stages "Build/spill stages"
404 (vcat $ map (\(stage, stats)
405 -> text "# --------------------------"
406 $$ text "# cmm " <> int count <> text " Stage " <> int stage
408 $ zip [0..] regAllocStats)
411 if dopt Opt_D_dump_asm_stats dflags
412 then Just regAllocStats else Nothing
414 -- force evaluation of the Maybe to avoid space leak
415 mPprStats `seq` return ()
417 return ( alloced, usAlloc
422 -- do linear register allocation
423 let ((alloced, regAllocStats), usAlloc)
424 = {-# SCC "RegAlloc" #-}
427 $ mapUs (Linear.regAlloc dflags) withLiveness
430 Opt_D_dump_asm_regalloc "Registers allocated"
431 (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) alloced)
434 if dopt Opt_D_dump_asm_stats dflags
435 then Just (catMaybes regAllocStats) else Nothing
437 -- force evaluation of the Maybe to avoid space leak
438 mPprStats `seq` return ()
440 return ( alloced, usAlloc
444 ---- x86fp_kludge. This pass inserts ffree instructions to clear
445 ---- the FPU stack on x86. The x86 ABI requires that the FPU stack
446 ---- is clear, and library functions can return odd results if it
449 ---- NB. must happen before shortcutBranches, because that
450 ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
451 let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
453 ---- generate jump tables
455 {-# SCC "generateJumpTables" #-}
456 generateJumpTables ncgImpl kludged
458 ---- shortcut branches
460 {-# SCC "shortcutBranches" #-}
461 shortcutBranches dflags ncgImpl tabled
465 {-# SCC "sequenceBlocks" #-}
466 map (sequenceTop ncgImpl) shorted
468 ---- expansion of SPARC synthetic instrs
470 {-# SCC "sparc_expand" #-}
471 ncgExpandTop ncgImpl sequenced
474 Opt_D_dump_asm_expanded "Synthetic instructions expanded"
475 (vcat $ map (docToSDoc . pprNatCmmTop ncgImpl) expanded)
479 , lastMinuteImports ++ imports
484 x86fp_kludge :: NatCmmTop X86.Instr.Instr -> NatCmmTop X86.Instr.Instr
485 x86fp_kludge top@(CmmData _ _) = top
486 x86fp_kludge (CmmProc info lbl (ListGraph code)) =
487 CmmProc info lbl (ListGraph $ X86.Instr.i386_insert_ffrees code)
490 -- | Build a doc for all the imports.
492 makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
493 makeImportsDoc dflags imports
496 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
497 -- On recent versions of Darwin, the linker supports
498 -- dead-stripping of code and data on a per-symbol basis.
499 -- There's a hack to make this work in PprMach.pprNatCmmTop.
500 Pretty.$$ Pretty.text ".subsections_via_symbols"
502 #if HAVE_GNU_NONEXEC_STACK
503 -- On recent GNU ELF systems one can mark an object file
504 -- as not requiring an executable stack. If all objects
505 -- linked into a program have this note then the program
506 -- will not use an executable stack, which is good for
507 -- security. GHC generated code does not need an executable
508 -- stack so add the note in:
509 Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
511 -- And just because every other compiler does, lets stick in
512 -- an identifier directive: .ident "GHC x.y.z"
513 Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
514 Pretty.text cProjectVersion
515 in Pretty.text ".ident" Pretty.<+>
516 Pretty.doubleQuotes compilerIdent
519 -- Generate "symbol stubs" for all external symbols that might
520 -- come from a dynamic library.
521 dyld_stubs :: [CLabel] -> Pretty.Doc
522 {- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
523 map head $ group $ sort imps-}
525 arch = platformArch $ targetPlatform dflags
526 os = platformOS $ targetPlatform dflags
528 -- (Hack) sometimes two Labels pretty-print the same, but have
529 -- different uniques; so we compare their text versions...
531 | needImportedSymbols arch os
533 (pprGotDeclaration arch os :) $
534 map ( pprImportedSymbol arch os . fst . head) $
535 groupBy (\(_,a) (_,b) -> a == b) $
536 sortBy (\(_,a) (_,b) -> compare a b) $
542 doPpr lbl = (lbl, renderWithStyle (pprCLabel lbl) astyle)
543 astyle = mkCodeStyle AsmStyle
546 -- -----------------------------------------------------------------------------
547 -- Sequencing the basic blocks
549 -- Cmm BasicBlocks are self-contained entities: they always end in a
550 -- jump, either non-local or to another basic block in the same proc.
551 -- In this phase, we attempt to place the basic blocks in a sequence
552 -- such that as many of the local jumps as possible turn into
557 => NcgImpl instr jumpDest -> NatCmmTop instr -> NatCmmTop instr
559 sequenceTop _ top@(CmmData _ _) = top
560 sequenceTop ncgImpl (CmmProc info lbl (ListGraph blocks)) =
561 CmmProc info lbl (ListGraph $ ncgMakeFarBranches ncgImpl $ sequenceBlocks blocks)
563 -- The algorithm is very simple (and stupid): we make a graph out of
564 -- the blocks where there is an edge from one block to another iff the
565 -- first block ends by jumping to the second. Then we topologically
566 -- sort this graph. Then traverse the list: for each block, we first
567 -- output the block, then if it has an out edge, we move the
568 -- destination of the out edge to the front of the list, and continue.
570 -- FYI, the classic layout for basic blocks uses postorder DFS; this
571 -- algorithm is implemented in Hoopl.
575 => [NatBasicBlock instr]
576 -> [NatBasicBlock instr]
578 sequenceBlocks [] = []
579 sequenceBlocks (entry:blocks) =
580 seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
581 -- the first block is the entry point ==> it must remain at the start.
586 => [NatBasicBlock instr]
587 -> [SCC ( NatBasicBlock instr
591 sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
593 -- we're only interested in the last instruction of
594 -- the block, and only if it has a single destination.
597 => [instr] -> [Unique]
600 = case jumpDestsOfInstr (last instrs) of
601 [one] -> [getUnique one]
604 mkNode :: (Instruction t)
606 -> (GenBasicBlock t, Unique, [Unique])
607 mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
609 seqBlocks :: (Eq t) => [(GenBasicBlock t1, t, [t])] -> [GenBasicBlock t1]
611 seqBlocks ((block,_,[]) : rest)
612 = block : seqBlocks rest
613 seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
614 | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
615 | otherwise = block : seqBlocks rest'
617 (can_fallthrough, rest') = reorder next [] rest
618 -- TODO: we should do a better job for cycles; try to maximise the
619 -- fallthroughs within a loop.
620 seqBlocks _ = panic "AsmCodegen:seqBlocks"
622 reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
623 reorder _ accum [] = (False, reverse accum)
624 reorder id accum (b@(block,id',out) : rest)
625 | id == id' = (True, (block,id,out) : reverse accum ++ rest)
626 | otherwise = reorder id (b:accum) rest
629 -- -----------------------------------------------------------------------------
630 -- Making far branches
632 -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
633 -- big, we have to work around this limitation.
636 :: [NatBasicBlock PPC.Instr.Instr]
637 -> [NatBasicBlock PPC.Instr.Instr]
638 makeFarBranches blocks
639 | last blockAddresses < nearLimit = blocks
640 | otherwise = zipWith handleBlock blockAddresses blocks
642 blockAddresses = scanl (+) 0 $ map blockLen blocks
643 blockLen (BasicBlock _ instrs) = length instrs
645 handleBlock addr (BasicBlock id instrs)
646 = BasicBlock id (zipWith makeFar [addr..] instrs)
648 makeFar _ (PPC.Instr.BCC PPC.Cond.ALWAYS tgt) = PPC.Instr.BCC PPC.Cond.ALWAYS tgt
649 makeFar addr (PPC.Instr.BCC cond tgt)
650 | abs (addr - targetAddr) >= nearLimit
651 = PPC.Instr.BCCFAR cond tgt
653 = PPC.Instr.BCC cond tgt
654 where Just targetAddr = lookupUFM blockAddressMap tgt
655 makeFar _ other = other
657 nearLimit = 7000 -- 8192 instructions are allowed; let's keep some
658 -- distance, as we have a few pseudo-insns that are
659 -- pretty-printed as multiple instructions,
660 -- and it's just not worth the effort to calculate
663 blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses
665 -- -----------------------------------------------------------------------------
666 -- Generate jump tables
668 -- Analyzes all native code and generates data sections for all jump
669 -- table instructions.
671 :: NcgImpl instr jumpDest
672 -> [NatCmmTop instr] -> [NatCmmTop instr]
673 generateJumpTables ncgImpl xs = concatMap f xs
674 where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs
676 g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
678 -- -----------------------------------------------------------------------------
683 -> NcgImpl instr jumpDest
687 shortcutBranches dflags ncgImpl tops
688 | optLevel dflags < 1 = tops -- only with -O or higher
689 | otherwise = map (apply_mapping ncgImpl mapping) tops'
691 (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
692 mapping = foldr plusUFM emptyUFM mappings
694 build_mapping :: NcgImpl instr jumpDest
695 -> GenCmmTop d t (ListGraph instr)
696 -> (GenCmmTop d t (ListGraph instr), UniqFM jumpDest)
697 build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
698 build_mapping _ (CmmProc info lbl (ListGraph []))
699 = (CmmProc info lbl (ListGraph []), emptyUFM)
700 build_mapping ncgImpl (CmmProc info lbl (ListGraph (head:blocks)))
701 = (CmmProc info lbl (ListGraph (head:others)), mapping)
702 -- drop the shorted blocks, but don't ever drop the first one,
703 -- because it is pointed to by a global label.
705 -- find all the blocks that just consist of a jump that can be
707 -- Don't completely eliminate loops here -- that can leave a dangling jump!
708 (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
709 split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
710 | Just jd <- canShortcut ncgImpl insn,
711 Just dest <- getJumpDestBlockId ncgImpl jd,
712 (setMember dest s) || dest == id -- loop checks
713 = (s, shortcut_blocks, b : others)
714 split (s, shortcut_blocks, others) (BasicBlock id [insn])
715 | Just dest <- canShortcut ncgImpl insn
716 = (setInsert id s, (id,dest) : shortcut_blocks, others)
717 split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
720 -- build a mapping from BlockId to JumpDest for shorting branches
721 mapping = foldl add emptyUFM shortcut_blocks
722 add ufm (id,dest) = addToUFM ufm id dest
724 apply_mapping :: NcgImpl instr jumpDest
726 -> GenCmmTop CmmStatic h (ListGraph instr)
727 -> GenCmmTop CmmStatic h (ListGraph instr)
728 apply_mapping ncgImpl ufm (CmmData sec statics)
729 = CmmData sec (map (shortcutStatic ncgImpl (lookupUFM ufm)) statics)
730 -- we need to get the jump tables, so apply the mapping to the entries
732 apply_mapping ncgImpl ufm (CmmProc info lbl (ListGraph blocks))
733 = CmmProc info lbl (ListGraph $ map short_bb blocks)
735 short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
736 short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
737 -- shortcutJump should apply the mapping repeatedly,
738 -- just in case we can short multiple branches.
740 -- -----------------------------------------------------------------------------
741 -- Instruction selection
743 -- Native code instruction selection for a chunk of stix code. For
744 -- this part of the computation, we switch from the UniqSM monad to
745 -- the NatM monad. The latter carries not only a Unique, but also an
746 -- Int denoting the current C stack pointer offset in the generated
747 -- code; this is needed for creating correct spill offsets on
748 -- architectures which don't offer, or for which it would be
749 -- prohibitively expensive to employ, a frame pointer register. Viz,
752 -- The offset is measured in bytes, and indicates the difference
753 -- between the current (simulated) C stack-ptr and the value it was at
754 -- the beginning of the block. For stacks which grow down, this value
755 -- should be either zero or negative.
757 -- Switching between the two monads whilst carrying along the same
758 -- Unique supply breaks abstraction. Is that bad?
762 -> (DynFlags -> RawCmmTop -> NatM [NatCmmTop instr])
768 genMachCode dflags cmmTopCodeGen cmm_top
769 = do { initial_us <- getUs
770 ; let initial_st = mkNatM_State initial_us 0 dflags
771 (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top)
772 final_delta = natm_delta final_st
773 final_imports = natm_imports final_st
774 ; if final_delta == 0
775 then return (new_tops, final_imports)
776 else pprPanic "genMachCode: nonzero final delta" (int final_delta)
779 -- -----------------------------------------------------------------------------
780 -- Generic Cmm optimiser
786 (b) Simple inlining: a temporary which is assigned to and then
787 used, once, can be shorted.
788 (c) Position independent code and dynamic linking
789 (i) introduce the appropriate indirections
790 and position independent refs
791 (ii) compile a list of imported symbols
793 Ideas for other things we could do:
795 - shortcut jumps-to-jumps
796 - simple CSE: if an expr is assigned to a temp, then replace later occs of
797 that expr with the temp, until the expr is no longer valid (can push through
798 temp assignments, and certain assigns to mem...)
801 cmmToCmm :: DynFlags -> RawCmmTop -> (RawCmmTop, [CLabel])
802 cmmToCmm _ top@(CmmData _ _) = (top, [])
803 cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do
804 blocks' <- mapM cmmBlockConFold (cmmMiniInline (cmmEliminateDeadBlocks blocks))
805 return $ CmmProc info lbl (ListGraph blocks')
807 newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))
809 instance Monad CmmOptM where
810 return x = CmmOptM $ \(imports, _) -> (# x,imports #)
812 CmmOptM $ \(imports, dflags) ->
813 case f (imports, dflags) of
816 CmmOptM g' -> g' (imports', dflags)
818 addImportCmmOpt :: CLabel -> CmmOptM ()
819 addImportCmmOpt lbl = CmmOptM $ \(imports, _dflags) -> (# (), lbl:imports #)
821 getDynFlagsCmmOpt :: CmmOptM DynFlags
822 getDynFlagsCmmOpt = CmmOptM $ \(imports, dflags) -> (# dflags, imports #)
824 runCmmOpt :: DynFlags -> CmmOptM a -> (a, [CLabel])
825 runCmmOpt dflags (CmmOptM f) = case f ([], dflags) of
826 (# result, imports #) -> (result, imports)
828 cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
829 cmmBlockConFold (BasicBlock id stmts) = do
830 stmts' <- mapM cmmStmtConFold stmts
831 return $ BasicBlock id stmts'
833 cmmStmtConFold :: CmmStmt -> CmmOptM CmmStmt
837 -> do src' <- cmmExprConFold DataReference src
838 return $ case src' of
839 CmmReg reg' | reg == reg' -> CmmNop
840 new_src -> CmmAssign reg new_src
843 -> do addr' <- cmmExprConFold DataReference addr
844 src' <- cmmExprConFold DataReference src
845 return $ CmmStore addr' src'
848 -> do addr' <- cmmExprConFold JumpReference addr
849 return $ CmmJump addr' regs
851 CmmCall target regs args srt returns
852 -> do target' <- case target of
853 CmmCallee e conv -> do
854 e' <- cmmExprConFold CallReference e
855 return $ CmmCallee e' conv
856 other -> return other
857 args' <- mapM (\(CmmHinted arg hint) -> do
858 arg' <- cmmExprConFold DataReference arg
859 return (CmmHinted arg' hint)) args
860 return $ CmmCall target' regs args' srt returns
862 CmmCondBranch test dest
863 -> do test' <- cmmExprConFold DataReference test
864 return $ case test' of
865 CmmLit (CmmInt 0 _) ->
866 CmmComment (mkFastString ("deleted: " ++
867 showSDoc (pprStmt stmt)))
869 CmmLit (CmmInt _ _) -> CmmBranch dest
870 _other -> CmmCondBranch test' dest
873 -> do expr' <- cmmExprConFold DataReference expr
874 return $ CmmSwitch expr' ids
880 cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
881 cmmExprConFold referenceKind expr = do
882 dflags <- getDynFlagsCmmOpt
883 let arch = platformArch (targetPlatform dflags)
886 -> do addr' <- cmmExprConFold DataReference addr
887 return $ CmmLoad addr' rep
890 -- For MachOps, we first optimize the children, and then we try
891 -- our hand at some constant-folding.
892 -> do args' <- mapM (cmmExprConFold DataReference) args
893 return $ cmmMachOpFold mop args'
895 CmmLit (CmmLabel lbl)
897 cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
898 CmmLit (CmmLabelOff lbl off)
900 dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
901 return $ cmmMachOpFold (MO_Add wordWidth) [
903 (CmmLit $ CmmInt (fromIntegral off) wordWidth)
906 -- On powerpc (non-PIC), it's easier to jump directly to a label than
907 -- to use the register table, so we replace these registers
908 -- with the corresponding labels:
909 CmmReg (CmmGlobal EagerBlackholeInfo)
910 | arch == ArchPPC && not opt_PIC
911 -> cmmExprConFold referenceKind $
912 CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_EAGER_BLACKHOLE_info")))
913 CmmReg (CmmGlobal GCEnter1)
914 | arch == ArchPPC && not opt_PIC
915 -> cmmExprConFold referenceKind $
916 CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_enter_1")))
917 CmmReg (CmmGlobal GCFun)
918 | arch == ArchPPC && not opt_PIC
919 -> cmmExprConFold referenceKind $
920 CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_fun")))