1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
11 -- The above warning suppression flag is a temporary kludge.
12 -- While working on this module you are encouraged to remove it and fix
13 -- any warnings in the module. See
14 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
17 module AsmCodeGen ( nativeCodeGen ) where
19 #include "HsVersions.h"
20 #include "nativeGen/NCG.h"
28 import PositionIndependentCode
29 import RegAlloc.Liveness
31 import qualified RegAlloc.Linear.Main as Linear
33 import qualified GraphColor as Color
34 import qualified RegAlloc.Graph.Main as Color
35 import qualified RegAlloc.Graph.Stats as Color
36 import qualified RegAlloc.Graph.Coalesce as Color
39 import CmmOpt ( cmmMiniInline, cmmMachOpFold )
45 import Unique ( Unique, getUnique )
47 import List ( groupBy, sortBy )
49 #if powerpc_TARGET_ARCH
50 import StaticFlags ( opt_Static, opt_PIC )
53 import Config ( cProjectVersion )
57 import qualified Pretty
77 The native-code generator has machine-independent and
78 machine-dependent modules.
80 This module ("AsmCodeGen") is the top-level machine-independent
81 module. Before entering machine-dependent land, we do some
82 machine-independent optimisations (defined below) on the
85 We convert to the machine-specific 'Instr' datatype with
86 'cmmCodeGen', assuming an infinite supply of registers. We then use
87 a machine-independent register allocator ('regAlloc') to rejoin
88 reality. Obviously, 'regAlloc' has machine-specific helper
89 functions (see about "RegAllocInfo" below).
91 Finally, we order the basic blocks of the function so as to minimise
92 the number of jumps between blocks, by utilising fallthrough wherever
95 The machine-dependent bits break down as follows:
97 * ["MachRegs"] Everything about the target platform's machine
98 registers (and immediate operands, and addresses, which tend to
99 intermingle/interact with registers).
101 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
102 have a module of its own), plus a miscellany of other things
103 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
105 * ["MachCodeGen"] is where 'Cmm' stuff turns into
106 machine instructions.
108 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
111 * ["RegAllocInfo"] In the register allocator, we manipulate
112 'MRegsState's, which are 'BitSet's, one bit per machine register.
113 When we want to say something about a specific machine register
114 (e.g., ``it gets clobbered by this instruction''), we set/unset
115 its bit. Obviously, we do this 'BitSet' thing for efficiency
118 The 'RegAllocInfo' module collects together the machine-specific
119 info needed to do register allocation.
121 * ["RegisterAlloc"] The (machine-independent) register allocator.
124 -- -----------------------------------------------------------------------------
125 -- Top-level of the native codegen
128 nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO ()
129 nativeCodeGen dflags h us cmms
131 let split_cmms = concat $ map add_split cmms
133 -- BufHandle is a performance hack. We could hide it inside
134 -- Pretty if it weren't for the fact that we do lots of little
135 -- printDocs here (in order to do codegen in constant space).
136 bufh <- newBufHandle h
137 (imports, prof) <- cmmNativeGens dflags bufh us split_cmms [] [] 0
140 let (native, colorStats, linearStats)
145 Opt_D_dump_asm "Asm code"
146 (vcat $ map (docToSDoc . pprNatCmmTop) $ concat native)
148 -- dump global NCG stats for graph coloring allocator
149 (case concat $ catMaybes colorStats of
152 -- build the global register conflict graph
154 = foldl Color.union Color.initGraph
155 $ [ Color.raGraph stat
156 | stat@Color.RegAllocStatsStart{} <- stats]
158 dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
159 $ Color.pprStats stats graphGlobal
162 Opt_D_dump_asm_conflicts "Register conflict graph"
163 $ Color.dotGraph Color.regDotColor trivColorable
167 -- dump global NCG stats for linear allocator
168 (case concat $ catMaybes linearStats of
170 stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
171 $ Linear.pprStats (concat native) stats)
173 -- write out the imports
174 Pretty.printDoc Pretty.LeftMode h
175 $ makeImportsDoc (concat imports)
179 where add_split (Cmm tops)
180 | dopt Opt_SplitObjs dflags = split_marker : tops
183 split_marker = CmmProc [] mkSplitMarkerLabel [] (ListGraph [])
186 -- | Do native code generation on all these cmms.
188 cmmNativeGens dflags h us [] impAcc profAcc count
189 = return (reverse impAcc, reverse profAcc)
191 cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count
193 (us', native, imports, colorStats, linearStats)
194 <- cmmNativeGen dflags us cmm count
196 Pretty.bufLeftRender h
197 $ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map pprNatCmmTop native
200 if dopt Opt_D_dump_asm dflags
201 || dopt Opt_D_dump_asm_stats dflags
205 let count' = count + 1;
208 -- force evaulation all this stuff to avoid space leaks
209 seqString (showSDoc $ vcat $ map ppr imports) `seq` return ()
210 lsPprNative `seq` return ()
211 count' `seq` return ()
213 cmmNativeGens dflags h us' cmms
215 ((lsPprNative, colorStats, linearStats) : profAcc)
218 where seqString [] = ()
219 seqString (x:xs) = x `seq` seqString xs `seq` ()
222 -- | Complete native code generation phase for a single top-level chunk of Cmm.
223 -- Dumping the output of each stage along the way.
224 -- Global conflict graph and NGC stats
228 -> RawCmmTop -- ^ the cmm to generate code for
229 -> Int -- ^ sequence number of this top thing
231 , [NatCmmTop] -- native code
232 , [CLabel] -- things imported by this cmm
233 , Maybe [Color.RegAllocStats] -- stats for the coloring register allocator
234 , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
236 cmmNativeGen dflags us cmm count
239 -- rewrite assignments to global regs
240 let (fixed_cmm, usFix) =
241 {-# SCC "fixAssignsTop" #-}
242 initUs us $ fixAssignsTop cmm
244 -- cmm to cmm optimisations
245 let (opt_cmm, imports) =
246 {-# SCC "cmmToCmm" #-}
247 cmmToCmm dflags fixed_cmm
250 Opt_D_dump_opt_cmm "Optimised Cmm"
251 (pprCmm $ Cmm [opt_cmm])
253 -- generate native code from cmm
254 let ((native, lastMinuteImports), usGen) =
255 {-# SCC "genMachCode" #-}
256 initUs usFix $ genMachCode dflags opt_cmm
259 Opt_D_dump_asm_native "Native code"
260 (vcat $ map (docToSDoc . pprNatCmmTop) native)
263 -- tag instructions with register liveness information
264 let (withLiveness, usLive) =
265 {-# SCC "regLiveness" #-}
266 initUs usGen $ mapUs regLiveness native
269 Opt_D_dump_asm_liveness "Liveness annotations added"
270 (vcat $ map ppr withLiveness)
273 -- allocate registers
274 (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
275 if ( dopt Opt_RegsGraph dflags
276 || dopt Opt_RegsIterative dflags)
278 -- the regs usable for allocation
280 = foldr (\r -> plusUFM_C unionUniqSets
281 $ unitUFM (regClass r) (unitUniqSet r))
283 $ map RealReg allocatableRegs
285 -- graph coloring register allocation
286 let ((alloced, regAllocStats), usAlloc)
287 = {-# SCC "RegAlloc" #-}
292 (mkUniqSet [0..maxSpillSlots])
295 -- dump out what happened during register allocation
297 Opt_D_dump_asm_regalloc "Registers allocated"
298 (vcat $ map (docToSDoc . pprNatCmmTop) alloced)
301 Opt_D_dump_asm_regalloc_stages "Build/spill stages"
302 (vcat $ map (\(stage, stats)
303 -> text "# --------------------------"
304 $$ text "# cmm " <> int count <> text " Stage " <> int stage
306 $ zip [0..] regAllocStats)
309 if dopt Opt_D_dump_asm_stats dflags
310 then Just regAllocStats else Nothing
312 -- force evaluation of the Maybe to avoid space leak
313 mPprStats `seq` return ()
315 return ( alloced, usAlloc
320 -- do linear register allocation
321 let ((alloced, regAllocStats), usAlloc)
322 = {-# SCC "RegAlloc" #-}
325 $ mapUs Linear.regAlloc withLiveness
328 Opt_D_dump_asm_regalloc "Registers allocated"
329 (vcat $ map (docToSDoc . pprNatCmmTop) alloced)
332 if dopt Opt_D_dump_asm_stats dflags
333 then Just (catMaybes regAllocStats) else Nothing
335 -- force evaluation of the Maybe to avoid space leak
336 mPprStats `seq` return ()
338 return ( alloced, usAlloc
342 ---- shortcut branches
344 {-# SCC "shortcutBranches" #-}
345 shortcutBranches dflags alloced
349 {-# SCC "sequenceBlocks" #-}
350 map sequenceTop shorted
353 let final_mach_code =
355 {-# SCC "x86fp_kludge" #-}
356 map x86fp_kludge sequenced
363 , lastMinuteImports ++ imports
369 x86fp_kludge :: NatCmmTop -> NatCmmTop
370 x86fp_kludge top@(CmmData _ _) = top
371 x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) =
372 CmmProc info lbl params (ListGraph $ i386_insert_ffrees code)
376 -- | Build a doc for all the imports.
378 makeImportsDoc :: [CLabel] -> Pretty.Doc
379 makeImportsDoc imports
382 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
383 -- On recent versions of Darwin, the linker supports
384 -- dead-stripping of code and data on a per-symbol basis.
385 -- There's a hack to make this work in PprMach.pprNatCmmTop.
386 Pretty.$$ Pretty.text ".subsections_via_symbols"
388 #if HAVE_GNU_NONEXEC_STACK
389 -- On recent GNU ELF systems one can mark an object file
390 -- as not requiring an executable stack. If all objects
391 -- linked into a program have this note then the program
392 -- will not use an executable stack, which is good for
393 -- security. GHC generated code does not need an executable
394 -- stack so add the note in:
395 Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
397 #if !defined(darwin_TARGET_OS)
398 -- And just because every other compiler does, lets stick in
399 -- an identifier directive: .ident "GHC x.y.z"
400 Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
401 Pretty.text cProjectVersion
402 in Pretty.text ".ident" Pretty.<+>
403 Pretty.doubleQuotes compilerIdent
407 -- Generate "symbol stubs" for all external symbols that might
408 -- come from a dynamic library.
409 dyld_stubs :: [CLabel] -> Pretty.Doc
410 {- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
411 map head $ group $ sort imps-}
413 -- (Hack) sometimes two Labels pretty-print the same, but have
414 -- different uniques; so we compare their text versions...
416 | needImportedSymbols
418 (pprGotDeclaration :) $
419 map (pprImportedSymbol . fst . head) $
420 groupBy (\(_,a) (_,b) -> a == b) $
421 sortBy (\(_,a) (_,b) -> compare a b) $
427 doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
428 astyle = mkCodeStyle AsmStyle
431 -- -----------------------------------------------------------------------------
432 -- Sequencing the basic blocks
434 -- Cmm BasicBlocks are self-contained entities: they always end in a
435 -- jump, either non-local or to another basic block in the same proc.
436 -- In this phase, we attempt to place the basic blocks in a sequence
437 -- such that as many of the local jumps as possible turn into
440 sequenceTop :: NatCmmTop -> NatCmmTop
441 sequenceTop top@(CmmData _ _) = top
442 sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
443 CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks)
445 -- The algorithm is very simple (and stupid): we make a graph out of
446 -- the blocks where there is an edge from one block to another iff the
447 -- first block ends by jumping to the second. Then we topologically
448 -- sort this graph. Then traverse the list: for each block, we first
449 -- output the block, then if it has an out edge, we move the
450 -- destination of the out edge to the front of the list, and continue.
452 -- FYI, the classic layout for basic blocks uses postorder DFS; this
453 -- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
455 sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
456 sequenceBlocks [] = []
457 sequenceBlocks (entry:blocks) =
458 seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
459 -- the first block is the entry point ==> it must remain at the start.
461 sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
462 sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
464 getOutEdges :: [Instr] -> [Unique]
465 getOutEdges instrs = case jumpDests (last instrs) [] of
466 [one] -> [getUnique one]
468 -- we're only interested in the last instruction of
469 -- the block, and only if it has a single destination.
471 mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
474 seqBlocks ((block,_,[]) : rest)
475 = block : seqBlocks rest
476 seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
477 | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
478 | otherwise = block : seqBlocks rest'
480 (can_fallthrough, rest') = reorder next [] rest
481 -- TODO: we should do a better job for cycles; try to maximise the
482 -- fallthroughs within a loop.
483 seqBlocks _ = panic "AsmCodegen:seqBlocks"
485 reorder id accum [] = (False, reverse accum)
486 reorder id accum (b@(block,id',out) : rest)
487 | id == id' = (True, (block,id,out) : reverse accum ++ rest)
488 | otherwise = reorder id (b:accum) rest
491 -- -----------------------------------------------------------------------------
492 -- Making far branches
494 -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
495 -- big, we have to work around this limitation.
497 makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock]
499 #if powerpc_TARGET_ARCH
500 makeFarBranches blocks
501 | last blockAddresses < nearLimit = blocks
502 | otherwise = zipWith handleBlock blockAddresses blocks
504 blockAddresses = scanl (+) 0 $ map blockLen blocks
505 blockLen (BasicBlock _ instrs) = length instrs
507 handleBlock addr (BasicBlock id instrs)
508 = BasicBlock id (zipWith makeFar [addr..] instrs)
510 makeFar addr (BCC ALWAYS tgt) = BCC ALWAYS tgt
511 makeFar addr (BCC cond tgt)
512 | abs (addr - targetAddr) >= nearLimit
516 where Just targetAddr = lookupUFM blockAddressMap tgt
517 makeFar addr other = other
519 nearLimit = 7000 -- 8192 instructions are allowed; let's keep some
520 -- distance, as we have a few pseudo-insns that are
521 -- pretty-printed as multiple instructions,
522 -- and it's just not worth the effort to calculate
525 blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses
530 -- -----------------------------------------------------------------------------
533 shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
534 shortcutBranches dflags tops
535 | optLevel dflags < 1 = tops -- only with -O or higher
536 | otherwise = map (apply_mapping mapping) tops'
538 (tops', mappings) = mapAndUnzip build_mapping tops
539 mapping = foldr plusUFM emptyUFM mappings
541 build_mapping top@(CmmData _ _) = (top, emptyUFM)
542 build_mapping (CmmProc info lbl params (ListGraph []))
543 = (CmmProc info lbl params (ListGraph []), emptyUFM)
544 build_mapping (CmmProc info lbl params (ListGraph (head:blocks)))
545 = (CmmProc info lbl params (ListGraph (head:others)), mapping)
546 -- drop the shorted blocks, but don't ever drop the first one,
547 -- because it is pointed to by a global label.
549 -- find all the blocks that just consist of a jump that can be
551 (shortcut_blocks, others) = partitionWith split blocks
552 split (BasicBlock id [insn]) | Just dest <- canShortcut insn
554 split other = Right other
556 -- build a mapping from BlockId to JumpDest for shorting branches
557 mapping = foldl add emptyUFM shortcut_blocks
558 add ufm (id,dest) = addToUFM ufm id dest
560 apply_mapping ufm (CmmData sec statics)
561 = CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics)
562 -- we need to get the jump tables, so apply the mapping to the entries
564 apply_mapping ufm (CmmProc info lbl params (ListGraph blocks))
565 = CmmProc info lbl params (ListGraph $ map short_bb blocks)
567 short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
568 short_insn i = shortcutJump (lookupUFM ufm) i
569 -- shortcutJump should apply the mapping repeatedly,
570 -- just in case we can short multiple branches.
572 -- -----------------------------------------------------------------------------
573 -- Instruction selection
575 -- Native code instruction selection for a chunk of stix code. For
576 -- this part of the computation, we switch from the UniqSM monad to
577 -- the NatM monad. The latter carries not only a Unique, but also an
578 -- Int denoting the current C stack pointer offset in the generated
579 -- code; this is needed for creating correct spill offsets on
580 -- architectures which don't offer, or for which it would be
581 -- prohibitively expensive to employ, a frame pointer register. Viz,
584 -- The offset is measured in bytes, and indicates the difference
585 -- between the current (simulated) C stack-ptr and the value it was at
586 -- the beginning of the block. For stacks which grow down, this value
587 -- should be either zero or negative.
589 -- Switching between the two monads whilst carrying along the same
590 -- Unique supply breaks abstraction. Is that bad?
592 genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel])
594 genMachCode dflags cmm_top
595 = do { initial_us <- getUs
596 ; let initial_st = mkNatM_State initial_us 0 dflags
597 (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
598 final_delta = natm_delta final_st
599 final_imports = natm_imports final_st
600 ; if final_delta == 0
601 then return (new_tops, final_imports)
602 else pprPanic "genMachCode: nonzero final delta" (int final_delta)
605 -- -----------------------------------------------------------------------------
606 -- Fixup assignments to global registers so that they assign to
607 -- locations within the RegTable, if appropriate.
609 -- Note that we currently don't fixup reads here: they're done by
610 -- the generic optimiser below, to avoid having two separate passes
613 fixAssignsTop :: RawCmmTop -> UniqSM RawCmmTop
614 fixAssignsTop top@(CmmData _ _) = returnUs top
615 fixAssignsTop (CmmProc info lbl params (ListGraph blocks)) =
616 mapUs fixAssignsBlock blocks `thenUs` \ blocks' ->
617 returnUs (CmmProc info lbl params (ListGraph blocks'))
619 fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
620 fixAssignsBlock (BasicBlock id stmts) =
621 fixAssigns stmts `thenUs` \ stmts' ->
622 returnUs (BasicBlock id stmts')
624 fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
626 mapUs fixAssign stmts `thenUs` \ stmtss ->
627 returnUs (concat stmtss)
629 fixAssign :: CmmStmt -> UniqSM [CmmStmt]
630 fixAssign (CmmAssign (CmmGlobal reg) src)
631 | Left realreg <- reg_or_addr
632 = returnUs [CmmAssign (CmmGlobal reg) src]
633 | Right baseRegAddr <- reg_or_addr
634 = returnUs [CmmStore baseRegAddr src]
635 -- Replace register leaves with appropriate StixTrees for
636 -- the given target. GlobalRegs which map to a reg on this
637 -- arch are left unchanged. Assigning to BaseReg is always
638 -- illegal, so we check for that.
640 reg_or_addr = get_GlobalReg_reg_or_addr reg
642 fixAssign other_stmt = returnUs [other_stmt]
644 -- -----------------------------------------------------------------------------
645 -- Generic Cmm optimiser
651 (b) Simple inlining: a temporary which is assigned to and then
652 used, once, can be shorted.
653 (c) Replacement of references to GlobalRegs which do not have
654 machine registers by the appropriate memory load (eg.
655 Hp ==> *(BaseReg + 34) ).
656 (d) Position independent code and dynamic linking
657 (i) introduce the appropriate indirections
658 and position independent refs
659 (ii) compile a list of imported symbols
661 Ideas for other things we could do (ToDo):
663 - shortcut jumps-to-jumps
664 - eliminate dead code blocks
665 - simple CSE: if an expr is assigned to a temp, then replace later occs of
666 that expr with the temp, until the expr is no longer valid (can push through
667 temp assignments, and certain assigns to mem...)
670 cmmToCmm :: DynFlags -> RawCmmTop -> (RawCmmTop, [CLabel])
671 cmmToCmm _ top@(CmmData _ _) = (top, [])
672 cmmToCmm dflags (CmmProc info lbl params (ListGraph blocks)) = runCmmOpt dflags $ do
673 blocks' <- mapM cmmBlockConFold (cmmMiniInline blocks)
674 return $ CmmProc info lbl params (ListGraph blocks')
676 newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))
678 instance Monad CmmOptM where
679 return x = CmmOptM $ \(imports, _) -> (# x,imports #)
681 CmmOptM $ \(imports, dflags) ->
682 case f (imports, dflags) of
685 CmmOptM g' -> g' (imports', dflags)
687 addImportCmmOpt :: CLabel -> CmmOptM ()
688 addImportCmmOpt lbl = CmmOptM $ \(imports, dflags) -> (# (), lbl:imports #)
690 getDynFlagsCmmOpt :: CmmOptM DynFlags
691 getDynFlagsCmmOpt = CmmOptM $ \(imports, dflags) -> (# dflags, imports #)
693 runCmmOpt :: DynFlags -> CmmOptM a -> (a, [CLabel])
694 runCmmOpt dflags (CmmOptM f) = case f ([], dflags) of
695 (# result, imports #) -> (result, imports)
697 cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
698 cmmBlockConFold (BasicBlock id stmts) = do
699 stmts' <- mapM cmmStmtConFold stmts
700 return $ BasicBlock id stmts'
705 -> do src' <- cmmExprConFold DataReference src
706 return $ case src' of
707 CmmReg reg' | reg == reg' -> CmmNop
708 new_src -> CmmAssign reg new_src
711 -> do addr' <- cmmExprConFold DataReference addr
712 src' <- cmmExprConFold DataReference src
713 return $ CmmStore addr' src'
716 -> do addr' <- cmmExprConFold JumpReference addr
717 return $ CmmJump addr' regs
719 CmmCall target regs args srt returns
720 -> do target' <- case target of
721 CmmCallee e conv -> do
722 e' <- cmmExprConFold CallReference e
723 return $ CmmCallee e' conv
724 other -> return other
725 args' <- mapM (\(CmmHinted arg hint) -> do
726 arg' <- cmmExprConFold DataReference arg
727 return (CmmHinted arg' hint)) args
728 return $ CmmCall target' regs args' srt returns
730 CmmCondBranch test dest
731 -> do test' <- cmmExprConFold DataReference test
732 return $ case test' of
733 CmmLit (CmmInt 0 _) ->
734 CmmComment (mkFastString ("deleted: " ++
735 showSDoc (pprStmt stmt)))
737 CmmLit (CmmInt n _) -> CmmBranch dest
738 other -> CmmCondBranch test' dest
741 -> do expr' <- cmmExprConFold DataReference expr
742 return $ CmmSwitch expr' ids
748 cmmExprConFold referenceKind expr
751 -> do addr' <- cmmExprConFold DataReference addr
752 return $ CmmLoad addr' rep
755 -- For MachOps, we first optimize the children, and then we try
756 -- our hand at some constant-folding.
757 -> do args' <- mapM (cmmExprConFold DataReference) args
758 return $ cmmMachOpFold mop args'
760 CmmLit (CmmLabel lbl)
762 dflags <- getDynFlagsCmmOpt
763 cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
764 CmmLit (CmmLabelOff lbl off)
766 dflags <- getDynFlagsCmmOpt
767 dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
768 return $ cmmMachOpFold (MO_Add wordWidth) [
770 (CmmLit $ CmmInt (fromIntegral off) wordWidth)
773 #if powerpc_TARGET_ARCH
774 -- On powerpc (non-PIC), it's easier to jump directly to a label than
775 -- to use the register table, so we replace these registers
776 -- with the corresponding labels:
777 CmmReg (CmmGlobal EagerBlackholeInfo)
779 -> cmmExprConFold referenceKind $
780 CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_EAGER_BLACKHOLE_INFO")))
781 CmmReg (CmmGlobal GCEnter1)
783 -> cmmExprConFold referenceKind $
784 CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_enter_1")))
785 CmmReg (CmmGlobal GCFun)
787 -> cmmExprConFold referenceKind $
788 CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_fun")))
791 CmmReg (CmmGlobal mid)
792 -- Replace register leaves with appropriate StixTrees for
793 -- the given target. MagicIds which map to a reg on this
794 -- arch are left unchanged. For the rest, BaseReg is taken
795 -- to mean the address of the reg table in MainCapability,
796 -- and for all others we generate an indirection to its
797 -- location in the register table.
798 -> case get_GlobalReg_reg_or_addr mid of
799 Left realreg -> return expr
802 BaseReg -> cmmExprConFold DataReference baseRegAddr
803 other -> cmmExprConFold DataReference
804 (CmmLoad baseRegAddr (globalRegType mid))
805 -- eliminate zero offsets
807 -> cmmExprConFold referenceKind (CmmReg reg)
809 CmmRegOff (CmmGlobal mid) offset
810 -- RegOf leaves are just a shorthand form. If the reg maps
811 -- to a real reg, we keep the shorthand, otherwise, we just
812 -- expand it and defer to the above code.
813 -> case get_GlobalReg_reg_or_addr mid of
814 Left realreg -> return expr
816 -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordWidth) [
817 CmmReg (CmmGlobal mid),
818 CmmLit (CmmInt (fromIntegral offset)
823 -- -----------------------------------------------------------------------------