1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
11 -- The above warning suppression flag is a temporary kludge.
12 -- While working on this module you are encouraged to remove it and fix
13 -- any warnings in the module. See
14 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
17 module AsmCodeGen ( nativeCodeGen ) where
19 #include "HsVersions.h"
20 #include "nativeGen/NCG.h"
29 #elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
36 #elif sparc_TARGET_ARCH
41 import SPARC.ShortcutJump
43 #elif powerpc_TARGET_ARCH
52 #error "AsmCodeGen: unknown architecture"
56 import RegAlloc.Liveness
57 import qualified RegAlloc.Linear.Main as Linear
59 import qualified GraphColor as Color
60 import qualified RegAlloc.Graph.Main as Color
61 import qualified RegAlloc.Graph.Stats as Color
62 import qualified RegAlloc.Graph.Coalesce as Color
63 import qualified RegAlloc.Graph.TrivColorable as Color
65 import qualified SPARC.CodeGen.Expand as SPARC
76 import CmmOpt ( cmmMiniInline, cmmMachOpFold )
82 import Unique ( Unique, getUnique )
85 #if powerpc_TARGET_ARCH
86 import StaticFlags ( opt_Static, opt_PIC )
89 import Config ( cProjectVersion )
93 import qualified Pretty
113 The native-code generator has machine-independent and
114 machine-dependent modules.
116 This module ("AsmCodeGen") is the top-level machine-independent
117 module. Before entering machine-dependent land, we do some
118 machine-independent optimisations (defined below) on the
121 We convert to the machine-specific 'Instr' datatype with
122 'cmmCodeGen', assuming an infinite supply of registers. We then use
123 a machine-independent register allocator ('regAlloc') to rejoin
124 reality. Obviously, 'regAlloc' has machine-specific helper
125 functions (see about "RegAllocInfo" below).
127 Finally, we order the basic blocks of the function so as to minimise
128 the number of jumps between blocks, by utilising fallthrough wherever
131 The machine-dependent bits break down as follows:
133 * ["MachRegs"] Everything about the target platform's machine
134 registers (and immediate operands, and addresses, which tend to
135 intermingle/interact with registers).
137 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
138 have a module of its own), plus a miscellany of other things
139 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
141 * ["MachCodeGen"] is where 'Cmm' stuff turns into
142 machine instructions.
144 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
147 * ["RegAllocInfo"] In the register allocator, we manipulate
148 'MRegsState's, which are 'BitSet's, one bit per machine register.
149 When we want to say something about a specific machine register
150 (e.g., ``it gets clobbered by this instruction''), we set/unset
151 its bit. Obviously, we do this 'BitSet' thing for efficiency
154 The 'RegAllocInfo' module collects together the machine-specific
155 info needed to do register allocation.
157 * ["RegisterAlloc"] The (machine-independent) register allocator.
160 -- -----------------------------------------------------------------------------
161 -- Top-level of the native codegen
-- | Top-level driver of the native code generator.
-- Optionally inserts split markers (for -split-objs), runs codegen over
-- every Cmm chunk via 'cmmNativeGens' (writing assembly through a
-- buffered handle), dumps allocator statistics, and finally emits the
-- collected import declarations.
-- NOTE(review): this excerpt elides intervening source lines; confirm
-- details against the full module.
164 nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO ()
165 nativeCodeGen dflags h us cmms
167 	let split_cmms	= concat $ map add_split cmms
169 	-- BufHandle is a performance hack. We could hide it inside
170 	-- Pretty if it weren't for the fact that we do lots of little
171 	-- printDocs here (in order to do codegen in constant space).
172 	bufh <- newBufHandle h
173 	(imports, prof) <- cmmNativeGens dflags bufh us split_cmms [] [] 0
-- 'prof' carries per-chunk (native code, color stats, linear stats).
176 	let (native, colorStats, linearStats)
181 		Opt_D_dump_asm "Asm code"
182 		(vcat $ map (docToSDoc . pprNatCmmTop) $ concat native)
184 	-- dump global NCG stats for graph coloring allocator
185 	(case concat $ catMaybes colorStats of
188 			-- build the global register conflict graph
190 				= foldl Color.union Color.initGraph
191 				$ [ Color.raGraph stat
192 					| stat@Color.RegAllocStatsStart{} <- stats]
194 			dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
195 				$ Color.pprStats stats graphGlobal
198 				Opt_D_dump_asm_conflicts "Register conflict graph"
202 					targetVirtualRegSqueeze
203 					targetRealRegSqueeze)
207 	-- dump global NCG stats for linear allocator
208 	(case concat $ catMaybes linearStats of
210 		stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
211 			$ Linear.pprStats (concat native) stats)
213 	-- write out the imports
214 	Pretty.printDoc Pretty.LeftMode h
215 		$ makeImportsDoc dflags (concat imports)
-- add_split prefixes each chunk with a split marker when -split-objs
-- is on, so the assembly splitter can cut the output file apart.
219  where	add_split (Cmm tops)
220 		| dopt Opt_SplitObjs dflags = split_marker : tops
223 	split_marker = CmmProc [] mkSplitMarkerLabel [] (ListGraph [])
226 -- | Do native code generation on all these cmms.
-- Tail-recursive over the input chunks: renders each chunk's assembly
-- immediately (constant-space codegen) and accumulates imports and
-- per-chunk profiling info in reversed accumulators; 'count' numbers
-- the chunks for dump labelling.
228 cmmNativeGens dflags h us [] impAcc profAcc count
229 	= return (reverse impAcc, reverse profAcc)
231 cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count
233 	(us', native, imports, colorStats, linearStats)
234 		<- cmmNativeGen dflags us cmm count
236 	Pretty.bufLeftRender h
237 		$ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map pprNatCmmTop native
-- Keep the pretty-printed native code only when a dump flag needs it.
240 		if dopt Opt_D_dump_asm dflags
241 		 || dopt Opt_D_dump_asm_stats dflags
245 	let count'	= count + 1;
248 	-- force evaluation of all this stuff to avoid space leaks
249 	seqString (showSDoc $ vcat $ map ppr imports) `seq` return ()
250 	lsPprNative `seq` return ()
251 	count' `seq` return ()
253 	cmmNativeGens dflags h us' cmms
255 		((lsPprNative, colorStats, linearStats) : profAcc)
-- Strict spine-and-element walk over a String, used above to force
-- the rendered imports and avoid retaining the unevaluated doc.
258  where	seqString []		= ()
259 	seqString (x:xs)	= x `seq` seqString xs `seq` ()
262 -- | Complete native code generation phase for a single top-level chunk of Cmm.
263 --	Dumping the output of each stage along the way.
264 --	Global conflict graph and NCG stats
268 	-> RawCmmTop					-- ^ the cmm to generate code for
269 	-> Int						-- ^ sequence number of this top thing
271 		, [NatCmmTop Instr]			-- native code
272 		, [CLabel]				-- things imported by this cmm
273 		, Maybe [Color.RegAllocStats Instr]	-- stats for the coloring register allocator
274 		, Maybe [Linear.RegAllocStats])		-- stats for the linear register allocators
-- Pipeline: fix global-reg assigns -> cmm-to-cmm opt -> instruction
-- selection -> liveness -> register allocation (graph or linear) ->
-- branch shortcutting -> block sequencing -> arch fixups.
-- NOTE(review): this excerpt elides intervening source lines.
276 cmmNativeGen dflags us cmm count
279 	-- rewrite assignments to global regs
280 	let (fixed_cmm, usFix)	=
281 		{-# SCC "fixAssignsTop" #-}
282 		initUs us $ fixAssignsTop cmm
284 	-- cmm to cmm optimisations
285 	let (opt_cmm, imports) =
286 		{-# SCC "cmmToCmm" #-}
287 		cmmToCmm dflags fixed_cmm
290 		Opt_D_dump_opt_cmm "Optimised Cmm"
291 		(pprCmm $ Cmm [opt_cmm])
293 	-- generate native code from cmm
294 	let ((native, lastMinuteImports), usGen) =
295 		{-# SCC "genMachCode" #-}
296 		initUs usFix $ genMachCode dflags opt_cmm
299 		Opt_D_dump_asm_native "Native code"
300 		(vcat $ map (docToSDoc . pprNatCmmTop) native)
303 	-- tag instructions with register liveness information
304 	let (withLiveness, usLive) =
305 		{-# SCC "regLiveness" #-}
306 		initUs usGen $ mapUs regLiveness native
309 		Opt_D_dump_asm_liveness "Liveness annotations added"
310 		(vcat $ map ppr withLiveness)
313 	-- allocate registers
-- Graph-coloring allocator when -fregs-graph/-fregs-iterative is set;
-- otherwise fall through to the linear allocator below.
314 	(alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
315 	 if ( dopt Opt_RegsGraph dflags
316 	   || dopt Opt_RegsIterative dflags)
318 		-- the regs usable for allocation
319 		let (alloc_regs :: UniqFM (UniqSet RealReg))
320 			= foldr (\r -> plusUFM_C unionUniqSets
321 					$ unitUFM (targetClassOfRealReg r) (unitUniqSet r))
326 		-- do the graph coloring register allocation
327 		let ((alloced, regAllocStats), usAlloc)
328 			= {-# SCC "RegAlloc" #-}
333 				(mkUniqSet [0..maxSpillSlots])
336 		-- dump out what happened during register allocation
338 			Opt_D_dump_asm_regalloc "Registers allocated"
339 			(vcat $ map (docToSDoc . pprNatCmmTop) alloced)
342 			Opt_D_dump_asm_regalloc_stages "Build/spill stages"
343 			(vcat 	$ map (\(stage, stats)
344 					-> text "# --------------------------"
345 					$$ text "#  cmm " <> int count <> text " Stage " <> int stage
347 				$ zip [0..] regAllocStats)
350 			if dopt Opt_D_dump_asm_stats dflags
351 			 then Just regAllocStats else Nothing
353 		-- force evaluation of the Maybe to avoid space leak
354 		mPprStats `seq` return ()
356 		return	( alloced, usAlloc
361 		-- do linear register allocation
362 		let ((alloced, regAllocStats), usAlloc)
363 			= {-# SCC "RegAlloc" #-}
366 				$ mapUs Linear.regAlloc withLiveness
369 			Opt_D_dump_asm_regalloc "Registers allocated"
370 			(vcat $ map (docToSDoc . pprNatCmmTop) alloced)
373 			if dopt Opt_D_dump_asm_stats dflags
374 			 then Just (catMaybes regAllocStats) else Nothing
376 		-- force evaluation of the Maybe to avoid space leak
377 		mPprStats `seq` return ()
379 		return	( alloced, usAlloc
383 	---- shortcut branches
385 		{-# SCC "shortcutBranches" #-}
386 		shortcutBranches dflags alloced
-- Order blocks for fallthrough, then arch-specific post-passes.
390 		{-# SCC "sequenceBlocks" #-}
391 		map sequenceTop shorted
396 		{-# SCC "x86fp_kludge" #-}
397 		map x86fp_kludge sequenced
402 	---- expansion of SPARC synthetic instrs
403 #if sparc_TARGET_ARCH
405 		{-# SCC "sparc_expand" #-}
406 		map SPARC.expandTop kludged
409 		Opt_D_dump_asm_expanded "Synthetic instructions expanded"
410 		(vcat $ map (docToSDoc . pprNatCmmTop) expanded)
418 		, lastMinuteImports ++ imports
-- | x86-only post-pass: run 'i386_insert_ffrees' over each proc's
-- instruction stream (frees x87 FP stack slots); data sections are
-- passed through unchanged.
x86fp_kludge :: NatCmmTop Instr -> NatCmmTop Instr
x86fp_kludge (CmmProc info lbl params (ListGraph code))
        = CmmProc info lbl params (ListGraph (i386_insert_ffrees code))
x86fp_kludge top
        = top
431 -- | Build a doc for all the imports.
-- Emits dynamic-linking stubs for imported symbols, plus assorted
-- platform-conditional assembler directives (dead-strip support on
-- Darwin, non-executable-stack note on GNU ELF, a .ident tag).
-- NOTE(review): this excerpt elides intervening source lines.
433 makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
434 makeImportsDoc dflags imports
437 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
438                 -- On recent versions of Darwin, the linker supports
439                 -- dead-stripping of code and data on a per-symbol basis.
440                 -- There's a hack to make this work in PprMach.pprNatCmmTop.
441             Pretty.$$ Pretty.text ".subsections_via_symbols"
443 #if HAVE_GNU_NONEXEC_STACK
444                 -- On recent GNU ELF systems one can mark an object file
445                 -- as not requiring an executable stack. If all objects
446                 -- linked into a program have this note then the program
447                 -- will not use an executable stack, which is good for
448                 -- security. GHC generated code does not need an executable
449                 -- stack so add the note in:
450             Pretty.$$ Pretty.text ".section .note.GNU-stack,\"\",@progbits"
452 #if !defined(darwin_TARGET_OS)
453                 -- And just because every other compiler does, let's stick in
454 		-- an identifier directive: .ident "GHC x.y.z"
455 	    Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
456 				          Pretty.text cProjectVersion
457                        in Pretty.text ".ident" Pretty.<+>
458                           Pretty.doubleQuotes compilerIdent
462 	-- Generate "symbol stubs" for all external symbols that might
463 	-- come from a dynamic library.
464 	dyld_stubs :: [CLabel] -> Pretty.Doc
465 {-	dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
466 				    map head $ group $ sort imps-}
468 	arch	= platformArch	$ targetPlatform dflags
469 	os	= platformOS	$ targetPlatform dflags
471 	-- (Hack) sometimes two Labels pretty-print the same, but have
472 	-- different uniques; so we compare their text versions...
-- Dedup imports by rendered text before emitting one stub each,
-- prefixed by the GOT declaration for the platform.
474 		| needImportedSymbols arch os
476 		(pprGotDeclaration arch os :) $
477 		map (	pprImportedSymbol arch os . fst . head) $
478 		groupBy (\(_,a) (_,b) -> a == b) $
479 		sortBy (\(_,a) (_,b) -> compare a b) $
485 	doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
486 	astyle = mkCodeStyle AsmStyle
489 -- -----------------------------------------------------------------------------
490 -- Sequencing the basic blocks
492 -- Cmm BasicBlocks are self-contained entities: they always end in a
493 -- jump, either non-local or to another basic block in the same proc.
494 -- In this phase, we attempt to place the basic blocks in a sequence
495 -- such that as many of the local jumps as possible turn into
-- | Reorder a proc's basic blocks to maximise fallthrough, then
-- rewrite out-of-range conditional branches via 'makeFarBranches'.
-- Data sections are returned untouched.
sequenceTop top@(CmmData _ _)
        = top
sequenceTop (CmmProc info lbl params (ListGraph blocks))
        = let ordered = sequenceBlocks blocks
          in  CmmProc info lbl params (ListGraph (makeFarBranches ordered))
506 -- The algorithm is very simple (and stupid): we make a graph out of
507 -- the blocks where there is an edge from one block to another iff the
508 -- first block ends by jumping to the second. Then we topologically
509 -- sort this graph. Then traverse the list: for each block, we first
510 -- output the block, then if it has an out edge, we move the
511 -- destination of the out edge to the front of the list, and continue.
513 -- FYI, the classic layout for basic blocks uses postorder DFS; this
514 -- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
-- Block-sequencing machinery: build a graph with an edge from a block
-- to its single jump target, topologically sort it, then greedily pull
-- each out-edge target forward so the jump becomes a fallthrough.
-- NOTE(review): this excerpt elides intervening source lines
-- (signatures and some alternatives are cut short).
518 	=> [NatBasicBlock instr]
519 	-> [NatBasicBlock instr]
521 sequenceBlocks [] = []
522 sequenceBlocks (entry:blocks) =
523   seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
524   -- the first block is the entry point ==> it must remain at the start.
529 	=> [NatBasicBlock instr]
530 	-> [SCC ( NatBasicBlock instr
534 sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
536 -- we're only interested in the last instruction of
537 -- the block, and only if it has a single destination.
540 	=> [instr] -> [Unique]
543 	= case jumpDestsOfInstr (last instrs) of
544 		[one] -> [getUnique one]
-- Graph node: (block, its id's unique, out-edges).
547 mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
550 seqBlocks ((block,_,[]) : rest)
551   = block : seqBlocks rest
-- Single out-edge: if the target can be brought next, drop the final
-- jump instruction ('init instrs') and let execution fall through.
552 seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
553   | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
554   | otherwise       = block : seqBlocks rest'
556 	(can_fallthrough, rest') = reorder next [] rest
557 	-- TODO: we should do a better job for cycles; try to maximise the
558 	-- fallthroughs within a loop.
559 seqBlocks _ = panic "AsmCodegen:seqBlocks"
-- Search 'rest' for the block with the wanted id; if found, move it to
-- the front (True); otherwise restore the original order (False).
561 reorder id accum [] = (False, reverse accum)
562 reorder id accum (b@(block,id',out) : rest)
563   | id == id'  = (True, (block,id,out) : reverse accum ++ rest)
564   | otherwise  = reorder id (b:accum) rest
567 -- -----------------------------------------------------------------------------
568 -- Making far branches
570 -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
571 -- big, we have to work around this limitation.
-- PowerPC only: conditional branches reach +-32KB, so when a proc is
-- big enough, rewrite conditional branches whose target is further
-- than 'nearLimit' instructions away. Addresses here are approximated
-- by instruction counts ('blockLen'/'scanl').
-- NOTE(review): this excerpt elides intervening source lines.
574 	:: [NatBasicBlock Instr]
575 	-> [NatBasicBlock Instr]
577 #if powerpc_TARGET_ARCH
578 makeFarBranches blocks
579     | last blockAddresses < nearLimit = blocks
580     | otherwise = zipWith handleBlock blockAddresses blocks
582         blockAddresses = scanl (+) 0 $ map blockLen blocks
583         blockLen (BasicBlock _ instrs) = length instrs
585         handleBlock addr (BasicBlock id instrs)
586                 = BasicBlock id (zipWith makeFar [addr..] instrs)
-- Unconditional BCC ALWAYS has long range already; leave it alone.
588         makeFar addr (BCC ALWAYS tgt) = BCC ALWAYS tgt
589         makeFar addr (BCC cond tgt)
590             | abs (addr - targetAddr) >= nearLimit
594             where Just targetAddr = lookupUFM blockAddressMap tgt
595         makeFar addr other            = other
597         nearLimit = 7000 -- 8192 instructions are allowed; let's keep some
598                          -- distance, as we have a few pseudo-insns that are
599                          -- pretty-printed as multiple instructions,
600                          -- and it's just not worth the effort to calculate
603         blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses
608 -- -----------------------------------------------------------------------------
-- Shortcut jumps-to-jumps: collect blocks consisting of a single
-- shortcuttable jump, build a BlockId -> JumpDest map across all tops,
-- then drop those blocks and retarget every reference (including jump
-- tables in data sections). Only runs at -O1 and above.
-- NOTE(review): this excerpt elides intervening source lines.
616 shortcutBranches dflags tops
617   | optLevel dflags < 1 = tops    -- only with -O or higher
618   | otherwise           = map (apply_mapping mapping) tops'
620     (tops', mappings) = mapAndUnzip build_mapping tops
621     mapping = foldr plusUFM emptyUFM mappings
623 build_mapping top@(CmmData _ _) = (top, emptyUFM)
624 build_mapping (CmmProc info lbl params (ListGraph []))
625   = (CmmProc info lbl params (ListGraph []), emptyUFM)
626 build_mapping (CmmProc info lbl params (ListGraph (head:blocks)))
627   = (CmmProc info lbl params (ListGraph (head:others)), mapping)
628         -- drop the shorted blocks, but don't ever drop the first one,
629         -- because it is pointed to by a global label.
631     -- find all the blocks that just consist of a jump that can be
633     (shortcut_blocks, others) = partitionWith split blocks
634     split (BasicBlock id [insn]) | Just dest <- canShortcut insn
636     split other = Right other
638     -- build a mapping from BlockId to JumpDest for shorting branches
639     mapping = foldl add emptyUFM shortcut_blocks
640     add ufm (id,dest) = addToUFM ufm id dest
642 apply_mapping ufm (CmmData sec statics)
643   = CmmData sec (map (shortcutStatic (lookupUFM ufm)) statics)
644   -- we need to get the jump tables, so apply the mapping to the entries
646 apply_mapping ufm (CmmProc info lbl params (ListGraph blocks))
647   = CmmProc info lbl params (ListGraph $ map short_bb blocks)
-- ($!) forces the rewritten instruction list to avoid a thunk chain.
649     short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
650     short_insn i = shortcutJump (lookupUFM ufm) i
651                  -- shortcutJump should apply the mapping repeatedly,
652                  -- just in case we can short multiple branches.
654 -- -----------------------------------------------------------------------------
655 -- Instruction selection
657 -- Native code instruction selection for a chunk of stix code. For
658 -- this part of the computation, we switch from the UniqSM monad to
659 -- the NatM monad. The latter carries not only a Unique, but also an
660 -- Int denoting the current C stack pointer offset in the generated
661 -- code; this is needed for creating correct spill offsets on
662 -- architectures which don't offer, or for which it would be
663 -- prohibitively expensive to employ, a frame pointer register. Viz,
666 -- The offset is measured in bytes, and indicates the difference
667 -- between the current (simulated) C stack-ptr and the value it was at
668 -- the beginning of the block. For stacks which grow down, this value
669 -- should be either zero or negative.
671 -- Switching between the two monads whilst carrying along the same
672 -- Unique supply breaks abstraction. Is that bad?
-- Instruction selection for one Cmm top: runs 'cmmTopCodeGen' inside
-- the NatM monad (seeded with a fresh unique supply and a zero C-stack
-- delta), returning the generated tops plus any imports recorded along
-- the way. A non-zero final stack delta means the codegen's simulated
-- C stack pointer was not restored -- that is a codegen bug, so panic.
681 genMachCode dflags cmm_top
682   = do	{ initial_us <- getUs
683 	; let initial_st           = mkNatM_State initial_us 0 dflags
684 	      (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top)
685 	      final_delta          = natm_delta final_st
686 	      final_imports        = natm_imports final_st
687 	; if   final_delta == 0
688           then return (new_tops, final_imports)
689           else pprPanic "genMachCode: nonzero final delta" (int final_delta)
692 -- -----------------------------------------------------------------------------
693 -- Fixup assignments to global registers so that they assign to
694 -- locations within the RegTable, if appropriate.
696 -- Note that we currently don't fixup reads here: they're done by
697 -- the generic optimiser below, to avoid having two separate passes
-- | Rewrite assignments to global registers throughout one top-level
-- Cmm thing; data sections need no fixing and are returned as-is.
fixAssignsTop :: RawCmmTop -> UniqSM RawCmmTop
fixAssignsTop top@(CmmData _ _)
  = returnUs top
fixAssignsTop (CmmProc info lbl params (ListGraph blocks))
  = do	fixed <- mapUs fixAssignsBlock blocks
	returnUs (CmmProc info lbl params (ListGraph fixed))
-- | Rewrite global-register assignments within a single basic block,
-- preserving the block's id.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock id stmts)
  = do	fixed <- fixAssigns stmts
	returnUs (BasicBlock id fixed)
-- Rewrite each statement (one statement may expand to several) and
-- flatten the results.
-- NOTE(review): this excerpt elides intervening source lines.
711 fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
713 	mapUs fixAssign stmts `thenUs` \ stmtss ->
714 	returnUs (concat stmtss)
-- An assignment to a global reg stays as-is when the reg lives in a
-- machine register on this arch (Left), and becomes a store into the
-- register table otherwise (Right).
716 fixAssign :: CmmStmt -> UniqSM [CmmStmt]
717 fixAssign (CmmAssign (CmmGlobal reg) src)
718   | Left  realreg <- reg_or_addr
719   = returnUs [CmmAssign (CmmGlobal reg) src]
720   | Right baseRegAddr <- reg_or_addr
721   = returnUs [CmmStore baseRegAddr src]
722   -- Replace register leaves with appropriate StixTrees for
723   -- the given target. GlobalRegs which map to a reg on this
724   -- arch are left unchanged. Assigning to BaseReg is always
725   -- illegal, so we check for that.
727     reg_or_addr = get_GlobalReg_reg_or_addr reg
-- Every other statement kind passes through unchanged.
729 fixAssign other_stmt = returnUs [other_stmt]
731 -- -----------------------------------------------------------------------------
732 -- Generic Cmm optimiser
738 (b) Simple inlining: a temporary which is assigned to and then
739 used, once, can be shorted.
740 (c) Replacement of references to GlobalRegs which do not have
741 machine registers by the appropriate memory load (eg.
742 Hp ==> *(BaseReg + 34) ).
743 (d) Position independent code and dynamic linking
744 (i) introduce the appropriate indirections
745 and position independent refs
746 (ii) compile a list of imported symbols
748 Ideas for other things we could do (ToDo):
750 - shortcut jumps-to-jumps
751 - eliminate dead code blocks
752 - simple CSE: if an expr is assigned to a temp, then replace later occs of
753 that expr with the temp, until the expr is no longer valid (can push through
754 temp assignments, and certain assigns to mem...)
-- | Generic Cmm-to-Cmm optimiser for one top-level chunk: mini-inline
-- single-use temporaries, then constant-fold every block. Returns the
-- optimised chunk together with the CLabels it imports (collected by
-- the CmmOptM monad). Data sections import nothing and pass through.
cmmToCmm :: DynFlags -> RawCmmTop -> (RawCmmTop, [CLabel])
cmmToCmm _ top@(CmmData _ _)
  = (top, [])
cmmToCmm dflags (CmmProc info lbl params (ListGraph blocks))
  = runCmmOpt dflags $ do
      let inlined = cmmMiniInline blocks
      folded <- mapM cmmBlockConFold inlined
      return (CmmProc info lbl params (ListGraph folded))
-- A tiny state/reader monad for the Cmm optimiser: reads DynFlags,
-- accumulates imported CLabels, and uses an unboxed result pair to
-- avoid allocation on the hot path.
-- NOTE(review): this excerpt elides part of the (>>=) definition.
763 newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))
765 instance Monad CmmOptM where
766   return x = CmmOptM $ \(imports, _) -> (# x,imports #)
768     CmmOptM $ \(imports, dflags) ->
769       case f (imports, dflags) of
-- The continuation is run with the imports accumulated so far.
772               CmmOptM g' -> g' (imports', dflags)
-- | Record a CLabel as imported by the chunk being optimised.
addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl
  = CmmOptM $ \(imps, _dflags) -> (# (), lbl : imps #)
-- | Read the DynFlags carried by the optimiser monad; the import
-- accumulator passes through untouched.
getDynFlagsCmmOpt :: CmmOptM DynFlags
getDynFlagsCmmOpt
  = CmmOptM $ \(imps, dflags) -> (# dflags, imps #)
-- | Run a CmmOptM computation with an empty import list, returning
-- the result and every CLabel recorded via 'addImportCmmOpt'.
runCmmOpt :: DynFlags -> CmmOptM a -> (a, [CLabel])
runCmmOpt dflags (CmmOptM f)
  = case f ([], dflags) of
      (# res, imps #) -> (res, imps)
-- | Constant-fold every statement in a basic block, keeping its id.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock id stmts)
  = mapM cmmStmtConFold stmts >>= \folded ->
    return (BasicBlock id folded)
-- Statement-level constant folding: fold sub-expressions via
-- 'cmmExprConFold', drop self-assignments, and turn conditional
-- branches on literal conditions into comments or direct branches.
-- NOTE(review): the function head and case scrutinee are elided in
-- this excerpt.
792 	   -> do src' <- cmmExprConFold DataReference src
793 		 return $ case src' of
-- Assignment of a register to itself becomes a no-op.
794 		   CmmReg reg' | reg == reg' -> CmmNop
795 		   new_src -> CmmAssign reg new_src
798 	   -> do addr' <- cmmExprConFold DataReference addr
799 		 src'  <- cmmExprConFold DataReference src
800 		 return $ CmmStore addr' src'
-- Jump targets get JumpReference so PIC rewriting picks the right form.
803 	   -> do addr' <- cmmExprConFold JumpReference addr
804 		 return $ CmmJump addr' regs
806 	CmmCall target regs args srt returns
807 	   -> do target' <- case target of
808 			      CmmCallee e conv -> do
809 			        e' <- cmmExprConFold CallReference e
810 			        return $ CmmCallee e' conv
811 			      other -> return other
812 	         args' <- mapM (\(CmmHinted arg hint) -> do
813 	                          arg' <- cmmExprConFold DataReference arg
814 	                          return (CmmHinted arg' hint)) args
815 	         return $ CmmCall target' regs args' srt returns
817         CmmCondBranch test dest
818 	   -> do test' <- cmmExprConFold DataReference test
819 		 return $ case test' of
-- A literal-zero condition can never be taken: keep a comment with the
-- deleted statement for debuggability.
820 		   CmmLit (CmmInt 0 _) ->
821 		     CmmComment (mkFastString ("deleted: " ++
822 					showSDoc (pprStmt stmt)))
-- Any other literal condition is always taken: unconditional branch.
824 		   CmmLit (CmmInt n _) -> CmmBranch dest
825 		   other -> CmmCondBranch test' dest
828 	   -> do expr' <- cmmExprConFold DataReference expr
829 		 return $ CmmSwitch expr' ids
-- Expression-level constant folding and PIC indirection: folds MachOp
-- children, rewrites label references through
-- 'cmmMakeDynamicReference' (recording imports via 'addImportCmmOpt'),
-- and expands global registers without a machine register into loads
-- from the register table.
-- NOTE(review): the type signature and case scrutinee are elided in
-- this excerpt.
835 cmmExprConFold referenceKind expr
838 	   -> do addr' <- cmmExprConFold DataReference addr
839 		 return $ CmmLoad addr' rep
842 	   -- For MachOps, we first optimize the children, and then we try
843 	   -- our hand at some constant-folding.
844 	   -> do args' <- mapM (cmmExprConFold DataReference) args
845 		 return $ cmmMachOpFold mop args'
847 	CmmLit (CmmLabel lbl)
849 	      dflags <- getDynFlagsCmmOpt
850 	      cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
851 	CmmLit (CmmLabelOff lbl off)
853 	      dflags <- getDynFlagsCmmOpt
854 	      dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
-- The offset is re-applied outside the (possibly indirected) label.
855 	      return $ cmmMachOpFold (MO_Add wordWidth) [
857 		  (CmmLit $ CmmInt (fromIntegral off) wordWidth)
860 #if powerpc_TARGET_ARCH
861 	-- On powerpc (non-PIC), it's easier to jump directly to a label than
862 	-- to use the register table, so we replace these registers
863 	-- with the corresponding labels:
864 	CmmReg (CmmGlobal EagerBlackholeInfo)
866 	  -> cmmExprConFold referenceKind $
867 	     CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_EAGER_BLACKHOLE_INFO")))
868 	CmmReg (CmmGlobal GCEnter1)
870 	  -> cmmExprConFold referenceKind $
871 	     CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_enter_1")))
872 	CmmReg (CmmGlobal GCFun)
874 	  -> cmmExprConFold referenceKind $
875 	     CmmLit (CmmLabel (mkRtsCodeLabel (sLit "__stg_gc_fun")))
878 	CmmReg (CmmGlobal mid)
879 	   -- Replace register leaves with appropriate StixTrees for
880 	   -- the given target.  MagicIds which map to a reg on this
881 	   -- arch are left unchanged.  For the rest, BaseReg is taken
882 	   -- to mean the address of the reg table in MainCapability,
883 	   -- and for all others we generate an indirection to its
884 	   -- location in the register table.
885 	   -> case get_GlobalReg_reg_or_addr mid of
886 		 Left  realreg -> return expr
889 			    BaseReg -> cmmExprConFold DataReference baseRegAddr
890 			    other   -> cmmExprConFold DataReference
891 					(CmmLoad baseRegAddr (globalRegType mid))
892 	-- eliminate zero offsets
894 		-> cmmExprConFold referenceKind (CmmReg reg)
896 	CmmRegOff (CmmGlobal mid) offset
897 	   -- RegOff leaves are just a shorthand form. If the reg maps
898 	   -- to a real reg, we keep the shorthand, otherwise, we just
899 	   -- expand it and defer to the above code.
900 	   -> case get_GlobalReg_reg_or_addr mid of
901 		Left  realreg -> return expr
903 		   -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordWidth) [
904 			CmmReg (CmmGlobal mid),
905 			CmmLit (CmmInt (fromIntegral offset)
910 -- -----------------------------------------------------------------------------