X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FnativeGen%2FAsmCodeGen.lhs;h=dcd785efcc40cde363f9c4af88f0e7360c96eb0f;hb=8f8d1ebf5f37c1b51f8d48cd1343a226d9769912;hp=31c3825b8177cd773fbb25e7ff03bca1586037f3;hpb=c39373f1371fd1e46ea91be262f00c277b31f8e5;p=ghc-hetmet.git diff --git a/ghc/compiler/nativeGen/AsmCodeGen.lhs b/ghc/compiler/nativeGen/AsmCodeGen.lhs index 31c3825..dcd785e 100644 --- a/ghc/compiler/nativeGen/AsmCodeGen.lhs +++ b/ghc/compiler/nativeGen/AsmCodeGen.lhs @@ -1,295 +1,545 @@ -% -% (c) The AQUA Project, Glasgow University, 1993-1998 -% +-- ----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 1993-2004 +-- +-- This is the top-level module in the native code generator. +-- +-- ----------------------------------------------------------------------------- \begin{code} module AsmCodeGen ( nativeCodeGen ) where #include "HsVersions.h" -#include "nativeGen/NCG.h" +#include "NCG.h" -import IO ( Handle ) -import List ( intersperse ) - -import MachMisc +import MachInstrs import MachRegs -import MachCode +import MachCodeGen import PprMach - -import AbsCStixGen ( genCodeAbstractC ) -import AbsCSyn ( AbstractC, MagicId ) -import AsmRegAlloc ( runRegAllocate ) -import OrdList ( OrdList ) -import PrimOp ( commutableOp, PrimOp(..) ) -import RegAllocInfo ( mkMRegsState, MRegsState, findReservedRegs ) -import Stix ( StixTree(..), StixReg(..), - pprStixTrees, CodeSegment(..) ) -import PrimRep ( isFloatingRep, PrimRep(..) ) -import UniqSupply ( returnUs, thenUs, mapUs, initUs, - initUs_, UniqSM, UniqSupply ) -import UniqFM ( UniqFM, emptyUFM, addToUFM, lookupUFM ) -import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) ) - -import Outputable - -\end{code} - -The 96/03 native-code generator has machine-independent and -machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}). - -This module (@AsmCodeGen@) is the top-level machine-independent -module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s -(defined in module @Stix@), using support code from @StixInfo@ (info -tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C -macros), and @StixInteger@ (GMP arbitrary-precision operations). - -Before entering machine-dependent land, we do some machine-independent -@genericOpt@imisations (defined below) on the @StixTree@s. - -We convert to the machine-specific @Instr@ datatype with -@stmt2Instrs@, assuming an ``infinite'' supply of registers. We then -use a machine-independent register allocator (@runRegAllocate@) to -rejoin reality. Obviously, @runRegAllocate@ has machine-specific -helper functions (see about @RegAllocInfo@ below). +import RegisterAlloc +import RegAllocInfo ( jumpDests ) +import NCGMonad +import PositionIndependentCode + +import Cmm +import CmmOpt ( cmmMiniInline, cmmMachOpFold ) +import PprCmm ( pprStmt, pprCmms ) +import MachOp +import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel ) +#if powerpc_TARGET_ARCH +import CLabel ( mkRtsCodeLabel ) +#endif + +import UniqFM +import Unique ( Unique, getUnique ) +import UniqSupply +import FastTypes +import List ( groupBy, sortBy ) +import CLabel ( pprCLabel ) +import ErrUtils ( dumpIfSet_dyn ) +import DynFlags ( DynFlags, DynFlag(..), dopt ) +import StaticFlags ( opt_Static, opt_PIC ) + +import Digraph +import qualified Pretty +import Outputable +import FastString + +-- DEBUGGING ONLY +--import OrdList + +#ifdef NCG_DEBUG +import List ( intersperse ) +#endif + +import DATA_INT +import DATA_WORD +import DATA_BITS +import GLAEXTS + +{- +The native-code generator has machine-independent and +machine-dependent modules. + +This module ("AsmCodeGen") is the top-level machine-independent +module. Before entering machine-dependent land, we do some +machine-independent optimisations (defined below) on the +'CmmStmts's. + +We convert to the machine-specific 'Instr' datatype with +'cmmCodeGen', assuming an infinite supply of registers. We then use +a machine-independent register allocator ('regAlloc') to rejoin +reality. Obviously, 'regAlloc' has machine-specific helper +functions (see about "RegAllocInfo" below). + +Finally, we order the basic blocks of the function so as to minimise +the number of jumps between blocks, by utilising fallthrough wherever +possible. The machine-dependent bits break down as follows: -\begin{description} -\item[@MachRegs@:] Everything about the target platform's machine + + * ["MachRegs"] Everything about the target platform's machine registers (and immediate operands, and addresses, which tend to intermingle/interact with registers). -\item[@MachMisc@:] Includes the @Instr@ datatype (possibly should + * ["MachInstrs"] Includes the 'Instr' datatype (possibly should have a module of its own), plus a miscellany of other things - (e.g., @targetDoubleSize@, @smStablePtrTable@, ...) + (e.g., 'targetDoubleSize', 'smStablePtrTable', ...) -\item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into + * ["MachCodeGen"] is where 'Cmm' stuff turns into machine instructions. -\item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really - an @Doc@). + * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really + a 'Doc'). -\item[@RegAllocInfo@:] In the register allocator, we manipulate - @MRegsState@s, which are @BitSet@s, one bit per machine register. + * ["RegAllocInfo"] In the register allocator, we manipulate + 'MRegsState's, which are 'BitSet's, one bit per machine register. When we want to say something about a specific machine register (e.g., ``it gets clobbered by this instruction''), we set/unset - its bit. Obviously, we do this @BitSet@ thing for efficiency + its bit. Obviously, we do this 'BitSet' thing for efficiency reasons. - The @RegAllocInfo@ module collects together the machine-specific + The 'RegAllocInfo' module collects together the machine-specific info needed to do register allocation. -\end{description} - -So, here we go: -\begin{code} -nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc) -nativeCodeGen absC us - = let (stixRaw, us1) = initUs us (genCodeAbstractC absC) - stixOpt = map (map genericOpt) stixRaw - insns = initUs_ us1 (codeGen stixOpt) - debug_stix = vcat (map pprStixTrees stixOpt) - in - (debug_stix, insns) -\end{code} + * ["RegisterAlloc"] The (machine-independent) register allocator. +-} + +-- ----------------------------------------------------------------------------- +-- Top-level of the native codegen + +-- NB. We *lazilly* compile each block of code for space reasons. + +nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc +nativeCodeGen dflags cmms us + = let (res, _) = initUs us $ + cgCmm (concat (map add_split cmms)) + + cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel]) + cgCmm tops = + lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results -> + case unzip3 results of { (cmms,docs,imps) -> + returnUs (Cmm cmms, my_vcat docs, concat imps) + } + in + case res of { (ppr_cmms, insn_sdoc, imports) -> do + dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms]) + return (insn_sdoc Pretty.$$ dyld_stubs imports +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + -- On recent versions of Darwin, the linker supports + -- dead-stripping of code and data on a per-symbol basis. + -- There's a hack to make this work in PprMach.pprNatCmmTop. + Pretty.$$ Pretty.text ".subsections_via_symbols" +#endif + ) + } -@codeGen@ is the top-level code-generation function: -\begin{code} -codeGen :: [[StixTree]] -> UniqSM SDoc - -codeGen stixFinal - = mapUs genMachCode stixFinal `thenUs` \ dynamic_codes -> - let - fp_kludge :: [Instr] -> [Instr] - fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id) + where - static_instrss :: [[Instr]] - static_instrss = map fp_kludge (scheduleMachCode dynamic_codes) - docs = map (vcat . map pprInstr) static_instrss + add_split (Cmm tops) + | dopt Opt_SplitObjs dflags = split_marker : tops + | otherwise = tops + + split_marker = CmmProc [] mkSplitMarkerLabel [] [] + + -- Generate "symbol stubs" for all external symbols that might + -- come from a dynamic library. +{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ + map head $ group $ sort imps-} + + -- (Hack) sometimes two Labels pretty-print the same, but have + -- different uniques; so we compare their text versions... + dyld_stubs imps + | needImportedSymbols + = Pretty.vcat $ + (pprGotDeclaration :) $ + map (pprImportedSymbol . fst . head) $ + groupBy (\(_,a) (_,b) -> a == b) $ + sortBy (\(_,a) (_,b) -> compare a b) $ + map doPpr $ + imps + | otherwise + = Pretty.empty + + where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle) + astyle = mkCodeStyle AsmStyle + +#ifndef NCG_DEBUG + my_vcat sds = Pretty.vcat sds +#else + my_vcat sds = Pretty.vcat ( + intersperse ( + Pretty.char ' ' + Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker") + Pretty.$$ Pretty.char ' ' + ) + sds + ) +#endif + + +-- Complete native code generation phase for a single top-level chunk +-- of Cmm. + +cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel]) +cmmNativeGen dflags cmm + = {-# SCC "fixAssigns" #-} + fixAssignsTop cmm `thenUs` \ fixed_cmm -> + {-# SCC "genericOpt" #-} + cmmToCmm fixed_cmm `bind` \ (cmm, imports) -> + (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance + then cmm + else CmmData Text []) `bind` \ ppr_cmm -> + {-# SCC "genMachCode" #-} + genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) -> + {-# SCC "regAlloc" #-} + map regAlloc pre_regalloc `bind` \ with_regs -> + {-# SCC "sequenceBlocks" #-} + map sequenceTop with_regs `bind` \ sequenced -> + {-# SCC "x86fp_kludge" #-} + map x86fp_kludge sequenced `bind` \ final_mach_code -> + {-# SCC "vcat" #-} + Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc -> + + returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports) + where + x86fp_kludge :: NatCmmTop -> NatCmmTop + x86fp_kludge top@(CmmData _ _) = top +#if i386_TARGET_ARCH + x86fp_kludge top@(CmmProc info lbl params code) = + CmmProc info lbl params (map bb_i386_insert_ffrees code) + where + bb_i386_insert_ffrees (BasicBlock id instrs) = + BasicBlock id (i386_insert_ffrees instrs) +#else + x86fp_kludge top = top +#endif + +-- ----------------------------------------------------------------------------- +-- Sequencing the basic blocks + +-- Cmm BasicBlocks are self-contained entities: they always end in a +-- jump, either non-local or to another basic block in the same proc. +-- In this phase, we attempt to place the basic blocks in a sequence +-- such that as many of the local jumps as possible turn into +-- fallthroughs. + +sequenceTop :: NatCmmTop -> NatCmmTop +sequenceTop top@(CmmData _ _) = top +sequenceTop (CmmProc info lbl params blocks) = + CmmProc info lbl params (sequenceBlocks blocks) + +-- The algorithm is very simple (and stupid): we make a graph out of +-- the blocks where there is an edge from one block to another iff the +-- first block ends by jumping to the second. Then we topologically +-- sort this graph. Then traverse the list: for each block, we first +-- output the block, then if it has an out edge, we move the +-- destination of the out edge to the front of the list, and continue. + +sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock] +sequenceBlocks [] = [] +sequenceBlocks (entry:blocks) = + seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks))) + -- the first block is the entry point ==> it must remain at the start. + +sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])] +sccBlocks blocks = stronglyConnCompR (map mkNode blocks) + +getOutEdges :: [Instr] -> [Unique] +getOutEdges instrs = case jumpDests (last instrs) [] of + [one] -> [getUnique one] + _many -> [] + -- we're only interested in the last instruction of + -- the block, and only if it has a single destination. + +mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs) + +seqBlocks [] = [] +seqBlocks ((block,_,[]) : rest) + = block : seqBlocks rest +seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest) + | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest' + | otherwise = block : seqBlocks rest' + where + (can_fallthrough, rest') = reorder next [] rest + -- TODO: we should do a better job for cycles; try to maximise the + -- fallthroughs within a loop. +seqBlocks _ = panic "AsmCodegen:seqBlocks" + +reorder id accum [] = (False, reverse accum) +reorder id accum (b@(block,id',out) : rest) + | id == id' = (True, (block,id,out) : reverse accum ++ rest) + | otherwise = reorder id (b:accum) rest + +-- ----------------------------------------------------------------------------- +-- Instruction selection + +-- Native code instruction selection for a chunk of stix code. For +-- this part of the computation, we switch from the UniqSM monad to +-- the NatM monad. The latter carries not only a Unique, but also an +-- Int denoting the current C stack pointer offset in the generated +-- code; this is needed for creating correct spill offsets on +-- architectures which don't offer, or for which it would be +-- prohibitively expensive to employ, a frame pointer register. Viz, +-- x86. + +-- The offset is measured in bytes, and indicates the difference +-- between the current (simulated) C stack-ptr and the value it was at +-- the beginning of the block. For stacks which grow down, this value +-- should be either zero or negative. + +-- Switching between the two monads whilst carrying along the same +-- Unique supply breaks abstraction. Is that bad? + +genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel]) + +genMachCode cmm_top initial_us + = let initial_st = mkNatM_State initial_us 0 + (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top) + final_us = natm_us final_st + final_delta = natm_delta final_st + final_imports = natm_imports final_st in - returnUs (vcat (intersperse (char ' ' - $$ text "# ___stg_split_marker" - $$ char ' ') - docs)) -\end{code} - -Top level code generator for a chunk of stix code: -\begin{code} -genMachCode :: [StixTree] -> UniqSM InstrList - -genMachCode stmts - = mapUs stmt2Instrs stmts `thenUs` \ blocks -> - returnUs (foldr (.) id blocks asmVoid) -\end{code} - -The next bit does the code scheduling. The scheduler must also deal -with register allocation of temporaries. Much parallelism can be -exposed via the OrdList, but more might occur, so further analysis -might be needed. - -\begin{code} -scheduleMachCode :: [InstrList] -> [[Instr]] - -scheduleMachCode - = map (runRegAllocate freeRegsState findReservedRegs) + if final_delta == 0 + then ((new_tops, final_imports), final_us) + else pprPanic "genMachCode: nonzero final delta" + (int final_delta) + +-- ----------------------------------------------------------------------------- +-- Fixup assignments to global registers so that they assign to +-- locations within the RegTable, if appropriate. + +-- Note that we currently don't fixup reads here: they're done by +-- the generic optimiser below, to avoid having two separate passes +-- over the Cmm. + +fixAssignsTop :: CmmTop -> UniqSM CmmTop +fixAssignsTop top@(CmmData _ _) = returnUs top +fixAssignsTop (CmmProc info lbl params blocks) = + mapUs fixAssignsBlock blocks `thenUs` \ blocks' -> + returnUs (CmmProc info lbl params blocks') + +fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock +fixAssignsBlock (BasicBlock id stmts) = + fixAssigns stmts `thenUs` \ stmts' -> + returnUs (BasicBlock id stmts') + +fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt] +fixAssigns stmts = + mapUs fixAssign stmts `thenUs` \ stmtss -> + returnUs (concat stmtss) + +fixAssign :: CmmStmt -> UniqSM [CmmStmt] +fixAssign (CmmAssign (CmmGlobal BaseReg) src) + = panic "cmmStmtConFold: assignment to BaseReg"; + +fixAssign (CmmAssign (CmmGlobal reg) src) + | Left realreg <- reg_or_addr + = returnUs [CmmAssign (CmmGlobal reg) src] + | Right baseRegAddr <- reg_or_addr + = returnUs [CmmStore baseRegAddr src] + -- Replace register leaves with appropriate StixTrees for + -- the given target. GlobalRegs which map to a reg on this + -- arch are left unchanged. Assigning to BaseReg is always + -- illegal, so we check for that. where - freeRegsState = mkMRegsState (extractMappedRegNos freeRegs) -\end{code} - -%************************************************************************ -%* * -\subsection[NCOpt]{The Generic Optimiser} -%* * -%************************************************************************ - -This is called between translating Abstract C to its Tree and actually -using the Native Code Generator to generate the annotations. It's a -chance to do some strength reductions. - -** Remember these all have to be machine independent *** - -Note that constant-folding should have already happened, but we might -have introduced some new opportunities for constant-folding wrt -address manipulations. - -\begin{code} -genericOpt :: StixTree -> StixTree -\end{code} - -For most nodes, just optimize the children. - -\begin{code} -genericOpt (StInd pk addr) = StInd pk (genericOpt addr) - -genericOpt (StAssign pk dst src) - = StAssign pk (genericOpt dst) (genericOpt src) - -genericOpt (StJump addr) = StJump (genericOpt addr) - -genericOpt (StCondJump addr test) - = StCondJump addr (genericOpt test) - -genericOpt (StCall fn cconv pk args) - = StCall fn cconv pk (map genericOpt args) -\end{code} - -Fold indices together when the types match: -\begin{code} -genericOpt (StIndex pk (StIndex pk' base off) off') - | pk == pk' - = StIndex pk (genericOpt base) - (genericOpt (StPrim IntAddOp [off, off'])) - -genericOpt (StIndex pk base off) - = StIndex pk (genericOpt base) (genericOpt off) -\end{code} - -For PrimOps, we first optimize the children, and then we try our hand -at some constant-folding. - -\begin{code} -genericOpt (StPrim op args) = primOpt op (map genericOpt args) -\end{code} - -Replace register leaves with appropriate StixTrees for the given -target. - -\begin{code} -genericOpt leaf@(StReg (StixMagicId id)) - = case (stgReg id) of - Always tree -> genericOpt tree - Save _ -> leaf - -genericOpt other = other -\end{code} - -Now, try to constant-fold the PrimOps. The arguments have already -been optimized and folded. - -\begin{code} -primOpt - :: PrimOp -- The operation from an StPrim - -> [StixTree] -- The optimized arguments - -> StixTree - -primOpt op arg@[StInt x] - = case op of - IntNegOp -> StInt (-x) - _ -> StPrim op arg - -primOpt op args@[StInt x, StInt y] - = case op of - CharGtOp -> StInt (if x > y then 1 else 0) - CharGeOp -> StInt (if x >= y then 1 else 0) - CharEqOp -> StInt (if x == y then 1 else 0) - CharNeOp -> StInt (if x /= y then 1 else 0) - CharLtOp -> StInt (if x < y then 1 else 0) - CharLeOp -> StInt (if x <= y then 1 else 0) - IntAddOp -> StInt (x + y) - IntSubOp -> StInt (x - y) - IntMulOp -> StInt (x * y) - IntQuotOp -> StInt (x `quot` y) - IntRemOp -> StInt (x `rem` y) - IntGtOp -> StInt (if x > y then 1 else 0) - IntGeOp -> StInt (if x >= y then 1 else 0) - IntEqOp -> StInt (if x == y then 1 else 0) - IntNeOp -> StInt (if x /= y then 1 else 0) - IntLtOp -> StInt (if x < y then 1 else 0) - IntLeOp -> StInt (if x <= y then 1 else 0) - -- ToDo: WordQuotOp, WordRemOp. - _ -> StPrim op args -\end{code} - -When possible, shift the constants to the right-hand side, so that we -can match for strength reductions. Note that the code generator will -also assume that constants have been shifted to the right when -possible. - -\begin{code} -primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x] -\end{code} - -We can often do something with constants of 0 and 1 ... + reg_or_addr = get_GlobalReg_reg_or_addr reg + +fixAssign (CmmCall target results args vols) + = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) -> + returnUs (caller_save ++ + CmmCall target results' args vols : + caller_restore ++ + concat stores) + where + -- we also save/restore any caller-saves STG registers here + (caller_save, caller_restore) = callerSaveVolatileRegs vols + + fixResult g@(CmmGlobal reg,hint) = + case get_GlobalReg_reg_or_addr reg of + Left realreg -> returnUs (g, []) + Right baseRegAddr -> + getUniqueUs `thenUs` \ uq -> + let local = CmmLocal (LocalReg uq (globalRegRep reg)) in + returnUs ((local,hint), + [CmmStore baseRegAddr (CmmReg local)]) + fixResult other = + returnUs (other,[]) + +fixAssign other_stmt = returnUs [other_stmt] + +-- ----------------------------------------------------------------------------- +-- Generic Cmm optimiser + +{- +Here we do: + + (a) Constant folding + (b) Simple inlining: a temporary which is assigned to and then + used, once, can be shorted. + (c) Replacement of references to GlobalRegs which do not have + machine registers by the appropriate memory load (eg. + Hp ==> *(BaseReg + 34) ). + (d) Position independent code and dynamic linking + (i) introduce the appropriate indirections + and position independent refs + (ii) compile a list of imported symbols + +Ideas for other things we could do (ToDo): + + - shortcut jumps-to-jumps + - eliminate dead code blocks + - simple CSE: if an expr is assigned to a temp, then replace later occs of + that expr with the temp, until the expr is no longer valid (can push through + temp assignments, and certain assigns to mem...) +-} + +cmmToCmm :: CmmTop -> (CmmTop, [CLabel]) +cmmToCmm top@(CmmData _ _) = (top, []) +cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $ do + blocks' <- mapM cmmBlockConFold (cmmMiniInline blocks) + return $ CmmProc info lbl params blocks' + +newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #)) + +instance Monad CmmOptM where + return x = CmmOptM $ \imports -> (# x,imports #) + (CmmOptM f) >>= g = + CmmOptM $ \imports -> + case f imports of + (# x, imports' #) -> + case g x of + CmmOptM g' -> g' imports' + +addImportCmmOpt :: CLabel -> CmmOptM () +addImportCmmOpt lbl = CmmOptM $ \imports -> (# (), lbl:imports #) + +runCmmOpt :: CmmOptM a -> (a, [CLabel]) +runCmmOpt (CmmOptM f) = case f [] of + (# result, imports #) -> (result, imports) + +cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock +cmmBlockConFold (BasicBlock id stmts) = do + stmts' <- mapM cmmStmtConFold stmts + return $ BasicBlock id stmts' + +cmmStmtConFold stmt + = case stmt of + CmmAssign reg src + -> do src' <- cmmExprConFold False src + return $ case src' of + CmmReg reg' | reg == reg' -> CmmNop + new_src -> CmmAssign reg new_src + + CmmStore addr src + -> do addr' <- cmmExprConFold False addr + src' <- cmmExprConFold False src + return $ CmmStore addr' src' + + CmmJump addr regs + -> do addr' <- cmmExprConFold True addr + return $ CmmJump addr' regs + + CmmCall target regs args vols + -> do target' <- case target of + CmmForeignCall e conv -> do + e' <- cmmExprConFold True e + return $ CmmForeignCall e' conv + other -> return other + args' <- mapM (\(arg, hint) -> do + arg' <- cmmExprConFold False arg + return (arg', hint)) args + return $ CmmCall target' regs args' vols + + CmmCondBranch test dest + -> do test' <- cmmExprConFold False test + return $ case test' of + CmmLit (CmmInt 0 _) -> + CmmComment (mkFastString ("deleted: " ++ + showSDoc (pprStmt stmt))) + + CmmLit (CmmInt n _) -> CmmBranch dest + other -> CmmCondBranch test' dest + + CmmSwitch expr ids + -> do expr' <- cmmExprConFold False expr + return $ CmmSwitch expr' ids + + other + -> return other + + +cmmExprConFold isJumpTarget expr + = case expr of + CmmLoad addr rep + -> do addr' <- cmmExprConFold False addr + return $ CmmLoad addr' rep + + CmmMachOp mop args + -- For MachOps, we first optimize the children, and then we try + -- our hand at some constant-folding. + -> do args' <- mapM (cmmExprConFold False) args + return $ cmmMachOpFold mop args' + + CmmLit (CmmLabel lbl) + -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl + CmmLit (CmmLabelOff lbl off) + -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl + return $ cmmMachOpFold (MO_Add wordRep) [ + dynRef, + (CmmLit $ CmmInt (fromIntegral off) wordRep) + ] + +#if powerpc_TARGET_ARCH + -- On powerpc (non-PIC), it's easier to jump directly to a label than + -- to use the register table, so we replace these registers + -- with the corresponding labels: + CmmReg (CmmGlobal GCEnter1) + | not opt_PIC + -> cmmExprConFold isJumpTarget $ + CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1"))) + CmmReg (CmmGlobal GCFun) + | not opt_PIC + -> cmmExprConFold isJumpTarget $ + CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun"))) +#endif + + CmmReg (CmmGlobal mid) + -- Replace register leaves with appropriate StixTrees for + -- the given target. MagicIds which map to a reg on this + -- arch are left unchanged. For the rest, BaseReg is taken + -- to mean the address of the reg table in MainCapability, + -- and for all others we generate an indirection to its + -- location in the register table. + -> case get_GlobalReg_reg_or_addr mid of + Left realreg -> return expr + Right baseRegAddr + -> case mid of + BaseReg -> cmmExprConFold False baseRegAddr + other -> cmmExprConFold False (CmmLoad baseRegAddr + (globalRegRep mid)) + -- eliminate zero offsets + CmmRegOff reg 0 + -> cmmExprConFold False (CmmReg reg) + + CmmRegOff (CmmGlobal mid) offset + -- RegOf leaves are just a shorthand form. If the reg maps + -- to a real reg, we keep the shorthand, otherwise, we just + -- expand it and defer to the above code. + -> case get_GlobalReg_reg_or_addr mid of + Left realreg -> return expr + Right baseRegAddr + -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [ + CmmReg (CmmGlobal mid), + CmmLit (CmmInt (fromIntegral offset) + wordRep)]) + other + -> return other + +-- ----------------------------------------------------------------------------- +-- Utils + +bind f x = x $! f -\begin{code} -primOpt op args@[x, y@(StInt 0)] - = case op of - IntAddOp -> x - IntSubOp -> x - IntMulOp -> y - AndOp -> y - OrOp -> x - XorOp -> x - SllOp -> x - SrlOp -> x - ISllOp -> x - ISraOp -> x - ISrlOp -> x - _ -> StPrim op args - -primOpt op args@[x, y@(StInt 1)] - = case op of - IntMulOp -> x - IntQuotOp -> x - IntRemOp -> StInt 0 - _ -> StPrim op args \end{code} -Now look for multiplication/division by powers of 2 (integers). - -\begin{code} -primOpt op args@[x, y@(StInt n)] - = case op of - IntMulOp -> case exactLog2 n of - Nothing -> StPrim op args - Just p -> StPrim ISllOp [x, StInt p] - IntQuotOp -> case exactLog2 n of - Nothing -> StPrim op args - Just p -> StPrim ISrlOp [x, StInt p] - _ -> StPrim op args -\end{code} - -Anything else is just too hard. - -\begin{code} -primOpt op args = StPrim op args -\end{code}