X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FnativeGen%2FAsmCodeGen.lhs;h=93b385f63c632c54f4264c1f49c37ca4d44f5514;hb=9605d81c7c2fd3de0c07500a7f8a141eed89defc;hp=b2fcb6c65330c88db1b3daf22fd5725551106c82;hpb=423d477bfecd490de1449c59325c8776f91d7aac;p=ghc-hetmet.git diff --git a/ghc/compiler/nativeGen/AsmCodeGen.lhs b/ghc/compiler/nativeGen/AsmCodeGen.lhs index b2fcb6c..93b385f 100644 --- a/ghc/compiler/nativeGen/AsmCodeGen.lhs +++ b/ghc/compiler/nativeGen/AsmCodeGen.lhs @@ -19,11 +19,12 @@ import PprMach import RegisterAlloc import RegAllocInfo ( jumpDests ) import NCGMonad +import PositionIndependentCode import Cmm import PprCmm ( pprStmt, pprCmms ) import MachOp -import CLabel ( CLabel, mkSplitMarkerLabel ) +import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel ) #if powerpc_TARGET_ARCH import CLabel ( mkRtsCodeLabel ) #endif @@ -32,13 +33,11 @@ import UniqFM import Unique ( Unique, getUnique ) import UniqSupply import FastTypes -#if darwin_TARGET_OS -import PprMach ( pprDyldSymbolStub ) -import List ( group, sort ) -#endif +import List ( groupBy, sortBy ) +import CLabel ( pprCLabel ) import ErrUtils ( dumpIfSet_dyn ) -import CmdLineOpts ( DynFlags, DynFlag(..), dopt, opt_Static, - opt_EnsureSplittableC ) +import DynFlags ( DynFlags, DynFlag(..), dopt ) +import StaticFlags ( opt_Static, opt_PIC ) import Digraph import qualified Pretty @@ -112,45 +111,57 @@ The machine-dependent bits break down as follows: nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc nativeCodeGen dflags cmms us - | not opt_Static - = panic "NCG does not handle dynamic libraries right now" - -- ToDo: MachCodeGen used to have derefDLL function which expanded - -- dynamic CLabels (labelDynamic lbl == True) into the appropriate - -- dereferences. This should be done in the pre-NCG cmmToCmm pass instead. - -- It doesn't apply to static data, of course. There are hacks so that - -- the RTS knows what to do for references to closures in a DLL in SRTs, - -- and we never generate a reference to a closure in another DLL in a - -- static constructor. - - | otherwise - = let ((ppr_cmms, insn_sdoc, imports), _) = initUs us $ + = let (res, _) = initUs us $ cgCmm (concat (map add_split cmms)) - cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [(Bool, CLabel)]) + cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel]) cgCmm tops = lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results -> - let (cmms,docs,imps) = unzip3 results in + case unzip3 results of { (cmms,docs,imps) -> returnUs (Cmm cmms, my_vcat docs, concat imps) - in do + } + in + case res of { (ppr_cmms, insn_sdoc, imports) -> do dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms]) - return (insn_sdoc Pretty.$$ dyld_stubs imports) + return (insn_sdoc Pretty.$$ dyld_stubs imports +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + -- On recent versions of Darwin, the linker supports + -- dead-stripping of code and data on a per-symbol basis. + -- There's a hack to make this work in PprMach.pprNatCmmTop. + Pretty.$$ Pretty.text ".subsections_via_symbols" +#endif + ) + } where add_split (Cmm tops) - | opt_EnsureSplittableC = split_marker : tops - | otherwise = tops + | dopt Opt_SplitObjs dflags = split_marker : tops + | otherwise = tops split_marker = CmmProc [] mkSplitMarkerLabel [] [] -#if darwin_TARGET_OS -- Generate "symbol stubs" for all external symbols that might -- come from a dynamic library. - dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ - map head $ group $ sort imps -#else - dyld_stubs imps = Pretty.empty -#endif +{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ + map head $ group $ sort imps-} + + -- (Hack) sometimes two Labels pretty-print the same, but have + -- different uniques; so we compare their text versions... + dyld_stubs imps + | needImportedSymbols + = Pretty.vcat $ + (pprGotDeclaration :) $ + map (pprImportedSymbol . fst . head) $ + groupBy (\(_,a) (_,b) -> a == b) $ + sortBy (\(_,a) (_,b) -> compare a b) $ + map doPpr $ + imps + | otherwise + = Pretty.empty + + where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle) + astyle = mkCodeStyle AsmStyle #ifndef NCG_DEBUG my_vcat sds = Pretty.vcat sds @@ -169,17 +180,17 @@ nativeCodeGen dflags cmms us -- Complete native code generation phase for a single top-level chunk -- of Cmm. -cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [(Bool,CLabel)]) +cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel]) cmmNativeGen dflags cmm = {-# SCC "fixAssigns" #-} fixAssignsTop cmm `thenUs` \ fixed_cmm -> {-# SCC "genericOpt" #-} - cmmToCmm fixed_cmm `bind` \ cmm -> + cmmToCmm fixed_cmm `bind` \ (cmm, imports) -> (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance then cmm else CmmData Text []) `bind` \ ppr_cmm -> {-# SCC "genMachCode" #-} - genMachCode cmm `thenUs` \ (pre_regalloc, imports) -> + genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) -> {-# SCC "regAlloc" #-} map regAlloc pre_regalloc `bind` \ with_regs -> {-# SCC "sequenceBlocks" #-} @@ -189,7 +200,7 @@ cmmNativeGen dflags cmm {-# SCC "vcat" #-} Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc -> - returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", imports) + returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports) where x86fp_kludge :: NatCmmTop -> NatCmmTop x86fp_kludge top@(CmmData _ _) = top @@ -279,7 +290,7 @@ reorder id accum (b@(block,id',out) : rest) -- Switching between the two monads whilst carrying along the same -- Unique supply breaks abstraction. Is that bad? -genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [(Bool,CLabel)]) +genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel]) genMachCode cmm_top initial_us = let initial_st = mkNatM_State initial_us 0 @@ -323,7 +334,7 @@ fixAssign (CmmAssign (CmmGlobal BaseReg) src) fixAssign (CmmAssign (CmmGlobal reg) src) | Left realreg <- reg_or_addr - = returnUs [CmmAssign (CmmGlobal reg) (cmmExprConFold src)] + = returnUs [CmmAssign (CmmGlobal reg) src] | Right baseRegAddr <- reg_or_addr = returnUs [CmmStore baseRegAddr src] -- Replace register leaves with appropriate StixTrees for @@ -335,8 +346,14 @@ fixAssign (CmmAssign (CmmGlobal reg) src) fixAssign (CmmCall target results args vols) = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) -> - returnUs (CmmCall target results' args vols : concat stores) + returnUs (caller_save ++ + CmmCall target results' args vols : + caller_restore ++ + concat stores) where + -- we also save/restore any caller-saves STG registers here + (caller_save, caller_restore) = callerSaveVolatileRegs vols + fixResult g@(CmmGlobal reg,hint) = case get_GlobalReg_reg_or_addr reg of Left realreg -> returnUs (g, []) @@ -362,79 +379,128 @@ Here we do: (c) Replacement of references to GlobalRegs which do not have machine registers by the appropriate memory load (eg. Hp ==> *(BaseReg + 34) ). + (d) Position independent code and dynamic linking + (i) introduce the appropriate indirections + and position independent refs + (ii) compile a list of imported symbols Ideas for other things we could do (ToDo): - shortcut jumps-to-jumps - eliminate dead code blocks + - simple CSE: if an expr is assigned to a temp, then replace later occs of + that expr with the temp, until the expr is no longer valid (can push through + temp assignments, and certain assigns to mem...) -} -cmmToCmm :: CmmTop -> CmmTop -cmmToCmm top@(CmmData _ _) = top -cmmToCmm (CmmProc info lbl params blocks) = - CmmProc info lbl params (map cmmBlockConFold (cmmPeep blocks)) +cmmToCmm :: CmmTop -> (CmmTop, [CLabel]) +cmmToCmm top@(CmmData _ _) = (top, []) +cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $ do + blocks' <- mapM cmmBlockConFold (cmmPeep blocks) + return $ CmmProc info lbl params blocks' -cmmBlockConFold :: CmmBasicBlock -> CmmBasicBlock -cmmBlockConFold (BasicBlock id stmts) = BasicBlock id (map cmmStmtConFold stmts) +newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #)) + +instance Monad CmmOptM where + return x = CmmOptM $ \imports -> (# x,imports #) + (CmmOptM f) >>= g = + CmmOptM $ \imports -> + case f imports of + (# x, imports' #) -> + case g x of + CmmOptM g' -> g' imports' + +addImportCmmOpt :: CLabel -> CmmOptM () +addImportCmmOpt lbl = CmmOptM $ \imports -> (# (), lbl:imports #) + +runCmmOpt :: CmmOptM a -> (a, [CLabel]) +runCmmOpt (CmmOptM f) = case f [] of + (# result, imports #) -> (result, imports) + +cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock +cmmBlockConFold (BasicBlock id stmts) = do + stmts' <- mapM cmmStmtConFold stmts + return $ BasicBlock id stmts' cmmStmtConFold stmt = case stmt of CmmAssign reg src - -> case cmmExprConFold src of - CmmReg reg' | reg == reg' -> CmmNop - new_src -> CmmAssign reg new_src + -> do src' <- cmmExprConFold False src + return $ case src' of + CmmReg reg' | reg == reg' -> CmmNop + new_src -> CmmAssign reg new_src CmmStore addr src - -> CmmStore (cmmExprConFold addr) (cmmExprConFold src) + -> do addr' <- cmmExprConFold False addr + src' <- cmmExprConFold False src + return $ CmmStore addr' src' CmmJump addr regs - -> CmmJump (cmmExprConFold addr) regs + -> do addr' <- cmmExprConFold True addr + return $ CmmJump addr' regs CmmCall target regs args vols - -> CmmCall (case target of - CmmForeignCall e conv -> - CmmForeignCall (cmmExprConFold e) conv - other -> other) - regs - [ (cmmExprConFold arg,hint) | (arg,hint) <- args ] - vols + -> do target' <- case target of + CmmForeignCall e conv -> do + e' <- cmmExprConFold True e + return $ CmmForeignCall e' conv + other -> return other + args' <- mapM (\(arg, hint) -> do + arg' <- cmmExprConFold False arg + return (arg', hint)) args + return $ CmmCall target' regs args' vols CmmCondBranch test dest - -> let test_opt = cmmExprConFold test - in - case test_opt of - CmmLit (CmmInt 0 _) -> - CmmComment (mkFastString ("deleted: " ++ + -> do test' <- cmmExprConFold False test + return $ case test' of + CmmLit (CmmInt 0 _) -> + CmmComment (mkFastString ("deleted: " ++ showSDoc (pprStmt stmt))) - CmmLit (CmmInt n _) -> CmmBranch dest - other -> CmmCondBranch (cmmExprConFold test) dest + CmmLit (CmmInt n _) -> CmmBranch dest + other -> CmmCondBranch test' dest CmmSwitch expr ids - -> CmmSwitch (cmmExprConFold expr) ids + -> do expr' <- cmmExprConFold False expr + return $ CmmSwitch expr' ids other - -> other + -> return other -cmmExprConFold expr +cmmExprConFold isJumpTarget expr = case expr of CmmLoad addr rep - -> CmmLoad (cmmExprConFold addr) rep + -> do addr' <- cmmExprConFold False addr + return $ CmmLoad addr' rep CmmMachOp mop args -- For MachOps, we first optimize the children, and then we try -- our hand at some constant-folding. - -> cmmMachOpFold mop (map cmmExprConFold args) + -> do args' <- mapM (cmmExprConFold False) args + return $ cmmMachOpFold mop args' + + CmmLit (CmmLabel lbl) + -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl + CmmLit (CmmLabelOff lbl off) + -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl + return $ cmmMachOpFold (MO_Add wordRep) [ + dynRef, + (CmmLit $ CmmInt (fromIntegral off) wordRep) + ] #if powerpc_TARGET_ARCH - -- On powerpc, it's easier to jump directly to a label than + -- On powerpc (non-PIC), it's easier to jump directly to a label than -- to use the register table, so we replace these registers -- with the corresponding labels: CmmReg (CmmGlobal GCEnter1) - -> CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1"))) + | not opt_PIC + -> cmmExprConFold isJumpTarget $ + CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1"))) CmmReg (CmmGlobal GCFun) - -> CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun"))) + | not opt_PIC + -> cmmExprConFold isJumpTarget $ + CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun"))) #endif CmmReg (CmmGlobal mid) @@ -445,29 +511,29 @@ cmmExprConFold expr -- and for all others we generate an indirection to its -- location in the register table. -> case get_GlobalReg_reg_or_addr mid of - Left realreg -> expr + Left realreg -> return expr Right baseRegAddr -> case mid of - BaseReg -> cmmExprConFold baseRegAddr - other -> cmmExprConFold (CmmLoad baseRegAddr + BaseReg -> cmmExprConFold False baseRegAddr + other -> cmmExprConFold False (CmmLoad baseRegAddr (globalRegRep mid)) -- eliminate zero offsets CmmRegOff reg 0 - -> cmmExprConFold (CmmReg reg) + -> cmmExprConFold False (CmmReg reg) CmmRegOff (CmmGlobal mid) offset -- RegOf leaves are just a shorthand form. If the reg maps -- to a real reg, we keep the shorthand, otherwise, we just -- expand it and defer to the above code. -> case get_GlobalReg_reg_or_addr mid of - Left realreg -> expr + Left realreg -> return expr Right baseRegAddr - -> cmmExprConFold (CmmMachOp (MO_Add wordRep) [ + -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [ CmmReg (CmmGlobal mid), CmmLit (CmmInt (fromIntegral offset) wordRep)]) other - -> other + -> return other -- ----------------------------------------------------------------------------- @@ -490,20 +556,53 @@ cmmMachOpFold op arg@[CmmLit (CmmInt x rep)] -- "from" type, in order to truncate to the correct size. -- The final narrow/widen to the destination type -- is implicit in the CmmLit. - MO_S_Conv from to -> CmmLit (CmmInt (narrowS from x) to) + MO_S_Conv from to + | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to) + | otherwise -> CmmLit (CmmInt (narrowS from x) to) MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to) - _ -> panic "cmmMachOpFold: unknown unary op" + + _ -> panic "cmmMachOpFold: unknown unary op" + -- Eliminate conversion NOPs cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x --- ToDo: eliminate multiple conversions. Be careful though: can't remove --- a narrowing, and can't remove conversions to/from floating point types. +-- Eliminate nested conversions where possible +cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]] + | Just (rep1,rep2,signed1) <- isIntConversion conv_inner, + Just (_, rep3,signed2) <- isIntConversion conv_outer + = case () of + -- widen then narrow to the same size is a nop + _ | rep1 < rep2 && rep1 == rep3 -> x + -- Widen then narrow to different size: collapse to single conversion + -- but remember to use the signedness from the widening, just in case + -- the final conversion is a widen. + | rep1 < rep2 && rep2 > rep3 -> + cmmMachOpFold (intconv signed1 rep1 rep3) [x] + -- Nested widenings: collapse if the signedness is the same + | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 -> + cmmMachOpFold (intconv signed1 rep1 rep3) [x] + -- Nested narrowings: collapse + | rep1 > rep2 && rep2 > rep3 -> + cmmMachOpFold (MO_U_Conv rep1 rep3) [x] + | otherwise -> + CmmMachOp conv_outer args + where + isIntConversion (MO_U_Conv rep1 rep2) + | not (isFloatingRep rep1) && not (isFloatingRep rep2) + = Just (rep1,rep2,False) + isIntConversion (MO_S_Conv rep1 rep2) + | not (isFloatingRep rep1) && not (isFloatingRep rep2) + = Just (rep1,rep2,True) + isIntConversion _ = Nothing + + intconv True = MO_S_Conv + intconv False = MO_U_Conv --- ToDo: eliminate nested comparisons: --- CmmMachOp MO_Lt [CmmMachOp MO_Eq [x,y], CmmLit (CmmInt 0 _)] --- turns into a simple equality test. +-- ToDo: a narrow of a load can be collapsed into a narrow load, right? +-- but what if the architecture only supports word-sized loads, should +-- we do the transformation anyway? cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] = case mop of @@ -553,9 +652,6 @@ cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] cmmMachOpFold op [x@(CmmLit _), y] | not (isLit y) && isCommutableMachOp op = cmmMachOpFold op [y, x] - where - isLit (CmmLit _) = True - isLit _ = False -- Turn (a+b)+c into a+(b+c) where possible. Because literals are -- moved to the right, it is more likely that we will find @@ -564,16 +660,15 @@ cmmMachOpFold op [x@(CmmLit _), y] -- -- ToDo: this appears to introduce a quadratic behaviour due to the -- nested cmmMachOpFold. Can we fix this? +-- +-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2 +-- is also a lit (otherwise arg1 would be on the right). If we +-- put arg1 on the left of the rearranged expression, we'll get into a +-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ... +-- cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3] - | mop1 == mop2 && isAssociative mop1 + | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1) = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]] - where - isAssociative (MO_Add _) = True - isAssociative (MO_Mul _) = True - isAssociative (MO_And _) = True - isAssociative (MO_Or _) = True - isAssociative (MO_Xor _) = True - isAssociative _ = False -- Make a RegOff if we can cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] @@ -659,7 +754,6 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))] cmmMachOpFold mop args = CmmMachOp mop args - -- ----------------------------------------------------------------------------- -- exactLog2 @@ -755,6 +849,9 @@ lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest) Nothing -> Nothing Just stmts -> Just (stmt:stmts) +lookForInline u expr (CmmNop : rest) + = lookForInline u expr rest + lookForInline u expr (stmt:stmts) = case lookupUFM (getStmtUses stmt) u of Just 1 -> Just (inlineStmt u expr stmt : stmts) @@ -813,6 +910,9 @@ inlineExpr u a other_expr = other_expr bind f x = x $! f +isLit (CmmLit _) = True +isLit _ = False + isComparisonExpr :: CmmExpr -> Bool isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op isComparisonExpr _other = False