From df1fecb95e3a0cf901184605da96dc8ae092b173 Mon Sep 17 00:00:00 2001 From: David Terei Date: Tue, 13 Jul 2010 18:32:43 +0000 Subject: [PATCH] LLVM: Add in new LLVM mangler for implementing TNTC on OSX --- compiler/ghc.cabal.in | 1 + compiler/llvmGen/LlvmCodeGen.hs | 4 +- compiler/llvmGen/LlvmCodeGen/Ppr.hs | 34 ++++++--- compiler/llvmGen/LlvmMangler.hs | 129 +++++++++++++++++++++++++++++++++++ compiler/main/DriverPhases.hs | 9 +++ compiler/main/DriverPipeline.hs | 21 +++++- 6 files changed, 184 insertions(+), 14 deletions(-) create mode 100644 compiler/llvmGen/LlvmMangler.hs diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 479e56d..925014e 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -165,6 +165,7 @@ Library LlvmCodeGen.Data LlvmCodeGen.Ppr LlvmCodeGen.Regs + LlvmMangler MkId Module Name diff --git a/compiler/llvmGen/LlvmCodeGen.hs b/compiler/llvmGen/LlvmCodeGen.hs index 67c70ba..065758f 100644 --- a/compiler/llvmGen/LlvmCodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen.hs @@ -2,7 +2,7 @@ -- | This is the top-level module in the LLVM code generator. -- -module LlvmCodeGen ( llvmCodeGen ) where +module LlvmCodeGen ( llvmCodeGen, llvmFixupAsm ) where #include "HsVersions.h" @@ -13,6 +13,8 @@ import LlvmCodeGen.CodeGen import LlvmCodeGen.Data import LlvmCodeGen.Ppr +import LlvmMangler + import CLabel import Cmm import CgUtils ( fixStgRegisters ) diff --git a/compiler/llvmGen/LlvmCodeGen/Ppr.hs b/compiler/llvmGen/LlvmCodeGen/Ppr.hs index 064aed8..daadc55 100644 --- a/compiler/llvmGen/LlvmCodeGen/Ppr.hs +++ b/compiler/llvmGen/LlvmCodeGen/Ppr.hs @@ -16,9 +16,9 @@ import CLabel import Cmm import FastString +import qualified Outputable import Pretty import Unique -import Util -- ---------------------------------------------------------------------------- -- * Top level @@ -84,7 +84,7 @@ pprLlvmCmmTop _ _ (CmmData _ lmdata) pprLlvmCmmTop env count (CmmProc info lbl _ (ListGraph blks)) = let static = CmmDataLabel lbl : info (idoc, ivar) = if not (null info) - then pprCmmStatic env count static + then pprInfoTable env count lbl static else (empty, []) in (idoc $+$ ( let sec = mkLayoutSection (count + 1) @@ -102,19 +102,24 @@ pprLlvmCmmTop env count (CmmProc info lbl _ (ListGraph blks)) -- | Pretty print CmmStatic -pprCmmStatic :: LlvmEnv -> Int -> [CmmStatic] -> (Doc, [LlvmVar]) -pprCmmStatic env count stat +pprInfoTable :: LlvmEnv -> Int -> CLabel -> [CmmStatic] -> (Doc, [LlvmVar]) +pprInfoTable env count lbl stat = let unres = genLlvmData (Text, stat) (_, (ldata, ltypes)) = resolveLlvmData env unres - setSection (gv@(LMGlobalVar s ty l _ _ c), d) - = let v = if l == Internal then [gv] else [] - sec = mkLayoutSection count - in ((LMGlobalVar s ty l sec llvmInfAlign c, d), v) + setSection ((LMGlobalVar _ ty l _ _ c), d) + = let sec = mkLayoutSection count + ilabel = strCLabel_llvm (entryLblToInfoLbl lbl) + `appendFS` (fsLit "_itable") + gv = LMGlobalVar ilabel ty l sec llvmInfAlign c + v = if l == Internal then [gv] else [] + in ((gv, d), v) setSection v = (v,[]) - (ldata', llvmUsed) = mapAndUnzip setSection ldata - in (pprLlvmData (ldata', ltypes), concat llvmUsed) + (ldata', llvmUsed) = setSection (last ldata) + in if length ldata /= 1 + then Outputable.panic "LlvmCodeGen.Ppr: invalid info table!" + else (pprLlvmData ([ldata'], ltypes), llvmUsed) -- | Create an appropriate section declaration for subsection of text @@ -124,5 +129,12 @@ pprCmmStatic env count stat -- so we are hoping it does. mkLayoutSection :: Int -> LMSection mkLayoutSection n - = Just (fsLit $ ".text;.text " ++ show n ++ " #") +#if darwin_TARGET_OS + -- On OSX we can't use the GNU Assembler, we must use the OSX assembler, which + -- doesn't support subsections. So we post process the assembly code, this + -- section specifier will be replaced with '.text' by the mangler. + = Just (fsLit $ "__STRIP,__me" ++ show n) +#else + = Just (fsLit $ ".text # .text " ++ show n ++ " #") +#endif diff --git a/compiler/llvmGen/LlvmMangler.hs b/compiler/llvmGen/LlvmMangler.hs new file mode 100644 index 0000000..54eead1 --- /dev/null +++ b/compiler/llvmGen/LlvmMangler.hs @@ -0,0 +1,129 @@ +{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} + +-- ----------------------------------------------------------------------------- +-- | GHC LLVM Mangler +-- +-- This script processes the assembly produced by LLVM, rearranging the code +-- so that an info table appears before its corresponding function. +module LlvmMangler ( llvmFixupAsm ) where + +import Data.ByteString.Char8 ( ByteString ) +import qualified Data.ByteString.Char8 as BS + +{- + Configuration. +-} +newSection, oldSection, functionSuf, tableSuf, funDivider, eol :: ByteString +newSection = BS.pack "\n.text\n" +oldSection = BS.pack "__STRIP,__me" +functionSuf = BS.pack "_info:" +tableSuf = BS.pack "_info_itable:" +funDivider = BS.pack "\n\n" +eol = BS.pack "\n" + +eolPred :: Char -> Bool +eolPred = ((==) '\n') + +-- | Read in assembly file and process +llvmFixupAsm :: FilePath -> FilePath -> IO () +llvmFixupAsm f1 f2 = do + asm <- BS.readFile f1 + BS.writeFile f2 BS.empty + allTables f2 asm + return () + +-- | Run over whole assembly file +allTables :: FilePath -> ByteString -> IO () +allTables f str = do + rem <- oneTable f str + if BS.null rem + then return () + else allTables f rem + +{- | + Look for the next function that needs to have its info table + arranged to be before it and process it. This will print out + any code before this function, then the info table, then the + function. It will return the remainder of the assembly code + to process. + + We rely here on the fact that LLVM prints all global variables + at the end of the file, so an info table will always appear + after its function. + + To try to help explain the string searches, here is some + assembly code that would be processed by this program, with + split markers placed in it like so, : + + [ ...asm code... ] + jmp *%eax + + .def Main_main_info + .section TEXT + .globl _Main_main_info + _Main_main_info: + sub $12, %esp + [ ...asm code... ] + jmp *%eax + + .def ..... + + [ ...asm code... ] + + .long 231231 + + .section TEXT + .global _Main_main_entry + .align 4 + _Main_main_entry: + .long 0 + [ ...asm code... ] + + .section TEXT +-} +oneTable :: FilePath -> ByteString -> IO ByteString +oneTable f str = + let last' xs = if (null xs) then 0 else last xs + + -- get the function + (bl, al) = BS.breakSubstring functionSuf str + start = last' $ BS.findSubstrings funDivider bl + (before, fheader) = BS.splitAt start bl + (fun, after) = BS.breakSubstring funDivider al + label = snd $ BS.breakEnd eolPred bl + + -- get the info table + ilabel = label `BS.append` tableSuf + (bit, itable) = BS.breakSubstring ilabel after + (itable', ait) = BS.breakSubstring funDivider itable + istart = last' $ BS.findSubstrings funDivider bit + (bit', iheader) = BS.splitAt istart bit + + -- fix up sections + fheader' = replaceSection fheader + iheader' = replaceSection iheader + + function = [before, eol, iheader', itable', eol, fheader', fun, eol] + remainder = bit' `BS.append` ait + in if BS.null al + then do + BS.appendFile f bl + return BS.empty + + else if BS.null itable + then error $ "Function without matching info table! (" + ++ (BS.unpack label) ++ ")" + + else do + mapM_ (BS.appendFile f) function + return remainder + +-- | Replace the current section in a function or table header with the +-- text section specifier. +replaceSection :: ByteString -> ByteString +replaceSection sec = + let (s1, s2) = BS.breakSubstring oldSection sec + s1' = fst $ BS.breakEnd eolPred s1 + s2' = snd $ BS.break eolPred s2 + in s1' `BS.append` newSection `BS.append` s2' + diff --git a/compiler/main/DriverPhases.hs b/compiler/main/DriverPhases.hs index 4e7c05e..1532c2f 100644 --- a/compiler/main/DriverPhases.hs +++ b/compiler/main/DriverPhases.hs @@ -82,6 +82,7 @@ data Phase | As | LlvmOpt -- Run LLVM opt tool over llvm assembly | LlvmLlc -- LLVM bitcode to native assembly + | LlvmMangle -- Fix up TNTC by processing assembly produced by LLVM | CmmCpp -- pre-process Cmm source | Cmm -- parse & compile Cmm code @@ -113,6 +114,7 @@ eqPhase SplitAs SplitAs = True eqPhase As As = True eqPhase LlvmOpt LlvmOpt = True eqPhase LlvmLlc LlvmLlc = True +eqPhase LlvmMangle LlvmMangle = True eqPhase CmmCpp CmmCpp = True eqPhase Cmm Cmm = True eqPhase StopLn StopLn = True @@ -138,7 +140,12 @@ nextPhase Mangle = SplitMangle nextPhase SplitMangle = As nextPhase As = SplitAs nextPhase LlvmOpt = LlvmLlc +#if darwin_TARGET_OS +nextPhase LlvmLlc = LlvmMangle +#else nextPhase LlvmLlc = As +#endif +nextPhase LlvmMangle = As nextPhase SplitAs = StopLn nextPhase Ccpp = As nextPhase Cc = As @@ -168,6 +175,7 @@ startPhase "s" = As startPhase "S" = As startPhase "ll" = LlvmOpt startPhase "bc" = LlvmLlc +startPhase "lm_s" = LlvmMangle startPhase "o" = StopLn startPhase "cmm" = CmmCpp startPhase "cmmcpp" = Cmm @@ -194,6 +202,7 @@ phaseInputExt SplitMangle = "split_s" -- not really generated phaseInputExt As = "s" phaseInputExt LlvmOpt = "ll" phaseInputExt LlvmLlc = "bc" +phaseInputExt LlvmMangle = "lm_s" phaseInputExt SplitAs = "split_s" -- not really generated phaseInputExt CmmCpp = "cmm" phaseInputExt Cmm = "cmmcpp" diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs index 9128538..046e21c 100644 --- a/compiler/main/DriverPipeline.hs +++ b/compiler/main/DriverPipeline.hs @@ -48,6 +48,7 @@ import Maybes ( expectJust ) import ParserCoreUtils ( getCoreModuleName ) import SrcLoc import FastString +import LlvmCodeGen ( llvmFixupAsm ) -- import MonadUtils -- import Data.Either @@ -1268,8 +1269,13 @@ runPhase LlvmLlc _stop hsc_env _basename _suff input_fn get_output_fn maybe_loc let dflags = hsc_dflags hsc_env let lc_opts = getOpts dflags opt_lc let opt_lvl = max 0 (min 2 $ optLevel dflags) +#if darwin_TARGET_OS + let nphase = LlvmMangle +#else + let nphase = As +#endif - output_fn <- get_output_fn dflags As maybe_loc + output_fn <- get_output_fn dflags nphase maybe_loc SysTools.runLlvmLlc dflags (map SysTools.Option lc_opts @@ -1278,11 +1284,22 @@ runPhase LlvmLlc _stop hsc_env _basename _suff input_fn get_output_fn maybe_loc SysTools.FileOption "" input_fn, SysTools.Option "-o", SysTools.FileOption "" output_fn]) - return (As, dflags, maybe_loc, output_fn) + return (nphase, dflags, maybe_loc, output_fn) where llvmOpts = ["-O1", "-O2", "-O3"] +----------------------------------------------------------------------------- +-- LlvmMangle phase + +runPhase LlvmMangle _stop hsc_env _basename _suff input_fn get_output_fn maybe_loc + = liftIO $ do + let dflags = hsc_dflags hsc_env + output_fn <- get_output_fn dflags As maybe_loc + llvmFixupAsm input_fn output_fn + return (As, dflags, maybe_loc, output_fn) + + -- warning suppression runPhase other _stop _dflags _basename _suff _input_fn _get_output_fn _maybe_loc = panic ("runPhase: don't know how to run phase " ++ show other) -- 1.7.10.4