From: David Terei Date: Sun, 18 Jul 2010 23:10:00 +0000 (+0000) Subject: LLVM: Use mangler to fix up stack alignment issues on OSX X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=efee3ecf26da95178b773ed68f33601e3fea2c23 LLVM: Use mangler to fix up stack alignment issues on OSX --- diff --git a/compiler/llvmGen/LlvmCodeGen.hs b/compiler/llvmGen/LlvmCodeGen.hs index 065758f..b4d407d 100644 --- a/compiler/llvmGen/LlvmCodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen.hs @@ -36,22 +36,8 @@ import System.IO -- llvmCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO () llvmCodeGen dflags h us cmms - = do - bufh <- newBufHandle h - - Prt.bufLeftRender bufh $ pprLlvmHeader - - env' <- cmmDataLlvmGens dflags bufh env cdata [] - cmmProcLlvmGens dflags bufh us env' cmm 1 [] - - bFlush bufh - - return () - where - cmm = concat $ map (\(Cmm top) -> top) cmms - + = let cmm = concat $ map (\(Cmm top) -> top) cmms (cdata,env) = foldr split ([],initLlvmEnv) cmm - split (CmmData s d' ) (d,e) = ((s,d'):d,e) split (CmmProc i l _ _) (d,e) = let lbl = strCLabel_llvm $ if not (null i) @@ -59,6 +45,15 @@ llvmCodeGen dflags h us cmms else l env' = funInsert lbl llvmFunTy e in (d,env') + in do + bufh <- newBufHandle h + Prt.bufLeftRender bufh $ pprLlvmHeader + + env' <- cmmDataLlvmGens dflags bufh env cdata [] + cmmProcLlvmGens dflags bufh us env' cmm 1 [] + + bFlush bufh + return () -- ----------------------------------------------------------------------------- @@ -98,8 +93,7 @@ cmmProcLlvmGens _ h _ _ [] _ ivars usedArray = LMStaticArray (map cast ivars) ty lmUsed = (LMGlobalVar (fsLit "llvm.used") ty Appending (Just $ fsLit "llvm.metadata") Nothing False, Just usedArray) - in do - Prt.bufLeftRender h $ pprLlvmData ([lmUsed], []) + in Prt.bufLeftRender h $ pprLlvmData ([lmUsed], []) cmmProcLlvmGens dflags h us env (cmm : cmms) count ivars = do diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 3eb873e..37ad119 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -30,6 +30,7 @@ import Control.Monad ( liftM ) type LlvmStatements = OrdList LlvmStatement + -- ----------------------------------------------------------------------------- -- | Top-level of the LLVM proc Code generator -- @@ -62,9 +63,9 @@ basicBlocksCodeGen :: LlvmEnv basicBlocksCodeGen env ([]) (blocks, tops) = do let (blocks', allocs) = mapAndUnzip dominateAllocs blocks let allocs' = concat allocs - let ((BasicBlock id fstmts):rblocks) = blocks' + let ((BasicBlock id fstmts):rblks) = blocks' fplog <- funPrologue - let fblocks = (BasicBlock id (fplog ++ allocs' ++ fstmts)):rblocks + let fblocks = (BasicBlock id (fplog ++ allocs' ++ fstmts)):rblks return (env, fblocks, tops) basicBlocksCodeGen env (block:blocks) (lblocks', ltops') @@ -74,15 +75,6 @@ basicBlocksCodeGen env (block:blocks) (lblocks', ltops') basicBlocksCodeGen env' blocks (lblocks, ltops) --- | Generate code for one block -basicBlockCodeGen :: LlvmEnv - -> CmmBasicBlock - -> UniqSM ( LlvmEnv, [LlvmBasicBlock], [LlvmCmmTop] ) -basicBlockCodeGen env (BasicBlock id stmts) - = do (env', instrs, top) <- stmtsToInstrs env stmts (nilOL, []) - return (env', [BasicBlock id (fromOL instrs)], top) - - -- | Allocations need to be extracted so they can be moved to the entry -- of a function to make sure they dominate all possible paths in the CFG. dominateAllocs :: LlvmBasicBlock -> (LlvmBasicBlock, [LlvmStatement]) @@ -91,9 +83,18 @@ dominateAllocs (BasicBlock id stmts) where (allstmts, allallocs) = foldl split ([],[]) stmts split (stmts', allocs) s@(Assignment _ (Alloca _ _)) - = (stmts', allocs ++ [s]) + = (stmts', allocs ++ [s]) split (stmts', allocs) other - = (stmts' ++ [other], allocs) + = (stmts' ++ [other], allocs) + + +-- | Generate code for one block +basicBlockCodeGen :: LlvmEnv + -> CmmBasicBlock + -> UniqSM ( LlvmEnv, [LlvmBasicBlock], [LlvmCmmTop] ) +basicBlockCodeGen env (BasicBlock id stmts) + = do (env', instrs, top) <- stmtsToInstrs env stmts (nilOL, []) + return (env', [BasicBlock id (fromOL instrs)], top) -- ----------------------------------------------------------------------------- diff --git a/compiler/llvmGen/LlvmCodeGen/Ppr.hs b/compiler/llvmGen/LlvmCodeGen/Ppr.hs index 6c65f18..853f1b1 100644 --- a/compiler/llvmGen/LlvmCodeGen/Ppr.hs +++ b/compiler/llvmGen/LlvmCodeGen/Ppr.hs @@ -3,7 +3,7 @@ -- module LlvmCodeGen.Ppr ( - pprLlvmHeader, pprLlvmCmmTop, pprLlvmData + pprLlvmHeader, pprLlvmCmmTop, pprLlvmData, infoSection, iTableSuf ) where #include "HsVersions.h" @@ -20,6 +20,7 @@ import qualified Outputable import Pretty import Unique + -- ---------------------------------------------------------------------------- -- * Top level -- @@ -110,7 +111,7 @@ pprInfoTable env count lbl stat setSection ((LMGlobalVar _ ty l _ _ c), d) = let sec = mkLayoutSection count ilabel = strCLabel_llvm (entryLblToInfoLbl lbl) - `appendFS` (fsLit "_itable") + `appendFS` fsLit iTableSuf gv = LMGlobalVar ilabel ty l sec llvmInfAlign c v = if l == Internal then [gv] else [] in ((gv, d), v) @@ -121,6 +122,11 @@ pprInfoTable env count lbl stat then Outputable.panic "LlvmCodeGen.Ppr: invalid info table!" else (pprLlvmData ([ldata'], ltypes), llvmUsed) +-- | We generate labels for info tables by converting them to the same label +-- as for the entry code but adding this string as a suffix. +iTableSuf :: String +iTableSuf = "_itable" + -- | Create an appropriate section declaration for subsection of text -- WARNING: This technique could fail as gas documentation says it only @@ -129,12 +135,21 @@ pprInfoTable env count lbl stat -- so we are hoping it does. mkLayoutSection :: Int -> LMSection mkLayoutSection n -#if darwin_TARGET_OS -- On OSX we can't use the GNU Assembler, we must use the OSX assembler, which -- doesn't support subsections. So we post process the assembly code, this -- section specifier will be replaced with '.text' by the mangler. - = Just (fsLit $ "__STRIP,__me" ++ show n) + = Just (fsLit $ infoSection ++ show n +#if darwin_TARGET_OS + ) +#else + ++ "#") +#endif + +-- | The section we are putting info tables and their entry code into +infoSection :: String +#if darwin_TARGET_OS +infoSection = "__STRIP,__me" #else - = Just (fsLit $ ".text; .text " ++ show n ++ " #") +infoSection = ".text; .text " #endif diff --git a/compiler/llvmGen/LlvmMangler.hs b/compiler/llvmGen/LlvmMangler.hs index 54eead1..2fbe324 100644 --- a/compiler/llvmGen/LlvmMangler.hs +++ b/compiler/llvmGen/LlvmMangler.hs @@ -2,27 +2,38 @@ -- ----------------------------------------------------------------------------- -- | GHC LLVM Mangler --- +-- -- This script processes the assembly produced by LLVM, rearranging the code --- so that an info table appears before its corresponding function. +-- so that an info table appears before its corresponding function. We also +-- use it to fix up the stack alignment, which needs to be 16 byte aligned +-- but always ends up off by 4 bytes because GHC sets it to the wrong starting +-- value in the RTS. +-- +-- We only need this for Mac OS X, other targets don't use it. +-- + module LlvmMangler ( llvmFixupAsm ) where import Data.ByteString.Char8 ( ByteString ) import qualified Data.ByteString.Char8 as BS -{- - Configuration. --} +import LlvmCodeGen.Ppr ( infoSection, iTableSuf ) + + +{- Configuration. -} newSection, oldSection, functionSuf, tableSuf, funDivider, eol :: ByteString newSection = BS.pack "\n.text\n" -oldSection = BS.pack "__STRIP,__me" +oldSection = BS.pack infoSection functionSuf = BS.pack "_info:" -tableSuf = BS.pack "_info_itable:" +tableSuf = BS.pack $ "_info" ++ iTableSuf ++ ":" funDivider = BS.pack "\n\n" eol = BS.pack "\n" -eolPred :: Char -> Bool + +eolPred, dollarPred, commaPred :: Char -> Bool eolPred = ((==) '\n') +dollarPred = ((==) '$') +commaPred = ((==) ',') -- | Read in assembly file and process llvmFixupAsm :: FilePath -> FilePath -> IO () @@ -46,11 +57,11 @@ allTables f str = do any code before this function, then the info table, then the function. It will return the remainder of the assembly code to process. - + We rely here on the fact that LLVM prints all global variables at the end of the file, so an info table will always appear after its function. - + To try to help explain the string searches, here is some assembly code that would be processed by this program, with split markers placed in it like so, : @@ -84,7 +95,7 @@ allTables f str = do oneTable :: FilePath -> ByteString -> IO ByteString oneTable f str = let last' xs = if (null xs) then 0 else last xs - + -- get the function (bl, al) = BS.breakSubstring functionSuf str start = last' $ BS.findSubstrings funDivider bl @@ -99,14 +110,17 @@ oneTable f str = istart = last' $ BS.findSubstrings funDivider bit (bit', iheader) = BS.splitAt istart bit + -- fixup stack alignment + fun' = fixupStack fun BS.empty + -- fix up sections fheader' = replaceSection fheader iheader' = replaceSection iheader - function = [before, eol, iheader', itable', eol, fheader', fun, eol] + function = [before, eol, iheader', itable', eol, fheader', fun', eol] remainder = bit' `BS.append` ait in if BS.null al - then do + then do BS.appendFile f bl return BS.empty @@ -127,3 +141,42 @@ replaceSection sec = s2' = snd $ BS.break eolPred s2 in s1' `BS.append` newSection `BS.append` s2' + +-- | Mac OS X requires that the stack be 16 byte aligned when making a function +-- call (only really required though when making a call that will pass through +-- the dynamic linker). During code generation we marked any points where we +-- make a call that requires this alignment. The alignment isn't correctly +-- generated by LLVM as LLVM rightly assumes that the stack wil be aligned to +-- 16n + 12 on entry (since the function call was 16 byte aligned and the return +-- address should have been pushed, so sub 4). GHC though since it always uses +-- jumps keeps the stack 16 byte aligned on both function calls and function +-- entry. We correct LLVM's alignment then by putting inline assembly in that +-- subtracts and adds 4 to the sp as required. +fixupStack :: ByteString -> ByteString -> ByteString +fixupStack fun nfun | BS.null nfun = + let -- fixup sub op + (a, b) = BS.breakSubstring (BS.pack ", %esp\n") fun + (a', num) = BS.breakEnd dollarPred a + num' = BS.pack $ show (read (BS.unpack num) + 4) + fix = a' `BS.append` num' + in if BS.null b + then nfun `BS.append` a + else fixupStack b (nfun `BS.append` fix) + +fixupStack fun nfun = + let -- fixup add ops + (a, b) = BS.breakSubstring (BS.pack "jmp") fun + -- We need to avoid processing jumps to labels, they are of the form: + -- jmp\tL..., jmp\t_f..., jmpl\t_f..., jmpl\t*%eax... + labelJump = BS.index b 4 == 'L' + (jmp, b') = BS.break eolPred b + (a', numx) = BS.breakEnd dollarPred a + (num, x) = BS.break commaPred numx + num' = BS.pack $ show (read (BS.unpack num) + 4) + fix = a' `BS.append` num' `BS.append` x `BS.append` jmp + in if BS.null b + then nfun `BS.append` a + else if labelJump + then fixupStack b' (nfun `BS.append` a `BS.append` jmp) + else fixupStack b' (nfun `BS.append` fix) +