From 7cca410a40cccf0fbeda2155f307baa5619b8130 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Tue, 12 Dec 2006 10:36:23 +0000 Subject: [PATCH] MERGE: Fix Windows DEP violations (bug #885) Original patch by brianlsmith@gmail.com --- compiler/ghci/ByteCodeFFI.lhs | 21 ++++++++++++++++----- compiler/ghci/ByteCodeGen.lhs | 23 +++++++++++++++-------- compiler/ghci/ByteCodeInstr.lhs | 4 +++- compiler/ghci/ByteCodeItbls.lhs | 28 +++++++++++++--------------- compiler/ghci/ByteCodeLink.lhs | 6 +++--- rts/Linker.c | 39 ++++++++++++++++++++++----------------- rts/sm/Storage.c | 5 +++++ 7 files changed, 77 insertions(+), 49 deletions(-) diff --git a/compiler/ghci/ByteCodeFFI.lhs b/compiler/ghci/ByteCodeFFI.lhs index c5bdc2c..3e12828 100644 --- a/compiler/ghci/ByteCodeFFI.lhs +++ b/compiler/ghci/ByteCodeFFI.lhs @@ -5,7 +5,7 @@ ByteCodeGen: Generate machine-code sequences for foreign import \begin{code} -module ByteCodeFFI ( mkMarshalCode, moan64 ) where +module ByteCodeFFI ( mkMarshalCode, moan64, newExec ) where #include "HsVersions.h" @@ -18,10 +18,12 @@ import Panic -- there is ifdeffery below import Control.Exception ( throwDyn ) import Data.Bits ( Bits(..), shiftR, shiftL ) -import Foreign ( newArray, Ptr ) import Data.List ( mapAccumL ) import Data.Word ( Word8, Word32 ) +import Foreign ( Ptr, FunPtr, castPtrToFunPtr, + Storable, sizeOf, pokeArray ) +import Foreign.C ( CUInt ) import System.IO.Unsafe ( unsafePerformIO ) import System.IO ( hPutStrLn, stderr ) -- import Debug.Trace ( trace ) @@ -70,14 +72,23 @@ we don't clear our own (single) arg off the C stack. -} mkMarshalCode :: CCallConv -> (Int, CgRep) -> Int -> [(Int, CgRep)] - -> IO (Ptr Word8) + -> IO (FunPtr ()) mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps - in Foreign.newArray bytes - + in newExec bytes +newExec :: Storable a => [a] -> IO (FunPtr ()) +newExec code + = do ptr <- _allocateExec (fromIntegral $ codeSize undefined code) + pokeArray ptr code + return (castPtrToFunPtr ptr) + where + codeSize :: Storable a => a -> [a] -> Int + codeSize dummy array = sizeOf(dummy) * length array +foreign import ccall unsafe "allocateExec" + _allocateExec :: CUInt -> IO (Ptr a) mkMarshalCode_wrk :: CCallConv -> (Int, CgRep) -> Int -> [(Int, CgRep)] diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs index 576763e..72ad7df 100644 --- a/compiler/ghci/ByteCodeGen.lhs +++ b/compiler/ghci/ByteCodeGen.lhs @@ -10,6 +10,7 @@ module ByteCodeGen ( UnlinkedBCO, byteCodeGen, coreExprToBCOs ) where #include "HsVersions.h" import ByteCodeInstr +import ByteCodeItbls import ByteCodeFFI import ByteCodeAsm import ByteCodeLink @@ -48,7 +49,7 @@ import Constants import Data.List ( intersperse, sortBy, zip4, zip6, partition ) import Foreign ( Ptr, castPtr, mallocBytes, pokeByteOff, Word8, - withForeignPtr ) + withForeignPtr, castFunPtrToPtr ) import Foreign.C ( CInt ) import Control.Exception ( throwDyn ) @@ -138,7 +139,7 @@ mkProtoBCO -> Int -> [StgWord] -> Bool -- True <=> is a return point, rather than a function - -> [Ptr ()] + -> [BcPtr] -> ProtoBCO name mkProtoBCO nm instrs_ordlist origin arity bitmap_size bitmap is_ret mallocd_blocks @@ -926,7 +927,7 @@ generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l ioToBc (mkMarshalCode cconv (r_offW, r_rep) addr_offW (zip args_offW a_reps)) `thenBc` \ addr_of_marshaller -> - recordMallocBc addr_of_marshaller `thenBc_` + recordItblMallocBc (ItblPtr (castFunPtrToPtr addr_of_marshaller)) `thenBc_` let -- Offset of the next stack frame down the stack. The CCALL -- instruction needs to describe the chunk of stack containing @@ -935,7 +936,7 @@ generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l stk_offset = d_after_r - s -- do the call - do_call = unitOL (CCALL stk_offset (castPtr addr_of_marshaller)) + do_call = unitOL (CCALL stk_offset (castFunPtrToPtr addr_of_marshaller)) -- slide and return wrapup = mkSLIDE r_sizeW (d_after_r - r_sizeW - s) `snocOL` RETURN_UBX r_rep @@ -1102,7 +1103,7 @@ pushAtom d p (AnnLit lit) -- to be on the safe side we copy the string into -- a malloc'd area of memory. ioToBc (mallocBytes (n+1)) `thenBc` \ ptr -> - recordMallocBc ptr `thenBc_` + recordMallocBc ptr `thenBc_` ioToBc ( withForeignPtr fp $ \p -> do memcpy ptr p (fromIntegral n) @@ -1314,10 +1315,12 @@ mkStackOffsets original_depth szsw -- ----------------------------------------------------------------------------- -- The bytecode generator's monad +type BcPtr = Either ItblPtr (Ptr ()) + data BcM_State = BcM_State { nextlabel :: Int, -- for generating local labels - malloced :: [Ptr ()] } -- ptrs malloced for current BCO + malloced :: [BcPtr] } -- thunks malloced for current BCO -- Should be free()d when it is GCd newtype BcM r = BcM (BcM_State -> IO (BcM_State, r)) @@ -1351,13 +1354,17 @@ instance Monad BcM where (>>) = thenBc_ return = returnBc -emitBc :: ([Ptr ()] -> ProtoBCO Name) -> BcM (ProtoBCO Name) +emitBc :: ([BcPtr] -> ProtoBCO Name) -> BcM (ProtoBCO Name) emitBc bco = BcM $ \st -> return (st{malloced=[]}, bco (malloced st)) recordMallocBc :: Ptr a -> BcM () recordMallocBc a - = BcM $ \st -> return (st{malloced = castPtr a : malloced st}, ()) + = BcM $ \st -> return (st{malloced = Right (castPtr a) : malloced st}, ()) + +recordItblMallocBc :: ItblPtr -> BcM () +recordItblMallocBc a + = BcM $ \st -> return (st{malloced = Left a : malloced st}, ()) getLabelBc :: BcM Int getLabelBc diff --git a/compiler/ghci/ByteCodeInstr.lhs b/compiler/ghci/ByteCodeInstr.lhs index c1aafc9..5239139 100644 --- a/compiler/ghci/ByteCodeInstr.lhs +++ b/compiler/ghci/ByteCodeInstr.lhs @@ -11,6 +11,8 @@ module ByteCodeInstr ( #include "HsVersions.h" #include "../includes/MachDeps.h" +import ByteCodeItbls ( ItblPtr ) + import Outputable import Name import Id @@ -38,7 +40,7 @@ data ProtoBCO a -- what the BCO came from protoBCOExpr :: Either [AnnAlt Id VarSet] (AnnExpr Id VarSet), -- malloc'd pointers - protoBCOPtrs :: [Ptr ()] + protoBCOPtrs :: [Either ItblPtr (Ptr ())] } type LocalLabel = Int diff --git a/compiler/ghci/ByteCodeItbls.lhs b/compiler/ghci/ByteCodeItbls.lhs index 863a7b7..29c54b7 100644 --- a/compiler/ghci/ByteCodeItbls.lhs +++ b/compiler/ghci/ByteCodeItbls.lhs @@ -6,10 +6,11 @@ ByteCodeItbls: Generate infotables for interpreter-made bytecodes \begin{code} {-# OPTIONS -optc-DNON_POSIX_SOURCE #-} -module ByteCodeItbls ( ItblEnv, ItblPtr, mkITbls ) where +module ByteCodeItbls ( ItblEnv, ItblPtr(..), itblCode, mkITbls ) where #include "HsVersions.h" +import ByteCodeFFI ( newExec ) import Name ( Name, getName ) import NameEnv import SMRep ( typeCgRep ) @@ -35,7 +36,15 @@ import GHC.Ptr ( Ptr(..) ) %************************************************************************ \begin{code} -type ItblPtr = Ptr StgInfoTable +newtype ItblPtr = ItblPtr (Ptr ()) deriving Show + +itblCode :: ItblPtr -> Ptr () +itblCode (ItblPtr ptr) + = (castPtr ptr) +#ifdef GHCI_TABLES_NEXT_TO_CODE + `plusPtr` (wORD_SIZE * 2) +#endif + type ItblEnv = NameEnv (Name, ItblPtr) -- We need the Name in the range so we know which -- elements to filter out when unloading a module @@ -107,16 +116,11 @@ make_constr_itbls cons -- This is the only arch-dependent bit. code = mkJumpToAddr entry_addr in - do addr <- malloc_exec (sizeOf itbl) + do addr <- newExec [itbl] --putStrLn ("SIZE of itbl is " ++ show (sizeOf itbl)) --putStrLn ("# ptrs of itbl is " ++ show ptrs) --putStrLn ("# nptrs of itbl is " ++ show nptrs_really) - poke addr itbl - return (getName dcon, addr -#ifdef GHCI_TABLES_NEXT_TO_CODE - `plusPtr` (2 * wORD_SIZE) -#endif - ) + return (getName dcon, ItblPtr (castFunPtrToPtr addr)) -- Make code which causes a jump to the given address. This is the @@ -390,10 +394,4 @@ load :: Storable a => PtrIO a load = do addr <- advance lift (peek addr) -foreign import ccall unsafe "allocateExec" - _allocateExec :: CUInt -> IO (Ptr a) - -malloc_exec :: Int -> IO (Ptr a) -malloc_exec bytes = _allocateExec (fromIntegral bytes) - \end{code} diff --git a/compiler/ghci/ByteCodeLink.lhs b/compiler/ghci/ByteCodeLink.lhs index fd66545..3305daa 100644 --- a/compiler/ghci/ByteCodeLink.lhs +++ b/compiler/ghci/ByteCodeLink.lhs @@ -46,7 +46,7 @@ import GHC.Exts ( BCO#, newBCO#, unsafeCoerce#, Int#, import GHC.Arr ( Array(..) ) import GHC.IOBase ( IO(..) ) -import GHC.Ptr ( Ptr(..) ) +import GHC.Ptr ( Ptr(..), castPtr ) import GHC.Base ( writeArray#, RealWorld, Int(..) ) \end{code} @@ -124,7 +124,7 @@ linkBCO' ie ce (UnlinkedBCO nm arity insns_barr bitmap literalsSS ptrsSS itblsSS ptrs_parr = case ptrs_arr of Array lo hi parr -> parr itbls_arr = listArray (0, n_itbls-1) linked_itbls - :: UArray Int ItblPtr + itbls_barr = case itbls_arr of UArray lo hi barr -> barr literals_arr = listArray (0, n_literals-1) linked_literals @@ -222,7 +222,7 @@ lookupName ce nm lookupIE :: ItblEnv -> Name -> IO (Ptr a) lookupIE ie con_nm = case lookupNameEnv ie con_nm of - Just (_, Ptr a) -> return (Ptr a) + Just (_, a) -> return (castPtr (itblCode a)) Nothing -> do -- try looking up in the object files. let sym_to_find1 = nameToCLabel con_nm "con_info" diff --git a/rts/Linker.c b/rts/Linker.c index 73d4034..bd0b543 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -10,7 +10,7 @@ #include "PosixSource.h" #endif -/* Linux needs _GNU_SOURCE to get RTLD_DEFAULT from and +/* Linux needs _GNU_SOURCE to get RTLD_DEFAULT from and MREMAP_MAYMOVE from . */ #ifdef __linux__ @@ -1161,13 +1161,12 @@ loadObj( char *path ) void *map_addr = NULL; #else FILE *f; - int misalignment; #endif initLinker(); /* debugBelch("loadObj %s\n", path ); */ - /* Check that we haven't already loaded this object. + /* Check that we haven't already loaded this object. Ignore requests to load multiple times */ { ObjectCode *o; @@ -1257,7 +1256,7 @@ loadObj( char *path ) #define EXTRA_MAP_FLAGS 0 #endif - oc->image = mmap(map_addr, n, PROT_EXEC|PROT_READ|PROT_WRITE, + oc->image = mmap(map_addr, n, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|EXTRA_MAP_FLAGS, fd, 0); if (oc->image == MAP_FAILED) barf("loadObj: can't map `%s'", path); @@ -1271,7 +1270,12 @@ loadObj( char *path ) if (!f) barf("loadObj: can't read `%s'", path); -#ifdef darwin_HOST_OS +# if defined(mingw32_HOST_OS) + // TODO: We would like to use allocateExec here, but allocateExec + // cannot currently allocate blocks large enough. + oc->image = VirtualAlloc(NULL, oc->fileSize, MEM_RESERVE | MEM_COMMIT, + PAGE_EXECUTE_READWRITE); +# elif defined(darwin_HOST_OS) // In a Mach-O .o file, all sections can and will be misaligned // if the total size of the headers is not a multiple of the // desired alignment. This is fine for .o files that only serve @@ -1281,15 +1285,12 @@ loadObj( char *path ) // We calculate the correct alignment from the header before // reading the file, and then we misalign oc->image on purpose so // that the actual sections end up aligned again. - misalignment = machoGetMisalignment(f); - oc->misalignment = misalignment; -#else - misalignment = 0; -#endif - + oc->misalignment = machoGetMisalignment(f); oc->image = stgMallocBytes(oc->fileSize + misalignment, "loadObj(image)"); - oc->image += misalignment; - +# else + oc->image = stgMallocBytes(oc->fileSize, "loadObj(image)"); +# endif + n = fread ( oc->image, 1, oc->fileSize, f ); if (n != oc->fileSize) barf("loadObj: error whilst reading `%s'", path); @@ -1402,9 +1403,13 @@ unloadObj( char *path ) prev->next = oc->next; } - /* We're going to leave this in place, in case there are - any pointers from the heap into it: */ - /* stgFree(oc->image); */ + // We're going to leave this in place, in case there are + // any pointers from the heap into it: + // #ifdef mingw32_HOST_OS + // VirtualFree(oc->image); + // #else + // stgFree(oc->image); + // #endif stgFree(oc->fileName); stgFree(oc->symbols); stgFree(oc->sections); @@ -1479,7 +1484,7 @@ static void addSection ( ObjectCode* oc, SectionKind kind, /* ocAllocateJumpIslands - + Allocate additional space at the end of the object file image to make room for jump islands. diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 6af2d19..ddae8c6 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -979,6 +979,11 @@ calcNeeded(void) in the page, and when the page is emptied (all objects on the page are free) we free the page again, not forgetting to make it non-executable. + + TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that + the linker cannot use allocateExec for loading object code files + on Windows. Once allocateExec can handle larger objects, the linker + should be modified to use allocateExec instead of VirtualAlloc. ------------------------------------------------------------------------- */ static bdescr *exec_block; -- 1.7.10.4