X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FAdjustor.c;h=b798d86e2dcb2a19fc33701ed9bf24ac4aa3f838;hb=0aca2f00f9bddce3624c1c99e9d2373a3a10a6c3;hp=da8951c0c23b49e6213e0a61ac9d17be6ca7e2e6;hpb=eb311572a2551ab83f3141f00a3639caa551b03a;p=ghc-hetmet.git diff --git a/ghc/rts/Adjustor.c b/ghc/rts/Adjustor.c index da8951c..b798d86 100644 --- a/ghc/rts/Adjustor.c +++ b/ghc/rts/Adjustor.c @@ -34,8 +34,8 @@ An adjustor thunk differs from a C function pointer in one respect: when the code is through with it, it has to be freed in order to release Haskell and C resources. Failure to do so result in memory leaks on both the C and Haskell side. - */ + #include "PosixSource.h" #include "Rts.h" #include "RtsExternal.h" @@ -46,46 +46,48 @@ Haskell side. #include #endif -/* Heavily arch-specific, I'm afraid.. */ +#if defined(openbsd_TARGET_OS) +#include +#include +#include -typedef enum { - pageExecuteRead, - pageExecuteReadWrite -} pageMode; +/* no C99 header stdint.h on OpenBSD? */ +typedef unsigned long my_uintptr_t; +#endif + +/* Heavily arch-specific, I'm afraid.. */ /* - * Function: execPage() + * Allocate len bytes which are readable, writable, and executable. * - * Set the executable bit on page containing addr. - * - * TODO: Can the code span more than one page? If yes, we need to make two - * pages executable! + * ToDo: If this turns out to be a performance bottleneck, one could + * e.g. cache the last VirtualProtect/mprotect-ed region and do + * nothing in case of a cache hit. */ -static rtsBool -execPage (void* addr, pageMode mode) +static void* +mallocBytesRWX(int len) { -#if defined(i386_TARGET_ARCH) && defined(_WIN32) && 0 - SYSTEM_INFO sInfo; - DWORD dwOldProtect = 0; - - /* doesn't return a result, so presumably it can't fail... */ - GetSystemInfo(&sInfo); - - if ( VirtualProtect ( (void*)((unsigned long)addr & (sInfo.dwPageSize - 1)), - sInfo.dwPageSize, - ( mode == pageExecuteReadWrite ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ), - &dwOldProtect) == 0 ) { -# if 1 - DWORD rc = GetLastError(); - prog_belch("execPage: failed to protect 0x%p; error=%lu; old protection: %lu\n", addr, rc, dwOldProtect); -# endif - return rtsFalse; - } - return rtsTrue; -#else - (void)addr; (void)mode; /* keep gcc -Wall happy */ - return rtsTrue; + void *addr = stgMallocBytes(len, "mallocBytesRWX"); +#if defined(i386_TARGET_ARCH) && defined(_WIN32) + /* This could be necessary for processors which distinguish between READ and + EXECUTE memory accesses, e.g. Itaniums. */ + DWORD dwOldProtect = 0; + if (VirtualProtect (addr, len, PAGE_EXECUTE_READWRITE, &dwOldProtect) == 0) { + barf("mallocBytesRWX: failed to protect 0x%p; error=%lu; old protection: %lu\n", + addr, (unsigned long)GetLastError(), (unsigned long)dwOldProtect); + } +#elif defined(openbsd_TARGET_OS) + /* malloced memory isn't executable by default on OpenBSD */ + my_uintptr_t pageSize = sysconf(_SC_PAGESIZE); + my_uintptr_t mask = ~(pageSize - 1); + my_uintptr_t startOfFirstPage = ((my_uintptr_t)addr ) & mask; + my_uintptr_t startOfLastPage = ((my_uintptr_t)addr + len - 1) & mask; + my_uintptr_t size = startOfLastPage - startOfFirstPage + pageSize; + if (mprotect((void*)startOfFirstPage, (size_t)size, PROT_EXEC | PROT_READ | PROT_WRITE) != 0) { + barf("mallocBytesRWX: failed to protect 0x%p\n", addr); + } #endif + return addr; } #if defined(i386_TARGET_ARCH) @@ -132,8 +134,52 @@ stgAllocStable(size_t size_in_bytes, StgStablePtr *stable) } #endif +#if defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH) +#if !(defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS)) + +/* !!! !!! WARNING: !!! !!! + * This structure is accessed from AdjustorAsm.s + * Any changes here have to be mirrored in the offsets there. + */ + +typedef struct AdjustorStub { +#if defined(powerpc_TARGET_ARCH) && defined(darwin_TARGET_OS) + unsigned lis; + unsigned ori; + unsigned lwz; + unsigned mtctr; + unsigned bctr; + StgFunPtr code; +#elif defined(powerpc64_TARGET_ARCH) && defined(darwin_TARGET_OS) + /* powerpc64-darwin: just guessing that it won't use fundescs. */ + unsigned lis; + unsigned ori; + unsigned rldimi; + unsigned oris; + unsigned ori2; + unsigned lwz; + unsigned mtctr; + unsigned bctr; + StgFunPtr code; +#else + /* fundesc-based ABIs */ +#define FUNDESCS + StgFunPtr code; + struct AdjustorStub + *toc; + void *env; +#endif + StgStablePtr hptr; + StgFunPtr wptr; + StgInt negative_framesize; + StgInt extrawords_plus_one; +} AdjustorStub; + +#endif +#endif + void* -createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr) +createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr, char *typeString) { void *adjustor = NULL; @@ -153,7 +199,7 @@ createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr) : ff e0 jmp %eax # and jump to it. # the callee cleans up the stack */ - adjustor = stgMallocBytes(14, "createAdjustor"); + adjustor = mallocBytesRWX(14); { unsigned char *const adj_code = (unsigned char *)adjustor; adj_code[0x00] = (unsigned char)0x58; /* popl %eax */ @@ -168,8 +214,6 @@ createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr) adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */ adj_code[0x0d] = (unsigned char)0xe0; - - execPage(adjustor, pageExecuteReadWrite); } #endif break; @@ -200,7 +244,7 @@ createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr) That's (thankfully) the case here with the restricted set of return types that we support. */ - adjustor = stgMallocBytes(17, "createAdjustor"); + adjustor = mallocBytesRWX(17); { unsigned char *const adj_code = (unsigned char *)adjustor; @@ -215,8 +259,6 @@ createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr) adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */ adj_code[0x10] = (unsigned char)0xe0; - - execPage(adjustor, pageExecuteReadWrite); } #elif defined(sparc_TARGET_ARCH) /* Magic constant computed by inspecting the code length of the following @@ -248,7 +290,7 @@ createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr) similarly, and local variables should be accessed via %fp, not %sp. In a nutshell: This should work! (Famous last words! :-) */ - adjustor = stgMallocBytes(4*(11+1), "createAdjustor"); + adjustor = mallocBytesRWX(4*(11+1)); { unsigned long *const adj_code = (unsigned long *)adjustor; @@ -325,7 +367,7 @@ TODO: Depending on how much allocation overhead stgMallocBytes uses for 4 bytes (getting rid of the nop), hence saving memory. [ccshan] */ ASSERT(((StgWord64)wptr & 3) == 0); - adjustor = stgMallocBytes(48, "createAdjustor"); + adjustor = mallocBytesRWX(48); { StgWord64 *const code = (StgWord64 *)adjustor; @@ -341,9 +383,9 @@ TODO: Depending on how much allocation overhead stgMallocBytes uses for /* Ensure that instruction cache is consistent with our new code */ __asm__ volatile("call_pal %0" : : "i" (PAL_imb)); } -#elif defined(powerpc_TARGET_ARCH) +#elif defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS) /* - For PowerPC, the following code is used: + For PowerPC Linux, the following code is used: mr r10,r8 mr r9,r7 @@ -363,7 +405,7 @@ TODO: Depending on how much allocation overhead stgMallocBytes uses for this code, it only works for up to 6 arguments (when floating point arguments are involved, this may be more or less, depending on the exact situation). */ - adjustor = stgMallocBytes(4*13, "createAdjustor"); + adjustor = mallocBytesRWX(4*13); { unsigned long *const adj_code = (unsigned long *)adjustor; @@ -406,6 +448,103 @@ TODO: Depending on how much allocation overhead stgMallocBytes uses for __asm__ volatile ("sync\n\tisync"); } } + +#elif defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH) + +#define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF)) +#define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16)) + { + AdjustorStub *adjustorStub; + int sz = 0, extra_sz, total_sz; + + // from AdjustorAsm.s + // not declared as a function so that AIX-style + // fundescs can never get in the way. + extern void *adjustorCode; + +#ifdef FUNDESCS + adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor"); +#else + adjustorStub = mallocBytesRWX(sizeof(AdjustorStub)); +#endif + adjustor = adjustorStub; + + adjustorStub->code = (void*) &adjustorCode; + +#ifdef FUNDESCS + // function descriptors are a cool idea. + // We don't need to generate any code at runtime. + adjustorStub->toc = adjustorStub; +#else + + // no function descriptors :-( + // We need to do things "by hand". +#if defined(powerpc_TARGET_ARCH) + // lis r2, hi(adjustorStub) + adjustorStub->lis = OP_HI(0x3c40, adjustorStub); + // ori r2, r2, lo(adjustorStub) + adjustorStub->ori = OP_LO(0x6042, adjustorStub); + // lwz r0, code(r2) + adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code) + - (char*)adjustorStub); + // mtctr r0 + adjustorStub->mtctr = 0x7c0903a6; + // bctr + adjustorStub->bctr = 0x4e800420; +#else + barf("adjustor creation not supported on this platform"); +#endif + + // Flush the Instruction cache: + { + int n = sizeof(AdjustorStub)/sizeof(unsigned); + unsigned *p = (unsigned*)adjustor; + while(n--) + { + __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0" + : : "r" (p)); + p++; + } + __asm__ volatile ("sync\n\tisync"); + } +#endif + + printf("createAdjustor: %s\n", typeString); + while(*typeString) + { + char t = *typeString++; + + switch(t) + { +#if defined(powerpc64_TARGET_ARCH) + case 'd': sz += 1; break; + case 'l': sz += 1; break; +#else + case 'd': sz += 2; break; + case 'l': sz += 2; break; +#endif + case 'f': sz += 1; break; + case 'i': sz += 1; break; + } + } + extra_sz = sz - 8; + if(extra_sz < 0) + extra_sz = 0; + total_sz = (6 /* linkage area */ + + 8 /* minimum parameter area */ + + 2 /* two extra arguments */ + + extra_sz)*sizeof(StgWord); + + // align to 16 bytes. + // AIX only requires 8 bytes, but who cares? + total_sz = (total_sz+15) & ~0xF; + + adjustorStub->hptr = hptr; + adjustorStub->wptr = wptr; + adjustorStub->negative_framesize = -total_sz; + adjustorStub->extrawords_plus_one = extra_sz + 1; + } + #elif defined(ia64_TARGET_ARCH) /* Up to 8 inputs are passed in registers. We flush the last two inputs to @@ -506,7 +645,7 @@ freeHaskellFunctionPtr(void* ptr) #if defined(i386_TARGET_ARCH) if ( *(unsigned char*)ptr != 0x68 && *(unsigned char*)ptr != 0x58 ) { - prog_belch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); + errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); return; } @@ -518,7 +657,7 @@ freeHaskellFunctionPtr(void* ptr) } #elif defined(sparc_TARGET_ARCH) if ( *(unsigned long*)ptr != 0x9C23A008UL ) { - prog_belch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); + errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); return; } @@ -526,24 +665,31 @@ freeHaskellFunctionPtr(void* ptr) freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11))); #elif defined(alpha_TARGET_ARCH) if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) { - prog_belch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); + errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); return; } /* Free the stable pointer first..*/ freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10))); -#elif defined(powerpc_TARGET_ARCH) +#elif defined(powerpc_TARGET_ARCH) && defined(linux_TARGET_OS) if ( *(StgWord*)ptr != 0x7d0a4378 ) { - prog_belch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); + errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); return; } freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 4*12))); +#elif defined(powerpc_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH) + extern void* adjustorCode; + if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) { + errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); + return; + } + freeStablePtr(((AdjustorStub*)ptr)->hptr); #elif defined(ia64_TARGET_ARCH) IA64FunDesc *fdesc = (IA64FunDesc *)ptr; StgWord64 *code = (StgWord64 *)(fdesc+1); if (fdesc->ip != (StgWord64)code) { - prog_belch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); + errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); return; } freeStablePtr((StgStablePtr)code[16]); @@ -563,7 +709,7 @@ freeHaskellFunctionPtr(void* ptr) * * Perform initialisation of adjustor thunk layer (if needed.) */ -rtsBool +void initAdjustor(void) { #if defined(i386_TARGET_ARCH) @@ -584,15 +730,12 @@ initAdjustor(void) to return to it before tail jumping from the adjustor thunk. */ - obscure_ccall_ret_code = stgMallocBytes(4, "initAdjustor"); + obscure_ccall_ret_code = mallocBytesRWX(4); obscure_ccall_ret_code[0x00] = (unsigned char)0x83; /* addl $0x4, %esp */ obscure_ccall_ret_code[0x01] = (unsigned char)0xc4; obscure_ccall_ret_code[0x02] = (unsigned char)0x04; obscure_ccall_ret_code[0x03] = (unsigned char)0xc3; /* ret */ - - execPage(obscure_ccall_ret_code, pageExecuteRead); #endif - return rtsTrue; }