pass arguments to unknown function calls in registers
[ghc-hetmet.git] / ghc / rts / Linker.c
index d49c814..a9faab5 100644 (file)
@@ -26,6 +26,7 @@
 #include "RtsUtils.h"
 #include "Schedule.h"
 #include "Storage.h"
+#include "Sparks.h"
 
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #  include <windows.h>
 #  include <math.h>
 #elif defined(darwin_HOST_OS)
-#  include <mach-o/ppc/reloc.h>
 #  define OBJFORMAT_MACHO
 #  include <mach-o/loader.h>
 #  include <mach-o/nlist.h>
 #  include <mach-o/reloc.h>
 #  include <mach-o/dyld.h>
+#if defined(powerpc_HOST_ARCH)
+#  include <mach-o/ppc/reloc.h>
+#endif
 #endif
 
 /* Hash table mapping symbol names to Symbol */
@@ -104,13 +107,20 @@ static int ocVerifyImage_PEi386 ( ObjectCode* oc );
 static int ocGetNames_PEi386    ( ObjectCode* oc );
 static int ocResolve_PEi386     ( ObjectCode* oc );
 #elif defined(OBJFORMAT_MACHO)
-static int ocAllocateJumpIslands_MachO ( ObjectCode* oc );
 static int ocVerifyImage_MachO    ( ObjectCode* oc );
 static int ocGetNames_MachO       ( ObjectCode* oc );
 static int ocResolve_MachO        ( ObjectCode* oc );
 
+static int machoGetMisalignment( FILE * );
+#ifdef powerpc_HOST_ARCH
+static int ocAllocateJumpIslands_MachO ( ObjectCode* oc );
 static void machoInitSymbolsWithoutUnderscore( void );
 #endif
+#endif
+
+#if defined(x86_64_HOST_ARCH)
+static void*x86_64_high_symbol( char *lbl, void *addr );
+#endif
 
 /* -----------------------------------------------------------------------------
  * Built-in symbols from the RTS
@@ -123,19 +133,17 @@ typedef struct _RtsSymbolVal {
 
 
 #if !defined(PAR)
-#define Maybe_ForeignObj        SymX(mkForeignObjzh_fast)
-
 #define Maybe_Stable_Names      SymX(mkWeakzh_fast)                    \
                                SymX(makeStableNamezh_fast)             \
                                SymX(finalizzeWeakzh_fast)
 #else
 /* These are not available in GUM!!! -- HWL */
-#define Maybe_ForeignObj
 #define Maybe_Stable_Names
 #endif
 
 #if !defined (mingw32_HOST_OS)
 #define RTS_POSIX_ONLY_SYMBOLS                  \
+      SymX(signal_handlers)                    \
       SymX(stg_sig_install)                    \
       Sym(nocldstop)
 #endif
@@ -295,8 +303,22 @@ typedef struct _RtsSymbolVal {
       SymX(exp)                                 \
       SymX(log)                                 \
       SymX(sqrt)                                \
+      SymX(powf)                                 \
+      SymX(tanhf)                                \
+      SymX(coshf)                                \
+      SymX(sinhf)                                \
+      SymX(atanf)                                \
+      SymX(acosf)                                \
+      SymX(asinf)                                \
+      SymX(tanf)                                 \
+      SymX(cosf)                                 \
+      SymX(sinf)                                 \
+      SymX(expf)                                 \
+      SymX(logf)                                 \
+      SymX(sqrtf)                                \
       SymX(memcpy)                              \
-      SymX(stg_InstallConsoleEvent)             \
+      SymX(rts_InstallConsoleEvent)             \
+      SymX(rts_ConsoleHandlerDone)              \
       Sym(mktime)                               \
       Sym(_imp___timezone)                      \
       Sym(_imp___tzname)                        \
@@ -311,19 +333,75 @@ typedef struct _RtsSymbolVal {
       Sym(closedir)
 #endif
 
+#if defined(darwin_TARGET_OS) && HAVE_PRINTF_LDBLSTUB
+#define RTS_DARWIN_ONLY_SYMBOLS                        \
+     Sym(asprintf$LDBLStub)                     \
+     Sym(err$LDBLStub)                          \
+     Sym(errc$LDBLStub)                         \
+     Sym(errx$LDBLStub)                         \
+     Sym(fprintf$LDBLStub)                      \
+     Sym(fscanf$LDBLStub)                       \
+     Sym(fwprintf$LDBLStub)                     \
+     Sym(fwscanf$LDBLStub)                      \
+     Sym(printf$LDBLStub)                       \
+     Sym(scanf$LDBLStub)                        \
+     Sym(snprintf$LDBLStub)                     \
+     Sym(sprintf$LDBLStub)                      \
+     Sym(sscanf$LDBLStub)                       \
+     Sym(strtold$LDBLStub)                      \
+     Sym(swprintf$LDBLStub)                     \
+     Sym(swscanf$LDBLStub)                      \
+     Sym(syslog$LDBLStub)                       \
+     Sym(vasprintf$LDBLStub)                    \
+     Sym(verr$LDBLStub)                         \
+     Sym(verrc$LDBLStub)                        \
+     Sym(verrx$LDBLStub)                        \
+     Sym(vfprintf$LDBLStub)                     \
+     Sym(vfscanf$LDBLStub)                      \
+     Sym(vfwprintf$LDBLStub)                    \
+     Sym(vfwscanf$LDBLStub)                     \
+     Sym(vprintf$LDBLStub)                      \
+     Sym(vscanf$LDBLStub)                       \
+     Sym(vsnprintf$LDBLStub)                    \
+     Sym(vsprintf$LDBLStub)                     \
+     Sym(vsscanf$LDBLStub)                      \
+     Sym(vswprintf$LDBLStub)                    \
+     Sym(vswscanf$LDBLStub)                     \
+     Sym(vsyslog$LDBLStub)                      \
+     Sym(vwarn$LDBLStub)                        \
+     Sym(vwarnc$LDBLStub)                       \
+     Sym(vwarnx$LDBLStub)                       \
+     Sym(vwprintf$LDBLStub)                     \
+     Sym(vwscanf$LDBLStub)                      \
+     Sym(warn$LDBLStub)                         \
+     Sym(warnc$LDBLStub)                        \
+     Sym(warnx$LDBLStub)                        \
+     Sym(wcstold$LDBLStub)                      \
+     Sym(wprintf$LDBLStub)                      \
+     Sym(wscanf$LDBLStub)
+#else
+#define RTS_DARWIN_ONLY_SYMBOLS
+#endif
+
 #ifndef SMP
 # define MAIN_CAP_SYM SymX(MainCapability)
 #else
 # define MAIN_CAP_SYM
 #endif
 
+#if !defined(mingw32_HOST_OS)
+#define RTS_USER_SIGNALS_SYMBOLS \
+   SymX(setIOManagerPipe)
+#else
+#define RTS_USER_SIGNALS_SYMBOLS /* nothing */
+#endif
+
 #ifdef TABLES_NEXT_TO_CODE
 #define RTS_RET_SYMBOLS /* nothing */
 #else
 #define RTS_RET_SYMBOLS                        \
       SymX(stg_enter_ret)                      \
       SymX(stg_gc_fun_ret)                     \
-      SymX(stg_ap_0_ret)                       \
       SymX(stg_ap_v_ret)                       \
       SymX(stg_ap_f_ret)                       \
       SymX(stg_ap_d_ret)                       \
@@ -341,7 +419,6 @@ typedef struct _RtsSymbolVal {
 #endif
 
 #define RTS_SYMBOLS                            \
-      Maybe_ForeignObj                         \
       Maybe_Stable_Names                       \
       Sym(StgReturn)                           \
       SymX(stg_enter_info)                     \
@@ -410,6 +487,7 @@ typedef struct _RtsSymbolVal {
       SymX(delayzh_fast)                       \
       SymX(deRefWeakzh_fast)                   \
       SymX(deRefStablePtrzh_fast)              \
+      SymX(dirty_MUT_VAR)                      \
       SymX(divExactIntegerzh_fast)             \
       SymX(divModIntegerzh_fast)               \
       SymX(forkzh_fast)                                \
@@ -462,6 +540,7 @@ typedef struct _RtsSymbolVal {
       SymX(newTVarzh_fast)                     \
       SymX(atomicModifyMutVarzh_fast)          \
       SymX(newPinnedByteArrayzh_fast)          \
+      SymX(newSpark)                           \
       SymX(orIntegerzh_fast)                   \
       SymX(performGC)                          \
       SymX(performMajorGC)                     \
@@ -519,12 +598,9 @@ typedef struct _RtsSymbolVal {
       SymX(rts_mkWord8)                                \
       SymX(rts_unlock)                         \
       SymX(rtsSupportsBoundThreads)            \
-      SymX(run_queue_hd)                       \
       SymX(__hscore_get_saved_termios)         \
       SymX(__hscore_set_saved_termios)         \
-      SymX(setIOManagerPipe)                   \
       SymX(setProgArgv)                                \
-      SymX(startSignalHandler)                 \
       SymX(startupHaskell)                     \
       SymX(shutdownHaskell)                    \
       SymX(shutdownHaskellAndExit)             \
@@ -536,10 +612,10 @@ typedef struct _RtsSymbolVal {
       SymX(stg_EMPTY_MVAR_info)                        \
       SymX(stg_IND_STATIC_info)                        \
       SymX(stg_INTLIKE_closure)                        \
+      SymX(stg_MUT_ARR_PTRS_DIRTY_info)                \
       SymX(stg_MUT_ARR_PTRS_FROZEN_info)       \
       SymX(stg_MUT_ARR_PTRS_FROZEN0_info)      \
       SymX(stg_WEAK_info)                       \
-      SymX(stg_ap_0_info)                      \
       SymX(stg_ap_v_info)                      \
       SymX(stg_ap_f_info)                      \
       SymX(stg_ap_d_info)                      \
@@ -554,6 +630,21 @@ typedef struct _RtsSymbolVal {
       SymX(stg_ap_pppp_info)                   \
       SymX(stg_ap_ppppp_info)                  \
       SymX(stg_ap_pppppp_info)                 \
+      SymX(stg_ap_0_fast)                      \
+      SymX(stg_ap_v_fast)                      \
+      SymX(stg_ap_f_fast)                      \
+      SymX(stg_ap_d_fast)                      \
+      SymX(stg_ap_l_fast)                      \
+      SymX(stg_ap_n_fast)                      \
+      SymX(stg_ap_p_fast)                      \
+      SymX(stg_ap_pv_fast)                     \
+      SymX(stg_ap_pp_fast)                     \
+      SymX(stg_ap_ppv_fast)                    \
+      SymX(stg_ap_ppp_fast)                    \
+      SymX(stg_ap_pppv_fast)                   \
+      SymX(stg_ap_pppp_fast)                   \
+      SymX(stg_ap_ppppp_fast)                  \
+      SymX(stg_ap_pppppp_fast)                 \
       SymX(stg_ap_1_upd_info)                  \
       SymX(stg_ap_2_upd_info)                  \
       SymX(stg_ap_3_upd_info)                  \
@@ -592,7 +683,21 @@ typedef struct _RtsSymbolVal {
       SymX(word2Integerzh_fast)                        \
       SymX(writeTVarzh_fast)                   \
       SymX(xorIntegerzh_fast)                  \
-      SymX(yieldzh_fast)
+      SymX(yieldzh_fast)                        \
+      SymX(stg_interp_constr_entry)             \
+      SymX(stg_interp_constr1_entry)            \
+      SymX(stg_interp_constr2_entry)            \
+      SymX(stg_interp_constr3_entry)            \
+      SymX(stg_interp_constr4_entry)            \
+      SymX(stg_interp_constr5_entry)            \
+      SymX(stg_interp_constr6_entry)            \
+      SymX(stg_interp_constr7_entry)            \
+      SymX(stg_interp_constr8_entry)            \
+      SymX(stgMallocBytesRWX)                   \
+      SymX(getAllocations)                      \
+      SymX(revertCAFs)                          \
+      SymX(RtsFlags)                            \
+      RTS_USER_SIGNALS_SYMBOLS
 
 #ifdef SUPPORT_LONG_LONGS
 #define RTS_LONG_LONG_SYMS                     \
@@ -626,7 +731,7 @@ typedef struct _RtsSymbolVal {
 #define RTS_LIBGCC_SYMBOLS
 #endif
 
-#ifdef darwin_HOST_OS
+#if defined(darwin_HOST_OS) && defined(powerpc_HOST_ARCH)
       // Symbols that don't have a leading underscore
       // on Mac OS X. They have to receive special treatment,
       // see machoInitSymbolsWithoutUnderscore()
@@ -645,6 +750,7 @@ RTS_LONG_LONG_SYMS
 RTS_POSIX_ONLY_SYMBOLS
 RTS_MINGW_ONLY_SYMBOLS
 RTS_CYGWIN_ONLY_SYMBOLS
+RTS_DARWIN_ONLY_SYMBOLS
 RTS_LIBGCC_SYMBOLS
 #undef Sym
 #undef SymX
@@ -674,6 +780,12 @@ static RtsSymbolVal rtsSyms[] = {
       RTS_MINGW_ONLY_SYMBOLS
       RTS_CYGWIN_ONLY_SYMBOLS
       RTS_LIBGCC_SYMBOLS
+#if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH)
+      // dyld stub code contains references to this,
+      // but it should never be called because we treat
+      // lazy pointers as nonlazy.
+      { "dyld_stub_binding_helper", (void*)0xDEADBEEF },
+#endif
       { 0, 0 } /* sentinel */
 };
 
@@ -746,7 +858,7 @@ initLinker( void )
        ghciInsertStrHashTable("(GHCi built-in symbols)",
                                symhash, sym->lbl, sym->addr);
     }
-#   if defined(OBJFORMAT_MACHO)
+#   if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
     machoInitSymbolsWithoutUnderscore();
 #   endif
 
@@ -891,6 +1003,16 @@ lookupSymbol( char *lbl )
 #      if defined(openbsd_HOST_OS)
        val = dlsym(dl_prog_handle, lbl);
        return (val != NULL) ? val : dlsym(dl_libc_handle,lbl);
+#      elif defined(x86_64_HOST_ARCH)
+       val = dlsym(dl_prog_handle, lbl);
+       if (val >= (void *)0x80000000) {
+           void *new_val;
+           new_val = x86_64_high_symbol(lbl, val);
+           IF_DEBUG(linker,debugBelch("lookupSymbol: relocating out of range symbol: %s = %p, now %p\n", lbl, val, new_val));
+           return new_val;
+       } else {
+           return val;
+       }
 #      else /* not openbsd */
        return dlsym(dl_prog_handle, lbl);
 #      endif
@@ -984,7 +1106,7 @@ void ghci_enquire ( char* addr )
             // debugBelch("ghci_enquire: can't find %s\n", sym);
          }
          else if (addr-DELTA <= a && a <= addr+DELTA) {
-            debugBelch("%p + %3d  ==  `%s'\n", addr, a - addr, sym);
+            debugBelch("%p + %3d  ==  `%s'\n", addr, (int)(a - addr), sym);
          }
       }
    }
@@ -1011,8 +1133,8 @@ loadObj( char *path )
    void *map_addr = NULL;
 #else
    FILE *f;
+   int misalignment;
 #endif
-
    initLinker();
 
    /* debugBelch("loadObj %s\n", path ); */
@@ -1116,13 +1238,30 @@ loadObj( char *path )
 
 #else /* !USE_MMAP */
 
-   oc->image = stgMallocBytes(oc->fileSize, "loadObj(image)");
-
    /* load the image into memory */
    f = fopen(path, "rb");
    if (!f)
        barf("loadObj: can't read `%s'", path);
 
+#ifdef darwin_HOST_OS
+    // In a Mach-O .o file, all sections can and will be misaligned
+    // if the total size of the headers is not a multiple of the
+    // desired alignment. This is fine for .o files that only serve
+    // as input for the static linker, but it's not fine for us,
+    // as SSE (used by gcc for floating point) and Altivec require
+    // 16-byte alignment.
+    // We calculate the correct alignment from the header before
+    // reading the file, and then we misalign oc->image on purpose so
+    // that the actual sections end up aligned again.
+   misalignment = machoGetMisalignment(f);
+   oc->misalignment = misalignment;
+#else
+   misalignment = 0;
+#endif
+
+   oc->image = stgMallocBytes(oc->fileSize + misalignment, "loadObj(image)");
+   oc->image += misalignment;
+   
    n = fread ( oc->image, 1, oc->fileSize, f );
    if (n != oc->fileSize)
       barf("loadObj: error whilst reading `%s'", path);
@@ -1131,7 +1270,7 @@ loadObj( char *path )
 
 #endif /* USE_MMAP */
 
-#  if defined(OBJFORMAT_MACHO)
+#  if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
    r = ocAllocateJumpIslands_MachO ( oc );
    if (!r) { return r; }
 #  elif defined(OBJFORMAT_ELF) && defined(powerpc_HOST_ARCH)
@@ -1332,6 +1471,10 @@ static int ocAllocateJumpIslands( ObjectCode* oc, int count, int first )
   int pagesize, n, m;
 #endif
   int aligned;
+  int misalignment = 0;
+#if darwin_HOST_OS
+  misalignment = oc->misalignment;
+#endif
 
   if( count > 0 )
   {
@@ -1347,28 +1490,46 @@ static int ocAllocateJumpIslands( ObjectCode* oc, int count, int first )
     n = ROUND_UP( oc->fileSize, pagesize );
     m = ROUND_UP( aligned + sizeof (ppcJumpIsland) * count, pagesize );
 
-    /* The effect of this mremap() call is only the ensure that we have
-     * a sufficient number of virtually contiguous pages.  As returned from
-     * mremap, the pages past the end of the file are not backed.  We give
-     * them a backing by using MAP_FIXED to map in anonymous pages.
+    /* If we have a half-page-size file and map one page of it then
+     * the part of the page after the size of the file remains accessible.
+     * If, however, we map in 2 pages, the 2nd page is not accessible
+     * and will give a "Bus Error" on access.  To get around this, we check
+     * if we need any extra pages for the jump islands and map them in
+     * anonymously.  We must check that we actually require extra pages
+     * otherwise the attempt to mmap 0 pages of anonymous memory will
+     * fail -EINVAL.
      */
-    if( (oc->image = mremap( oc->image, n, m, MREMAP_MAYMOVE )) == MAP_FAILED )
-    {
-      errorBelch( "Unable to mremap for Jump Islands\n" );
-      return 0;
-    }
 
-    if( mmap( oc->image + n, m - n, PROT_READ | PROT_WRITE | PROT_EXEC,
-              MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0 ) == MAP_FAILED )
+    if( m > n )
     {
-      errorBelch( "Unable to mmap( MAP_FIXED ) for Jump Islands\n" );
-      return 0;
+      /* The effect of this mremap() call is only the ensure that we have
+       * a sufficient number of virtually contiguous pages.  As returned from
+       * mremap, the pages past the end of the file are not backed.  We give
+       * them a backing by using MAP_FIXED to map in anonymous pages.
+       */
+      oc->image = mremap( oc->image, n, m, MREMAP_MAYMOVE );
+
+      if( oc->image == MAP_FAILED )
+      {
+        errorBelch( "Unable to mremap for Jump Islands\n" );
+        return 0;
+      }
+
+      if( mmap( oc->image + n, m - n, PROT_READ | PROT_WRITE | PROT_EXEC,
+                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0 ) == MAP_FAILED )
+      {
+        errorBelch( "Unable to mmap( MAP_FIXED ) for Jump Islands\n" );
+        return 0;
+      }
     }
 
 #else
+    oc->image -= misalignment;
     oc->image = stgReallocBytes( oc->image,
+                                 misalignment + 
                                  aligned + sizeof (ppcJumpIsland) * count,
                                  "ocAllocateJumpIslands" );
+    oc->image += misalignment;
 #endif /* USE_MMAP */
 
     oc->jump_islands = (ppcJumpIsland *) (oc->image + aligned);
@@ -1722,24 +1883,25 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
             + hdr->NumberOfSymbols * sizeof_COFF_symbol;
 
    if (hdr->Machine != 0x14c) {
-      errorBelch("Not x86 PEi386");
+      errorBelch("%s: Not x86 PEi386", oc->fileName);
       return 0;
    }
    if (hdr->SizeOfOptionalHeader != 0) {
-      errorBelch("PEi386 with nonempty optional header");
+      errorBelch("%s: PEi386 with nonempty optional header", oc->fileName);
       return 0;
    }
    if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
         (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
         (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
         (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
-      errorBelch("Not a PEi386 object file");
+      errorBelch("%s: Not a PEi386 object file", oc->fileName);
       return 0;
    }
    if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
         /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
-      errorBelch("Invalid PEi386 word size or endiannness: %d",
-            (int)(hdr->Characteristics));
+      errorBelch("%s: Invalid PEi386 word size or endiannness: %d",
+                oc->fileName,
+                (int)(hdr->Characteristics));
       return 0;
    }
    /* If the string table size is way crazy, this might indicate that
@@ -1914,19 +2076,37 @@ ocGetNames_PEi386 ( ObjectCode* oc )
    /* Allocate space for any (local, anonymous) .bss sections. */
 
    for (i = 0; i < hdr->NumberOfSections; i++) {
+      UInt32 bss_sz;
       UChar* zspace;
       COFF_section* sectab_i
          = (COFF_section*)
            myindex ( sizeof_COFF_section, sectab, i );
       if (0 != strcmp(sectab_i->Name, ".bss")) continue;
-      if (sectab_i->VirtualSize == 0) continue;
+      /* sof 10/05: the PE spec text isn't too clear regarding what
+       * the SizeOfRawData field is supposed to hold for object
+       * file sections containing just uninitialized data -- for executables,
+       * it is supposed to be zero; unclear what it's supposed to be
+       * for object files. However, VirtualSize is guaranteed to be
+       * zero for object files, which definitely suggests that SizeOfRawData
+       * will be non-zero (where else would the size of this .bss section be
+       * stored?) Looking at the COFF_section info for incoming object files,
+       * this certainly appears to be the case.
+       *
+       * => I suspect we've been incorrectly handling .bss sections in (relocatable)
+       * object files up until now. This turned out to bite us with ghc-6.4.1's use
+       * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
+       * variable decls into to the .bss section. (The specific function in Q which
+       * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
+       */
+      if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
       /* This is a non-empty .bss section.  Allocate zeroed space for
          it, and set its PointerToRawData field such that oc->image +
          PointerToRawData == addr_of_zeroed_space.  */
-      zspace = stgCallocBytes(1, sectab_i->VirtualSize,
-                              "ocGetNames_PEi386(anonymous bss)");
+      bss_sz = sectab_i->VirtualSize;
+      if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
+      zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
       sectab_i->PointerToRawData = ((UChar*)zspace) - ((UChar*)(oc->image));
-      addProddableBlock(oc, zspace, sectab_i->VirtualSize);
+      addProddableBlock(oc, zspace, bss_sz);
       /* debugBelch("BSS anon section at 0x%x\n", zspace); */
    }
 
@@ -1955,6 +2135,7 @@ ocGetNames_PEi386 ( ObjectCode* oc )
 #     endif
 
       if (0==strcmp(".text",sectab_i->Name) ||
+          0==strcmp(".rdata",sectab_i->Name)||
           0==strcmp(".rodata",sectab_i->Name))
          kind = SECTIONKIND_CODE_OR_RODATA;
       if (0==strcmp(".data",sectab_i->Name) ||
@@ -1973,8 +2154,10 @@ ocGetNames_PEi386 ( ObjectCode* oc )
              information. */
           && 0 != strcmp(".stab", sectab_i->Name)
           && 0 != strcmp(".stabstr", sectab_i->Name)
+          /* ignore constructor section for now */
+          && 0 != strcmp(".ctors", sectab_i->Name)
          ) {
-         errorBelch("Unknown PEi386 section name `%s'", sectab_i->Name);
+         errorBelch("Unknown PEi386 section name `%s' (while processing: %s)", sectab_i->Name, oc->fileName);
          return 0;
       }
 
@@ -2116,7 +2299,8 @@ ocResolve_PEi386 ( ObjectCode* oc )
       /* Ignore sections called which contain stabs debugging
          information. */
       if (0 == strcmp(".stab", sectab_i->Name)
-          || 0 == strcmp(".stabstr", sectab_i->Name))
+          || 0 == strcmp(".stabstr", sectab_i->Name)
+          || 0 == strcmp(".ctors", sectab_i->Name))
          continue;
 
       if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
@@ -2133,8 +2317,14 @@ ocResolve_PEi386 ( ObjectCode* oc )
         COFF_reloc* rel = (COFF_reloc*)
                            myindex ( sizeof_COFF_reloc, reltab, 0 );
        noRelocs = rel->VirtualAddress;
+
+       /* 10/05: we now assume (and check for) a GNU ld that is capable
+        * of handling object files with (>2^16) of relocs.
+        */
+#if 0
        debugBelch("WARNING: Overflown relocation field (# relocs found: %u)\n",
                   noRelocs);
+#endif
        j = 1;
       } else {
        noRelocs = sectab_i->NumberOfRelocations;
@@ -2213,9 +2403,22 @@ ocResolve_PEi386 ( ObjectCode* oc )
                   -- hence the constant 4.
                   Also I don't know if A should be added, but so
                   far it has always been zero.
+
+                 SOF 05/2005: 'A' (old contents of *pP) have been observed
+                 to contain values other than zero (the 'wx' object file
+                 that came with wxhaskell-0.9.4; dunno how it was compiled..).
+                 So, add displacement to old value instead of asserting
+                 A to be zero. Fixes wxhaskell-related crashes, and no other
+                 ill effects have been observed.
+                 
+                 Update: the reason why we're seeing these more elaborate
+                 relocations is due to a switch in how the NCG compiles SRTs 
+                 and offsets to them from info tables. SRTs live in .(ro)data, 
+                 while info tables live in .text, causing GAS to emit REL32/DISP32 
+                 relocations with non-zero values. Adding the displacement is
+                 the right thing to do.
               */
-               ASSERT(A==0);
-               *pP = S - ((UInt32)pP) - 4;
+               *pP = S - ((UInt32)pP) - 4 + A;
                break;
             default:
                debugBelch("%s: unhandled PEi386 relocation type %d",
@@ -2431,6 +2634,64 @@ PLTSize(void)
 #endif
 
 
+#if x86_64_HOST_ARCH
+// On x86_64, 32-bit relocations are often used, which requires that
+// we can resolve a symbol to a 32-bit offset.  However, shared
+// libraries are placed outside the 2Gb area, which leaves us with a
+// problem when we need to give a 32-bit offset to a symbol in a
+// shared library.
+// 
+// For a function symbol, we can allocate a bounce sequence inside the
+// 2Gb area and resolve the symbol to this.  The bounce sequence is
+// simply a long jump instruction to the real location of the symbol.
+//
+// For data references, we're screwed.
+//
+typedef struct {
+    unsigned char jmp[8];  /* 6 byte instruction: jmpq *0x00000002(%rip) */
+    void *addr;
+} x86_64_bounce;
+
+#define X86_64_BB_SIZE 1024
+
+static x86_64_bounce *x86_64_bounce_buffer = NULL;
+static nat x86_64_bb_next_off;
+
+static void*
+x86_64_high_symbol( char *lbl, void *addr )
+{
+    x86_64_bounce *bounce;
+
+    if ( x86_64_bounce_buffer == NULL || 
+        x86_64_bb_next_off >= X86_64_BB_SIZE ) {
+       x86_64_bounce_buffer = 
+           mmap(NULL, X86_64_BB_SIZE * sizeof(x86_64_bounce), 
+                PROT_EXEC|PROT_READ|PROT_WRITE, 
+                MAP_PRIVATE|MAP_32BIT|MAP_ANONYMOUS, -1, 0);
+       if (x86_64_bounce_buffer == MAP_FAILED) {
+           barf("x86_64_high_symbol: mmap failed");
+       }
+       x86_64_bb_next_off = 0;
+    }
+    bounce = &x86_64_bounce_buffer[x86_64_bb_next_off];
+    bounce->jmp[0] = 0xff;
+    bounce->jmp[1] = 0x25;
+    bounce->jmp[2] = 0x02;
+    bounce->jmp[3] = 0x00;
+    bounce->jmp[4] = 0x00;
+    bounce->jmp[5] = 0x00;
+    bounce->addr = addr;
+    x86_64_bb_next_off++;
+
+    IF_DEBUG(linker, debugBelch("x86_64: allocated bounce entry for %s->%p at %p\n",
+                               lbl, addr, bounce));
+
+    insertStrHashTable(symhash, lbl, bounce);
+    return bounce;
+}
+#endif
+
+
 /*
  * Generic ELF functions
  */
@@ -2537,8 +2798,8 @@ ocVerifyImage_ELF ( ObjectCode* oc )
    }
 
    IF_DEBUG(linker,debugBelch(
-             "\nSection header table: start %d, n_entries %d, ent_size %d\n",
-             ehdr->e_shoff, ehdr->e_shnum, ehdr->e_shentsize  ));
+             "\nSection header table: start %ld, n_entries %d, ent_size %d\n",
+             (long)ehdr->e_shoff, ehdr->e_shnum, ehdr->e_shentsize  ));
 
    ASSERT (ehdr->e_shentsize == sizeof(Elf_Shdr));
 
@@ -2603,9 +2864,9 @@ ocVerifyImage_ELF ( ObjectCode* oc )
       nsymtabs++;
       stab = (Elf_Sym*) (ehdrC + shdr[i].sh_offset);
       nent = shdr[i].sh_size / sizeof(Elf_Sym);
-      IF_DEBUG(linker,debugBelch( "   number of entries is apparently %d (%d rem)\n",
+      IF_DEBUG(linker,debugBelch( "   number of entries is apparently %d (%ld rem)\n",
                nent,
-               shdr[i].sh_size % sizeof(Elf_Sym)
+               (long)shdr[i].sh_size % sizeof(Elf_Sym)
              ));
       if (0 != shdr[i].sh_size % sizeof(Elf_Sym)) {
          errorBelch("%s: non-integral number of symbol table entries", oc->fileName);
@@ -2914,8 +3175,8 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
          case R_386_PC32: *pP = value - P; break;
 #        endif
          default:
-            errorBelch("%s: unhandled ELF relocation(Rel) type %d\n",
-                 oc->fileName, ELF_R_TYPE(info));
+            errorBelch("%s: unhandled ELF relocation(Rel) type %lu\n",
+                 oc->fileName, (lnat)ELF_R_TYPE(info));
             return 0;
       }
 
@@ -2931,7 +3192,7 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
                           Elf_Sym*  stab, char* strtab )
 {
    int j;
-   char *symbol;
+   char *symbol = NULL;
    Elf_Addr targ;
    Elf_Rela* rtab = (Elf_Rela*) (ehdrC + shdr[shnum].sh_offset);
    int         nent = shdr[shnum].sh_size / sizeof(Elf_Rela);
@@ -3121,27 +3382,42 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
             break;
 #        endif
 
-#if x86_64_HOST_OS
+#if x86_64_HOST_ARCH
       case R_X86_64_64:
          *(Elf64_Xword *)P = value;
          break;
 
       case R_X86_64_PC32:
-         *(Elf64_Word *)P = (Elf64_Word) (value - P);
+      {
+         StgInt64 off = value - P;
+         if (off >= 0x7fffffffL || off < -0x80000000L) {
+             barf("R_X86_64_PC32 relocation out of range: %s = %p",
+                  symbol, off);
+         }
+         *(Elf64_Word *)P = (Elf64_Word)off;
          break;
+      }
 
       case R_X86_64_32:
+         if (value >= 0x7fffffffL) {
+             barf("R_X86_64_32 relocation out of range: %s = %p\n",
+                  symbol, value);
+         }
          *(Elf64_Word *)P = (Elf64_Word)value;
          break;
 
       case R_X86_64_32S:
+         if ((StgInt64)value > 0x7fffffffL || (StgInt64)value < -0x80000000L) {
+             barf("R_X86_64_32S relocation out of range: %s = %p\n",
+                  symbol, value);
+         }
          *(Elf64_Sword *)P = (Elf64_Sword)value;
          break;
 #endif
 
          default:
-            errorBelch("%s: unhandled ELF relocation(RelA) type %d\n",
-                 oc->fileName, ELF_R_TYPE(info));
+            errorBelch("%s: unhandled ELF relocation(RelA) type %lu\n",
+                 oc->fileName, (lnat)ELF_R_TYPE(info));
             return 0;
       }
 
@@ -3210,7 +3486,7 @@ ia64_extract_instruction(Elf64_Xword *target)
 {
    Elf64_Xword w1, w2;
    int slot = (Elf_Addr)target & 3;
-   (Elf_Addr)target &= ~3;
+   target = (Elf_Addr)target & ~3;
 
    w1 = *target;
    w2 = *(target+1);
@@ -3232,7 +3508,7 @@ static void
 ia64_deposit_instruction(Elf64_Xword *target, Elf64_Xword value)
 {
    int slot = (Elf_Addr)target & 3;
-   (Elf_Addr)target &= ~3;
+   target = (Elf_Addr)target & ~3;
 
    switch (slot)
    {
@@ -3335,7 +3611,7 @@ static int ocAllocateJumpIslands_ELF( ObjectCode *oc )
 #if defined(OBJFORMAT_MACHO)
 
 /*
-  Support for MachO linking on Darwin/MacOS X on PowerPC chips
+  Support for MachO linking on Darwin/MacOS X
   by Wolfgang Thaller (wolfgang.thaller@gmx.net)
 
   I hereby formally apologize for the hackish nature of this code.
@@ -3344,6 +3620,7 @@ static int ocAllocateJumpIslands_ELF( ObjectCode *oc )
   *) add still more sanity checks.
 */
 
+#ifdef powerpc_HOST_ARCH
 static int ocAllocateJumpIslands_MachO(ObjectCode* oc)
 {
     struct mach_header *header = (struct mach_header *) oc->image;
@@ -3388,6 +3665,7 @@ static int ocAllocateJumpIslands_MachO(ObjectCode* oc)
     }
     return ocAllocateJumpIslands(oc,0,0);
 }
+#endif
 
 static int ocVerifyImage_MachO(ObjectCode* oc STG_UNUSED)
 {
@@ -3466,6 +3744,10 @@ static int relocateSection(
        return 1;
     else if(!strcmp(sect->sectname,"__nl_symbol_ptr"))
        return 1;
+    else if(!strcmp(sect->sectname,"__la_sym_ptr2"))
+       return 1;
+    else if(!strcmp(sect->sectname,"__la_sym_ptr3"))
+       return 1;
 
     n = sect->nreloc;
     relocs = (struct relocation_info*) (image + sect->reloff);
@@ -3485,6 +3767,15 @@ static int relocateSection(
                    unsigned long* wordPtr = (unsigned long*) (image + sect->offset + scat->r_address);
                    checkProddableBlock(oc,wordPtr);
 
+                    // Note on relocation types:
+                    // i386 uses the GENERIC_RELOC_* types,
+                    // while ppc uses special PPC_RELOC_* types.
+                    // *_RELOC_VANILLA and *_RELOC_PAIR have the same value
+                    // in both cases, all others are different.
+                    // Therefore, we use GENERIC_RELOC_VANILLA
+                    // and GENERIC_RELOC_PAIR instead of the PPC variants,
+                    // and use #ifdefs for the other types.
+                    
                    // Step 1: Figure out what the relocated value should be
                    if(scat->r_type == GENERIC_RELOC_VANILLA)
                    {
@@ -3495,23 +3786,28 @@ static int relocateSection(
                                                                 scat->r_value)
                                         - scat->r_value;
                    }
+#ifdef powerpc_HOST_ARCH
                    else if(scat->r_type == PPC_RELOC_SECTDIFF
                        || scat->r_type == PPC_RELOC_LO16_SECTDIFF
                        || scat->r_type == PPC_RELOC_HI16_SECTDIFF
                        || scat->r_type == PPC_RELOC_HA16_SECTDIFF)
+#else
+                    else if(scat->r_type == GENERIC_RELOC_SECTDIFF)
+#endif
                    {
                        struct scattered_relocation_info *pair =
                                (struct scattered_relocation_info*) &relocs[i+1];
 
-                       if(!pair->r_scattered || pair->r_type != PPC_RELOC_PAIR)
+                       if(!pair->r_scattered || pair->r_type != GENERIC_RELOC_PAIR)
                            barf("Invalid Mach-O file: "
-                                "PPC_RELOC_*_SECTDIFF not followed by PPC_RELOC_PAIR");
+                                "RELOC_*_SECTDIFF not followed by RELOC_PAIR");
 
                        word = (unsigned long)
                               (relocateAddress(oc, nSections, sections, scat->r_value)
                              - relocateAddress(oc, nSections, sections, pair->r_value));
                        i++;
                    }
+#ifdef powerpc_HOST_ARCH
                    else if(scat->r_type == PPC_RELOC_HI16
                          || scat->r_type == PPC_RELOC_LO16
                          || scat->r_type == PPC_RELOC_HA16
@@ -3550,14 +3846,21 @@ static int relocateSection(
                         
                         i++;
                     }
+ #endif
                     else
                        continue;  // ignore the others
 
+#ifdef powerpc_HOST_ARCH
                     if(scat->r_type == GENERIC_RELOC_VANILLA
                         || scat->r_type == PPC_RELOC_SECTDIFF)
+#else
+                    if(scat->r_type == GENERIC_RELOC_VANILLA
+                        || scat->r_type == GENERIC_RELOC_SECTDIFF)
+#endif
                     {
                         *wordPtr = word;
                     }
+#ifdef powerpc_HOST_ARCH
                     else if(scat->r_type == PPC_RELOC_LO16_SECTDIFF || scat->r_type == PPC_RELOC_LO16)
                     {
                         ((unsigned short*) wordPtr)[1] = word & 0xFFFF;
@@ -3571,6 +3874,7 @@ static int relocateSection(
                         ((unsigned short*) wordPtr)[1] = ((word >> 16) & 0xFFFF)
                             + ((word & (1<<15)) ? 1 : 0);
                     }
+#endif
                }
            }
 
@@ -3585,10 +3889,12 @@ static int relocateSection(
            if(reloc->r_length == 2)
            {
                unsigned long word = 0;
+#ifdef powerpc_HOST_ARCH
                 unsigned long jumpIsland = 0;
                 long offsetToJumpIsland = 0xBADBAD42; // initialise to bad value
                                                       // to avoid warning and to catch
                                                       // bugs.
+#endif
 
                unsigned long* wordPtr = (unsigned long*) (image + sect->offset + reloc->r_address);
                checkProddableBlock(oc,wordPtr);
@@ -3597,6 +3903,7 @@ static int relocateSection(
                {
                    word = *wordPtr;
                }
+#ifdef powerpc_HOST_ARCH
                else if(reloc->r_type == PPC_RELOC_LO16)
                {
                    word = ((unsigned short*) wordPtr)[1];
@@ -3617,7 +3924,7 @@ static int relocateSection(
                    word = *wordPtr;
                    word = (word & 0x03FFFFFC) | ((word & 0x02000000) ? 0xFC000000 : 0);
                }
-
+#endif
 
                if(!reloc->r_extern)
                {
@@ -3641,17 +3948,19 @@ static int relocateSection(
 
                    if(reloc->r_pcrel)
                     {  
+#ifdef powerpc_HOST_ARCH
                             // In the .o file, this should be a relative jump to NULL
-                            // and we'll change it to a jump to a relative jump to the symbol
+                            // and we'll change it to a relative jump to the symbol
                         ASSERT(-word == reloc->r_address);
-                        word = (unsigned long) symbolAddress;
-                        jumpIsland = makeJumpIsland(oc,reloc->r_symbolnum,word);
-                       word -= ((long)image) + sect->offset + reloc->r_address;
+                        jumpIsland = makeJumpIsland(oc,reloc->r_symbolnum,(unsigned long) symbolAddress);
                         if(jumpIsland != 0)
                         {
-                            offsetToJumpIsland = jumpIsland
-                                - (((long)image) + sect->offset + reloc->r_address);
+                            offsetToJumpIsland = word + jumpIsland
+                                - (((long)image) + sect->offset - sect->addr);
                         }
+#endif
+                       word += (unsigned long) symbolAddress
+                                - (((long)image) + sect->offset - sect->addr);
                     }
                     else
                     {
@@ -3664,6 +3973,7 @@ static int relocateSection(
                    *wordPtr = word;
                    continue;
                }
+#ifdef powerpc_HOST_ARCH
                else if(reloc->r_type == PPC_RELOC_LO16)
                {
                    ((unsigned short*) wordPtr)[1] = word & 0xFFFF;
@@ -3700,7 +4010,8 @@ static int relocateSection(
                    *wordPtr = (*wordPtr & 0xFC000003) | (word & 0x03FFFFFC);
                    continue;
                }
-           }
+#endif
+            }
            barf("\nunknown relocation %d",reloc->r_type);
            return 0;
        }
@@ -3882,6 +4193,10 @@ static int ocResolve_MachO(ObjectCode* oc)
            la_ptrs = &sections[i];
        else if(!strcmp(sections[i].sectname,"__nl_symbol_ptr"))
            nl_ptrs = &sections[i];
+        else if(!strcmp(sections[i].sectname,"__la_sym_ptr2"))
+           la_ptrs = &sections[i];
+        else if(!strcmp(sections[i].sectname,"__la_sym_ptr3"))
+           la_ptrs = &sections[i];
     }
 
     if(dsymLC)
@@ -3914,6 +4229,7 @@ static int ocResolve_MachO(ObjectCode* oc)
     return 1;
 }
 
+#ifdef powerpc_HOST_ARCH
 /*
  * The Mach-O object format uses leading underscores. But not everywhere.
  * There is a small number of runtime support functions defined in
@@ -3946,3 +4262,26 @@ static void machoInitSymbolsWithoutUnderscore()
 #undef Sym
 }
 #endif
+
+/*
+ * Figure out by how much to shift the entire Mach-O file in memory
+ * when loading so that its single segment ends up 16-byte-aligned
+ */
+static int machoGetMisalignment( FILE * f )
+{
+    struct mach_header header;
+    int misalignment;
+    
+    fread(&header, sizeof(header), 1, f);
+    rewind(f);
+
+    if(header.magic != MH_MAGIC)
+        return 0;
+    
+    misalignment = (header.sizeofcmds + sizeof(header))
+                    & 0xF;
+
+    return misalignment ? (16 - misalignment) : 0;
+}
+
+#endif