fix build on Windows
[ghc-hetmet.git] / rts / Linker.c
index 5b692a0..d84e4f7 100644 (file)
@@ -21,6 +21,7 @@
 #include "HsFFI.h"
 
 #include "sm/Storage.h"
+#include "Stats.h"
 #include "Hash.h"
 #include "LinkerInternals.h"
 #include "RtsUtils.h"
@@ -38,6 +39,8 @@
 
 #include <stdlib.h>
 #include <string.h>
+#include <stdio.h>
+#include <assert.h>
 
 #ifdef HAVE_SYS_STAT_H
 #include <sys/stat.h>
 #include <sys/wait.h>
 #endif
 
-#if defined(ia64_HOST_ARCH) || defined(linux_HOST_OS) || defined(freebsd_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS)
+#if defined(ia64_HOST_ARCH) || defined(linux_HOST_OS) || defined(freebsd_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS)
 #define USE_MMAP
 #include <fcntl.h>
 #include <sys/mman.h>
 
-#if defined(linux_HOST_OS) || defined(freebsd_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS)
+#if defined(linux_HOST_OS) || defined(freebsd_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS)
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 
 #endif
 
-#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS)
+#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS)
 #  define OBJFORMAT_ELF
+#  include <regex.h>   // regex is already used by dlopen() so this is OK
+                       // to use here without requiring an additional lib
 #elif defined(cygwin32_HOST_OS) || defined (mingw32_HOST_OS)
 #  define OBJFORMAT_PEi386
 #  include <windows.h>
 #  include <math.h>
 #elif defined(darwin_HOST_OS)
 #  define OBJFORMAT_MACHO
+#  include <regex.h>
 #  include <mach-o/loader.h>
 #  include <mach-o/nlist.h>
 #  include <mach-o/reloc.h>
@@ -505,10 +511,13 @@ typedef struct _RtsSymbolVal {
 #if !defined(mingw32_HOST_OS)
 #define RTS_USER_SIGNALS_SYMBOLS \
    SymI_HasProto(setIOManagerPipe) \
+   SymI_HasProto(ioManagerWakeup) \
+   SymI_HasProto(ioManagerSync) \
    SymI_HasProto(blockUserSignals) \
    SymI_HasProto(unblockUserSignals)
 #else
 #define RTS_USER_SIGNALS_SYMBOLS     \
+   SymI_HasProto(ioManagerWakeup) \
    SymI_HasProto(sendIOManagerEvent) \
    SymI_HasProto(readIOManagerEvent) \
    SymI_HasProto(getIOManagerEvent)  \
@@ -666,6 +675,21 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(RET_SEMI_loads_avoided)
 
 
+// On most platforms, the garbage collector rewrites references
+//     to small integer and char objects to a set of common, shared ones.
+//
+// We don't do this when compiling to Windows DLLs at the moment because
+//     it doesn't support cross package data references well.
+//
+#if defined(__PIC__) && defined(mingw32_HOST_OS)
+#define RTS_INTCHAR_SYMBOLS
+#else
+#define RTS_INTCHAR_SYMBOLS                            \
+      SymI_HasProto(stg_CHARLIKE_closure)              \
+      SymI_HasProto(stg_INTLIKE_closure)               
+#endif
+
+
 #define RTS_SYMBOLS                                    \
       Maybe_Stable_Names                               \
       RTS_TICKY_SYMBOLS                                 \
@@ -736,7 +760,11 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(forkOS_createThread)               \
       SymI_HasProto(freeHaskellFunctionPtr)            \
       SymI_HasProto(getOrSetTypeableStore)             \
-      SymI_HasProto(getOrSetSignalHandlerStore)                \
+      SymI_HasProto(getOrSetGHCConcSignalHandlerStore)         \
+      SymI_HasProto(getOrSetGHCConcPendingEventsStore)         \
+      SymI_HasProto(getOrSetGHCConcPendingDelaysStore)         \
+      SymI_HasProto(getOrSetGHCConcIOManagerThreadStore)       \
+      SymI_HasProto(getOrSetGHCConcProddingStore)              \
       SymI_HasProto(genSymZh)                          \
       SymI_HasProto(genericRaise)                      \
       SymI_HasProto(getProgArgv)                       \
@@ -846,11 +874,9 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(stg_CAF_BLACKHOLE_info)            \
       SymI_HasProto(__stg_EAGER_BLACKHOLE_info)                \
       SymI_HasProto(startTimer)                         \
-      SymI_HasProto(stg_CHARLIKE_closure)              \
       SymI_HasProto(stg_MVAR_CLEAN_info)               \
       SymI_HasProto(stg_MVAR_DIRTY_info)               \
       SymI_HasProto(stg_IND_STATIC_info)               \
-      SymI_HasProto(stg_INTLIKE_closure)               \
       SymI_HasProto(stg_ARR_WORDS_info)                 \
       SymI_HasProto(stg_MUT_ARR_PTRS_DIRTY_info)       \
       SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_info)      \
@@ -923,9 +949,9 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(stg_writeTVarzh)                   \
       SymI_HasProto(stg_yieldzh)                        \
       SymI_NeedsProto(stg_interp_constr_entry)          \
-      SymI_HasProto(alloc_blocks)                       \
       SymI_HasProto(alloc_blocks_lim)                   \
-      SymI_HasProto(allocateLocal)                      \
+      SymI_HasProto(g0)                                 \
+      SymI_HasProto(allocate)                           \
       SymI_HasProto(allocateExec)                      \
       SymI_HasProto(freeExec)                          \
       SymI_HasProto(getAllocations)                     \
@@ -938,7 +964,8 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(n_capabilities)                    \
       SymI_HasProto(stg_traceCcszh)                     \
       SymI_HasProto(stg_traceEventzh)                   \
-      RTS_USER_SIGNALS_SYMBOLS
+      RTS_USER_SIGNALS_SYMBOLS                         \
+      RTS_INTCHAR_SYMBOLS
 
 
 // 64-bit support functions in libgcc.a
@@ -951,43 +978,11 @@ typedef struct _RtsSymbolVal {
       SymI_NeedsProto(__muldi3)                               \
       SymI_NeedsProto(__ashldi3)                      \
       SymI_NeedsProto(__ashrdi3)                      \
-      SymI_NeedsProto(__lshrdi3)                      \
-      SymI_NeedsProto(__eprintf)
+      SymI_NeedsProto(__lshrdi3)
 #else
 #define RTS_LIBGCC_SYMBOLS
 #endif
 
-/* NOTE [io-manager-ghci]
-
-   When GHCi loads the base package, it gets another copy of the CAFs
-   in GHC.Conc that record the IO manager's ThreadId, and the blocking
-   queues, so we get another IO manager.  This is bad enough, but what
-   is worse is that GHCi by default reverts all CAFs on every :load,
-   so we'll get *another* IO manager thread (and an associated pipe)
-   every time the user does :load.  Miraculously, this actually
-   manages to just about work in GHC 6.10 and earlier, but broke when
-   I tried to fix #1185 (restarting the IO manager after a fork()).
-
-   To work around it and ensure that we only have a single IO manager,
-   we map the CAFs in the dynamically-loaded GHC.Conc to the
-   statically-linked GHC.Conc.  This is an ugly hack, but it's the
-   least ugly hack that I could think of (SDM 3/11/2009)
-*/
-
-#define RTS_GHC_CONC_SYMBOLS \
-      SymI_NeedsProto(base_GHCziConc_pendingDelays_closure) \
-      SymI_NeedsProto(base_GHCziConc_pendingEvents_closure) \
-      SymI_NeedsProto(base_GHCziConc_ioManagerThread_closure)
-
-#ifdef mingw32_HOST_OS
-#define RTS_GHC_CONC_OS_SYMBOLS /* empty */
-#else
-#define RTS_GHC_CONC_OS_SYMBOLS \
-      SymI_NeedsProto(base_GHCziConc_prodding_closure) \
-      SymI_NeedsProto(base_GHCziConc_sync_closure) \
-      SymI_NeedsProto(base_GHCziConc_stick_closure)
-#endif
-
 #if defined(darwin_HOST_OS) && defined(powerpc_HOST_ARCH)
       // Symbols that don't have a leading underscore
       // on Mac OS X. They have to receive special treatment,
@@ -1016,8 +1011,6 @@ RTS_CYGWIN_ONLY_SYMBOLS
 RTS_DARWIN_ONLY_SYMBOLS
 RTS_LIBGCC_SYMBOLS
 RTS_LIBFFI_SYMBOLS
-RTS_GHC_CONC_SYMBOLS
-RTS_GHC_CONC_OS_SYMBOLS
 #undef SymI_NeedsProto
 #undef SymI_HasProto
 #undef SymI_HasProto_redirect
@@ -1053,8 +1046,6 @@ static RtsSymbolVal rtsSyms[] = {
       RTS_DARWIN_ONLY_SYMBOLS
       RTS_LIBGCC_SYMBOLS
       RTS_LIBFFI_SYMBOLS
-      RTS_GHC_CONC_SYMBOLS
-      RTS_GHC_CONC_OS_SYMBOLS
 #if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH)
       // dyld stub code contains references to this,
       // but it should never be called because we treat
@@ -1076,18 +1067,12 @@ static void ghciInsertStrHashTable ( char* obj_name,
                                      void *data
                                   )
 {
-#define GHC_CONC MAYBE_LEADING_UNDERSCORE_STR("base_GHCziConc")
-
-    if (lookupHashTable(table, (StgWord)key) == NULL)
-    {
+   if (lookupHashTable(table, (StgWord)key) == NULL)
+   {
       insertStrHashTable(table, (StgWord)key, data);
       return;
-    }
-    if (strncmp(key, GHC_CONC, strlen(GHC_CONC)) == 0) {
-        /* see NOTE [io-manager-ghci] */
-        return;
-    }
-    debugBelch(
+   }
+   debugBelch(
       "\n\n"
       "GHCi runtime linker: fatal error: I found a duplicate definition for symbol\n"
       "   %s\n"
@@ -1102,8 +1087,8 @@ static void ghciInsertStrHashTable ( char* obj_name,
       "\n",
       (char*)key,
       obj_name
-    );
-    exit(1);
+   );
+   exit(1);
 }
 /* -----------------------------------------------------------------------------
  * initialize the object linker
@@ -1114,12 +1099,20 @@ static int linker_init_done = 0 ;
 
 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
 static void *dl_prog_handle;
+static regex_t re_invalid;
+static regex_t re_realso;
+#ifdef THREADED_RTS
+static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
+#endif
 #endif
 
 void
 initLinker( void )
 {
     RtsSymbolVal *sym;
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+    int compileResult;
+#endif
 
     /* Make initLinker idempotent, so we can call it
        before evey relevant operation; that means we
@@ -1128,6 +1121,9 @@ initLinker( void )
       linker_init_done = 1;
     }
 
+#if defined(THREADED_RTS) && (defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO))
+    initMutex(&dl_mutex);
+#endif
     stablehash = allocStrHashTable();
     symhash = allocStrHashTable();
 
@@ -1146,6 +1142,15 @@ initLinker( void )
 #   else
     dl_prog_handle = dlopen(NULL, RTLD_LAZY);
 #   endif /* RTLD_DEFAULT */
+
+    compileResult = regcomp(&re_invalid,
+           "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*invalid ELF header",
+           REG_EXTENDED);
+    ASSERT( compileResult == 0 );
+    compileResult = regcomp(&re_realso,
+           "GROUP *\\( *(([^ )])+)",
+           REG_EXTENDED);
+    ASSERT( compileResult == 0 );
 #   endif
 
 #if defined(x86_64_HOST_ARCH)
@@ -1166,6 +1171,19 @@ initLinker( void )
 #endif
 }
 
+void
+exitLinker( void ) {
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+   if (linker_init_done == 1) {
+      regfree(&re_invalid);
+      regfree(&re_realso);
+#ifdef THREADED_RTS
+      closeMutex(&dl_mutex);
+#endif
+   }
+#endif
+}
+
 /* -----------------------------------------------------------------------------
  *                  Loading DLL or .so dynamic libraries
  * -----------------------------------------------------------------------------
@@ -1201,29 +1219,112 @@ typedef
 static OpenedDLL* opened_dlls = NULL;
 #endif
 
-const char *
-addDLL( char *dll_name )
-{
 #  if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
-   /* ------------------- ELF DLL loader ------------------- */
-   void *hdl;
-   const char *errmsg;
 
-   initLinker();
+static char *
+internal_dlopen(const char *dll_name)
+{
+   void *hdl;
+   char *errmsg, *errmsg_copy;
 
    // omitted: RTLD_NOW
    // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
-   hdl= dlopen(dll_name, RTLD_LAZY | RTLD_GLOBAL);
+   IF_DEBUG(linker,
+      debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
+
+   //-------------- Begin critical section ------------------
+   // This critical section is necessary because dlerror() is not
+   // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
+   // Also, the error message returned must be copied to preserve it
+   // (see POSIX also)
 
+   ACQUIRE_LOCK(&dl_mutex);
+   hdl = dlopen(dll_name, RTLD_LAZY | RTLD_GLOBAL);
+
+   errmsg = NULL;
    if (hdl == NULL) {
       /* dlopen failed; return a ptr to the error msg. */
       errmsg = dlerror();
       if (errmsg == NULL) errmsg = "addDLL: unknown error";
-      return errmsg;
-   } else {
+      errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
+      strcpy(errmsg_copy, errmsg);
+      errmsg = errmsg_copy;
+   }
+   RELEASE_LOCK(&dl_mutex);
+   //--------------- End critical section -------------------
+
+   return errmsg;
+}
+#  endif
+
+const char *
+addDLL( char *dll_name )
+{
+#  if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+   /* ------------------- ELF DLL loader ------------------- */
+
+#define NMATCH 5
+   regmatch_t match[NMATCH];
+   char *errmsg;
+   FILE* fp;
+   size_t match_length;
+#define MAXLINE 1000
+   char line[MAXLINE];
+   int result;
+
+   initLinker();
+
+   IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
+   errmsg = internal_dlopen(dll_name);
+
+   if (errmsg == NULL) {
       return NULL;
    }
-   /*NOTREACHED*/
+
+   // GHC Trac ticket #2615
+   // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
+   // contain linker scripts rather than ELF-format object code. This
+   // code handles the situation by recognizing the real object code
+   // file name given in the linker script.
+   //
+   // If an "invalid ELF header" error occurs, it is assumed that the
+   // .so file contains a linker script instead of ELF object code.
+   // In this case, the code looks for the GROUP ( ... ) linker
+   // directive. If one is found, the first file name inside the
+   // parentheses is treated as the name of a dynamic library and the
+   // code attempts to dlopen that file. If this is also unsuccessful,
+   // an error message is returned.
+
+   // see if the error message is due to an invalid ELF header
+   IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
+   result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
+   IF_DEBUG(linker, debugBelch("result = %i\n", result));
+   if (result == 0) {
+      // success -- try to read the named file as a linker script
+      match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
+                                MAXLINE-1);
+      strncpy(line, (errmsg+(match[1].rm_so)),match_length);
+      line[match_length] = '\0'; // make sure string is null-terminated
+      IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
+      if ((fp = fopen(line, "r")) == NULL) {
+        return errmsg; // return original error if open fails
+      }
+      // try to find a GROUP ( ... ) command
+      while (fgets(line, MAXLINE, fp) != NULL) {
+        IF_DEBUG(linker, debugBelch("input line = %s", line));
+        if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
+            // success -- try to dlopen the first named file
+            IF_DEBUG(linker, debugBelch("match%s\n",""));
+           line[match[1].rm_eo] = '\0';
+           errmsg = internal_dlopen(line+match[1].rm_so);
+           break;
+        }
+        // if control reaches here, no GROUP ( ... ) directive was found
+        // and the original error message is returned to the caller
+      }
+      fclose(fp);
+   }
+   return errmsg;
 
 #  elif defined(OBJFORMAT_PEi386)
    /* ------------------- Win32 DLL loader ------------------- */
@@ -1444,7 +1545,7 @@ mmap_again:
        } else {
            if ((W_)result > 0x80000000) {
                // oops, we were given memory over 2Gb
-#if defined(freebsd_HOST_OS)
+#if defined(freebsd_HOST_OS) || defined(dragonfly_HOST_OS)
                // Some platforms require MAP_FIXED.  This is normally
                // a bad idea, because MAP_FIXED will overwrite
                // existing mappings.
@@ -4215,7 +4316,14 @@ static int relocateSection(
                     }
  #endif
                     else
-                       continue;  // ignore the others
+                    {
+                       barf ("Don't know how to handle this Mach-O "
+                             "scattered relocation entry: "
+                              "object file %s; entry type %ld; "
+                              "address %#lx\n", 
+                              oc->fileName, scat->r_type, scat->r_address);
+                        return 0;
+                     }
 
 #ifdef powerpc_HOST_ARCH
                     if(scat->r_type == GENERIC_RELOC_VANILLA
@@ -4244,11 +4352,28 @@ static int relocateSection(
                     }
 #endif
                }
+               else
+               {
+                   barf("Can't handle Mach-O scattered relocation entry "
+                        "with this r_length tag: "
+                         "object file %s; entry type %ld; "
+                         "r_length tag %ld; address %#lx\n", 
+                         oc->fileName, scat->r_type, scat->r_length,
+                         scat->r_address);
+                    return 0;
+               }
+           }
+           else /* scat->r_pcrel */
+           {
+               barf("Don't know how to handle *PC-relative* Mach-O "
+                    "scattered relocation entry: "
+                     "object file %s; entry type %ld; address %#lx\n", 
+                     oc->fileName, scat->r_type, scat->r_address);
+               return 0;
            }
 
-           continue; // FIXME: I hope it's OK to ignore all the others.
        }
-       else
+       else /* !(relocs[i].r_address & R_SCATTERED) */
        {
            struct relocation_info *reloc = &relocs[i];
            if(reloc->r_pcrel && !reloc->r_extern)
@@ -4293,6 +4418,14 @@ static int relocateSection(
                    word = (word & 0x03FFFFFC) | ((word & 0x02000000) ? 0xFC000000 : 0);
                }
 #endif
+                else
+                {
+                    barf("Can't handle this Mach-O relocation entry "
+                        "(not scattered): "
+                         "object file %s; entry type %ld; address %#lx\n", 
+                         oc->fileName, reloc->r_type, reloc->r_address);
+                    return 0;
+                }
 
                if(!reloc->r_extern)
                {
@@ -4384,8 +4517,16 @@ static int relocateSection(
                }
 #endif
             }
-           barf("\nunknown relocation %d",reloc->r_type);
-           return 0;
+            else
+            {
+                barf("Can't handle Mach-O relocation entry (not scattered) "
+                      "with this r_length tag: "
+                      "object file %s; entry type %ld; "
+                      "r_length tag %ld; address %#lx\n", 
+                      oc->fileName, reloc->r_type, reloc->r_length,
+                      reloc->r_address);
+                return 0;
+           }
        }
 #endif
     }