From 54afa8cb01aa038f64fb9ab943d92a9638394e34 Mon Sep 17 00:00:00 2001
From: sewardj <unknown>
Date: Thu, 2 Aug 2001 17:15:17 +0000
Subject: [PATCH] [project @ 2001-08-02 17:15:16 by sewardj] Haskell-side
 support for FFI (foreign import only).

Since doing the FFI necessarily involves gruesome
architecture-specific knowledge about calling conventions, I have
chosen to put this knowledge in Haskell-land, in ByteCodeFFI.

The general idea is: to do a ccall, the interpreter accumulates the
args R to L on the stack, as is the normal case for tail-calls.
However, it then calls a piece of machine code created by ByteCodeFFI
and which is specific to this call site.  This glue code copies args
off the Haskell stack, calls the target function, and places the
result back into a dummy placeholder created on the Haskell stack
prior to the call.  The interpreter then SLIDEs and RETURNs in the
normal way.

The magic glue code copies args off the Haskell stack and pushes them
directly on the C stack (x86) and/or into regs (sparc et al).  Because
the code is made up specifically for this call site, it can do all
that non-interpretively.  The address (of the C fn to call) is
presented as just another tagged Addr# on the Haskell stack.  This
makes f-i-dynamic trivial since the first arg is the said Addr#.

Presently ByteCodeFFI only knows how to generate x86 code sequences.
---
 ghc/compiler/ghci/ByteCodeFFI.lhs    |  242 ++++++++++++++++++++++++++++++++++
 ghc/compiler/ghci/ByteCodeGen.lhs    |  221 +++++++++++++++++++++++++------
 ghc/compiler/ghci/ByteCodeInstr.lhs  |   31 ++++-
 ghc/compiler/ghci/ByteCodeLink.lhs   |   25 ++--
 ghc/compiler/typecheck/TcForeign.lhs |   11 +-
 5 files changed, 472 insertions(+), 58 deletions(-)
 create mode 100644 ghc/compiler/ghci/ByteCodeFFI.lhs
diff --git a/ghc/compiler/ghci/ByteCodeFFI.lhs b/ghc/compiler/ghci/ByteCodeFFI.lhs
new file mode 100644
index 0000000..8e65548
--- /dev/null
+++ b/ghc/compiler/ghci/ByteCodeFFI.lhs
@@ -0,0 +1,242 @@
+%
+% (c) The University of Glasgow 2000
+%
+\section[ByteCodeGen]{Generate bytecode from Core}
+
+\begin{code}
+module ByteCodeFFI ( taggedSizeW, untaggedSizeW, mkMarshalCode ) where
+
+#include "HsVersions.h"
+
+import PrimRep		( PrimRep(..), getPrimRepSize, isFollowableRep )
+import Bits		( Bits(..), shiftR )
+import Word		( Word8, Word32 )
+import Addr		( Addr(..), writeWord8OffAddr )
+import Foreign		( Ptr(..), mallocBytes )
+import IOExts		( unsafePerformIO, trace )
+
+\end{code}
+
+%************************************************************************
+%*									*
+\subsection{The sizes of things.  These are platform-independent.}
+%*									*
+%************************************************************************
+
+\begin{code}
+
+-- When I push one of these on the H stack, how much does Sp move by?
+taggedSizeW :: PrimRep -> Int
+taggedSizeW pr
+   | isFollowableRep pr = 1 {-it's a pointer, Jim-}
+   | otherwise          = 1 {-the tag-} + getPrimRepSize pr
+
+-- The plain size of something, without tag.
+untaggedSizeW :: PrimRep -> Int
+untaggedSizeW pr
+   | isFollowableRep pr = 1
+   | otherwise          = getPrimRepSize pr
+
+-- How big is this thing's tag?
+sizeOfTagW :: PrimRep -> Int
+sizeOfTagW pr
+   | isFollowableRep pr = 0
+   | otherwise          = 1
+
+-- Blast a bunch of bytes into malloc'd memory and return the addr.
+sendBytesToMallocville :: [Word8] -> IO Addr
+sendBytesToMallocville bytes
+   = do let n = length bytes
+        (Ptr a#) <- mallocBytes n
+        mapM ( \(off,byte) -> writeWord8OffAddr (A# a#) off byte )
+             (zip [0 ..] bytes)
+        return (A# a#)
+\end{code}
+
+%************************************************************************
+%*									*
+\subsection{The platform-dependent marshall-code-generator.}
+%*									*
+%************************************************************************
+
+\begin{code}
+
+{-
+Make a piece of code which expects to see the Haskell stack
+looking like this.  It is given a pointer to the lowest word in
+the stack -- presumably the tag of the placeholder.
+                 
+                  <arg_n>
+                  ...
+                  <arg_1>
+                  Addr# address_of_C_fn
+                  <placeholder-for-result#> (must be an unboxed type)
+-}
+mkMarshalCode :: (Int, PrimRep) -> Int -> [(Int, PrimRep)] 
+              -> Addr
+mkMarshalCode (r_offW, r_rep) addr_offW arg_offs_n_reps
+   = let bytes = mkMarshalCode_wrk (r_offW, r_rep) 
+                                   addr_offW arg_offs_n_reps
+     in  unsafePerformIO (sendBytesToMallocville bytes)
+
+
+mkMarshalCode_wrk :: (Int, PrimRep) -> Int -> [(Int, PrimRep)] 
+                  -> [Word8]
+mkMarshalCode_wrk (r_offW, r_rep) addr_offW arg_offs_n_reps
+
+   = let -- Don't change this without first consulting Intel Corp :-)
+         bytes_per_word = 4
+
+         -- addr and result bits offsetsW
+         offset_of_addr_bitsW = addr_offW + sizeOfTagW AddrRep
+         offset_of_res_bitsW  = r_offW + sizeOfTagW r_rep
+
+         offsets_to_pushW
+            = concat
+              [ let -- where this arg's bits start
+                    a_bits_offW = a_offW + sizeOfTagW a_rep
+                in 
+                    [a_bits_offW .. a_bits_offW + untaggedSizeW a_rep - 1]
+
+                | (a_offW, a_rep) <- reverse arg_offs_n_reps
+              ]
+
+         -- some helpers to assemble x86 insns.
+         movl_offespmem_esi offB	-- movl   offB(%esp), %esi
+            = [0x8B, 0xB4, 0x24] ++ lit32 offB
+         movl_offesimem_ecx offB	-- movl   offB(%esi), %ecx
+            = [0x8B, 0x8E] ++ lit32 offB
+         save_regs			-- pushl  all intregs except %esp
+            = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
+         restore_regs			-- popl   ditto
+            = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
+         pushl_ecx			-- pushl  %ecx
+            = [0x51]
+         call_star_ecx			-- call   * %ecx
+            = [0xFF, 0xD1]
+         add_lit_esp lit		-- addl   $lit, %esp
+            = [0x81, 0xC4] ++ lit32 lit
+         movl_eax_offesimem offB	-- movl   %eax, offB(%esi)
+            = [0x89, 0x86] ++ lit32 offB
+         ret				-- ret
+            = [0xC3]
+
+         lit32 :: Int -> [Word8]
+         lit32 i = let w32 = (fromIntegral i) :: Word32
+                   in  map (fromIntegral . ( .&. 0xFF))
+                           [w32, w32 `shiftR` 8, 
+                            w32 `shiftR` 16,  w32 `shiftR` 24]
+         {-
+             2 0000 8BB42478    movl    0x12345678(%esp), %esi
+             2      563412
+             3 0007 8B8E7856    movl    0x12345678(%esi), %ecx
+             3      3412
+             4              
+             5 000d 50535152    pushl %eax ; pushl %ebx ; pushl %ecx ; pushl  %edx
+             6 0011 565755      pushl %esi ; pushl %edi ; pushl %ebp
+             7              
+             8 0014 5D5F5E      popl %ebp ; popl %edi ; popl %esi 
+             9 0017 5A595B58    popl %edx ; popl %ecx ; popl %ebx ; popl %eax
+            10              
+            11 001b 51          pushl %ecx
+            12 001c FFD1        call * %ecx
+            13              
+            14 001e 81C47856    addl $0x12345678, %esp
+            14      3412
+            15 0024 89867856    movl %eax, 0x12345678(%esi)
+            15      3412
+            16 002a 89967856    movl %edx, 0x12345678(%esi)
+            16      3412
+            18              
+            19 0030 C3          ret
+            20              
+
+         -}
+
+     in
+     trace (show (map fst arg_offs_n_reps))
+     (
+     {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is 
+        arg passed from the interpreter.
+
+        Push all callee saved regs.  Push all of them anyway ...
+           pushl       %eax
+           pushl       %ebx
+           pushl       %ecx
+           pushl       %edx
+           pushl       %esi
+           pushl       %edi
+           pushl       %ebp
+     -}
+     save_regs
+
+     {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
+        We'll use %esi as a temporary to point at the H stack, and
+        %ecx as a temporary to copy via.
+
+           movl        28+4(%esp), %esi
+     -}
+     ++ movl_offespmem_esi 32
+
+     {- For each arg in args_offs_n_reps, examine the associated PrimRep 
+        to determine how many payload (non-tag) words there are, and 
+        whether or not there is a tag.  This gives a bunch of offsets on 
+        the H stack to copy to the C stack:
+
+           movl        off1(%esi), %ecx
+           pushl       %ecx
+     -}
+     ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW) 
+                            ++ pushl_ecx) 
+                  offsets_to_pushW
+
+     {- Get the addr to call into %ecx, bearing in mind that there's 
+        an Addr# tag at the indicated location, and do the call:
+
+           movl        4*(1 /*tag*/ +addr_offW)(%esi), %ecx
+           call        * %ecx
+     -}
+     ++ movl_offesimem_ecx (bytes_per_word * offset_of_addr_bitsW)
+     ++ call_star_ecx
+
+     {- Nuke the args just pushed and re-establish %esi at the 
+        H-stack ptr:
+
+           addl        $4*number_of_args_pushed, %esp (ccall only)
+           movl        28+4(%esp), %esi
+     -}
+     ++ add_lit_esp (bytes_per_word * length offsets_to_pushW)
+     ++ movl_offespmem_esi 32
+
+     {- Depending on what the return type is, get the result 
+        from %eax or %edx:%eax or %st(0).
+
+           movl        %eax, 4(%esi)        -- assuming tagged result
+        or
+           movl        %edx, 4(%esi)
+           movl        %eax, 8(%esi)
+        or
+           fstpl       4(%esi)
+        or
+           fstps       4(%esi)
+     -}
+     ++ case r_rep of
+           IntRep -> movl_eax_offesimem 4
+
+     {- Restore all the pushed regs and go home.
+
+           pushl        %ebp
+           pushl        %edi
+           pushl        %esi
+           pushl        %edx
+           pushl        %ecx
+           pushl        %ebx
+           pushl        %eax
+
+           ret
+     -}
+     ++ restore_regs
+     ++ ret
+     )
+\end{code}
+
diff --git a/ghc/compiler/ghci/ByteCodeGen.lhs b/ghc/compiler/ghci/ByteCodeGen.lhs
index eb5613c..41021a4 100644
--- a/ghc/compiler/ghci/ByteCodeGen.lhs
+++ b/ghc/compiler/ghci/ByteCodeGen.lhs
@@ -14,7 +14,8 @@ module ByteCodeGen ( UnlinkedBCO, UnlinkedBCOExpr, ItblEnv, ClosureEnv, HValue,
 import Outputable
 import Name		( Name, getName )
 import Id		( Id, idType, isDataConId_maybe, isPrimOpId_maybe, isFCallId,
-			  idPrimRep, mkSysLocal, idName )
+			  idPrimRep, mkSysLocal, idName, isFCallId_maybe )
+import ForeignCall	( ForeignCall(..), CCallTarget(..), CCallSpec(..) )
 import OrdList		( OrdList, consOL, snocOL, appOL, unitOL, 
 			  nilOL, toOL, concatOL, fromOL )
 import FiniteMap	( FiniteMap, addListToFM, listToFM,
@@ -29,8 +30,9 @@ import Type		( typePrimRep, splitTyConApp_maybe, isTyVarTy, splitForAllTys )
 import DataCon		( dataConTag, fIRST_TAG, dataConTyCon, 
                           dataConWrapId, isUnboxedTupleCon )
 import TyCon		( TyCon(..), tyConFamilySize, isDataTyCon, tyConDataCons,
-			  isFunTyCon )
+			  isFunTyCon, isUnboxedTupleTyCon )
 import Class		( Class, classTyCon )
+import Type		( Type, repType, splitRepFunTys )
 import Util		( zipEqual, zipWith4Equal, naturalMergeSortLe, nOfThem )
 import Var		( isTyVar )
 import VarSet		( VarSet, varSetElems )
@@ -46,10 +48,12 @@ import ByteCodeItbls	( ItblEnv, mkITbls )
 import ByteCodeLink	( UnlinkedBCO, UnlinkedBCOExpr, assembleBCO,
 			  ClosureEnv, HValue, filterNameMap,
 			  iNTERP_STACK_CHECK_THRESH )
+import ByteCodeFFI	( taggedSizeW, untaggedSizeW, mkMarshalCode )
+import Linker		( lookupSymbol )
 
 import List		( intersperse, sortBy, zip4 )
 import Foreign		( Ptr(..), mallocBytes )
-import Addr		( Addr(..), addrToInt, writeCharOffAddr )
+import Addr		( Addr(..), nullAddr, addrToInt, writeCharOffAddr )
 import CTypes		( CInt )
 import Exception	( throwDyn )
 
@@ -263,9 +267,11 @@ schemeE :: Int -> Sequel -> BCEnv -> AnnExpr Id VarSet -> BcM BCInstrList
 -- Delegate tail-calls to schemeT.
 schemeE d s p e@(fvs, AnnApp f a) 
    = schemeT d s p (fvs, AnnApp f a)
+
 schemeE d s p e@(fvs, AnnVar v)
    | isFollowableRep v_rep
-   = schemeT d s p (fvs, AnnVar v)
+   =  -- Ptr-ish thing; push it in the normal way
+     schemeT d s p (fvs, AnnVar v)
 
    | otherwise
    = -- returning an unboxed value.  Heave it on the stack, SLIDE, and RETURN.
@@ -328,11 +334,10 @@ schemeE d s p (fvs_case, AnnCase (fvs_scrut, scrut) bndr
                                  [(DEFAULT, [], (fvs_rhs, rhs))])
 
    | let isFunType var_type 
-            = case splitForAllTys var_type of
-                 (_, ty) -> case splitTyConApp_maybe ty of
-                               Just (tycon,_) | isFunTyCon tycon -> True
-                               _ -> False
-         ty_bndr = idType bndr
+            = case splitTyConApp_maybe var_type of
+                 Just (tycon,_) | isFunTyCon tycon -> True
+                 _ -> False
+         ty_bndr = repType (idType bndr)
      in isFunType ty_bndr || isTyVarTy ty_bndr
 
    -- Nasty hack; treat
@@ -355,8 +360,16 @@ schemeE d s p (fvs_case, AnnCase (fvs_scrut, scrut) bndr
          (schemeE d s p new_expr)
 
 
-schemeE d s p (fvs, AnnCase scrut bndr alts)
+schemeE d s p (fvs, AnnCase scrut bndr alts0)
    = let
+        alts = case alts0 of
+                  [(DataAlt dc, [bind1, bind2], rhs)] 
+                     | isUnboxedTupleCon dc
+                       && VoidRep == typePrimRep (idType bind1)
+                     ->  [(DEFAULT, [bind2], rhs)]
+                  other
+                     -> alts0
+
         -- Top of stack is the return itbl, as usual.
         -- underneath it is the pointer to the alt_code BCO.
         -- When an alt is entered, it assumes the returned value is
@@ -445,11 +458,15 @@ schemeE d s p other
 -- 1.  A nullary constructor.  Push its closure on the stack 
 --     and SLIDE and RETURN.
 --
--- 2.  Application of a non-nullary constructor, by defn saturated.
+-- 2.  (Another nasty hack).  Spot (# a::VoidRep, b #) and treat
+--     it simply as  b  -- since the representations are identical
+--     (the VoidRep takes up zero stack space).
+--
+-- 3.  Application of a non-nullary constructor, by defn saturated.
 --     Split the args into ptrs and non-ptrs, and push the nonptrs, 
 --     then the ptrs, and then do PACK and RETURN.
 --
--- 3.  Otherwise, it must be a function call.  Push the args
+-- 4.  Otherwise, it must be a function call.  Push the args
 --     right to left, SLIDE and ENTER.
 
 schemeT :: Int 		-- Stack depth
@@ -462,6 +479,9 @@ schemeT d s p app
 --   | trace ("schemeT: env in = \n" ++ showSDocDebug (ppBCEnv p)) False
 --   = panic "schemeT ?!?!"
 
+--   | trace ("\nschemeT\n" ++ showSDoc (pprCoreExpr (deAnnotate app)) ++ "\n") False
+--   = error "?!?!" 
+
    -- Handle case 0
    | Just (arg, constr_names) <- maybe_is_tagToEnum_call
    = pushAtom True d p arg 		`bind` \ (push, arg_words) ->
@@ -477,17 +497,27 @@ schemeT d s p app
         `snocOL` ENTER
      )
 
-   -- Cases 2 and 3
+   -- Handle case 2
+   | let isVoidRepAtom (_, AnnVar v)    = VoidRep == typePrimRep (idType v)
+         isVoidRepAtom (_, AnnNote n e) = isVoidRepAtom e
+     in  is_con_call && isUnboxedTupleCon con 
+         && length args_r_to_l == 2 
+         && isVoidRepAtom (last (args_r_to_l))
+   = trace ("schemeT: unboxed pair with Void first component") (
+     schemeT d s p (head args_r_to_l)
+     )
+
+   -- Cases 3 and 4
    | otherwise
    = if   is_con_call && isUnboxedTupleCon con
      then returnBc unboxedTupleException
-     else returnBc code
+     else code `seq` returnBc code
 
    where
       -- Detect and extract relevant info for the tagToEnum kludge.
       maybe_is_tagToEnum_call
          = let extract_constr_Names ty
-                  = case splitTyConApp_maybe ty of
+                  = case splitTyConApp_maybe (repType ty) of
                        (Just (tyc, [])) |  isDataTyCon tyc
                                         -> map getName (tyConDataCons tyc)
                        other            -> panic "maybe_is_tagToEnum_call.extract_constr_Ids"
@@ -504,12 +534,12 @@ schemeT d s p app
       chomp expr
          = case snd expr of
               AnnVar v    -> ([], v)
-              AnnApp f a  -> case chomp f of (az, f) -> (snd a:az, f)
+              AnnApp f a  -> case chomp f of (az, f) -> (a:az, f)
               AnnNote n e -> chomp e
               other       -> pprPanic "schemeT" 
                                 (ppr (deAnnotate (panic "schemeT.chomp", other)))
          
-      args_r_to_l = filter (not.isTypeAtom) args_r_to_l_raw
+      args_r_to_l = filter (not.isTypeAtom.snd) args_r_to_l_raw
       isTypeAtom (AnnType _) = True
       isTypeAtom _           = False
 
@@ -523,20 +553,108 @@ schemeT d s p app
          | not is_con_call
          = args_r_to_l
          | otherwise
-         = filter (not.isPtr) args_r_to_l ++ filter isPtr args_r_to_l
+         = filter (not.isPtr.snd) args_r_to_l ++ filter (isPtr.snd) args_r_to_l
            where isPtr = isFollowableRep . atomRep
 
       -- make code to push the args and then do the SLIDE-ENTER thing
-      code = do_pushery d args_final_r_to_l
-
+      code          = do_pushery d (map snd args_final_r_to_l)
       tag_when_push = not is_con_call
-      narg_words    = sum (map (get_arg_szw . atomRep) args_r_to_l)
+      narg_words    = sum (map (get_arg_szw . atomRep . snd) args_r_to_l)
       get_arg_szw   = if tag_when_push then taggedSizeW else untaggedSizeW
 
       do_pushery d (arg:args)
          = let (push, arg_words) = pushAtom tag_when_push d p arg
            in  push `appOL` do_pushery (d+arg_words) args
       do_pushery d []
+
+         -- CCALL !
+         | Just (CCall (CCallSpec (StaticTarget target) 
+                                  cconv safety)) <- isFCallId_maybe fn
+         = let -- Get the arg and result reps.
+               (a_reps, r_rep) = getCCallPrimReps (idType fn)               
+               tys_str = showSDoc (ppr (a_reps, r_rep))
+               {-
+               Because the Haskell stack grows down, the a_reps refer to 
+               lowest to highest addresses in that order.  The args for the call
+               are on the stack.  Now push an unboxed, tagged Addr# indicating
+               the C function to call.  Then push a dummy placeholder for the 
+               result.  Finally, emit a CCALL insn with an offset pointing to the 
+               Addr# just pushed, and a literal field holding the mallocville
+               address of the piece of marshalling code we generate.
+               So, just prior to the CCALL insn, the stack looks like this 
+               (growing down, as usual):
+                 
+                  <arg_n>
+                  ...
+                  <arg_1>
+                  Addr# address_of_C_fn
+                  <placeholder-for-result#> (must be an unboxed type)
+
+               The interpreter then calls the marshall code mentioned
+               in the CCALL insn, passing it (& <placeholder-for-result#>), 
+               that is, the addr of the topmost word in the stack.
+               When this returns, the placeholder will have been
+               filled in.  The placeholder is slid down to the sequel
+               depth, and we RETURN.
+
+               This arrangement makes it simple to do f-i-dynamic since the Addr#
+               value is the first arg anyway.  It also has the virtue that the
+               stack is GC-understandable at all times.
+
+               The marshalling code is generated specifically for this
+               call site, and so knows exactly the (Haskell) stack
+               offsets of the args, fn address and placeholder.  It
+               copies the args to the C stack, calls the stacked addr,
+               and parks the result back in the placeholder.  The interpreter
+               calls it as a normal C call, assuming it has a signature
+                  void marshall_code ( StgWord* ptr_to_top_of_stack )
+               -}
+
+               -- resolve static address
+               target_addr 
+                  = let unpacked = _UNPK_ target
+                    in  case unsafePerformIO (lookupSymbol unpacked) of
+                           Just aa -> case aa of Ptr a# -> A# a#
+                           Nothing -> panic ("interpreted ccall: can't resolve: " 
+                                             ++ unpacked)
+
+               -- push the Addr#
+               addr_usizeW  = untaggedSizeW AddrRep
+               addr_tsizeW  = taggedSizeW AddrRep
+               push_Addr    = toOL [PUSH_UBX (Right target_addr) addr_usizeW,
+                                    PUSH_TAG addr_usizeW]
+               d_after_Addr = d + addr_tsizeW
+               -- push the return placeholder
+               r_lit        = mkDummyLiteral r_rep
+               r_usizeW     = untaggedSizeW r_rep
+               r_tsizeW     = 1{-tag-} + r_usizeW
+               push_r       = toOL [PUSH_UBX (Left r_lit) r_usizeW,
+                                    PUSH_TAG r_usizeW]
+               d_after_r    = d_after_Addr + r_tsizeW
+               -- do the call
+               do_call      = unitOL (CCALL addr_of_marshaller)
+               -- slide and return
+               wrapup       = mkSLIDE r_tsizeW
+                                      (d_after_r - r_tsizeW - s)
+                              `snocOL` RETURN r_rep
+
+               -- generate the marshalling code we're going to call
+               r_offW       = 0 
+               addr_offW    = r_tsizeW
+               arg1_offW    = r_tsizeW + addr_tsizeW
+               args_offW    = map (arg1_offW +) 
+                                  (init (scanl (+) 0 (map taggedSizeW a_reps)))
+               addr_of_marshaller
+                            = mkMarshalCode (r_offW, r_rep) addr_offW
+                                            (zip args_offW a_reps)               
+           in
+               trace (show (arg1_offW, args_offW  ,  (map taggedSizeW a_reps) )) (
+               target_addr 
+               `seq`
+               (push_Addr `appOL` push_r `appOL` do_call `appOL` wrapup)
+               )
+
+         | otherwise
          = case maybe_dcon of
               Just con -> PACK con narg_words `consOL` (
                           mkSLIDE 1 (d - narg_words - s) `snocOL` ENTER)
@@ -553,6 +671,44 @@ bind x f
    = f x
 
 
+mkDummyLiteral :: PrimRep -> Literal
+mkDummyLiteral pr
+   = case pr of
+        IntRep -> MachInt 0
+        _      -> pprPanic "mkDummyLiteral" (ppr pr)
+
+
+-- Convert (eg) 
+--       PrelGHC.Int# -> PrelGHC.State# PrelGHC.RealWorld
+--                    -> (# PrelGHC.State# PrelGHC.RealWorld, PrelGHC.Int# #)
+--
+-- to [IntRep] -> IntRep
+-- and check that the last arg is VoidRep'd and that an unboxed pair is
+-- returned wherein the first arg is VoidRep'd.
+
+getCCallPrimReps :: Type -> ([PrimRep], PrimRep)
+getCCallPrimReps fn_ty
+   = let (a_tys, r_ty) = splitRepFunTys fn_ty
+         a_reps        = map typePrimRep a_tys
+         (r_tycon, r_reps) 
+            = case splitTyConApp_maybe (repType r_ty) of
+                      (Just (tyc, tys)) -> (tyc, map typePrimRep tys)
+                      Nothing -> blargh
+         ok = length a_reps >= 1 && VoidRep == last a_reps
+               && length r_reps == 2 && VoidRep == head r_reps
+               && isUnboxedTupleTyCon r_tycon
+               && PtrRep /= r_rep_to_go	-- if it was, it would be impossible 
+                                        -- to create a valid return value 
+                                        -- placeholder on the stack
+         a_reps_to_go = init a_reps
+         r_rep_to_go  = r_reps !! 1
+         blargh       = pprPanic "getCCallPrimReps: can't handle:" 
+                                 (pprType fn_ty)
+     in 
+     --trace (showSDoc (ppr (a_reps, r_reps))) (
+     if ok then (a_reps_to_go, r_rep_to_go) else blargh
+     --)
+
 atomRep (AnnVar v)    = typePrimRep (idType v)
 atomRep (AnnLit l)    = literalPrimRep l
 atomRep (AnnNote n b) = atomRep (snd b)
@@ -689,7 +845,7 @@ pushAtom tagged d p (AnnVar v)
      (unitOL (PUSH_TAG 0), 1)
 
    | isFCallId v
-   = pprPanic "pushAtom: byte code generator can't handle CCalls" (ppr v)
+   = pprPanic "pushAtom: shouldn't get an FCallId here" (ppr v)
 
    | Just primop <- isPrimOpId_maybe v
    = (unitOL (PUSH_G (Right primop)), 1)
@@ -736,7 +892,7 @@ pushAtom False d p (AnnLit lit)
      where
         code rep
            = let size_host_words = untaggedSizeW rep
-             in (unitOL (PUSH_UBX lit size_host_words), size_host_words)
+             in (unitOL (PUSH_UBX (Left lit) size_host_words), size_host_words)
 
         pushStr s 
            = let mallocvilleAddr
@@ -758,12 +914,9 @@ pushAtom False d p (AnnLit lit)
                                       return (A# a#)
                                    )
                          _ -> panic "StgInterp.lit2expr: unhandled string constant type"
-
-                 addrLit 
-                    = MachInt (toInteger (addrToInt mallocvilleAddr))
              in
                 -- Get the addr on the stack, untaggedly
-                (unitOL (PUSH_UBX addrLit 1), 1)
+                (unitOL (PUSH_UBX (Right mallocvilleAddr) 1), 1)
 
 
 
@@ -931,20 +1084,6 @@ lookupBCEnv_maybe :: BCEnv -> Id -> Maybe Int
 lookupBCEnv_maybe = lookupFM
 
 
--- When I push one of these on the stack, how much does Sp move by?
-taggedSizeW :: PrimRep -> Int
-taggedSizeW pr
-   | isFollowableRep pr = 1
-   | otherwise          = 1{-the tag-} + getPrimRepSize pr
-
-
--- The plain size of something, without tag.
-untaggedSizeW :: PrimRep -> Int
-untaggedSizeW pr
-   | isFollowableRep pr = 1
-   | otherwise          = getPrimRepSize pr
-
-
 taggedIdSizeW, untaggedIdSizeW :: Id -> Int
 taggedIdSizeW   = taggedSizeW   . typePrimRep . idType
 untaggedIdSizeW = untaggedSizeW . typePrimRep . idType
diff --git a/ghc/compiler/ghci/ByteCodeInstr.lhs b/ghc/compiler/ghci/ByteCodeInstr.lhs
index c654b20..64e27fd 100644
--- a/ghc/compiler/ghci/ByteCodeInstr.lhs
+++ b/ghc/compiler/ghci/ByteCodeInstr.lhs
@@ -19,6 +19,7 @@ import PrimRep		( PrimRep )
 import DataCon		( DataCon )
 import VarSet		( VarSet )
 import PrimOp		( PrimOp )
+import Foreign		( Addr )
 
 \end{code}
 
@@ -55,9 +56,17 @@ data BCInstr
    | PUSH_AS   Name PrimRep	-- push alts and BCO_ptr_ret_info
 				-- PrimRep so we know which itbl
    -- Pushing literals
-   | PUSH_UBX  Literal	Int 
-                        -- push this int/float/double, NO TAG, on the stack
+   | PUSH_UBX  (Either Literal Addr)
+               Int      -- push this int/float/double/addr, NO TAG, on the stack
 			-- Int is # of words to copy from literal pool
+                        -- Eitherness reflects the difficulty of dealing with 
+                        -- MachAddr here, mostly due to the excessive 
+                        -- (and unnecessary) restrictions imposed by the designers
+                        -- of the new Foreign library.  In particular it is quite 
+                        -- impossible to convert an Addr to any other integral type,
+                        -- and it appears impossible to get hold of the bits of an 
+                        -- addr, even though we need to to assemble BCOs.
+
    | PUSH_TAG  Int      -- push this tag on the stack
 
    | SLIDE     Int{-this many-} Int{-down by this much-}
@@ -89,11 +98,14 @@ data BCInstr
    | CASEFAIL
    | JMP              LocalLabel
 
+   -- For doing calls to C (via glue code generated by ByteCodeFFI)
+   | CCALL            Addr	-- of the glue code
+
    -- To Infinity And Beyond
    | ENTER
-   | RETURN	PrimRep
-		-- unboxed value on TOS.  Use tag to find underlying ret itbl
-		-- and return as per that.
+   | RETURN    PrimRep
+               -- unboxed value on TOS.  Use tag to find underlying ret itbl
+               -- and return as per that.
 
 
 instance Outputable a => Outputable (ProtoBCO a) where
@@ -114,7 +126,10 @@ instance Outputable BCInstr where
    ppr (PUSH_G (Right op))   = text "PUSH_G  " <+> text "PrelPrimopWrappers." 
                                                <> ppr op
    ppr (PUSH_AS nm pk)       = text "PUSH_AS " <+> ppr nm <+> ppr pk
-   ppr (PUSH_UBX lit nw)     = text "PUSH_UBX" <+> parens (int nw) <+> ppr lit
+
+   ppr (PUSH_UBX (Left lit) nw) = text "PUSH_UBX" <+> parens (int nw) <+> ppr lit
+   ppr (PUSH_UBX (Right aa) nw) = text "PUSH_UBX" <+> parens (int nw) <+> text (show aa)
+
    ppr (PUSH_TAG n)          = text "PUSH_TAG" <+> int n
    ppr (SLIDE n d)           = text "SLIDE   " <+> int n <+> int d
    ppr (ALLOC sz)            = text "ALLOC   " <+> int sz
@@ -138,7 +153,8 @@ instance Outputable BCInstr where
    ppr CASEFAIL              = text "CASEFAIL"
    ppr ENTER                 = text "ENTER"
    ppr (RETURN pk)           = text "RETURN  " <+> ppr pk
-
+   ppr (CCALL marshall_addr) = text "CCALL   " <+> text "marshall code at" 
+                                               <+> text (show marshall_addr)
 
 -- The stack use, in words, of each bytecode insn.  These _must_ be
 -- correct, or overestimates of reality, to be safe.
@@ -168,6 +184,7 @@ bciStackUse CASEFAIL              = 0
 bciStackUse (JMP lab)             = 0
 bciStackUse ENTER                 = 0
 bciStackUse (RETURN pk)           = 0
+bciStackUse (CCALL marshall_addr) = 0
 
 -- These insns actually reduce stack use, but we need the high-tide level,
 -- so can't use this info.  Not that it matters much.
diff --git a/ghc/compiler/ghci/ByteCodeLink.lhs b/ghc/compiler/ghci/ByteCodeLink.lhs
index 320138d..31c912a 100644
--- a/ghc/compiler/ghci/ByteCodeLink.lhs
+++ b/ghc/compiler/ghci/ByteCodeLink.lhs
@@ -67,8 +67,8 @@ linkIModules :: ItblEnv    -- incoming global itbl env; returned updated
 	     -> IO ([HValue], ItblEnv, ClosureEnv)
 linkIModules gie gce mods 
    = do let (bcoss, ies) = unzip mods
-            bcos = concat bcoss
-            final_gie = foldr plusFM gie ies
+            bcos         = concat bcoss
+            final_gie    = foldr plusFM gie ies
         (final_gce, linked_bcos) <- linkSomeBCOs True final_gie gce bcos
         return (linked_bcos, final_gie, final_gce)
 
@@ -226,8 +226,13 @@ mkBits findLabel st proto_insns
                PUSH_AS   nm pk    -> do (p, st2)  <- ptr st (Left nm)
                                         (np, st3) <- ctoi_itbl st2 pk
                                         instr3 st3 i_PUSH_AS p np
-               PUSH_UBX  lit nws  -> do (np, st2) <- literal st lit
+               PUSH_UBX  (Left lit) nws  
+                                  -> do (np, st2) <- literal st lit
                                         instr3 st2 i_PUSH_UBX np nws
+               PUSH_UBX  (Right aa) nws  
+                                  -> do (np, st2) <- addr st aa
+                                        instr3 st2 i_PUSH_UBX np nws
+
                PUSH_TAG  tag      -> instr2 st i_PUSH_TAG tag
                SLIDE     n by     -> instr3 st i_SLIDE n by
                ALLOC     n        -> instr2 st i_ALLOC n
@@ -252,10 +257,12 @@ mkBits findLabel st proto_insns
                TESTLT_P  i l      -> instr3 st i_TESTLT_P i (findLabel l)
                TESTEQ_P  i l      -> instr3 st i_TESTEQ_P i (findLabel l)
                CASEFAIL           -> instr1 st i_CASEFAIL
-               JMP l              -> instr2 st i_JMP (findLabel l)
+               JMP       l        -> instr2 st i_JMP (findLabel l)
                ENTER              -> instr1 st i_ENTER
-               RETURN rep         -> do (itbl_no,st2) <- itoc_itbl st rep
+               RETURN    rep      -> do (itbl_no,st2) <- itoc_itbl st rep
                                         instr2 st2 i_RETURN itbl_no
+               CCALL     m_addr   -> do (np, st2) <- addr st m_addr
+                                        instr2 st2 i_CCALL np
 
        i2s :: Int -> Word16
        i2s = fromIntegral
@@ -315,6 +322,7 @@ mkBits findLabel st proto_insns
        literal st (MachFloat r)  = float st (fromRational r)
        literal st (MachDouble r) = double st (fromRational r)
        literal st (MachChar c)   = int st c
+       literal st other          = pprPanic "ByteCodeLink.mkBits" (ppr other)
 
        ctoi_itbl st pk
           = addr st ret_itbl_addr
@@ -513,7 +521,7 @@ lookupCE ce (Right primop)
         case m of
            Just (Ptr addr) -> case addrToHValue# addr of
                                  (# hval #) -> return hval
-           Nothing -> pprPanic "ByteCodeGen.lookupCE(primop)" (ppr primop)
+           Nothing -> pprPanic "ByteCodeLink.lookupCE(primop)" (ppr primop)
 lookupCE ce (Left nm)
    = case lookupFM ce nm of
         Just aa -> return aa
@@ -522,7 +530,7 @@ lookupCE ce (Left nm)
                  case m of
                     Just (Ptr addr) -> case addrToHValue# addr of
                                           (# hval #) -> return hval
-                    Nothing        -> pprPanic "ByteCodeGen.lookupCE" (ppr nm)
+                    Nothing        -> pprPanic "ByteCodeLink.lookupCE" (ppr nm)
 
 lookupIE :: ItblEnv -> Name -> IO (Ptr a)
 lookupIE ie con_nm 
@@ -538,7 +546,7 @@ lookupIE ie con_nm
                              n <- lookupSymbol (nameToCLabel con_nm "static_info")
                              case n of
                                 Just addr -> return addr
-                                Nothing -> pprPanic "ByteCodeGen.lookupIE" (ppr con_nm)
+                                Nothing -> pprPanic "ByteCodeLink.lookupIE" (ppr con_nm)
 
 -- HACKS!!!  ToDo: cleaner
 nameToCLabel :: Name -> String{-suffix-} -> String
@@ -592,6 +600,7 @@ i_ENTER    = (bci_ENTER :: Int)
 i_RETURN   = (bci_RETURN :: Int)
 i_STKCHECK = (bci_STKCHECK :: Int)
 i_JMP      = (bci_JMP :: Int)
+i_CCALL    = (bci_CCALL :: Int)
 
 iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)
 
diff --git a/ghc/compiler/typecheck/TcForeign.lhs b/ghc/compiler/typecheck/TcForeign.lhs
index 5812a76..c46db8b 100644
--- a/ghc/compiler/typecheck/TcForeign.lhs
+++ b/ghc/compiler/typecheck/TcForeign.lhs
@@ -124,7 +124,7 @@ tcCheckFIType sig_ty arg_tys res_ty (CImport (CCallSpec target _ safety))
 
   | otherwise 		-- Normal foreign import
   = checkCg (if isCasmTarget target
-	     then checkC else checkCOrAsmOrDotNet)		`thenNF_Tc_`
+	     then checkC else checkCOrAsmOrDotNetOrInterp)	`thenNF_Tc_`
     checkCTarget target						`thenNF_Tc_`
     getDOptsTc							`thenNF_Tc` \ dflags ->
     checkForeignArgs (isFFIArgumentTy dflags safety) arg_tys	`thenNF_Tc_`
@@ -133,7 +133,7 @@ tcCheckFIType sig_ty arg_tys res_ty (CImport (CCallSpec target _ safety))
 -- This makes a convenient place to check
 -- that the C identifier is valid for C
 checkCTarget (StaticTarget str) 
-  = checkCg checkCOrAsmOrDotNet 	`thenNF_Tc_`
+  = checkCg checkCOrAsmOrDotNetOrInterp	 	`thenNF_Tc_`
     check (isCLabelString str) (badCName str)
 
 checkCTarget (CasmTarget _)
@@ -244,6 +244,13 @@ checkCOrAsmOrDotNet HscAsm = Nothing
 checkCOrAsmOrDotNet HscILX = Nothing
 checkCOrAsmOrDotNet other  = Just (text "requires C, native or .NET ILX code generation")
 
+checkCOrAsmOrDotNetOrInterp HscC           = Nothing
+checkCOrAsmOrDotNetOrInterp HscAsm         = Nothing
+checkCOrAsmOrDotNetOrInterp HscILX         = Nothing
+checkCOrAsmOrDotNetOrInterp HscInterpreted = Nothing
+checkCOrAsmOrDotNetOrInterp other  
+   = Just (text "requires interpreted, C, native or .NET ILX code generation")
+
 checkCg check
  = getDOptsTc		`thenNF_Tc` \ dflags ->
    case check (dopt_HscLang dflags) of
-- 
1.7.10.4