[project @ 2003-02-12 17:57:34 by simonpj]

[ghc-hetmet.git] / ghc / compiler / ghci / ByteCodeGen.lhs
diff --git a/ghc/compiler/ghci/ByteCodeGen.lhs b/ghc/compiler/ghci/ByteCodeGen.lhs

index 345a81b..b8cbb37 100644 (file)
--- a/ghc/compiler/ghci/ByteCodeGen.lhs
+++ b/ghc/compiler/ghci/ByteCodeGen.lhs
@@ -1,43 +1,49 @@
  %
-% (c) The University of Glasgow 2000
+% (c) The University of Glasgow 2002
  %
  \section[ByteCodeGen]{Generate bytecode from Core}
  
  \begin{code}
-module ByteCodeGen ( UnlinkedBCO, UnlinkedBCOExpr, ItblEnv, ClosureEnv, HValue,
-                    filterNameMap,
-                     byteCodeGen, coreExprToBCOs
-                  ) where
+module ByteCodeGen ( UnlinkedBCO, byteCodeGen, coreExprToBCOs ) where
  
  #include "HsVersions.h"
  
+import ByteCodeInstr
+import ByteCodeFFI     ( mkMarshalCode, moan64 )
+import ByteCodeAsm     ( CompiledByteCode(..), UnlinkedBCO, 
+                         assembleBCO, assembleBCOs, iNTERP_STACK_CHECK_THRESH )
+import ByteCodeLink    ( lookupStaticPtr )
+
  import Outputable
-import Name            ( Name, getName )
-import Id              ( Id, idType, isDataConId_maybe, isPrimOpId_maybe, isFCallId,
-                         idPrimRep, mkSysLocal, idName, isFCallId_maybe, isPrimOpId )
+import Name            ( Name, getName, mkSystemName )
+import Id
+import FiniteMap
  import ForeignCall     ( ForeignCall(..), CCallTarget(..), CCallSpec(..) )
-import OrdList         ( OrdList, consOL, snocOL, appOL, unitOL, 
-                         nilOL, toOL, concatOL, fromOL )
-import FiniteMap       ( FiniteMap, addListToFM, listToFM, elemFM,
-                         addToFM, lookupFM, fmToList )
+import HscTypes                ( ModGuts(..), ModGuts, typeEnvTyCons, typeEnvClasses )
+import CoreUtils       ( exprType )
  import CoreSyn
  import PprCore         ( pprCoreExpr )
  import Literal         ( Literal(..), literalPrimRep )
-import PrimRep         ( PrimRep(..) )
+import PrimRep
  import PrimOp          ( PrimOp(..) )
  import CoreFVs         ( freeVars )
-import Type            ( typePrimRep, isUnLiftedType, splitTyConApp_maybe, isTyVarTy )
-import DataCon         ( dataConTag, fIRST_TAG, dataConTyCon, 
-                          dataConWrapId, isUnboxedTupleCon )
-import TyCon           ( TyCon(..), tyConFamilySize, isDataTyCon, tyConDataCons,
+import Type            ( typePrimRep, isUnLiftedType, splitTyConApp_maybe, 
+                         isTyVarTy )
+import DataCon         ( DataCon, dataConTag, fIRST_TAG, dataConTyCon, 
+                          isUnboxedTupleCon, isNullaryDataCon, dataConWorkId,
+                         dataConRepArity )
+import TyCon           ( tyConFamilySize, isDataTyCon, tyConDataCons,
                           isFunTyCon, isUnboxedTupleTyCon )
  import Class           ( Class, classTyCon )
  import Type            ( Type, repType, splitFunTys, dropForAlls )
-import Util            ( zipEqual, zipWith4Equal, naturalMergeSortLe, nOfThem,
-                         isSingleton, lengthIs, notNull )
+import Util
  import DataCon         ( dataConRepArity )
  import Var             ( isTyVar )
  import VarSet          ( VarSet, varSetElems )
+import TysPrim         ( foreignObjPrimTyCon, 
+                         arrayPrimTyCon, mutableArrayPrimTyCon,
+                         byteArrayPrimTyCon, mutableByteArrayPrimTyCon
+                       )
  import PrimRep         ( isFollowableRep )
  import CmdLineOpts     ( DynFlags, DynFlag(..) )
  import ErrUtils                ( showPass, dumpIfSet_dyn )
@@ -46,112 +52,76 @@ import FastString  ( FastString(..), unpackFS )
  import Panic           ( GhcException(..) )
  import PprType         ( pprType )
  import SMRep           ( arrWordsHdrSize, arrPtrsHdrSize )
+import OrdList
  import Constants       ( wORD_SIZE )
-import ByteCodeInstr   ( BCInstr(..), ProtoBCO(..), nameOfProtoBCO, bciStackUse )
-import ByteCodeItbls   ( ItblEnv, mkITbls )
-import ByteCodeLink    ( UnlinkedBCO, UnlinkedBCOExpr, assembleBCO,
-                         ClosureEnv, HValue, filterNameMap, linkFail,
-                         iNTERP_STACK_CHECK_THRESH )
-import ByteCodeFFI     ( taggedSizeW, untaggedSizeW, mkMarshalCode, moan64 )
-import Linker          ( lookupSymbol )
-
-import List            ( intersperse, sortBy, zip4 )
+import BasicTypes      ( TopLevelFlag(..), isTopLevel, isNotTopLevel )
+
+import Data.List       ( intersperse, sortBy, zip4, zip5, partition )
  import Foreign         ( Ptr, castPtr, mallocBytes, pokeByteOff, Word8 )
-import CTypes          ( CInt )
-import Exception       ( throwDyn )
+import Foreign.C       ( CInt )
+import Control.Exception       ( throwDyn )
  
-import GlaExts         ( Int(..), ByteArray# )
+import GHC.Exts                ( Int(..), ByteArray# )
  
-import Monad           ( when )
-import Maybe           ( isJust )
-import Char            ( ord )
-\end{code}
+import Control.Monad   ( when, mapAndUnzipM )
+import Data.Char       ( ord )
+import Data.Bits
  
-%************************************************************************
-%*                                                                     *
-\subsection{Functions visible from outside this module.}
-%*                                                                     *
-%************************************************************************
-
-\begin{code}
+-- -----------------------------------------------------------------------------
+-- Generating byte code for a complete module 
  
  byteCodeGen :: DynFlags
-            -> [CoreBind] 
-            -> [TyCon] -> [Class]
-            -> IO ([UnlinkedBCO], ItblEnv)
-byteCodeGen dflags binds local_tycons local_classes
+            -> ModGuts
+            -> IO CompiledByteCode
+byteCodeGen dflags (ModGuts { mg_binds = binds, mg_types = type_env })
     = do showPass dflags "ByteCodeGen"
-        let tycs = local_tycons ++ map classTyCon local_classes
-        itblenv <- mkITbls tycs
+        let  local_tycons  = typeEnvTyCons  type_env
+            local_classes = typeEnvClasses type_env
+            tycs = local_tycons ++ map classTyCon local_classes
  
-        let flatBinds = concatMap getBind binds
-            getBind (NonRec bndr rhs) = [(bndr, freeVars rhs)]
-            getBind (Rec binds)       = [(bndr, freeVars rhs) | (bndr,rhs) <- binds]
+        let flatBinds = [ (bndr, freeVars rhs) 
+                       | (bndr, rhs) <- flattenBinds binds]
  
-        (BcM_State proto_bcos final_ctr mallocd, ())
-           <- runBc (BcM_State [] 0 []) 
-                    (mapBc (schemeR True []) flatBinds `thenBc_` returnBc ())
-                       --               ^^
-                       -- better be no free vars in these top-level bindings
+        (BcM_State final_ctr mallocd, proto_bcos)
+           <- runBc (mapM schemeTopBind flatBinds)
  
          when (notNull mallocd)
               (panic "ByteCodeGen.byteCodeGen: missing final emitBc?")
  
          dumpIfSet_dyn dflags Opt_D_dump_BCOs
-           "Proto-bcos" (vcat (intersperse (char ' ') (map ppr proto_bcos)))
-
-        bcos <- mapM assembleBCO proto_bcos
+           "Proto-BCOs" (vcat (intersperse (char ' ') (map ppr proto_bcos)))
  
-        return (bcos, itblenv)
+        assembleBCOs proto_bcos tycs
          
+-- -----------------------------------------------------------------------------
+-- Generating byte code for an expression
  
  -- Returns: (the root BCO for this expression, 
  --           a list of auxilary BCOs resulting from compiling closures)
  coreExprToBCOs :: DynFlags
                -> CoreExpr
-               -> IO UnlinkedBCOExpr
+               -> IO UnlinkedBCO
  coreExprToBCOs dflags expr
   = do showPass dflags "ByteCodeGen"
  
        -- create a totally bogus name for the top-level BCO; this
        -- should be harmless, since it's never used for anything
-      let invented_id   = mkSysLocal FSLIT("ExprTopLevel") 
-                               (mkPseudoUnique3 0) 
-                               (panic "invented_id's type")
-      let invented_name = idName invented_id
-
-         annexpr = freeVars expr
-         fvs = filter (not.isTyVar) (varSetElems (fst annexpr))
-
-      (BcM_State all_proto_bcos final_ctr mallocd, ()) 
-         <- runBc (BcM_State [] 0 []) 
-                  (schemeR True fvs (invented_id, annexpr))
+      let invented_name  = mkSystemName (mkPseudoUnique3 0) FSLIT("ExprTopLevel")
+          invented_id    = mkLocalId invented_name (panic "invented_id's type")
+         
+      (BcM_State final_ctr mallocd, proto_bco) 
+         <- runBc (schemeTopBind (invented_id, freeVars expr))
  
        when (notNull mallocd)
             (panic "ByteCodeGen.coreExprToBCOs: missing final emitBc?")
  
-      dumpIfSet_dyn dflags Opt_D_dump_BCOs
-         "Proto-bcos" (vcat (intersperse (char ' ') (map ppr all_proto_bcos)))
-
-      let root_proto_bco 
-             = case filter ((== invented_name).nameOfProtoBCO) all_proto_bcos of
-                  [root_bco] -> root_bco
-          auxiliary_proto_bcos
-             = filter ((/= invented_name).nameOfProtoBCO) all_proto_bcos
-
-      auxiliary_bcos <- mapM assembleBCO auxiliary_proto_bcos
-      root_bco <- assembleBCO root_proto_bco
+      dumpIfSet_dyn dflags Opt_D_dump_BCOs "Proto-BCOs" (ppr proto_bco)
  
-      return (root_bco, auxiliary_bcos)
-\end{code}
+      assembleBCO proto_bco
  
-%************************************************************************
-%*                                                                     *
-\subsection{Compilation schema for the bytecode generator.}
-%*                                                                     *
-%************************************************************************
  
-\begin{code}
+-- -----------------------------------------------------------------------------
+-- Compilation schema for the bytecode generator
  
  type BCInstrList = OrdList BCInstr
  
@@ -167,13 +137,32 @@ ppBCEnv p
       $$ nest 4 (vcat (map pp_one (sortBy cmp_snd (fmToList p))))
       $$ text "end-env"
       where
-        pp_one (var, offset) = int offset <> colon <+> ppr var
+        pp_one (var, offset) = int offset <> colon <+> ppr var <+> ppr (idPrimRep var)
          cmp_snd x y = compare (snd x) (snd y)
  
  -- Create a BCO and do a spot of peephole optimisation on the insns
  -- at the same time.
-mkProtoBCO nm instrs_ordlist origin mallocd_blocks
-   = ProtoBCO nm maybe_with_stack_check origin mallocd_blocks
+mkProtoBCO
+   :: name
+   -> BCInstrList
+   -> Either  [AnnAlt Id VarSet] (AnnExpr Id VarSet)
+   -> Int
+   -> Int
+   -> [StgWord]
+   -> Bool     -- True <=> is a return point, rather than a function
+   -> [Ptr ()]
+   -> ProtoBCO name
+mkProtoBCO nm instrs_ordlist origin arity bitmap_size bitmap
+  is_ret mallocd_blocks
+   = ProtoBCO {
+       protoBCOName = nm,
+       protoBCOInstrs = maybe_with_stack_check,
+       protoBCOBitmap = bitmap,
+       protoBCOBitmapSize = bitmap_size,
+       protoBCOArity = arity,
+       protoBCOExpr = origin,
+       protoBCOPtrs = mallocd_blocks
+      }
       where
          -- Overestimate the stack usage (in words) of this BCO,
          -- and if >= iNTERP_STACK_CHECK_THRESH, add an explicit
@@ -183,17 +172,19 @@ mkProtoBCO nm instrs_ordlist origin mallocd_blocks
          -- (hopefully rare) cases when the (overestimated) stack use
          -- exceeds iNTERP_STACK_CHECK_THRESH.
          maybe_with_stack_check
+          | is_ret = peep_d
+               -- don't do stack checks at return points;
+               -- everything is aggregated up to the top BCO
+               -- (which must be a function)
             | stack_overest >= 65535
             = pprPanic "mkProtoBCO: stack use won't fit in 16 bits" 
                        (int stack_overest)
             | stack_overest >= iNTERP_STACK_CHECK_THRESH
-           = (STKCHECK stack_overest) : peep_d
+           = STKCHECK stack_overest : peep_d
             | otherwise
             = peep_d    -- the supposedly common case
               
          stack_overest = sum (map bciStackUse peep_d)
-                        + 10 {- just to be really really sure -}
-
  
          -- Merge local pushes
          peep_d = peep (fromOL instrs_ordlist)
@@ -207,14 +198,80 @@ mkProtoBCO nm instrs_ordlist origin mallocd_blocks
          peep []
             = []
  
-
--- Compile code for the right hand side of a let binding.
+argBits :: [PrimRep] -> [Bool]
+argBits [] = []
+argBits (rep : args)
+  | isFollowableRep rep = False : argBits args
+  | otherwise = take (getPrimRepSize rep) (repeat True) ++ argBits args
+
+mkBitmap :: [Bool] -> [StgWord]
+mkBitmap [] = []
+mkBitmap stuff = chunkToLiveness chunk : mkBitmap rest
+  where (chunk, rest) = splitAt wORD_SIZE_IN_BITS stuff
+
+chunkToLiveness :: [Bool] -> StgWord
+chunkToLiveness chunk = 
+  foldr (.|.) 0 [ 1 `shiftL` n | (True,n) <- zip chunk [0..] ]
+
+-- make a bitmap where the slots specified are the *zeros* in the bitmap.
+-- eg. [1,2,4], size 4 ==> 0x8  (we leave any bits outside the size as zero,
+-- just to make the bitmap easier to read).
+intsToBitmap :: Int -> [Int] -> [StgWord]
+intsToBitmap size slots{- must be sorted -}
+  | size <= 0 = []
+  | otherwise = 
+    (foldr xor init (map (1 `shiftL`) these)) : 
+       intsToBitmap (size - wORD_SIZE_IN_BITS) 
+            (map (\x -> x - wORD_SIZE_IN_BITS) rest)
+   where (these,rest) = span (<wORD_SIZE_IN_BITS) slots
+        init
+          | size >= wORD_SIZE_IN_BITS = complement 0
+          | otherwise                 = (1 `shiftL` size) - 1
+
+wORD_SIZE_IN_BITS = wORD_SIZE * 8 :: Int
+
+-- -----------------------------------------------------------------------------
+-- schemeTopBind
+
+-- Compile code for the right-hand side of a top-level binding
+
+schemeTopBind :: (Id, AnnExpr Id VarSet) -> BcM (ProtoBCO Name)
+
+
+schemeTopBind (id, rhs)
+  | Just data_con <- isDataConWorkId_maybe id,
+    isNullaryDataCon data_con
+  =    -- Special case for the worker of a nullary data con.
+       -- It'll look like this:        Nil = /\a -> Nil a
+       -- If we feed it into schemeR, we'll get 
+       --      Nil = Nil
+       -- because mkConAppCode treats nullary constructor applications
+       -- by just re-using the single top-level definition.  So
+       -- for the worker itself, we must allocate it directly.
+    emitBc (mkProtoBCO (getName id) (toOL [PACK data_con 0, ENTER])
+                       (Right rhs) 0 0 [{-no bitmap-}] False{-not alts-})
+
+  | otherwise
+  = schemeR [{- No free variables -}] (id, rhs)
+
+-- -----------------------------------------------------------------------------
+-- schemeR
+
+-- Compile code for a right-hand side, to give a BCO that,
+-- when executed with the free variables and arguments on top of the stack,
+-- will return with a pointer to the result on top of the stack, after
+-- removing the free variables and arguments.
+--
  -- Park the resulting BCO in the monad.  Also requires the
  -- variable to which this value was bound, so as to give the
--- resulting BCO a name.  Bool indicates top-levelness.
-
-schemeR :: Bool -> [Id] -> (Id, AnnExpr Id VarSet) -> BcM ()
-schemeR is_top fvs (nm, rhs) 
+-- resulting BCO a name. 
+
+schemeR :: [Id]                -- Free vars of the RHS, ordered as they
+                               -- will appear in the thunk.  Empty for
+                               -- top-level things, which have no free vars.
+       -> (Id, AnnExpr Id VarSet)
+       -> BcM (ProtoBCO Name)
+schemeR fvs (nm, rhs) 
  {-
     | trace (showSDoc (
                (char ' '
@@ -223,270 +280,148 @@ schemeR is_top fvs (nm, rhs)
                 $$ char ' '
                ))) False
     = undefined
--}
     | otherwise
-   = schemeR_wrk is_top fvs rhs nm (collect [] rhs)
-
+-}
+   = schemeR_wrk fvs nm rhs (collect [] rhs)
  
-collect xs (_, AnnNote note e)
-   = collect xs e
-collect xs (_, AnnLam x e) 
-   = collect (if isTyVar x then xs else (x:xs)) e
-collect xs not_lambda
-   = (reverse xs, not_lambda)
+collect xs (_, AnnNote note e) = collect xs e
+collect xs (_, AnnLam x e)     = collect (if isTyVar x then xs else (x:xs)) e
+collect xs (_, not_lambda)     = (reverse xs, not_lambda)
  
-schemeR_wrk is_top fvs original_body nm (args, body)
-   | Just dcon <- maybe_toplevel_null_con_rhs
-   = --trace ("nullary constructor! " ++ showSDocDebug (ppr nm)) (
-     emitBc (mkProtoBCO (getName nm) (toOL [PACK dcon 0, ENTER])
-                                     (Right original_body))
-     --)
+schemeR_wrk fvs nm original_body (args, body)
+   = let 
+        all_args  = reverse args ++ fvs
+        arity     = length all_args
+        -- all_args are the args in reverse order.  We're compiling a function
+        -- \fv1..fvn x1..xn -> e 
+        -- i.e. the fvs come first
  
-   | otherwise
-   = let all_args  = reverse args ++ fvs
-         szsw_args = map taggedIdSizeW all_args
+         szsw_args = map idSizeW all_args
           szw_args  = sum szsw_args
           p_init    = listToFM (zip all_args (mkStackOffsets 0 szsw_args))
-         argcheck  = unitOL (ARGCHECK szw_args)
+
+        -- make the arg bitmap
+        bits = argBits (reverse (map idPrimRep all_args))
+        bitmap_size = length bits
+        bitmap = mkBitmap bits
       in
       schemeE szw_args 0 p_init body            `thenBc` \ body_code ->
-     emitBc (mkProtoBCO (getName nm) (appOL argcheck body_code) 
-                                     (Right original_body))
+     emitBc (mkProtoBCO (getName nm) body_code (Right original_body)
+               arity bitmap_size bitmap False{-not alts-})
  
-     where
-        maybe_toplevel_null_con_rhs
-           | is_top && null args
-           = case nukeTyArgs (snd body) of
-                AnnVar v_wrk 
-                   -> case isDataConId_maybe v_wrk of
-                         Nothing -> Nothing
-                         Just dc_wrk |  nm == dataConWrapId dc_wrk
-                                     -> Just dc_wrk
-                                     |  otherwise 
-                                     -> Nothing
-                other -> Nothing
-           | otherwise
-           = Nothing
  
-        nukeTyArgs (AnnApp f (_, AnnType _)) = nukeTyArgs (snd f)
-        nukeTyArgs other                     = other
+fvsToEnv :: BCEnv -> VarSet -> [Id]
+-- Takes the free variables of a right-hand side, and
+-- delivers an ordered list of the local variables that will
+-- be captured in the thunk for the RHS
+-- The BCEnv argument tells which variables are in the local
+-- environment: these are the ones that should be captured
+--
+-- The code that constructs the thunk, and the code that executes
+-- it, have to agree about this layout
+fvsToEnv p fvs = [v | v <- varSetElems fvs, 
+                     isId v,           -- Could be a type variable
+                     v `elemFM` p]
  
-
--- Let szsw be the sizes in words of some items pushed onto the stack,
--- which has initial depth d'.  Return the values which the stack environment
--- should map these items to.
-mkStackOffsets :: Int -> [Int] -> [Int]
-mkStackOffsets original_depth szsw
-   = map (subtract 1) (tail (scanl (+) original_depth szsw))
+-- -----------------------------------------------------------------------------
+-- schemeE
  
  -- Compile code to apply the given expression to the remaining args
  -- on the stack, returning a HNF.
-schemeE :: Int -> Sequel -> BCEnv -> AnnExpr Id VarSet -> BcM BCInstrList
+schemeE :: Int -> Sequel -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
  
  -- Delegate tail-calls to schemeT.
-schemeE d s p e@(fvs, AnnApp f a) 
-   = schemeT d s p (fvs, AnnApp f a)
+schemeE d s p e@(AnnApp f a) 
+   = schemeT d s p e
  
-schemeE d s p e@(fvs, AnnVar v)
+schemeE d s p e@(AnnVar v)
     | not (isUnLiftedType v_type)
     =  -- Lifted-type thing; push it in the normal way
-     schemeT d s p (fvs, AnnVar v)
+     schemeT d s p e
  
     | otherwise
     = -- Returning an unlifted value.  
       -- Heave it on the stack, SLIDE, and RETURN.
-     pushAtom True d p (AnnVar v)      `thenBc` \ (push, szw) ->
+     pushAtom d p (AnnVar v)   `thenBc` \ (push, szw) ->
       returnBc (push                    -- value onto stack
                 `appOL`  mkSLIDE szw (d-s)      -- clear to sequel
-               `snocOL` RETURN v_rep)  -- go
+               `snocOL` RETURN_UBX v_rep)      -- go
     where
        v_type = idType v
        v_rep = typePrimRep v_type
  
-schemeE d s p (fvs, AnnLit literal)
-   = pushAtom True d p (AnnLit literal)        `thenBc` \ (push, szw) ->
+schemeE d s p (AnnLit literal)
+   = pushAtom d p (AnnLit literal)     `thenBc` \ (push, szw) ->
       let l_rep = literalPrimRep literal
       in  returnBc (push                        -- value onto stack
                     `appOL`  mkSLIDE szw (d-s)  -- clear to sequel
-                   `snocOL` RETURN l_rep)      -- go
+                   `snocOL` RETURN_UBX l_rep)  -- go
  
  
-{-
-   Deal specially with the cases
-      let x = fn atom1 .. atomn  in B
-   and
-      let x = Con atom1 .. atomn  in B
-              (Con must be saturated)
-
-   In these cases, generate code to allocate in-line.
-
-   This is optimisation of the general case for let, which follows
-   this one; this case can safely be omitted.  The reduction in
-   interpreter execution time seems to be around 5% for some programs,
-   with a similar drop in allocations.
-
-   This optimisation should be done more cleanly.  As-is, it is
-   inapplicable to RHSs in letrecs, and needlessly duplicates code in
-   schemeR and schemeT.  Some refactoring of the machinery would cure
-   both ills.  
--}
-schemeE d s p ee@(fvs, AnnLet (AnnNonRec x rhs) b)
-   | ok_to_go
-   = let d_init = if is_con then d else d'
-     in
-     mkPushes d_init args_r_to_l_reordered     `thenBc` \ (d_final, push_code) ->
-     schemeE d' s p' b                         `thenBc` \ body_code ->
-     let size  = d_final - d_init
-         alloc = if is_con then nilOL else unitOL (ALLOC size)
-         pack  = unitOL (if is_con then PACK the_dcon size else MKAP size size)
-     in
-         returnBc (alloc `appOL` push_code `appOL` pack
-                   `appOL` body_code)
-     where
-        -- Decide whether we can do this or not
-        (ok_to_go, is_con, the_dcon, the_fn)
-            = case maybe_fn of
-                 Nothing        -> (False, bomb 1, bomb 2, bomb 3)
-                 Just (Left fn) -> (True,  False,  bomb 5, fn)
-                 Just (Right dcon)
-                    |  dataConRepArity dcon <= length args_r_to_l
-                    -> (True, True, dcon, bomb 6)
-                    |  otherwise
-                    -> (False, bomb 7, bomb 8, bomb 9)
-        bomb n = panic ("schemeE.is_con(hacky hack hack) " ++ show n)
-
-        -- Extract the args (R -> L) and fn
-        args_r_to_l_reordered
-           | not is_con
-           = args_r_to_l
-           | otherwise
-           = filter (not.isPtr.snd) args_r_to_l ++ filter (isPtr.snd) args_r_to_l
-             where isPtr = isFollowableRep . atomRep
-
-        args_r_to_l = filter (not.isTypeAtom.snd) args_r_to_l_raw
-        isTypeAtom (AnnType _) = True
-        isTypeAtom _           = False
-
-        (args_r_to_l_raw, maybe_fn) = chomp rhs
-        chomp expr
-           = case snd expr of
-                AnnVar v 
-                   |  isFCallId v || isPrimOpId v  
-                   -> ([], Nothing)
-                   |  otherwise
-                   -> case isDataConId_maybe v of
-                         Just dcon -> ([], Just (Right dcon))
-                         Nothing   -> ([], Just (Left v))
-                AnnApp f a  -> case chomp f of (az, f) -> (a:az, f)
-                AnnNote n e -> chomp e
-                other       -> ([], Nothing)
-
-        -- This is the env in which to translate the body
-        p' = addToFM p x d
-        d' = d + 1
-
-        -- Shove the args on the stack, including the fn in the non-dcon case
-        tag_when_push = not is_con
-
-        mkPushes :: Int{-curr depth-} -> [AnnExpr Id VarSet] 
-                 -> BcM (Int{-final depth-}, BCInstrList)
-        mkPushes dd []
-           | is_con
-           = returnBc (dd, nilOL)
-           | otherwise
-           = pushAtom False dd p' (AnnVar the_fn) `thenBc` \ (fn_push_code, fn_szw) ->
-             returnBc (dd+fn_szw, fn_push_code)
-        mkPushes dd (atom:atoms) 
-           = pushAtom tag_when_push dd p' (snd atom)   
-                                               `thenBc` \ (push1_code, push1_szw) ->
-             mkPushes (dd+push1_szw) atoms     `thenBc` \ (dd_final, push_rest) ->
-             returnBc (dd_final, push1_code `appOL` push_rest)
-
+schemeE d s p (AnnLet (AnnNonRec x (_,rhs)) (_,body))
+   | (AnnVar v, args_r_to_l) <- splitApp rhs,
+     Just data_con <- isDataConWorkId_maybe v,
+     dataConRepArity data_con == length args_r_to_l
+   =   -- Special case for a non-recursive let whose RHS is a 
+       -- saturatred constructor application.
+       -- Just allocate the constructor and carry on
+     mkConAppCode d s p data_con args_r_to_l   `thenBc` \ alloc_code ->
+     schemeE (d+1) s (addToFM p x d) body      `thenBc` \ body_code ->
+     returnBc (alloc_code `appOL` body_code)
  
  -- General case for let.  Generates correct, if inefficient, code in
  -- all situations.
-schemeE d s p (fvs, AnnLet binds b)
+schemeE d s p (AnnLet binds (_,body))
     = let (xs,rhss) = case binds of AnnNonRec x rhs  -> ([x],[rhs])
                                     AnnRec xs_n_rhss -> unzip xs_n_rhss
-         n     = length xs
+         n_binds = length xs
+
+         fvss  = map (fvsToEnv p' . fst) rhss
  
-        is_local id = not (isTyVar id) && elemFM id p'
-         fvss  = map (filter is_local . varSetElems . fst) rhss
+         -- Sizes of free vars, + 1 for the fn
+         sizes = map (\rhs_fvs -> 1 + sum (map idSizeW rhs_fvs)) fvss
  
-         -- Sizes of tagged free vars, + 1 for the fn
-         sizes = map (\rhs_fvs -> 1 + sum (map taggedIdSizeW rhs_fvs)) fvss
+        -- the arity of each rhs
+        arities = map (length . fst . collect []) rhss
  
           -- This p', d' defn is safe because all the items being pushed
           -- are ptrs, so all have size 1.  d' and p' reflect the stack
           -- after the closures have been allocated in the heap (but not
           -- filled in), and pointers to them parked on the stack.
-         p'    = addListToFM p (zipE xs (mkStackOffsets d (nOfThem n 1)))
-         d'    = d + n
-
-         infos = zipE4 fvss sizes xs [n, n-1 .. 1]
+         p'    = addListToFM p (zipE xs (mkStackOffsets d (nOfThem n_binds 1)))
+         d'    = d + n_binds
           zipE  = zipEqual "schemeE"
-         zipE4 = zipWith4Equal "schemeE" (\a b c d -> (a,b,c,d))
  
           -- ToDo: don't build thunks for things with no free variables
-         buildThunk dd ([], size, id, off)
-            = returnBc (PUSH_G (Left (getName id))
+         build_thunk dd [] size bco off
+            = returnBc (PUSH_BCO bco
                          `consOL` unitOL (MKAP (off+size-1) size))
-         buildThunk dd ((fv:fvs), size, id, off)
-            = pushAtom True dd p' (AnnVar fv) 
-                                       `thenBc` \ (push_code, pushed_szw) ->
-              buildThunk (dd+pushed_szw) (fvs, size, id, off)
-                                       `thenBc` \ more_push_code ->
+         build_thunk dd (fv:fvs) size bco off = do
+              (push_code, pushed_szw) <- pushAtom dd p' (AnnVar fv) 
+              more_push_code <- build_thunk (dd+pushed_szw) fvs size bco off
                returnBc (push_code `appOL` more_push_code)
  
-         genThunkCode = mapBc (buildThunk d') infos    `thenBc` \ tcodes ->
-                        returnBc (concatOL tcodes)
-
-         allocCode = toOL (map ALLOC sizes)
-
-        schemeRs [] _ _ = returnBc ()
-        schemeRs (fvs:fvss) (x:xs) (rhs:rhss) = 
-               schemeR False fvs (x,rhs) `thenBc_` schemeRs fvss xs rhss
-     in
-     schemeE d' s p' b                                 `thenBc`  \ bodyCode ->
-     schemeRs fvss xs rhss                             `thenBc_`
-     genThunkCode                                      `thenBc` \ thunkCode ->
-     returnBc (allocCode `appOL` thunkCode `appOL` bodyCode)
-
-
-
-
-
-schemeE d s p (fvs_case, AnnCase (fvs_scrut, scrut) bndr 
-                                 [(DEFAULT, [], (fvs_rhs, rhs))])
-
-   | let isFunType var_type 
-            = case splitTyConApp_maybe var_type of
-                 Just (tycon,_) | isFunTyCon tycon -> True
-                 _ -> False
-         ty_bndr = repType (idType bndr)
-     in isFunType ty_bndr || isTyVarTy ty_bndr
+         alloc_code = toOL (zipWith mkAlloc sizes arities)
+          where mkAlloc sz 0     = ALLOC_AP sz
+                mkAlloc sz arity = ALLOC_PAP arity sz
  
-   -- Nasty hack; treat
-   --     case scrut::suspect of bndr { DEFAULT -> rhs }
-   --     as 
-   --     let bndr = scrut in rhs
-   --     when suspect is polymorphic or arrowtyped
-   -- So the required strictness properties are not observed.
-   -- At some point, must fix this properly.
-   = let new_expr
-            = (fvs_case, 
-               AnnLet 
-                  (AnnNonRec bndr (fvs_scrut, scrut)) (fvs_rhs, rhs)
-              )
+        compile_bind d' fvs x rhs size off = do
+               bco <- schemeR fvs (x,rhs)
+               build_thunk d' fvs size bco off
  
-     in  trace ("WARNING: ignoring polymorphic case in interpreted mode.\n" ++
-                "   Possibly due to strict polymorphic/functional constructor args.\n" ++
-                "   Your program may leak space unexpectedly.\n")
-         (schemeE d s p new_expr)
+        compile_binds = 
+           [ compile_bind d' fvs x rhs size n
+           | (fvs, x, rhs, size, n) <- 
+               zip5 fvss xs rhss sizes [n_binds, n_binds-1 .. 1]
+           ]
+     in do
+     body_code <- schemeE d' s p' body
+     thunk_codes <- sequence compile_binds
+     returnBc (alloc_code `appOL` concatOL thunk_codes `appOL` body_code)
  
  
  
-schemeE d s p (fvs, AnnCase scrut bndr [(DataAlt dc, [bind1, bind2], rhs)])
+schemeE d s p (AnnCase scrut bndr [(DataAlt dc, [bind1, bind2], rhs)])
     | isUnboxedTupleCon dc && VoidRep == typePrimRep (idType bind1)
         -- Convert 
         --      case .... of x { (# VoidRep'd-thing, a #) -> ... }
@@ -497,100 +432,31 @@ schemeE d s p (fvs, AnnCase scrut bndr [(DataAlt dc, [bind1, bind2], rhs)])
         -- Note that it does not matter losing the void-rep thing from the
         -- envt (it won't be bound now) because we never look such things up.
  
-   = --trace "automagic mashing of case alts (# VoidRep, a #)" (
-     schemeE d s p (fvs, AnnCase scrut bind2 [(DEFAULT, [], rhs)])
-       -- Note: 
-     --)
+   = --trace "automagic mashing of case alts (# VoidRep, a #)" $
+     doCase d s p scrut bind2 [(DEFAULT, [], rhs)] True{-unboxed tuple-}
  
-schemeE d s p (fvs, AnnCase scrut bndr [(DataAlt dc, [bind1], rhs)])
+   | isUnboxedTupleCon dc && VoidRep == typePrimRep (idType bind2)
+   = --trace "automagic mashing of case alts (# a, VoidRep #)" $
+     doCase d s p scrut bind1 [(DEFAULT, [], rhs)] True{-unboxed tuple-}
+
+schemeE d s p (AnnCase scrut bndr [(DataAlt dc, [bind1], rhs)])
     | isUnboxedTupleCon dc
         -- Similarly, convert
         --      case .... of x { (# a #) -> ... }
         -- to
         --      case .... of a { DEFAULT -> ... }
-   = --trace "automagic mashing of case alts (# a #)" (
-     schemeE d s p (fvs, AnnCase scrut bind1 [(DEFAULT, [], rhs)])
-     --)
-
-schemeE d s p (fvs, AnnCase scrut bndr alts)
-   = let
-        -- Top of stack is the return itbl, as usual.
-        -- underneath it is the pointer to the alt_code BCO.
-        -- When an alt is entered, it assumes the returned value is
-        -- on top of the itbl.
-        ret_frame_sizeW = 2
-
-        -- Env and depth in which to compile the alts, not including
-        -- any vars bound by the alts themselves
-        d' = d + ret_frame_sizeW + taggedIdSizeW bndr
-        p' = addToFM p bndr (d' - 1)
-
-        scrut_primrep = typePrimRep (idType bndr)
-        isAlgCase
-           | scrut_primrep == PtrRep
-           = True
-          | otherwise
-           = WARN( scrut_primrep `elem` bad_reps,
-                  text "Dire warning: strange rep in primitive case:" <+> ppr bndr )
-                       -- We don't expect to see any of these
-            False
-          where
-            bad_reps = [CodePtrRep, DataPtrRep, RetRep, CostCentreRep]
-
-        -- given an alt, return a discr and code for it.
-        codeAlt alt@(discr, binds_f, rhs)
-           | isAlgCase 
-           = let (unpack_code, d_after_unpack, p_after_unpack)
-                    = mkUnpackCode (filter (not.isTyVar) binds_f) d' p'
-             in  schemeE d_after_unpack s p_after_unpack rhs
-                                       `thenBc` \ rhs_code -> 
-                 returnBc (my_discr alt, unpack_code `appOL` rhs_code)
-           | otherwise 
-           = ASSERT(null binds_f) 
-             schemeE d' s p' rhs       `thenBc` \ rhs_code ->
-             returnBc (my_discr alt, rhs_code)
+   = --trace "automagic mashing of case alts (# a #)"  $
+     doCase d s p scrut bind1 [(DEFAULT, [], rhs)] True{-unboxed tuple-}
  
-        my_discr (DEFAULT, binds, rhs) = NoDiscr
-        my_discr (DataAlt dc, binds, rhs) 
-           | isUnboxedTupleCon dc
-           = unboxedTupleException
-           | otherwise
-           = DiscrP (dataConTag dc - fIRST_TAG)
-        my_discr (LitAlt l, binds, rhs)
-           = case l of MachInt i     -> DiscrI (fromInteger i)
-                       MachFloat r   -> DiscrF (fromRational r)
-                       MachDouble r  -> DiscrD (fromRational r)
-                       MachChar i    -> DiscrI i
-                       _ -> pprPanic "schemeE(AnnCase).my_discr" (ppr l)
+schemeE d s p (AnnCase scrut bndr alts)
+   = doCase d s p scrut bndr alts False{-not an unboxed tuple-}
  
-        maybe_ncons 
-           | not isAlgCase = Nothing
-           | otherwise 
-           = case [dc | (DataAlt dc, _, _) <- alts] of
-                []     -> Nothing
-                (dc:_) -> Just (tyConFamilySize (dataConTyCon dc))
-
-     in 
-     mapBc codeAlt alts                                `thenBc` \ alt_stuff ->
-     mkMultiBranch maybe_ncons alt_stuff               `thenBc` \ alt_final ->
-     let 
-         alt_final_ac = ARGCHECK (taggedIdSizeW bndr) `consOL` alt_final
-         alt_bco_name = getName bndr
-         alt_bco      = mkProtoBCO alt_bco_name alt_final_ac (Left alts)
-     in
-     schemeE (d + ret_frame_sizeW) 
-             (d + ret_frame_sizeW) p scrut             `thenBc` \ scrut_code ->
-
-     emitBc alt_bco                                    `thenBc_`
-     returnBc (PUSH_AS alt_bco_name scrut_primrep `consOL` scrut_code)
-
-
-schemeE d s p (fvs, AnnNote note body)
+schemeE d s p (AnnNote note (_, body))
     = schemeE d s p body
  
  schemeE d s p other
     = pprPanic "ByteCodeGen.schemeE: unhandled case" 
-               (pprCoreExpr (deAnnotate other))
+               (pprCoreExpr (deAnnotate' other))
  
  
  -- Compile code to do a tail call.  Specifically, push the fn,
@@ -602,27 +468,24 @@ schemeE d s p other
  --     The int will be on the stack.  Generate a code sequence
  --     to convert it to the relevant constructor, SLIDE and ENTER.
  --
--- 1.  A nullary constructor.  Push its closure on the stack 
---     and SLIDE and RETURN.
+-- 1.  The fn denotes a ccall.  Defer to generateCCall.
  --
  -- 2.  (Another nasty hack).  Spot (# a::VoidRep, b #) and treat
  --     it simply as  b  -- since the representations are identical
  --     (the VoidRep takes up zero stack space).  Also, spot
  --     (# b #) and treat it as  b.
  --
--- 3.  The fn denotes a ccall.  Defer to generateCCall.
---
--- 4.  Application of a non-nullary constructor, by defn saturated.
+-- 3.  Application of a constructor, by defn saturated.
  --     Split the args into ptrs and non-ptrs, and push the nonptrs, 
  --     then the ptrs, and then do PACK and RETURN.
  --
--- 5.  Otherwise, it must be a function call.  Push the args
+-- 4.  Otherwise, it must be a function call.  Push the args
  --     right to left, SLIDE and ENTER.
  
  schemeT :: Int                 -- Stack depth
          -> Sequel      -- Sequel depth
          -> BCEnv       -- stack env
-        -> AnnExpr Id VarSet 
+        -> AnnExpr' Id VarSet 
          -> BcM BCInstrList
  
  schemeT d s p app
@@ -630,47 +493,41 @@ schemeT d s p app
  --   | trace ("schemeT: env in = \n" ++ showSDocDebug (ppBCEnv p)) False
  --   = panic "schemeT ?!?!"
  
---   | trace ("\nschemeT\n" ++ showSDoc (pprCoreExpr (deAnnotate app)) ++ "\n") False
+--   | trace ("\nschemeT\n" ++ showSDoc (pprCoreExpr (deAnnotate' app)) ++ "\n") False
  --   = error "?!?!" 
  
     -- Case 0
     | Just (arg, constr_names) <- maybe_is_tagToEnum_call
-   = pushAtom True d p arg             `thenBc` \ (push, arg_words) ->
+   = pushAtom d p arg                  `thenBc` \ (push, arg_words) ->
       implement_tagToId constr_names    `thenBc` \ tagToId_sequence ->
       returnBc (push `appOL`  tagToId_sequence            
                      `appOL`  mkSLIDE 1 (d+arg_words-s)
                      `snocOL` ENTER)
  
     -- Case 1
-   | is_con_call && null args_r_to_l
-   = returnBc (
-        (PUSH_G (Left (getName con)) `consOL` mkSLIDE 1 (d-s))
-        `snocOL` ENTER
-     )
-
-   -- Case 2
-   | [arg1,arg2] <- args_r_to_l,
-     let 
-        isVoidRepAtom (_, AnnVar v)    = typePrimRep (idType v) == VoidRep
-         isVoidRepAtom (_, AnnNote n e) = isVoidRepAtom e
-        isVoidRepAtom _ = False
-     in  
-        isVoidRepAtom arg2
-   = --trace (if isSingleton args_r_to_l
-     --       then "schemeT: unboxed singleton"
-     --       else "schemeT: unboxed pair with Void first component") (
-     schemeT d s p arg1
-     --)
-
-   -- Case 3
     | Just (CCall ccall_spec) <- isFCallId_maybe fn
     = generateCCall d s p ccall_spec fn args_r_to_l
  
-   -- Cases 4 and 5
+   -- Case 2: Constructor application
+   | Just con <- maybe_saturated_dcon,
+     isUnboxedTupleCon con
+   = case args_r_to_l of
+       [arg1,arg2] | isVoidRepAtom arg1 -> 
+                 unboxedTupleReturn d s p arg2
+       [arg1,arg2] | isVoidRepAtom arg2 -> 
+                 unboxedTupleReturn d s p arg1
+       _other -> unboxedTupleException
+
+   -- Case 3: Ordinary data constructor
+   | Just con <- maybe_saturated_dcon
+   = mkConAppCode d s p con args_r_to_l        `thenBc` \ alloc_con ->
+     returnBc (alloc_con        `appOL` 
+               mkSLIDE 1 (d - s) `snocOL`
+               ENTER)
+
+   -- Case 4: Tail call of function 
     | otherwise
-   = if   is_con_call && isUnboxedTupleCon con
-     then unboxedTupleException
-     else do_pushery d (map snd args_final_r_to_l)
+   = doTailCall d s p fn args_r_to_l
  
     where
        -- Detect and extract relevant info for the tagToEnum kludge.
@@ -682,95 +539,271 @@ schemeT d s p app
                         other -> panic "maybe_is_tagToEnum_call.extract_constr_Ids"
             in 
             case app of
-              (_, AnnApp (_, AnnApp (_, AnnVar v) (_, AnnType t)) arg)
+              (AnnApp (_, AnnApp (_, AnnVar v) (_, AnnType t)) arg)
                   -> case isPrimOpId_maybe v of
                         Just TagToEnumOp -> Just (snd arg, extract_constr_Names t)
                        other            -> Nothing
                other -> Nothing
  
-      -- Extract the args (R->L) and fn
-      (args_r_to_l, fn) = chomp app
-      chomp expr
-         = case snd expr of
-              AnnVar v    -> ([], v)
-              AnnApp f a
-                | isTypeAtom (snd a) -> chomp f
-                | otherwise          -> case chomp f of (az, f) -> (a:az, f)
-              AnnNote n e -> chomp e
-              other       -> pprPanic "schemeT" 
-                               (ppr (deAnnotate (panic "schemeT.chomp", other)))
+       -- Extract the args (R->L) and fn
+       -- The function will necessarily be a variable, 
+       -- because we are compiling a tail call
+      (AnnVar fn, args_r_to_l) = splitApp app
  
+      -- Only consider this to be a constructor application iff it is
+      -- saturated.  Otherwise, we'll call the constructor wrapper.
        n_args = length args_r_to_l
-
-      isTypeAtom (AnnType _) = True
-      isTypeAtom _           = False
-
-      -- decide if this is a constructor application, because we need
-      -- to rearrange the arguments on the stack if so.  For building
-      -- a constructor, we put pointers before non-pointers and omit
-      -- the tags.
-      --
-      -- Also if the constructor is not saturated, we just arrange to
-      -- call the curried worker instead.
-
-      maybe_dcon  = case isDataConId_maybe fn of
-                       Just con | dataConRepArity con == n_args -> Just con
-                       _ -> Nothing
-      is_con_call = isJust maybe_dcon
-      (Just con)  = maybe_dcon
-
-      args_final_r_to_l
-         | not is_con_call
-         = args_r_to_l
-         | otherwise
-         = filter (not.isPtr.snd) args_r_to_l ++ filter (isPtr.snd) args_r_to_l
-           where isPtr = isFollowableRep . atomRep
-
-      -- make code to push the args and then do the SLIDE-ENTER thing
-      tag_when_push = not is_con_call
-      narg_words    = sum (map (get_arg_szw . atomRep . snd) args_r_to_l)
-      get_arg_szw   = if tag_when_push then taggedSizeW else untaggedSizeW
+      maybe_saturated_dcon  
+       = case isDataConWorkId_maybe fn of
+               Just con | dataConRepArity con == n_args -> Just con
+               _ -> Nothing
+
+-- -----------------------------------------------------------------------------
+-- Generate code to build a constructor application, 
+-- leaving it on top of the stack
+
+mkConAppCode :: Int -> Sequel -> BCEnv
+            -> DataCon                 -- The data constructor
+            -> [AnnExpr' Id VarSet]    -- Args, in *reverse* order
+            -> BcM BCInstrList
+
+mkConAppCode orig_d s p con [] -- Nullary constructor
+  = ASSERT( isNullaryDataCon con )
+    returnBc (unitOL (PUSH_G (getName (dataConWorkId con))))
+       -- Instead of doing a PACK, which would allocate a fresh
+       -- copy of this constructor, use the single shared version.
+
+mkConAppCode orig_d s p con args_r_to_l 
+  = ASSERT( dataConRepArity con == length args_r_to_l )
+    do_pushery orig_d (non_ptr_args ++ ptr_args)
+ where
+       -- The args are already in reverse order, which is the way PACK
+       -- expects them to be.  We must push the non-ptrs after the ptrs.
+      (ptr_args, non_ptr_args) = partition isPtrAtom args_r_to_l
  
        do_pushery d (arg:args)
-         = pushAtom tag_when_push d p arg      `thenBc` \ (push, arg_words) ->
+         = pushAtom d p arg                    `thenBc` \ (push, arg_words) ->
             do_pushery (d+arg_words) args       `thenBc` \ more_push_code ->
             returnBc (push `appOL` more_push_code)
        do_pushery d []
-         | Just (CCall ccall_spec) <- isFCallId_maybe fn
-         = panic "schemeT.do_pushery: unexpected ccall"
-         | otherwise
-         = case maybe_dcon of
-              Just con -> returnBc (
-                             (PACK con narg_words `consOL`
-                              mkSLIDE 1 (d - narg_words - s)) `snocOL`
-                              ENTER
-                          )
-              Nothing
-                 -> pushAtom True d p (AnnVar fn)      
-                                               `thenBc` \ (push, arg_words) ->
-                    returnBc (push `appOL` mkSLIDE (narg_words+arg_words) 
-                                                   (d - s - narg_words)
-                              `snocOL` ENTER)
-
-
-{- Deal with a CCall.  Taggedly push the args onto the stack R->L,
-   deferencing ForeignObj#s and (ToDo: adjusting addrs to point to
-   payloads in Ptr/Byte arrays).  Then, generate the marshalling
-   (machine) code for the ccall, and create bytecodes to call that and
-   then return in the right way.  
--}
+         = returnBc (unitOL (PACK con n_arg_words))
+        where
+          n_arg_words = d - orig_d
+
+
+-- -----------------------------------------------------------------------------
+-- Returning an unboxed tuple with one non-void component (the only
+-- case we can handle).
+--
+-- Remember, we don't want to *evaluate* the component that is being
+-- returned, even if it is a pointed type.  We always just return.
+
+unboxedTupleReturn
+       :: Int -> Sequel -> BCEnv
+       -> AnnExpr' Id VarSet -> BcM BCInstrList
+unboxedTupleReturn d s p arg = do
+  (push, sz) <- pushAtom d p arg
+  returnBc (push `appOL`
+           mkSLIDE sz (d-s) `snocOL`
+           RETURN_UBX (atomRep arg))
+
+-- -----------------------------------------------------------------------------
+-- Generate code for a tail-call
+
+doTailCall
+       :: Int -> Sequel -> BCEnv
+       -> Id -> [AnnExpr' Id VarSet]
+       -> BcM BCInstrList
+doTailCall init_d s p fn args
+  = do_pushes init_d args (map (primRepToArgRep.atomRep) args)
+  where
+  do_pushes d [] reps = do
+       ASSERTM( null reps )
+        (push_fn, sz) <- pushAtom d p (AnnVar fn)
+       ASSERTM( sz == 1 )
+       returnBc (push_fn `appOL` (
+                 mkSLIDE ((d-init_d) + 1) (init_d - s) `appOL`
+                 unitOL ENTER))
+  do_pushes d args reps = do
+      let (push_apply, n, rest_of_reps) = findPushSeq reps
+         (these_args, rest_of_args) = splitAt n args
+      (next_d, push_code) <- push_seq d these_args
+      instrs <- do_pushes (next_d + 1) rest_of_args rest_of_reps 
+               --                ^^^ for the PUSH_APPLY_ instruction
+      returnBc (push_code `appOL` (push_apply `consOL` instrs))
+
+  push_seq d [] = return (d, nilOL)
+  push_seq d (arg:args) = do
+    (push_code, sz) <- pushAtom d p arg 
+    (final_d, more_push_code) <- push_seq (d+sz) args
+    return (final_d, push_code `appOL` more_push_code)
+
+-- v. similar to CgStackery.findMatch, ToDo: merge
+findPushSeq (RepP: RepP: RepP: RepP: RepP: RepP: RepP: rest)
+  = (PUSH_APPLY_PPPPPPP, 7, rest)
+findPushSeq (RepP: RepP: RepP: RepP: RepP: RepP: rest)
+  = (PUSH_APPLY_PPPPPP, 6, rest)
+findPushSeq (RepP: RepP: RepP: RepP: RepP: rest)
+  = (PUSH_APPLY_PPPPP, 5, rest)
+findPushSeq (RepP: RepP: RepP: RepP: rest)
+  = (PUSH_APPLY_PPPP, 4, rest)
+findPushSeq (RepP: RepP: RepP: rest)
+  = (PUSH_APPLY_PPP, 3, rest)
+findPushSeq (RepP: RepP: rest)
+  = (PUSH_APPLY_PP, 2, rest)
+findPushSeq (RepP: rest)
+  = (PUSH_APPLY_P, 1, rest)
+findPushSeq (RepV: rest)
+  = (PUSH_APPLY_V, 1, rest)
+findPushSeq (RepN: rest)
+  = (PUSH_APPLY_N, 1, rest)
+findPushSeq (RepF: rest)
+  = (PUSH_APPLY_F, 1, rest)
+findPushSeq (RepD: rest)
+  = (PUSH_APPLY_D, 1, rest)
+findPushSeq (RepL: rest)
+  = (PUSH_APPLY_L, 1, rest)
+findPushSeq _
+  = panic "ByteCodeGen.findPushSeq"
+
+-- -----------------------------------------------------------------------------
+-- Case expressions
+
+doCase  :: Int -> Sequel -> BCEnv
+       -> AnnExpr Id VarSet -> Id -> [AnnAlt Id VarSet]
+       -> Bool  -- True <=> is an unboxed tuple case, don't enter the result
+       -> BcM BCInstrList
+doCase d s p (_,scrut)
+ bndr alts is_unboxed_tuple
+  = let
+        -- Top of stack is the return itbl, as usual.
+        -- underneath it is the pointer to the alt_code BCO.
+        -- When an alt is entered, it assumes the returned value is
+        -- on top of the itbl.
+        ret_frame_sizeW = 2
+
+       -- An unlifted value gets an extra info table pushed on top
+       -- when it is returned.
+       unlifted_itbl_sizeW | isAlgCase = 0
+                           | otherwise = 1
+
+       -- depth of stack after the return value has been pushed
+       d_bndr = d + ret_frame_sizeW + idSizeW bndr
+
+       -- depth of stack after the extra info table for an unboxed return
+       -- has been pushed, if any.  This is the stack depth at the
+       -- continuation.
+        d_alts = d_bndr + unlifted_itbl_sizeW
+
+        -- Env in which to compile the alts, not including
+        -- any vars bound by the alts themselves
+        p_alts = addToFM p bndr (d_bndr - 1)
+
+       bndr_ty = idType bndr
+        isAlgCase = not (isUnLiftedType bndr_ty) && not is_unboxed_tuple
+
+        -- given an alt, return a discr and code for it.
+       codeALt alt@(DEFAULT, _, (_,rhs))
+          = schemeE d_alts s p_alts rhs        `thenBc` \ rhs_code ->
+            returnBc (NoDiscr, rhs_code)
+        codeAlt alt@(discr, bndrs, (_,rhs))
+          -- primitive or nullary constructor alt: no need to UNPACK
+          | null real_bndrs = do
+               rhs_code <- schemeE d_alts s p_alts rhs
+                returnBc (my_discr alt, rhs_code)
+          -- algebraic alt with some binders
+           | ASSERT(isAlgCase) otherwise =
+             let
+                (ptrs,nptrs) = partition (isFollowableRep.idPrimRep) real_bndrs
+                ptr_sizes    = map idSizeW ptrs
+                nptrs_sizes  = map idSizeW nptrs
+                bind_sizes   = ptr_sizes ++ nptrs_sizes
+                size         = sum ptr_sizes + sum nptrs_sizes
+                -- the UNPACK instruction unpacks in reverse order...
+                p' = addListToFM p_alts 
+                       (zip (reverse (ptrs ++ nptrs))
+                         (mkStackOffsets d_alts (reverse bind_sizes)))
+            in do
+            rhs_code <- schemeE (d_alts+size) s p' rhs
+             return (my_discr alt, unitOL (UNPACK size) `appOL` rhs_code)
+          where
+            real_bndrs = filter (not.isTyVar) bndrs
+
+
+        my_discr (DEFAULT, binds, rhs) = NoDiscr {-shouldn't really happen-}
+        my_discr (DataAlt dc, binds, rhs) 
+           | isUnboxedTupleCon dc
+           = unboxedTupleException
+           | otherwise
+           = DiscrP (dataConTag dc - fIRST_TAG)
+        my_discr (LitAlt l, binds, rhs)
+           = case l of MachInt i     -> DiscrI (fromInteger i)
+                       MachFloat r   -> DiscrF (fromRational r)
+                       MachDouble r  -> DiscrD (fromRational r)
+                       MachChar i    -> DiscrI i
+                       _ -> pprPanic "schemeE(AnnCase).my_discr" (ppr l)
+
+        maybe_ncons 
+           | not isAlgCase = Nothing
+           | otherwise 
+           = case [dc | (DataAlt dc, _, _) <- alts] of
+                []     -> Nothing
+                (dc:_) -> Just (tyConFamilySize (dataConTyCon dc))
+
+       -- the bitmap is relative to stack depth d, i.e. before the
+       -- BCO, info table and return value are pushed on.
+       -- This bit of code is v. similar to buildLivenessMask in CgBindery,
+       -- except that here we build the bitmap from the known bindings of
+       -- things that are pointers, whereas in CgBindery the code builds the
+       -- bitmap from the free slots and unboxed bindings.
+       -- (ToDo: merge?)
+       bitmap = intsToBitmap d{-size-} (sortLt (<) rel_slots)
+         where
+         binds = fmToList p
+         rel_slots = concat (map spread binds)
+         spread (id, offset)
+               | isFollowableRep (idPrimRep id) = [ rel_offset ]
+               | otherwise = []
+               where rel_offset = d - offset - 1
+
+     in do
+     alt_stuff <- mapM codeAlt alts
+     alt_final <- mkMultiBranch maybe_ncons alt_stuff
+     let 
+         alt_bco_name = getName bndr
+         alt_bco = mkProtoBCO alt_bco_name alt_final (Left alts)
+                       0{-no arity-} d{-bitmap size-} bitmap True{-is alts-}
+     -- in
+--     trace ("case: bndr = " ++ showSDocDebug (ppr bndr) ++ "\ndepth = " ++ show d ++ "\nenv = \n" ++ showSDocDebug (ppBCEnv p) ++
+--          "\n      bitmap = " ++ show bitmap) $ do
+     scrut_code <- schemeE (d + ret_frame_sizeW) (d + ret_frame_sizeW) p scrut
+     alt_bco' <- emitBc alt_bco
+     let push_alts
+           | isAlgCase = PUSH_ALTS alt_bco'
+           | otherwise = PUSH_ALTS_UNLIFTED alt_bco' (typePrimRep bndr_ty)
+     returnBc (push_alts `consOL` scrut_code)
+
+
+-- -----------------------------------------------------------------------------
+-- Deal with a CCall.
+
+-- Taggedly push the args onto the stack R->L,
+-- deferencing ForeignObj#s and (ToDo: adjusting addrs to point to
+-- payloads in Ptr/Byte arrays).  Then, generate the marshalling
+-- (machine) code for the ccall, and create bytecodes to call that and
+-- then return in the right way.  
+
  generateCCall :: Int -> Sequel                 -- stack and sequel depths
                -> BCEnv
                -> CCallSpec             -- where to call
                -> Id                    -- of target, for type info
-              -> [AnnExpr Id VarSet]   -- args (atoms)
+              -> [AnnExpr' Id VarSet]  -- args (atoms)
                -> BcM BCInstrList
  
  generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
     = let 
           -- useful constants
-         addr_usizeW = untaggedSizeW AddrRep
-         addr_tsizeW = taggedSizeW AddrRep
+         addr_sizeW = getPrimRepSize AddrRep
  
           -- Get the args on the stack, with tags and suitably
           -- dereferenced for the CCall.  For each arg, return the
@@ -778,61 +811,49 @@ generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
           -- PrimRep of what was actually pushed.
  
           pargs d [] = returnBc []
-         pargs d ((_,a):az) 
-            = let rep_arg = atomRep a
-              in case rep_arg of
+         pargs d (a:az) 
+            = let arg_ty = repType (exprType (deAnnotate' a))
+
+              in case splitTyConApp_maybe arg_ty of
                      -- Don't push the FO; instead push the Addr# it
                      -- contains.
-                    ForeignObjRep
-                       -> pushAtom False{-irrelevant-} d p a
-                                                       `thenBc` \ (push_fo, _) ->
-                          let foro_szW = taggedSizeW ForeignObjRep
-                              d_now    = d + addr_tsizeW
-                              code     = push_fo `appOL` toOL [
-                                            UPK_TAG addr_usizeW 0 0,
-                                            SLIDE addr_tsizeW foro_szW
-                                         ]
-                          in  pargs d_now az           `thenBc` \ rest ->
-                              returnBc ((code, AddrRep) : rest)
-
-                    ArrayRep
-                       -> pargs (d + addr_tsizeW) az   `thenBc` \ rest ->
+                   Just (t, _)
+                    | t == arrayPrimTyCon || t == mutableArrayPrimTyCon
+                       -> pargs (d + addr_sizeW) az    `thenBc` \ rest ->
                            parg_ArrayishRep arrPtrsHdrSize d p a
                                                         `thenBc` \ code ->
                            returnBc ((code,AddrRep):rest)
  
-                    ByteArrayRep
-                       -> pargs (d + addr_tsizeW) az   `thenBc` \ rest ->
+                    | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon
+                       -> pargs (d + addr_sizeW) az    `thenBc` \ rest ->
                            parg_ArrayishRep arrWordsHdrSize d p a
                                                         `thenBc` \ code ->
                            returnBc ((code,AddrRep):rest)
  
                      -- Default case: push taggedly, but otherwise intact.
                      other
-                       -> pushAtom True d p a          `thenBc` \ (code_a, sz_a) ->
+                       -> pushAtom d p a               `thenBc` \ (code_a, sz_a) ->
                            pargs (d+sz_a) az            `thenBc` \ rest ->
-                          returnBc ((code_a, rep_arg) : rest)
+                          returnBc ((code_a, atomRep a) : rest)
  
           -- Do magic for Ptr/Byte arrays.  Push a ptr to the array on
           -- the stack but then advance it over the headers, so as to
           -- point to the payload.
           parg_ArrayishRep hdrSizeW d p a
-            = pushAtom False{-irrel-} d p a `thenBc` \ (push_fo, _) ->
+            = pushAtom d p a `thenBc` \ (push_fo, _) ->
                -- The ptr points at the header.  Advance it over the
-              -- header and then pretend this is an Addr# (push a tag).
+              -- header and then pretend this is an Addr#.
                returnBc (push_fo `snocOL` 
-                        SWIZZLE 0 (hdrSizeW * untaggedSizeW PtrRep
-                                            * wORD_SIZE) 
-                        `snocOL`
-                        PUSH_TAG addr_usizeW)
+                        SWIZZLE 0 (hdrSizeW * getPrimRepSize WordRep
+                                            * wORD_SIZE))
  
       in
-         pargs d0 args_r_to_l                          `thenBc` \ code_n_reps ->
+         pargs d0 args_r_to_l                  `thenBc` \ code_n_reps ->
       let
           (pushs_arg, a_reps_pushed_r_to_l) = unzip code_n_reps
  
           push_args    = concatOL pushs_arg
-         d_after_args = d0 + sum (map taggedSizeW a_reps_pushed_r_to_l)
+         d_after_args = d0 + sum (map getPrimRepSize a_reps_pushed_r_to_l)
           a_reps_pushed_RAW
              | null a_reps_pushed_r_to_l || head a_reps_pushed_r_to_l /= VoidRep
              = panic "ByteCodeGen.generateCCall: missing or invalid World token?"
@@ -851,7 +872,7 @@ generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
           {-
           Because the Haskell stack grows down, the a_reps refer to 
           lowest to highest addresses in that order.  The args for the call
-         are on the stack.  Now push an unboxed, tagged Addr# indicating
+         are on the stack.  Now push an unboxed Addr# indicating
           the C function to call.  Then push a dummy placeholder for the 
           result.  Finally, emit a CCALL insn with an offset pointing to the 
           Addr# just pushed, and a literal field holding the mallocville
@@ -873,8 +894,7 @@ generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
           depth, and we RETURN.
  
           This arrangement makes it simple to do f-i-dynamic since the Addr#
-         value is the first arg anyway.  It also has the virtue that the
-         stack is GC-understandable at all times.
+         value is the first arg anyway.
  
           The marshalling code is generated specifically for this
           call site, and so knows exactly the (Haskell) stack
@@ -890,12 +910,8 @@ generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
                   DynamicTarget
                      -> returnBc (False, panic "ByteCodeGen.generateCCall(dyn)")
                   StaticTarget target
-                    -> let sym_to_find = unpackFS target in
-                       ioToBc (lookupSymbol sym_to_find) `thenBc` \res ->
-                       case res of
-                           Just aa -> returnBc (True, aa)
-                           Nothing -> ioToBc (linkFail "ByteCodeGen.generateCCall" 
-                                                       sym_to_find)
+                    -> ioToBc (lookupStaticPtr target) `thenBc` \res ->
+                       returnBc (True, res)
                   CasmTarget _
                      -> pprPanic "ByteCodeGen.generateCCall: casm" (ppr ccall_spec)
       in
@@ -912,48 +928,49 @@ generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
           -- push the Addr#
           (push_Addr, d_after_Addr)
              | is_static
-            = (toOL [PUSH_UBX (Right static_target_addr) addr_usizeW,
-                     PUSH_TAG addr_usizeW],
-               d_after_args + addr_tsizeW)
+            = (toOL [PUSH_UBX (Right static_target_addr) addr_sizeW],
+               d_after_args + addr_sizeW)
              | otherwise        -- is already on the stack
              = (nilOL, d_after_args)
  
           -- Push the return placeholder.  For a call returning nothing,
           -- this is a VoidRep (tag).
-         r_usizeW  = untaggedSizeW r_rep
-         r_tsizeW  = taggedSizeW r_rep
-         d_after_r = d_after_Addr + r_tsizeW
+         r_sizeW   = getPrimRepSize r_rep
+         d_after_r = d_after_Addr + r_sizeW
           r_lit     = mkDummyLiteral r_rep
           push_r    = (if   returns_void 
                        then nilOL 
-                      else unitOL (PUSH_UBX (Left r_lit) r_usizeW))
-                      `appOL` 
-                      unitOL (PUSH_TAG r_usizeW)
+                      else unitOL (PUSH_UBX (Left r_lit) r_sizeW))
  
           -- generate the marshalling code we're going to call
           r_offW       = 0 
-         addr_offW    = r_tsizeW
-         arg1_offW    = r_tsizeW + addr_tsizeW
+         addr_offW    = r_sizeW
+         arg1_offW    = r_sizeW + addr_sizeW
           args_offW    = map (arg1_offW +) 
-                            (init (scanl (+) 0 (map taggedSizeW a_reps)))
+                            (init (scanl (+) 0 (map getPrimRepSize a_reps)))
       in
           ioToBc (mkMarshalCode cconv
                      (r_offW, r_rep) addr_offW
                      (zip args_offW a_reps))    `thenBc` \ addr_of_marshaller ->
           recordMallocBc addr_of_marshaller     `thenBc_`
       let
+        -- Offset of the next stack frame down the stack.  The CCALL
+        -- instruction needs to describe the chunk of stack containing
+        -- the ccall args to the GC, so it needs to know how large it
+        -- is.  See comment in Interpreter.c with the CCALL instruction.
+        stk_offset   = d_after_r - s
+
           -- do the call
-         do_call      = unitOL (CCALL (castPtr addr_of_marshaller))
+         do_call      = unitOL (CCALL stk_offset (castPtr addr_of_marshaller))
           -- slide and return
-         wrapup       = mkSLIDE r_tsizeW (d_after_r - r_tsizeW - s)
-                        `snocOL` RETURN r_rep
+         wrapup       = mkSLIDE r_sizeW (d_after_r - r_sizeW - s)
+                        `snocOL` RETURN_UBX r_rep
       in
-         --trace (show (arg1_offW, args_offW  ,  (map taggedSizeW a_reps) )) (
+         --trace (show (arg1_offW, args_offW  ,  (map getPrimRepSize a_reps) )) $
           returnBc (
           push_args `appOL`
           push_Addr `appOL` push_r `appOL` do_call `appOL` wrapup
           )
-         --)
  
  
  -- Make a dummy literal, to be used as a placeholder for FFI return
@@ -966,7 +983,7 @@ mkDummyLiteral pr
          WordRep   -> MachWord 0
          DoubleRep -> MachDouble 0
          FloatRep  -> MachFloat 0
-        AddrRep   | taggedSizeW AddrRep == taggedSizeW WordRep -> MachWord 0
+        AddrRep   | getPrimRepSize AddrRep == getPrimRepSize WordRep -> MachWord 0
          _         -> moan64 "mkDummyLiteral" (ppr pr)
  
  
@@ -1005,17 +1022,8 @@ maybe_getCCallReturnRep fn_ty
           blargh = pprPanic "maybe_getCCallReturn: can't handle:" 
                             (pprType fn_ty)
       in 
-     --trace (showSDoc (ppr (a_reps, r_reps))) (
+     --trace (showSDoc (ppr (a_reps, r_reps))) $
       if ok then maybe_r_rep_to_go else blargh
-     --)
-
-atomRep (AnnVar v)    = typePrimRep (idType v)
-atomRep (AnnLit l)    = literalPrimRep l
-atomRep (AnnNote n b) = atomRep (snd b)
-atomRep (AnnApp f (_, AnnType _)) = atomRep (snd f)
-atomRep (AnnLam x e) | isTyVar x = atomRep (snd e)
-atomRep other = pprPanic "atomRep" (ppr (deAnnotate (undefined,other)))
-
  
  -- Compile code which expects an unboxed Int on the top of stack,
  -- (call it i), and pushes the i'th closure in the supplied list 
@@ -1036,152 +1044,68 @@ implement_tagToId names
          mkStep l_exit (my_label, next_label, n, name_for_n)
             = toOL [LABEL my_label, 
                     TESTEQ_I n next_label, 
-                   PUSH_G (Left name_for_n), 
+                   PUSH_G name_for_n, 
                     JMP l_exit]
  
  
--- Make code to unpack the top-of-stack constructor onto the stack, 
--- adding tags for the unboxed bits.  Takes the PrimReps of the 
--- constructor's arguments.  off_h and off_s are travelling offsets
--- along the constructor and the stack.
---
--- Supposing a constructor in the heap has layout
---
---      Itbl p_1 ... p_i np_1 ... np_j
---
--- then we add to the stack, shown growing down, the following:
---
---    (previous stack)
---         p_i
---         ...
---         p_1
---         np_j
---         tag_for(np_j)
---         ..
---         np_1
---         tag_for(np_1)
---
--- so that in the common case (ptrs only) a single UNPACK instr can
--- copy all the payload of the constr onto the stack with no further ado.
-
-mkUnpackCode :: [Id]   -- constr args
-             -> Int    -- depth before unpack
-             -> BCEnv  -- env before unpack
-             -> (BCInstrList, Int, BCEnv)
-mkUnpackCode vars d p
-   = --trace ("mkUnpackCode: " ++ showSDocDebug (ppr vars)
-     --       ++ " --> " ++ show d' ++ "\n" ++ showSDocDebug (ppBCEnv p')
-     --       ++ "\n") (
-     (code_p `appOL` code_np, d', p')
-     --)
-     where
-        -- vars with reps
-        vreps = [(var, typePrimRep (idType var)) | var <- vars]
-
-        -- ptrs and nonptrs, forward
-        vreps_p  = filter (isFollowableRep.snd) vreps
-        vreps_np = filter (not.isFollowableRep.snd) vreps
-
-        -- the order in which we will augment the environment
-        vreps_env = reverse vreps_p ++ reverse vreps_np
-
-        -- new env and depth
-        vreps_env_tszsw = map (taggedSizeW.snd) vreps_env
-        p' = addListToFM p (zip (map fst vreps_env) 
-                                (mkStackOffsets d vreps_env_tszsw))
-        d' = d + sum vreps_env_tszsw
-
-        -- code to unpack the ptrs
-        ptrs_szw = sum (map (untaggedSizeW.snd) vreps_p)
-        code_p | null vreps_p = nilOL
-               | otherwise    = unitOL (UNPACK ptrs_szw)
-
-        -- code to unpack the nonptrs
-        vreps_env_uszw = sum (map (untaggedSizeW.snd) vreps_env)
-        code_np = do_nptrs vreps_env_uszw ptrs_szw (reverse (map snd vreps_np))
-        do_nptrs off_h off_s [] = nilOL
-        do_nptrs off_h off_s (npr:nprs)
-           | npr `elem` [IntRep, WordRep, FloatRep, DoubleRep, 
-                         CharRep, AddrRep, StablePtrRep]
-           = approved
-           | otherwise
-           = moan64 "ByteCodeGen.mkUnpackCode" (ppr npr)
-             where
-                approved = UPK_TAG usizeW (off_h-usizeW) off_s   `consOL` theRest
-                theRest  = do_nptrs (off_h-usizeW) (off_s + tsizeW) nprs
-                usizeW   = untaggedSizeW npr
-                tsizeW   = taggedSizeW npr
-
+-- -----------------------------------------------------------------------------
+-- pushAtom
  
  -- Push an atom onto the stack, returning suitable code & number of
--- stack words used.  Pushes it either tagged or untagged, since 
--- pushAtom is used to set up the stack prior to copying into the
--- heap for both APs (requiring tags) and constructors (which don't).
---
--- NB this means NO GC between pushing atoms for a constructor and
--- copying them into the heap.  It probably also means that 
--- tail calls MUST be of the form atom{atom ... atom} since if the
--- expression head was allowed to be arbitrary, there could be GC
--- in between pushing the arg atoms and completing the head.
--- (not sure; perhaps the allocate/doYouWantToGC interface means this
--- isn't a problem; but only if arbitrary graph construction for the
--- head doesn't leave this BCO, since GC might happen at the start of
--- each BCO (we consult doYouWantToGC there).
+-- stack words used.
  --
--- Blargh.  JRS 001206
---
--- NB (further) that the env p must map each variable to the highest-
--- numbered stack slot for it.  For example, if the stack has depth 4 
--- and we tagged-ly push (v :: Int#) on it, the value will be in stack[4],
--- the tag in stack[5], the stack will have depth 6, and p must map v to
--- 5 and not to 4.  Stack locations are numbered from zero, so a depth
--- 6 stack has valid words 0 .. 5.
+-- The env p must map each variable to the highest- numbered stack
+-- slot for it.  For example, if the stack has depth 4 and we
+-- tagged-ly push (v :: Int#) on it, the value will be in stack[4],
+-- the tag in stack[5], the stack will have depth 6, and p must map v
+-- to 5 and not to 4.  Stack locations are numbered from zero, so a
+-- depth 6 stack has valid words 0 .. 5.
+
+pushAtom :: Int -> BCEnv -> AnnExpr' Id VarSet -> BcM (BCInstrList, Int)
  
-pushAtom :: Bool -> Int -> BCEnv -> AnnExpr' Id VarSet -> BcM (BCInstrList, Int)
-pushAtom tagged d p (AnnVar v)
+pushAtom d p (AnnApp f (_, AnnType _))
+   = pushAtom d p (snd f)
+
+pushAtom d p (AnnNote note e)
+   = pushAtom d p (snd e)
+
+pushAtom d p (AnnLam x e) 
+   | isTyVar x 
+   = pushAtom d p (snd e)
+
+pushAtom d p (AnnVar v)
  
     | idPrimRep v == VoidRep
-   = if tagged then returnBc (unitOL (PUSH_TAG 0), 1) 
-               else panic "ByteCodeGen.pushAtom(VoidRep,untaggedly)"
+   = returnBc (nilOL, 0)
  
     | isFCallId v
     = pprPanic "pushAtom: shouldn't get an FCallId here" (ppr v)
  
     | Just primop <- isPrimOpId_maybe v
-   = returnBc (unitOL (PUSH_G (Right primop)), 1)
-
-   | otherwise
-   = let  {-
-         str = "\npushAtom " ++ showSDocDebug (ppr v) 
-               ++ " :: " ++ showSDocDebug (pprType (idType v))
-               ++ ", depth = " ++ show d
-               ++ ", tagged = " ++ show tagged ++ ", env =\n" ++ 
-               showSDocDebug (ppBCEnv p)
-               ++ " --> words: " ++ show (snd result) ++ "\n" ++
-               showSDoc (nest 4 (vcat (map ppr (fromOL (fst result)))))
-               ++ "\nendPushAtom " ++ showSDocDebug (ppr v)
-        -}
-
-         result
-            = case lookupBCEnv_maybe p v of
-                 Just d_v -> (toOL (nOfThem nwords (PUSH_L (d-d_v+sz_t-2))), nwords)
-                 Nothing  -> ASSERT(sz_t == 1) (unitOL (PUSH_G (Left nm)), nwords)
-
-         nm = case isDataConId_maybe v of
-                 Just c  -> getName c
-                 Nothing -> getName v
-
-         sz_t   = taggedIdSizeW v
-         sz_u   = untaggedIdSizeW v
-         nwords = if tagged then sz_t else sz_u
-     in
-         returnBc result
-
-pushAtom True d p (AnnLit lit)
-   = pushAtom False d p (AnnLit lit)           `thenBc` \ (ubx_code, ubx_size) ->
-     returnBc (ubx_code `snocOL` PUSH_TAG ubx_size, 1 + ubx_size)
-
-pushAtom False d p (AnnLit lit)
+   = returnBc (unitOL (PUSH_PRIMOP primop), 1)
+
+   | Just d_v <- lookupBCEnv_maybe p v  -- v is a local variable
+   = returnBc (toOL (nOfThem sz (PUSH_L (d-d_v+sz-2))), sz)
+        -- d - d_v                 the number of words between the TOS 
+        --                         and the 1st slot of the object
+        --
+        -- d - d_v - 1             the offset from the TOS of the 1st slot
+        --
+        -- d - d_v - 1 + sz - 1    the offset from the TOS of the last slot
+        --                         of the object.
+        --
+        -- Having found the last slot, we proceed to copy the right number of
+        -- slots on to the top of the stack.
+
+    | otherwise  -- v must be a global variable
+    = ASSERT(sz == 1) 
+      returnBc (unitOL (PUSH_G (getName v)), sz)
+
+    where
+         sz = idSizeW v
+
+
+pushAtom d p (AnnLit lit)
     = case lit of
          MachLabel fs -> code CodePtrRep
          MachWord w   -> code WordRep
@@ -1192,7 +1116,7 @@ pushAtom False d p (AnnLit lit)
          MachStr s    -> pushStr s
       where
          code rep
-           = let size_host_words = untaggedSizeW rep
+           = let size_host_words = getPrimRepSize rep
               in  returnBc (unitOL (PUSH_UBX (Left lit) size_host_words), 
                             size_host_words)
  
@@ -1203,8 +1127,8 @@ pushAtom False d p (AnnLit lit)
                              -- sigh, a string in the heap is no good to us.
                              -- We need a static C pointer, since the type of 
                              -- a string literal is Addr#.  So, copy the string 
-                            -- into C land and introduce a memory leak 
-                            -- at the same time.
+                            -- into C land and remember the pointer so we can
+                           -- free it later.
                              let n = I# l
                              -- CAREFUL!  Chars are 32 bits in ghc 4.09+
                              in  ioToBc (mallocBytes (n+1)) `thenBc` \ ptr ->
@@ -1220,30 +1144,19 @@ pushAtom False d p (AnnLit lit)
                  -- Get the addr on the stack, untaggedly
                     returnBc (unitOL (PUSH_UBX (Right addr) 1), 1)
  
-
-
-
-
-pushAtom tagged d p (AnnApp f (_, AnnType _))
-   = pushAtom tagged d p (snd f)
-
-pushAtom tagged d p (AnnNote note e)
-   = pushAtom tagged d p (snd e)
-
-pushAtom tagged d p (AnnLam x e) 
-   | isTyVar x 
-   = pushAtom tagged d p (snd e)
-
-pushAtom tagged d p other
+pushAtom d p other
     = pprPanic "ByteCodeGen.pushAtom" 
                (pprCoreExpr (deAnnotate (undefined, other)))
  
-foreign import "memcpy" memcpy :: Ptr a -> ByteArray# -> CInt -> IO ()
+foreign import ccall unsafe "memcpy"
+ memcpy :: Ptr a -> ByteArray# -> CInt -> IO ()
  
  
+-- -----------------------------------------------------------------------------
  -- Given a bunch of alts code and their discrs, do the donkey work
  -- of making a multiway branch using a switch tree.
  -- What a load of hassle!
+
  mkMultiBranch :: Maybe Int     -- # datacons in tycon, if alg alt
                                 -- a hint; generates better code
                                 -- Nothing is always safe
@@ -1347,15 +1260,9 @@ mkMultiBranch maybe_ncons raw_ways
       in
           mkTree notd_ways init_lo init_hi
  
-\end{code}
-
-%************************************************************************
-%*                                                                     *
-\subsection{Supporting junk for the compilation schemes}
-%*                                                                     *
-%************************************************************************
  
-\begin{code}
+-- -----------------------------------------------------------------------------
+-- Supporting junk for the compilation schemes
  
  -- Describes case alts
  data Discr 
@@ -1373,22 +1280,11 @@ instance Outputable Discr where
     ppr NoDiscr    = text "DEF"
  
  
--- Find things in the BCEnv (the what's-on-the-stack-env)
--- See comment preceding pushAtom for precise meaning of env contents
---lookupBCEnv :: BCEnv -> Id -> Int
---lookupBCEnv env nm
---   = case lookupFM env nm of
---        Nothing -> pprPanic "lookupBCEnv" 
---                            (ppr nm $$ char ' ' $$ vcat (map ppr (fmToList env)))
---        Just xx -> xx
-
  lookupBCEnv_maybe :: BCEnv -> Id -> Maybe Int
  lookupBCEnv_maybe = lookupFM
  
-
-taggedIdSizeW, untaggedIdSizeW :: Id -> Int
-taggedIdSizeW   = taggedSizeW   . typePrimRep . idType
-untaggedIdSizeW = untaggedSizeW . typePrimRep . idType
+idSizeW :: Id -> Int
+idSizeW id = getPrimRepSize (typePrimRep (idType id))
  
  unboxedTupleException :: a
  unboxedTupleException 
@@ -1402,75 +1298,97 @@ unboxedTupleException
  mkSLIDE n d = if d == 0 then nilOL else unitOL (SLIDE n d)
  bind x f    = f x
  
-\end{code}
+splitApp :: AnnExpr' id ann -> (AnnExpr' id ann, [AnnExpr' id ann])
+       -- The arguments are returned in *right-to-left* order
+splitApp (AnnApp (_,f) (_,a))
+              | isTypeAtom a = splitApp f
+              | otherwise    = case splitApp f of 
+                                    (f', as) -> (f', a:as)
+splitApp (AnnNote n (_,e))    = splitApp e
+splitApp e                   = (e, [])
  
-%************************************************************************
-%*                                                                     *
-\subsection{The bytecode generator's monad}
-%*                                                                     *
-%************************************************************************
  
-\begin{code}
+isTypeAtom :: AnnExpr' id ann -> Bool
+isTypeAtom (AnnType _) = True
+isTypeAtom _           = False
+
+isVoidRepAtom :: AnnExpr' id ann -> Bool
+isVoidRepAtom (AnnVar v)        = typePrimRep (idType v) == VoidRep
+isVoidRepAtom (AnnNote n (_,e)) = isVoidRepAtom e
+isVoidRepAtom _                = False
+
+atomRep :: AnnExpr' Id ann -> PrimRep
+atomRep (AnnVar v)    = typePrimRep (idType v)
+atomRep (AnnLit l)    = literalPrimRep l
+atomRep (AnnNote n b) = atomRep (snd b)
+atomRep (AnnApp f (_, AnnType _)) = atomRep (snd f)
+atomRep (AnnLam x e) | isTyVar x = atomRep (snd e)
+atomRep other = pprPanic "atomRep" (ppr (deAnnotate (undefined,other)))
+
+isPtrAtom :: AnnExpr' Id ann -> Bool
+isPtrAtom e = isFollowableRep (atomRep e)
+
+-- Let szsw be the sizes in words of some items pushed onto the stack,
+-- which has initial depth d'.  Return the values which the stack environment
+-- should map these items to.
+mkStackOffsets :: Int -> [Int] -> [Int]
+mkStackOffsets original_depth szsw
+   = map (subtract 1) (tail (scanl (+) original_depth szsw))
+
+-- -----------------------------------------------------------------------------
+-- The bytecode generator's monad
+
  data BcM_State 
-   = BcM_State { bcos      :: [ProtoBCO Name], -- accumulates completed BCOs
-                 nextlabel :: Int,             -- for generating local labels
-                 malloced  :: [Ptr ()] }       -- ptrs malloced for current BCO
-                                                -- Should be free()d when it is GCd
-type BcM r = BcM_State -> IO (BcM_State, r)
+   = BcM_State { 
+       nextlabel :: Int,               -- for generating local labels
+       malloced  :: [Ptr ()] }         -- ptrs malloced for current BCO
+                                       -- Should be free()d when it is GCd
+
+newtype BcM r = BcM (BcM_State -> IO (BcM_State, r))
  
  ioToBc :: IO a -> BcM a
-ioToBc io st = do x <- io 
-                  return (st, x)
+ioToBc io = BcM $ \st -> do 
+  x <- io 
+  return (st, x)
  
-runBc :: BcM_State -> BcM r -> IO (BcM_State, r)
-runBc st0 m = do (st1, res) <- m st0
-                 return (st1, res)
+runBc :: BcM r -> IO (BcM_State, r)
+runBc (BcM m) = m (BcM_State 0 []) 
  
  thenBc :: BcM a -> (a -> BcM b) -> BcM b
-thenBc expr cont st0
-   = do (st1, q) <- expr st0
-        (st2, r) <- cont q st1
-        return (st2, r)
+thenBc (BcM expr) cont = BcM $ \st0 -> do
+  (st1, q) <- expr st0
+  let BcM k = cont q 
+  (st2, r) <- k st1
+  return (st2, r)
  
  thenBc_ :: BcM a -> BcM b -> BcM b
-thenBc_ expr cont st0
-   = do (st1, q) <- expr st0
-        (st2, r) <- cont st1
-        return (st2, r)
+thenBc_ (BcM expr) (BcM cont) = BcM $ \st0 -> do
+  (st1, q) <- expr st0
+  (st2, r) <- cont st1
+  return (st2, r)
  
  returnBc :: a -> BcM a
-returnBc result st = return (st, result)
-
+returnBc result = BcM $ \st -> (return (st, result))
  
-mapBc :: (a -> BcM b) -> [a] -> BcM [b]
-mapBc f []     = returnBc []
-mapBc f (x:xs)
-  = f x          `thenBc` \ r  ->
-    mapBc f xs   `thenBc` \ rs ->
-    returnBc (r:rs)
+instance Monad BcM where
+  (>>=) = thenBc
+  (>>)  = thenBc_
+  return = returnBc
  
-emitBc :: ([Ptr ()] -> ProtoBCO Name) -> BcM ()
-emitBc bco st
-   = return (st{bcos = bco (malloced st) : bcos st, malloced=[]}, ())
-
-newbcoBc :: BcM ()
-newbcoBc st
-   | notNull (malloced st)
-   = panic "ByteCodeGen.newbcoBc: missed prior emitBc?"
-   | otherwise
-   = return (st, ())
+emitBc :: ([Ptr ()] -> ProtoBCO Name) -> BcM (ProtoBCO Name)
+emitBc bco
+  = BcM $ \st -> return (st{malloced=[]}, bco (malloced st))
  
  recordMallocBc :: Ptr a -> BcM ()
-recordMallocBc a st
-   = return (st{malloced = castPtr a : malloced st}, ())
+recordMallocBc a
+  = BcM $ \st -> return (st{malloced = castPtr a : malloced st}, ())
  
  getLabelBc :: BcM Int
-getLabelBc st
-   = return (st{nextlabel = 1 + nextlabel st}, nextlabel st)
+getLabelBc
+  = BcM $ \st -> return (st{nextlabel = 1 + nextlabel st}, nextlabel st)
  
  getLabelsBc :: Int -> BcM [Int]
-getLabelsBc n st
-   = let ctr = nextlabel st 
-     in return (st{nextlabel = ctr+n}, [ctr .. ctr+n-1])
-
+getLabelsBc n
+  = BcM $ \st -> let ctr = nextlabel st 
+                in return (st{nextlabel = ctr+n}, [ctr .. ctr+n-1])
  \end{code}