2 % (c) The University of Glasgow 2000
4 \section[ByteCodeLink]{Bytecode assembler and linker}
8 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
10 module ByteCodeLink ( UnlinkedBCO, UnlinkedBCOExpr, assembleBCO,
11 ClosureEnv, HValue, filterNameMap,
12 linkIModules, linkIExpr,
13 iNTERP_STACK_CHECK_THRESH
16 #include "HsVersions.h"
19 import Name ( Name, getName, nameModule, toRdrName, isGlobalName )
20 import RdrName ( rdrNameOcc, rdrNameModule )
21 import OccName ( occNameString )
22 import FiniteMap ( FiniteMap, addListToFM, filterFM,
23 addToFM, lookupFM, emptyFM )
25 import Literal ( Literal(..) )
26 import PrimOp ( PrimOp, primOpOcc )
27 import PrimRep ( PrimRep(..) )
28 import Constants ( wORD_SIZE )
29 import Module ( ModuleName, moduleName, moduleNameFS )
30 import Linker ( lookupSymbol )
31 import FastString ( FastString(..) )
32 import ByteCodeInstr ( BCInstr(..), ProtoBCO(..) )
33 import ByteCodeItbls ( ItblEnv, ItblPtr )
36 import Monad ( when, foldM )
38 import IArray ( array )
39 import MArray ( castSTUArray,
40 newFloatArray, writeFloatArray,
41 newDoubleArray, writeDoubleArray,
42 newIntArray, writeIntArray,
43 newAddrArray, writeAddrArray,
45 import Foreign ( Word16, Ptr(..), free )
46 import Addr ( Word, Addr(..), nullAddr )
47 import Weak ( addFinalizer )
50 import PrelBase ( Int(..) )
51 import PrelGHC ( BCO#, newBCO#, unsafeCoerce#,
52 ByteArray#, Array#, addrToHValue#, mkApUpd0# )
53 import IOExts ( fixIO )
54 import PrelArr ( Array(..) )
55 import ArrayBase ( UArray(..) )
56 import PrelIOBase ( IO(..) )
60 %************************************************************************
62 \subsection{Top-level stuff}
64 %************************************************************************
-- Link the bytecode of a batch of modules in one go.
--
-- Each element of the third argument pairs one module's unlinked BCOs with
-- that module's itbl env.  The result holds the linked values, the incoming
-- itbl env extended with every module's itbls, and the closure env returned
-- by linkSomeBCOs (called with True, i.e. only toplevel BCOs are added).
linkIModules :: ItblEnv    -- incoming global itbl env; returned updated
             -> ClosureEnv -- incoming global closure env; returned updated
             -> [([UnlinkedBCO], ItblEnv)]
             -> IO ([HValue], ItblEnv, ClosureEnv)
linkIModules gie gce mods
   = do let (bcoss, ies) = unzip mods
            -- the BCOs of all modules are linked together as one flat group
            bcos         = concat bcoss
            final_gie    = foldr plusFM gie ies
        (final_gce, linked_bcos) <- linkSomeBCOs True final_gie gce bcos
        return (linked_bcos, final_gie, final_gce)
-- Link the BCOs of a single expression and return the linked root BCO.
--
-- The auxiliary BCOs are linked first; the closure env that step returns
-- is then used to link the root, so the root can refer to the auxiliaries.
-- Both steps pass False to linkSomeBCOs, i.e. all BCOs go into the env.
linkIExpr :: ItblEnv -> ClosureEnv -> UnlinkedBCOExpr
          -> IO HValue    -- IO BCO# really
linkIExpr ie ce (root_ul_bco, aux_ul_bcos)
   = do (aux_ce, _)     <- linkSomeBCOs False ie ce aux_ul_bcos
        (_, [root_bco]) <- linkSomeBCOs False ie aux_ce [root_ul_bco]
        -- a do block must end in an expression: hand back the linked root
        return root_bco
-- Link a bunch of BCOs and return them + updated closure env.
--
-- The BCOs may refer to one another, so they are linked against a closure
-- env that already contains their own (not yet computed) values.  fixIO
-- ties that knot; zipLazily makes sure building the env does not force the
-- list of values before fixIO has produced it.
linkSomeBCOs :: Bool       -- False <=> add _all_ BCOs to returned closure env
                           -- True  <=> add only toplevel BCOs to closure env
             -> ItblEnv
             -> ClosureEnv
             -> [UnlinkedBCO]
             -> IO (ClosureEnv, [HValue])
linkSomeBCOs toplevs_only ie ce_in ul_bcos
   = do let nms = map nameOfUnlinkedBCO ul_bcos
        hvals <- fixIO
                    ( \ hvs -> let ce_out = addListToFM ce_in (zipLazily nms hvs)
                               in  mapM (linkBCO ie ce_out) ul_bcos )

        let ce_all_additions = zip nms hvals
            ce_top_additions = filter (isGlobalName.fst) ce_all_additions
            ce_additions     = if toplevs_only then ce_top_additions
                                               else ce_all_additions
            ce_out = -- make sure we're not inserting duplicate names into the
                     -- closure environment, which leads to trouble.
                     ASSERT (all (not . (`elemFM` ce_in)) (map fst ce_additions))
                     addListToFM ce_in ce_additions
        return (ce_out, hvals)
-- A lazier zip, in which no demand is propagated to the second
-- list unless some demand is propagated to the snd of one of the
-- result list elems.
--
-- The length of the result is that of the first list.  head/tail are used
-- on purpose instead of a (y:ys) pattern: matching on the second list
-- would force it as soon as the spine of the result is inspected.
zipLazily :: [a] -> [b] -> [(a, b)]
zipLazily []     _  = []
zipLazily (x:xs) ys = (x, head ys) : zipLazily xs (tail ys)
-- A bytecode object as produced by assembleBCO, before linking: its name
-- followed by the four tables the assembler builds.
data UnlinkedBCO
   = UnlinkedBCO Name
                 (SizedSeq Word16)                 -- insns
                 (SizedSeq Word)                   -- literals
                 (SizedSeq (Either Name PrimOp))   -- ptrs
                 (SizedSeq Name)                   -- itbl refs
-- The name stored in the first field of an unlinked BCO.
nameOfUnlinkedBCO bco
   = case bco of
        UnlinkedBCO bco_name _ _ _ _ -> bco_name
126 -- When translating expressions, we need to distinguish the root
127 -- BCO for the expression
-- The first component is the root BCO, the second the auxiliary BCOs;
-- linkIExpr links the auxiliaries first and then the root.
128 type UnlinkedBCOExpr = (UnlinkedBCO, [UnlinkedBCO])
-- Debug rendering: the BCO's name followed by the size of each of its
-- four tables.
instance Outputable UnlinkedBCO where
   ppr (UnlinkedBCO nm insns lits ptrs itbls)
      = sep (text "BCO" : ppr nm : text "with" : concatMap counted sizes)
      where
         sizes = [ (sizeSS insns, "insns"), (sizeSS lits,  "lits")
                 , (sizeSS ptrs,  "ptrs"),  (sizeSS itbls, "itbls") ]
         counted (n, what) = [int n, text what]
139 -- these need a proper home
-- ClosureEnv maps a Name to the HValue it is bound to; lookupCE consults it
-- when the pointer table of a BCO is linked.
140 type ClosureEnv = FiniteMap Name HValue
-- HValue has a single nullary constructor and carries no data of its own.
141 data HValue = HValue -- dummy type, actually a pointer to some Real Code.
-- Restrict an environment to the given modules: an entry survives only if
-- its Name is global and the Name's module is one of mods.  Every other
-- entry is dropped, which includes all local names (ie. temporary bindings
-- from the command line).
filterNameMap :: [ModuleName] -> FiniteMap Name a -> FiniteMap Name a
filterNameMap mods env = filterFM keep env
   where
      -- nameModule is only consulted for global names
      keep n _
         | isGlobalName n = moduleName (nameModule n) `elem` mods
         | otherwise      = False
152 %************************************************************************
154 \subsection{The bytecode assembler}
156 %************************************************************************
158 The object format for bytecodes is: 16 bits for the opcode, and 16 for
159 each field -- so the code can be considered a sequence of 16-bit ints.
160 Each field denotes either a stack offset or number of items on the
161 stack (eg SLIDE), an index into the pointer table (eg PUSH_G), an
162 index into the literal table (eg PUSH_I/D/L), or a bytecode address in
this BCO.
166 -- Top level assembler fn.
-- Turns a ProtoBCO into an UnlinkedBCO in two passes over the instruction
-- list: pass 1 records the offset of every LABEL, pass 2 (mkBits) emits the
-- instruction words together with the literal, pointer and itbl tables.
-- NOTE(review): several lines of this definition are not present in this
-- listing (the let/in framing, the head of findLabel, the final result and
-- the body of zonk) -- confirm against the full source before editing.
167 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
169 assembleBCO (ProtoBCO nm instrs origin malloced)
171 -- pass 1: collect up the offsets of the local labels.
172 -- Remember that the first insn starts at offset 1 since offset 0
173 -- (eventually) will hold the total # of insns.
174 label_env = mkLabelEnv emptyFM 1 instrs
-- mkLabelEnv walks the instructions keeping a running offset in 16-bit units
176 mkLabelEnv env i_offset [] = env
177 mkLabelEnv env i_offset (i:is)
-- a LABEL binds its number to the current offset; anything else leaves
-- the environment unchanged
179 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
-- advance by the size of the instruction just seen
180 in mkLabelEnv new_env (i_offset + instrSize16s i) is
-- label lookup handed to mkBits: a label missing from label_env is a panic
183 = case lookupFM label_env lab of
184 Just bco_offset -> bco_offset
185 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
187 do -- pass 2: generate the instruction, ptr and nonptr bits
-- all four tables start out empty
188 insns <- return emptySS :: IO (SizedSeq Word16)
189 lits <- return emptySS :: IO (SizedSeq Word)
190 ptrs <- return emptySS :: IO (SizedSeq (Either Name PrimOp))
191 itbls <- return emptySS :: IO (SizedSeq Name)
192 let init_asm_state = (insns,lits,ptrs,itbls)
193 (final_insns, final_lits, final_ptrs, final_itbls)
194 <- mkBits findLabel init_asm_state instrs
196 let ul_bco = UnlinkedBCO nm final_insns final_lits final_ptrs final_itbls
198 -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
199 -- objects, since they might get run too early. Disable this until
200 -- we figure out what to do.
201 -- when (not (null malloced)) (addFinalizer ul_bco (mapM_ zonk malloced))
-- zonk is only referenced from the disabled finaliser above
205 zonk (A# a#) = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
-- Assembler state threaded through mkBits: the four tables under
-- construction, in the order given on the next line.
208 -- instrs nonptrs ptrs itbls
209 type AsmState = (SizedSeq Word16, SizedSeq Word,
210 SizedSeq (Either Name PrimOp), SizedSeq Name)
-- A sequence that carries its own length.  Elements are kept newest-first,
-- so adding at the end is a cons; listFromSS restores insertion order.
data SizedSeq a = SizedSeq !Int [a]

-- The sequence with no elements.
emptySS :: SizedSeq a
emptySS = SizedSeq 0 []

-- Append a single element.
addToSS :: Monad m => SizedSeq a -> a -> m (SizedSeq a)
addToSS (SizedSeq len rev_elems) new_elem
   = return (SizedSeq (len + 1) (new_elem : rev_elems))

-- Append a whole list, preserving the order of its elements.
addListToSS :: Monad m => SizedSeq a -> [a] -> m (SizedSeq a)
addListToSS (SizedSeq len rev_elems) new_elems
   = return (SizedSeq new_len new_rev)
   where
      new_len = len + length new_elems
      new_rev = reverse new_elems ++ rev_elems

-- Number of elements added so far.
sizeSS :: SizedSeq a -> Int
sizeSS (SizedSeq len _) = len

-- All elements, oldest first.
listFromSS :: Monad m => SizedSeq a -> m [a]
listFromSS (SizedSeq _ rev_elems) = return (reverse rev_elems)
221 -- This is where all the action is (pass 2 of the assembler)
-- mkBits folds doInstr over the instruction list, threading the AsmState.
-- Each alternative below first registers any literal / pointer / itbl the
-- instruction needs (obtaining its table index and a new state) and then
-- emits the opcode and operand words with instr1 .. instr4.
-- NOTE(review): parts of the signature, the head of doInstr and the lines
-- following the last alternative are not present in this listing.
222 mkBits :: (Int -> Int) -- label finder
224 -> [BCInstr] -- instructions (in)
227 mkBits findLabel st proto_insns
228 = foldM doInstr st proto_insns
230 doInstr :: AsmState -> BCInstr -> IO AsmState
233 SWIZZLE stkoff n -> instr3 st i_SWIZZLE stkoff n
234 ARGCHECK n -> instr2 st i_ARGCHECK n
235 STKCHECK n -> instr2 st i_STKCHECK n
236 PUSH_L o1 -> instr2 st i_PUSH_L o1
237 PUSH_LL o1 o2 -> instr3 st i_PUSH_LL o1 o2
238 PUSH_LLL o1 o2 o3 -> instr4 st i_PUSH_LLL o1 o2 o3
239 PUSH_G nm -> do (p, st2) <- ptr st nm
240 instr2 st2 i_PUSH_G p
241 PUSH_AS nm pk -> do (p, st2) <- ptr st (Left nm)
242 (np, st3) <- ctoi_itbl st2 pk
243 instr3 st3 i_PUSH_AS p np
-- PUSH_UBX carries either a Literal or a raw address; both end up in the
-- literal table and are emitted under the same opcode
244 PUSH_UBX (Left lit) nws
245 -> do (np, st2) <- literal st lit
246 instr3 st2 i_PUSH_UBX np nws
247 PUSH_UBX (Right aa) nws
248 -> do (np, st2) <- addr st aa
249 instr3 st2 i_PUSH_UBX np nws
251 PUSH_TAG tag -> instr2 st i_PUSH_TAG tag
252 SLIDE n by -> instr3 st i_SLIDE n by
253 ALLOC n -> instr2 st i_ALLOC n
254 MKAP off sz -> instr3 st i_MKAP off sz
255 UNPACK n -> instr2 st i_UNPACK n
256 UPK_TAG n m k -> instr4 st i_UPK_TAG n m k
257 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
258 instr3 st2 i_PACK itbl_no sz
-- a LABEL emits nothing: the state is returned unchanged
259 LABEL lab -> return st
-- the tests on Int / Float / Double put the comparand in the literal table
-- and encode the label as the offset returned by findLabel
260 TESTLT_I i l -> do (np, st2) <- int st i
261 instr3 st2 i_TESTLT_I np (findLabel l)
262 TESTEQ_I i l -> do (np, st2) <- int st i
263 instr3 st2 i_TESTEQ_I np (findLabel l)
264 TESTLT_F f l -> do (np, st2) <- float st f
265 instr3 st2 i_TESTLT_F np (findLabel l)
266 TESTEQ_F f l -> do (np, st2) <- float st f
267 instr3 st2 i_TESTEQ_F np (findLabel l)
268 TESTLT_D d l -> do (np, st2) <- double st d
269 instr3 st2 i_TESTLT_D np (findLabel l)
270 TESTEQ_D d l -> do (np, st2) <- double st d
271 instr3 st2 i_TESTEQ_D np (findLabel l)
-- the _P tests emit their first operand directly as an instruction word
272 TESTLT_P i l -> instr3 st i_TESTLT_P i (findLabel l)
273 TESTEQ_P i l -> instr3 st i_TESTEQ_P i (findLabel l)
274 CASEFAIL -> instr1 st i_CASEFAIL
275 JMP l -> instr2 st i_JMP (findLabel l)
276 ENTER -> instr1 st i_ENTER
277 RETURN rep -> do (itbl_no,st2) <- itoc_itbl st rep
278 instr2 st2 i_RETURN itbl_no
279 CCALL m_addr -> do (np, st2) <- addr st m_addr
280 instr2 st2 i_CCALL np
-- Emit a one-word instruction: just the opcode.
instr1 (insns, lits, ptrs, itbls) opcode
   = addToSS insns (i2s opcode) >>= \ insns' ->
     return (insns', lits, ptrs, itbls)
-- Emit an opcode followed by one operand word.
instr2 (insns, lits, ptrs, itbls) opcode arg1
   = do insns' <- push insns opcode >>= (`push` arg1)
        return (insns', lits, ptrs, itbls)
   where
      -- append one value, converted with i2s, to the instruction words
      push acc w = addToSS acc (i2s w)
-- Emit an opcode followed by two operand words.
instr3 (insns, lits, ptrs, itbls) opcode arg1 arg2
   = do insns' <- push insns opcode >>= (`push` arg1) >>= (`push` arg2)
        return (insns', lits, ptrs, itbls)
   where
      -- append one value, converted with i2s, to the instruction words
      push acc w = addToSS acc (i2s w)
-- Emit an opcode followed by three operand words.
instr4 (insns, lits, ptrs, itbls) opcode arg1 arg2 arg3
   = do insns' <- push insns opcode
                     >>= (`push` arg1) >>= (`push` arg2) >>= (`push` arg3)
        return (insns', lits, ptrs, itbls)
   where
      -- append one value, converted with i2s, to the instruction words
      push acc w = addToSS acc (i2s w)
-- Add a Float to the literal table.  Returns the table size before the
-- addition (the index of the new entry) paired with the updated state.
float (insns, lits, ptrs, itbls) f
   = addListToSS lits (mkLitF f) >>= \ lits' ->
     return (sizeSS lits, (insns, lits', ptrs, itbls))
-- Add a Double to the literal table.  Returns the table size before the
-- addition (the index of the new entry) paired with the updated state.
double (insns, lits, ptrs, itbls) d
   = addListToSS lits (mkLitD d) >>= \ lits' ->
     return (sizeSS lits, (insns, lits', ptrs, itbls))
-- Add an Int to the literal table.  Returns the table size before the
-- addition (the index of the new entry) paired with the updated state.
int (insns, lits, ptrs, itbls) i
   = addListToSS lits (mkLitI i) >>= \ lits' ->
     return (sizeSS lits, (insns, lits', ptrs, itbls))
-- Add an Addr to the literal table.  Returns the table size before the
-- addition (the index of the new entry) paired with the updated state.
addr (insns, lits, ptrs, itbls) a
   = addListToSS lits (mkLitA a) >>= \ lits' ->
     return (sizeSS lits, (insns, lits', ptrs, itbls))
-- Add an entry to the pointer table.  Returns the index of the new entry
-- (the table size before the addition) paired with the updated state.
ptr (insns, lits, ptrs, itbls) p
   = addToSS ptrs p >>= \ ptrs' ->
     return (sizeSS ptrs, (insns, lits, ptrs', itbls))
-- Add the Name of dcon to the itbl table.  Returns the index of the new
-- entry (the table size before the addition) paired with the updated state.
itbl (insns, lits, ptrs, itbls) dcon
   = addToSS itbls (getName dcon) >>= \ itbls' ->
     return (sizeSS itbls, (insns, lits, ptrs, itbls'))
-- Add a Literal to the literal table by dispatching on its constructor to
-- int, float or double.  Any other kind of literal is a panic.
literal st lit
   = case lit of
        MachWord w   -> int st (fromIntegral w)
        MachInt j    -> int st (fromIntegral j)
        MachFloat r  -> float st (fromRational r)
        MachDouble r -> double st (fromRational r)
        MachChar c   -> int st c
        other        -> pprPanic "ByteCodeLink.literal" (ppr other)
-- ctoi_itbl (used by PUSH_AS): choose the stg_ctoi_ret_* label that matches
-- the given PrimRep and add its address to the literal table via addr.
-- Word, Int, Addr and Char all share the R1n label; an unlisted rep panics.
-- NOTE(review): the equation head and the binding that introduces
-- ret_itbl_addr are not present in this listing.
343 = addr st ret_itbl_addr
347 PtrRep -> stg_ctoi_ret_R1p_info
348 WordRep -> stg_ctoi_ret_R1n_info
349 IntRep -> stg_ctoi_ret_R1n_info
350 AddrRep -> stg_ctoi_ret_R1n_info
351 CharRep -> stg_ctoi_ret_R1n_info
352 FloatRep -> stg_ctoi_ret_F1_info
353 DoubleRep -> stg_ctoi_ret_D1_info
354 VoidRep -> stg_ctoi_ret_V_info
355 other -> pprPanic "ByteCodeLink.ctoi_itbl" (ppr pk)
-- itoc_itbl (used by RETURN): choose the stg_gc_*_ret_info label that
-- matches the given PrimRep and add its address to the literal table via
-- addr.  Char, Int and Addr share the unbx_r1 label; an unlisted rep panics.
-- NOTE(review): the equation head, the binding that introduces
-- ret_itbl_addr and the alternative that the "special case" comment below
-- refers to are not present in this listing.
358 = addr st ret_itbl_addr
362 CharRep -> stg_gc_unbx_r1_ret_info
363 IntRep -> stg_gc_unbx_r1_ret_info
364 AddrRep -> stg_gc_unbx_r1_ret_info
365 FloatRep -> stg_gc_f1_ret_info
366 DoubleRep -> stg_gc_d1_ret_info
368 -- Interpreter.c spots this special case
369 other -> pprPanic "ByteCodeLink.itoc_itbl" (ppr pk)
-- Addresses of the external symbols selected in ctoi_itbl (first group)
-- and itoc_itbl (second group); each is bound as a plain Addr.
371 foreign label "stg_ctoi_ret_R1p_info" stg_ctoi_ret_R1p_info :: Addr
372 foreign label "stg_ctoi_ret_R1n_info" stg_ctoi_ret_R1n_info :: Addr
373 foreign label "stg_ctoi_ret_F1_info" stg_ctoi_ret_F1_info :: Addr
374 foreign label "stg_ctoi_ret_D1_info" stg_ctoi_ret_D1_info :: Addr
375 foreign label "stg_ctoi_ret_V_info" stg_ctoi_ret_V_info :: Addr
377 foreign label "stg_gc_unbx_r1_ret_info" stg_gc_unbx_r1_ret_info :: Addr
378 foreign label "stg_gc_f1_ret_info" stg_gc_f1_ret_info :: Addr
379 foreign label "stg_gc_d1_ret_info" stg_gc_d1_ret_info :: Addr
381 -- The size in 16-bit entities of an instruction.
382 instrSize16s :: BCInstr -> Int
415 -- Make lists of host-sized words for literals, so that when the
416 -- words are placed in memory at increasing addresses, the
417 -- bit pattern is correct for the host's word size and endianness.
-- Every fragment below follows the same recipe: allocate a small unboxed
-- array of the value's own type, write the value at index 0, reinterpret
-- the array with castSTUArray and read the result back as Word(s).
-- NOTE(review): the equation heads, the monadic framing and the lines that
-- return the words are not present in this listing.
418 mkLitI :: Int -> [Word]
419 mkLitF :: Float -> [Word]
420 mkLitD :: Double -> [Word]
421 mkLitA :: Addr -> [Word]
-- Float: one word is read back
425 arr <- newFloatArray ((0::Int),0)
426 writeFloatArray arr 0 f
427 f_arr <- castSTUArray arr
428 w0 <- readWordArray f_arr 0
-- Double, variant reading two words back
-- (presumably the case where a word is half the size of a Double -- confirm)
435 arr <- newDoubleArray ((0::Int),1)
436 writeDoubleArray arr 0 d
437 d_arr <- castSTUArray arr
438 w0 <- readWordArray d_arr 0
439 w1 <- readWordArray d_arr 1
-- Double, variant reading a single word back
-- (presumably the case where a word is as large as a Double -- confirm)
444 arr <- newDoubleArray ((0::Int),0)
445 writeDoubleArray arr 0 d
446 d_arr <- castSTUArray arr
447 w0 <- readWordArray d_arr 0
-- Int: one word is read back
453 arr <- newIntArray ((0::Int),0)
454 writeIntArray arr 0 i
455 i_arr <- castSTUArray arr
456 w0 <- readWordArray i_arr 0
-- Addr: one word is read back
462 arr <- newAddrArray ((0::Int),0)
463 writeAddrArray arr 0 a
464 a_arr <- castSTUArray arr
465 w0 <- readWordArray a_arr 0
471 %************************************************************************
473 \subsection{Linking interpretables into something we can run}
475 %************************************************************************
{- Informal picture of a BCO.  This is documentation only: the real BCO# is
   the primitive type imported from PrelGHC and is created with newBCO#, so
   it must not be declared again here.

data BCO# = BCO# ByteArray#   -- instrs   :: Array Word16#
                 ByteArray#   -- literals :: Array Word32#
                 PtrArray#    -- ptrs     :: Array HValue
                 ByteArray#   -- itbls    :: Array Addr#

   NOTE(review): linkBCO builds the literals array from host-sized Words,
   so "Word32#" above looks stale -- confirm.
-}
-- Link one UnlinkedBCO: resolve its pointer and itbl references, pack the
-- four tables into arrays and build the BCO with newBCO.
-- NOTE(review): some lines of this definition (array type annotations and
-- the non-panicking alternative of insns_arr) are not present in this
-- listing.
486 linkBCO ie ce (UnlinkedBCO nm insnsSS literalsSS ptrsSS itblsSS)
487 = do insns <- listFromSS insnsSS
488 literals <- listFromSS literalsSS
489 ptrs <- listFromSS ptrsSS
490 itbls <- listFromSS itblsSS
-- resolve every pointer through the closure env and every itbl reference
-- through the itbl env
492 linked_ptrs <- mapM (lookupCE ce) ptrs
493 linked_itbls <- mapM (lookupIE ie) itbls
495 let n_insns = sizeSS insnsSS
496 n_literals = sizeSS literalsSS
497 n_ptrs = sizeSS ptrsSS
498 n_itbls = sizeSS itblsSS
-- each table becomes an array; the case expressions strip the bounds and
-- keep only the underlying primitive array
500 let ptrs_arr = array (0, n_ptrs-1) (indexify linked_ptrs)
502 ptrs_parr = case ptrs_arr of Array lo hi parr -> parr
504 itbls_arr = array (0, n_itbls-1) (indexify linked_itbls)
505 :: UArray Int ItblPtr
506 itbls_barr = case itbls_arr of UArray lo hi barr -> barr
-- the instruction count is stored as a 16-bit word in front of the
-- instructions themselves, hence the size limit
508 insns_arr | n_insns > 65535
509 = panic "linkBCO: >= 64k insns in BCO"
512 (indexify (fromIntegral n_insns:insns))
514 insns_barr = case insns_arr of UArray lo hi barr -> barr
516 literals_arr = array (0, n_literals-1) (indexify literals)
518 literals_barr = case literals_arr of UArray lo hi barr -> barr
-- pair every element with its index, counting from 0
520 indexify :: [a] -> [(Int, a)]
521 indexify xs = zip [0..] xs
523 BCO bco# <- newBCO insns_barr literals_barr ptrs_parr itbls_barr
525 -- WAS: return (unsafeCoerce# bco#)
-- the value returned is the result of mkApUpd0# applied to the BCO, not
-- the raw BCO itself
526 case mkApUpd0# (unsafeCoerce# bco#) of
527 (# final_bco #) -> return final_bco
-- IO wrapper around the newBCO# primitive.  The arguments are passed
-- straight through in the order linkBCO supplies them: instructions,
-- literals, pointers, itbls.  The fresh BCO# is wrapped in the BCO
-- constructor.
newBCO :: ByteArray# -> ByteArray# -> Array# a -> ByteArray# -> IO BCO
newBCO a b c d
   = IO (\s -> case newBCO# a b c d s of (# s1, bco #) -> (# s1, BCO bco #))
-- Resolve one entry of a BCO's pointer table to an HValue.
-- NOTE(review): the "case m of" lines and the alternatives of the lookupFM
-- case preceding the symbol lookup are not present in this listing.
537 lookupCE :: ClosureEnv -> Either Name PrimOp -> IO HValue
-- A primop is not looked up in the closure env at all: the closure symbol
-- built by primopToCLabel is resolved with lookupSymbol, and a missing
-- symbol is a panic.
538 lookupCE ce (Right primop)
539 = do m <- lookupSymbol (primopToCLabel primop "closure")
541 Just (Ptr addr) -> case addrToHValue# addr of
542 (# hval #) -> return hval
543 Nothing -> pprPanic "ByteCodeLink.lookupCE(primop)" (ppr primop)
-- An ordinary name is looked up in the closure env first.  The visible
-- alternative (presumably taken when the name is absent -- confirm) falls
-- back to the name's closure symbol and panics if that is missing too.
544 lookupCE ce (Left nm)
545 = case lookupFM ce nm of
548 -> do m <- lookupSymbol (nameToCLabel nm "closure")
550 Just (Ptr addr) -> case addrToHValue# addr of
551 (# hval #) -> return hval
552 Nothing -> pprPanic "ByteCodeLink.lookupCE" (ppr nm)
-- Resolve a constructor Name to the address of its info table.  The itbl
-- env is tried first, then the "con_info" symbol, then the "static_info"
-- symbol; if all three fail the result is a panic.
-- NOTE(review): the equation head and the "Nothing" / "case ... of" lines
-- between the alternatives are not present in this listing.
554 lookupIE :: ItblEnv -> Name -> IO (Ptr a)
556 = case lookupFM ie con_nm of
557 Just (Ptr a) -> return (Ptr a)
559 -> do -- try looking up in the object files.
560 m <- lookupSymbol (nameToCLabel con_nm "con_info")
562 Just addr -> return addr
564 -> do -- perhaps a nullary constructor?
565 n <- lookupSymbol (nameToCLabel con_nm "static_info")
567 Just addr -> return addr
568 Nothing -> pprPanic "ByteCodeLink.lookupIE" (ppr con_nm)
-- HACKS!!! ToDo: cleaner
-- Build the C symbol for a Name: module name, occurrence name and the
-- given suffix, joined by underscores.
nameToCLabel :: Name -> String{-suffix-} -> String
nameToCLabel n suffix
   = concat [mod_part, "_", occ_part, "_", suffix]
   where
      rdr      = toRdrName n
      mod_part = _UNPK_(moduleNameFS (rdrNameModule rdr))
      occ_part = occNameString (rdrNameOcc rdr)
-- Build the C symbol for a primop's wrapper: the fixed prefix
-- "PrelPrimopWrappers_", the primop's occurrence name, an underscore and
-- the given suffix.
primopToCLabel :: PrimOp -> String{-suffix-} -> String
primopToCLabel primop suffix
   = let str = "PrelPrimopWrappers_" ++ occNameString (primOpOcc primop) ++ '_':suffix
     in --trace ("primopToCLabel: " ++ str)
        str
585 %************************************************************************
587 \subsection{Connect to actual values for bytecode opcodes}
589 %************************************************************************
593 #include "Bytecodes.h"
-- Haskell-side names for the bytecode opcodes.  Each i_XXX is the bci_XXX
-- constant made available by the #include of Bytecodes.h, pinned to Int;
-- these are the opcode values mkBits writes into the instruction stream.
595 i_ARGCHECK = (bci_ARGCHECK :: Int)
596 i_PUSH_L = (bci_PUSH_L :: Int)
597 i_PUSH_LL = (bci_PUSH_LL :: Int)
598 i_PUSH_LLL = (bci_PUSH_LLL :: Int)
599 i_PUSH_G = (bci_PUSH_G :: Int)
600 i_PUSH_AS = (bci_PUSH_AS :: Int)
601 i_PUSH_UBX = (bci_PUSH_UBX :: Int)
602 i_PUSH_TAG = (bci_PUSH_TAG :: Int)
603 i_SLIDE = (bci_SLIDE :: Int)
604 i_ALLOC = (bci_ALLOC :: Int)
605 i_MKAP = (bci_MKAP :: Int)
606 i_UNPACK = (bci_UNPACK :: Int)
607 i_UPK_TAG = (bci_UPK_TAG :: Int)
608 i_PACK = (bci_PACK :: Int)
609 i_TESTLT_I = (bci_TESTLT_I :: Int)
610 i_TESTEQ_I = (bci_TESTEQ_I :: Int)
611 i_TESTLT_F = (bci_TESTLT_F :: Int)
612 i_TESTEQ_F = (bci_TESTEQ_F :: Int)
613 i_TESTLT_D = (bci_TESTLT_D :: Int)
614 i_TESTEQ_D = (bci_TESTEQ_D :: Int)
615 i_TESTLT_P = (bci_TESTLT_P :: Int)
616 i_TESTEQ_P = (bci_TESTEQ_P :: Int)
617 i_CASEFAIL = (bci_CASEFAIL :: Int)
618 i_ENTER = (bci_ENTER :: Int)
619 i_RETURN = (bci_RETURN :: Int)
620 i_STKCHECK = (bci_STKCHECK :: Int)
621 i_JMP = (bci_JMP :: Int)
-- NOTE(review): i_CCALL and i_SWIZZLE are each defined twice below, once
-- from Bytecodes.h and once as an error thunk.  The two pairs are
-- presumably the branches of a preprocessor conditional whose #if / #else /
-- #endif lines are not present in this listing -- confirm; as shown, the
-- duplicate bindings would not compile.
623 i_CCALL = (bci_CCALL :: Int)
624 i_SWIZZLE = (bci_SWIZZLE :: Int)
626 i_CCALL = error "Sorry pal, you need to bootstrap to use i_CCALL."
627 i_SWIZZLE = error "Sorry pal, you need to bootstrap to use i_SWIZZLE."
-- The INTERP_STACK_CHECK_THRESH macro re-exported as an Int (it appears in
-- this module's export list).
630 iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)