2 % (c) The University of Glasgow 2000
4 \section[ByteCodeLink]{Bytecode assembler and linker}
7 module ByteCodeLink ( UnlinkedBCO, UnlinkedBCOExpr, assembleBCO,
8 ClosureEnv, HValue, filterNameMap,
9 linkIModules, linkIExpr,
10 iNTERP_STACK_CHECK_THRESH
13 #include "HsVersions.h"
16 import Name ( Name, getName, nameModule, toRdrName, isGlobalName )
17 import RdrName ( rdrNameOcc, rdrNameModule )
18 import OccName ( occNameString )
19 import FiniteMap ( FiniteMap, addListToFM, filterFM,
20 addToFM, lookupFM, emptyFM )
22 import Literal ( Literal(..) )
23 import PrimOp ( PrimOp, primOpOcc )
24 import PrimRep ( PrimRep(..) )
25 import Constants ( wORD_SIZE )
26 import Module ( ModuleName, moduleName, moduleNameFS )
27 import Linker ( lookupSymbol )
28 import FastString ( FastString(..) )
29 import ByteCodeInstr ( BCInstr(..), ProtoBCO(..) )
30 import ByteCodeItbls ( ItblEnv, ItblPtr )
33 import Monad ( foldM )
35 import IArray ( array )
36 import MArray ( castSTUArray,
37 newFloatArray, writeFloatArray,
38 newDoubleArray, writeDoubleArray,
39 newIntArray, writeIntArray,
40 newAddrArray, writeAddrArray,
42 import Foreign ( Word16, Ptr(..), free )
43 import Addr ( Word, Addr(..), nullAddr )
44 import Weak ( addFinalizer )
47 import PrelBase ( Int(..) )
48 import PrelGHC ( BCO#, newBCO#, unsafeCoerce#,
49 ByteArray#, Array#, addrToHValue#, mkApUpd0# )
50 import IOExts ( fixIO )
51 import PrelArr ( Array(..) )
52 import ArrayBase ( UArray(..) )
53 import PrelIOBase ( IO(..) )
57 %************************************************************************
59 \subsection{Top-level stuff}
61 %************************************************************************
65 linkIModules :: ItblEnv -- incoming global itbl env; returned updated
66 -> ClosureEnv -- incoming global closure env; returned updated
67 -> [([UnlinkedBCO], ItblEnv)]
68 -> IO ([HValue], ItblEnv, ClosureEnv)
69 linkIModules gie gce mods
70 = do let (bcoss, ies) = unzip mods
72 final_gie = foldr plusFM gie ies
73 (final_gce, linked_bcos) <- linkSomeBCOs True final_gie gce bcos
74 return (linked_bcos, final_gie, final_gce)
77 linkIExpr :: ItblEnv -> ClosureEnv -> UnlinkedBCOExpr
78 -> IO HValue -- IO BCO# really
79 linkIExpr ie ce (root_ul_bco, aux_ul_bcos)
80 = do (aux_ce, _) <- linkSomeBCOs False ie ce aux_ul_bcos
81 (_, [root_bco]) <- linkSomeBCOs False ie aux_ce [root_ul_bco]
84 -- Link a bunch of BCOs and return them + updated closure env.
85 linkSomeBCOs :: Bool -- False <=> add _all_ BCOs to returned closure env
86 -- True <=> add only toplevel BCOs to closure env
90 -> IO (ClosureEnv, [HValue])
91 linkSomeBCOs toplevs_only ie ce_in ul_bcos
92 = do let nms = map nameOfUnlinkedBCO ul_bcos
94 ( \ hvs -> let ce_out = addListToFM ce_in (zipLazily nms hvs)
95 in mapM (linkBCO ie ce_out) ul_bcos )
97 let ce_all_additions = zip nms hvals
98 ce_top_additions = filter (isGlobalName.fst) ce_all_additions
99 ce_additions = if toplevs_only then ce_top_additions
100 else ce_all_additions
101 ce_out = -- make sure we're not inserting duplicate names into the
102 -- closure environment, which leads to trouble.
103 ASSERT (all (not . (`elemFM` ce_in)) (map fst ce_additions))
104 addListToFM ce_in ce_additions
105 return (ce_out, hvals)
107 -- A lazier zip, in which no demand is propagated to the second
108 -- list unless some demand is propagated to the snd of one of the
109 -- result list elems.
111 zipLazily (x:xs) ys = (x, head ys) : zipLazily xs (tail ys)
116 (SizedSeq Word16) -- insns
117 (SizedSeq Word) -- literals
118 (SizedSeq (Either Name PrimOp)) -- ptrs
119 (SizedSeq Name) -- itbl refs
120 [Addr] -- malloc'd, free when BCO GC'd
122 nameOfUnlinkedBCO (UnlinkedBCO nm _ _ _ _ _) = nm
124 -- When translating expressions, we need to distinguish the root
125 -- BCO for the expression
126 type UnlinkedBCOExpr = (UnlinkedBCO, [UnlinkedBCO])
128 instance Outputable UnlinkedBCO where
129 ppr (UnlinkedBCO nm insns lits ptrs itbls malloced)
130 = sep [text "BCO", ppr nm, text "with",
131 int (sizeSS insns), text "insns",
132 int (sizeSS lits), text "lits",
133 int (sizeSS ptrs), text "ptrs",
134 int (sizeSS itbls), text "itbls",
135 int (length malloced), text "malloced"]
138 -- these need a proper home
139 type ClosureEnv = FiniteMap Name HValue
140 data HValue = HValue -- dummy type, actually a pointer to some Real Code.
142 -- remove all entries for a given set of modules from the environment;
143 -- note that this removes all local names too (ie. temporary bindings from
144 -- the command line).
145 filterNameMap :: [ModuleName] -> FiniteMap Name a -> FiniteMap Name a
146 filterNameMap mods env
147 = filterFM (\n _ -> isGlobalName n &&
148 moduleName (nameModule n) `elem` mods) env
151 %************************************************************************
153 \subsection{The bytecode assembler}
155 %************************************************************************
157 The object format for bytecodes is: 16 bits for the opcode, and 16 for
158 each field -- so the code can be considered a sequence of 16-bit ints.
159 Each field denotes either a stack offset or number of items on the
160 stack (eg SLIDE), and index into the pointer table (eg PUSH_G), an
161 index into the literal table (eg PUSH_I/D/L), or a bytecode address in
165 -- Top level assembler fn.
166 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
168 assembleBCO (ProtoBCO nm instrs origin malloced)
170 -- pass 1: collect up the offsets of the local labels.
171 -- Remember that the first insn starts at offset 1 since offset 0
172 -- (eventually) will hold the total # of insns.
173 label_env = mkLabelEnv emptyFM 1 instrs
175 mkLabelEnv env i_offset [] = env
176 mkLabelEnv env i_offset (i:is)
178 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
179 in mkLabelEnv new_env (i_offset + instrSize16s i) is
182 = case lookupFM label_env lab of
183 Just bco_offset -> bco_offset
184 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
186 do -- pass 2: generate the instruction, ptr and nonptr bits
187 insns <- return emptySS :: IO (SizedSeq Word16)
188 lits <- return emptySS :: IO (SizedSeq Word)
189 ptrs <- return emptySS :: IO (SizedSeq (Either Name PrimOp))
190 itbls <- return emptySS :: IO (SizedSeq Name)
191 let init_asm_state = (insns,lits,ptrs,itbls)
192 (final_insns, final_lits, final_ptrs, final_itbls)
193 <- mkBits findLabel init_asm_state instrs
195 return (UnlinkedBCO nm final_insns final_lits final_ptrs final_itbls malloced)
197 -- instrs nonptrs ptrs itbls
198 type AsmState = (SizedSeq Word16, SizedSeq Word,
199 SizedSeq (Either Name PrimOp), SizedSeq Name)
201 data SizedSeq a = SizedSeq !Int [a]
202 emptySS = SizedSeq 0 []
203 addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
204 addListToSS (SizedSeq n r_xs) xs
205 = return (SizedSeq (n + length xs) (reverse xs ++ r_xs))
206 sizeSS (SizedSeq n r_xs) = n
207 listFromSS (SizedSeq n r_xs) = return (reverse r_xs)
210 -- This is where all the action is (pass 2 of the assembler)
211 mkBits :: (Int -> Int) -- label finder
213 -> [BCInstr] -- instructions (in)
216 mkBits findLabel st proto_insns
217 = foldM doInstr st proto_insns
219 doInstr :: AsmState -> BCInstr -> IO AsmState
222 ARGCHECK n -> instr2 st i_ARGCHECK n
223 STKCHECK n -> instr2 st i_STKCHECK n
224 PUSH_L o1 -> instr2 st i_PUSH_L o1
225 PUSH_LL o1 o2 -> instr3 st i_PUSH_LL o1 o2
226 PUSH_LLL o1 o2 o3 -> instr4 st i_PUSH_LLL o1 o2 o3
227 PUSH_G nm -> do (p, st2) <- ptr st nm
228 instr2 st2 i_PUSH_G p
229 PUSH_AS nm pk -> do (p, st2) <- ptr st (Left nm)
230 (np, st3) <- ctoi_itbl st2 pk
231 instr3 st3 i_PUSH_AS p np
232 PUSH_UBX (Left lit) nws
233 -> do (np, st2) <- literal st lit
234 instr3 st2 i_PUSH_UBX np nws
235 PUSH_UBX (Right aa) nws
236 -> do (np, st2) <- addr st aa
237 instr3 st2 i_PUSH_UBX np nws
239 PUSH_TAG tag -> instr2 st i_PUSH_TAG tag
240 SLIDE n by -> instr3 st i_SLIDE n by
241 ALLOC n -> instr2 st i_ALLOC n
242 MKAP off sz -> instr3 st i_MKAP off sz
243 UNPACK n -> instr2 st i_UNPACK n
244 UPK_TAG n m k -> instr4 st i_UPK_TAG n m k
245 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
246 instr3 st2 i_PACK itbl_no sz
247 LABEL lab -> return st
248 TESTLT_I i l -> do (np, st2) <- int st i
249 instr3 st2 i_TESTLT_I np (findLabel l)
250 TESTEQ_I i l -> do (np, st2) <- int st i
251 instr3 st2 i_TESTEQ_I np (findLabel l)
252 TESTLT_F f l -> do (np, st2) <- float st f
253 instr3 st2 i_TESTLT_F np (findLabel l)
254 TESTEQ_F f l -> do (np, st2) <- float st f
255 instr3 st2 i_TESTEQ_F np (findLabel l)
256 TESTLT_D d l -> do (np, st2) <- double st d
257 instr3 st2 i_TESTLT_D np (findLabel l)
258 TESTEQ_D d l -> do (np, st2) <- double st d
259 instr3 st2 i_TESTEQ_D np (findLabel l)
260 TESTLT_P i l -> instr3 st i_TESTLT_P i (findLabel l)
261 TESTEQ_P i l -> instr3 st i_TESTEQ_P i (findLabel l)
262 CASEFAIL -> instr1 st i_CASEFAIL
263 JMP l -> instr2 st i_JMP (findLabel l)
264 ENTER -> instr1 st i_ENTER
265 RETURN rep -> do (itbl_no,st2) <- itoc_itbl st rep
266 instr2 st2 i_RETURN itbl_no
267 CCALL m_addr -> do (np, st2) <- addr st m_addr
268 instr2 st2 i_CCALL np
273 instr1 (st_i0,st_l0,st_p0,st_I0) i1
274 = do st_i1 <- addToSS st_i0 (i2s i1)
275 return (st_i1,st_l0,st_p0,st_I0)
277 instr2 (st_i0,st_l0,st_p0,st_I0) i1 i2
278 = do st_i1 <- addToSS st_i0 (i2s i1)
279 st_i2 <- addToSS st_i1 (i2s i2)
280 return (st_i2,st_l0,st_p0,st_I0)
282 instr3 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3
283 = do st_i1 <- addToSS st_i0 (i2s i1)
284 st_i2 <- addToSS st_i1 (i2s i2)
285 st_i3 <- addToSS st_i2 (i2s i3)
286 return (st_i3,st_l0,st_p0,st_I0)
288 instr4 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3 i4
289 = do st_i1 <- addToSS st_i0 (i2s i1)
290 st_i2 <- addToSS st_i1 (i2s i2)
291 st_i3 <- addToSS st_i2 (i2s i3)
292 st_i4 <- addToSS st_i3 (i2s i4)
293 return (st_i4,st_l0,st_p0,st_I0)
295 float (st_i0,st_l0,st_p0,st_I0) f
296 = do let ws = mkLitF f
297 st_l1 <- addListToSS st_l0 ws
298 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
300 double (st_i0,st_l0,st_p0,st_I0) d
301 = do let ws = mkLitD d
302 st_l1 <- addListToSS st_l0 ws
303 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
305 int (st_i0,st_l0,st_p0,st_I0) i
306 = do let ws = mkLitI i
307 st_l1 <- addListToSS st_l0 ws
308 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
310 addr (st_i0,st_l0,st_p0,st_I0) a
311 = do let ws = mkLitA a
312 st_l1 <- addListToSS st_l0 ws
313 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
315 ptr (st_i0,st_l0,st_p0,st_I0) p
316 = do st_p1 <- addToSS st_p0 p
317 return (sizeSS st_p0, (st_i0,st_l0,st_p1,st_I0))
319 itbl (st_i0,st_l0,st_p0,st_I0) dcon
320 = do st_I1 <- addToSS st_I0 (getName dcon)
321 return (sizeSS st_I0, (st_i0,st_l0,st_p0,st_I1))
323 literal st (MachWord w) = int st (fromIntegral w)
324 literal st (MachInt j) = int st (fromIntegral j)
325 literal st (MachFloat r) = float st (fromRational r)
326 literal st (MachDouble r) = double st (fromRational r)
327 literal st (MachChar c) = int st c
328 literal st other = pprPanic "ByteCodeLink.literal" (ppr other)
331 = addr st ret_itbl_addr
335 PtrRep -> stg_ctoi_ret_R1p_info
336 WordRep -> stg_ctoi_ret_R1n_info
337 IntRep -> stg_ctoi_ret_R1n_info
338 AddrRep -> stg_ctoi_ret_R1n_info
339 CharRep -> stg_ctoi_ret_R1n_info
340 FloatRep -> stg_ctoi_ret_F1_info
341 DoubleRep -> stg_ctoi_ret_D1_info
342 VoidRep -> stg_ctoi_ret_V_info
343 other -> pprPanic "ByteCodeLink.ctoi_itbl" (ppr pk)
346 = addr st ret_itbl_addr
350 CharRep -> stg_gc_unbx_r1_ret_info
351 IntRep -> stg_gc_unbx_r1_ret_info
352 AddrRep -> stg_gc_unbx_r1_ret_info
353 FloatRep -> stg_gc_f1_ret_info
354 DoubleRep -> stg_gc_d1_ret_info
356 -- Interpreter.c spots this special case
357 other -> pprPanic "ByteCodeLink.itoc_itbl" (ppr pk)
359 foreign label "stg_ctoi_ret_R1p_info" stg_ctoi_ret_R1p_info :: Addr
360 foreign label "stg_ctoi_ret_R1n_info" stg_ctoi_ret_R1n_info :: Addr
361 foreign label "stg_ctoi_ret_F1_info" stg_ctoi_ret_F1_info :: Addr
362 foreign label "stg_ctoi_ret_D1_info" stg_ctoi_ret_D1_info :: Addr
363 foreign label "stg_ctoi_ret_V_info" stg_ctoi_ret_V_info :: Addr
365 foreign label "stg_gc_unbx_r1_ret_info" stg_gc_unbx_r1_ret_info :: Addr
366 foreign label "stg_gc_f1_ret_info" stg_gc_f1_ret_info :: Addr
367 foreign label "stg_gc_d1_ret_info" stg_gc_d1_ret_info :: Addr
369 -- The size in 16-bit entities of an instruction.
370 instrSize16s :: BCInstr -> Int
403 -- Make lists of host-sized words for literals, so that when the
404 -- words are placed in memory at increasing addresses, the
405 -- bit pattern is correct for the host's word size and endianness.
406 mkLitI :: Int -> [Word]
407 mkLitF :: Float -> [Word]
408 mkLitD :: Double -> [Word]
409 mkLitA :: Addr -> [Word]
413 arr <- newFloatArray ((0::Int),0)
414 writeFloatArray arr 0 f
415 f_arr <- castSTUArray arr
416 w0 <- readWordArray f_arr 0
423 arr <- newDoubleArray ((0::Int),1)
424 writeDoubleArray arr 0 d
425 d_arr <- castSTUArray arr
426 w0 <- readWordArray d_arr 0
427 w1 <- readWordArray d_arr 1
432 arr <- newDoubleArray ((0::Int),0)
433 writeDoubleArray arr 0 d
434 d_arr <- castSTUArray arr
435 w0 <- readWordArray d_arr 0
441 arr <- newIntArray ((0::Int),0)
442 writeIntArray arr 0 i
443 i_arr <- castSTUArray arr
444 w0 <- readWordArray i_arr 0
450 arr <- newAddrArray ((0::Int),0)
451 writeAddrArray arr 0 a
452 a_arr <- castSTUArray arr
453 w0 <- readWordArray a_arr 0
459 %************************************************************************
461 \subsection{Linking interpretables into something we can run}
463 %************************************************************************
468 data BCO# = BCO# ByteArray# -- instrs :: Array Word16#
469 ByteArray# -- literals :: Array Word32#
470 PtrArray# -- ptrs :: Array HValue
471 ByteArray# -- itbls :: Array Addr#
474 linkBCO ie ce (UnlinkedBCO nm insnsSS literalsSS ptrsSS itblsSS malloced)
475 = do insns <- listFromSS insnsSS
476 literals <- listFromSS literalsSS
477 ptrs <- listFromSS ptrsSS
478 itbls <- listFromSS itblsSS
480 linked_ptrs <- mapM (lookupCE ce) ptrs
481 linked_itbls <- mapM (lookupIE ie) itbls
483 let n_insns = sizeSS insnsSS
484 n_literals = sizeSS literalsSS
485 n_ptrs = sizeSS ptrsSS
486 n_itbls = sizeSS itblsSS
488 let ptrs_arr = array (0, n_ptrs-1) (indexify linked_ptrs)
490 ptrs_parr = case ptrs_arr of Array lo hi parr -> parr
492 itbls_arr = array (0, n_itbls-1) (indexify linked_itbls)
493 :: UArray Int ItblPtr
494 itbls_barr = case itbls_arr of UArray lo hi barr -> barr
496 insns_arr | n_insns > 65535
497 = panic "linkBCO: >= 64k insns in BCO"
500 (indexify (fromIntegral n_insns:insns))
502 insns_barr = case insns_arr of UArray lo hi barr -> barr
504 literals_arr = array (0, n_literals-1) (indexify literals)
506 literals_barr = case literals_arr of UArray lo hi barr -> barr
508 indexify :: [a] -> [(Int, a)]
509 indexify xs = zip [0..] xs
511 BCO bco# <- newBCO insns_barr literals_barr ptrs_parr itbls_barr
513 -- WAS: return (unsafeCoerce# bco#)
514 case mkApUpd0# (unsafeCoerce# bco#) of
516 | not (null malloced)
517 -> do addFinalizer final_bco (freeup malloced)
522 freeup :: [Addr] -> IO ()
525 = do -- putStrLn ("freeing malloced block at " ++ show a)
530 newBCO :: ByteArray# -> ByteArray# -> Array# a -> ByteArray# -> IO BCO
532 = IO (\s -> case newBCO# a b c d s of (# s1, bco #) -> (# s1, BCO bco #))
535 lookupCE :: ClosureEnv -> Either Name PrimOp -> IO HValue
536 lookupCE ce (Right primop)
537 = do m <- lookupSymbol (primopToCLabel primop "closure")
539 Just (Ptr addr) -> case addrToHValue# addr of
540 (# hval #) -> return hval
541 Nothing -> pprPanic "ByteCodeLink.lookupCE(primop)" (ppr primop)
542 lookupCE ce (Left nm)
543 = case lookupFM ce nm of
546 -> do m <- lookupSymbol (nameToCLabel nm "closure")
548 Just (Ptr addr) -> case addrToHValue# addr of
549 (# hval #) -> return hval
550 Nothing -> pprPanic "ByteCodeLink.lookupCE" (ppr nm)
552 lookupIE :: ItblEnv -> Name -> IO (Ptr a)
554 = case lookupFM ie con_nm of
555 Just (Ptr a) -> return (Ptr a)
557 -> do -- try looking up in the object files.
558 m <- lookupSymbol (nameToCLabel con_nm "con_info")
560 Just addr -> return addr
562 -> do -- perhaps a nullary constructor?
563 n <- lookupSymbol (nameToCLabel con_nm "static_info")
565 Just addr -> return addr
566 Nothing -> pprPanic "ByteCodeLink.lookupIE" (ppr con_nm)
568 -- HACKS!!! ToDo: cleaner
569 nameToCLabel :: Name -> String{-suffix-} -> String
570 nameToCLabel n suffix
571 = _UNPK_(moduleNameFS (rdrNameModule rn))
572 ++ '_':occNameString(rdrNameOcc rn) ++ '_':suffix
573 where rn = toRdrName n
575 primopToCLabel :: PrimOp -> String{-suffix-} -> String
576 primopToCLabel primop suffix
577 = let str = "PrelPrimopWrappers_" ++ occNameString (primOpOcc primop) ++ '_':suffix
578 in --trace ("primopToCLabel: " ++ str)
583 %************************************************************************
585 \subsection{Connect to actual values for bytecode opcodes}
587 %************************************************************************
591 #include "Bytecodes.h"
593 i_ARGCHECK = (bci_ARGCHECK :: Int)
594 i_PUSH_L = (bci_PUSH_L :: Int)
595 i_PUSH_LL = (bci_PUSH_LL :: Int)
596 i_PUSH_LLL = (bci_PUSH_LLL :: Int)
597 i_PUSH_G = (bci_PUSH_G :: Int)
598 i_PUSH_AS = (bci_PUSH_AS :: Int)
599 i_PUSH_UBX = (bci_PUSH_UBX :: Int)
600 i_PUSH_TAG = (bci_PUSH_TAG :: Int)
601 i_SLIDE = (bci_SLIDE :: Int)
602 i_ALLOC = (bci_ALLOC :: Int)
603 i_MKAP = (bci_MKAP :: Int)
604 i_UNPACK = (bci_UNPACK :: Int)
605 i_UPK_TAG = (bci_UPK_TAG :: Int)
606 i_PACK = (bci_PACK :: Int)
607 i_TESTLT_I = (bci_TESTLT_I :: Int)
608 i_TESTEQ_I = (bci_TESTEQ_I :: Int)
609 i_TESTLT_F = (bci_TESTLT_F :: Int)
610 i_TESTEQ_F = (bci_TESTEQ_F :: Int)
611 i_TESTLT_D = (bci_TESTLT_D :: Int)
612 i_TESTEQ_D = (bci_TESTEQ_D :: Int)
613 i_TESTLT_P = (bci_TESTLT_P :: Int)
614 i_TESTEQ_P = (bci_TESTEQ_P :: Int)
615 i_CASEFAIL = (bci_CASEFAIL :: Int)
616 i_ENTER = (bci_ENTER :: Int)
617 i_RETURN = (bci_RETURN :: Int)
618 i_STKCHECK = (bci_STKCHECK :: Int)
619 i_JMP = (bci_JMP :: Int)
621 i_CCALL = (bci_CCALL :: Int)
623 i_CCALL = error "Sorry pal, you need to bootstrap to use i_CCALL."
626 iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)