2 % (c) The University of Glasgow 2000
4 \section[ByteCodeLink]{Bytecode assembler and linker}
7 module ByteCodeLink ( UnlinkedBCO, UnlinkedBCOExpr, assembleBCO,
8 ClosureEnv, HValue, filterNameMap,
9 linkIModules, linkIExpr,
10 iNTERP_STACK_CHECK_THRESH
13 #include "HsVersions.h"
16 import Name ( Name, getName, nameModule, toRdrName, isGlobalName )
17 import RdrName ( rdrNameOcc, rdrNameModule )
18 import OccName ( occNameString )
19 import FiniteMap ( FiniteMap, addListToFM, filterFM,
20 addToFM, lookupFM, emptyFM )
22 import Literal ( Literal(..) )
23 import PrimOp ( PrimOp, primOpOcc )
24 import PrimRep ( PrimRep(..) )
25 import Constants ( wORD_SIZE )
26 import Module ( ModuleName, moduleName, moduleNameFS )
27 import Linker ( lookupSymbol )
28 import FastString ( FastString(..) )
29 import ByteCodeInstr ( BCInstr(..), ProtoBCO(..) )
30 import ByteCodeItbls ( ItblEnv, ItblPtr )
33 import Monad ( foldM )
35 import IArray ( array )
36 import MArray ( castSTUArray,
37 newFloatArray, writeFloatArray,
38 newDoubleArray, writeDoubleArray,
39 newIntArray, writeIntArray,
40 newAddrArray, writeAddrArray,
42 import Foreign ( Word16, Ptr(..) )
43 import Addr ( Word, Addr, nullAddr )
46 import PrelBase ( Int(..) )
47 import PrelGHC ( BCO#, newBCO#, unsafeCoerce#,
48 ByteArray#, Array#, addrToHValue#, mkApUpd0# )
49 import IOExts ( fixIO )
50 import PrelArr ( Array(..) )
51 import ArrayBase ( UArray(..) )
52 import PrelIOBase ( IO(..) )
56 %************************************************************************
58 \subsection{Top-level stuff}
60 %************************************************************************
64 linkIModules :: ItblEnv -- incoming global itbl env; returned updated
65 -> ClosureEnv -- incoming global closure env; returned updated
66 -> [([UnlinkedBCO], ItblEnv)]
67 -> IO ([HValue], ItblEnv, ClosureEnv)
68 linkIModules gie gce mods
69 = do let (bcoss, ies) = unzip mods
71 final_gie = foldr plusFM gie ies
72 (final_gce, linked_bcos) <- linkSomeBCOs True final_gie gce bcos
73 return (linked_bcos, final_gie, final_gce)
76 linkIExpr :: ItblEnv -> ClosureEnv -> UnlinkedBCOExpr
77 -> IO HValue -- IO BCO# really
78 linkIExpr ie ce (root_ul_bco, aux_ul_bcos)
79 = do (aux_ce, _) <- linkSomeBCOs False ie ce aux_ul_bcos
80 (_, [root_bco]) <- linkSomeBCOs False ie aux_ce [root_ul_bco]
83 -- Link a bunch of BCOs and return them + updated closure env.
84 linkSomeBCOs :: Bool -- False <=> add _all_ BCOs to returned closure env
85 -- True <=> add only toplevel BCOs to closure env
89 -> IO (ClosureEnv, [HValue])
90 linkSomeBCOs toplevs_only ie ce_in ul_bcos
91 = do let nms = map nameOfUnlinkedBCO ul_bcos
93 ( \ hvs -> let ce_out = addListToFM ce_in (zipLazily nms hvs)
94 in mapM (linkBCO ie ce_out) ul_bcos )
96 let ce_all_additions = zip nms hvals
97 ce_top_additions = filter (isGlobalName.fst) ce_all_additions
98 ce_additions = if toplevs_only then ce_top_additions
100 ce_out = -- make sure we're not inserting duplicate names into the
101 -- closure environment, which leads to trouble.
102 ASSERT (all (not . (`elemFM` ce_in)) (map fst ce_additions))
103 addListToFM ce_in ce_additions
104 return (ce_out, hvals)
106 -- A lazier zip, in which no demand is propagated to the second
107 -- list unless some demand is propagated to the snd of one of the
108 -- result list elems.
110 zipLazily (x:xs) ys = (x, head ys) : zipLazily xs (tail ys)
115 (SizedSeq Word16) -- insns
116 (SizedSeq Word) -- literals
117 (SizedSeq (Either Name PrimOp)) -- ptrs
118 (SizedSeq Name) -- itbl refs
-- Project out the first field of an UnlinkedBCO, i.e. the name it was
-- assembled under.
nameOfUnlinkedBCO bco
   = case bco of
        UnlinkedBCO nm _ _ _ _ -> nm
122 -- When translating expressions, we need to distinguish the root
123 -- BCO for the expression
-- The pair is (root BCO, auxiliary BCOs).  linkIExpr links the auxiliary
-- BCOs first and then links the root against the closure env they produce.
124 type UnlinkedBCOExpr = (UnlinkedBCO, [UnlinkedBCO])
-- Pretty-print an unlinked BCO as "BCO <name> with" followed by the
-- element count of each of its four tables, in the order
-- insns, lits, ptrs, itbls.
instance Outputable UnlinkedBCO where
   ppr (UnlinkedBCO nm insns lits ptrs itbls)
      = sep (text "BCO" : ppr nm : text "with" : concatMap counted tables)
      where
         -- (size, label) for every table; sizeSS yields an Int whatever
         -- the element type, so the four tables can share one list.
         tables = [ (sizeSS insns, "insns")
                  , (sizeSS lits,  "lits")
                  , (sizeSS ptrs,  "ptrs")
                  , (sizeSS itbls, "itbls") ]
         counted (n, what) = [int n, text what]
135 -- these need a proper home
-- ClosureEnv maps a Name to the linked value bound to it: linkSomeBCOs
-- extends it with (name, value) pairs and lookupCE consults it before
-- falling back to a symbol lookup.
136 type ClosureEnv = FiniteMap Name HValue
-- HValue is an opaque stand-in type; values of it are produced here via
-- unsafeCoerce# / addrToHValue# rather than by its constructor.
137 data HValue = HValue -- dummy type, actually a pointer to some Real Code.
-- Restrict a Name-keyed environment to a set of modules: an entry
-- satisfies the predicate only when its name is global and the module
-- it belongs to is one of `mods`.  With the usual FiniteMap meaning of
-- filterFM (entries satisfying the predicate are the ones kept), all
-- other entries are dropped, and that includes every non-global name
-- (ie. temporary bindings from the command line).
-- NOTE(review): `mods` is therefore the set of modules to *retain*, not
-- the set to remove -- confirm that callers pass it with that meaning.
filterNameMap :: [ModuleName] -> FiniteMap Name a -> FiniteMap Name a
filterNameMap mods env
   = filterFM keep env
   where
      -- the element is irrelevant; only the key decides
      keep n _ = isGlobalName n && moduleName (nameModule n) `elem` mods
148 %************************************************************************
150 \subsection{The bytecode assembler}
152 %************************************************************************
154 The object format for bytecodes is: 16 bits for the opcode, and 16 for
155 each field -- so the code can be considered a sequence of 16-bit ints.
156 Each field denotes either a stack offset or number of items on the
157 stack (eg SLIDE), an index into the pointer table (eg PUSH_G), an
158 index into the literal table (eg PUSH_I/D/L), or a bytecode address in
162 -- Top level assembler fn.
163 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
165 assembleBCO (ProtoBCO nm instrs origin)
167 -- pass 1: collect up the offsets of the local labels.
168 -- Remember that the first insn starts at offset 1 since offset 0
169 -- (eventually) will hold the total # of insns.
170 label_env = mkLabelEnv emptyFM 1 instrs
172 mkLabelEnv env i_offset [] = env
173 mkLabelEnv env i_offset (i:is)
175 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
176 in mkLabelEnv new_env (i_offset + instrSize16s i) is
179 = case lookupFM label_env lab of
180 Just bco_offset -> bco_offset
181 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
183 do -- pass 2: generate the instruction, ptr and nonptr bits
184 insns <- return emptySS :: IO (SizedSeq Word16)
185 lits <- return emptySS :: IO (SizedSeq Word)
186 ptrs <- return emptySS :: IO (SizedSeq (Either Name PrimOp))
187 itbls <- return emptySS :: IO (SizedSeq Name)
188 let init_asm_state = (insns,lits,ptrs,itbls)
189 (final_insns, final_lits, final_ptrs, final_itbls)
190 <- mkBits findLabel init_asm_state instrs
192 return (UnlinkedBCO nm final_insns final_lits final_ptrs final_itbls)
194 -- instrs nonptrs ptrs itbls
-- The assembler state threaded through mkBits: the four tables of the
-- BCO under construction, in the order given above.  The components
-- match, position for position, the last four fields of UnlinkedBCO.
195 type AsmState = (SizedSeq Word16, SizedSeq Word,
196 SizedSeq (Either Name PrimOp), SizedSeq Name)
-- A sequence that carries its own element count.  The list is stored
-- newest-first, so adding an element is a cons; listFromSS reverses it
-- to hand the elements back in insertion order.
data SizedSeq a = SizedSeq !Int [a]

-- The sequence with no elements.
emptySS :: SizedSeq a
emptySS = SizedSeq 0 []

-- Append a single element, returning the grown sequence in the
-- caller's monad.
addToSS :: Monad m => SizedSeq a -> a -> m (SizedSeq a)
addToSS (SizedSeq len revElems) new
   = return (SizedSeq (len + 1) (new : revElems))

-- Append a whole list; its elements keep their relative order.
addListToSS :: Monad m => SizedSeq a -> [a] -> m (SizedSeq a)
addListToSS (SizedSeq len revElems) news
   = return (SizedSeq grownLen grownElems)
   where
      grownLen   = len + length news
      grownElems = reverse news ++ revElems

-- Number of elements added so far.
sizeSS :: SizedSeq a -> Int
sizeSS (SizedSeq len _) = len

-- All elements, oldest first.
listFromSS :: Monad m => SizedSeq a -> m [a]
listFromSS (SizedSeq _ revElems) = return (reverse revElems)
207 -- This is where all the action is (pass 2 of the assembler)
208 mkBits :: (Int -> Int) -- label finder
210 -> [BCInstr] -- instructions (in)
213 mkBits findLabel st proto_insns
214 = foldM doInstr st proto_insns
216 doInstr :: AsmState -> BCInstr -> IO AsmState
219 ARGCHECK n -> instr2 st i_ARGCHECK n
220 STKCHECK n -> instr2 st i_STKCHECK n
221 PUSH_L o1 -> instr2 st i_PUSH_L o1
222 PUSH_LL o1 o2 -> instr3 st i_PUSH_LL o1 o2
223 PUSH_LLL o1 o2 o3 -> instr4 st i_PUSH_LLL o1 o2 o3
224 PUSH_G nm -> do (p, st2) <- ptr st nm
225 instr2 st2 i_PUSH_G p
226 PUSH_AS nm pk -> do (p, st2) <- ptr st (Left nm)
227 (np, st3) <- ctoi_itbl st2 pk
228 instr3 st3 i_PUSH_AS p np
229 PUSH_UBX lit nws -> do (np, st2) <- literal st lit
230 instr3 st2 i_PUSH_UBX np nws
231 PUSH_TAG tag -> instr2 st i_PUSH_TAG tag
232 SLIDE n by -> instr3 st i_SLIDE n by
233 ALLOC n -> instr2 st i_ALLOC n
234 MKAP off sz -> instr3 st i_MKAP off sz
235 UNPACK n -> instr2 st i_UNPACK n
236 UPK_TAG n m k -> instr4 st i_UPK_TAG n m k
237 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
238 instr3 st2 i_PACK itbl_no sz
239 LABEL lab -> return st
240 TESTLT_I i l -> do (np, st2) <- int st i
241 instr3 st2 i_TESTLT_I np (findLabel l)
242 TESTEQ_I i l -> do (np, st2) <- int st i
243 instr3 st2 i_TESTEQ_I np (findLabel l)
244 TESTLT_F f l -> do (np, st2) <- float st f
245 instr3 st2 i_TESTLT_F np (findLabel l)
246 TESTEQ_F f l -> do (np, st2) <- float st f
247 instr3 st2 i_TESTEQ_F np (findLabel l)
248 TESTLT_D d l -> do (np, st2) <- double st d
249 instr3 st2 i_TESTLT_D np (findLabel l)
250 TESTEQ_D d l -> do (np, st2) <- double st d
251 instr3 st2 i_TESTEQ_D np (findLabel l)
252 TESTLT_P i l -> instr3 st i_TESTLT_P i (findLabel l)
253 TESTEQ_P i l -> instr3 st i_TESTEQ_P i (findLabel l)
254 CASEFAIL -> instr1 st i_CASEFAIL
255 JMP l -> instr2 st i_JMP (findLabel l)
256 ENTER -> instr1 st i_ENTER
257 RETURN rep -> do (itbl_no,st2) <- itoc_itbl st rep
258 instr2 st2 i_RETURN itbl_no
-- Emit an instruction consisting of the opcode alone: one word (after
-- i2s) is appended to the instruction table; the literal, pointer and
-- itbl tables pass through unchanged.
instr1 (insns, lits, ptrs, itbls) opc
   = addToSS insns (i2s opc) >>= \insns' ->
     return (insns', lits, ptrs, itbls)
-- Emit an opcode followed by one operand: both are converted with i2s
-- and appended, in that order, to the instruction table only.
instr2 (insns, lits, ptrs, itbls) opc a
   = do insns' <- foldM addToSS insns (map i2s [opc, a])
        return (insns', lits, ptrs, itbls)
-- Emit an opcode followed by two operands: the three words are
-- converted with i2s and appended, in order, to the instruction table
-- only.
instr3 (insns, lits, ptrs, itbls) opc a b
   = do insns' <- foldM addToSS insns (map i2s [opc, a, b])
        return (insns', lits, ptrs, itbls)
-- Emit an opcode followed by three operands: the four words are
-- converted with i2s and appended, in order, to the instruction table
-- only.
instr4 (insns, lits, ptrs, itbls) opc a b c
   = do insns' <- foldM addToSS insns (map i2s [opc, a, b, c])
        return (insns', lits, ptrs, itbls)
-- Append the words mkLitF produces for a Float to the literal table.
-- Returns the table size before the append -- i.e. the index of the
-- first new word -- paired with the updated assembler state.
float (insns, lits, ptrs, itbls) f
   = do lits' <- addListToSS lits (mkLitF f)
        return (sizeSS lits, (insns, lits', ptrs, itbls))
-- Append the words mkLitD produces for a Double to the literal table.
-- Returns the table size before the append -- i.e. the index of the
-- first new word -- paired with the updated assembler state.
double (insns, lits, ptrs, itbls) d
   = do lits' <- addListToSS lits (mkLitD d)
        return (sizeSS lits, (insns, lits', ptrs, itbls))
-- Append the words mkLitI produces for an Int to the literal table.
-- Returns the table size before the append -- i.e. the index of the
-- first new word -- paired with the updated assembler state.
int (insns, lits, ptrs, itbls) i
   = do lits' <- addListToSS lits (mkLitI i)
        return (sizeSS lits, (insns, lits', ptrs, itbls))
-- Append the words mkLitA produces for an Addr to the literal table.
-- Returns the table size before the append -- i.e. the index of the
-- first new word -- paired with the updated assembler state.
addr (insns, lits, ptrs, itbls) a
   = do lits' <- addListToSS lits (mkLitA a)
        return (sizeSS lits, (insns, lits', ptrs, itbls))
-- Add one entry to the pointer table.  Returns the slot it occupies
-- (the table size before the addition) and the updated assembler state.
ptr (insns, lits, ptrs, itbls) p
   = do grown <- addToSS ptrs p
        return (slot, (insns, lits, grown, itbls))
   where
      slot = sizeSS ptrs
-- Add the Name of `dcon` (via getName) to the itbl-reference table.
-- Returns the slot it occupies (the table size before the addition)
-- and the updated assembler state.
itbl (insns, lits, ptrs, itbls) dcon
   = do grown <- addToSS itbls (getName dcon)
        return (slot, (insns, lits, ptrs, grown))
   where
      slot = sizeSS itbls
-- Add a Literal to the literal table, dispatching on its constructor to
-- the int / float / double helpers.  Returns what those helpers return:
-- the literal's index in the table and the updated assembler state.
literal st (MachWord w)   = int st (fromIntegral w)
literal st (MachInt j)    = int st (fromIntegral j)
literal st (MachFloat r)  = float st (fromRational r)
literal st (MachDouble r) = double st (fromRational r)
literal st (MachChar c)   = int st c
-- Any other constructor has no encoding here.  Panic by name (as
-- ctoi_itbl does for an unexpected PrimRep) instead of falling off the
-- end of the clauses with an anonymous pattern-match failure.
literal _  other          = pprPanic "mkBits.literal" (ppr other)
320 = addr st ret_itbl_addr
322 ret_itbl_addr = case pk of
323 PtrRep -> stg_ctoi_ret_R1p_info
324 WordRep -> stg_ctoi_ret_R1n_info
325 IntRep -> stg_ctoi_ret_R1n_info
326 AddrRep -> stg_ctoi_ret_R1n_info
327 CharRep -> stg_ctoi_ret_R1n_info
328 FloatRep -> stg_ctoi_ret_F1_info
329 DoubleRep -> stg_ctoi_ret_D1_info
330 VoidRep -> stg_ctoi_ret_V_info
331 _ -> pprPanic "mkBits.ctoi_itbl" (ppr pk)
334 = addr st ret_itbl_addr
336 ret_itbl_addr = case pk of
337 CharRep -> stg_gc_unbx_r1_info
338 IntRep -> stg_gc_unbx_r1_info
339 FloatRep -> stg_gc_f1_info
340 DoubleRep -> stg_gc_d1_info
342 -- Interpreter.c spots this special case
-- Addresses of the C symbols named by the string literals, bound as Addr
-- values.  This first group is what ctoi_itbl chooses from, according to
-- the PrimRep it is given.
344 foreign label "stg_ctoi_ret_R1p_info" stg_ctoi_ret_R1p_info :: Addr
345 foreign label "stg_ctoi_ret_R1n_info" stg_ctoi_ret_R1n_info :: Addr
346 foreign label "stg_ctoi_ret_F1_info" stg_ctoi_ret_F1_info :: Addr
347 foreign label "stg_ctoi_ret_D1_info" stg_ctoi_ret_D1_info :: Addr
348 foreign label "stg_ctoi_ret_V_info" stg_ctoi_ret_V_info :: Addr
-- This second group is what itoc_itbl chooses from (Char/Int share the
-- unbx_r1 entry there).
350 foreign label "stg_gc_unbx_r1_info" stg_gc_unbx_r1_info :: Addr
351 foreign label "stg_gc_f1_info" stg_gc_f1_info :: Addr
352 foreign label "stg_gc_d1_info" stg_gc_d1_info :: Addr
354 -- The size in 16-bit entities of an instruction.
355 instrSize16s :: BCInstr -> Int
388 -- Make lists of host-sized words for literals, so that when the
389 -- words are placed in memory at increasing addresses, the
390 -- bit pattern is correct for the host's word size and endianness.
391 mkLitI :: Int -> [Word]
392 mkLitF :: Float -> [Word]
393 mkLitD :: Double -> [Word]
394 mkLitA :: Addr -> [Word]
398 arr <- newFloatArray ((0::Int),0)
399 writeFloatArray arr 0 f
400 f_arr <- castSTUArray arr
401 w0 <- readWordArray f_arr 0
408 arr <- newDoubleArray ((0::Int),1)
409 writeDoubleArray arr 0 d
410 d_arr <- castSTUArray arr
411 w0 <- readWordArray d_arr 0
412 w1 <- readWordArray d_arr 1
417 arr <- newDoubleArray ((0::Int),0)
418 writeDoubleArray arr 0 d
419 d_arr <- castSTUArray arr
420 w0 <- readWordArray d_arr 0
426 arr <- newIntArray ((0::Int),0)
427 writeIntArray arr 0 i
428 i_arr <- castSTUArray arr
429 w0 <- readWordArray i_arr 0
435 arr <- newAddrArray ((0::Int),0)
436 writeAddrArray arr 0 a
437 a_arr <- castSTUArray arr
438 w0 <- readWordArray a_arr 0
444 %************************************************************************
446 \subsection{Linking interpretables into something we can run}
448 %************************************************************************
453 data BCO# = BCO# ByteArray# -- instrs :: Array Word16#
454 ByteArray# -- literals :: Array Word32#
455 PtrArray# -- ptrs :: Array HValue
456 ByteArray# -- itbls :: Array Addr#
459 linkBCO ie ce (UnlinkedBCO nm insnsSS literalsSS ptrsSS itblsSS)
460 = do insns <- listFromSS insnsSS
461 literals <- listFromSS literalsSS
462 ptrs <- listFromSS ptrsSS
463 itbls <- listFromSS itblsSS
465 linked_ptrs <- mapM (lookupCE ce) ptrs
466 linked_itbls <- mapM (lookupIE ie) itbls
468 let n_insns = sizeSS insnsSS
469 n_literals = sizeSS literalsSS
470 n_ptrs = sizeSS ptrsSS
471 n_itbls = sizeSS itblsSS
473 let ptrs_arr = array (0, n_ptrs-1) (indexify linked_ptrs)
475 ptrs_parr = case ptrs_arr of Array lo hi parr -> parr
477 itbls_arr = array (0, n_itbls-1) (indexify linked_itbls)
478 :: UArray Int ItblPtr
479 itbls_barr = case itbls_arr of UArray lo hi barr -> barr
481 insns_arr | n_insns > 65535
482 = panic "linkBCO: >= 64k insns in BCO"
485 (indexify (fromIntegral n_insns:insns))
487 insns_barr = case insns_arr of UArray lo hi barr -> barr
489 literals_arr = array (0, n_literals-1) (indexify literals)
491 literals_barr = case literals_arr of UArray lo hi barr -> barr
493 indexify :: [a] -> [(Int, a)]
494 indexify xs = zip [0..] xs
496 BCO bco# <- newBCO insns_barr literals_barr ptrs_parr itbls_barr
498 -- WAS: return (unsafeCoerce# bco#)
499 case mkApUpd0# (unsafeCoerce# bco#) of
500 (# final_bco #) -> return final_bco
505 newBCO :: ByteArray# -> ByteArray# -> Array# a -> ByteArray# -> IO BCO
507 = IO (\s -> case newBCO# a b c d s of (# s1, bco #) -> (# s1, BCO bco #))
510 lookupCE :: ClosureEnv -> Either Name PrimOp -> IO HValue
511 lookupCE ce (Right primop)
512 = do m <- lookupSymbol (primopToCLabel primop "closure")
514 Just (Ptr addr) -> case addrToHValue# addr of
515 (# hval #) -> return hval
516 Nothing -> pprPanic "ByteCodeGen.lookupCE(primop)" (ppr primop)
517 lookupCE ce (Left nm)
518 = case lookupFM ce nm of
521 -> do m <- lookupSymbol (nameToCLabel nm "closure")
523 Just (Ptr addr) -> case addrToHValue# addr of
524 (# hval #) -> return hval
525 Nothing -> pprPanic "ByteCodeGen.lookupCE" (ppr nm)
527 lookupIE :: ItblEnv -> Name -> IO (Ptr a)
529 = case lookupFM ie con_nm of
530 Just (Ptr a) -> return (Ptr a)
532 -> do -- try looking up in the object files.
533 m <- lookupSymbol (nameToCLabel con_nm "con_info")
535 Just addr -> return addr
537 -> do -- perhaps a nullary constructor?
538 n <- lookupSymbol (nameToCLabel con_nm "static_info")
540 Just addr -> return addr
541 Nothing -> pprPanic "ByteCodeGen.lookupIE" (ppr con_nm)
-- HACKS!!! ToDo: cleaner
-- Build the symbol string "<module>_<occurrence>_<suffix>" for a Name,
-- taking the module and occurrence parts from the Name's RdrName.
nameToCLabel :: Name -> String{-suffix-} -> String
nameToCLabel n suffix
   = concat [modPart, "_", occPart, "_", suffix]
   where
      rn      = toRdrName n
      modPart = _UNPK_(moduleNameFS (rdrNameModule rn))
      occPart = occNameString (rdrNameOcc rn)
550 primopToCLabel :: PrimOp -> String{-suffix-} -> String
551 primopToCLabel primop suffix
552 = let str = "PrelPrimopWrappers_" ++ occNameString (primOpOcc primop) ++ '_':suffix
553 in --trace ("primopToCLabel: " ++ str)
558 %************************************************************************
560 \subsection{Connect to actual values for bytecode opcodes}
562 %************************************************************************
566 #include "Bytecodes.h"
-- Opcode numbers used by the assembler.  Each i_XXX is the Int value of
-- the corresponding bci_XXX token (presumably a macro brought in by the
-- #include of Bytecodes.h just above -- confirm).
i_ARGCHECK, i_PUSH_L, i_PUSH_LL, i_PUSH_LLL, i_PUSH_G, i_PUSH_AS,
   i_PUSH_UBX, i_PUSH_TAG, i_SLIDE, i_ALLOC, i_MKAP, i_UNPACK,
   i_UPK_TAG, i_PACK, i_TESTLT_I, i_TESTEQ_I, i_TESTLT_F, i_TESTEQ_F,
   i_TESTLT_D, i_TESTEQ_D, i_TESTLT_P, i_TESTEQ_P, i_CASEFAIL,
   i_ENTER, i_RETURN, i_STKCHECK, i_JMP :: Int

i_ARGCHECK = bci_ARGCHECK
i_PUSH_L   = bci_PUSH_L
i_PUSH_LL  = bci_PUSH_LL
i_PUSH_LLL = bci_PUSH_LLL
i_PUSH_G   = bci_PUSH_G
i_PUSH_AS  = bci_PUSH_AS
i_PUSH_UBX = bci_PUSH_UBX
i_PUSH_TAG = bci_PUSH_TAG
i_SLIDE    = bci_SLIDE
i_ALLOC    = bci_ALLOC
i_MKAP     = bci_MKAP
i_UNPACK   = bci_UNPACK
i_UPK_TAG  = bci_UPK_TAG
i_PACK     = bci_PACK
i_TESTLT_I = bci_TESTLT_I
i_TESTEQ_I = bci_TESTEQ_I
i_TESTLT_F = bci_TESTLT_F
i_TESTEQ_F = bci_TESTEQ_F
i_TESTLT_D = bci_TESTLT_D
i_TESTEQ_D = bci_TESTEQ_D
i_TESTLT_P = bci_TESTLT_P
i_TESTEQ_P = bci_TESTEQ_P
i_CASEFAIL = bci_CASEFAIL
i_ENTER    = bci_ENTER
i_RETURN   = bci_RETURN
i_STKCHECK = bci_STKCHECK
i_JMP      = bci_JMP
-- The INTERP_STACK_CHECK_THRESH token as an Int (presumably a macro from
-- an included header -- confirm); exported from this module.
iNTERP_STACK_CHECK_THRESH :: Int
iNTERP_STACK_CHECK_THRESH = INTERP_STACK_CHECK_THRESH