2 % (c) The University of Glasgow 2000
4 \section[ByteCodeLink]{Bytecode assembler and linker}
7 module ByteCodeLink ( UnlinkedBCO, UnlinkedBCOExpr, assembleBCO,
8 ClosureEnv, HValue, linkSomeBCOs, filterNameMap,
9 iNTERP_STACK_CHECK_THRESH
12 #include "HsVersions.h"
15 import Name ( Name, getName, nameModule, toRdrName, isGlobalName )
16 import RdrName ( rdrNameOcc, rdrNameModule )
17 import OccName ( occNameString )
18 import FiniteMap ( FiniteMap, addListToFM, filterFM,
19 addToFM, lookupFM, emptyFM )
21 import Literal ( Literal(..) )
22 import PrimOp ( PrimOp, primOpOcc )
23 import PrimRep ( PrimRep(..) )
24 import Constants ( wORD_SIZE )
25 import Module ( ModuleName, moduleName, moduleNameFS )
26 import Linker ( lookupSymbol )
27 import FastString ( FastString(..) )
28 import ByteCodeInstr ( BCInstr(..), ProtoBCO(..) )
29 import ByteCodeItbls ( ItblEnv, ItblPtr )
32 import Monad ( foldM )
34 import MArray ( castSTUArray,
35 newFloatArray, writeFloatArray,
36 newDoubleArray, writeDoubleArray,
37 newIntArray, writeIntArray,
38 newAddrArray, writeAddrArray )
39 import Foreign ( Word16, Ptr(..) )
40 import Addr ( Word, Addr, nullAddr )
42 import PrelBase ( Int(..) )
43 import PrelGHC ( BCO#, newBCO#, unsafeCoerce#,
44 ByteArray#, Array#, addrToHValue#, mkApUpd0# )
45 import IOExts ( fixIO )
47 import PrelArr ( Array(..) )
48 import PrelIOBase ( IO(..) )
52 %************************************************************************
54 \subsection{Top-level stuff}
56 %************************************************************************
60 -- Link a bunch of BCOs and return them + updated closure env.
61 linkSomeBCOs :: Bool -- False <=> add _all_ BCOs to returned closure env
62 -- True <=> add only toplevel BCOs to closure env
66 -> IO (ClosureEnv, [HValue])
67 linkSomeBCOs toplevs_only ie ce_in ul_bcos
68 = do let nms = map nameOfUnlinkedBCO ul_bcos
70 ( \ hvs -> let ce_out = addListToFM ce_in (zipLazily nms hvs)
71 in mapM (linkBCO ie ce_out) ul_bcos )
73 let ce_all_additions = zip nms hvals
74 ce_top_additions = filter (isGlobalName.fst) ce_all_additions
75 ce_additions = if toplevs_only then ce_top_additions
77 ce_out = addListToFM ce_in ce_additions
78 return (ce_out, hvals)
80 -- A lazier zip, in which no demand is propagated to the second
81 -- list unless some demand is propagated to the snd of one of the
84 zipLazily (x:xs) ys = (x, head ys) : zipLazily xs (tail ys)
89 (SizedSeq Word16) -- insns
90 (SizedSeq Word) -- literals
91 (SizedSeq (Either Name PrimOp)) -- ptrs
92 (SizedSeq Name) -- itbl refs
-- Project out the Name an unlinked BCO was assembled under (the first
-- constructor field).  The instruction, literal, pointer and itbl
-- tables are ignored.
nameOfUnlinkedBCO :: UnlinkedBCO -> Name
nameOfUnlinkedBCO (UnlinkedBCO bcoName _ _ _ _) = bcoName
-- Translating an expression yields several BCOs, and the root BCO of
-- the expression has to be told apart from the rest.  Presumably the
-- single UnlinkedBCO is that root and the list holds the others.
type UnlinkedBCOExpr = (UnlinkedBCO, [UnlinkedBCO])
-- Debug rendering of an unlinked BCO: its name followed by the number
-- of entries in each of its four tables.  The table contents
-- themselves are not printed.
instance Outputable UnlinkedBCO where
   ppr (UnlinkedBCO nm insns lits ptrs itbls)
      = sep (text "BCO" : ppr nm : text "with" : concat counts)
      where
         counts = [ sized insns "insns", sized lits  "lits"
                  , sized ptrs  "ptrs",  sized itbls "itbls" ]
         -- One "<count> <label>" pair of documents per table.  Kept as
         -- a function binding so it stays polymorphic in the element
         -- type of the SizedSeq it is applied to.
         sized table label = [int (sizeSS table), text label]
-- These need a proper home.

-- Environment mapping a Name to the HValue it is bound to.
type ClosureEnv = FiniteMap Name HValue

-- Dummy type: an HValue is actually a pointer to some Real Code, so the
-- nullary constructor below only exists to give the type a declaration.
data HValue     = HValue
-- Restrict an environment to the given modules: an entry is KEPT when
-- the module its Name belongs to is a member of mods, and dropped
-- otherwise (filterFM retains the entries satisfying the predicate).
-- NOTE(review): the earlier comment here said that entries for the
-- given modules are removed, which is the opposite of what the code
-- does.  The code is left as is; confirm that callers pass the modules
-- they want to keep.
filterNameMap :: [ModuleName] -> FiniteMap Name a -> FiniteMap Name a
filterNameMap mods env
   = filterFM keep env
   where
      -- the element stored under the name plays no part in the decision
      keep nm _ = moduleName (nameModule nm) `elem` mods
119 %************************************************************************
121 \subsection{The bytecode assembler}
123 %************************************************************************
The object format for bytecodes is: 16 bits for the opcode, and 16 for
each field -- so the code can be considered a sequence of 16-bit ints.
Each field denotes either a stack offset or number of items on the
stack (eg SLIDE), an index into the pointer table (eg PUSH_G), an
index into the literal table (eg PUSH_I/D/L), or a bytecode address in
this BCO.
133 -- Top level assembler fn.
134 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
136 assembleBCO (ProtoBCO nm instrs origin)
138 -- pass 1: collect up the offsets of the local labels.
139 -- Remember that the first insn starts at offset 1 since offset 0
140 -- (eventually) will hold the total # of insns.
141 label_env = mkLabelEnv emptyFM 1 instrs
143 mkLabelEnv env i_offset [] = env
144 mkLabelEnv env i_offset (i:is)
146 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
147 in mkLabelEnv new_env (i_offset + instrSize16s i) is
150 = case lookupFM label_env lab of
151 Just bco_offset -> bco_offset
152 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
154 do -- pass 2: generate the instruction, ptr and nonptr bits
155 insns <- return emptySS :: IO (SizedSeq Word16)
156 lits <- return emptySS :: IO (SizedSeq Word)
157 ptrs <- return emptySS :: IO (SizedSeq (Either Name PrimOp))
158 itbls <- return emptySS :: IO (SizedSeq Name)
159 let init_asm_state = (insns,lits,ptrs,itbls)
160 (final_insns, final_lits, final_ptrs, final_itbls)
161 <- mkBits findLabel init_asm_state instrs
163 return (UnlinkedBCO nm final_insns final_lits final_ptrs final_itbls)
-- State accumulated by pass 2 of the assembler.  The four components
-- are, in order:
--                instrs            nonptrs (literals)
--                ptrs                           itbls
type AsmState = (SizedSeq Word16, SizedSeq Word,
                 SizedSeq (Either Name PrimOp), SizedSeq Name)
-- A sequence that carries its own length.  The elements are stored in
-- REVERSE order (most recently added first), so adding an element is a
-- cons; the length field is strict and kept up to date on every
-- addition, so sizeSS never walks the list.
data SizedSeq a = SizedSeq !Int [a]

-- The sequence with no elements.
emptySS :: SizedSeq a
emptySS = SizedSeq 0 []

-- Add one element at the end of the sequence.  The result is wrapped
-- with return because the assembler binds it inside do-blocks.
addToSS :: Monad m => SizedSeq a -> a -> m (SizedSeq a)
addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))

-- Add a whole list at the end of the sequence, preserving the order of
-- the list (hence the reverse before prepending to the reversed store).
addListToSS :: Monad m => SizedSeq a -> [a] -> m (SizedSeq a)
addListToSS (SizedSeq n r_xs) xs
   = return (SizedSeq (n + length xs) (reverse xs ++ r_xs))

-- Number of elements added so far.
sizeSS :: SizedSeq a -> Int
sizeSS (SizedSeq n _) = n

-- The elements in the order they were added.
listFromSS :: Monad m => SizedSeq a -> m [a]
listFromSS (SizedSeq _ r_xs) = return (reverse r_xs)
178 -- This is where all the action is (pass 2 of the assembler)
179 mkBits :: (Int -> Int) -- label finder
181 -> [BCInstr] -- instructions (in)
184 mkBits findLabel st proto_insns
185 = foldM doInstr st proto_insns
187 doInstr :: AsmState -> BCInstr -> IO AsmState
190 ARGCHECK n -> instr2 st i_ARGCHECK n
191 STKCHECK n -> instr2 st i_STKCHECK n
192 PUSH_L o1 -> instr2 st i_PUSH_L o1
193 PUSH_LL o1 o2 -> instr3 st i_PUSH_LL o1 o2
194 PUSH_LLL o1 o2 o3 -> instr4 st i_PUSH_LLL o1 o2 o3
195 PUSH_G nm -> do (p, st2) <- ptr st nm
196 instr2 st2 i_PUSH_G p
197 PUSH_AS nm pk -> do (p, st2) <- ptr st (Left nm)
198 (np, st3) <- ctoi_itbl st2 pk
199 instr3 st3 i_PUSH_AS p np
200 PUSH_UBX lit nws -> do (np, st2) <- literal st lit
201 instr3 st2 i_PUSH_UBX np nws
202 PUSH_TAG tag -> instr2 st i_PUSH_TAG tag
203 SLIDE n by -> instr3 st i_SLIDE n by
204 ALLOC n -> instr2 st i_ALLOC n
205 MKAP off sz -> instr3 st i_MKAP off sz
206 UNPACK n -> instr2 st i_UNPACK n
207 UPK_TAG n m k -> instr4 st i_UPK_TAG n m k
208 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
209 instr3 st2 i_PACK itbl_no sz
210 LABEL lab -> return st
211 TESTLT_I i l -> do (np, st2) <- int st i
212 instr3 st2 i_TESTLT_I np (findLabel l)
213 TESTEQ_I i l -> do (np, st2) <- int st i
214 instr3 st2 i_TESTEQ_I np (findLabel l)
215 TESTLT_F f l -> do (np, st2) <- float st f
216 instr3 st2 i_TESTLT_F np (findLabel l)
217 TESTEQ_F f l -> do (np, st2) <- float st f
218 instr3 st2 i_TESTEQ_F np (findLabel l)
219 TESTLT_D d l -> do (np, st2) <- double st d
220 instr3 st2 i_TESTLT_D np (findLabel l)
221 TESTEQ_D d l -> do (np, st2) <- double st d
222 instr3 st2 i_TESTEQ_D np (findLabel l)
223 TESTLT_P i l -> instr3 st i_TESTLT_P i (findLabel l)
224 TESTEQ_P i l -> instr3 st i_TESTEQ_P i (findLabel l)
225 CASEFAIL -> instr1 st i_CASEFAIL
226 ENTER -> instr1 st i_ENTER
227 RETURN rep -> do (itbl_no,st2) <- itoc_itbl st rep
228 instr2 st2 i_RETURN itbl_no
233 instr1 (st_i0,st_l0,st_p0,st_I0) i1
234 = do st_i1 <- addToSS st_i0 (i2s i1)
235 return (st_i1,st_l0,st_p0,st_I0)
237 instr2 (st_i0,st_l0,st_p0,st_I0) i1 i2
238 = do st_i1 <- addToSS st_i0 (i2s i1)
239 st_i2 <- addToSS st_i1 (i2s i2)
240 return (st_i2,st_l0,st_p0,st_I0)
242 instr3 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3
243 = do st_i1 <- addToSS st_i0 (i2s i1)
244 st_i2 <- addToSS st_i1 (i2s i2)
245 st_i3 <- addToSS st_i2 (i2s i3)
246 return (st_i3,st_l0,st_p0,st_I0)
248 instr4 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3 i4
249 = do st_i1 <- addToSS st_i0 (i2s i1)
250 st_i2 <- addToSS st_i1 (i2s i2)
251 st_i3 <- addToSS st_i2 (i2s i3)
252 st_i4 <- addToSS st_i3 (i2s i4)
253 return (st_i4,st_l0,st_p0,st_I0)
255 float (st_i0,st_l0,st_p0,st_I0) f
256 = do let ws = mkLitF f
257 st_l1 <- addListToSS st_l0 ws
258 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
260 double (st_i0,st_l0,st_p0,st_I0) d
261 = do let ws = mkLitD d
262 st_l1 <- addListToSS st_l0 ws
263 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
265 int (st_i0,st_l0,st_p0,st_I0) i
266 = do let ws = mkLitI i
267 st_l1 <- addListToSS st_l0 ws
268 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
270 addr (st_i0,st_l0,st_p0,st_I0) a
271 = do let ws = mkLitA a
272 st_l1 <- addListToSS st_l0 ws
273 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
275 ptr (st_i0,st_l0,st_p0,st_I0) p
276 = do st_p1 <- addToSS st_p0 p
277 return (sizeSS st_p0, (st_i0,st_l0,st_p1,st_I0))
279 itbl (st_i0,st_l0,st_p0,st_I0) dcon
280 = do st_I1 <- addToSS st_I0 (getName dcon)
281 return (sizeSS st_I0, (st_i0,st_l0,st_p0,st_I1))
283 literal st (MachWord w) = int st (fromIntegral w)
284 literal st (MachInt j) = int st (fromIntegral j)
285 literal st (MachFloat r) = float st (fromRational r)
286 literal st (MachDouble r) = double st (fromRational r)
287 literal st (MachChar c) = int st c
290 = addr st ret_itbl_addr
292 ret_itbl_addr = case pk of
293 PtrRep -> stg_ctoi_ret_R1p_info
294 WordRep -> stg_ctoi_ret_R1n_info
295 IntRep -> stg_ctoi_ret_R1n_info
296 AddrRep -> stg_ctoi_ret_R1n_info
297 CharRep -> stg_ctoi_ret_R1n_info
298 FloatRep -> stg_ctoi_ret_F1_info
299 DoubleRep -> stg_ctoi_ret_D1_info
300 VoidRep -> stg_ctoi_ret_V_info
301 _ -> pprPanic "mkBits.ctoi_itbl" (ppr pk)
304 = addr st ret_itbl_addr
306 ret_itbl_addr = case pk of
307 CharRep -> stg_gc_unbx_r1_info
308 IntRep -> stg_gc_unbx_r1_info
309 FloatRep -> stg_gc_f1_info
310 DoubleRep -> stg_gc_d1_info
312 -- Interpreter.c spots this special case
-- Addresses of info tables, bound by symbol name (presumably symbols
-- exported by the run-time system -- confirm).

-- Chosen by the ctoi_itbl case on PrimRep:
--    PtrRep                            -> R1p
--    WordRep, IntRep, AddrRep, CharRep -> R1n
--    FloatRep                          -> F1
--    DoubleRep                         -> D1
--    VoidRep                           -> V
foreign label "stg_ctoi_ret_R1p_info" stg_ctoi_ret_R1p_info :: Addr
foreign label "stg_ctoi_ret_R1n_info" stg_ctoi_ret_R1n_info :: Addr
foreign label "stg_ctoi_ret_F1_info"  stg_ctoi_ret_F1_info :: Addr
foreign label "stg_ctoi_ret_D1_info"  stg_ctoi_ret_D1_info :: Addr
foreign label "stg_ctoi_ret_V_info"   stg_ctoi_ret_V_info :: Addr

-- Chosen by the other PrimRep case in mkBits:
--    CharRep, IntRep -> unbx_r1
--    FloatRep        -> f1
--    DoubleRep       -> d1
foreign label "stg_gc_unbx_r1_info" stg_gc_unbx_r1_info :: Addr
foreign label "stg_gc_f1_info"      stg_gc_f1_info :: Addr
foreign label "stg_gc_d1_info"      stg_gc_d1_info :: Addr
324 -- The size in 16-bit entities of an instruction.
325 instrSize16s :: BCInstr -> Int
357 -- Make lists of host-sized words for literals, so that when the
358 -- words are placed in memory at increasing addresses, the
359 -- bit pattern is correct for the host's word size and endianness.
360 mkLitI :: Int -> [Word]
361 mkLitF :: Float -> [Word]
362 mkLitD :: Double -> [Word]
363 mkLitA :: Addr -> [Word]
367 arr <- newFloatArray ((0::Int),0)
368 writeFloatArray arr 0 f
369 f_arr <- castSTUArray arr
370 w0 <- readWordArray f_arr 0
377 arr <- newDoubleArray ((0::Int),1)
378 writeDoubleArray arr 0 d
379 d_arr <- castSTUArray arr
380 w0 <- readWordArray d_arr 0
381 w1 <- readWordArray d_arr 1
386 arr <- newDoubleArray ((0::Int),0)
387 writeDoubleArray arr 0 d
388 d_arr <- castSTUArray arr
389 w0 <- readWordArray d_arr 0
395 arr <- newIntArray ((0::Int),0)
396 writeIntArray arr 0 i
397 i_arr <- castSTUArray arr
398 w0 <- readWordArray i_arr 0
404 arr <- newAddrArray ((0::Int),0)
405 writeAddrArray arr 0 a
406 a_arr <- castSTUArray arr
407 w0 <- readWordArray a_arr 0
413 %************************************************************************
415 \subsection{Linking interpretables into something we can run}
417 %************************************************************************
422 data BCO# = BCO# ByteArray# -- instrs :: array Word16#
423 ByteArray# -- literals :: array Word32#
424 PtrArray# -- ptrs :: Array HValue
425 ByteArray# -- itbls :: Array Addr#
428 linkBCO ie ce (UnlinkedBCO nm insnsSS literalsSS ptrsSS itblsSS)
429 = do insns <- listFromSS insnsSS
430 literals <- listFromSS literalsSS
431 ptrs <- listFromSS ptrsSS
432 itbls <- listFromSS itblsSS
434 linked_ptrs <- mapM (lookupCE ce) ptrs
435 linked_itbls <- mapM (lookupIE ie) itbls
437 let n_insns = sizeSS insnsSS
438 n_literals = sizeSS literalsSS
439 n_ptrs = sizeSS ptrsSS
440 n_itbls = sizeSS itblsSS
442 let ptrs_arr = array (0, n_ptrs-1) (indexify linked_ptrs)
444 ptrs_parr = case ptrs_arr of Array lo hi parr -> parr
446 itbls_arr = array (0, n_itbls-1) (indexify linked_itbls)
447 :: UArray Int ItblPtr
448 itbls_barr = case itbls_arr of UArray lo hi barr -> barr
450 insns_arr | n_insns > 65535
451 = panic "linkBCO: >= 64k insns in BCO"
454 (indexify (fromIntegral n_insns:insns))
456 insns_barr = case insns_arr of UArray lo hi barr -> barr
458 literals_arr = array (0, n_literals-1) (indexify literals)
460 literals_barr = case literals_arr of UArray lo hi barr -> barr
462 indexify :: [a] -> [(Int, a)]
463 indexify xs = zip [0..] xs
465 BCO bco# <- newBCO insns_barr literals_barr ptrs_parr itbls_barr
467 -- WAS: return (unsafeCoerce# bco#)
468 case mkApUpd0# (unsafeCoerce# bco#) of
469 (# final_bco #) -> return final_bco
474 newBCO :: ByteArray# -> ByteArray# -> Array# a -> ByteArray# -> IO BCO
476 = IO (\s -> case newBCO# a b c d s of (# s1, bco #) -> (# s1, BCO bco #))
479 lookupCE :: ClosureEnv -> Either Name PrimOp -> IO HValue
480 lookupCE ce (Right primop)
481 = do m <- lookupSymbol (primopToCLabel primop "closure")
483 Just (Ptr addr) -> case addrToHValue# addr of
484 (# hval #) -> return hval
485 Nothing -> pprPanic "ByteCodeGen.lookupCE(primop)" (ppr primop)
486 lookupCE ce (Left nm)
487 = case lookupFM ce nm of
490 -> do m <- lookupSymbol (nameToCLabel nm "closure")
492 Just (Ptr addr) -> case addrToHValue# addr of
493 (# hval #) -> return hval
494 Nothing -> pprPanic "ByteCodeGen.lookupCE" (ppr nm)
496 lookupIE :: ItblEnv -> Name -> IO (Ptr a)
498 = case lookupFM ie con_nm of
499 Just (Ptr a) -> return (Ptr a)
501 -> do -- try looking up in the object files.
502 m <- lookupSymbol (nameToCLabel con_nm "con_info")
504 Just addr -> return addr
506 -> do -- perhaps a nullary constructor?
507 n <- lookupSymbol (nameToCLabel con_nm "static_info")
509 Just addr -> return addr
510 Nothing -> pprPanic "ByteCodeGen.lookupIE" (ppr con_nm)
-- HACKS!!! ToDo: cleaner
-- Build the symbol string looked up for a Name:
--    <module name> ++ "_" ++ <occurrence name> ++ "_" ++ <suffix>
-- Both the module and the occurrence part are taken from the RdrName
-- obtained via toRdrName.  The text is used verbatim.
-- NOTE(review): no encoding step is visible here; confirm the
-- occurrence string already has the form the symbol table expects.
nameToCLabel :: Name -> String{-suffix-} -> String
nameToCLabel n suffix
   = concat [modPart, "_", occPart, "_", suffix]
   where
      rn      = toRdrName n
      modPart = _UNPK_(moduleNameFS (rdrNameModule rn))
      occPart = occNameString (rdrNameOcc rn)
519 primopToCLabel :: PrimOp -> String{-suffix-} -> String
520 primopToCLabel primop suffix
521 = let str = "PrelPrimopWrappers_" ++ occNameString (primOpOcc primop) ++ '_':suffix
522 in --trace ("primopToCLabel: " ++ str)
527 %************************************************************************
529 \subsection{Connect to actual values for bytecode opcodes}
531 %************************************************************************
535 #include "Bytecodes.h"
-- Haskell-side names for the bytecode opcodes, one per instruction the
-- assembler emits.  Each bci_* name is expected to come from the
-- #include of "Bytecodes.h" above (presumably a preprocessor macro for
-- a numeric literal -- confirm); the annotation pins it to Int, the
-- type the assembler's instrN helpers take for an opcode.
i_ARGCHECK = (bci_ARGCHECK :: Int)
i_PUSH_L   = (bci_PUSH_L :: Int)
i_PUSH_LL  = (bci_PUSH_LL :: Int)
i_PUSH_LLL = (bci_PUSH_LLL :: Int)
i_PUSH_G   = (bci_PUSH_G :: Int)
i_PUSH_AS  = (bci_PUSH_AS :: Int)
i_PUSH_UBX = (bci_PUSH_UBX :: Int)
i_PUSH_TAG = (bci_PUSH_TAG :: Int)
i_SLIDE    = (bci_SLIDE :: Int)
i_ALLOC    = (bci_ALLOC :: Int)
i_MKAP     = (bci_MKAP :: Int)
i_UNPACK   = (bci_UNPACK :: Int)
i_UPK_TAG  = (bci_UPK_TAG :: Int)
i_PACK     = (bci_PACK :: Int)
i_TESTLT_I = (bci_TESTLT_I :: Int)
i_TESTEQ_I = (bci_TESTEQ_I :: Int)
i_TESTLT_F = (bci_TESTLT_F :: Int)
i_TESTEQ_F = (bci_TESTEQ_F :: Int)
i_TESTLT_D = (bci_TESTLT_D :: Int)
i_TESTEQ_D = (bci_TESTEQ_D :: Int)
i_TESTLT_P = (bci_TESTLT_P :: Int)
i_TESTEQ_P = (bci_TESTEQ_P :: Int)
i_CASEFAIL = (bci_CASEFAIL :: Int)
i_ENTER    = (bci_ENTER :: Int)
i_RETURN   = (bci_RETURN :: Int)
i_STKCHECK = (bci_STKCHECK :: Int)
-- Int-typed Haskell name for INTERP_STACK_CHECK_THRESH; this is the
-- form that appears in the module's export list.
-- NOTE(review): the upper-case name is not defined in this file;
-- presumably it comes from one of the #included headers -- confirm.
iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)