2 % (c) The University of Glasgow 2000
4 \section[ByteCodeLink]{Bytecode assembler and linker}
7 module ByteCodeLink ( UnlinkedBCO, UnlinkedBCOExpr, assembleBCO,
8 ClosureEnv, HValue, linkSomeBCOs, filterNameMap
11 #include "HsVersions.h"
14 import Name ( Name, getName, nameModule, toRdrName )
15 import RdrName ( rdrNameOcc, rdrNameModule )
16 import OccName ( occNameString )
17 import FiniteMap ( FiniteMap, addListToFM, filterFM,
18 addToFM, lookupFM, emptyFM )
20 import Literal ( Literal(..) )
21 import PrimRep ( PrimRep(..) )
22 import Util ( global )
23 import Constants ( wORD_SIZE )
24 import Module ( ModuleName, moduleName, moduleNameFS )
25 import Linker ( lookupSymbol )
26 import FastString ( FastString(..) )
27 import ByteCodeInstr ( BCInstr(..), ProtoBCO(..) )
28 import ByteCodeItbls ( ItblEnv )
31 import Monad ( foldM )
33 import MArray ( castSTUArray,
34 newFloatArray, writeFloatArray,
35 newDoubleArray, writeDoubleArray,
36 newIntArray, writeIntArray,
37 newAddrArray, writeAddrArray )
38 import Foreign ( Word16, Ptr(..) )
41 import PrelBase ( Int(..) )
42 import PrelAddr ( Addr(..) )
43 import PrelGHC ( BCO#, newBCO#, unsafeCoerce#,
44 ByteArray#, Array#, addrToHValue#, mkApUpd0# )
45 import IOExts ( IORef, fixIO, readIORef, writeIORef )
47 import PrelArr ( Array(..) )
48 import PrelIOBase ( IO(..) )
52 %************************************************************************
54 \subsection{Top-level stuff}
56 %************************************************************************
60 -- Link a bunch of BCOs and return them + updated closure env.
61 linkSomeBCOs :: ItblEnv -> ClosureEnv -> [UnlinkedBCO]
62 -> IO (ClosureEnv, [HValue])
63 linkSomeBCOs ie ce_in ul_bcos
64 = do let nms = map nameOfUnlinkedBCO ul_bcos
66 ( \ hvs -> let ce_out = addListToFM ce_in (zipLazily nms hvs)
67 in mapM (linkBCO ie ce_out) ul_bcos )
68 let ce_out = addListToFM ce_in (zip nms hvals)
69 return (ce_out, hvals)
71 -- A lazier zip, in which no demand is propagated to the second
72 -- list unless some demand is propagated to the snd of one of the
75 zipLazily (x:xs) ys = (x, head ys) : zipLazily xs (tail ys)
80 (SizedSeq Word16) -- insns
81 (SizedSeq Word) -- literals
82 (SizedSeq Name) -- ptrs
83 (SizedSeq Name) -- itbl refs
85 nameOfUnlinkedBCO (UnlinkedBCO nm _ _ _ _) = nm
87 -- When translating expressions, we need to distinguish the root
88 -- BCO for the expression
89 type UnlinkedBCOExpr = (UnlinkedBCO, [UnlinkedBCO])
91 instance Outputable UnlinkedBCO where
92 ppr (UnlinkedBCO nm insns lits ptrs itbls)
93 = sep [text "BCO", ppr nm, text "with",
94 int (sizeSS insns), text "insns",
95 int (sizeSS lits), text "lits",
96 int (sizeSS ptrs), text "ptrs",
97 int (sizeSS itbls), text "itbls"]
100 -- these need a proper home
101 type ClosureEnv = FiniteMap Name HValue
102 data HValue = HValue -- dummy type, actually a pointer to some Real Code.
104 -- remove all entries for a given set of modules from the environment
105 filterNameMap :: [ModuleName] -> FiniteMap Name a -> FiniteMap Name a
106 filterNameMap mods env
107 = filterFM (\n _ -> moduleName (nameModule n) `notElem` mods) env
111 %************************************************************************
113 \subsection{The bytecode assembler}
115 %************************************************************************
117 The object format for bytecodes is: 16 bits for the opcode, and 16 for
118 each field -- so the code can be considered a sequence of 16-bit ints.
119 Each field denotes either a stack offset or number of items on the
120 stack (eg SLIDE), and index into the pointer table (eg PUSH_G), an
121 index into the literal table (eg PUSH_I/D/L), or a bytecode address in
125 -- Top level assembler fn.
126 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
128 assembleBCO (ProtoBCO nm instrs origin)
130 -- pass 1: collect up the offsets of the local labels.
131 -- Remember that the first insn starts at offset 1 since offset 0
132 -- (eventually) will hold the total # of insns.
133 label_env = mkLabelEnv emptyFM 1 instrs
135 mkLabelEnv env i_offset [] = env
136 mkLabelEnv env i_offset (i:is)
138 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
139 in mkLabelEnv new_env (i_offset + instrSize16s i) is
142 = case lookupFM label_env lab of
143 Just bco_offset -> bco_offset
144 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
146 do -- pass 2: generate the instruction, ptr and nonptr bits
147 insns <- return emptySS :: IO (SizedSeq Word16)
148 lits <- return emptySS :: IO (SizedSeq Word)
149 ptrs <- return emptySS :: IO (SizedSeq Name)
150 itbls <- return emptySS :: IO (SizedSeq Name)
151 let init_asm_state = (insns,lits,ptrs,itbls)
152 (final_insns, final_lits, final_ptrs, final_itbls)
153 <- mkBits findLabel init_asm_state instrs
155 return (UnlinkedBCO nm final_insns final_lits final_ptrs final_itbls)
157 -- instrs nonptrs ptrs itbls
158 type AsmState = (SizedSeq Word16, SizedSeq Word, SizedSeq Name, SizedSeq Name)
160 data SizedSeq a = SizedSeq !Int [a]
161 emptySS = SizedSeq 0 []
162 addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
163 addListToSS (SizedSeq n r_xs) xs
164 = return (SizedSeq (n + length xs) (reverse xs ++ r_xs))
165 sizeSS (SizedSeq n r_xs) = n
166 listFromSS (SizedSeq n r_xs) = return (reverse r_xs)
169 -- This is where all the action is (pass 2 of the assembler)
170 mkBits :: (Int -> Int) -- label finder
172 -> [BCInstr] -- instructions (in)
175 mkBits findLabel st proto_insns
176 = foldM doInstr st proto_insns
178 doInstr :: AsmState -> BCInstr -> IO AsmState
181 ARGCHECK n -> instr2 st i_ARGCHECK n
182 PUSH_L o1 -> instr2 st i_PUSH_L o1
183 PUSH_LL o1 o2 -> instr3 st i_PUSH_LL o1 o2
184 PUSH_LLL o1 o2 o3 -> instr4 st i_PUSH_LLL o1 o2 o3
185 PUSH_G nm -> do (p, st2) <- ptr st nm
186 instr2 st2 i_PUSH_G p
187 PUSH_AS nm pk -> do (p, st2) <- ptr st nm
188 (np, st3) <- ctoi_itbl st2 pk
189 instr3 st3 i_PUSH_AS p np
190 PUSH_UBX lit nws -> do (np, st2) <- literal st lit
191 instr3 st2 i_PUSH_UBX np nws
192 PUSH_TAG tag -> instr2 st i_PUSH_TAG tag
193 SLIDE n by -> instr3 st i_SLIDE n by
194 ALLOC n -> instr2 st i_ALLOC n
195 MKAP off sz -> instr3 st i_MKAP off sz
196 UNPACK n -> instr2 st i_UNPACK n
197 UPK_TAG n m k -> instr4 st i_UPK_TAG n m k
198 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
199 instr3 st2 i_PACK itbl_no sz
200 LABEL lab -> return st
201 TESTLT_I i l -> do (np, st2) <- int st i
202 instr3 st2 i_TESTLT_I np (findLabel l)
203 TESTEQ_I i l -> do (np, st2) <- int st i
204 instr3 st2 i_TESTEQ_I np (findLabel l)
205 TESTLT_F f l -> do (np, st2) <- float st f
206 instr3 st2 i_TESTLT_F np (findLabel l)
207 TESTEQ_F f l -> do (np, st2) <- float st f
208 instr3 st2 i_TESTEQ_F np (findLabel l)
209 TESTLT_D d l -> do (np, st2) <- double st d
210 instr3 st2 i_TESTLT_D np (findLabel l)
211 TESTEQ_D d l -> do (np, st2) <- double st d
212 instr3 st2 i_TESTEQ_D np (findLabel l)
213 TESTLT_P i l -> instr3 st i_TESTLT_P i (findLabel l)
214 TESTEQ_P i l -> instr3 st i_TESTEQ_P i (findLabel l)
215 CASEFAIL -> instr1 st i_CASEFAIL
216 ENTER -> instr1 st i_ENTER
217 RETURN rep -> do (itbl_no,st2) <- itoc_itbl st rep
218 instr2 st2 i_RETURN itbl_no
223 instr1 (st_i0,st_l0,st_p0,st_I0) i1
224 = do st_i1 <- addToSS st_i0 (i2s i1)
225 return (st_i1,st_l0,st_p0,st_I0)
227 instr2 (st_i0,st_l0,st_p0,st_I0) i1 i2
228 = do st_i1 <- addToSS st_i0 (i2s i1)
229 st_i2 <- addToSS st_i1 (i2s i2)
230 return (st_i2,st_l0,st_p0,st_I0)
232 instr3 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3
233 = do st_i1 <- addToSS st_i0 (i2s i1)
234 st_i2 <- addToSS st_i1 (i2s i2)
235 st_i3 <- addToSS st_i2 (i2s i3)
236 return (st_i3,st_l0,st_p0,st_I0)
238 instr4 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3 i4
239 = do st_i1 <- addToSS st_i0 (i2s i1)
240 st_i2 <- addToSS st_i1 (i2s i2)
241 st_i3 <- addToSS st_i2 (i2s i3)
242 st_i4 <- addToSS st_i3 (i2s i4)
243 return (st_i4,st_l0,st_p0,st_I0)
245 float (st_i0,st_l0,st_p0,st_I0) f
246 = do let ws = mkLitF f
247 st_l1 <- addListToSS st_l0 ws
248 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
250 double (st_i0,st_l0,st_p0,st_I0) d
251 = do let ws = mkLitD d
252 st_l1 <- addListToSS st_l0 ws
253 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
255 int (st_i0,st_l0,st_p0,st_I0) i
256 = do let ws = mkLitI i
257 st_l1 <- addListToSS st_l0 ws
258 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
260 addr (st_i0,st_l0,st_p0,st_I0) a
261 = do let ws = mkLitA a
262 st_l1 <- addListToSS st_l0 ws
263 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
265 ptr (st_i0,st_l0,st_p0,st_I0) p
266 = do st_p1 <- addToSS st_p0 p
267 return (sizeSS st_p0, (st_i0,st_l0,st_p1,st_I0))
269 itbl (st_i0,st_l0,st_p0,st_I0) dcon
270 = do st_I1 <- addToSS st_I0 (getName dcon)
271 return (sizeSS st_I0, (st_i0,st_l0,st_p0,st_I1))
273 literal st (MachInt j) = int st (fromIntegral j)
274 literal st (MachFloat r) = float st (fromRational r)
275 literal st (MachDouble r) = double st (fromRational r)
276 literal st (MachChar c) = int st c
279 = addr st ret_itbl_addr
281 ret_itbl_addr = case pk of
282 PtrRep -> stg_ctoi_ret_R1_info
283 IntRep -> stg_ctoi_ret_R1_info
284 CharRep -> stg_ctoi_ret_R1_info
285 FloatRep -> stg_ctoi_ret_F1_info
286 DoubleRep -> stg_ctoi_ret_D1_info
287 _ -> pprPanic "mkBits.ctoi_itbl" (ppr pk)
290 = addr st ret_itbl_addr
292 ret_itbl_addr = case pk of
293 IntRep -> stg_gc_unbx_r1_info
294 FloatRep -> stg_gc_f1_info
295 DoubleRep -> stg_gc_d1_info
297 foreign label "stg_ctoi_ret_R1_info" stg_ctoi_ret_R1_info :: Addr
298 foreign label "stg_ctoi_ret_F1_info" stg_ctoi_ret_F1_info :: Addr
299 foreign label "stg_ctoi_ret_D1_info" stg_ctoi_ret_D1_info :: Addr
301 foreign label "stg_gc_unbx_r1_info" stg_gc_unbx_r1_info :: Addr
302 foreign label "stg_gc_f1_info" stg_gc_f1_info :: Addr
303 foreign label "stg_gc_d1_info" stg_gc_d1_info :: Addr
305 -- The size in 16-bit entities of an instruction.
306 instrSize16s :: BCInstr -> Int
337 -- Make lists of host-sized words for literals, so that when the
338 -- words are placed in memory at increasing addresses, the
339 -- bit pattern is correct for the host's word size and endianness.
340 mkLitI :: Int -> [Word]
341 mkLitF :: Float -> [Word]
342 mkLitD :: Double -> [Word]
343 mkLitA :: Addr -> [Word]
347 arr <- newFloatArray ((0::Int),0)
348 writeFloatArray arr 0 f
349 f_arr <- castSTUArray arr
350 w0 <- readWordArray f_arr 0
357 arr <- newDoubleArray ((0::Int),1)
358 writeDoubleArray arr 0 d
359 d_arr <- castSTUArray arr
360 w0 <- readWordArray d_arr 0
361 w1 <- readWordArray d_arr 1
366 arr <- newDoubleArray ((0::Int),0)
367 writeDoubleArray arr 0 d
368 d_arr <- castSTUArray arr
369 w0 <- readWordArray d_arr 0
375 arr <- newIntArray ((0::Int),0)
376 writeIntArray arr 0 i
377 i_arr <- castSTUArray arr
378 w0 <- readWordArray i_arr 0
384 arr <- newAddrArray ((0::Int),0)
385 writeAddrArray arr 0 a
386 a_arr <- castSTUArray arr
387 w0 <- readWordArray a_arr 0
393 %************************************************************************
395 \subsection{Linking interpretables into something we can run}
397 %************************************************************************
402 data BCO# = BCO# ByteArray# -- instrs :: array Word16#
403 ByteArray# -- literals :: array Word32#
404 PtrArray# -- ptrs :: Array HValue
405 ByteArray# -- itbls :: Array Addr#
408 GLOBAL_VAR(v_cafTable, [], [HValue])
410 addCAF :: HValue -> IO ()
411 addCAF x = do xs <- readIORef v_cafTable
412 --putStrLn ("addCAF " ++ show (1 + length xs))
413 writeIORef v_cafTable (x:xs)
416 linkBCO ie ce (UnlinkedBCO nm insnsSS literalsSS ptrsSS itblsSS)
417 = do insns <- listFromSS insnsSS
418 literals <- listFromSS literalsSS
419 ptrs <- listFromSS ptrsSS
420 itbls <- listFromSS itblsSS
422 linked_ptrs <- mapM (lookupCE ce) ptrs
423 linked_itbls <- mapM (lookupIE ie) itbls
425 let n_insns = sizeSS insnsSS
426 n_literals = sizeSS literalsSS
427 n_ptrs = sizeSS ptrsSS
428 n_itbls = sizeSS itblsSS
430 let ptrs_arr = array (0, n_ptrs-1) (indexify linked_ptrs)
432 ptrs_parr = case ptrs_arr of Array lo hi parr -> parr
434 itbls_arr = array (0, n_itbls-1) (indexify linked_itbls)
436 itbls_barr = case itbls_arr of UArray lo hi barr -> barr
438 insns_arr | n_insns > 65535
439 = panic "linkBCO: >= 64k insns in BCO"
442 (indexify (fromIntegral n_insns:insns))
444 insns_barr = case insns_arr of UArray lo hi barr -> barr
446 literals_arr = array (0, n_literals-1) (indexify literals)
448 literals_barr = case literals_arr of UArray lo hi barr -> barr
450 indexify :: [a] -> [(Int, a)]
451 indexify xs = zip [0..] xs
453 BCO bco# <- newBCO insns_barr literals_barr ptrs_parr itbls_barr
455 return (unsafeCoerce# bco#)
456 --case mkApUpd0# (unsafeCoerce# bco#) of
457 -- (# final_bco #) -> return final_bco
462 newBCO :: ByteArray# -> ByteArray# -> Array# a -> ByteArray# -> IO BCO
464 = IO (\s -> case newBCO# a b c d s of (# s1, bco #) -> (# s1, BCO bco #))
467 lookupCE :: ClosureEnv -> Name -> IO HValue
469 = case lookupFM ce nm of
472 -> do m <- lookupSymbol (nameToCLabel nm "closure")
474 Just (A# addr) -> case addrToHValue# addr of
475 (# hval #) -> do addCAF hval
477 Nothing -> pprPanic "ByteCodeGen.lookupCE" (ppr nm)
479 lookupIE :: ItblEnv -> Name -> IO Addr
481 = case lookupFM ie con_nm of
482 Just (Ptr a) -> return a
484 -> do -- try looking up in the object files.
485 m <- lookupSymbol (nameToCLabel con_nm "con_info")
487 Just addr -> return addr
489 -> do -- perhaps a nullary constructor?
490 n <- lookupSymbol (nameToCLabel con_nm "static_info")
492 Just addr -> return addr
493 Nothing -> pprPanic "ByteCodeGen.lookupIE" (ppr con_nm)
495 -- HACK!!! ToDo: cleaner
496 nameToCLabel :: Name -> String{-suffix-} -> String
497 nameToCLabel n suffix
498 = _UNPK_(moduleNameFS (rdrNameModule rn))
499 ++ '_':occNameString(rdrNameOcc rn) ++ '_':suffix
500 where rn = toRdrName n
504 %************************************************************************
506 \subsection{Connect to actual values for bytecode opcodes}
508 %************************************************************************
512 #include "Bytecodes.h"
514 i_ARGCHECK = (bci_ARGCHECK :: Int)
515 i_PUSH_L = (bci_PUSH_L :: Int)
516 i_PUSH_LL = (bci_PUSH_LL :: Int)
517 i_PUSH_LLL = (bci_PUSH_LLL :: Int)
518 i_PUSH_G = (bci_PUSH_G :: Int)
519 i_PUSH_AS = (bci_PUSH_AS :: Int)
520 i_PUSH_UBX = (bci_PUSH_UBX :: Int)
521 i_PUSH_TAG = (bci_PUSH_TAG :: Int)
522 i_SLIDE = (bci_SLIDE :: Int)
523 i_ALLOC = (bci_ALLOC :: Int)
524 i_MKAP = (bci_MKAP :: Int)
525 i_UNPACK = (bci_UNPACK :: Int)
526 i_UPK_TAG = (bci_UPK_TAG :: Int)
527 i_PACK = (bci_PACK :: Int)
528 i_TESTLT_I = (bci_TESTLT_I :: Int)
529 i_TESTEQ_I = (bci_TESTEQ_I :: Int)
530 i_TESTLT_F = (bci_TESTLT_F :: Int)
531 i_TESTEQ_F = (bci_TESTEQ_F :: Int)
532 i_TESTLT_D = (bci_TESTLT_D :: Int)
533 i_TESTEQ_D = (bci_TESTEQ_D :: Int)
534 i_TESTLT_P = (bci_TESTLT_P :: Int)
535 i_TESTEQ_P = (bci_TESTEQ_P :: Int)
536 i_CASEFAIL = (bci_CASEFAIL :: Int)
537 i_ENTER = (bci_ENTER :: Int)
538 i_RETURN = (bci_RETURN :: Int)