2 % (c) The University of Glasgow 2000
4 \section[ByteCodeLink]{Bytecode assembler and linker}
8 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
11 assembleBCOs, assembleBCO,
14 UnlinkedBCO(..), UnlinkedBCOExpr, nameOfUnlinkedBCO, bcosFreeNames,
15 SizedSeq, sizeSS, ssElts,
16 iNTERP_STACK_CHECK_THRESH
19 #include "HsVersions.h"
21 import ByteCodeInstr ( BCInstr(..), ProtoBCO(..) )
22 import ByteCodeItbls ( ItblEnv, mkITbls )
24 import Name ( Name, getName )
26 import FiniteMap ( addToFM, lookupFM, emptyFM )
28 import Literal ( Literal(..) )
29 import TyCon ( TyCon )
30 import PrimOp ( PrimOp )
31 import PrimRep ( PrimRep(..), isFollowableRep )
32 import Constants ( wORD_SIZE )
33 import FastString ( FastString(..), unpackFS )
37 import Control.Monad ( foldM )
38 import Control.Monad.ST ( runST )
40 import GHC.Word ( Word(..) )
41 import Data.Array.MArray ( MArray, newArray_, readArray, writeArray )
42 import Data.Array.ST ( castSTUArray )
43 import Foreign.Ptr ( nullPtr )
44 import Foreign ( Word16, free )
45 import Data.Int ( Int64 )
47 #if __GLASGOW_HASKELL__ >= 503
48 import GHC.IOBase ( IO(..) )
49 import GHC.Ptr ( Ptr(..) )
51 import PrelIOBase ( IO(..) )
52 import Ptr ( Ptr(..) )
58 %************************************************************************
62 %************************************************************************
65 -- CompiledByteCode represents the result of byte-code
66 -- compiling a bunch of functions and data types
69 = ByteCode [UnlinkedBCO] -- Bunch of interpretable bindings
70 ItblEnv -- A mapping from DataCons to their itbls
-- Printing compiled byte code shows only its BCOs; the itbl
-- environment carried alongside them is not displayed.
instance Outputable CompiledByteCode where
   ppr compiled
      = case compiled of
           ByteCode unlinked _ -> ppr unlinked
78 (SizedSeq Word16) -- insns
79 (SizedSeq (Either Word FastString)) -- literals
80 -- Either literal words or a pointer to an asciiz
81 -- string, denoting a label whose *address* should
82 -- be determined at link time
83 (SizedSeq (Either Name PrimOp)) -- ptrs
84 (SizedSeq Name) -- itbl refs
-- Project out the first field of an UnlinkedBCO: the name it is
-- known by.
nameOfUnlinkedBCO bco
   = case bco of
        UnlinkedBCO bco_name _ _ _ _ -> bco_name
88 bcosFreeNames :: [UnlinkedBCO] -> NameSet
89 -- Finds external references. Remember to remove the names
90 -- defined by this group of BCOs themselves
92 = free_names `minusNameSet` defined_names
94 defined_names = mkNameSet (map nameOfUnlinkedBCO bcos)
95 free_names = foldr (unionNameSets . bco_refs) emptyNameSet bcos
97 bco_refs (UnlinkedBCO _ _ _ ptrs itbls)
98 = mkNameSet [n | Left n <- ssElts ptrs] `unionNameSets`
99 mkNameSet (ssElts itbls)
101 -- When translating expressions, we need to distinguish the root
102 -- BCO for the expression
-- NOTE(review): presumably the first component is that root BCO and
-- the list holds the remaining BCOs -- confirm against the producer.
103 type UnlinkedBCOExpr = (UnlinkedBCO, [UnlinkedBCO])
-- A BCO is summarised as its name followed by the number of entries
-- in each of its four tables (instructions, literals, pointers and
-- itbl references).
instance Outputable UnlinkedBCO where
   ppr (UnlinkedBCO bco_name code literals pointers itbl_refs)
      = sep ( [text "BCO", ppr bco_name, text "with"]
              ++ counted (sizeSS code)      "insns"
              ++ counted (sizeSS literals)  "lits"
              ++ counted (sizeSS pointers)  "ptrs"
              ++ counted (sizeSS itbl_refs) "itbls" )
      where
         -- A count followed by the label of the thing being counted.
         counted n what = [int n, text what]
115 %************************************************************************
117 \subsection{The bytecode assembler}
119 %************************************************************************
121 The object format for bytecodes is: 16 bits for the opcode, and 16 for
122 each field -- so the code can be considered a sequence of 16-bit ints.
123 Each field denotes either a stack offset or number of items on the
124 stack (eg SLIDE), an index into the pointer table (eg PUSH_G), an
125 index into the literal table (eg PUSH_I/D/L), or a bytecode address in
-- Top-level entry point of the assembler.  Builds the itbl
-- environment for the given type constructors first, then assembles
-- every proto-BCO in list order, and packages both results as
-- ByteCode.
assembleBCOs :: [ProtoBCO Name] -> [TyCon] -> IO CompiledByteCode
assembleBCOs proto_bcos tycons
   = mkITbls tycons              >>= \itbl_env ->
     mapM assembleBCO proto_bcos >>= \unlinked ->
     return (ByteCode unlinked itbl_env)
136 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
138 assembleBCO (ProtoBCO nm instrs origin malloced)
140 -- pass 1: collect up the offsets of the local labels.
141 -- Remember that the first insn starts at offset 1 since offset 0
142 -- (eventually) will hold the total # of insns.
143 label_env = mkLabelEnv emptyFM 1 instrs
145 mkLabelEnv env i_offset [] = env
146 mkLabelEnv env i_offset (i:is)
148 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
149 in mkLabelEnv new_env (i_offset + instrSize16s i) is
152 = case lookupFM label_env lab of
153 Just bco_offset -> bco_offset
154 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
156 do -- pass 2: generate the instruction, ptr and nonptr bits
157 insns <- return emptySS :: IO (SizedSeq Word16)
158 lits <- return emptySS :: IO (SizedSeq (Either Word FastString))
159 ptrs <- return emptySS :: IO (SizedSeq (Either Name PrimOp))
160 itbls <- return emptySS :: IO (SizedSeq Name)
161 let init_asm_state = (insns,lits,ptrs,itbls)
162 (final_insns, final_lits, final_ptrs, final_itbls)
163 <- mkBits findLabel init_asm_state instrs
165 let ul_bco = UnlinkedBCO nm final_insns final_lits final_ptrs final_itbls
167 -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
168 -- objects, since they might get run too early. Disable this until
169 -- we figure out what to do.
170 -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
174 zonk ptr = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
177 -- instrs nonptrs ptrs itbls
178 type AsmState = (SizedSeq Word16,
179 SizedSeq (Either Word FastString),
180 SizedSeq (Either Name PrimOp),
-- A sequence that carries its own length.  The elements are stored
-- newest-first, so adding one element is a cons; ssElts reverses the
-- stored list to give the elements back in insertion order.
data SizedSeq a = SizedSeq !Int [a]

-- The sequence with no elements.
emptySS :: SizedSeq a
emptySS = SizedSeq 0 []

-- Why are these two monadic???
-- (Neither performs any effect; both merely 'return' the extended
-- sequence in whatever monad the caller is running in.)

-- Append one element at the end of the sequence.
addToSS :: Monad m => SizedSeq a -> a -> m (SizedSeq a)
addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))

-- Append a list of elements at the end, keeping their relative order.
addListToSS :: Monad m => SizedSeq a -> [a] -> m (SizedSeq a)
addListToSS (SizedSeq n r_xs) xs
   = return (SizedSeq (n + length xs) (reverse xs ++ r_xs))

-- The elements, oldest first.
ssElts :: SizedSeq a -> [a]
ssElts (SizedSeq _ r_xs) = reverse r_xs

-- The number of elements; read from the cached count, not recomputed.
sizeSS :: SizedSeq a -> Int
sizeSS (SizedSeq n _) = n
197 -- This is where all the action is (pass 2 of the assembler)
198 mkBits :: (Int -> Int) -- label finder
200 -> [BCInstr] -- instructions (in)
203 mkBits findLabel st proto_insns
204 = foldM doInstr st proto_insns
206 doInstr :: AsmState -> BCInstr -> IO AsmState
209 SWIZZLE stkoff n -> instr3 st i_SWIZZLE stkoff n
210 ARGCHECK n -> instr2 st i_ARGCHECK n
211 STKCHECK n -> instr2 st i_STKCHECK n
212 PUSH_L o1 -> instr2 st i_PUSH_L o1
213 PUSH_LL o1 o2 -> instr3 st i_PUSH_LL o1 o2
214 PUSH_LLL o1 o2 o3 -> instr4 st i_PUSH_LLL o1 o2 o3
215 PUSH_G nm -> do (p, st2) <- ptr st nm
216 instr2 st2 i_PUSH_G p
217 PUSH_AS nm pk -> do (p, st2) <- ptr st (Left nm)
218 (np, st3) <- ctoi_itbl st2 pk
219 instr3 st3 i_PUSH_AS p np
220 PUSH_UBX (Left lit) nws
221 -> do (np, st2) <- literal st lit
222 instr3 st2 i_PUSH_UBX np nws
223 PUSH_UBX (Right aa) nws
224 -> do (np, st2) <- addr st aa
225 instr3 st2 i_PUSH_UBX np nws
227 PUSH_TAG tag -> instr2 st i_PUSH_TAG tag
228 SLIDE n by -> instr3 st i_SLIDE n by
229 ALLOC n -> instr2 st i_ALLOC n
230 MKAP off sz -> instr3 st i_MKAP off sz
231 UNPACK n -> instr2 st i_UNPACK n
232 UPK_TAG n m k -> instr4 st i_UPK_TAG n m k
233 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
234 instr3 st2 i_PACK itbl_no sz
235 LABEL lab -> return st
236 TESTLT_I i l -> do (np, st2) <- int st i
237 instr3 st2 i_TESTLT_I np (findLabel l)
238 TESTEQ_I i l -> do (np, st2) <- int st i
239 instr3 st2 i_TESTEQ_I np (findLabel l)
240 TESTLT_F f l -> do (np, st2) <- float st f
241 instr3 st2 i_TESTLT_F np (findLabel l)
242 TESTEQ_F f l -> do (np, st2) <- float st f
243 instr3 st2 i_TESTEQ_F np (findLabel l)
244 TESTLT_D d l -> do (np, st2) <- double st d
245 instr3 st2 i_TESTLT_D np (findLabel l)
246 TESTEQ_D d l -> do (np, st2) <- double st d
247 instr3 st2 i_TESTEQ_D np (findLabel l)
248 TESTLT_P i l -> instr3 st i_TESTLT_P i (findLabel l)
249 TESTEQ_P i l -> instr3 st i_TESTEQ_P i (findLabel l)
250 CASEFAIL -> instr1 st i_CASEFAIL
251 JMP l -> instr2 st i_JMP (findLabel l)
252 ENTER -> instr1 st i_ENTER
253 RETURN rep -> do (itbl_no,st2) <- itoc_itbl st rep
254 instr2 st2 i_RETURN itbl_no
255 CCALL m_addr -> do (np, st2) <- addr st m_addr
256 instr2 st2 i_CCALL np
-- Emit an opcode followed by zero, one, two or three operands.
-- Every argument is converted with i2s and appended, left to right,
-- to the instruction stream; the literal, pointer and itbl tables
-- are passed through untouched.
instr1 st w1          = emitWords st [w1]
instr2 st w1 w2       = emitWords st [w1, w2]
instr3 st w1 w2 w3    = emitWords st [w1, w2, w3]
instr4 st w1 w2 w3 w4 = emitWords st [w1, w2, w3, w4]

-- Shared worker for instr1..instr4.
emitWords (insns, lits, ptrs, itbls) ws
   = do insns' <- foldM addToSS insns (map i2s ws)
        return (insns', lits, ptrs, itbls)
-- Literal-table helpers.  Each one appends the entries encoding its
-- argument to the literal table and returns, paired with the updated
-- assembler state, the index of the first entry it added (that is,
-- the size of the literal table beforehand).
--
-- The numeric and address forms store the words produced by the
-- matching mkLit* function as Left entries.
float  st f = addLits st (map Left (mkLitF f))
double st d = addLits st (map Left (mkLitD d))
int    st i = addLits st (map Left (mkLitI i))
int64  st i = addLits st (map Left (mkLitI64 i))
addr   st a = addLits st (map Left (mkLitPtr a))

-- A label occupies a single Right entry holding its name.
litlabel st fs = addLits st [Right fs]

-- Shared worker: extend the literal table with the given entries,
-- leaving the instruction, pointer and itbl tables unchanged.
addLits (st_i0,st_l0,st_p0,st_I0) new_lits
   = do st_l1 <- addListToSS st_l0 new_lits
        return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
-- Append an entry to the pointer table.  The result pairs the index
-- of the new entry (the table's size beforehand) with the new state.
ptr (insns, lits, ptrs, itbls) p
   = addToSS ptrs p >>= \ptrs' ->
     return (sizeSS ptrs, (insns, lits, ptrs', itbls))

-- Append the Name of dcon to the itbl-reference table.  The result
-- pairs the index of the new entry with the new state.
itbl (insns, lits, ptrs, itbls) dcon
   = addToSS itbls (getName dcon) >>= \itbls' ->
     return (sizeSS itbls, (insns, lits, ptrs, itbls'))
-- Add a Literal to the literal table by dispatching on its
-- constructor to the matching table helper.  Constructors not listed
-- here cause a panic.
literal st lit
   = case lit of
        MachLabel fs  -> litlabel st fs
        MachWord w    -> int st (fromIntegral w)
        MachInt j     -> int st (fromIntegral j)
        MachFloat r   -> float st (fromRational r)
        MachDouble r  -> double st (fromRational r)
        MachChar c    -> int st c
        MachInt64 ii  -> int64 st (fromIntegral ii)
        MachWord64 ii -> int64 st (fromIntegral ii)
        other         -> pprPanic "ByteCodeLink.literal" (ppr other)
331 = addr st ret_itbl_addr
335 WordRep -> stg_ctoi_ret_R1n_info
336 IntRep -> stg_ctoi_ret_R1n_info
337 AddrRep -> stg_ctoi_ret_R1n_info
338 CharRep -> stg_ctoi_ret_R1n_info
339 FloatRep -> stg_ctoi_ret_F1_info
340 DoubleRep -> stg_ctoi_ret_D1_info
341 VoidRep -> stg_ctoi_ret_V_info
342 other | isFollowableRep pk -> stg_ctoi_ret_R1p_info
343 -- Includes ArrayRep, ByteArrayRep, as well as
344 -- the obvious PtrRep
346 -> pprPanic "ByteCodeLink.ctoi_itbl" (ppr pk)
349 = addr st ret_itbl_addr
353 CharRep -> stg_gc_unbx_r1_info
354 IntRep -> stg_gc_unbx_r1_info
355 WordRep -> stg_gc_unbx_r1_info
356 AddrRep -> stg_gc_unbx_r1_info
357 FloatRep -> stg_gc_f1_info
358 DoubleRep -> stg_gc_d1_info
359 VoidRep -> nullPtr -- Interpreter.c spots this special case
360 other | isFollowableRep pk -> stg_gc_unpt_r1_info
362 -> pprPanic "ByteCodeLink.itoc_itbl" (ppr pk)
-- Addresses of external symbols, imported as labels of type Ptr ().
-- The stg_ctoi_ret_* labels are the alternatives of the per-PrimRep
-- case that panics with "ByteCodeLink.ctoi_itbl" on an unhandled rep.
364 foreign label "stg_ctoi_ret_R1p_info" stg_ctoi_ret_R1p_info :: Ptr ()
365 foreign label "stg_ctoi_ret_R1n_info" stg_ctoi_ret_R1n_info :: Ptr ()
366 foreign label "stg_ctoi_ret_F1_info" stg_ctoi_ret_F1_info :: Ptr ()
367 foreign label "stg_ctoi_ret_D1_info" stg_ctoi_ret_D1_info :: Ptr ()
368 foreign label "stg_ctoi_ret_V_info" stg_ctoi_ret_V_info :: Ptr ()
-- The stg_gc_* labels are the alternatives of the per-PrimRep case
-- that panics with "ByteCodeLink.itoc_itbl" on an unhandled rep.
370 foreign label "stg_gc_unbx_r1_info" stg_gc_unbx_r1_info :: Ptr ()
371 foreign label "stg_gc_unpt_r1_info" stg_gc_unpt_r1_info :: Ptr ()
372 foreign label "stg_gc_f1_info" stg_gc_f1_info :: Ptr ()
373 foreign label "stg_gc_d1_info" stg_gc_d1_info :: Ptr ()
375 -- The size in 16-bit entities of an instruction.
376 instrSize16s :: BCInstr -> Int
409 -- Make lists of host-sized words for literals, so that when the
410 -- words are placed in memory at increasing addresses, the
411 -- bit pattern is correct for the host's word size and endianness.
412 mkLitI :: Int -> [Word]
413 mkLitF :: Float -> [Word]
414 mkLitD :: Double -> [Word]
415 mkLitPtr :: Ptr () -> [Word]
416 mkLitI64 :: Int64 -> [Word]
420 arr <- newArray_ ((0::Int),0)
422 f_arr <- castSTUArray arr
423 w0 <- readArray f_arr 0
430 arr <- newArray_ ((0::Int),1)
432 d_arr <- castSTUArray arr
433 w0 <- readArray d_arr 0
434 w1 <- readArray d_arr 1
435 return [w0 :: Word, w1]
439 arr <- newArray_ ((0::Int),0)
441 d_arr <- castSTUArray arr
442 w0 <- readArray d_arr 0
449 arr <- newArray_ ((0::Int),1)
451 d_arr <- castSTUArray arr
452 w0 <- readArray d_arr 0
453 w1 <- readArray d_arr 1
454 return [w0 :: Word,w1]
458 arr <- newArray_ ((0::Int),0)
460 d_arr <- castSTUArray arr
461 w0 <- readArray d_arr 0
467 arr <- newArray_ ((0::Int),0)
469 i_arr <- castSTUArray arr
470 w0 <- readArray i_arr 0
476 arr <- newArray_ ((0::Int),0)
478 a_arr <- castSTUArray arr
479 w0 <- readArray a_arr 0
484 %************************************************************************
486 \subsection{Connect to actual values for bytecode opcodes}
488 %************************************************************************
492 #include "Bytecodes.h"
-- Opcode numbers as Haskell Ints: one i_* binding per bci_* name.
-- These are the opcode values handed to instr1..instr4 by the
-- assembler.
-- NOTE(review): the bci_* names are presumably CPP macros supplied by
-- the #include of "Bytecodes.h" -- confirm against that header.
494 i_ARGCHECK = (bci_ARGCHECK :: Int)
495 i_PUSH_L = (bci_PUSH_L :: Int)
496 i_PUSH_LL = (bci_PUSH_LL :: Int)
497 i_PUSH_LLL = (bci_PUSH_LLL :: Int)
498 i_PUSH_G = (bci_PUSH_G :: Int)
499 i_PUSH_AS = (bci_PUSH_AS :: Int)
500 i_PUSH_UBX = (bci_PUSH_UBX :: Int)
501 i_PUSH_TAG = (bci_PUSH_TAG :: Int)
502 i_SLIDE = (bci_SLIDE :: Int)
503 i_ALLOC = (bci_ALLOC :: Int)
504 i_MKAP = (bci_MKAP :: Int)
505 i_UNPACK = (bci_UNPACK :: Int)
506 i_UPK_TAG = (bci_UPK_TAG :: Int)
507 i_PACK = (bci_PACK :: Int)
508 i_TESTLT_I = (bci_TESTLT_I :: Int)
509 i_TESTEQ_I = (bci_TESTEQ_I :: Int)
510 i_TESTLT_F = (bci_TESTLT_F :: Int)
511 i_TESTEQ_F = (bci_TESTEQ_F :: Int)
512 i_TESTLT_D = (bci_TESTLT_D :: Int)
513 i_TESTEQ_D = (bci_TESTEQ_D :: Int)
514 i_TESTLT_P = (bci_TESTLT_P :: Int)
515 i_TESTEQ_P = (bci_TESTEQ_P :: Int)
516 i_CASEFAIL = (bci_CASEFAIL :: Int)
517 i_ENTER = (bci_ENTER :: Int)
518 i_RETURN = (bci_RETURN :: Int)
519 i_STKCHECK = (bci_STKCHECK :: Int)
520 i_JMP = (bci_JMP :: Int)
522 i_CCALL = (bci_CCALL :: Int)
523 i_SWIZZLE = (bci_SWIZZLE :: Int)
525 i_CCALL = error "Sorry pal, you need to bootstrap to use i_CCALL."
526 i_SWIZZLE = error "Sorry pal, you need to bootstrap to use i_SWIZZLE."
529 iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)