2 % (c) The University of Glasgow 2002-2006
5 ByteCodeLink: Bytecode assembler and linker
8 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
11 assembleBCOs, assembleBCO,
14 UnlinkedBCO(..), BCOPtr(..), bcoFreeNames,
15 SizedSeq, sizeSS, ssElts,
16 iNTERP_STACK_CHECK_THRESH
19 #include "HsVersions.h"
36 import Control.Monad ( foldM )
37 import Control.Monad.ST ( runST )
39 import GHC.Word ( Word(..) )
40 import Data.Array.MArray
41 import Data.Array.Unboxed ( listArray )
42 import Data.Array.Base ( UArray(..) )
43 import Data.Array.ST ( castSTUArray )
44 import Foreign ( Word16, free )
46 import Data.Int ( Int64 )
47 import Data.Char ( ord )
49 import GHC.Base ( ByteArray# )
50 import GHC.IOBase ( IO(..) )
51 import GHC.Ptr ( Ptr(..) )
53 -- -----------------------------------------------------------------------------
56 -- CompiledByteCode represents the result of byte-code
57 -- compiling a bunch of functions and data types
60 = ByteCode [UnlinkedBCO] -- Bunch of interpretable bindings
61 ItblEnv -- A mapping from DataCons to their itbls
63 instance Outputable CompiledByteCode where
64 ppr (ByteCode bcos _) = ppr bcos
69 unlinkedBCOName :: Name,
70 unlinkedBCOArity :: Int,
71 unlinkedBCOInstrs :: ByteArray#, -- insns
72 unlinkedBCOBitmap :: ByteArray#, -- bitmap
73 unlinkedBCOLits :: (SizedSeq (Either Word FastString)), -- literals
74 -- Either literal words or a pointer to a asciiz
75 -- string, denoting a label whose *address* should
76 -- be determined at link time
77 unlinkedBCOPtrs :: (SizedSeq BCOPtr), -- ptrs
78 unlinkedBCOItbls :: (SizedSeq Name) -- itbl refs
84 | BCOPtrBCO UnlinkedBCO
86 -- | Finds external references. Remember to remove the names
87 -- defined by this group of BCOs themselves
88 bcoFreeNames :: UnlinkedBCO -> NameSet
90 = bco_refs bco `minusNameSet` mkNameSet [unlinkedBCOName bco]
92 bco_refs (UnlinkedBCO _ _ _ _ _ ptrs itbls)
94 mkNameSet [ n | BCOPtrName n <- ssElts ptrs ] :
95 mkNameSet (ssElts itbls) :
96 map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
99 instance Outputable UnlinkedBCO where
100 ppr (UnlinkedBCO nm arity insns bitmap lits ptrs itbls)
101 = sep [text "BCO", ppr nm, text "with",
102 int (sizeSS lits), text "lits",
103 int (sizeSS ptrs), text "ptrs",
104 int (sizeSS itbls), text "itbls"]
106 -- -----------------------------------------------------------------------------
107 -- The bytecode assembler
109 -- The object format for bytecodes is: 16 bits for the opcode, and 16
110 -- for each field -- so the code can be considered a sequence of
111 -- 16-bit ints. Each field denotes either a stack offset or number of
112 -- items on the stack (eg SLIDE), and index into the pointer table (eg
113 -- PUSH_G), an index into the literal table (eg PUSH_I/D/L), or a
114 -- bytecode address in this BCO.
116 -- Top level assembler fn.
117 assembleBCOs :: [ProtoBCO Name] -> [TyCon] -> IO CompiledByteCode
118 assembleBCOs proto_bcos tycons
119 = do itblenv <- mkITbls tycons
120 bcos <- mapM assembleBCO proto_bcos
121 return (ByteCode bcos itblenv)
123 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
124 assembleBCO (ProtoBCO nm instrs bitmap bsize arity origin malloced)
126 -- pass 1: collect up the offsets of the local labels.
127 -- Remember that the first insn starts at offset 1 since offset 0
128 -- (eventually) will hold the total # of insns.
129 label_env = mkLabelEnv emptyFM 1 instrs
131 mkLabelEnv env i_offset [] = env
132 mkLabelEnv env i_offset (i:is)
134 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
135 in mkLabelEnv new_env (i_offset + instrSize16s i) is
138 = case lookupFM label_env lab of
139 Just bco_offset -> bco_offset
140 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
142 do -- pass 2: generate the instruction, ptr and nonptr bits
143 insns <- return emptySS :: IO (SizedSeq Word16)
144 lits <- return emptySS :: IO (SizedSeq (Either Word FastString))
145 ptrs <- return emptySS :: IO (SizedSeq BCOPtr)
146 itbls <- return emptySS :: IO (SizedSeq Name)
147 let init_asm_state = (insns,lits,ptrs,itbls)
148 (final_insns, final_lits, final_ptrs, final_itbls)
149 <- mkBits findLabel init_asm_state instrs
151 let asm_insns = ssElts final_insns
152 n_insns = sizeSS final_insns
155 | n_insns > 65535 = panic "linkBCO: >= 64k insns in BCO"
156 | otherwise = mkInstrArray n_insns asm_insns
157 insns_barr = case insns_arr of UArray _lo _hi barr -> barr
159 bitmap_arr = mkBitmapArray bsize bitmap
160 bitmap_barr = case bitmap_arr of UArray _lo _hi barr -> barr
162 let ul_bco = UnlinkedBCO nm arity insns_barr bitmap_barr final_lits
163 final_ptrs final_itbls
165 -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
166 -- objects, since they might get run too early. Disable this until
167 -- we figure out what to do.
168 -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
172 zonk ptr = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
175 mkBitmapArray :: Int -> [StgWord] -> UArray Int StgWord
176 mkBitmapArray bsize bitmap
177 = listArray (0, length bitmap) (fromIntegral bsize : bitmap)
179 mkInstrArray :: Int -> [Word16] -> UArray Int Word16
180 mkInstrArray n_insns asm_insns
181 = listArray (0, n_insns) (fromIntegral n_insns : asm_insns)
183 -- instrs nonptrs ptrs itbls
184 type AsmState = (SizedSeq Word16,
185 SizedSeq (Either Word FastString),
189 data SizedSeq a = SizedSeq !Int [a]
190 emptySS = SizedSeq 0 []
192 -- Why are these two monadic???
193 addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
194 addListToSS (SizedSeq n r_xs) xs
195 = return (SizedSeq (n + length xs) (reverse xs ++ r_xs))
197 ssElts :: SizedSeq a -> [a]
198 ssElts (SizedSeq n r_xs) = reverse r_xs
200 sizeSS :: SizedSeq a -> Int
201 sizeSS (SizedSeq n r_xs) = n
203 -- Bring in all the bci_ bytecode constants.
204 #include "Bytecodes.h"
206 largeArgInstr :: Int -> Int
207 largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci
209 largeArg :: Int -> [Int]
211 | wORD_SIZE_IN_BITS == 64
212 = [(i .&. 0xFFFF000000000000) `shiftR` 48,
213 (i .&. 0x0000FFFF00000000) `shiftR` 32,
214 (i .&. 0x00000000FFFF0000) `shiftR` 16,
215 (i .&. 0x000000000000FFFF)]
216 | wORD_SIZE_IN_BITS == 32
217 = [(i .&. 0xFFFF0000) `shiftR` 16,
219 | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?"
221 -- This is where all the action is (pass 2 of the assembler)
222 mkBits :: (Int -> Int) -- label finder
224 -> [BCInstr] -- instructions (in)
227 mkBits findLabel st proto_insns
228 = foldM doInstr st proto_insns
230 doInstr :: AsmState -> BCInstr -> IO AsmState
235 instrn st (largeArgInstr bci_STKCHECK : largeArg n)
236 | otherwise -> instr2 st bci_STKCHECK n
237 PUSH_L o1 -> instr2 st bci_PUSH_L o1
238 PUSH_LL o1 o2 -> instr3 st bci_PUSH_LL o1 o2
239 PUSH_LLL o1 o2 o3 -> instr4 st bci_PUSH_LLL o1 o2 o3
240 PUSH_G nm -> do (p, st2) <- ptr st (BCOPtrName nm)
241 instr2 st2 bci_PUSH_G p
242 PUSH_PRIMOP op -> do (p, st2) <- ptr st (BCOPtrPrimOp op)
243 instr2 st2 bci_PUSH_G p
244 PUSH_BCO proto -> do ul_bco <- assembleBCO proto
245 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
246 instr2 st2 bci_PUSH_G p
247 PUSH_ALTS proto -> do ul_bco <- assembleBCO proto
248 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
249 instr2 st2 bci_PUSH_ALTS p
250 PUSH_ALTS_UNLIFTED proto pk -> do
251 ul_bco <- assembleBCO proto
252 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
253 instr2 st2 (push_alts pk) p
254 PUSH_UBX (Left lit) nws
255 -> do (np, st2) <- literal st lit
256 instr3 st2 bci_PUSH_UBX np nws
257 PUSH_UBX (Right aa) nws
258 -> do (np, st2) <- addr st aa
259 instr3 st2 bci_PUSH_UBX np nws
261 PUSH_APPLY_N -> do instr1 st bci_PUSH_APPLY_N
262 PUSH_APPLY_V -> do instr1 st bci_PUSH_APPLY_V
263 PUSH_APPLY_F -> do instr1 st bci_PUSH_APPLY_F
264 PUSH_APPLY_D -> do instr1 st bci_PUSH_APPLY_D
265 PUSH_APPLY_L -> do instr1 st bci_PUSH_APPLY_L
266 PUSH_APPLY_P -> do instr1 st bci_PUSH_APPLY_P
267 PUSH_APPLY_PP -> do instr1 st bci_PUSH_APPLY_PP
268 PUSH_APPLY_PPP -> do instr1 st bci_PUSH_APPLY_PPP
269 PUSH_APPLY_PPPP -> do instr1 st bci_PUSH_APPLY_PPPP
270 PUSH_APPLY_PPPPP -> do instr1 st bci_PUSH_APPLY_PPPPP
271 PUSH_APPLY_PPPPPP -> do instr1 st bci_PUSH_APPLY_PPPPPP
273 SLIDE n by -> instr3 st bci_SLIDE n by
274 ALLOC_AP n -> instr2 st bci_ALLOC_AP n
275 ALLOC_PAP arity n -> instr3 st bci_ALLOC_PAP arity n
276 MKAP off sz -> instr3 st bci_MKAP off sz
277 MKPAP off sz -> instr3 st bci_MKPAP off sz
278 UNPACK n -> instr2 st bci_UNPACK n
279 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
280 instr3 st2 bci_PACK itbl_no sz
281 LABEL lab -> return st
282 TESTLT_I i l -> do (np, st2) <- int st i
283 instr3 st2 bci_TESTLT_I np (findLabel l)
284 TESTEQ_I i l -> do (np, st2) <- int st i
285 instr3 st2 bci_TESTEQ_I np (findLabel l)
286 TESTLT_F f l -> do (np, st2) <- float st f
287 instr3 st2 bci_TESTLT_F np (findLabel l)
288 TESTEQ_F f l -> do (np, st2) <- float st f
289 instr3 st2 bci_TESTEQ_F np (findLabel l)
290 TESTLT_D d l -> do (np, st2) <- double st d
291 instr3 st2 bci_TESTLT_D np (findLabel l)
292 TESTEQ_D d l -> do (np, st2) <- double st d
293 instr3 st2 bci_TESTEQ_D np (findLabel l)
294 TESTLT_P i l -> instr3 st bci_TESTLT_P i (findLabel l)
295 TESTEQ_P i l -> instr3 st bci_TESTEQ_P i (findLabel l)
296 CASEFAIL -> instr1 st bci_CASEFAIL
297 SWIZZLE stkoff n -> instr3 st bci_SWIZZLE stkoff n
298 JMP l -> instr2 st bci_JMP (findLabel l)
299 ENTER -> instr1 st bci_ENTER
300 RETURN -> instr1 st bci_RETURN
301 RETURN_UBX rep -> instr1 st (return_ubx rep)
302 CCALL off m_addr -> do (np, st2) <- addr st m_addr
303 instr3 st2 bci_CCALL off np
308 instrn :: AsmState -> [Int] -> IO AsmState
309 instrn st [] = return st
310 instrn (st_i, st_l, st_p, st_I) (i:is)
311 = do st_i' <- addToSS st_i (i2s i)
312 instrn (st_i', st_l, st_p, st_I) is
314 instr1 (st_i0,st_l0,st_p0,st_I0) i1
315 = do st_i1 <- addToSS st_i0 i1
316 return (st_i1,st_l0,st_p0,st_I0)
318 instr2 (st_i0,st_l0,st_p0,st_I0) i1 i2
319 = do st_i1 <- addToSS st_i0 (i2s i1)
320 st_i2 <- addToSS st_i1 (i2s i2)
321 return (st_i2,st_l0,st_p0,st_I0)
323 instr3 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3
324 = do st_i1 <- addToSS st_i0 (i2s i1)
325 st_i2 <- addToSS st_i1 (i2s i2)
326 st_i3 <- addToSS st_i2 (i2s i3)
327 return (st_i3,st_l0,st_p0,st_I0)
329 instr4 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3 i4
330 = do st_i1 <- addToSS st_i0 (i2s i1)
331 st_i2 <- addToSS st_i1 (i2s i2)
332 st_i3 <- addToSS st_i2 (i2s i3)
333 st_i4 <- addToSS st_i3 (i2s i4)
334 return (st_i4,st_l0,st_p0,st_I0)
336 float (st_i0,st_l0,st_p0,st_I0) f
337 = do let ws = mkLitF f
338 st_l1 <- addListToSS st_l0 (map Left ws)
339 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
341 double (st_i0,st_l0,st_p0,st_I0) d
342 = do let ws = mkLitD d
343 st_l1 <- addListToSS st_l0 (map Left ws)
344 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
346 int (st_i0,st_l0,st_p0,st_I0) i
347 = do let ws = mkLitI i
348 st_l1 <- addListToSS st_l0 (map Left ws)
349 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
351 int64 (st_i0,st_l0,st_p0,st_I0) i
352 = do let ws = mkLitI64 i
353 st_l1 <- addListToSS st_l0 (map Left ws)
354 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
356 addr (st_i0,st_l0,st_p0,st_I0) a
357 = do let ws = mkLitPtr a
358 st_l1 <- addListToSS st_l0 (map Left ws)
359 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
361 litlabel (st_i0,st_l0,st_p0,st_I0) fs
362 = do st_l1 <- addListToSS st_l0 [Right fs]
363 return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
365 ptr (st_i0,st_l0,st_p0,st_I0) p
366 = do st_p1 <- addToSS st_p0 p
367 return (sizeSS st_p0, (st_i0,st_l0,st_p1,st_I0))
369 itbl (st_i0,st_l0,st_p0,st_I0) dcon
370 = do st_I1 <- addToSS st_I0 (getName dcon)
371 return (sizeSS st_I0, (st_i0,st_l0,st_p0,st_I1))
373 literal st (MachLabel fs _) = litlabel st fs
374 literal st (MachWord w) = int st (fromIntegral w)
375 literal st (MachInt j) = int st (fromIntegral j)
376 literal st (MachFloat r) = float st (fromRational r)
377 literal st (MachDouble r) = double st (fromRational r)
378 literal st (MachChar c) = int st (ord c)
379 literal st (MachInt64 ii) = int64 st (fromIntegral ii)
380 literal st (MachWord64 ii) = int64 st (fromIntegral ii)
381 literal st other = pprPanic "ByteCodeLink.literal" (ppr other)
384 push_alts NonPtrArg = bci_PUSH_ALTS_N
385 push_alts FloatArg = bci_PUSH_ALTS_F
386 push_alts DoubleArg = bci_PUSH_ALTS_D
387 push_alts VoidArg = bci_PUSH_ALTS_V
388 push_alts LongArg = bci_PUSH_ALTS_L
389 push_alts PtrArg = bci_PUSH_ALTS_P
391 return_ubx NonPtrArg = bci_RETURN_N
392 return_ubx FloatArg = bci_RETURN_F
393 return_ubx DoubleArg = bci_RETURN_D
394 return_ubx VoidArg = bci_RETURN_V
395 return_ubx LongArg = bci_RETURN_L
396 return_ubx PtrArg = bci_RETURN_P
399 -- The size in 16-bit entities of an instruction.
400 instrSize16s :: BCInstr -> Int
411 PUSH_ALTS_UNLIFTED{} -> 2
420 PUSH_APPLY_PPP{} -> 1
421 PUSH_APPLY_PPPP{} -> 1
422 PUSH_APPLY_PPPPP{} -> 1
423 PUSH_APPLY_PPPPPP{} -> 1
448 -- Make lists of host-sized words for literals, so that when the
449 -- words are placed in memory at increasing addresses, the
450 -- bit pattern is correct for the host's word size and endianness.
451 mkLitI :: Int -> [Word]
452 mkLitF :: Float -> [Word]
453 mkLitD :: Double -> [Word]
454 mkLitPtr :: Ptr () -> [Word]
455 mkLitI64 :: Int64 -> [Word]
459 arr <- newArray_ ((0::Int),0)
461 f_arr <- castSTUArray arr
462 w0 <- readArray f_arr 0
469 arr <- newArray_ ((0::Int),1)
471 d_arr <- castSTUArray arr
472 w0 <- readArray d_arr 0
473 w1 <- readArray d_arr 1
474 return [w0 :: Word, w1]
478 arr <- newArray_ ((0::Int),0)
480 d_arr <- castSTUArray arr
481 w0 <- readArray d_arr 0
488 arr <- newArray_ ((0::Int),1)
490 d_arr <- castSTUArray arr
491 w0 <- readArray d_arr 0
492 w1 <- readArray d_arr 1
493 return [w0 :: Word,w1]
497 arr <- newArray_ ((0::Int),0)
499 d_arr <- castSTUArray arr
500 w0 <- readArray d_arr 0
506 arr <- newArray_ ((0::Int),0)
508 i_arr <- castSTUArray arr
509 w0 <- readArray i_arr 0
515 arr <- newArray_ ((0::Int),0)
517 a_arr <- castSTUArray arr
518 w0 <- readArray a_arr 0
522 iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)