2 % (c) The University of Glasgow 2002-2006
5 ByteCodeLink: Bytecode assembler and linker
8 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
9 {-# LANGUAGE BangPatterns #-}
12 assembleBCOs, assembleBCO,
15 UnlinkedBCO(..), BCOPtr(..), BCONPtr(..), bcoFreeNames,
16 SizedSeq, sizeSS, ssElts,
17 iNTERP_STACK_CHECK_THRESH
20 #include "HsVersions.h"
37 import Control.Monad ( foldM )
38 import Control.Monad.ST ( runST )
40 import Data.Array.MArray
41 import Data.Array.Unboxed ( listArray )
42 import Data.Array.Base ( UArray(..) )
43 import Data.Array.ST ( castSTUArray )
45 import Data.Char ( ord )
48 import qualified Data.Map as Map
50 import GHC.Base ( ByteArray#, MutableByteArray#, RealWorld )
52 -- -----------------------------------------------------------------------------
55 -- CompiledByteCode represents the result of byte-code
56 -- compiling a bunch of functions and data types
59 = ByteCode [UnlinkedBCO] -- Bunch of interpretable bindings
60 ItblEnv -- A mapping from DataCons to their itbls
62 instance Outputable CompiledByteCode where
63 ppr (ByteCode bcos _) = ppr bcos
68 unlinkedBCOName :: Name,
69 unlinkedBCOArity :: Int,
70 unlinkedBCOInstrs :: ByteArray#, -- insns
71 unlinkedBCOBitmap :: ByteArray#, -- bitmap
72 unlinkedBCOLits :: (SizedSeq BCONPtr), -- non-ptrs
73 unlinkedBCOPtrs :: (SizedSeq BCOPtr) -- ptrs
79 | BCOPtrBCO UnlinkedBCO
80 | BCOPtrBreakInfo BreakInfo
81 | BCOPtrArray (MutableByteArray# RealWorld)
85 | BCONPtrLbl FastString
88 -- | Finds external references. Remember to remove the names
89 -- defined by this group of BCOs themselves
90 bcoFreeNames :: UnlinkedBCO -> NameSet
92 = bco_refs bco `minusNameSet` mkNameSet [unlinkedBCOName bco]
94 bco_refs (UnlinkedBCO _ _ _ _ nonptrs ptrs)
96 mkNameSet [ n | BCOPtrName n <- ssElts ptrs ] :
97 mkNameSet [ n | BCONPtrItbl n <- ssElts nonptrs ] :
98 map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
101 instance Outputable UnlinkedBCO where
102 ppr (UnlinkedBCO nm _arity _insns _bitmap lits ptrs)
103 = sep [text "BCO", ppr nm, text "with",
104 ppr (sizeSS lits), text "lits",
105 ppr (sizeSS ptrs), text "ptrs" ]
107 -- -----------------------------------------------------------------------------
108 -- The bytecode assembler
110 -- The object format for bytecodes is: 16 bits for the opcode, and 16
111 -- for each field -- so the code can be considered a sequence of
112 -- 16-bit ints. Each field denotes either a stack offset or number of
113 -- items on the stack (eg SLIDE), and index into the pointer table (eg
114 -- PUSH_G), an index into the literal table (eg PUSH_I/D/L), or a
115 -- bytecode address in this BCO.
117 -- Top level assembler fn.
118 assembleBCOs :: DynFlags -> [ProtoBCO Name] -> [TyCon] -> IO CompiledByteCode
119 assembleBCOs dflags proto_bcos tycons
120 = do itblenv <- mkITbls tycons
121 bcos <- mapM (assembleBCO dflags) proto_bcos
122 return (ByteCode bcos itblenv)
124 assembleBCO :: DynFlags -> ProtoBCO Name -> IO UnlinkedBCO
125 assembleBCO dflags (ProtoBCO nm instrs bitmap bsize arity _origin _malloced)
127 -- pass 1: collect up the offsets of the local labels.
128 -- Remember that the first insn starts at offset
129 -- sizeOf Word / sizeOf Word16
130 -- since offset 0 (eventually) will hold the total # of insns.
132 | wORD_SIZE_IN_BITS == 64 = 4
133 | wORD_SIZE_IN_BITS == 32 = 2
134 | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?"
135 label_env = mkLabelEnv Map.empty lableInitialOffset instrs
137 mkLabelEnv :: Map Word16 Word -> Word -> [BCInstr]
139 mkLabelEnv env _ [] = env
140 mkLabelEnv env i_offset (i:is)
142 = case i of LABEL n -> Map.insert n i_offset env ; _ -> env
143 in mkLabelEnv new_env (i_offset + instrSize16s i) is
145 findLabel :: Word16 -> Word
147 = case Map.lookup lab label_env of
148 Just bco_offset -> bco_offset
149 Nothing -> pprPanic "assembleBCO.findLabel" (ppr lab)
151 do -- pass 2: generate the instruction, ptr and nonptr bits
152 insns <- return emptySS :: IO (SizedSeq Word16)
153 lits <- return emptySS :: IO (SizedSeq BCONPtr)
154 ptrs <- return emptySS :: IO (SizedSeq BCOPtr)
155 let init_asm_state = (insns,lits,ptrs)
156 (final_insns, final_lits, final_ptrs)
157 <- mkBits dflags findLabel init_asm_state instrs
159 let asm_insns = ssElts final_insns
160 n_insns = sizeSS final_insns
162 insns_arr = mkInstrArray lableInitialOffset n_insns asm_insns
163 !insns_barr = case insns_arr of UArray _lo _hi _n barr -> barr
165 bitmap_arr = mkBitmapArray bsize bitmap
166 !bitmap_barr = case bitmap_arr of UArray _lo _hi _n barr -> barr
168 let ul_bco = UnlinkedBCO nm arity insns_barr bitmap_barr final_lits final_ptrs
170 -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
171 -- objects, since they might get run too early. Disable this until
172 -- we figure out what to do.
173 -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
177 -- zonk ptr = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
180 mkBitmapArray :: Word16 -> [StgWord] -> UArray Int StgWord
181 mkBitmapArray bsize bitmap
182 = listArray (0, length bitmap) (fromIntegral bsize : bitmap)
184 mkInstrArray :: Word -> Word -> [Word16] -> UArray Word Word16
185 mkInstrArray lableInitialOffset n_insns asm_insns
186 = let size = lableInitialOffset + n_insns
187 in listArray (0, size - 1) (largeArg size ++ asm_insns)
189 -- instrs nonptrs ptrs
190 type AsmState = (SizedSeq Word16,
194 data SizedSeq a = SizedSeq !Word [a]
195 emptySS :: SizedSeq a
196 emptySS = SizedSeq 0 []
198 -- Why are these two monadic???
199 addToSS :: SizedSeq a -> a -> IO (SizedSeq a)
200 addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
201 addListToSS :: SizedSeq a -> [a] -> IO (SizedSeq a)
202 addListToSS (SizedSeq n r_xs) xs
203 = return (SizedSeq (n + genericLength xs) (reverse xs ++ r_xs))
205 ssElts :: SizedSeq a -> [a]
206 ssElts (SizedSeq _ r_xs) = reverse r_xs
208 sizeSS :: SizedSeq a -> Word
209 sizeSS (SizedSeq n _) = n
211 sizeSS16 :: SizedSeq a -> Word16
212 sizeSS16 (SizedSeq n _) = fromIntegral n
214 -- Bring in all the bci_ bytecode constants.
215 #include "rts/Bytecodes.h"
217 largeArgInstr :: Word16 -> Word16
218 largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci
220 largeArg :: Word -> [Word16]
222 | wORD_SIZE_IN_BITS == 64
223 = [fromIntegral (w `shiftR` 48),
224 fromIntegral (w `shiftR` 32),
225 fromIntegral (w `shiftR` 16),
227 | wORD_SIZE_IN_BITS == 32
228 = [fromIntegral (w `shiftR` 16),
230 | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?"
232 -- This is where all the action is (pass 2 of the assembler)
234 -> (Word16 -> Word) -- label finder
236 -> [BCInstr] -- instructions (in)
239 mkBits dflags findLabel st proto_insns
240 = foldM doInstr st proto_insns
242 doInstr :: AsmState -> BCInstr -> IO AsmState
245 STKCHECK n -> instr1Large st bci_STKCHECK n
246 PUSH_L o1 -> instr2 st bci_PUSH_L o1
247 PUSH_LL o1 o2 -> instr3 st bci_PUSH_LL o1 o2
248 PUSH_LLL o1 o2 o3 -> instr4 st bci_PUSH_LLL o1 o2 o3
249 PUSH_G nm -> do (p, st2) <- ptr st (BCOPtrName nm)
250 instr2 st2 bci_PUSH_G p
251 PUSH_PRIMOP op -> do (p, st2) <- ptr st (BCOPtrPrimOp op)
252 instr2 st2 bci_PUSH_G p
253 PUSH_BCO proto -> do ul_bco <- assembleBCO dflags proto
254 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
255 instr2 st2 bci_PUSH_G p
256 PUSH_ALTS proto -> do ul_bco <- assembleBCO dflags proto
257 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
258 instr2 st2 bci_PUSH_ALTS p
259 PUSH_ALTS_UNLIFTED proto pk -> do
260 ul_bco <- assembleBCO dflags proto
261 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
262 instr2 st2 (push_alts pk) p
263 PUSH_UBX (Left lit) nws
264 -> do (np, st2) <- literal st lit
265 instr3 st2 bci_PUSH_UBX np nws
266 PUSH_UBX (Right aa) nws
267 -> do (np, st2) <- addr st aa
268 instr3 st2 bci_PUSH_UBX np nws
270 PUSH_APPLY_N -> do instr1 st bci_PUSH_APPLY_N
271 PUSH_APPLY_V -> do instr1 st bci_PUSH_APPLY_V
272 PUSH_APPLY_F -> do instr1 st bci_PUSH_APPLY_F
273 PUSH_APPLY_D -> do instr1 st bci_PUSH_APPLY_D
274 PUSH_APPLY_L -> do instr1 st bci_PUSH_APPLY_L
275 PUSH_APPLY_P -> do instr1 st bci_PUSH_APPLY_P
276 PUSH_APPLY_PP -> do instr1 st bci_PUSH_APPLY_PP
277 PUSH_APPLY_PPP -> do instr1 st bci_PUSH_APPLY_PPP
278 PUSH_APPLY_PPPP -> do instr1 st bci_PUSH_APPLY_PPPP
279 PUSH_APPLY_PPPPP -> do instr1 st bci_PUSH_APPLY_PPPPP
280 PUSH_APPLY_PPPPPP -> do instr1 st bci_PUSH_APPLY_PPPPPP
282 SLIDE n by -> instr3 st bci_SLIDE n by
283 ALLOC_AP n -> instr2 st bci_ALLOC_AP n
284 ALLOC_AP_NOUPD n -> instr2 st bci_ALLOC_AP_NOUPD n
285 ALLOC_PAP arity n -> instr3 st bci_ALLOC_PAP arity n
286 MKAP off sz -> instr3 st bci_MKAP off sz
287 MKPAP off sz -> instr3 st bci_MKPAP off sz
288 UNPACK n -> instr2 st bci_UNPACK n
289 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
290 instr3 st2 bci_PACK itbl_no sz
292 TESTLT_I i l -> do (np, st2) <- int st i
293 instr2Large st2 bci_TESTLT_I np (findLabel l)
294 TESTEQ_I i l -> do (np, st2) <- int st i
295 instr2Large st2 bci_TESTEQ_I np (findLabel l)
296 TESTLT_W w l -> do (np, st2) <- word st w
297 instr2Large st2 bci_TESTLT_W np (findLabel l)
298 TESTEQ_W w l -> do (np, st2) <- word st w
299 instr2Large st2 bci_TESTEQ_W np (findLabel l)
300 TESTLT_F f l -> do (np, st2) <- float st f
301 instr2Large st2 bci_TESTLT_F np (findLabel l)
302 TESTEQ_F f l -> do (np, st2) <- float st f
303 instr2Large st2 bci_TESTEQ_F np (findLabel l)
304 TESTLT_D d l -> do (np, st2) <- double st d
305 instr2Large st2 bci_TESTLT_D np (findLabel l)
306 TESTEQ_D d l -> do (np, st2) <- double st d
307 instr2Large st2 bci_TESTEQ_D np (findLabel l)
308 TESTLT_P i l -> instr2Large st bci_TESTLT_P i (findLabel l)
309 TESTEQ_P i l -> instr2Large st bci_TESTEQ_P i (findLabel l)
310 CASEFAIL -> instr1 st bci_CASEFAIL
311 SWIZZLE stkoff n -> instr3 st bci_SWIZZLE stkoff n
312 JMP l -> instr1Large st bci_JMP (findLabel l)
313 ENTER -> instr1 st bci_ENTER
314 RETURN -> instr1 st bci_RETURN
315 RETURN_UBX rep -> instr1 st (return_ubx rep)
316 CCALL off m_addr int -> do (np, st2) <- addr st m_addr
317 instr4 st2 bci_CCALL off np int
318 BRK_FUN array index info -> do
319 (p1, st2) <- ptr st (BCOPtrArray array)
320 (p2, st3) <- ptr st2 (BCOPtrBreakInfo info)
321 instr4 st3 bci_BRK_FUN p1 index p2
323 instrn :: AsmState -> [Word16] -> IO AsmState
324 instrn st [] = return st
325 instrn (st_i, st_l, st_p) (i:is)
326 = do st_i' <- addToSS st_i i
327 instrn (st_i', st_l, st_p) is
329 instr1Large st i1 large
330 | large > 65535 = instrn st (largeArgInstr i1 : largeArg large)
331 | otherwise = instr2 st i1 (fromIntegral large)
333 instr2Large st i1 i2 large
334 | large > 65535 = instrn st (largeArgInstr i1 : i2 : largeArg large)
335 | otherwise = instr3 st i1 i2 (fromIntegral large)
337 instr1 (st_i0,st_l0,st_p0) i1
338 = do st_i1 <- addToSS st_i0 i1
339 return (st_i1,st_l0,st_p0)
341 instr2 (st_i0,st_l0,st_p0) w1 w2
342 = do st_i1 <- addToSS st_i0 w1
343 st_i2 <- addToSS st_i1 w2
344 return (st_i2,st_l0,st_p0)
346 instr3 (st_i0,st_l0,st_p0) w1 w2 w3
347 = do st_i1 <- addToSS st_i0 w1
348 st_i2 <- addToSS st_i1 w2
349 st_i3 <- addToSS st_i2 w3
350 return (st_i3,st_l0,st_p0)
352 instr4 (st_i0,st_l0,st_p0) w1 w2 w3 w4
353 = do st_i1 <- addToSS st_i0 w1
354 st_i2 <- addToSS st_i1 w2
355 st_i3 <- addToSS st_i2 w3
356 st_i4 <- addToSS st_i3 w4
357 return (st_i4,st_l0,st_p0)
359 float (st_i0,st_l0,st_p0) f
360 = do let ws = mkLitF f
361 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
362 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
364 double (st_i0,st_l0,st_p0) d
365 = do let ws = mkLitD d
366 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
367 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
369 int (st_i0,st_l0,st_p0) i
370 = do let ws = mkLitI i
371 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
372 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
374 word (st_i0,st_l0,st_p0) w
376 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
377 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
379 int64 (st_i0,st_l0,st_p0) i
380 = do let ws = mkLitI64 i
381 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
382 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
384 addr (st_i0,st_l0,st_p0) a
385 = do let ws = mkLitPtr a
386 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
387 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
389 litlabel (st_i0,st_l0,st_p0) fs
390 = do st_l1 <- addListToSS st_l0 [BCONPtrLbl fs]
391 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
393 ptr (st_i0,st_l0,st_p0) p
394 = do st_p1 <- addToSS st_p0 p
395 return (sizeSS16 st_p0, (st_i0,st_l0,st_p1))
397 itbl (st_i0,st_l0,st_p0) dcon
398 = do st_l1 <- addToSS st_l0 (BCONPtrItbl (getName dcon))
399 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
401 literal st (MachLabel fs (Just sz) _)
402 | platformOS (targetPlatform dflags) == OSMinGW32
403 = litlabel st (appendFS fs (mkFastString ('@':show sz)))
404 -- On Windows, stdcall labels have a suffix indicating the no. of
405 -- arg words, e.g. foo@8. testcase: ffi012(ghci)
406 literal st (MachLabel fs _ _) = litlabel st fs
407 literal st (MachWord w) = int st (fromIntegral w)
408 literal st (MachInt j) = int st (fromIntegral j)
409 literal st MachNullAddr = int st 0
410 literal st (MachFloat r) = float st (fromRational r)
411 literal st (MachDouble r) = double st (fromRational r)
412 literal st (MachChar c) = int st (ord c)
413 literal st (MachInt64 ii) = int64 st (fromIntegral ii)
414 literal st (MachWord64 ii) = int64 st (fromIntegral ii)
415 literal _ other = pprPanic "ByteCodeAsm.literal" (ppr other)
418 push_alts :: CgRep -> Word16
419 push_alts NonPtrArg = bci_PUSH_ALTS_N
420 push_alts FloatArg = bci_PUSH_ALTS_F
421 push_alts DoubleArg = bci_PUSH_ALTS_D
422 push_alts VoidArg = bci_PUSH_ALTS_V
423 push_alts LongArg = bci_PUSH_ALTS_L
424 push_alts PtrArg = bci_PUSH_ALTS_P
426 return_ubx :: CgRep -> Word16
427 return_ubx NonPtrArg = bci_RETURN_N
428 return_ubx FloatArg = bci_RETURN_F
429 return_ubx DoubleArg = bci_RETURN_D
430 return_ubx VoidArg = bci_RETURN_V
431 return_ubx LongArg = bci_RETURN_L
432 return_ubx PtrArg = bci_RETURN_P
435 -- The size in 16-bit entities of an instruction.
436 instrSize16s :: BCInstr -> Word
447 PUSH_ALTS_UNLIFTED{} -> 2
456 PUSH_APPLY_PPP{} -> 1
457 PUSH_APPLY_PPPP{} -> 1
458 PUSH_APPLY_PPPPP{} -> 1
459 PUSH_APPLY_PPPPPP{} -> 1
462 ALLOC_AP_NOUPD{} -> 2
488 -- Make lists of host-sized words for literals, so that when the
489 -- words are placed in memory at increasing addresses, the
490 -- bit pattern is correct for the host's word size and endianness.
491 mkLitI :: Int -> [Word]
492 mkLitF :: Float -> [Word]
493 mkLitD :: Double -> [Word]
494 mkLitPtr :: Ptr () -> [Word]
495 mkLitI64 :: Int64 -> [Word]
499 arr <- newArray_ ((0::Int),0)
501 f_arr <- castSTUArray arr
502 w0 <- readArray f_arr 0
509 arr <- newArray_ ((0::Int),1)
511 d_arr <- castSTUArray arr
512 w0 <- readArray d_arr 0
513 w1 <- readArray d_arr 1
514 return [w0 :: Word, w1]
518 arr <- newArray_ ((0::Int),0)
520 d_arr <- castSTUArray arr
521 w0 <- readArray d_arr 0
525 = panic "mkLitD: Bad wORD_SIZE"
530 arr <- newArray_ ((0::Int),1)
532 d_arr <- castSTUArray arr
533 w0 <- readArray d_arr 0
534 w1 <- readArray d_arr 1
535 return [w0 :: Word,w1]
539 arr <- newArray_ ((0::Int),0)
541 d_arr <- castSTUArray arr
542 w0 <- readArray d_arr 0
546 = panic "mkLitI64: Bad wORD_SIZE"
550 arr <- newArray_ ((0::Int),0)
552 i_arr <- castSTUArray arr
553 w0 <- readArray i_arr 0
559 arr <- newArray_ ((0::Int),0)
561 a_arr <- castSTUArray arr
562 w0 <- readArray a_arr 0
566 iNTERP_STACK_CHECK_THRESH :: Int
567 iNTERP_STACK_CHECK_THRESH = INTERP_STACK_CHECK_THRESH