2 % (c) The University of Glasgow 2002-2006
5 ByteCodeLink: Bytecode assembler and linker
8 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
11 assembleBCOs, assembleBCO,
14 UnlinkedBCO(..), BCOPtr(..), BCONPtr(..), bcoFreeNames,
15 SizedSeq, sizeSS, ssElts,
16 iNTERP_STACK_CHECK_THRESH
19 #include "HsVersions.h"
35 import Control.Monad ( foldM )
36 import Control.Monad.ST ( runST )
38 import Data.Array.MArray
39 import Data.Array.Unboxed ( listArray )
40 import Data.Array.Base ( UArray(..) )
41 import Data.Array.ST ( castSTUArray )
43 import Data.Char ( ord )
46 import GHC.Base ( ByteArray#, MutableByteArray#, RealWorld )
48 -- -----------------------------------------------------------------------------
51 -- CompiledByteCode represents the result of byte-code
52 -- compiling a bunch of functions and data types
55 = ByteCode [UnlinkedBCO] -- Bunch of interpretable bindings
56 ItblEnv -- A mapping from DataCons to their itbls
58 instance Outputable CompiledByteCode where
59 ppr (ByteCode bcos _) = ppr bcos
64 unlinkedBCOName :: Name,
65 unlinkedBCOArity :: Int,
66 unlinkedBCOInstrs :: ByteArray#, -- insns
67 unlinkedBCOBitmap :: ByteArray#, -- bitmap
68 unlinkedBCOLits :: (SizedSeq BCONPtr), -- non-ptrs
69 unlinkedBCOPtrs :: (SizedSeq BCOPtr) -- ptrs
75 | BCOPtrBCO UnlinkedBCO
76 | BCOPtrBreakInfo BreakInfo
77 | BCOPtrArray (MutableByteArray# RealWorld)
81 | BCONPtrLbl FastString
84 -- | Finds external references. Remember to remove the names
85 -- defined by this group of BCOs themselves
86 bcoFreeNames :: UnlinkedBCO -> NameSet
88 = bco_refs bco `minusNameSet` mkNameSet [unlinkedBCOName bco]
90 bco_refs (UnlinkedBCO _ _ _ _ nonptrs ptrs)
92 mkNameSet [ n | BCOPtrName n <- ssElts ptrs ] :
93 mkNameSet [ n | BCONPtrItbl n <- ssElts nonptrs ] :
94 map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
97 instance Outputable UnlinkedBCO where
98 ppr (UnlinkedBCO nm _arity _insns _bitmap lits ptrs)
99 = sep [text "BCO", ppr nm, text "with",
100 ppr (sizeSS lits), text "lits",
101 ppr (sizeSS ptrs), text "ptrs" ]
103 -- -----------------------------------------------------------------------------
104 -- The bytecode assembler
106 -- The object format for bytecodes is: 16 bits for the opcode, and 16
107 -- for each field -- so the code can be considered a sequence of
108 -- 16-bit ints. Each field denotes either a stack offset or number of
109 -- items on the stack (eg SLIDE), and index into the pointer table (eg
110 -- PUSH_G), an index into the literal table (eg PUSH_I/D/L), or a
111 -- bytecode address in this BCO.
113 -- Top level assembler fn.
114 assembleBCOs :: [ProtoBCO Name] -> [TyCon] -> IO CompiledByteCode
115 assembleBCOs proto_bcos tycons
116 = do itblenv <- mkITbls tycons
117 bcos <- mapM assembleBCO proto_bcos
118 return (ByteCode bcos itblenv)
120 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
121 assembleBCO (ProtoBCO nm instrs bitmap bsize arity _origin _malloced)
123 -- pass 1: collect up the offsets of the local labels.
124 -- Remember that the first insn starts at offset
125 -- sizeOf Word / sizeOf Word16
126 -- since offset 0 (eventually) will hold the total # of insns.
128 | wORD_SIZE_IN_BITS == 64 = 4
129 | wORD_SIZE_IN_BITS == 32 = 2
130 | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?"
131 label_env = mkLabelEnv emptyFM lableInitialOffset instrs
133 mkLabelEnv :: FiniteMap Word16 Word -> Word -> [BCInstr]
134 -> FiniteMap Word16 Word
135 mkLabelEnv env _ [] = env
136 mkLabelEnv env i_offset (i:is)
138 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
139 in mkLabelEnv new_env (i_offset + instrSize16s i) is
141 findLabel :: Word16 -> Word
143 = case lookupFM label_env lab of
144 Just bco_offset -> bco_offset
145 Nothing -> pprPanic "assembleBCO.findLabel" (ppr lab)
147 do -- pass 2: generate the instruction, ptr and nonptr bits
148 insns <- return emptySS :: IO (SizedSeq Word16)
149 lits <- return emptySS :: IO (SizedSeq BCONPtr)
150 ptrs <- return emptySS :: IO (SizedSeq BCOPtr)
151 let init_asm_state = (insns,lits,ptrs)
152 (final_insns, final_lits, final_ptrs)
153 <- mkBits findLabel init_asm_state instrs
155 let asm_insns = ssElts final_insns
156 n_insns = sizeSS final_insns
158 insns_arr = mkInstrArray lableInitialOffset n_insns asm_insns
159 !insns_barr = case insns_arr of UArray _lo _hi _n barr -> barr
161 bitmap_arr = mkBitmapArray bsize bitmap
162 !bitmap_barr = case bitmap_arr of UArray _lo _hi _n barr -> barr
164 let ul_bco = UnlinkedBCO nm arity insns_barr bitmap_barr final_lits final_ptrs
166 -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
167 -- objects, since they might get run too early. Disable this until
168 -- we figure out what to do.
169 -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
173 -- zonk ptr = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
176 mkBitmapArray :: Word16 -> [StgWord] -> UArray Int StgWord
177 mkBitmapArray bsize bitmap
178 = listArray (0, length bitmap) (fromIntegral bsize : bitmap)
180 mkInstrArray :: Word -> Word -> [Word16] -> UArray Word Word16
181 mkInstrArray lableInitialOffset n_insns asm_insns
182 = let size = lableInitialOffset + n_insns
183 in listArray (0, size - 1) (largeArg size ++ asm_insns)
185 -- instrs nonptrs ptrs
186 type AsmState = (SizedSeq Word16,
190 data SizedSeq a = SizedSeq !Word [a]
191 emptySS :: SizedSeq a
192 emptySS = SizedSeq 0 []
194 -- Why are these two monadic???
195 addToSS :: SizedSeq a -> a -> IO (SizedSeq a)
196 addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
197 addListToSS :: SizedSeq a -> [a] -> IO (SizedSeq a)
198 addListToSS (SizedSeq n r_xs) xs
199 = return (SizedSeq (n + genericLength xs) (reverse xs ++ r_xs))
201 ssElts :: SizedSeq a -> [a]
202 ssElts (SizedSeq _ r_xs) = reverse r_xs
204 sizeSS :: SizedSeq a -> Word
205 sizeSS (SizedSeq n _) = n
207 sizeSS16 :: SizedSeq a -> Word16
208 sizeSS16 (SizedSeq n _) = fromIntegral n
210 -- Bring in all the bci_ bytecode constants.
211 #include "rts/Bytecodes.h"
213 largeArgInstr :: Word16 -> Word16
214 largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci
216 largeArg :: Word -> [Word16]
218 | wORD_SIZE_IN_BITS == 64
219 = [fromIntegral (w `shiftR` 48),
220 fromIntegral (w `shiftR` 32),
221 fromIntegral (w `shiftR` 16),
223 | wORD_SIZE_IN_BITS == 32
224 = [fromIntegral (w `shiftR` 16),
226 | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?"
228 -- This is where all the action is (pass 2 of the assembler)
229 mkBits :: (Word16 -> Word) -- label finder
231 -> [BCInstr] -- instructions (in)
234 mkBits findLabel st proto_insns
235 = foldM doInstr st proto_insns
237 doInstr :: AsmState -> BCInstr -> IO AsmState
240 STKCHECK n -> instr1Large st bci_STKCHECK n
241 PUSH_L o1 -> instr2 st bci_PUSH_L o1
242 PUSH_LL o1 o2 -> instr3 st bci_PUSH_LL o1 o2
243 PUSH_LLL o1 o2 o3 -> instr4 st bci_PUSH_LLL o1 o2 o3
244 PUSH_G nm -> do (p, st2) <- ptr st (BCOPtrName nm)
245 instr2 st2 bci_PUSH_G p
246 PUSH_PRIMOP op -> do (p, st2) <- ptr st (BCOPtrPrimOp op)
247 instr2 st2 bci_PUSH_G p
248 PUSH_BCO proto -> do ul_bco <- assembleBCO proto
249 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
250 instr2 st2 bci_PUSH_G p
251 PUSH_ALTS proto -> do ul_bco <- assembleBCO proto
252 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
253 instr2 st2 bci_PUSH_ALTS p
254 PUSH_ALTS_UNLIFTED proto pk -> do
255 ul_bco <- assembleBCO proto
256 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
257 instr2 st2 (push_alts pk) p
258 PUSH_UBX (Left lit) nws
259 -> do (np, st2) <- literal st lit
260 instr3 st2 bci_PUSH_UBX np nws
261 PUSH_UBX (Right aa) nws
262 -> do (np, st2) <- addr st aa
263 instr3 st2 bci_PUSH_UBX np nws
265 PUSH_APPLY_N -> do instr1 st bci_PUSH_APPLY_N
266 PUSH_APPLY_V -> do instr1 st bci_PUSH_APPLY_V
267 PUSH_APPLY_F -> do instr1 st bci_PUSH_APPLY_F
268 PUSH_APPLY_D -> do instr1 st bci_PUSH_APPLY_D
269 PUSH_APPLY_L -> do instr1 st bci_PUSH_APPLY_L
270 PUSH_APPLY_P -> do instr1 st bci_PUSH_APPLY_P
271 PUSH_APPLY_PP -> do instr1 st bci_PUSH_APPLY_PP
272 PUSH_APPLY_PPP -> do instr1 st bci_PUSH_APPLY_PPP
273 PUSH_APPLY_PPPP -> do instr1 st bci_PUSH_APPLY_PPPP
274 PUSH_APPLY_PPPPP -> do instr1 st bci_PUSH_APPLY_PPPPP
275 PUSH_APPLY_PPPPPP -> do instr1 st bci_PUSH_APPLY_PPPPPP
277 SLIDE n by -> instr3 st bci_SLIDE n by
278 ALLOC_AP n -> instr2 st bci_ALLOC_AP n
279 ALLOC_AP_NOUPD n -> instr2 st bci_ALLOC_AP_NOUPD n
280 ALLOC_PAP arity n -> instr3 st bci_ALLOC_PAP arity n
281 MKAP off sz -> instr3 st bci_MKAP off sz
282 MKPAP off sz -> instr3 st bci_MKPAP off sz
283 UNPACK n -> instr2 st bci_UNPACK n
284 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
285 instr3 st2 bci_PACK itbl_no sz
287 TESTLT_I i l -> do (np, st2) <- int st i
288 instr2Large st2 bci_TESTLT_I np (findLabel l)
289 TESTEQ_I i l -> do (np, st2) <- int st i
290 instr2Large st2 bci_TESTEQ_I np (findLabel l)
291 TESTLT_F f l -> do (np, st2) <- float st f
292 instr2Large st2 bci_TESTLT_F np (findLabel l)
293 TESTEQ_F f l -> do (np, st2) <- float st f
294 instr2Large st2 bci_TESTEQ_F np (findLabel l)
295 TESTLT_D d l -> do (np, st2) <- double st d
296 instr2Large st2 bci_TESTLT_D np (findLabel l)
297 TESTEQ_D d l -> do (np, st2) <- double st d
298 instr2Large st2 bci_TESTEQ_D np (findLabel l)
299 TESTLT_P i l -> instr2Large st bci_TESTLT_P i (findLabel l)
300 TESTEQ_P i l -> instr2Large st bci_TESTEQ_P i (findLabel l)
301 CASEFAIL -> instr1 st bci_CASEFAIL
302 SWIZZLE stkoff n -> instr3 st bci_SWIZZLE stkoff n
303 JMP l -> instr1Large st bci_JMP (findLabel l)
304 ENTER -> instr1 st bci_ENTER
305 RETURN -> instr1 st bci_RETURN
306 RETURN_UBX rep -> instr1 st (return_ubx rep)
307 CCALL off m_addr -> do (np, st2) <- addr st m_addr
308 instr3 st2 bci_CCALL off np
309 BRK_FUN array index info -> do
310 (p1, st2) <- ptr st (BCOPtrArray array)
311 (p2, st3) <- ptr st2 (BCOPtrBreakInfo info)
312 instr4 st3 bci_BRK_FUN p1 index p2
314 instrn :: AsmState -> [Word16] -> IO AsmState
315 instrn st [] = return st
316 instrn (st_i, st_l, st_p) (i:is)
317 = do st_i' <- addToSS st_i i
318 instrn (st_i', st_l, st_p) is
320 instr1Large st i1 large
321 | large > 65535 = instrn st (largeArgInstr i1 : largeArg large)
322 | otherwise = instr2 st i1 (fromIntegral large)
324 instr2Large st i1 i2 large
325 | large > 65535 = instrn st (largeArgInstr i1 : i2 : largeArg large)
326 | otherwise = instr3 st i1 i2 (fromIntegral large)
328 instr1 (st_i0,st_l0,st_p0) i1
329 = do st_i1 <- addToSS st_i0 i1
330 return (st_i1,st_l0,st_p0)
332 instr2 (st_i0,st_l0,st_p0) w1 w2
333 = do st_i1 <- addToSS st_i0 w1
334 st_i2 <- addToSS st_i1 w2
335 return (st_i2,st_l0,st_p0)
337 instr3 (st_i0,st_l0,st_p0) w1 w2 w3
338 = do st_i1 <- addToSS st_i0 w1
339 st_i2 <- addToSS st_i1 w2
340 st_i3 <- addToSS st_i2 w3
341 return (st_i3,st_l0,st_p0)
343 instr4 (st_i0,st_l0,st_p0) w1 w2 w3 w4
344 = do st_i1 <- addToSS st_i0 w1
345 st_i2 <- addToSS st_i1 w2
346 st_i3 <- addToSS st_i2 w3
347 st_i4 <- addToSS st_i3 w4
348 return (st_i4,st_l0,st_p0)
350 float (st_i0,st_l0,st_p0) f
351 = do let ws = mkLitF f
352 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
353 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
355 double (st_i0,st_l0,st_p0) d
356 = do let ws = mkLitD d
357 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
358 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
360 int (st_i0,st_l0,st_p0) i
361 = do let ws = mkLitI i
362 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
363 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
365 int64 (st_i0,st_l0,st_p0) i
366 = do let ws = mkLitI64 i
367 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
368 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
370 addr (st_i0,st_l0,st_p0) a
371 = do let ws = mkLitPtr a
372 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
373 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
375 litlabel (st_i0,st_l0,st_p0) fs
376 = do st_l1 <- addListToSS st_l0 [BCONPtrLbl fs]
377 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
379 ptr (st_i0,st_l0,st_p0) p
380 = do st_p1 <- addToSS st_p0 p
381 return (sizeSS16 st_p0, (st_i0,st_l0,st_p1))
383 itbl (st_i0,st_l0,st_p0) dcon
384 = do st_l1 <- addToSS st_l0 (BCONPtrItbl (getName dcon))
385 return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
387 #ifdef mingw32_TARGET_OS
388 literal st (MachLabel fs (Just sz) _)
389 = litlabel st (appendFS fs (mkFastString ('@':show sz)))
390 -- On Windows, stdcall labels have a suffix indicating the no. of
391 -- arg words, e.g. foo@8. testcase: ffi012(ghci)
393 literal st (MachLabel fs _ _) = litlabel st fs
394 literal st (MachWord w) = int st (fromIntegral w)
395 literal st (MachInt j) = int st (fromIntegral j)
396 literal st MachNullAddr = int st 0
397 literal st (MachFloat r) = float st (fromRational r)
398 literal st (MachDouble r) = double st (fromRational r)
399 literal st (MachChar c) = int st (ord c)
400 literal st (MachInt64 ii) = int64 st (fromIntegral ii)
401 literal st (MachWord64 ii) = int64 st (fromIntegral ii)
402 literal _ other = pprPanic "ByteCodeAsm.literal" (ppr other)
405 push_alts :: CgRep -> Word16
406 push_alts NonPtrArg = bci_PUSH_ALTS_N
407 push_alts FloatArg = bci_PUSH_ALTS_F
408 push_alts DoubleArg = bci_PUSH_ALTS_D
409 push_alts VoidArg = bci_PUSH_ALTS_V
410 push_alts LongArg = bci_PUSH_ALTS_L
411 push_alts PtrArg = bci_PUSH_ALTS_P
413 return_ubx :: CgRep -> Word16
414 return_ubx NonPtrArg = bci_RETURN_N
415 return_ubx FloatArg = bci_RETURN_F
416 return_ubx DoubleArg = bci_RETURN_D
417 return_ubx VoidArg = bci_RETURN_V
418 return_ubx LongArg = bci_RETURN_L
419 return_ubx PtrArg = bci_RETURN_P
422 -- The size in 16-bit entities of an instruction.
423 instrSize16s :: BCInstr -> Word
434 PUSH_ALTS_UNLIFTED{} -> 2
443 PUSH_APPLY_PPP{} -> 1
444 PUSH_APPLY_PPPP{} -> 1
445 PUSH_APPLY_PPPPP{} -> 1
446 PUSH_APPLY_PPPPPP{} -> 1
449 ALLOC_AP_NOUPD{} -> 2
473 -- Make lists of host-sized words for literals, so that when the
474 -- words are placed in memory at increasing addresses, the
475 -- bit pattern is correct for the host's word size and endianness.
476 mkLitI :: Int -> [Word]
477 mkLitF :: Float -> [Word]
478 mkLitD :: Double -> [Word]
479 mkLitPtr :: Ptr () -> [Word]
480 mkLitI64 :: Int64 -> [Word]
484 arr <- newArray_ ((0::Int),0)
486 f_arr <- castSTUArray arr
487 w0 <- readArray f_arr 0
494 arr <- newArray_ ((0::Int),1)
496 d_arr <- castSTUArray arr
497 w0 <- readArray d_arr 0
498 w1 <- readArray d_arr 1
499 return [w0 :: Word, w1]
503 arr <- newArray_ ((0::Int),0)
505 d_arr <- castSTUArray arr
506 w0 <- readArray d_arr 0
510 = panic "mkLitD: Bad wORD_SIZE"
515 arr <- newArray_ ((0::Int),1)
517 d_arr <- castSTUArray arr
518 w0 <- readArray d_arr 0
519 w1 <- readArray d_arr 1
520 return [w0 :: Word,w1]
524 arr <- newArray_ ((0::Int),0)
526 d_arr <- castSTUArray arr
527 w0 <- readArray d_arr 0
531 = panic "mkLitI64: Bad wORD_SIZE"
535 arr <- newArray_ ((0::Int),0)
537 i_arr <- castSTUArray arr
538 w0 <- readArray i_arr 0
544 arr <- newArray_ ((0::Int),0)
546 a_arr <- castSTUArray arr
547 w0 <- readArray a_arr 0
551 iNTERP_STACK_CHECK_THRESH :: Int
552 iNTERP_STACK_CHECK_THRESH = INTERP_STACK_CHECK_THRESH