2 % (c) The University of Glasgow 2002-2006
5 ByteCodeLink: Bytecode assembler and linker
8 {-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
11 -- The above warning supression flag is a temporary kludge.
12 -- While working on this module you are encouraged to remove it and fix
13 -- any warnings in the module. See
14 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
18 assembleBCOs, assembleBCO,
21 UnlinkedBCO(..), BCOPtr(..), BCONPtr(..), bcoFreeNames,
22 SizedSeq, sizeSS, ssElts,
23 iNTERP_STACK_CHECK_THRESH
26 #include "HsVersions.h"
43 import Control.Monad ( foldM )
44 import Control.Monad.ST ( runST )
46 import GHC.Word ( Word(..) )
47 import Data.Array.MArray
48 import Data.Array.Unboxed ( listArray )
49 import Data.Array.Base ( UArray(..) )
50 import Data.Array.ST ( castSTUArray )
51 import Foreign ( Word16, free )
53 import Data.Int ( Int64 )
54 import Data.Char ( ord )
56 import GHC.Base ( ByteArray#, MutableByteArray#, RealWorld )
57 import GHC.IOBase ( IO(..) )
58 import GHC.Ptr ( Ptr(..) )
60 -- -----------------------------------------------------------------------------
63 -- CompiledByteCode represents the result of byte-code
64 -- compiling a bunch of functions and data types
67 = ByteCode [UnlinkedBCO] -- Bunch of interpretable bindings
68 ItblEnv -- A mapping from DataCons to their itbls
70 instance Outputable CompiledByteCode where
71 ppr (ByteCode bcos _) = ppr bcos
76 unlinkedBCOName :: Name,
77 unlinkedBCOArity :: Int,
78 unlinkedBCOInstrs :: ByteArray#, -- insns
79 unlinkedBCOBitmap :: ByteArray#, -- bitmap
80 unlinkedBCOLits :: (SizedSeq BCONPtr), -- non-ptrs
81 unlinkedBCOPtrs :: (SizedSeq BCOPtr) -- ptrs
87 | BCOPtrBCO UnlinkedBCO
88 | BCOPtrBreakInfo BreakInfo
89 | BCOPtrArray (MutableByteArray# RealWorld)
93 | BCONPtrLbl FastString
96 -- | Finds external references. Remember to remove the names
97 -- defined by this group of BCOs themselves
98 bcoFreeNames :: UnlinkedBCO -> NameSet
100 = bco_refs bco `minusNameSet` mkNameSet [unlinkedBCOName bco]
102 bco_refs (UnlinkedBCO _ _ _ _ nonptrs ptrs)
103 = unionManyNameSets (
104 mkNameSet [ n | BCOPtrName n <- ssElts ptrs ] :
105 mkNameSet [ n | BCONPtrItbl n <- ssElts nonptrs ] :
106 map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
109 instance Outputable UnlinkedBCO where
110 ppr (UnlinkedBCO nm arity insns bitmap lits ptrs)
111 = sep [text "BCO", ppr nm, text "with",
112 int (sizeSS lits), text "lits",
113 int (sizeSS ptrs), text "ptrs" ]
115 -- -----------------------------------------------------------------------------
116 -- The bytecode assembler
118 -- The object format for bytecodes is: 16 bits for the opcode, and 16
119 -- for each field -- so the code can be considered a sequence of
120 -- 16-bit ints. Each field denotes either a stack offset or number of
121 -- items on the stack (eg SLIDE), and index into the pointer table (eg
122 -- PUSH_G), an index into the literal table (eg PUSH_I/D/L), or a
123 -- bytecode address in this BCO.
125 -- Top level assembler fn.
126 assembleBCOs :: [ProtoBCO Name] -> [TyCon] -> IO CompiledByteCode
127 assembleBCOs proto_bcos tycons
128 = do itblenv <- mkITbls tycons
129 bcos <- mapM assembleBCO proto_bcos
130 return (ByteCode bcos itblenv)
132 assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
133 assembleBCO (ProtoBCO nm instrs bitmap bsize arity origin malloced)
135 -- pass 1: collect up the offsets of the local labels.
136 -- Remember that the first insn starts at offset 1 since offset 0
137 -- (eventually) will hold the total # of insns.
138 label_env = mkLabelEnv emptyFM 1 instrs
140 mkLabelEnv env i_offset [] = env
141 mkLabelEnv env i_offset (i:is)
143 = case i of LABEL n -> addToFM env n i_offset ; _ -> env
144 in mkLabelEnv new_env (i_offset + instrSize16s i) is
147 = case lookupFM label_env lab of
148 Just bco_offset -> bco_offset
149 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
151 do -- pass 2: generate the instruction, ptr and nonptr bits
152 insns <- return emptySS :: IO (SizedSeq Word16)
153 lits <- return emptySS :: IO (SizedSeq BCONPtr)
154 ptrs <- return emptySS :: IO (SizedSeq BCOPtr)
155 let init_asm_state = (insns,lits,ptrs)
156 (final_insns, final_lits, final_ptrs)
157 <- mkBits findLabel init_asm_state instrs
159 let asm_insns = ssElts final_insns
160 n_insns = sizeSS final_insns
163 | n_insns > 65535 = panic "linkBCO: >= 64k insns in BCO"
164 | otherwise = mkInstrArray n_insns asm_insns
165 insns_barr = case insns_arr of UArray _lo _hi _n barr -> barr
167 bitmap_arr = mkBitmapArray bsize bitmap
168 bitmap_barr = case bitmap_arr of UArray _lo _hi _n barr -> barr
170 let ul_bco = UnlinkedBCO nm arity insns_barr bitmap_barr final_lits final_ptrs
172 -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
173 -- objects, since they might get run too early. Disable this until
174 -- we figure out what to do.
175 -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
179 -- zonk ptr = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
182 mkBitmapArray :: Int -> [StgWord] -> UArray Int StgWord
183 mkBitmapArray bsize bitmap
184 = listArray (0, length bitmap) (fromIntegral bsize : bitmap)
186 mkInstrArray :: Int -> [Word16] -> UArray Int Word16
187 mkInstrArray n_insns asm_insns
188 = listArray (0, n_insns) (fromIntegral n_insns : asm_insns)
190 -- instrs nonptrs ptrs
191 type AsmState = (SizedSeq Word16,
195 data SizedSeq a = SizedSeq !Int [a]
196 emptySS = SizedSeq 0 []
198 -- Why are these two monadic???
199 addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
200 addListToSS (SizedSeq n r_xs) xs
201 = return (SizedSeq (n + length xs) (reverse xs ++ r_xs))
203 ssElts :: SizedSeq a -> [a]
204 ssElts (SizedSeq n r_xs) = reverse r_xs
206 sizeSS :: SizedSeq a -> Int
207 sizeSS (SizedSeq n r_xs) = n
209 -- Bring in all the bci_ bytecode constants.
210 #include "Bytecodes.h"
212 largeArgInstr :: Int -> Int
213 largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci
215 largeArg :: Int -> [Int]
217 | wORD_SIZE_IN_BITS == 64
218 = [(i .&. 0xFFFF000000000000) `shiftR` 48,
219 (i .&. 0x0000FFFF00000000) `shiftR` 32,
220 (i .&. 0x00000000FFFF0000) `shiftR` 16,
221 (i .&. 0x000000000000FFFF)]
222 | wORD_SIZE_IN_BITS == 32
223 = [(i .&. 0xFFFF0000) `shiftR` 16,
225 | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?"
227 -- This is where all the action is (pass 2 of the assembler)
228 mkBits :: (Int -> Int) -- label finder
230 -> [BCInstr] -- instructions (in)
233 mkBits findLabel st proto_insns
234 = foldM doInstr st proto_insns
236 doInstr :: AsmState -> BCInstr -> IO AsmState
241 instrn st (largeArgInstr bci_STKCHECK : largeArg n)
242 | otherwise -> instr2 st bci_STKCHECK n
243 PUSH_L o1 -> instr2 st bci_PUSH_L o1
244 PUSH_LL o1 o2 -> instr3 st bci_PUSH_LL o1 o2
245 PUSH_LLL o1 o2 o3 -> instr4 st bci_PUSH_LLL o1 o2 o3
246 PUSH_G nm -> do (p, st2) <- ptr st (BCOPtrName nm)
247 instr2 st2 bci_PUSH_G p
248 PUSH_PRIMOP op -> do (p, st2) <- ptr st (BCOPtrPrimOp op)
249 instr2 st2 bci_PUSH_G p
250 PUSH_BCO proto -> do ul_bco <- assembleBCO proto
251 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
252 instr2 st2 bci_PUSH_G p
253 PUSH_ALTS proto -> do ul_bco <- assembleBCO proto
254 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
255 instr2 st2 bci_PUSH_ALTS p
256 PUSH_ALTS_UNLIFTED proto pk -> do
257 ul_bco <- assembleBCO proto
258 (p, st2) <- ptr st (BCOPtrBCO ul_bco)
259 instr2 st2 (push_alts pk) p
260 PUSH_UBX (Left lit) nws
261 -> do (np, st2) <- literal st lit
262 instr3 st2 bci_PUSH_UBX np nws
263 PUSH_UBX (Right aa) nws
264 -> do (np, st2) <- addr st aa
265 instr3 st2 bci_PUSH_UBX np nws
267 PUSH_APPLY_N -> do instr1 st bci_PUSH_APPLY_N
268 PUSH_APPLY_V -> do instr1 st bci_PUSH_APPLY_V
269 PUSH_APPLY_F -> do instr1 st bci_PUSH_APPLY_F
270 PUSH_APPLY_D -> do instr1 st bci_PUSH_APPLY_D
271 PUSH_APPLY_L -> do instr1 st bci_PUSH_APPLY_L
272 PUSH_APPLY_P -> do instr1 st bci_PUSH_APPLY_P
273 PUSH_APPLY_PP -> do instr1 st bci_PUSH_APPLY_PP
274 PUSH_APPLY_PPP -> do instr1 st bci_PUSH_APPLY_PPP
275 PUSH_APPLY_PPPP -> do instr1 st bci_PUSH_APPLY_PPPP
276 PUSH_APPLY_PPPPP -> do instr1 st bci_PUSH_APPLY_PPPPP
277 PUSH_APPLY_PPPPPP -> do instr1 st bci_PUSH_APPLY_PPPPPP
279 SLIDE n by -> instr3 st bci_SLIDE n by
280 ALLOC_AP n -> instr2 st bci_ALLOC_AP n
281 ALLOC_AP_NOUPD n -> instr2 st bci_ALLOC_AP_NOUPD n
282 ALLOC_PAP arity n -> instr3 st bci_ALLOC_PAP arity n
283 MKAP off sz -> instr3 st bci_MKAP off sz
284 MKPAP off sz -> instr3 st bci_MKPAP off sz
285 UNPACK n -> instr2 st bci_UNPACK n
286 PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
287 instr3 st2 bci_PACK itbl_no sz
288 LABEL lab -> return st
289 TESTLT_I i l -> do (np, st2) <- int st i
290 instr3 st2 bci_TESTLT_I np (findLabel l)
291 TESTEQ_I i l -> do (np, st2) <- int st i
292 instr3 st2 bci_TESTEQ_I np (findLabel l)
293 TESTLT_F f l -> do (np, st2) <- float st f
294 instr3 st2 bci_TESTLT_F np (findLabel l)
295 TESTEQ_F f l -> do (np, st2) <- float st f
296 instr3 st2 bci_TESTEQ_F np (findLabel l)
297 TESTLT_D d l -> do (np, st2) <- double st d
298 instr3 st2 bci_TESTLT_D np (findLabel l)
299 TESTEQ_D d l -> do (np, st2) <- double st d
300 instr3 st2 bci_TESTEQ_D np (findLabel l)
301 TESTLT_P i l -> instr3 st bci_TESTLT_P i (findLabel l)
302 TESTEQ_P i l -> instr3 st bci_TESTEQ_P i (findLabel l)
303 CASEFAIL -> instr1 st bci_CASEFAIL
304 SWIZZLE stkoff n -> instr3 st bci_SWIZZLE stkoff n
305 JMP l -> instr2 st bci_JMP (findLabel l)
306 ENTER -> instr1 st bci_ENTER
307 RETURN -> instr1 st bci_RETURN
308 RETURN_UBX rep -> instr1 st (return_ubx rep)
309 CCALL off m_addr -> do (np, st2) <- addr st m_addr
310 instr3 st2 bci_CCALL off np
311 BRK_FUN array index info -> do
312 (p1, st2) <- ptr st (BCOPtrArray array)
313 (p2, st3) <- ptr st2 (BCOPtrBreakInfo info)
314 instr4 st3 bci_BRK_FUN p1 index p2
319 instrn :: AsmState -> [Int] -> IO AsmState
320 instrn st [] = return st
321 instrn (st_i, st_l, st_p) (i:is)
322 = do st_i' <- addToSS st_i (i2s i)
323 instrn (st_i', st_l, st_p) is
325 instr1 (st_i0,st_l0,st_p0) i1
326 = do st_i1 <- addToSS st_i0 i1
327 return (st_i1,st_l0,st_p0)
329 instr2 (st_i0,st_l0,st_p0) i1 i2
330 = do st_i1 <- addToSS st_i0 (i2s i1)
331 st_i2 <- addToSS st_i1 (i2s i2)
332 return (st_i2,st_l0,st_p0)
334 instr3 (st_i0,st_l0,st_p0) i1 i2 i3
335 = do st_i1 <- addToSS st_i0 (i2s i1)
336 st_i2 <- addToSS st_i1 (i2s i2)
337 st_i3 <- addToSS st_i2 (i2s i3)
338 return (st_i3,st_l0,st_p0)
340 instr4 (st_i0,st_l0,st_p0) i1 i2 i3 i4
341 = do st_i1 <- addToSS st_i0 (i2s i1)
342 st_i2 <- addToSS st_i1 (i2s i2)
343 st_i3 <- addToSS st_i2 (i2s i3)
344 st_i4 <- addToSS st_i3 (i2s i4)
345 return (st_i4,st_l0,st_p0)
347 float (st_i0,st_l0,st_p0) f
348 = do let ws = mkLitF f
349 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
350 return (sizeSS st_l0, (st_i0,st_l1,st_p0))
352 double (st_i0,st_l0,st_p0) d
353 = do let ws = mkLitD d
354 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
355 return (sizeSS st_l0, (st_i0,st_l1,st_p0))
357 int (st_i0,st_l0,st_p0) i
358 = do let ws = mkLitI i
359 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
360 return (sizeSS st_l0, (st_i0,st_l1,st_p0))
362 int64 (st_i0,st_l0,st_p0) i
363 = do let ws = mkLitI64 i
364 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
365 return (sizeSS st_l0, (st_i0,st_l1,st_p0))
367 addr (st_i0,st_l0,st_p0) a
368 = do let ws = mkLitPtr a
369 st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
370 return (sizeSS st_l0, (st_i0,st_l1,st_p0))
372 litlabel (st_i0,st_l0,st_p0) fs
373 = do st_l1 <- addListToSS st_l0 [BCONPtrLbl fs]
374 return (sizeSS st_l0, (st_i0,st_l1,st_p0))
376 ptr (st_i0,st_l0,st_p0) p
377 = do st_p1 <- addToSS st_p0 p
378 return (sizeSS st_p0, (st_i0,st_l0,st_p1))
380 itbl (st_i0,st_l0,st_p0) dcon
381 = do st_l1 <- addToSS st_l0 (BCONPtrItbl (getName dcon))
382 return (sizeSS st_l0, (st_i0,st_l1,st_p0))
384 #ifdef mingw32_TARGET_OS
385 literal st (MachLabel fs (Just sz))
386 = litlabel st (appendFS fs (mkFastString ('@':show sz)))
387 -- On Windows, stdcall labels have a suffix indicating the no. of
388 -- arg words, e.g. foo@8. testcase: ffi012(ghci)
390 literal st (MachLabel fs _) = litlabel st fs
391 literal st (MachWord w) = int st (fromIntegral w)
392 literal st (MachInt j) = int st (fromIntegral j)
393 literal st MachNullAddr = int st (fromIntegral 0)
394 literal st (MachFloat r) = float st (fromRational r)
395 literal st (MachDouble r) = double st (fromRational r)
396 literal st (MachChar c) = int st (ord c)
397 literal st (MachInt64 ii) = int64 st (fromIntegral ii)
398 literal st (MachWord64 ii) = int64 st (fromIntegral ii)
399 literal st other = pprPanic "ByteCodeAsm.literal" (ppr other)
402 push_alts NonPtrArg = bci_PUSH_ALTS_N
403 push_alts FloatArg = bci_PUSH_ALTS_F
404 push_alts DoubleArg = bci_PUSH_ALTS_D
405 push_alts VoidArg = bci_PUSH_ALTS_V
406 push_alts LongArg = bci_PUSH_ALTS_L
407 push_alts PtrArg = bci_PUSH_ALTS_P
409 return_ubx NonPtrArg = bci_RETURN_N
410 return_ubx FloatArg = bci_RETURN_F
411 return_ubx DoubleArg = bci_RETURN_D
412 return_ubx VoidArg = bci_RETURN_V
413 return_ubx LongArg = bci_RETURN_L
414 return_ubx PtrArg = bci_RETURN_P
417 -- The size in 16-bit entities of an instruction.
418 instrSize16s :: BCInstr -> Int
429 PUSH_ALTS_UNLIFTED{} -> 2
438 PUSH_APPLY_PPP{} -> 1
439 PUSH_APPLY_PPPP{} -> 1
440 PUSH_APPLY_PPPPP{} -> 1
441 PUSH_APPLY_PPPPPP{} -> 1
444 ALLOC_AP_NOUPD{} -> 2
468 -- Make lists of host-sized words for literals, so that when the
469 -- words are placed in memory at increasing addresses, the
470 -- bit pattern is correct for the host's word size and endianness.
471 mkLitI :: Int -> [Word]
472 mkLitF :: Float -> [Word]
473 mkLitD :: Double -> [Word]
474 mkLitPtr :: Ptr () -> [Word]
475 mkLitI64 :: Int64 -> [Word]
479 arr <- newArray_ ((0::Int),0)
481 f_arr <- castSTUArray arr
482 w0 <- readArray f_arr 0
489 arr <- newArray_ ((0::Int),1)
491 d_arr <- castSTUArray arr
492 w0 <- readArray d_arr 0
493 w1 <- readArray d_arr 1
494 return [w0 :: Word, w1]
498 arr <- newArray_ ((0::Int),0)
500 d_arr <- castSTUArray arr
501 w0 <- readArray d_arr 0
508 arr <- newArray_ ((0::Int),1)
510 d_arr <- castSTUArray arr
511 w0 <- readArray d_arr 0
512 w1 <- readArray d_arr 1
513 return [w0 :: Word,w1]
517 arr <- newArray_ ((0::Int),0)
519 d_arr <- castSTUArray arr
520 w0 <- readArray d_arr 0
526 arr <- newArray_ ((0::Int),0)
528 i_arr <- castSTUArray arr
529 w0 <- readArray i_arr 0
535 arr <- newArray_ ((0::Int),0)
537 a_arr <- castSTUArray arr
538 w0 <- readArray a_arr 0
542 iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)