2 % (c) The GRASP/AQUA Project, Glasgow University, 1994-1998
5 % ---------------------------------------------------------------------------
7 \section[Costs]{Evaluating the costs of computing some abstract C code}
9 This module provides all necessary functions for computing for a given
10 abstract~C Program the costs of executing that program. This is done by the
14 {\verb type CostRes = (Int, Int, Int, Int, Int)}
15 {\verb costs :: AbstractC -> CostRes }
18 The meaning of the result tuple is:
20 \item The first component ({\tt i}) counts the number of integer,
21 arithmetic and bit-manipulating instructions.
22 \item The second component ({\tt b}) counts the number of branches (direct
23 branches as well as indirect ones).
24 \item The third component ({\tt l}) counts the number of load instructions.
25 \item The fourth component ({\tt s}) counts the number of store
27 \item The fifth component ({\tt f}) counts the number of floating point
31 This function is needed in GrAnSim for parallelism.
33 These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!):
39 #define INT_ARITHM_COSTS 1
40 #define GMP_ARITHM_COSTS 3 {- any clue for GMP costs ? -}
41 #define FLOAT_ARITHM_COSTS 3 {- any clue for float costs ? -}
42 #define BRANCH_COSTS 2
47 #define ACCUM_COSTS(i,b,l,s,f) (i+b+l+s+f)
49 #define NUM_REGS 10 {- PprAbsCSyn.lhs -} {- runtime/c-as-asm/CallWrap_C.lc -}
50 #define RESTORE_COSTS (Cost (0, 0, NUM_REGS, 0, 0) :: CostRes)
51 #define SAVE_COSTS (Cost (0, 0, 0, NUM_REGS, 0) :: CostRes)
52 #define CCALL_COSTS_GUESS (Cost (50, 0, 0, 0, 0) :: CostRes)
55 addrModeCosts, CostRes(Cost), nullCosts, Side(..)
58 #include "HsVersions.h"
61 import PrimOp ( primOpNeedsWrapper, PrimOp(..) )
62 import Panic ( trace )
64 -- --------------------------------------------------------------------------
65 data CostRes = Cost (Int, Int, Int, Int, Int)
68 nullCosts = Cost (0, 0, 0, 0, 0) :: CostRes
69 initHdrCosts = Cost (2, 0, 0, 1, 0) :: CostRes
70 errorCosts = Cost (-1, -1, -1, -1, -1) -- just for debugging
72 oneArithm = Cost (1, 0, 0, 0, 0) :: CostRes
74 instance Eq CostRes where
75 (==) t1 t2 = i && b && l && s && f
76 where (i,b,l,s,f) = binOp' (==) t1 t2
78 instance Num CostRes where
86 mapOp :: (Int -> Int) -> CostRes -> CostRes
87 mapOp g ( Cost (i, b, l, s, f) ) = Cost (g i, g b, g l, g s, g f)
89 foldrOp :: (Int -> a -> a) -> a -> CostRes -> a
90 foldrOp o x ( Cost (i1, b1, l1, s1, f1) ) =
91 i1 `o` ( b1 `o` ( l1 `o` ( s1 `o` ( f1 `o` x))))
93 binOp :: (Int -> Int -> Int) -> CostRes -> CostRes -> CostRes
94 binOp o ( Cost (i1, b1, l1, s1, f1) ) ( Cost (i2, b2, l2, s2, f2) ) =
95 ( Cost (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2) )
97 binOp' :: (Int -> Int -> a) -> CostRes -> CostRes -> (a,a,a,a,a)
98 binOp' o ( Cost (i1, b1, l1, s1, f1) ) ( Cost (i2, b2, l2, s2, f2) ) =
99 (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2)
101 -- --------------------------------------------------------------------------
103 data Side = Lhs | Rhs
106 -- --------------------------------------------------------------------------
108 costs :: AbstractC -> CostRes
114 AbsCStmts absC1 absC2 -> costs absC1 + costs absC2
116 CAssign (CReg _) (CReg _) -> Cost (1,0,0,0,0) -- typ.: mov %reg1,%reg2
118 CAssign (CReg _) (CTemp _ _) -> Cost (1,0,0,0,0)
120 CAssign (CReg _) (CAddr _) -> Cost (1,0,0,0,0) -- typ.: add %reg1,<adr>,%reg2
122 CAssign target_m source_m -> addrModeCosts target_m Lhs +
123 addrModeCosts source_m Rhs
125 CJump (CLbl _ _) -> Cost (0,1,0,0,0) -- no ld for call necessary
127 CJump mode -> addrModeCosts mode Rhs +
130 CFallThrough mode -> addrModeCosts mode Rhs + -- chu' 0.24
133 CReturn mode info -> case info of
134 DirectReturn -> addrModeCosts mode Rhs +
137 -- i.e. ld address to reg and call reg
139 DynamicVectoredReturn mode' ->
140 addrModeCosts mode Rhs +
141 addrModeCosts mode' Rhs +
144 {- generates code like this:
145 JMP_(<mode>)[RVREL(<mode'>)];
146 i.e. 1 possb ld for mode'
151 StaticVectoredReturn _ -> addrModeCosts mode Rhs +
154 -- as above with mode' fixed to CLit
155 -- typically 2 ld + 1 call; 1st ld due
158 CSwitch mode alts absC -> nullCosts
159 {- for handling costs of all branches of
160 a CSwitch see PprAbsC.
163 Costs before CSwitch +
164 addrModeCosts of head +
165 Costs for 1 cond branch +
166 Costs for body of branch
169 CCodeBlock _ absC -> costs absC
171 CInitHdr cl_info reg_rel cost_centre -> initHdrCosts
173 {- This is more fancy but superflous: The addr modes
174 are fixed and so the costs are const!
176 argCosts + initHdrCosts
177 where argCosts = addrModeCosts (CAddr reg_rel) Rhs +
178 addrModeCosts base_lbl + -- CLbl!
179 3*addrModeCosts (mkIntCLit 1{- any val -})
181 {- this extends to something like
183 For costing the args of this macro
184 see PprAbsC.lhs where args are inserted -}
186 COpStmt modes_res primOp modes_args _ ->
193 if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
197 foldl (+) nullCosts [addrModeCosts mode Lhs | mode <- modes_res] +
198 foldl (+) nullCosts [addrModeCosts mode Rhs | mode <- modes_args] +
200 if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
203 CSimultaneous absC -> costs absC
205 CCheck _ amodes code -> Cost (2, 1, 0, 0, 0)
207 CMacroStmt macro modes -> stmtMacroCosts macro modes
209 CCallProfCtrMacro _ _ -> nullCosts
210 {- we don't count profiling in GrAnSim -}
212 CCallProfCCMacro _ _ -> nullCosts
213 {- we don't count profiling in GrAnSim -}
215 -- *** the next three [or so...] are DATA (those above are CODE) ***
216 -- as they are data rather than code they all have nullCosts -- HWL
218 CStaticClosure _ _ _ _ -> nullCosts
220 CClosureInfoAndCode _ _ _ _ _ -> nullCosts
222 CRetDirect _ _ _ _ -> nullCosts
224 CRetVector _ _ _ _ -> nullCosts
226 CCostCentreDecl _ _ -> nullCosts
227 CCostCentreStackDecl _ -> nullCosts
229 CSplitMarker -> nullCosts
231 -- ---------------------------------------------------------------------------
233 addrModeCosts :: CAddrMode -> Side -> CostRes
235 -- addrModeCosts _ _ = nullCosts
237 addrModeCosts addr_mode side =
242 CVal _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
243 else Cost (0, 0, 1, 0, 0)
245 CAddr _ -> if lhs then Cost (0, 0, 0, 1, 0) -- ??unchecked
246 else Cost (0, 0, 1, 0, 0)
248 CReg _ -> nullCosts {- loading from, storing to reg is free ! -}
249 {- for costing CReg->Creg ops see special -}
250 {- case in costs fct -}
251 CTableEntry base_mode offset_mode kind ->
252 addrModeCosts base_mode side +
253 addrModeCosts offset_mode side +
256 CTemp _ _ -> nullCosts {- if lhs then Cost (0, 0, 0, 1, 0)
257 else Cost (0, 0, 1, 0, 0) -}
258 -- ``Temporaries'' correspond to local variables in C, and registers in
260 -- I assume they can be somewhat optimized by gcc -- HWL
262 CLbl _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
263 else Cost (2, 0, 0, 0, 0)
264 -- Rhs: typically: sethi %hi(lbl),%tmp_reg
265 -- or %tmp_reg,%lo(lbl),%target_reg
267 -- Check the following 3 (checked form CLit on)
269 CCharLike mode -> if lhs then Cost (0, 0, 0, 1, 0)
270 else Cost (0, 0, 1, 0, 0)
272 CIntLike mode -> if lhs then Cost (0, 0, 0, 1, 0)
273 else Cost (0, 0, 1, 0, 0)
275 CString _ -> if lhs then Cost (0, 0, 0, 1, 0)
276 else Cost (0, 0, 1, 0, 0)
278 CLit _ -> if lhs then nullCosts -- should never occur
279 else Cost (1, 0, 0, 0, 0) -- typ.: mov lit,%reg
281 CLitLit _ _ -> if lhs then nullCosts
282 else Cost (1, 0, 0, 0, 0)
285 CJoinPoint _ -> if lhs then Cost (0, 0, 0, 1, 0)
286 else Cost (0, 0, 1, 0, 0)
288 CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list
290 -- ---------------------------------------------------------------------------
292 exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes
294 exprMacroCosts side macro mode_list =
296 arg_costs = foldl (+) nullCosts
297 (map (\ x -> addrModeCosts x Rhs) mode_list)
301 ENTRY_CODE -> nullCosts
302 ARG_TAG -> nullCosts -- XXX
303 GET_TAG -> nullCosts -- XXX
306 -- ---------------------------------------------------------------------------
308 stmtMacroCosts :: CStmtMacro -> [CAddrMode] -> CostRes
310 stmtMacroCosts macro modes =
312 arg_costs = foldl (+) nullCosts
313 [addrModeCosts mode Rhs | mode <- modes]
316 ARGS_CHK_LOAD_NODE -> Cost (2, 1, 0, 0, 0) {- StgMacros.lh -}
317 -- p=probability of PAP (instead of AP): + p*(3,1,0,0,0)
318 ARGS_CHK -> Cost (2, 1, 0, 0, 0) {- StgMacros.lh -}
319 UPD_CAF -> Cost (7, 0, 1, 3, 0) {- SMupdate.lh -}
320 UPD_BH_UPDATABLE -> Cost (3, 0, 0, 1, 0) {- SMupdate.lh -}
321 UPD_BH_SINGLE_ENTRY -> Cost (3, 0, 0, 1, 0) {- SMupdate.lh -}
322 PUSH_UPD_FRAME -> Cost (3, 0, 0, 4, 0) {- SMupdate.lh -}
323 SET_TAG -> nullCosts {- COptRegs.lh -}
324 GRAN_FETCH -> nullCosts {- GrAnSim bookkeeping -}
325 GRAN_RESCHEDULE -> nullCosts {- GrAnSim bookkeeping -}
326 GRAN_FETCH_AND_RESCHEDULE -> nullCosts {- GrAnSim bookkeeping -}
327 GRAN_YIELD -> nullCosts {- GrAnSim bookkeeping -- added SOF -}
328 THREAD_CONTEXT_SWITCH -> nullCosts {- GrAnSim bookkeeping -}
329 _ -> trace ("Costs.stmtMacroCosts: "++show macro) nullCosts
331 -- ---------------------------------------------------------------------------
335 [ FloatGtOp , FloatGeOp , FloatEqOp , FloatNeOp , FloatLtOp , FloatLeOp
336 , DoubleGtOp , DoubleGeOp , DoubleEqOp , DoubleNeOp , DoubleLtOp , DoubleLeOp
337 , FloatAddOp , FloatSubOp , FloatMulOp , FloatDivOp , FloatNegOp
338 , Float2IntOp , Int2FloatOp
339 , FloatExpOp , FloatLogOp , FloatSqrtOp
340 , FloatSinOp , FloatCosOp , FloatTanOp
341 , FloatAsinOp , FloatAcosOp , FloatAtanOp
342 , FloatSinhOp , FloatCoshOp , FloatTanhOp
344 , DoubleAddOp , DoubleSubOp , DoubleMulOp , DoubleDivOp , DoubleNegOp
345 , Double2IntOp , Int2DoubleOp
346 , Double2FloatOp , Float2DoubleOp
347 , DoubleExpOp , DoubleLogOp , DoubleSqrtOp
348 , DoubleSinOp , DoubleCosOp , DoubleTanOp
349 , DoubleAsinOp , DoubleAcosOp , DoubleAtanOp
350 , DoubleSinhOp , DoubleCoshOp , DoubleTanhOp
358 [ IntegerAddOp , IntegerSubOp , IntegerMulOp
359 , IntegerQuotRemOp , IntegerDivModOp , IntegerNegOp
361 , Integer2IntOp , Int2IntegerOp
366 abs_costs = nullCosts -- NB: This is normal STG code with costs already
367 -- included; no need to add costs again.
369 umul_costs = Cost (21,4,0,0,0) -- due to spy counts
370 rem_costs = Cost (30,15,0,0,0) -- due to spy counts
371 div_costs = Cost (30,15,0,0,0) -- due to spy counts
373 primOpCosts :: PrimOp -> CostRes
377 primOpCosts (CCallOp _ _ _ _) = SAVE_COSTS + RESTORE_COSTS
378 -- don't guess costs of ccall proper
379 -- for exact costing use a GRAN_EXEC
382 -- Usually 3 mov instructions are needed to get args and res in right place.
384 primOpCosts IntMulOp = Cost (3, 1, 0, 0, 0) + umul_costs
385 primOpCosts IntQuotOp = Cost (3, 1, 0, 0, 0) + div_costs
386 primOpCosts IntRemOp = Cost (3, 1, 0, 0, 0) + rem_costs
387 primOpCosts IntNegOp = Cost (1, 1, 0, 0, 0) -- translates into 1 sub
388 primOpCosts IntAbsOp = Cost (0, 1, 0, 0, 0) -- abs closure already costed
390 primOpCosts FloatGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
391 primOpCosts FloatGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
392 primOpCosts FloatEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
393 primOpCosts FloatNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
394 primOpCosts FloatLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
395 primOpCosts FloatLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
396 primOpCosts DoubleGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
397 primOpCosts DoubleGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
398 primOpCosts DoubleEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
399 primOpCosts DoubleNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
400 primOpCosts DoubleLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
401 primOpCosts DoubleLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
403 primOpCosts FloatExpOp = Cost (2, 1, 4, 4, 3)
404 primOpCosts FloatLogOp = Cost (2, 1, 4, 4, 3)
405 primOpCosts FloatSqrtOp = Cost (2, 1, 4, 4, 3)
406 primOpCosts FloatSinOp = Cost (2, 1, 4, 4, 3)
407 primOpCosts FloatCosOp = Cost (2, 1, 4, 4, 3)
408 primOpCosts FloatTanOp = Cost (2, 1, 4, 4, 3)
409 primOpCosts FloatAsinOp = Cost (2, 1, 4, 4, 3)
410 primOpCosts FloatAcosOp = Cost (2, 1, 4, 4, 3)
411 primOpCosts FloatAtanOp = Cost (2, 1, 4, 4, 3)
412 primOpCosts FloatSinhOp = Cost (2, 1, 4, 4, 3)
413 primOpCosts FloatCoshOp = Cost (2, 1, 4, 4, 3)
414 primOpCosts FloatTanhOp = Cost (2, 1, 4, 4, 3)
415 --primOpCosts FloatAsinhOp = Cost (2, 1, 4, 4, 3)
416 --primOpCosts FloatAcoshOp = Cost (2, 1, 4, 4, 3)
417 --primOpCosts FloatAtanhOp = Cost (2, 1, 4, 4, 3)
418 primOpCosts FloatPowerOp = Cost (2, 1, 4, 4, 3)
420 {- There should be special handling of the Array PrimOps in here HWL -}
423 | primOp `elem` floatOps = Cost (0, 0, 0, 0, 1) :: CostRes
424 | primOp `elem` gmpOps = Cost (30, 5, 10, 10, 0) :: CostRes -- GUESS; check it
425 | otherwise = Cost (1, 0, 0, 0, 0)
427 -- ---------------------------------------------------------------------------
428 {- HWL: currently unused
430 costsByKind :: PrimRep -> Side -> CostRes
432 -- The following PrimKinds say that the data is already in a reg
434 costsByKind CharRep _ = nullCosts
435 costsByKind IntRep _ = nullCosts
436 costsByKind WordRep _ = nullCosts
437 costsByKind AddrRep _ = nullCosts
438 costsByKind FloatRep _ = nullCosts
439 costsByKind DoubleRep _ = nullCosts
441 -- ---------------------------------------------------------------------------
444 This is the data structure of {\tt PrimOp} copied from prelude/PrimOp.lhs.
445 I include here some comments about the estimated costs for these @PrimOps@.
446 Compare with the @primOpCosts@ fct above. -- HWL
450 -- I assume all these basic comparisons take just one ALU instruction
451 -- Checked that for Char, Int; Word, Addr should be the same as Int.
453 = CharGtOp | CharGeOp | CharEqOp | CharNeOp | CharLtOp | CharLeOp
454 | IntGtOp | IntGeOp | IntEqOp | IntNeOp | IntLtOp | IntLeOp
455 | WordGtOp | WordGeOp | WordEqOp | WordNeOp | WordLtOp | WordLeOp
456 | AddrGtOp | AddrGeOp | AddrEqOp | AddrNeOp | AddrLtOp | AddrLeOp
458 -- Analogously, these take one FP unit instruction
459 -- Haven't checked that, yet.
461 | FloatGtOp | FloatGeOp | FloatEqOp | FloatNeOp | FloatLtOp | FloatLeOp
462 | DoubleGtOp | DoubleGeOp | DoubleEqOp | DoubleNeOp | DoubleLtOp | DoubleLeOp
464 -- 1 ALU op; unchecked
467 -- these just take 1 ALU op; checked
468 | IntAddOp | IntSubOp
470 -- but these take more than that; see special cases in primOpCosts
471 -- I counted the generated ass. instructions for these -> checked
472 | IntMulOp | IntQuotOp
473 | IntRemOp | IntNegOp | IntAbsOp
475 -- Rest is unchecked so far -- HWL
477 -- Word#-related ops:
478 | AndOp | OrOp | NotOp | XorOp | ShiftLOp | ShiftROp
479 | Int2WordOp | Word2IntOp -- casts
481 -- Addr#-related ops:
482 | Int2AddrOp | Addr2IntOp -- casts
484 -- Float#-related ops:
485 | FloatAddOp | FloatSubOp | FloatMulOp | FloatDivOp | FloatNegOp
486 | Float2IntOp | Int2FloatOp
488 | FloatExpOp | FloatLogOp | FloatSqrtOp
489 | FloatSinOp | FloatCosOp | FloatTanOp
490 | FloatAsinOp | FloatAcosOp | FloatAtanOp
491 | FloatSinhOp | FloatCoshOp | FloatTanhOp
492 -- not all machines have these available conveniently:
493 -- | FloatAsinhOp | FloatAcoshOp | FloatAtanhOp
494 | FloatPowerOp -- ** op
496 -- Double#-related ops:
497 | DoubleAddOp | DoubleSubOp | DoubleMulOp | DoubleDivOp | DoubleNegOp
498 | Double2IntOp | Int2DoubleOp
499 | Double2FloatOp | Float2DoubleOp
501 | DoubleExpOp | DoubleLogOp | DoubleSqrtOp
502 | DoubleSinOp | DoubleCosOp | DoubleTanOp
503 | DoubleAsinOp | DoubleAcosOp | DoubleAtanOp
504 | DoubleSinhOp | DoubleCoshOp | DoubleTanhOp
505 -- not all machines have these available conveniently:
506 -- | DoubleAsinhOp | DoubleAcoshOp | DoubleAtanhOp
507 | DoublePowerOp -- ** op
509 -- Integer (and related...) ops:
510 -- slightly weird -- to match GMP package.
511 | IntegerAddOp | IntegerSubOp | IntegerMulOp
512 | IntegerQuotRemOp | IntegerDivModOp | IntegerNegOp
516 | Integer2IntOp | Int2IntegerOp
517 | Addr2IntegerOp -- "Addr" is *always* a literal string
520 | FloatEncodeOp | FloatDecodeOp
521 | DoubleEncodeOp | DoubleDecodeOp
523 -- primitive ops for primitive arrays
526 | NewByteArrayOp PrimRep
529 | SameMutableByteArrayOp
531 | ReadArrayOp | WriteArrayOp | IndexArrayOp -- for arrays of Haskell ptrs
533 | ReadByteArrayOp PrimRep
534 | WriteByteArrayOp PrimRep
535 | IndexByteArrayOp PrimRep
536 | IndexOffAddrOp PrimRep
537 -- PrimRep can be one of {Char,Int,Addr,Float,Double}Kind.
538 -- This is just a cheesy encoding of a bunch of ops.
539 -- Note that ForeignObjRep is not included -- the only way of
540 -- creating a ForeignObj is with a ccall or casm.
542 | UnsafeFreezeArrayOp | UnsafeFreezeByteArrayOp
544 | MakeStablePtrOp | DeRefStablePtrOp
547 A special ``trap-door'' to use in making calls direct to C functions:
548 Note: From GrAn point of view, CCall is probably very expensive
549 The programmer can specify the costs of the Ccall by inserting
550 a GRAN_EXEC(a,b,l,s,f) at the end of the C- code, specifing the
551 number or arithm., branch, load, store and floating point instructions
555 | CCallOp String -- An "unboxed" ccall# to this named function
556 Bool -- True <=> really a "casm"
557 Bool -- True <=> might invoke Haskell GC
558 [Type] -- Unboxed argument; the state-token
559 -- argument will have been put *first*
560 Type -- Return type; one of the "StateAnd<blah>#" types
562 -- (... to be continued ... )