2 % (c) The GRASP/AQUA Project, Glasgow University, 1994-1998
5 % ---------------------------------------------------------------------------
7 \section[Costs]{Evaluating the costs of computing some abstract C code}
9 This module provides all necessary functions for computing for a given
10 abstract~C Program the costs of executing that program. This is done by the
14 {\verb type CostRes = (Int, Int, Int, Int, Int)}
15 {\verb costs :: AbstractC -> CostRes }
18 The meaning of the result tuple is:
20 \item The first component ({\tt i}) counts the number of integer,
21 arithmetic and bit-manipulating instructions.
22 \item The second component ({\tt b}) counts the number of branches (direct
23 branches as well as indirect ones).
24 \item The third component ({\tt l}) counts the number of load instructions.
25 \item The fourth component ({\tt s}) counts the number of store
27 \item The fifth component ({\tt f}) counts the number of floating point
31 This function is needed in GrAnSim for parallelism.
33 These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!):
39 #define INT_ARITHM_COSTS 1
40 #define GMP_ARITHM_COSTS 3 {- any clue for GMP costs ? -}
41 #define FLOAT_ARITHM_COSTS 3 {- any clue for float costs ? -}
42 #define BRANCH_COSTS 2
47 #define ACCUM_COSTS(i,b,l,s,f) (i+b+l+s+f)
49 #define NUM_REGS 10 {- PprAbsCSyn.lhs -} {- runtime/c-as-asm/CallWrap_C.lc -}
50 #define RESTORE_COSTS (Cost (0, 0, NUM_REGS, 0, 0) :: CostRes)
51 #define SAVE_COSTS (Cost (0, 0, 0, NUM_REGS, 0) :: CostRes)
52 #define CCALL_COSTS_GUESS (Cost (50, 0, 0, 0, 0) :: CostRes)
55 addrModeCosts, CostRes(Cost), nullCosts, Side(..)
58 #include "HsVersions.h"
61 import PrimOp ( primOpNeedsWrapper, PrimOp(..) )
62 import Panic ( trace )
64 -- --------------------------------------------------------------------------
65 data CostRes = Cost (Int, Int, Int, Int, Int)
68 nullCosts = Cost (0, 0, 0, 0, 0) :: CostRes
69 initHdrCosts = Cost (2, 0, 0, 1, 0) :: CostRes
70 errorCosts = Cost (-1, -1, -1, -1, -1) -- just for debugging
72 oneArithm = Cost (1, 0, 0, 0, 0) :: CostRes
74 instance Eq CostRes where
75 (==) t1 t2 = i && b && l && s && f
76 where (i,b,l,s,f) = binOp' (==) t1 t2
78 instance Num CostRes where
86 mapOp :: (Int -> Int) -> CostRes -> CostRes
87 mapOp g ( Cost (i, b, l, s, f) ) = Cost (g i, g b, g l, g s, g f)
89 foldrOp :: (Int -> a -> a) -> a -> CostRes -> a
90 foldrOp o x ( Cost (i1, b1, l1, s1, f1) ) =
91 i1 `o` ( b1 `o` ( l1 `o` ( s1 `o` ( f1 `o` x))))
93 binOp :: (Int -> Int -> Int) -> CostRes -> CostRes -> CostRes
94 binOp o ( Cost (i1, b1, l1, s1, f1) ) ( Cost (i2, b2, l2, s2, f2) ) =
95 ( Cost (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2) )
97 binOp' :: (Int -> Int -> a) -> CostRes -> CostRes -> (a,a,a,a,a)
98 binOp' o ( Cost (i1, b1, l1, s1, f1) ) ( Cost (i2, b2, l2, s2, f2) ) =
99 (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2)
101 -- --------------------------------------------------------------------------
103 data Side = Lhs | Rhs
106 -- --------------------------------------------------------------------------
108 costs :: AbstractC -> CostRes
114 AbsCStmts absC1 absC2 -> costs absC1 + costs absC2
116 CAssign (CReg _) (CReg _) -> Cost (1,0,0,0,0) -- typ.: mov %reg1,%reg2
118 CAssign (CReg _) (CTemp _ _) -> Cost (1,0,0,0,0)
120 CAssign (CReg _) source_m -> addrModeCosts source_m Rhs
122 CAssign target_m source_m -> addrModeCosts target_m Lhs +
123 addrModeCosts source_m Rhs
125 CJump (CLbl _ _) -> Cost (0,1,0,0,0) -- no ld for call necessary
127 CJump mode -> addrModeCosts mode Rhs +
130 CFallThrough mode -> addrModeCosts mode Rhs + -- chu' 0.24
133 CReturn mode info -> case info of
134 DirectReturn -> addrModeCosts mode Rhs +
137 -- i.e. ld address to reg and call reg
139 DynamicVectoredReturn mode' ->
140 addrModeCosts mode Rhs +
141 addrModeCosts mode' Rhs +
144 {- generates code like this:
145 JMP_(<mode>)[RVREL(<mode'>)];
146 i.e. 1 possb ld for mode'
151 StaticVectoredReturn _ -> addrModeCosts mode Rhs +
154 -- as above with mode' fixed to CLit
155 -- typically 2 ld + 1 call; 1st ld due
158 CSwitch mode alts absC -> nullCosts
159 {- for handling costs of all branches of
160 a CSwitch see PprAbsC.
163 Costs before CSwitch +
164 addrModeCosts of head +
165 Costs for 1 cond branch +
166 Costs for body of branch
169 CCodeBlock _ absC -> costs absC
171 CInitHdr cl_info reg_rel cost_centre -> initHdrCosts
173 {- This is more fancy but superflous: The addr modes
174 are fixed and so the costs are const!
176 argCosts + initHdrCosts
177 where argCosts = addrModeCosts (CAddr reg_rel) Rhs +
178 addrModeCosts base_lbl + -- CLbl!
179 3*addrModeCosts (mkIntCLit 1{- any val -})
181 {- this extends to something like
183 For costing the args of this macro
184 see PprAbsC.lhs where args are inserted -}
186 COpStmt modes_res primOp modes_args _ ->
193 if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
197 foldl (+) nullCosts [addrModeCosts mode Lhs | mode <- modes_res] +
198 foldl (+) nullCosts [addrModeCosts mode Rhs | mode <- modes_args] +
200 if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
203 CSimultaneous absC -> costs absC
205 CCheck _ amodes code -> Cost (2, 1, 0, 0, 0)
207 CMacroStmt macro modes -> stmtMacroCosts macro modes
209 CCallProfCtrMacro _ _ -> nullCosts
210 {- we don't count profiling in GrAnSim -}
212 CCallProfCCMacro _ _ -> nullCosts
213 {- we don't count profiling in GrAnSim -}
215 -- *** the next three [or so...] are DATA (those above are CODE) ***
216 -- as they are data rather than code they all have nullCosts -- HWL
218 CStaticClosure _ _ _ _ -> nullCosts
220 CClosureInfoAndCode _ _ _ _ -> nullCosts
222 CRetDirect _ _ _ _ -> nullCosts
224 CRetVector _ _ _ _ -> nullCosts
226 CCostCentreDecl _ _ -> nullCosts
227 CCostCentreStackDecl _ -> nullCosts
229 CSplitMarker -> nullCosts
231 -- ---------------------------------------------------------------------------
233 addrModeCosts :: CAddrMode -> Side -> CostRes
235 -- addrModeCosts _ _ = nullCosts
237 addrModeCosts addr_mode side =
242 CVal _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
243 else Cost (0, 0, 1, 0, 0)
245 CReg _ -> nullCosts {- loading from, storing to reg is free ! -}
246 {- for costing CReg->Creg ops see special -}
247 {- case in costs fct -}
249 CTemp _ _ -> nullCosts {- if lhs then Cost (0, 0, 0, 1, 0)
250 else Cost (0, 0, 1, 0, 0) -}
251 -- ``Temporaries'' correspond to local variables in C, and registers in
253 -- I assume they can be somewhat optimized by gcc -- HWL
255 CLbl _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
256 else Cost (2, 0, 0, 0, 0)
257 -- Rhs: typically: sethi %hi(lbl),%tmp_reg
258 -- or %tmp_reg,%lo(lbl),%target_reg
260 -- Check the following 3 (checked form CLit on)
262 CCharLike mode -> if lhs then Cost (0, 0, 0, 1, 0)
263 else Cost (0, 0, 1, 0, 0)
265 CIntLike mode -> if lhs then Cost (0, 0, 0, 1, 0)
266 else Cost (0, 0, 1, 0, 0)
268 CLit _ -> if lhs then nullCosts -- should never occur
269 else Cost (1, 0, 0, 0, 0) -- typ.: mov lit,%reg
271 CLitLit _ _ -> if lhs then nullCosts
272 else Cost (1, 0, 0, 0, 0)
275 CJoinPoint _ -> if lhs then Cost (0, 0, 0, 1, 0)
276 else Cost (0, 0, 1, 0, 0)
278 CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list
280 -- ---------------------------------------------------------------------------
282 exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes
284 exprMacroCosts side macro mode_list =
286 arg_costs = foldl (+) nullCosts
287 (map (\ x -> addrModeCosts x Rhs) mode_list)
291 ENTRY_CODE -> nullCosts
292 ARG_TAG -> nullCosts -- XXX
293 GET_TAG -> nullCosts -- XXX
296 -- ---------------------------------------------------------------------------
298 stmtMacroCosts :: CStmtMacro -> [CAddrMode] -> CostRes
300 stmtMacroCosts macro modes =
302 arg_costs = foldl (+) nullCosts
303 [addrModeCosts mode Rhs | mode <- modes]
306 ARGS_CHK_LOAD_NODE -> Cost (2, 1, 0, 0, 0) {- StgMacros.lh -}
307 -- p=probability of PAP (instead of AP): + p*(3,1,0,0,0)
308 ARGS_CHK -> Cost (2, 1, 0, 0, 0) {- StgMacros.lh -}
309 UPD_CAF -> Cost (7, 0, 1, 3, 0) {- SMupdate.lh -}
310 UPD_BH_UPDATABLE -> Cost (3, 0, 0, 1, 0) {- SMupdate.lh -}
311 UPD_BH_SINGLE_ENTRY -> Cost (3, 0, 0, 1, 0) {- SMupdate.lh -}
312 PUSH_UPD_FRAME -> Cost (3, 0, 0, 4, 0) {- SMupdate.lh -}
313 SET_TAG -> nullCosts {- COptRegs.lh -}
314 GRAN_FETCH -> nullCosts {- GrAnSim bookkeeping -}
315 GRAN_RESCHEDULE -> nullCosts {- GrAnSim bookkeeping -}
316 GRAN_FETCH_AND_RESCHEDULE -> nullCosts {- GrAnSim bookkeeping -}
317 GRAN_YIELD -> nullCosts {- GrAnSim bookkeeping -- added SOF -}
318 THREAD_CONTEXT_SWITCH -> nullCosts {- GrAnSim bookkeeping -}
319 _ -> trace ("Costs.stmtMacroCosts") nullCosts
321 -- ---------------------------------------------------------------------------
325 [ FloatGtOp , FloatGeOp , FloatEqOp , FloatNeOp , FloatLtOp , FloatLeOp
326 , DoubleGtOp , DoubleGeOp , DoubleEqOp , DoubleNeOp , DoubleLtOp , DoubleLeOp
327 , FloatAddOp , FloatSubOp , FloatMulOp , FloatDivOp , FloatNegOp
328 , Float2IntOp , Int2FloatOp
329 , FloatExpOp , FloatLogOp , FloatSqrtOp
330 , FloatSinOp , FloatCosOp , FloatTanOp
331 , FloatAsinOp , FloatAcosOp , FloatAtanOp
332 , FloatSinhOp , FloatCoshOp , FloatTanhOp
334 , DoubleAddOp , DoubleSubOp , DoubleMulOp , DoubleDivOp , DoubleNegOp
335 , Double2IntOp , Int2DoubleOp
336 , Double2FloatOp , Float2DoubleOp
337 , DoubleExpOp , DoubleLogOp , DoubleSqrtOp
338 , DoubleSinOp , DoubleCosOp , DoubleTanOp
339 , DoubleAsinOp , DoubleAcosOp , DoubleAtanOp
340 , DoubleSinhOp , DoubleCoshOp , DoubleTanhOp
348 [ IntegerAddOp , IntegerSubOp , IntegerMulOp
349 , IntegerQuotRemOp , IntegerDivModOp , IntegerNegOp
351 , Integer2IntOp , Int2IntegerOp
356 abs_costs = nullCosts -- NB: This is normal STG code with costs already
357 -- included; no need to add costs again.
359 umul_costs = Cost (21,4,0,0,0) -- due to spy counts
360 rem_costs = Cost (30,15,0,0,0) -- due to spy counts
361 div_costs = Cost (30,15,0,0,0) -- due to spy counts
363 primOpCosts :: PrimOp -> CostRes
367 primOpCosts (CCallOp _ _ _ _) = SAVE_COSTS + RESTORE_COSTS
368 -- don't guess costs of ccall proper
369 -- for exact costing use a GRAN_EXEC
372 -- Usually 3 mov instructions are needed to get args and res in right place.
374 primOpCosts IntMulOp = Cost (3, 1, 0, 0, 0) + umul_costs
375 primOpCosts IntQuotOp = Cost (3, 1, 0, 0, 0) + div_costs
376 primOpCosts IntRemOp = Cost (3, 1, 0, 0, 0) + rem_costs
377 primOpCosts IntNegOp = Cost (1, 1, 0, 0, 0) -- translates into 1 sub
379 primOpCosts FloatGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
380 primOpCosts FloatGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
381 primOpCosts FloatEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
382 primOpCosts FloatNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
383 primOpCosts FloatLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
384 primOpCosts FloatLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
385 primOpCosts DoubleGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
386 primOpCosts DoubleGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
387 primOpCosts DoubleEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
388 primOpCosts DoubleNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
389 primOpCosts DoubleLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
390 primOpCosts DoubleLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
392 primOpCosts FloatExpOp = Cost (2, 1, 4, 4, 3)
393 primOpCosts FloatLogOp = Cost (2, 1, 4, 4, 3)
394 primOpCosts FloatSqrtOp = Cost (2, 1, 4, 4, 3)
395 primOpCosts FloatSinOp = Cost (2, 1, 4, 4, 3)
396 primOpCosts FloatCosOp = Cost (2, 1, 4, 4, 3)
397 primOpCosts FloatTanOp = Cost (2, 1, 4, 4, 3)
398 primOpCosts FloatAsinOp = Cost (2, 1, 4, 4, 3)
399 primOpCosts FloatAcosOp = Cost (2, 1, 4, 4, 3)
400 primOpCosts FloatAtanOp = Cost (2, 1, 4, 4, 3)
401 primOpCosts FloatSinhOp = Cost (2, 1, 4, 4, 3)
402 primOpCosts FloatCoshOp = Cost (2, 1, 4, 4, 3)
403 primOpCosts FloatTanhOp = Cost (2, 1, 4, 4, 3)
404 --primOpCosts FloatAsinhOp = Cost (2, 1, 4, 4, 3)
405 --primOpCosts FloatAcoshOp = Cost (2, 1, 4, 4, 3)
406 --primOpCosts FloatAtanhOp = Cost (2, 1, 4, 4, 3)
407 primOpCosts FloatPowerOp = Cost (2, 1, 4, 4, 3)
409 {- There should be special handling of the Array PrimOps in here HWL -}
412 | primOp `elem` floatOps = Cost (0, 0, 0, 0, 1) :: CostRes
413 | primOp `elem` gmpOps = Cost (30, 5, 10, 10, 0) :: CostRes -- GUESS; check it
414 | otherwise = Cost (1, 0, 0, 0, 0)
416 -- ---------------------------------------------------------------------------
417 {- HWL: currently unused
419 costsByKind :: PrimRep -> Side -> CostRes
421 -- The following PrimKinds say that the data is already in a reg
423 costsByKind CharRep _ = nullCosts
424 costsByKind IntRep _ = nullCosts
425 costsByKind WordRep _ = nullCosts
426 costsByKind AddrRep _ = nullCosts
427 costsByKind FloatRep _ = nullCosts
428 costsByKind DoubleRep _ = nullCosts
430 -- ---------------------------------------------------------------------------
433 This is the data structure of {\tt PrimOp} copied from prelude/PrimOp.lhs.
434 I include here some comments about the estimated costs for these @PrimOps@.
435 Compare with the @primOpCosts@ fct above. -- HWL
439 -- I assume all these basic comparisons take just one ALU instruction
440 -- Checked that for Char, Int; Word, Addr should be the same as Int.
442 = CharGtOp | CharGeOp | CharEqOp | CharNeOp | CharLtOp | CharLeOp
443 | IntGtOp | IntGeOp | IntEqOp | IntNeOp | IntLtOp | IntLeOp
444 | WordGtOp | WordGeOp | WordEqOp | WordNeOp | WordLtOp | WordLeOp
445 | AddrGtOp | AddrGeOp | AddrEqOp | AddrNeOp | AddrLtOp | AddrLeOp
447 -- Analogously, these take one FP unit instruction
448 -- Haven't checked that, yet.
450 | FloatGtOp | FloatGeOp | FloatEqOp | FloatNeOp | FloatLtOp | FloatLeOp
451 | DoubleGtOp | DoubleGeOp | DoubleEqOp | DoubleNeOp | DoubleLtOp | DoubleLeOp
453 -- 1 ALU op; unchecked
456 -- these just take 1 ALU op; checked
457 | IntAddOp | IntSubOp
459 -- but these take more than that; see special cases in primOpCosts
460 -- I counted the generated ass. instructions for these -> checked
461 | IntMulOp | IntQuotOp
462 | IntRemOp | IntNegOp
464 -- Rest is unchecked so far -- HWL
466 -- Word#-related ops:
467 | AndOp | OrOp | NotOp | XorOp | ShiftLOp | ShiftROp
468 | Int2WordOp | Word2IntOp -- casts
470 -- Addr#-related ops:
471 | Int2AddrOp | Addr2IntOp -- casts
473 -- Float#-related ops:
474 | FloatAddOp | FloatSubOp | FloatMulOp | FloatDivOp | FloatNegOp
475 | Float2IntOp | Int2FloatOp
477 | FloatExpOp | FloatLogOp | FloatSqrtOp
478 | FloatSinOp | FloatCosOp | FloatTanOp
479 | FloatAsinOp | FloatAcosOp | FloatAtanOp
480 | FloatSinhOp | FloatCoshOp | FloatTanhOp
481 -- not all machines have these available conveniently:
482 -- | FloatAsinhOp | FloatAcoshOp | FloatAtanhOp
483 | FloatPowerOp -- ** op
485 -- Double#-related ops:
486 | DoubleAddOp | DoubleSubOp | DoubleMulOp | DoubleDivOp | DoubleNegOp
487 | Double2IntOp | Int2DoubleOp
488 | Double2FloatOp | Float2DoubleOp
490 | DoubleExpOp | DoubleLogOp | DoubleSqrtOp
491 | DoubleSinOp | DoubleCosOp | DoubleTanOp
492 | DoubleAsinOp | DoubleAcosOp | DoubleAtanOp
493 | DoubleSinhOp | DoubleCoshOp | DoubleTanhOp
494 -- not all machines have these available conveniently:
495 -- | DoubleAsinhOp | DoubleAcoshOp | DoubleAtanhOp
496 | DoublePowerOp -- ** op
498 -- Integer (and related...) ops:
499 -- slightly weird -- to match GMP package.
500 | IntegerAddOp | IntegerSubOp | IntegerMulOp
501 | IntegerQuotRemOp | IntegerDivModOp | IntegerNegOp
505 | Integer2IntOp | Int2IntegerOp
506 | Addr2IntegerOp -- "Addr" is *always* a literal string
509 | FloatEncodeOp | FloatDecodeOp
510 | DoubleEncodeOp | DoubleDecodeOp
512 -- primitive ops for primitive arrays
515 | NewByteArrayOp PrimRep
518 | SameMutableByteArrayOp
520 | ReadArrayOp | WriteArrayOp | IndexArrayOp -- for arrays of Haskell ptrs
522 | ReadByteArrayOp PrimRep
523 | WriteByteArrayOp PrimRep
524 | IndexByteArrayOp PrimRep
525 | IndexOffAddrOp PrimRep
526 -- PrimRep can be one of {Char,Int,Addr,Float,Double}Kind.
527 -- This is just a cheesy encoding of a bunch of ops.
528 -- Note that ForeignObjRep is not included -- the only way of
529 -- creating a ForeignObj is with a ccall or casm.
531 | UnsafeFreezeArrayOp | UnsafeFreezeByteArrayOp
533 | MakeStablePtrOp | DeRefStablePtrOp
536 A special ``trap-door'' to use in making calls direct to C functions:
537 Note: From GrAn point of view, CCall is probably very expensive
538 The programmer can specify the costs of the Ccall by inserting
539 a GRAN_EXEC(a,b,l,s,f) at the end of the C- code, specifing the
540 number or arithm., branch, load, store and floating point instructions
544 | CCallOp String -- An "unboxed" ccall# to this named function
545 Bool -- True <=> really a "casm"
546 Bool -- True <=> might invoke Haskell GC
547 [Type] -- Unboxed argument; the state-token
548 -- argument will have been put *first*
549 Type -- Return type; one of the "StateAnd<blah>#" types
551 -- (... to be continued ... )