ghc/compiler/absCSyn/Costs.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1994-1996
   3 %     Hans Wolfgang Loidl
   4 %
   5 % ---------------------------------------------------------------------------
   6
   7 \section[Costs]{Evaluating the costs of computing some abstract C code}
   8
   9 This module   provides all necessary  functions for   computing for a given
  10 abstract~C Program the costs of executing that program. This is done by the
  11 exported function:
  12
  13 \begin{quote}
  14  {\verb type CostRes = (Int, Int, Int, Int, Int)}
  15  {\verb costs :: AbstractC -> CostRes }
  16 \end{quote}
  17
  18 The meaning of the result tuple is:
  19 \begin{itemize}
  20  \item The first component ({\tt i}) counts the number of integer,
  21    arithmetic and bit-manipulating instructions.
  22  \item The second component ({\tt b}) counts the number of branches (direct
  23    branches as well as indirect ones).
  24  \item The third component ({\tt l}) counts the number of load instructions.
  25  \item The fourth component ({\tt s}) counts the number of store
  26    instructions.
  27  \item The fifth component ({\tt f}) counts the number of floating point
  28    instructions.
  29 \end{itemize}
  30
  31 This function is needed in GrAnSim for parallelism.
  32
  33 These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!):
  34
  35 \begin{pseudocode}
  36
  37 #define LOAD_COSTS              2
  38 #define STORE_COSTS             2
  39 #define INT_ARITHM_COSTS        1
  40 #define GMP_ARITHM_COSTS        3 {- any clue for GMP costs ? -}
  41 #define FLOAT_ARITHM_COSTS      3 {- any clue for float costs ? -}
  42 #define BRANCH_COSTS            2
  43
  44 \end{pseudocode}
  45
  46 \begin{code}
  47 #include "HsVersions.h"
  48
  49 #define ACCUM_COSTS(i,b,l,s,f)  (i+b+l+s+f)
  50
  51 #define NUM_REGS                10 {- PprAbsCSyn.lhs -}       {- runtime/c-as-asm/CallWrap_C.lc -}
  52 #define RESTORE_COSTS           (Cost (0, 0, NUM_REGS, 0, 0)  :: CostRes)
  53 #define SAVE_COSTS              (Cost (0, 0, 0, NUM_REGS, 0)  :: CostRes)
  54 #define CCALL_COSTS_GUESS       (Cost (50, 0, 0, 0, 0)        :: CostRes)
  55
  56 module Costs( costs,
  57               addrModeCosts, CostRes(Cost), nullCosts, Side(..)
  58     ) where
  59
  60 IMP_Ubiq(){-uitous-}
  61
  62 import AbsCSyn
  63 import PrimOp           ( primOpNeedsWrapper, PrimOp(..) )
  64
  65 -- --------------------------------------------------------------------------
  66 data CostRes = Cost (Int, Int, Int, Int, Int)
  67                deriving (Text)
  68
  69 nullCosts    = Cost (0, 0, 0, 0, 0) :: CostRes
  70 initHdrCosts = Cost (2, 0, 0, 1, 0) :: CostRes
  71 errorCosts   = Cost (-1, -1, -1, -1, -1)  -- just for debugging
  72
  73 oneArithm = Cost (1, 0, 0, 0, 0) :: CostRes
  74
  75 instance Eq CostRes where
  76  (==) t1 t2 = i && b && l && s && f
  77              where (i,b,l,s,f) = binOp' (==) t1 t2
  78
  79 instance Num CostRes where
  80  (+) = binOp (+)
  81  (-) = binOp (-)
  82  (*) = binOp (*)
  83  negate  = mapOp negate
  84  abs     = mapOp abs
  85  signum  = mapOp signum
  86
  87 mapOp :: (Int -> Int) -> CostRes -> CostRes
  88 mapOp g ( Cost (i, b, l, s, f) )  = Cost (g i, g b, g l, g s, g f)
  89
  90 foldrOp :: (Int -> a -> a) -> a -> CostRes -> a
  91 foldrOp o x  ( Cost (i1, b1, l1, s1, f1) )   =
  92         i1 `o` ( b1 `o` ( l1 `o` ( s1 `o` ( f1 `o` x))))
  93
  94 binOp :: (Int -> Int -> Int) -> CostRes -> CostRes -> CostRes
  95 binOp o ( Cost (i1, b1, l1, s1, f1) ) ( Cost  (i2, b2, l2, s2, f2) )  =
  96         ( Cost (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2) )
  97
  98 binOp' :: (Int -> Int -> a) -> CostRes -> CostRes -> (a,a,a,a,a)
  99 binOp' o ( Cost (i1, b1, l1, s1, f1) ) ( Cost  (i2, b2, l2, s2, f2) )  =
 100          (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2)
 101
 102 -- --------------------------------------------------------------------------
 103
 104 data Side = Lhs | Rhs
 105             deriving (Eq)
 106
 107 -- --------------------------------------------------------------------------
 108
 109 costs :: AbstractC -> CostRes
 110
 111 costs absC =
 112   case absC of
 113    AbsCNop                      ->  nullCosts
 114
 115    AbsCStmts absC1 absC2        -> costs absC1 + costs absC2
 116
 117    CAssign (CReg _) (CReg _)    -> Cost (1,0,0,0,0)   -- typ.: mov %reg1,%reg2
 118
 119    CAssign (CReg _) (CTemp _ _) -> Cost (1,0,0,0,0)
 120
 121    CAssign (CReg _) (CAddr _)   -> Cost (1,0,0,0,0)  -- typ.: add %reg1,<adr>,%reg2
 122
 123    CAssign target_m source_m    -> addrModeCosts target_m Lhs +
 124                                    addrModeCosts source_m Rhs
 125
 126    CJump (CLbl _  _)            -> Cost (0,1,0,0,0)  -- no ld for call necessary
 127
 128    CJump mode                   -> addrModeCosts mode Rhs +
 129                                    Cost (0,1,0,0,0)
 130
 131    CFallThrough mode  -> addrModeCosts mode Rhs +               -- chu' 0.24
 132                          Cost (0,1,0,0,0)
 133
 134    CReturn mode info  -> case info of
 135                           DirectReturn -> addrModeCosts mode Rhs +
 136                                           Cost (0,1,0,0,0)
 137
 138                             -- i.e. ld address to reg and call reg
 139
 140                           DynamicVectoredReturn mode' ->
 141                                         addrModeCosts mode Rhs +
 142                                         addrModeCosts mode' Rhs +
 143                                         Cost (0,1,1,0,0)
 144
 145                             {- generates code like this:
 146                                 JMP_(<mode>)[RVREL(<mode'>)];
 147                                i.e. 1 possb ld for mode'
 148                                     1 ld for RVREL
 149                                     1 possb ld for mode
 150                                     1 call                              -}
 151
 152                           StaticVectoredReturn _ -> addrModeCosts mode Rhs +
 153                                                   Cost (0,1,1,0,0)
 154
 155                             -- as above with mode' fixed to CLit
 156                             -- typically 2 ld + 1 call; 1st ld due
 157                             -- to CVal as mode
 158
 159    CSwitch mode alts absC     -> nullCosts
 160                                  {- for handling costs of all branches of
 161                                     a CSwitch see PprAbsC.
 162                                     Basically:
 163                                      Costs for branch =
 164                                         Costs before CSwitch +
 165                                         addrModeCosts of head +
 166                                         Costs for 1 cond branch +
 167                                         Costs for body of branch
 168                                  -}
 169
 170    CCodeBlock _ absC          -> costs absC
 171
 172    CInitHdr cl_info reg_rel cost_centre inplace_upd -> initHdrCosts
 173
 174                         {- This is more fancy but superflous: The addr modes
 175                            are fixed and so the costs are const!
 176
 177                         argCosts + initHdrCosts
 178                         where argCosts = addrModeCosts (CAddr reg_rel) Rhs +
 179                                          addrModeCosts base_lbl +    -- CLbl!
 180                                          3*addrModeCosts (mkIntCLit 1{- any val -})
 181                         -}
 182                         {- this extends to something like
 183                             SET_SPEC_HDR(...)
 184                            For costing the args of this macro
 185                            see PprAbsC.lhs where args are inserted -}
 186
 187    COpStmt modes_res primOp modes_args _ _ ->
 188         {-
 189            let
 190                 n = length modes_res
 191            in
 192                 (0, 0, n, n, 0) +
 193                 primOpCosts primOp +
 194                 if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
 195                                              else nullCosts
 196            -- ^^HWL
 197         -}
 198         foldl (+) nullCosts [addrModeCosts mode Lhs | mode <- modes_res]  +
 199         foldl (+) nullCosts [addrModeCosts mode Rhs | mode <- modes_args]  +
 200         primOpCosts primOp +
 201         if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
 202                                      else nullCosts
 203
 204    CSimultaneous absC        -> costs absC
 205
 206    CMacroStmt   macro modes  -> stmtMacroCosts macro modes
 207
 208    CCallProfCtrMacro   _ _   -> nullCosts
 209                                   {- we don't count profiling in GrAnSim -}
 210
 211    CCallProfCCMacro    _ _   -> nullCosts
 212                                   {- we don't count profiling in GrAnSim -}
 213
 214   -- *** the next three [or so...] are DATA (those above are CODE) ***
 215   -- as they are data rather than code they all have nullCosts         -- HWL
 216
 217    CStaticClosure _ _ _ _    -> nullCosts
 218
 219    CClosureInfoAndCode _ _ _ _ _ _ -> nullCosts
 220
 221    CRetVector _ _ _          -> nullCosts
 222
 223    CRetUnVector _ _          -> nullCosts
 224
 225    CFlatRetVector _ _        -> nullCosts
 226
 227    CCostCentreDecl _ _       -> nullCosts
 228
 229    CClosureUpdInfo _         -> nullCosts
 230
 231    CSplitMarker              -> nullCosts
 232
 233 -- ---------------------------------------------------------------------------
 234
 235 addrModeCosts :: CAddrMode -> Side -> CostRes
 236
 237 -- addrModeCosts _ _ = nullCosts
 238
 239 addrModeCosts addr_mode side =
 240   let
 241     lhs = side == Lhs
 242   in
 243   case addr_mode of
 244     CVal _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
 245                        else Cost (0, 0, 1, 0, 0)
 246
 247     CAddr _  -> if lhs then Cost (0, 0, 0, 1, 0)  -- ??unchecked
 248                        else Cost (0, 0, 1, 0, 0)
 249
 250     CReg _   -> nullCosts        {- loading from, storing to reg is free ! -}
 251                                  {- for costing CReg->Creg ops see special -}
 252                                  {- case in costs fct -}
 253     CTableEntry base_mode offset_mode kind ->
 254                 addrModeCosts base_mode side +
 255                 addrModeCosts offset_mode side +
 256                 Cost (1,0,1,0,0)
 257
 258     CTemp _ _  -> nullCosts     {- if lhs then Cost (0, 0, 0, 1, 0)
 259                                           else Cost (0, 0, 1, 0, 0)  -}
 260         -- ``Temporaries'' correspond to local variables in C, and registers in
 261         -- native code.
 262         -- I assume they can be somewhat optimized by gcc -- HWL
 263
 264     CLbl _ _   -> if lhs then Cost (0, 0, 0, 1, 0)
 265                          else Cost (2, 0, 0, 0, 0)
 266                   -- Rhs: typically: sethi %hi(lbl),%tmp_reg
 267                   --                 or    %tmp_reg,%lo(lbl),%target_reg
 268
 269     CUnVecLbl _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
 270                             else Cost (2, 0, 0, 0, 0)
 271                      -- same as CLbl
 272
 273     --  Check the following 3 (checked form CLit on)
 274
 275     CCharLike mode -> if lhs then Cost (0, 0, 0, 1, 0)
 276                              else Cost (0, 0, 1, 0, 0)
 277
 278     CIntLike mode  -> if lhs then Cost (0, 0, 0, 1, 0)
 279                              else Cost (0, 0, 1, 0, 0)
 280
 281     CString _      -> if lhs then Cost (0, 0, 0, 1, 0)
 282                              else Cost (0, 0, 1, 0, 0)
 283
 284     CLit    _      -> if lhs then nullCosts            -- should never occur
 285                              else Cost (1, 0, 0, 0, 0) -- typ.: mov lit,%reg
 286
 287     CLitLit _  _   -> if lhs then nullCosts
 288                              else Cost (1, 0, 0, 0, 0)
 289                       -- same es CLit
 290
 291     COffset _      -> if lhs then nullCosts
 292                              else Cost (1, 0, 0, 0, 0)
 293                       -- same es CLit
 294
 295     CCode absC     -> costs absC
 296
 297     CLabelledCode _ absC  ->  costs absC
 298
 299     CJoinPoint _ _        -> if lhs then Cost (0, 0, 0, 1, 0)
 300                                     else Cost (0, 0, 1, 0, 0)
 301
 302     CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list
 303
 304     CCostCentre _ _ -> nullCosts
 305
 306 -- ---------------------------------------------------------------------------
 307
 308 exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes
 309
 310 exprMacroCosts side macro mode_list =
 311   let
 312     arg_costs = foldl (+) nullCosts
 313                       (map (\ x -> addrModeCosts x Rhs) mode_list)
 314   in
 315   arg_costs +
 316   case macro of
 317     INFO_PTR   -> if side == Lhs then Cost (0, 0, 0, 1, 0)
 318                                  else Cost (0, 0, 1, 0, 0)
 319     ENTRY_CODE -> nullCosts
 320     INFO_TAG   -> if side == Lhs then Cost (0, 0, 0, 1, 0)
 321                                  else Cost (0, 0, 1, 0, 0)
 322     EVAL_TAG   -> if side == Lhs then Cost (1, 0, 0, 1, 0)
 323                                  else Cost (1, 0, 1, 0, 0)
 324                   -- costs of INFO_TAG + (1,0,0,0,0)
 325
 326 -- ---------------------------------------------------------------------------
 327
 328 stmtMacroCosts :: CStmtMacro -> [CAddrMode] -> CostRes
 329
 330 stmtMacroCosts macro modes =
 331   let
 332     arg_costs =   foldl (+) nullCosts
 333                         [addrModeCosts mode Rhs | mode <- modes]
 334   in
 335   case macro of
 336     ARGS_CHK_A_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 337                 -- p=probability of PAP (instead of AP): + p*(3,1,0,0,0)
 338     ARGS_CHK_A            ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 339                 -- p=probability of PAP (instead of AP): + p*(0,1,0,0,0)
 340     ARGS_CHK_B_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 341     ARGS_CHK_B            ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 342     HEAP_CHK              ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 343     -- STK_CHK               ->  (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 344     STK_CHK               ->  Cost (0, 0, 0, 0, 0)       {- StgMacros.lh  -}
 345     UPD_CAF               ->  Cost (7, 0, 1, 3, 0)       {- SMupdate.lh  -}
 346     UPD_IND               ->  Cost (8, 2, 2, 0, 0)       {- SMupdate.lh
 347                                 updatee in old-gen: Cost (4, 1, 1, 0, 0)
 348                                 updatee in new-gen: Cost (4, 1, 1, 0, 0)
 349                                 NB: we include costs fo checking if there is
 350                                     a BQ, but we omit costs for awakening BQ
 351                                     (these probably differ between old-gen and
 352                                     new gen) -}
 353     UPD_INPLACE_NOPTRS    ->  Cost (13, 3, 3, 2, 0)       {- SMupdate.lh
 354                                 common for both:    Cost (4, 1, 1, 0, 0)
 355                                 updatee in old-gen: Cost (14, 3, 2, 4, 0)
 356                                 updatee in new-gen: Cost (4, 1, 1, 0, 0)   -}
 357     UPD_INPLACE_PTRS      ->  Cost (13, 3, 3, 2, 0)       {- SMupdate.lh
 358                                 common for both:    Cost (4, 1, 1, 0, 0)
 359                                 updatee in old-gen: Cost (14, 3, 2, 4, 0)
 360                                 updatee in new-gen: Cost (4, 1, 1, 0, 0)   -}
 361
 362     UPD_BH_UPDATABLE      ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh  -}
 363     UPD_BH_SINGLE_ENTRY   ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh  -}
 364     PUSH_STD_UPD_FRAME    ->  Cost (3, 0, 0, 4, 0)       {- SMupdate.lh  -}
 365     POP_STD_UPD_FRAME     ->  Cost (1, 0, 3, 0, 0)       {- SMupdate.lh  -}
 366     SET_TAG               ->  nullCosts             {- COptRegs.lh -}
 367     GRAN_FETCH                  ->  nullCosts     {- GrAnSim bookkeeping -}
 368     GRAN_RESCHEDULE             ->  nullCosts     {- GrAnSim bookkeeping -}
 369     GRAN_FETCH_AND_RESCHEDULE   ->  nullCosts     {- GrAnSim bookkeeping -}
 370     GRAN_YIELD                  ->  nullCosts     {- GrAnSim bookkeeping -- added SOF -}
 371     THREAD_CONTEXT_SWITCH       ->  nullCosts     {- GrAnSim bookkeeping -}
 372     _ -> trace ("Costs.stmtMacroCosts: "++show macro) nullCosts
 373
 374 -- ---------------------------------------------------------------------------
 375
 376 floatOps :: [PrimOp]
 377 floatOps =
 378   [   FloatGtOp  , FloatGeOp  , FloatEqOp  , FloatNeOp  , FloatLtOp  , FloatLeOp
 379     , DoubleGtOp , DoubleGeOp , DoubleEqOp , DoubleNeOp , DoubleLtOp , DoubleLeOp
 380     , FloatAddOp , FloatSubOp , FloatMulOp , FloatDivOp , FloatNegOp
 381     , Float2IntOp , Int2FloatOp
 382     , FloatExpOp   , FloatLogOp   , FloatSqrtOp
 383     , FloatSinOp   , FloatCosOp   , FloatTanOp
 384     , FloatAsinOp  , FloatAcosOp  , FloatAtanOp
 385     , FloatSinhOp  , FloatCoshOp  , FloatTanhOp
 386     , FloatPowerOp
 387     , DoubleAddOp , DoubleSubOp , DoubleMulOp , DoubleDivOp , DoubleNegOp
 388     , Double2IntOp , Int2DoubleOp
 389     , Double2FloatOp , Float2DoubleOp
 390     , DoubleExpOp   , DoubleLogOp   , DoubleSqrtOp
 391     , DoubleSinOp   , DoubleCosOp   , DoubleTanOp
 392     , DoubleAsinOp  , DoubleAcosOp  , DoubleAtanOp
 393     , DoubleSinhOp  , DoubleCoshOp  , DoubleTanhOp
 394     , DoublePowerOp
 395     , FloatEncodeOp  , FloatDecodeOp
 396     , DoubleEncodeOp , DoubleDecodeOp
 397   ]
 398
 399 gmpOps :: [PrimOp]
 400 gmpOps  =
 401   [   IntegerAddOp , IntegerSubOp , IntegerMulOp
 402     , IntegerQuotRemOp , IntegerDivModOp , IntegerNegOp
 403     , IntegerCmpOp
 404     , Integer2IntOp  , Int2IntegerOp
 405     , Addr2IntegerOp
 406   ]
 407
 408
 409 abs_costs = nullCosts   -- NB:  This is normal STG code with costs already
 410                         --      included; no need to add costs again.
 411
 412 umul_costs = Cost (21,4,0,0,0)     -- due to spy counts
 413 rem_costs =  Cost (30,15,0,0,0)    -- due to spy counts
 414 div_costs =  Cost (30,15,0,0,0)    -- due to spy counts
 415
 416 primOpCosts :: PrimOp -> CostRes
 417
 418 -- Special cases
 419
 420 primOpCosts (CCallOp _ _ _ _ _) = SAVE_COSTS + RESTORE_COSTS
 421                                   -- don't guess costs of ccall proper
 422                                   -- for exact costing use a GRAN_EXEC
 423                                   -- in the C code
 424
 425 -- Usually 3 mov instructions are needed to get args and res in right place.
 426
 427 primOpCosts IntMulOp  = Cost (3, 1, 0, 0, 0)  + umul_costs
 428 primOpCosts IntQuotOp = Cost (3, 1, 0, 0, 0)  + div_costs
 429 primOpCosts IntRemOp  = Cost (3, 1, 0, 0, 0)  + rem_costs
 430 primOpCosts IntNegOp  = Cost (1, 1, 0, 0, 0) -- translates into 1 sub
 431 primOpCosts IntAbsOp  = Cost (0, 1, 0, 0, 0) -- abs closure already costed
 432
 433 primOpCosts FloatGtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 434 primOpCosts FloatGeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 435 primOpCosts FloatEqOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 436 primOpCosts FloatNeOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 437 primOpCosts FloatLtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 438 primOpCosts FloatLeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 439 primOpCosts DoubleGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 440 primOpCosts DoubleGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 441 primOpCosts DoubleEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 442 primOpCosts DoubleNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 443 primOpCosts DoubleLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 444 primOpCosts DoubleLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 445
 446 primOpCosts FloatExpOp    = Cost (2, 1, 4, 4, 3)
 447 primOpCosts FloatLogOp    = Cost (2, 1, 4, 4, 3)
 448 primOpCosts FloatSqrtOp   = Cost (2, 1, 4, 4, 3)
 449 primOpCosts FloatSinOp    = Cost (2, 1, 4, 4, 3)
 450 primOpCosts FloatCosOp    = Cost (2, 1, 4, 4, 3)
 451 primOpCosts FloatTanOp    = Cost (2, 1, 4, 4, 3)
 452 primOpCosts FloatAsinOp   = Cost (2, 1, 4, 4, 3)
 453 primOpCosts FloatAcosOp   = Cost (2, 1, 4, 4, 3)
 454 primOpCosts FloatAtanOp   = Cost (2, 1, 4, 4, 3)
 455 primOpCosts FloatSinhOp   = Cost (2, 1, 4, 4, 3)
 456 primOpCosts FloatCoshOp   = Cost (2, 1, 4, 4, 3)
 457 primOpCosts FloatTanhOp   = Cost (2, 1, 4, 4, 3)
 458 --primOpCosts FloatAsinhOp  = Cost (2, 1, 4, 4, 3)
 459 --primOpCosts FloatAcoshOp  = Cost (2, 1, 4, 4, 3)
 460 --primOpCosts FloatAtanhOp  = Cost (2, 1, 4, 4, 3)
 461 primOpCosts FloatPowerOp  = Cost (2, 1, 4, 4, 3)
 462
 463 {- There should be special handling of the Array PrimOps in here   HWL -}
 464
 465 primOpCosts primOp
 466   | primOp `elem` floatOps = Cost (0, 0, 0, 0, 1)  :: CostRes
 467   | primOp `elem` gmpOps   = Cost (30, 5, 10, 10, 0) :: CostRes  -- GUESS; check it
 468   | otherwise              = Cost (1, 0, 0, 0, 0)
 469
 470 -- ---------------------------------------------------------------------------
 471 {- HWL: currently unused
 472
 473 costsByKind :: PrimRep -> Side -> CostRes
 474
 475 -- The following PrimKinds say that the data is already in a reg
 476
 477 costsByKind CharRep     _ = nullCosts
 478 costsByKind IntRep      _ = nullCosts
 479 costsByKind WordRep     _ = nullCosts
 480 costsByKind AddrRep     _ = nullCosts
 481 costsByKind FloatRep    _ = nullCosts
 482 costsByKind DoubleRep   _ = nullCosts
 483 -}
 484 -- ---------------------------------------------------------------------------
 485 \end{code}
 486
 487 This is the data structure of {\tt PrimOp} copied from prelude/PrimOp.lhs.
 488 I include here some comments about the estimated costs for these @PrimOps@.
 489 Compare with the @primOpCosts@ fct above.  -- HWL
 490
 491 \begin{pseudocode}
 492 data PrimOp
 493     -- I assume all these basic comparisons take just one ALU instruction
 494     -- Checked that for Char, Int; Word, Addr should be the same as Int.
 495
 496     = CharGtOp   | CharGeOp   | CharEqOp   | CharNeOp   | CharLtOp   | CharLeOp
 497     | IntGtOp    | IntGeOp    | IntEqOp    | IntNeOp    | IntLtOp    | IntLeOp
 498     | WordGtOp   | WordGeOp   | WordEqOp   | WordNeOp   | WordLtOp   | WordLeOp
 499     | AddrGtOp   | AddrGeOp   | AddrEqOp   | AddrNeOp   | AddrLtOp   | AddrLeOp
 500
 501     -- Analogously, these take one FP unit instruction
 502     -- Haven't checked that, yet.
 503
 504     | FloatGtOp  | FloatGeOp  | FloatEqOp  | FloatNeOp  | FloatLtOp  | FloatLeOp
 505     | DoubleGtOp | DoubleGeOp | DoubleEqOp | DoubleNeOp | DoubleLtOp | DoubleLeOp
 506
 507     -- 1 ALU op; unchecked
 508     | OrdOp | ChrOp
 509
 510     -- these just take 1 ALU op; checked
 511     | IntAddOp | IntSubOp
 512
 513     -- but these take more than that; see special cases in primOpCosts
 514     -- I counted the generated ass. instructions for these -> checked
 515     | IntMulOp | IntQuotOp
 516     | IntRemOp | IntNegOp | IntAbsOp
 517
 518     -- Rest is unchecked so far -- HWL
 519
 520     -- Word#-related ops:
 521     | AndOp   | OrOp  | NotOp | XorOp | ShiftLOp | ShiftROp
 522     | Int2WordOp | Word2IntOp -- casts
 523
 524     -- Addr#-related ops:
 525     | Int2AddrOp | Addr2IntOp -- casts
 526
 527     -- Float#-related ops:
 528     | FloatAddOp | FloatSubOp | FloatMulOp | FloatDivOp | FloatNegOp
 529     | Float2IntOp | Int2FloatOp
 530
 531     | FloatExpOp   | FloatLogOp   | FloatSqrtOp
 532     | FloatSinOp   | FloatCosOp   | FloatTanOp
 533     | FloatAsinOp  | FloatAcosOp  | FloatAtanOp
 534     | FloatSinhOp  | FloatCoshOp  | FloatTanhOp
 535     -- not all machines have these available conveniently:
 536     -- | FloatAsinhOp | FloatAcoshOp | FloatAtanhOp
 537     | FloatPowerOp -- ** op
 538
 539     -- Double#-related ops:
 540     | DoubleAddOp | DoubleSubOp | DoubleMulOp | DoubleDivOp | DoubleNegOp
 541     | Double2IntOp | Int2DoubleOp
 542     | Double2FloatOp | Float2DoubleOp
 543
 544     | DoubleExpOp   | DoubleLogOp   | DoubleSqrtOp
 545     | DoubleSinOp   | DoubleCosOp   | DoubleTanOp
 546     | DoubleAsinOp  | DoubleAcosOp  | DoubleAtanOp
 547     | DoubleSinhOp  | DoubleCoshOp  | DoubleTanhOp
 548     -- not all machines have these available conveniently:
 549     -- | DoubleAsinhOp | DoubleAcoshOp | DoubleAtanhOp
 550     | DoublePowerOp -- ** op
 551
 552     -- Integer (and related...) ops:
 553     -- slightly weird -- to match GMP package.
 554     | IntegerAddOp | IntegerSubOp | IntegerMulOp
 555     | IntegerQuotRemOp | IntegerDivModOp | IntegerNegOp
 556
 557     | IntegerCmpOp
 558
 559     | Integer2IntOp  | Int2IntegerOp
 560     | Addr2IntegerOp -- "Addr" is *always* a literal string
 561     -- ?? gcd, etc?
 562
 563     | FloatEncodeOp  | FloatDecodeOp
 564     | DoubleEncodeOp | DoubleDecodeOp
 565
 566     -- primitive ops for primitive arrays
 567
 568     | NewArrayOp
 569     | NewByteArrayOp PrimRep
 570
 571     | SameMutableArrayOp
 572     | SameMutableByteArrayOp
 573
 574     | ReadArrayOp | WriteArrayOp | IndexArrayOp -- for arrays of Haskell ptrs
 575
 576     | ReadByteArrayOp   PrimRep
 577     | WriteByteArrayOp  PrimRep
 578     | IndexByteArrayOp  PrimRep
 579     | IndexOffAddrOp    PrimRep
 580         -- PrimRep can be one of {Char,Int,Addr,Float,Double}Kind.
 581         -- This is just a cheesy encoding of a bunch of ops.
 582         -- Note that ForeignObjRep is not included -- the only way of
 583         -- creating a ForeignObj is with a ccall or casm.
 584
 585     | UnsafeFreezeArrayOp | UnsafeFreezeByteArrayOp
 586
 587     | MakeStablePtrOp | DeRefStablePtrOp
 588 \end{pseudocode}
 589
 590 A special ``trap-door'' to use in making calls direct to C functions:
 591 Note: From GrAn point of view, CCall is probably very expensive
 592       The programmer can specify the costs of the Ccall by inserting
 593       a GRAN_EXEC(a,b,l,s,f) at the end of the C- code, specifing the
 594       number or arithm., branch, load, store and floating point instructions
 595       -- HWL
 596
 597 \begin{pseudocode}
 598     | CCallOp   String  -- An "unboxed" ccall# to this named function
 599                 Bool    -- True <=> really a "casm"
 600                 Bool    -- True <=> might invoke Haskell GC
 601                 [Type]  -- Unboxed argument; the state-token
 602                         -- argument will have been put *first*
 603                 Type    -- Return type; one of the "StateAnd<blah>#" types
 604
 605     -- (... to be continued ... )
 606 \end{pseudocode}