ghc/compiler/absCSyn/Costs.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1994-1996
   3 %     Hans Wolfgang Loidl
   4 %
   5 % ---------------------------------------------------------------------------
   6
   7 \section[Costs]{Evaluating the costs of computing some abstract C code}
   8
   9 This module   provides all necessary  functions for   computing for a given
  10 abstract~C Program the costs of executing that program. This is done by the
  11 exported function:
  12
  13 \begin{quote}
  14  {\verb type CostRes = (Int, Int, Int, Int, Int)}
  15  {\verb costs :: AbstractC -> CostRes }
  16 \end{quote}
  17
  18 The meaning of the result tuple is:
  19 \begin{itemize}
  20  \item The first component ({\tt i}) counts the number of integer,
  21    arithmetic and bit-manipulating instructions.
  22  \item The second component ({\tt b}) counts the number of branches (direct
  23    branches as well as indirect ones).
  24  \item The third component ({\tt l}) counts the number of load instructions.
  25  \item The fourth component ({\tt s}) counts the number of store
  26    instructions.
  27  \item The fifth component ({\tt f}) counts the number of floating point
  28    instructions.
  29 \end{itemize}
  30
  31 This function is needed in GrAnSim for parallelism.
  32
  33 These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!):
  34
  35 \begin{pseudocode}
  36
  37 #define LOAD_COSTS              2
  38 #define STORE_COSTS             2
  39 #define INT_ARITHM_COSTS        1
  40 #define GMP_ARITHM_COSTS        3 {- any clue for GMP costs ? -}
  41 #define FLOAT_ARITHM_COSTS      3 {- any clue for float costs ? -}
  42 #define BRANCH_COSTS            2
  43
  44 \end{pseudocode}
  45
  46 \begin{code}
  47 #include "HsVersions.h"
  48
  49 #define ACCUM_COSTS(i,b,l,s,f)  (i+b+l+s+f)
  50
  51 #define NUM_REGS                10 {- PprAbsCSyn.lhs -}       {- runtime/c-as-asm/CallWrap_C.lc -}
  52 #define RESTORE_COSTS           (Cost (0, 0, NUM_REGS, 0, 0)  :: CostRes)
  53 #define SAVE_COSTS              (Cost (0, 0, 0, NUM_REGS, 0)  :: CostRes)
  54 #define CCALL_COSTS_GUESS       (Cost (50, 0, 0, 0, 0)        :: CostRes)
  55
  56 module Costs( costs,
  57               addrModeCosts, CostRes(Cost), nullCosts, Side(..)
  58     ) where
  59
  60 IMP_Ubiq(){-uitous-}
  61
  62 import AbsCSyn
  63 import PrimOp           ( primOpNeedsWrapper, PrimOp(..) )
  64
  65 -- --------------------------------------------------------------------------
  66 data CostRes = Cost (Int, Int, Int, Int, Int)
  67                deriving (Text)
  68
  69 nullCosts    = Cost (0, 0, 0, 0, 0) :: CostRes
  70 initHdrCosts = Cost (2, 0, 0, 1, 0) :: CostRes
  71 errorCosts   = Cost (-1, -1, -1, -1, -1)  -- just for debugging
  72
  73 oneArithm = Cost (1, 0, 0, 0, 0) :: CostRes
  74
  75 instance Eq CostRes where
  76  (==) t1 t2 = i && b && l && s && f
  77              where (i,b,l,s,f) = binOp' (==) t1 t2
  78
  79 instance Num CostRes where
  80  (+) = binOp (+)
  81  (-) = binOp (-)
  82  (*) = binOp (*)
  83  negate  = mapOp negate
  84  abs     = mapOp abs
  85  signum  = mapOp signum
  86
  87 mapOp :: (Int -> Int) -> CostRes -> CostRes
  88 mapOp g ( Cost (i, b, l, s, f) )  = Cost (g i, g b, g l, g s, g f)
  89
  90 foldrOp :: (Int -> a -> a) -> a -> CostRes -> a
  91 foldrOp o x  ( Cost (i1, b1, l1, s1, f1) )   =
  92         i1 `o` ( b1 `o` ( l1 `o` ( s1 `o` ( f1 `o` x))))
  93
  94 binOp :: (Int -> Int -> Int) -> CostRes -> CostRes -> CostRes
  95 binOp o ( Cost (i1, b1, l1, s1, f1) ) ( Cost  (i2, b2, l2, s2, f2) )  =
  96         ( Cost (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2) )
  97
  98 binOp' :: (Int -> Int -> a) -> CostRes -> CostRes -> (a,a,a,a,a)
  99 binOp' o ( Cost (i1, b1, l1, s1, f1) ) ( Cost  (i2, b2, l2, s2, f2) )  =
 100          (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2)
 101
 102 -- --------------------------------------------------------------------------
 103
 104 data Side = Lhs | Rhs
 105             deriving (Eq)
 106
 107 -- --------------------------------------------------------------------------
 108
 109 costs :: AbstractC -> CostRes
 110
 111 costs absC =
 112   case absC of
 113    AbsCNop                      ->  nullCosts
 114
 115    AbsCStmts absC1 absC2        -> costs absC1 + costs absC2
 116
 117    CAssign (CReg _) (CReg _)    -> Cost (1,0,0,0,0)   -- typ.: mov %reg1,%reg2
 118
 119    CAssign (CReg _) (CTemp _ _) -> Cost (1,0,0,0,0)
 120
 121    CAssign (CReg _) (CAddr _)   -> Cost (1,0,0,0,0)  -- typ.: add %reg1,<adr>,%reg2
 122
 123    CAssign target_m source_m    -> addrModeCosts target_m Lhs +
 124                                    addrModeCosts source_m Rhs
 125
 126    CJump (CLbl _  _)            -> Cost (0,1,0,0,0)  -- no ld for call necessary
 127
 128    CJump mode                   -> addrModeCosts mode Rhs +
 129                                    Cost (0,1,0,0,0)
 130
 131    CFallThrough mode  -> addrModeCosts mode Rhs +               -- chu' 0.24
 132                          Cost (0,1,0,0,0)
 133
 134    CReturn mode info  -> case info of
 135                           DirectReturn -> addrModeCosts mode Rhs +
 136                                           Cost (0,1,0,0,0)
 137
 138                             -- i.e. ld address to reg and call reg
 139
 140                           DynamicVectoredReturn mode' ->
 141                                         addrModeCosts mode Rhs +
 142                                         addrModeCosts mode' Rhs +
 143                                         Cost (0,1,1,0,0)
 144
 145                             {- generates code like this:
 146                                 JMP_(<mode>)[RVREL(<mode'>)];
 147                                i.e. 1 possb ld for mode'
 148                                     1 ld for RVREL
 149                                     1 possb ld for mode
 150                                     1 call                              -}
 151
 152                           StaticVectoredReturn _ -> addrModeCosts mode Rhs +
 153                                                   Cost (0,1,1,0,0)
 154
 155                             -- as above with mode' fixed to CLit
 156                             -- typically 2 ld + 1 call; 1st ld due
 157                             -- to CVal as mode
 158
 159    CSwitch mode alts absC     -> nullCosts
 160                                  {- for handling costs of all branches of
 161                                     a CSwitch see PprAbsC.
 162                                     Basically:
 163                                      Costs for branch =
 164                                         Costs before CSwitch +
 165                                         addrModeCosts of head +
 166                                         Costs for 1 cond branch +
 167                                         Costs for body of branch
 168                                  -}
 169
 170    CCodeBlock _ absC          -> costs absC
 171
 172    CInitHdr cl_info reg_rel cost_centre inplace_upd -> initHdrCosts
 173
 174                         {- This is more fancy but superflous: The addr modes
 175                            are fixed and so the costs are const!
 176
 177                         argCosts + initHdrCosts
 178                         where argCosts = addrModeCosts (CAddr reg_rel) Rhs +
 179                                          addrModeCosts base_lbl +    -- CLbl!
 180                                          3*addrModeCosts (mkIntCLit 1{- any val -})
 181                         -}
 182                         {- this extends to something like
 183                             SET_SPEC_HDR(...)
 184                            For costing the args of this macro
 185                            see PprAbsC.lhs where args are inserted -}
 186
 187    COpStmt modes_res primOp modes_args _ _ ->
 188         {-
 189            let
 190                 n = length modes_res
 191            in
 192                 (0, 0, n, n, 0) +
 193                 primOpCosts primOp +
 194                 if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
 195                                              else nullCosts
 196            -- ^^HWL
 197         -}
 198         foldl (+) nullCosts [addrModeCosts mode Lhs | mode <- modes_res]  +
 199         foldl (+) nullCosts [addrModeCosts mode Rhs | mode <- modes_args]  +
 200         primOpCosts primOp +
 201         if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
 202                                      else nullCosts
 203
 204    CSimultaneous absC        -> costs absC
 205
 206    CMacroStmt   macro modes  -> stmtMacroCosts macro modes
 207
 208    CCallProfCtrMacro   _ _   -> nullCosts
 209                                   {- we don't count profiling in GrAnSim -}
 210
 211    CCallProfCCMacro    _ _   -> nullCosts
 212                                   {- we don't count profiling in GrAnSim -}
 213
 214   -- *** the next three [or so...] are DATA (those above are CODE) ***
 215   -- as they are data rather than code they all have nullCosts         -- HWL
 216
 217    CStaticClosure _ _ _ _    -> nullCosts
 218
 219    CClosureInfoAndCode _ _ _ _ _ _ -> nullCosts
 220
 221    CRetVector _ _ _          -> nullCosts
 222
 223    CRetUnVector _ _          -> nullCosts
 224
 225    CFlatRetVector _ _        -> nullCosts
 226
 227    CCostCentreDecl _ _       -> nullCosts
 228
 229    CClosureUpdInfo _         -> nullCosts
 230
 231    CSplitMarker              -> nullCosts
 232
 233 -- ---------------------------------------------------------------------------
 234
 235 addrModeCosts :: CAddrMode -> Side -> CostRes
 236
 237 -- addrModeCosts _ _ = nullCosts
 238
 239 addrModeCosts addr_mode side =
 240   let
 241     lhs = side == Lhs
 242   in
 243   case addr_mode of
 244     CVal _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
 245                        else Cost (0, 0, 1, 0, 0)
 246
 247     CAddr _  -> if lhs then Cost (0, 0, 0, 1, 0)  -- ??unchecked
 248                        else Cost (0, 0, 1, 0, 0)
 249
 250     CReg _   -> nullCosts        {- loading from, storing to reg is free ! -}
 251                                  {- for costing CReg->Creg ops see special -}
 252                                  {- case in costs fct -}
 253     CTableEntry base_mode offset_mode kind ->
 254                 addrModeCosts base_mode side +
 255                 addrModeCosts offset_mode side +
 256                 Cost (1,0,1,0,0)
 257
 258     CTemp _ _  -> nullCosts     {- if lhs then Cost (0, 0, 0, 1, 0)
 259                                           else Cost (0, 0, 1, 0, 0)  -}
 260         -- ``Temporaries'' correspond to local variables in C, and registers in
 261         -- native code.
 262         -- I assume they can be somewhat optimized by gcc -- HWL
 263
 264     CLbl _ _   -> if lhs then Cost (0, 0, 0, 1, 0)
 265                          else Cost (2, 0, 0, 0, 0)
 266                   -- Rhs: typically: sethi %hi(lbl),%tmp_reg
 267                   --                 or    %tmp_reg,%lo(lbl),%target_reg
 268
 269     CUnVecLbl _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
 270                             else Cost (2, 0, 0, 0, 0)
 271                      -- same as CLbl
 272
 273     --  Check the following 3 (checked form CLit on)
 274
 275     CCharLike mode -> if lhs then Cost (0, 0, 0, 1, 0)
 276                              else Cost (0, 0, 1, 0, 0)
 277
 278     CIntLike mode  -> if lhs then Cost (0, 0, 0, 1, 0)
 279                              else Cost (0, 0, 1, 0, 0)
 280
 281     CString _      -> if lhs then Cost (0, 0, 0, 1, 0)
 282                              else Cost (0, 0, 1, 0, 0)
 283
 284     CLit    _      -> if lhs then nullCosts            -- should never occur
 285                              else Cost (1, 0, 0, 0, 0) -- typ.: mov lit,%reg
 286
 287     CLitLit _  _   -> if lhs then nullCosts
 288                              else Cost (1, 0, 0, 0, 0)
 289                       -- same es CLit
 290
 291     COffset _      -> if lhs then nullCosts
 292                              else Cost (1, 0, 0, 0, 0)
 293                       -- same es CLit
 294
 295     CCode absC     -> costs absC
 296
 297     CLabelledCode _ absC  ->  costs absC
 298
 299     CJoinPoint _ _        -> if lhs then Cost (0, 0, 0, 1, 0)
 300                                     else Cost (0, 0, 1, 0, 0)
 301
 302     CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list
 303
 304     CCostCentre _ _ -> nullCosts
 305
 306 -- ---------------------------------------------------------------------------
 307
 308 exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes
 309
 310 exprMacroCosts side macro mode_list =
 311   let
 312     arg_costs = foldl (+) nullCosts
 313                       (map (\ x -> addrModeCosts x Rhs) mode_list)
 314   in
 315   arg_costs +
 316   case macro of
 317     INFO_PTR   -> if side == Lhs then Cost (0, 0, 0, 1, 0)
 318                                  else Cost (0, 0, 1, 0, 0)
 319     ENTRY_CODE -> nullCosts
 320     INFO_TAG   -> if side == Lhs then Cost (0, 0, 0, 1, 0)
 321                                  else Cost (0, 0, 1, 0, 0)
 322     EVAL_TAG   -> if side == Lhs then Cost (1, 0, 0, 1, 0)
 323                                  else Cost (1, 0, 1, 0, 0)
 324                   -- costs of INFO_TAG + (1,0,0,0,0)
 325
 326 -- ---------------------------------------------------------------------------
 327
 328 stmtMacroCosts :: CStmtMacro -> [CAddrMode] -> CostRes
 329
 330 stmtMacroCosts macro modes =
 331   let
 332     arg_costs =   foldl (+) nullCosts
 333                         [addrModeCosts mode Rhs | mode <- modes]
 334   in
 335   case macro of
 336     ARGS_CHK_A_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 337                 -- p=probability of PAP (instead of AP): + p*(3,1,0,0,0)
 338     ARGS_CHK_A            ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 339                 -- p=probability of PAP (instead of AP): + p*(0,1,0,0,0)
 340     ARGS_CHK_B_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 341     ARGS_CHK_B            ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 342     HEAP_CHK              ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 343     -- STK_CHK               ->  (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 344     STK_CHK               ->  Cost (0, 0, 0, 0, 0)       {- StgMacros.lh  -}
 345     UPD_CAF               ->  Cost (7, 0, 1, 3, 0)       {- SMupdate.lh  -}
 346     UPD_IND               ->  Cost (8, 2, 2, 0, 0)       {- SMupdate.lh
 347                                 updatee in old-gen: Cost (4, 1, 1, 0, 0)
 348                                 updatee in new-gen: Cost (4, 1, 1, 0, 0)
 349                                 NB: we include costs fo checking if there is
 350                                     a BQ, but we omit costs for awakening BQ
 351                                     (these probably differ between old-gen and
 352                                     new gen) -}
 353     UPD_INPLACE_NOPTRS    ->  Cost (13, 3, 3, 2, 0)       {- SMupdate.lh
 354                                 common for both:    Cost (4, 1, 1, 0, 0)
 355                                 updatee in old-gen: Cost (14, 3, 2, 4, 0)
 356                                 updatee in new-gen: Cost (4, 1, 1, 0, 0)   -}
 357     UPD_INPLACE_PTRS      ->  Cost (13, 3, 3, 2, 0)       {- SMupdate.lh
 358                                 common for both:    Cost (4, 1, 1, 0, 0)
 359                                 updatee in old-gen: Cost (14, 3, 2, 4, 0)
 360                                 updatee in new-gen: Cost (4, 1, 1, 0, 0)   -}
 361
 362     UPD_BH_UPDATABLE      ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh  -}
 363     UPD_BH_SINGLE_ENTRY   ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh  -}
 364     PUSH_STD_UPD_FRAME    ->  Cost (3, 0, 0, 4, 0)       {- SMupdate.lh  -}
 365     POP_STD_UPD_FRAME     ->  Cost (1, 0, 3, 0, 0)       {- SMupdate.lh  -}
 366     SET_TAG               ->  nullCosts             {- COptRegs.lh -}
 367     GRAN_FETCH                  ->  nullCosts     {- GrAnSim bookkeeping -}
 368     GRAN_RESCHEDULE             ->  nullCosts     {- GrAnSim bookkeeping -}
 369     GRAN_FETCH_AND_RESCHEDULE   ->  nullCosts     {- GrAnSim bookkeeping -}
 370     THREAD_CONTEXT_SWITCH       ->  nullCosts     {- GrAnSim bookkeeping -}
 371
 372 -- ---------------------------------------------------------------------------
 373
 374 floatOps :: [PrimOp]
 375 floatOps =
 376   [   FloatGtOp  , FloatGeOp  , FloatEqOp  , FloatNeOp  , FloatLtOp  , FloatLeOp
 377     , DoubleGtOp , DoubleGeOp , DoubleEqOp , DoubleNeOp , DoubleLtOp , DoubleLeOp
 378     , FloatAddOp , FloatSubOp , FloatMulOp , FloatDivOp , FloatNegOp
 379     , Float2IntOp , Int2FloatOp
 380     , FloatExpOp   , FloatLogOp   , FloatSqrtOp
 381     , FloatSinOp   , FloatCosOp   , FloatTanOp
 382     , FloatAsinOp  , FloatAcosOp  , FloatAtanOp
 383     , FloatSinhOp  , FloatCoshOp  , FloatTanhOp
 384     , FloatPowerOp
 385     , DoubleAddOp , DoubleSubOp , DoubleMulOp , DoubleDivOp , DoubleNegOp
 386     , Double2IntOp , Int2DoubleOp
 387     , Double2FloatOp , Float2DoubleOp
 388     , DoubleExpOp   , DoubleLogOp   , DoubleSqrtOp
 389     , DoubleSinOp   , DoubleCosOp   , DoubleTanOp
 390     , DoubleAsinOp  , DoubleAcosOp  , DoubleAtanOp
 391     , DoubleSinhOp  , DoubleCoshOp  , DoubleTanhOp
 392     , DoublePowerOp
 393     , FloatEncodeOp  , FloatDecodeOp
 394     , DoubleEncodeOp , DoubleDecodeOp
 395   ]
 396
 397 gmpOps :: [PrimOp]
 398 gmpOps  =
 399   [   IntegerAddOp , IntegerSubOp , IntegerMulOp
 400     , IntegerQuotRemOp , IntegerDivModOp , IntegerNegOp
 401     , IntegerCmpOp
 402     , Integer2IntOp  , Int2IntegerOp
 403     , Addr2IntegerOp
 404   ]
 405
 406
 407 abs_costs = nullCosts   -- NB:  This is normal STG code with costs already
 408                         --      included; no need to add costs again.
 409
 410 umul_costs = Cost (21,4,0,0,0)     -- due to spy counts
 411 rem_costs =  Cost (30,15,0,0,0)    -- due to spy counts
 412 div_costs =  Cost (30,15,0,0,0)    -- due to spy counts
 413
 414 primOpCosts :: PrimOp -> CostRes
 415
 416 -- Special cases
 417
 418 primOpCosts (CCallOp _ _ _ _ _) = SAVE_COSTS + RESTORE_COSTS
 419                                   -- don't guess costs of ccall proper
 420                                   -- for exact costing use a GRAN_EXEC
 421                                   -- in the C code
 422
 423 -- Usually 3 mov instructions are needed to get args and res in right place.
 424
 425 primOpCosts IntMulOp  = Cost (3, 1, 0, 0, 0)  + umul_costs
 426 primOpCosts IntQuotOp = Cost (3, 1, 0, 0, 0)  + div_costs
 427 primOpCosts IntRemOp  = Cost (3, 1, 0, 0, 0)  + rem_costs
 428 primOpCosts IntNegOp  = Cost (1, 1, 0, 0, 0) -- translates into 1 sub
 429 primOpCosts IntAbsOp  = Cost (0, 1, 0, 0, 0) -- abs closure already costed
 430
 431 primOpCosts FloatGtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 432 primOpCosts FloatGeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 433 primOpCosts FloatEqOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 434 primOpCosts FloatNeOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 435 primOpCosts FloatLtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 436 primOpCosts FloatLeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 437 primOpCosts DoubleGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 438 primOpCosts DoubleGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 439 primOpCosts DoubleEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 440 primOpCosts DoubleNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 441 primOpCosts DoubleLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 442 primOpCosts DoubleLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 443
 444 primOpCosts FloatExpOp    = Cost (2, 1, 4, 4, 3)
 445 primOpCosts FloatLogOp    = Cost (2, 1, 4, 4, 3)
 446 primOpCosts FloatSqrtOp   = Cost (2, 1, 4, 4, 3)
 447 primOpCosts FloatSinOp    = Cost (2, 1, 4, 4, 3)
 448 primOpCosts FloatCosOp    = Cost (2, 1, 4, 4, 3)
 449 primOpCosts FloatTanOp    = Cost (2, 1, 4, 4, 3)
 450 primOpCosts FloatAsinOp   = Cost (2, 1, 4, 4, 3)
 451 primOpCosts FloatAcosOp   = Cost (2, 1, 4, 4, 3)
 452 primOpCosts FloatAtanOp   = Cost (2, 1, 4, 4, 3)
 453 primOpCosts FloatSinhOp   = Cost (2, 1, 4, 4, 3)
 454 primOpCosts FloatCoshOp   = Cost (2, 1, 4, 4, 3)
 455 primOpCosts FloatTanhOp   = Cost (2, 1, 4, 4, 3)
 456 --primOpCosts FloatAsinhOp  = Cost (2, 1, 4, 4, 3)
 457 --primOpCosts FloatAcoshOp  = Cost (2, 1, 4, 4, 3)
 458 --primOpCosts FloatAtanhOp  = Cost (2, 1, 4, 4, 3)
 459 primOpCosts FloatPowerOp  = Cost (2, 1, 4, 4, 3)
 460
 461 {- There should be special handling of the Array PrimOps in here   HWL -}
 462
 463 primOpCosts primOp
 464   | primOp `elem` floatOps = Cost (0, 0, 0, 0, 1)  :: CostRes
 465   | primOp `elem` gmpOps   = Cost (30, 5, 10, 10, 0) :: CostRes  -- GUESS; check it
 466   | otherwise              = Cost (1, 0, 0, 0, 0)
 467
 468 -- ---------------------------------------------------------------------------
 469 {- HWL: currently unused
 470
 471 costsByKind :: PrimRep -> Side -> CostRes
 472
 473 -- The following PrimKinds say that the data is already in a reg
 474
 475 costsByKind CharRep     _ = nullCosts
 476 costsByKind IntRep      _ = nullCosts
 477 costsByKind WordRep     _ = nullCosts
 478 costsByKind AddrRep     _ = nullCosts
 479 costsByKind FloatRep    _ = nullCosts
 480 costsByKind DoubleRep   _ = nullCosts
 481 -}
 482 -- ---------------------------------------------------------------------------
 483 \end{code}
 484
 485 This is the data structure of {\tt PrimOp} copied from prelude/PrimOp.lhs.
 486 I include here some comments about the estimated costs for these @PrimOps@.
 487 Compare with the @primOpCosts@ fct above.  -- HWL
 488
 489 \begin{pseudocode}
 490 data PrimOp
 491     -- I assume all these basic comparisons take just one ALU instruction
 492     -- Checked that for Char, Int; Word, Addr should be the same as Int.
 493
 494     = CharGtOp   | CharGeOp   | CharEqOp   | CharNeOp   | CharLtOp   | CharLeOp
 495     | IntGtOp    | IntGeOp    | IntEqOp    | IntNeOp    | IntLtOp    | IntLeOp
 496     | WordGtOp   | WordGeOp   | WordEqOp   | WordNeOp   | WordLtOp   | WordLeOp
 497     | AddrGtOp   | AddrGeOp   | AddrEqOp   | AddrNeOp   | AddrLtOp   | AddrLeOp
 498
 499     -- Analogously, these take one FP unit instruction
 500     -- Haven't checked that, yet.
 501
 502     | FloatGtOp  | FloatGeOp  | FloatEqOp  | FloatNeOp  | FloatLtOp  | FloatLeOp
 503     | DoubleGtOp | DoubleGeOp | DoubleEqOp | DoubleNeOp | DoubleLtOp | DoubleLeOp
 504
 505     -- 1 ALU op; unchecked
 506     | OrdOp | ChrOp
 507
 508     -- these just take 1 ALU op; checked
 509     | IntAddOp | IntSubOp
 510
 511     -- but these take more than that; see special cases in primOpCosts
 512     -- I counted the generated ass. instructions for these -> checked
 513     | IntMulOp | IntQuotOp
 514     | IntRemOp | IntNegOp | IntAbsOp
 515
 516     -- Rest is unchecked so far -- HWL
 517
 518     -- Word#-related ops:
 519     | AndOp   | OrOp  | NotOp | ShiftLOp | ShiftROp
 520     | Int2WordOp | Word2IntOp -- casts
 521
 522     -- Addr#-related ops:
 523     | Int2AddrOp | Addr2IntOp -- casts
 524
 525     -- Float#-related ops:
 526     | FloatAddOp | FloatSubOp | FloatMulOp | FloatDivOp | FloatNegOp
 527     | Float2IntOp | Int2FloatOp
 528
 529     | FloatExpOp   | FloatLogOp   | FloatSqrtOp
 530     | FloatSinOp   | FloatCosOp   | FloatTanOp
 531     | FloatAsinOp  | FloatAcosOp  | FloatAtanOp
 532     | FloatSinhOp  | FloatCoshOp  | FloatTanhOp
 533     -- not all machines have these available conveniently:
 534     -- | FloatAsinhOp | FloatAcoshOp | FloatAtanhOp
 535     | FloatPowerOp -- ** op
 536
 537     -- Double#-related ops:
 538     | DoubleAddOp | DoubleSubOp | DoubleMulOp | DoubleDivOp | DoubleNegOp
 539     | Double2IntOp | Int2DoubleOp
 540     | Double2FloatOp | Float2DoubleOp
 541
 542     | DoubleExpOp   | DoubleLogOp   | DoubleSqrtOp
 543     | DoubleSinOp   | DoubleCosOp   | DoubleTanOp
 544     | DoubleAsinOp  | DoubleAcosOp  | DoubleAtanOp
 545     | DoubleSinhOp  | DoubleCoshOp  | DoubleTanhOp
 546     -- not all machines have these available conveniently:
 547     -- | DoubleAsinhOp | DoubleAcoshOp | DoubleAtanhOp
 548     | DoublePowerOp -- ** op
 549
 550     -- Integer (and related...) ops:
 551     -- slightly weird -- to match GMP package.
 552     | IntegerAddOp | IntegerSubOp | IntegerMulOp
 553     | IntegerQuotRemOp | IntegerDivModOp | IntegerNegOp
 554
 555     | IntegerCmpOp
 556
 557     | Integer2IntOp  | Int2IntegerOp
 558     | Addr2IntegerOp -- "Addr" is *always* a literal string
 559     -- ?? gcd, etc?
 560
 561     | FloatEncodeOp  | FloatDecodeOp
 562     | DoubleEncodeOp | DoubleDecodeOp
 563
 564     -- primitive ops for primitive arrays
 565
 566     | NewArrayOp
 567     | NewByteArrayOp PrimRep
 568
 569     | SameMutableArrayOp
 570     | SameMutableByteArrayOp
 571
 572     | ReadArrayOp | WriteArrayOp | IndexArrayOp -- for arrays of Haskell ptrs
 573
 574     | ReadByteArrayOp   PrimRep
 575     | WriteByteArrayOp  PrimRep
 576     | IndexByteArrayOp  PrimRep
 577     | IndexOffAddrOp    PrimRep
 578         -- PrimRep can be one of {Char,Int,Addr,Float,Double}Kind.
 579         -- This is just a cheesy encoding of a bunch of ops.
 580         -- Note that ForeignObjRep is not included -- the only way of
 581         -- creating a ForeignObj is with a ccall or casm.
 582
 583     | UnsafeFreezeArrayOp | UnsafeFreezeByteArrayOp
 584
 585     | MakeStablePtrOp | DeRefStablePtrOp
 586 \end{pseudocode}
 587
 588 A special ``trap-door'' to use in making calls direct to C functions:
 589 Note: From GrAn point of view, CCall is probably very expensive
 590       The programmer can specify the costs of the Ccall by inserting
 591       a GRAN_EXEC(a,b,l,s,f) at the end of the C- code, specifing the
 592       number or arithm., branch, load, store and floating point instructions
 593       -- HWL
 594
 595 \begin{pseudocode}
 596     | CCallOp   String  -- An "unboxed" ccall# to this named function
 597                 Bool    -- True <=> really a "casm"
 598                 Bool    -- True <=> might invoke Haskell GC
 599                 [Type]  -- Unboxed argument; the state-token
 600                         -- argument will have been put *first*
 601                 Type    -- Return type; one of the "StateAnd<blah>#" types
 602
 603     -- (... to be continued ... )
 604 \end{pseudocode}