ghc/compiler/absCSyn/Costs.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1994-1998
   3 %     Hans Wolfgang Loidl
   4 %
   5 % ---------------------------------------------------------------------------
   6
   7 \section[Costs]{Evaluating the costs of computing some abstract C code}
   8
   9 This module   provides all necessary  functions for   computing for a given
  10 abstract~C Program the costs of executing that program. This is done by the
  11 exported function:
  12
  13 \begin{quote}
  14  {\verb data CostRes = Cost (Int, Int, Int, Int, Int)}
  15  {\verb costs :: AbstractC -> CostRes }
  16 \end{quote}
  17
  18 The meaning of the result tuple is:
  19 \begin{itemize}
  20  \item The first component ({\tt i}) counts the number of integer,
  21    arithmetic and bit-manipulating instructions.
  22  \item The second component ({\tt b}) counts the number of branches (direct
  23    branches as well as indirect ones).
  24  \item The third component ({\tt l}) counts the number of load instructions.
  25  \item The fourth component ({\tt s}) counts the number of store
  26    instructions.
  27  \item The fifth component ({\tt f}) counts the number of floating point
  28    instructions.
  29 \end{itemize}
  30
  31 This function is needed in GrAnSim for parallelism.
  32
  33 These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!):
  34
  35 \begin{pseudocode}
  36
  37 #define LOAD_COSTS              2
  38 #define STORE_COSTS             2
  39 #define INT_ARITHM_COSTS        1
  40 #define GMP_ARITHM_COSTS        3 {- any clue for GMP costs ? -}
  41 #define FLOAT_ARITHM_COSTS      3 {- any clue for float costs ? -}
  42 #define BRANCH_COSTS            2
  43
  44 \end{pseudocode}
  45
  46 \begin{code}
  47 #define ACCUM_COSTS(i,b,l,s,f)  (i+b+l+s+f)
  48 -- Restoring registers is costed as one load per register; saving as one store per register.
  49 #define NUM_REGS                10 {- PprAbsCSyn.lhs -}       {- runtime/c-as-asm/CallWrap_C.lc -}
  50 #define RESTORE_COSTS           (Cost (0, 0, NUM_REGS, 0, 0)  :: CostRes)
  51 #define SAVE_COSTS              (Cost (0, 0, 0, NUM_REGS, 0)  :: CostRes)
  52 #define CCALL_COSTS_GUESS       (Cost (50, 0, 0, 0, 0)        :: CostRes)
  53
  54 module Costs( costs,
  55               addrModeCosts, CostRes(Cost), nullCosts, Side(..)
  56     ) where
  57
  58 #include "HsVersions.h"
  59
  60 import AbsCSyn
  61 import PrimOp           ( primOpNeedsWrapper, PrimOp(..) )
  62 import Panic            ( trace )
  63
  64 -- --------------------------------------------------------------------------
  65 data CostRes = Cost (Int, Int, Int, Int, Int)   -- (i, b, l, s, f): int/arith, branch, load, store, float counts
  66                deriving (Text)
  67 -- Frequently used constant cost vectors.
  68 nullCosts    = Cost (0, 0, 0, 0, 0) :: CostRes   -- nothing is charged
  69 initHdrCosts = Cost (2, 0, 0, 1, 0) :: CostRes   -- charged by costs for a CInitHdr
  70 errorCosts   = Cost (-1, -1, -1, -1, -1)  -- just for debugging
  71
  72 oneArithm = Cost (1, 0, 0, 0, 0) :: CostRes      -- a single integer/arithmetic instruction
  73 -- Two cost vectors are equal exactly when all five components agree.
  74 instance Eq CostRes where
  75  c1 == c2 = and [i_eq, b_eq, l_eq, s_eq, f_eq]
  76              where (i_eq, b_eq, l_eq, s_eq, f_eq) = binOp' (==) c1 c2
  77 -- Arithmetic on cost vectors works component by component.
  78 instance Num CostRes where
  79  (+) = binOp (+)
  80  (-) = binOp (-)
  81  (*) = binOp (*)
  82  negate  = mapOp negate
  83  abs     = mapOp abs
  84  signum  = mapOp signum   -- NB: no fromInteger is defined in this instance
  85 -- Apply a function to each of the five components of a cost vector.
  86 mapOp :: (Int -> Int) -> CostRes -> CostRes
  87 mapOp fn (Cost (ic, bc, lc, sc, fc)) = Cost (fn ic, fn bc, fn lc, fn sc, fn fc)
  88 -- Right fold over the five components, visited in the order i, b, l, s, f.
  89 foldrOp :: (Int -> a -> a) -> a -> CostRes -> a
  90 foldrOp step seed (Cost (ic, bc, lc, sc, fc)) =
  91         foldr step seed [ic, bc, lc, sc, fc]
  92 -- Combine two cost vectors component by component with the given operator.
  93 binOp :: (Int -> Int -> Int) -> CostRes -> CostRes -> CostRes
  94 binOp op (Cost (i1, b1, l1, s1, f1)) (Cost (i2, b2, l2, s2, f2)) =
  95         Cost (op i1 i2, op b1 b2, op l1 l2, op s1 s2, op f1 f2)
  96 -- Component-wise combination that returns the bare 5-tuple of results (used by Eq).
  97 binOp' :: (Int -> Int -> a) -> CostRes -> CostRes -> (a,a,a,a,a)
  98 binOp' op (Cost (i1, b1, l1, s1, f1)) (Cost (i2, b2, l2, s2, f2)) =
  99          (op i1 i2, op b1 b2, op l1 l2, op s1 s2, op f1 f2)
 100
 101 -- --------------------------------------------------------------------------
 102 -- Role of an address mode: Lhs = assignment target (written), Rhs = operand (read).
 103 data Side = Lhs | Rhs
 104             deriving (Eq)
 105
 106 -- --------------------------------------------------------------------------
 107
 108 costs :: AbstractC -> CostRes
 109 -- Static cost estimate for a piece of abstract C; data declarations and profiling calls cost nothing.
 110 costs absC =
 111   case absC of
 112    AbsCNop                      ->  nullCosts
 113
 114    AbsCStmts absC1 absC2        -> costs absC1 + costs absC2
 115
 116    CAssign (CReg _) (CReg _)    -> Cost (1,0,0,0,0)   -- typ.: mov %reg1,%reg2
 117
 118    CAssign (CReg _) (CTemp _ _) -> Cost (1,0,0,0,0)
 119    -- any other assignment into a register: only the source operand is costed
 120    CAssign (CReg _) source_m    -> addrModeCosts source_m Rhs
 121
 122    CAssign target_m source_m    -> addrModeCosts target_m Lhs +
 123                                    addrModeCosts source_m Rhs
 124
 125    CJump (CLbl _  _)            -> Cost (0,1,0,0,0)  -- no ld for call necessary
 126
 127    CJump mode                   -> addrModeCosts mode Rhs +
 128                                    Cost (0,1,0,0,0)
 129
 130    CFallThrough mode  -> addrModeCosts mode Rhs +               -- chu' 0.24
 131                          Cost (0,1,0,0,0)
 132
 133    CReturn mode info  -> case info of
 134                           DirectReturn -> addrModeCosts mode Rhs +
 135                                           Cost (0,1,0,0,0)
 136
 137                             -- i.e. ld address to reg and call reg
 138
 139                           DynamicVectoredReturn mode' ->
 140                                         addrModeCosts mode Rhs +
 141                                         addrModeCosts mode' Rhs +
 142                                         Cost (0,1,1,0,0)
 143
 144                             {- generates code like this:
 145                                 JMP_(<mode>)[RVREL(<mode'>)];
 146                                i.e. 1 possb ld for mode'
 147                                     1 ld for RVREL
 148                                     1 possb ld for mode
 149                                     1 call                              -}
 150
 151                           StaticVectoredReturn _ -> addrModeCosts mode Rhs +
 152                                                   Cost (0,1,1,0,0)
 153
 154                             -- as above with mode' fixed to CLit
 155                             -- typically 2 ld + 1 call; 1st ld due
 156                             -- to CVal as mode
 157
 158    CSwitch mode alts absC     -> nullCosts
 159                                  {- for handling costs of all branches of
 160                                     a CSwitch see PprAbsC.
 161                                     Basically:
 162                                      Costs for branch =
 163                                         Costs before CSwitch +
 164                                         addrModeCosts of head +
 165                                         Costs for 1 cond branch +
 166                                         Costs for body of branch
 167                                  -}
 168
 169    CCodeBlock _ absC          -> costs absC
 170
 171    CInitHdr cl_info reg_rel cost_centre -> initHdrCosts
 172
 173                         {- This is more fancy but superfluous: The addr modes
 174                            are fixed and so the costs are const!
 175
 176                         argCosts + initHdrCosts
 177                         where argCosts = addrModeCosts (CAddr reg_rel) Rhs +
 178                                          addrModeCosts base_lbl +    -- CLbl!
 179                                          3*addrModeCosts (mkIntCLit 1{- any val -})
 180                         -}
 181                         {- this extends to something like
 182                             SET_SPEC_HDR(...)
 183                            For costing the args of this macro
 184                            see PprAbsC.lhs where args are inserted -}
 185
 186    COpStmt modes_res primOp modes_args _ ->
 187         {-
 188            let
 189                 n = length modes_res
 190            in
 191                 (0, 0, n, n, 0) +
 192                 primOpCosts primOp +
 193                 if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
 194                                              else nullCosts
 195            -- ^^HWL
 196         -}
 197         foldl (+) nullCosts [addrModeCosts mode Lhs | mode <- modes_res]  +
 198         foldl (+) nullCosts [addrModeCosts mode Rhs | mode <- modes_args]  +
 199         primOpCosts primOp +
 200         if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
 201                                      else nullCosts
 202
 203    CSimultaneous absC        -> costs absC
 204
 205    CCheck _ amodes code      -> Cost (2, 1, 0, 0, 0)
 206
 207    CMacroStmt   macro modes  -> stmtMacroCosts macro modes
 208
 209    CCallProfCtrMacro   _ _   -> nullCosts
 210                                   {- we don't count profiling in GrAnSim -}
 211
 212    CCallProfCCMacro    _ _   -> nullCosts
 213                                   {- we don't count profiling in GrAnSim -}
 214
 215   -- *** the next three [or so...] are DATA (those above are CODE) ***
 216   -- as they are data rather than code they all have nullCosts         -- HWL
 217
 218    CStaticClosure _ _ _ _    -> nullCosts
 219
 220    CClosureInfoAndCode _ _ _ _ -> nullCosts
 221
 222    CRetDirect _ _ _ _        -> nullCosts
 223
 224    CRetVector _ _ _ _        -> nullCosts
 225
 226    CCostCentreDecl _ _       -> nullCosts
 227    CCostCentreStackDecl _    -> nullCosts
 228
 229    CSplitMarker              -> nullCosts
 230
 231 -- ---------------------------------------------------------------------------
 232
 233 addrModeCosts :: CAddrMode -> Side -> CostRes
 234 -- Cost of accessing one operand: memory-like modes cost one store on the
 235 -- Lhs and one load on the Rhs; registers and temporaries are free.
 236 -- (Disabled alternative:  addrModeCosts _ _ = nullCosts)
 237 addrModeCosts addr_mode side =
 238   case addr_mode of
 239     CVal _ _ -> mem_access
 240
 241     CReg _   -> nullCosts        {- loading from, storing to reg is free ! -}
 242                                  {- for costing CReg->Creg ops see special -}
 243                                  {- case in costs fct -}
 244
 245     CTemp _ _  -> nullCosts     {- deliberately not costed as mem_access -}
 246         -- ``Temporaries'' correspond to local variables in C, and registers in
 247         -- native code.
 248         -- I assume they can be somewhat optimized by gcc -- HWL
 249
 250     CLbl _ _
 251       | is_target -> one_store
 252       | otherwise -> Cost (2, 0, 0, 0, 0)
 253                   -- Rhs: typically: sethi %hi(lbl),%tmp_reg
 254                   --                 or    %tmp_reg,%lo(lbl),%target_reg
 255
 256     --  Check the following 3 (checked from CLit on)
 257
 258     CCharLike _ -> mem_access
 259
 260     CIntLike _  -> mem_access
 261
 262     CLit    _
 263       | is_target -> nullCosts            -- should never occur
 264       | otherwise -> one_arith            -- typ.: mov lit,%reg
 265
 266     CLitLit _  _                          -- same as CLit
 267       | is_target -> nullCosts
 268       | otherwise -> one_arith
 269
 270     CJoinPoint _ -> mem_access
 271
 272     CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list
 273   where
 274     is_target  = side == Lhs
 275     one_store  = Cost (0, 0, 0, 1, 0)
 276     one_load   = Cost (0, 0, 1, 0, 0)
 277     one_arith  = Cost (1, 0, 0, 0, 0)
 278     mem_access = if is_target then one_store else one_load
 279
 280 -- ---------------------------------------------------------------------------
 281
 282 exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes
 283 -- Cost of an expression macro: the cost of its operands plus a per-macro constant.
 284 exprMacroCosts side macro mode_list =
 285   operand_costs + macro_costs
 286   where
 287     -- every operand is costed as a read (Rhs), whatever side was passed in
 288     operand_costs = foldl (+) nullCosts [addrModeCosts m Rhs | m <- mode_list]
 289     macro_costs   =
 290       case macro of
 291         ENTRY_CODE -> nullCosts
 292         ARG_TAG    -> nullCosts -- XXX
 293         GET_TAG    -> nullCosts -- XXX
 294
 295
 296 -- ---------------------------------------------------------------------------
 297
 298 stmtMacroCosts :: CStmtMacro -> [CAddrMode] -> CostRes
 299 -- Fixed cost estimate for a statement macro; unknown macros are traced and cost nothing.
 300 stmtMacroCosts macro modes =
 301   -- The costs below are constants per macro; the address modes in modes are
 302   -- not costed here (a former local arg_costs summed them but was never used
 303   -- and has been removed).  NOTE(review): exprMacroCosts does add its
 304   -- argument costs -- confirm that leaving them out here is intended.
 305   case macro of
 306     ARGS_CHK_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)         {- StgMacros.lh  -}
 307                 -- p=probability of PAP (instead of AP): + p*(3,1,0,0,0)
 308     ARGS_CHK              ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
 309     UPD_CAF               ->  Cost (7, 0, 1, 3, 0)       {- SMupdate.lh  -}
 310     UPD_BH_UPDATABLE      ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh  -}
 311     UPD_BH_SINGLE_ENTRY   ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh  -}
 312     PUSH_UPD_FRAME        ->  Cost (3, 0, 0, 4, 0)       {- SMupdate.lh  -}
 313     SET_TAG               ->  nullCosts             {- COptRegs.lh -}
 314     GRAN_FETCH                  ->  nullCosts     {- GrAnSim bookkeeping -}
 315     GRAN_RESCHEDULE             ->  nullCosts     {- GrAnSim bookkeeping -}
 316     GRAN_FETCH_AND_RESCHEDULE   ->  nullCosts     {- GrAnSim bookkeeping -}
 317     GRAN_YIELD                  ->  nullCosts     {- GrAnSim bookkeeping -- added SOF -}
 318     THREAD_CONTEXT_SWITCH       ->  nullCosts     {- GrAnSim bookkeeping -}
 319     _ -> trace ("Costs.stmtMacroCosts") nullCosts
 320
 321 -- ---------------------------------------------------------------------------
 322 -- Primops that the primOpCosts fallback charges as one floating point instruction.
 323 floatOps :: [PrimOp]
 324 floatOps =
 325   [   FloatGtOp  , FloatGeOp  , FloatEqOp  , FloatNeOp  , FloatLtOp  , FloatLeOp
 326     , DoubleGtOp , DoubleGeOp , DoubleEqOp , DoubleNeOp , DoubleLtOp , DoubleLeOp
 327     , FloatAddOp , FloatSubOp , FloatMulOp , FloatDivOp , FloatNegOp
 328     , Float2IntOp , Int2FloatOp
 329     , FloatExpOp   , FloatLogOp   , FloatSqrtOp
 330     , FloatSinOp   , FloatCosOp   , FloatTanOp
 331     , FloatAsinOp  , FloatAcosOp  , FloatAtanOp
 332     , FloatSinhOp  , FloatCoshOp  , FloatTanhOp
 333     , FloatPowerOp
 334     , DoubleAddOp , DoubleSubOp , DoubleMulOp , DoubleDivOp , DoubleNegOp
 335     , Double2IntOp , Int2DoubleOp
 336     , Double2FloatOp , Float2DoubleOp
 337     , DoubleExpOp   , DoubleLogOp   , DoubleSqrtOp
 338     , DoubleSinOp   , DoubleCosOp   , DoubleTanOp
 339     , DoubleAsinOp  , DoubleAcosOp  , DoubleAtanOp
 340     , DoubleSinhOp  , DoubleCoshOp  , DoubleTanhOp
 341     , DoublePowerOp
 342     , FloatDecodeOp
 343     , DoubleDecodeOp
 344   ]
 345 -- Integer primops that the primOpCosts fallback charges with a single guessed cost vector.
 346 gmpOps :: [PrimOp]
 347 gmpOps  =
 348   [   IntegerAddOp , IntegerSubOp , IntegerMulOp
 349     , IntegerQuotRemOp , IntegerDivModOp , IntegerNegOp
 350     , IntegerCmpOp
 351     , Integer2IntOp  , Int2IntegerOp
 352     , Addr2IntegerOp
 353   ]
 354
 355
 356 abs_costs = nullCosts   -- NB:  This is normal STG code with costs already
 357                         --      included; no need to add costs again.
 358 -- Added by primOpCosts to the IntMulOp, IntRemOp and IntQuotOp cases respectively.
 359 umul_costs = Cost (21,4,0,0,0)     -- due to spy counts
 360 rem_costs =  Cost (30,15,0,0,0)    -- due to spy counts
 361 div_costs =  Cost (30,15,0,0,0)    -- due to spy counts
 362
 363 primOpCosts :: PrimOp -> CostRes
 364 -- Estimated cost of one primitive operation itself; operand costs are added by the caller (see costs).
 365 -- Special cases
 366
 367 primOpCosts (CCallOp _ _ _ _) = SAVE_COSTS + RESTORE_COSTS
 368                                   -- don't guess costs of ccall proper
 369                                   -- for exact costing use a GRAN_EXEC
 370                                   -- in the C code
 371
 372 -- Usually 3 mov instructions are needed to get args and res in right place.
 373
 374 primOpCosts IntMulOp  = Cost (3, 1, 0, 0, 0)  + umul_costs
 375 primOpCosts IntQuotOp = Cost (3, 1, 0, 0, 0)  + div_costs
 376 primOpCosts IntRemOp  = Cost (3, 1, 0, 0, 0)  + rem_costs
 377 primOpCosts IntNegOp  = Cost (1, 1, 0, 0, 0) -- translates into 1 sub
 378 primOpCosts IntAbsOp  = Cost (0, 1, 0, 0, 0) -- abs closure already costed
 379
 380 primOpCosts FloatGtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 381 primOpCosts FloatGeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 382 primOpCosts FloatEqOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 383 primOpCosts FloatNeOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 384 primOpCosts FloatLtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 385 primOpCosts FloatLeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 386 primOpCosts DoubleGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 387 primOpCosts DoubleGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 388 primOpCosts DoubleEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 389 primOpCosts DoubleNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
 390 primOpCosts DoubleLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 391 primOpCosts DoubleLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
 392
 393 primOpCosts FloatExpOp    = Cost (2, 1, 4, 4, 3)
 394 primOpCosts FloatLogOp    = Cost (2, 1, 4, 4, 3)
 395 primOpCosts FloatSqrtOp   = Cost (2, 1, 4, 4, 3)
 396 primOpCosts FloatSinOp    = Cost (2, 1, 4, 4, 3)
 397 primOpCosts FloatCosOp    = Cost (2, 1, 4, 4, 3)
 398 primOpCosts FloatTanOp    = Cost (2, 1, 4, 4, 3)
 399 primOpCosts FloatAsinOp   = Cost (2, 1, 4, 4, 3)
 400 primOpCosts FloatAcosOp   = Cost (2, 1, 4, 4, 3)
 401 primOpCosts FloatAtanOp   = Cost (2, 1, 4, 4, 3)
 402 primOpCosts FloatSinhOp   = Cost (2, 1, 4, 4, 3)
 403 primOpCosts FloatCoshOp   = Cost (2, 1, 4, 4, 3)
 404 primOpCosts FloatTanhOp   = Cost (2, 1, 4, 4, 3)
 405 --primOpCosts FloatAsinhOp  = Cost (2, 1, 4, 4, 3)
 406 --primOpCosts FloatAcoshOp  = Cost (2, 1, 4, 4, 3)
 407 --primOpCosts FloatAtanhOp  = Cost (2, 1, 4, 4, 3)
 408 primOpCosts FloatPowerOp  = Cost (2, 1, 4, 4, 3)
 409
 410 {- There should be special handling of the Array PrimOps in here   HWL -}
 411 -- Fallback for every remaining primop; the clauses above win for ops that are also in floatOps.
 412 primOpCosts primOp
 413   | primOp `elem` floatOps = Cost (0, 0, 0, 0, 1)  :: CostRes
 414   | primOp `elem` gmpOps   = Cost (30, 5, 10, 10, 0) :: CostRes  -- GUESS; check it
 415   | otherwise              = Cost (1, 0, 0, 0, 0)
 416
 417 -- ---------------------------------------------------------------------------
 418 {- HWL: currently unused
 419
 420 costsByKind :: PrimRep -> Side -> CostRes
 421
 422 -- The following PrimKinds say that the data is already in a reg
 423
 424 costsByKind CharRep     _ = nullCosts
 425 costsByKind IntRep      _ = nullCosts
 426 costsByKind WordRep     _ = nullCosts
 427 costsByKind AddrRep     _ = nullCosts
 428 costsByKind FloatRep    _ = nullCosts
 429 costsByKind DoubleRep   _ = nullCosts
 430 -}
 431 -- ---------------------------------------------------------------------------
 432 \end{code}
 433
 434 This is the data structure of {\tt PrimOp} copied from prelude/PrimOp.lhs.
 435 I include here some comments about the estimated costs for these @PrimOps@.
 436 Compare with the @primOpCosts@ fct above.  -- HWL
 437
 438 \begin{pseudocode}
 439 data PrimOp
 440     -- I assume all these basic comparisons take just one ALU instruction
 441     -- Checked that for Char, Int; Word, Addr should be the same as Int.
 442
 443     = CharGtOp   | CharGeOp   | CharEqOp   | CharNeOp   | CharLtOp   | CharLeOp
 444     | IntGtOp    | IntGeOp    | IntEqOp    | IntNeOp    | IntLtOp    | IntLeOp
 445     | WordGtOp   | WordGeOp   | WordEqOp   | WordNeOp   | WordLtOp   | WordLeOp
 446     | AddrGtOp   | AddrGeOp   | AddrEqOp   | AddrNeOp   | AddrLtOp   | AddrLeOp
 447
 448     -- Analogously, these take one FP unit instruction
 449     -- Haven't checked that, yet.
 450
 451     | FloatGtOp  | FloatGeOp  | FloatEqOp  | FloatNeOp  | FloatLtOp  | FloatLeOp
 452     | DoubleGtOp | DoubleGeOp | DoubleEqOp | DoubleNeOp | DoubleLtOp | DoubleLeOp
 453
 454     -- 1 ALU op; unchecked
 455     | OrdOp | ChrOp
 456
 457     -- these just take 1 ALU op; checked
 458     | IntAddOp | IntSubOp
 459
 460     -- but these take more than that; see special cases in primOpCosts
 461     -- I counted the generated ass. instructions for these -> checked
 462     | IntMulOp | IntQuotOp
 463     | IntRemOp | IntNegOp | IntAbsOp
 464
 465     -- Rest is unchecked so far -- HWL
 466
 467     -- Word#-related ops:
 468     | AndOp   | OrOp  | NotOp | XorOp | ShiftLOp | ShiftROp
 469     | Int2WordOp | Word2IntOp -- casts
 470
 471     -- Addr#-related ops:
 472     | Int2AddrOp | Addr2IntOp -- casts
 473
 474     -- Float#-related ops:
 475     | FloatAddOp | FloatSubOp | FloatMulOp | FloatDivOp | FloatNegOp
 476     | Float2IntOp | Int2FloatOp
 477
 478     | FloatExpOp   | FloatLogOp   | FloatSqrtOp
 479     | FloatSinOp   | FloatCosOp   | FloatTanOp
 480     | FloatAsinOp  | FloatAcosOp  | FloatAtanOp
 481     | FloatSinhOp  | FloatCoshOp  | FloatTanhOp
 482     -- not all machines have these available conveniently:
 483     -- | FloatAsinhOp | FloatAcoshOp | FloatAtanhOp
 484     | FloatPowerOp -- ** op
 485
 486     -- Double#-related ops:
 487     | DoubleAddOp | DoubleSubOp | DoubleMulOp | DoubleDivOp | DoubleNegOp
 488     | Double2IntOp | Int2DoubleOp
 489     | Double2FloatOp | Float2DoubleOp
 490
 491     | DoubleExpOp   | DoubleLogOp   | DoubleSqrtOp
 492     | DoubleSinOp   | DoubleCosOp   | DoubleTanOp
 493     | DoubleAsinOp  | DoubleAcosOp  | DoubleAtanOp
 494     | DoubleSinhOp  | DoubleCoshOp  | DoubleTanhOp
 495     -- not all machines have these available conveniently:
 496     -- | DoubleAsinhOp | DoubleAcoshOp | DoubleAtanhOp
 497     | DoublePowerOp -- ** op
 498
 499     -- Integer (and related...) ops:
 500     -- slightly weird -- to match GMP package.
 501     | IntegerAddOp | IntegerSubOp | IntegerMulOp
 502     | IntegerQuotRemOp | IntegerDivModOp | IntegerNegOp
 503
 504     | IntegerCmpOp
 505
 506     | Integer2IntOp  | Int2IntegerOp
 507     | Addr2IntegerOp -- "Addr" is *always* a literal string
 508     -- ?? gcd, etc?
 509
 510     | FloatEncodeOp  | FloatDecodeOp
 511     | DoubleEncodeOp | DoubleDecodeOp
 512
 513     -- primitive ops for primitive arrays
 514
 515     | NewArrayOp
 516     | NewByteArrayOp PrimRep
 517
 518     | SameMutableArrayOp
 519     | SameMutableByteArrayOp
 520
 521     | ReadArrayOp | WriteArrayOp | IndexArrayOp -- for arrays of Haskell ptrs
 522
 523     | ReadByteArrayOp   PrimRep
 524     | WriteByteArrayOp  PrimRep
 525     | IndexByteArrayOp  PrimRep
 526     | IndexOffAddrOp    PrimRep
 527         -- PrimRep can be one of {Char,Int,Addr,Float,Double}Kind.
 528         -- This is just a cheesy encoding of a bunch of ops.
 529         -- Note that ForeignObjRep is not included -- the only way of
 530         -- creating a ForeignObj is with a ccall or casm.
 531
 532     | UnsafeFreezeArrayOp | UnsafeFreezeByteArrayOp
 533
 534     | MakeStablePtrOp | DeRefStablePtrOp
 535 \end{pseudocode}
 536
 537 A special ``trap-door'' to use in making calls direct to C functions:
 538 Note: From the GrAn point of view, a CCall is probably very expensive.
 539       The programmer can specify the costs of the CCall by inserting
 540       a GRAN_EXEC(a,b,l,s,f) at the end of the C code, specifying the
 541       number of arithm., branch, load, store and floating point instructions
 542       -- HWL
 543
 544 \begin{pseudocode}
 545     | CCallOp   String  -- An "unboxed" ccall# to this named function
 546                 Bool    -- True <=> really a "casm"
 547                 Bool    -- True <=> might invoke Haskell GC
 548                 [Type]  -- Unboxed argument; the state-token
 549                         -- argument will have been put *first*
 550                 Type    -- Return type; one of the "StateAnd<blah>#" types
 551
 552     -- (... to be continued ... )
 553 \end{pseudocode}