From 46176dfa4f329af687c92e57740c800a6cada7b1 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Tue, 24 May 2011 13:16:28 +0100 Subject: [PATCH] Assign more accurate code sizes to primops, so that the inlining heuristics work better. Also removed the old unused "needs_wrapper" predicate for primops. This helps with #4978. --- compiler/coreSyn/CoreUnfold.lhs | 18 ++----- compiler/ghc.mk | 6 +-- compiler/prelude/PrimOp.lhs | 35 ++++++------ compiler/prelude/primops.txt.pp | 112 ++++++++++++++++++++++++++------------- utils/genprimopcode/Lexer.x | 1 + utils/genprimopcode/Main.hs | 13 ++--- utils/genprimopcode/Parser.y | 2 + utils/genprimopcode/ParserM.hs | 1 + utils/genprimopcode/Syntax.hs | 2 + 9 files changed, 111 insertions(+), 79 deletions(-) diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index da703ef..6a73716 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -585,19 +585,11 @@ didn't adopt the idea. \begin{code} primOpSize :: PrimOp -> Int -> ExprSize primOpSize op n_val_args - | not (primOpIsDupable op) = sizeN opt_UF_DearOp - | not (primOpOutOfLine op) = sizeN 1 - -- Be very keen to inline simple primops. - -- We give a discount of 1 for each arg so that (op# x y z) costs 2. - -- We can't make it cost 1, else we'll inline let v = (op# x y z) - -- at every use of v, which is excessive. - -- - -- A good example is: - -- let x = +# p q in C {x} - -- Even though x get's an occurrence of 'many', its RHS looks cheap, - -- and there's a good chance it'll get inlined back into C's RHS. Urgh! - - | otherwise = sizeN n_val_args + = if primOpOutOfLine op + then sizeN (op_size + n_val_args) + else sizeN op_size + where + op_size = primOpCodeSize op buildSize :: ExprSize diff --git a/compiler/ghc.mk b/compiler/ghc.mk index 2254332..8ed34c3 100644 --- a/compiler/ghc.mk +++ b/compiler/ghc.mk @@ -252,7 +252,7 @@ PRIMOP_BITS = compiler/primop-data-decl.hs-incl \ compiler/primop-has-side-effects.hs-incl \ compiler/primop-out-of-line.hs-incl \ compiler/primop-commutable.hs-incl \ - compiler/primop-needs-wrapper.hs-incl \ + compiler/primop-code-size.hs-incl \ compiler/primop-can-fail.hs-incl \ compiler/primop-strictness.hs-incl \ compiler/primop-primop-info.hs-incl @@ -278,8 +278,8 @@ compiler/primop-out-of-line.hs-incl: $(PRIMOPS_TXT) $(GENPRIMOP_INPLACE) "$(GENPRIMOP_INPLACE)" --out-of-line < $< > $@ compiler/primop-commutable.hs-incl: $(PRIMOPS_TXT) $(GENPRIMOP_INPLACE) "$(GENPRIMOP_INPLACE)" --commutable < $< > $@ -compiler/primop-needs-wrapper.hs-incl: $(PRIMOPS_TXT) $(GENPRIMOP_INPLACE) - "$(GENPRIMOP_INPLACE)" --needs-wrapper < $< > $@ +compiler/primop-code-size.hs-incl: $(PRIMOPS_TXT) $(GENPRIMOP_INPLACE) + "$(GENPRIMOP_INPLACE)" --code-size < $< > $@ compiler/primop-can-fail.hs-incl: $(PRIMOPS_TXT) $(GENPRIMOP_INPLACE) "$(GENPRIMOP_INPLACE)" --can-fail < $< > $@ compiler/primop-strictness.hs-incl: $(PRIMOPS_TXT) $(GENPRIMOP_INPLACE) diff --git a/compiler/prelude/PrimOp.lhs b/compiler/prelude/PrimOp.lhs index 8c532ff..29c5644 100644 --- a/compiler/prelude/PrimOp.lhs +++ b/compiler/prelude/PrimOp.lhs @@ -18,8 +18,8 @@ module PrimOp ( tagToEnumKey, - primOpOutOfLine, primOpNeedsWrapper, - primOpOkForSpeculation, primOpIsCheap, primOpIsDupable, + primOpOutOfLine, primOpCodeSize, + primOpOkForSpeculation, primOpIsCheap, getPrimOpResultInfo, PrimOpResultInfo(..), @@ -363,18 +363,23 @@ primOpIsCheap op = primOpOkForSpeculation op -- even if primOpIsCheap sometimes says 'True'. \end{code} -primOpIsDupable -~~~~~~~~~~~~~~~ -primOpIsDupable means that the use of the primop is small enough to -duplicate into different case branches. See CoreUtils.exprIsDupable. +primOpCodeSize +~~~~~~~~~~~~~~ +Gives an indication of the code size of a primop, for the purposes of +calculating unfolding sizes; see CoreUnfold.sizeExpr. \begin{code} -primOpIsDupable :: PrimOp -> Bool - -- See comments with CoreUtils.exprIsDupable - -- We say it's dupable it isn't implemented by a C call with a wrapper -primOpIsDupable op = not (primOpNeedsWrapper op) -\end{code} +primOpCodeSize :: PrimOp -> Int +#include "primop-code-size.hs-incl" + +primOpCodeSizeDefault :: Int +primOpCodeSizeDefault = 1 + -- CoreUnfold.primOpSize already takes into account primOpOutOfLine + -- and adds some further costs for the args in that case. +primOpCodeSizeForeignCall :: Int +primOpCodeSizeForeignCall = 4 +\end{code} \begin{code} primOpCanFail :: PrimOp -> Bool @@ -421,14 +426,6 @@ primOpHasSideEffects :: PrimOp -> Bool #include "primop-has-side-effects.hs-incl" \end{code} -Inline primitive operations that perform calls need wrappers to save -any live variables that are stored in caller-saves registers. - -\begin{code} -primOpNeedsWrapper :: PrimOp -> Bool -#include "primop-needs-wrapper.hs-incl" -\end{code} - \begin{code} primOpType :: PrimOp -> Type -- you may want to use primOpSig instead primOpType op diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 69a1274..4dfe019 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -43,7 +43,7 @@ defaults has_side_effects = False out_of_line = False commutable = False - needs_wrapper = False + code_size = { primOpCodeSizeDefault } can_fail = False strictness = { \ arity -> mkStrictSig (mkTopDmdType (replicate arity lazyDmd) TopRes) } @@ -155,6 +155,7 @@ primop CharLtOp "ltChar#" Compare Char# -> Char# -> Bool primop CharLeOp "leChar#" Compare Char# -> Char# -> Bool primop OrdOp "ord#" GenPrimOp Char# -> Int# + with code_size = 0 ------------------------------------------------------------------------ section "Int#" @@ -212,9 +213,12 @@ primop IntNegOp "negateInt#" Monadic Int# -> Int# primop IntAddCOp "addIntC#" GenPrimOp Int# -> Int# -> (# Int#, Int# #) {Add with carry. First member of result is (wrapped) sum; second member is 0 iff no overflow occured.} + with code_size = 2 + primop IntSubCOp "subIntC#" GenPrimOp Int# -> Int# -> (# Int#, Int# #) {Subtract with carry. First member of result is (wrapped) difference; second member is 0 iff no overflow occured.} + with code_size = 2 primop IntGtOp ">#" Compare Int# -> Int# -> Bool primop IntGeOp ">=#" Compare Int# -> Int# -> Bool @@ -231,8 +235,11 @@ primop IntLtOp "<#" Compare Int# -> Int# -> Bool primop IntLeOp "<=#" Compare Int# -> Int# -> Bool primop ChrOp "chr#" GenPrimOp Int# -> Char# + with code_size = 0 primop Int2WordOp "int2Word#" GenPrimOp Int# -> Word# + with code_size = 0 + primop Int2FloatOp "int2Float#" GenPrimOp Int# -> Float# primop Int2DoubleOp "int2Double#" GenPrimOp Int# -> Double# @@ -286,6 +293,7 @@ primop SrlOp "uncheckedShiftRL#" GenPrimOp Word# -> Int# -> Word# in the range 0 to word size - 1 inclusive.} primop Word2IntOp "word2Int#" GenPrimOp Word# -> Int# + with code_size = 0 primop WordGtOp "gtWord#" Compare Word# -> Word# -> Bool primop WordGeOp "geWord#" Compare Word# -> Word# -> Bool @@ -396,63 +404,72 @@ primop Double2FloatOp "double2Float#" GenPrimOp Double# -> Float# primop DoubleExpOp "expDouble#" Monadic Double# -> Double# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoubleLogOp "logDouble#" Monadic Double# -> Double# with - needs_wrapper = True + code_size = { primOpCodeSizeForeignCall } can_fail = True primop DoubleSqrtOp "sqrtDouble#" Monadic Double# -> Double# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoubleSinOp "sinDouble#" Monadic Double# -> Double# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoubleCosOp "cosDouble#" Monadic Double# -> Double# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoubleTanOp "tanDouble#" Monadic Double# -> Double# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoubleAsinOp "asinDouble#" Monadic Double# -> Double# with - needs_wrapper = True + code_size = { primOpCodeSizeForeignCall } can_fail = True primop DoubleAcosOp "acosDouble#" Monadic Double# -> Double# with - needs_wrapper = True + code_size = { primOpCodeSizeForeignCall } can_fail = True primop DoubleAtanOp "atanDouble#" Monadic Double# -> Double# with - needs_wrapper = True + code_size = { primOpCodeSizeForeignCall } primop DoubleSinhOp "sinhDouble#" Monadic Double# -> Double# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoubleCoshOp "coshDouble#" Monadic Double# -> Double# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoubleTanhOp "tanhDouble#" Monadic Double# -> Double# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoublePowerOp "**##" Dyadic Double# -> Double# -> Double# {Exponentiation.} - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop DoubleDecode_2IntOp "decodeDouble_2Int#" GenPrimOp Double# -> (# Int#, Word#, Word#, Int# #) @@ -506,58 +523,71 @@ primop Float2IntOp "float2Int#" GenPrimOp Float# -> Int# primop FloatExpOp "expFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatLogOp "logFloat#" Monadic Float# -> Float# - with needs_wrapper = True - can_fail = True + with + code_size = { primOpCodeSizeForeignCall } + can_fail = True primop FloatSqrtOp "sqrtFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatSinOp "sinFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatCosOp "cosFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatTanOp "tanFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatAsinOp "asinFloat#" Monadic Float# -> Float# - with needs_wrapper = True - can_fail = True + with + code_size = { primOpCodeSizeForeignCall } + can_fail = True primop FloatAcosOp "acosFloat#" Monadic Float# -> Float# - with needs_wrapper = True - can_fail = True + with + code_size = { primOpCodeSizeForeignCall } + can_fail = True primop FloatAtanOp "atanFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatSinhOp "sinhFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatCoshOp "coshFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatTanhOp "tanhFloat#" Monadic Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop FloatPowerOp "powerFloat#" Dyadic Float# -> Float# -> Float# - with needs_wrapper = True + with + code_size = { primOpCodeSizeForeignCall } primop Float2DoubleOp "float2Double#" GenPrimOp Float# -> Double# @@ -599,6 +629,7 @@ primop WriteArrayOp "writeArray#" GenPrimOp {Write to specified index of mutable array.} with has_side_effects = True + code_size = 2 -- card update too primop SizeofArrayOp "sizeofArray#" GenPrimOp Array# a -> Int# @@ -633,6 +664,7 @@ primop CopyArrayOp "copyArray#" GenPrimOp The two arrays must not be the same array in different states, but this is not checked either.} with has_side_effects = True + code_size = { primOpCodeSizeForeignCall + 4 } primop CopyMutableArrayOp "copyMutableArray#" GenPrimOp MutableArray# s a -> Int# -> MutableArray# s a -> Int# -> Int# -> State# s -> State# s @@ -640,6 +672,7 @@ primop CopyMutableArrayOp "copyMutableArray#" GenPrimOp Both arrays must fully contain the specified ranges, but this is not checked.} with has_side_effects = True + code_size = { primOpCodeSizeForeignCall + 4 } primop CloneArrayOp "cloneArray#" GenPrimOp Array# a -> Int# -> Int# -> Array# a @@ -647,6 +680,7 @@ primop CloneArrayOp "cloneArray#" GenPrimOp The provided Array# should contain the full subrange specified by the two Int#s, but this is not checked.} with has_side_effects = True + code_size = { primOpCodeSizeForeignCall + 4 } primop CloneMutableArrayOp "cloneMutableArray#" GenPrimOp MutableArray# s a -> Int# -> Int# -> State# s -> (# State# s, MutableArray# s a #) @@ -654,6 +688,7 @@ primop CloneMutableArrayOp "cloneMutableArray#" GenPrimOp The provided MutableArray# should contain the full subrange specified by the two Int#s, but this is not checked.} with has_side_effects = True + code_size = { primOpCodeSizeForeignCall + 4 } primop FreezeArrayOp "freezeArray#" GenPrimOp MutableArray# s a -> Int# -> Int# -> State# s -> (# State# s, Array# a #) @@ -661,6 +696,7 @@ primop FreezeArrayOp "freezeArray#" GenPrimOp The provided MutableArray# should contain the full subrange specified by the two Int#s, but this is not checked.} with has_side_effects = True + code_size = { primOpCodeSizeForeignCall + 4 } primop ThawArrayOp "thawArray#" GenPrimOp Array# a -> Int# -> Int# -> State# s -> (# State# s, MutableArray# s a #) @@ -668,6 +704,7 @@ primop ThawArrayOp "thawArray#" GenPrimOp The provided Array# should contain the full subrange specified by the two Int#s, but this is not checked.} with has_side_effects = True + code_size = { primOpCodeSizeForeignCall + 4 } ------------------------------------------------------------------------ section "Byte Arrays" @@ -931,8 +968,10 @@ primop AddrRemOp "remAddr#" GenPrimOp Addr# -> Int# -> Int# #if (WORD_SIZE_IN_BITS == 32 || WORD_SIZE_IN_BITS == 64) primop Addr2IntOp "addr2Int#" GenPrimOp Addr# -> Int# {Coerce directly from address to int. Strongly deprecated.} + with code_size = 0 primop Int2AddrOp "int2Addr#" GenPrimOp Int# -> Addr# {Coerce directly from int to address. Strongly deprecated.} + with code_size = 0 #endif primop AddrGtOp "gtAddr#" Compare Addr# -> Addr# -> Bool @@ -1149,6 +1188,7 @@ primop WriteMutVarOp "writeMutVar#" GenPrimOp {Write contents of {\tt MutVar\#}.} with has_side_effects = True + code_size = { primOpCodeSizeForeignCall } -- for the write barrier primop SameMutVarOp "sameMutVar#" GenPrimOp MutVar# s a -> MutVar# s a -> Bool @@ -1381,7 +1421,6 @@ primop DelayOp "delay#" GenPrimOp Int# -> State# s -> State# s {Sleep specified number of microseconds.} with - needs_wrapper = True has_side_effects = True out_of_line = True @@ -1389,7 +1428,6 @@ primop WaitReadOp "waitRead#" GenPrimOp Int# -> State# s -> State# s {Block until input is available on specified file descriptor.} with - needs_wrapper = True has_side_effects = True out_of_line = True @@ -1397,7 +1435,6 @@ primop WaitWriteOp "waitWrite#" GenPrimOp Int# -> State# s -> State# s {Block until output is possible on specified file descriptor.} with - needs_wrapper = True has_side_effects = True out_of_line = True @@ -1406,7 +1443,6 @@ primop AsyncReadOp "asyncRead#" GenPrimOp Int# -> Int# -> Int# -> Addr# -> State# RealWorld-> (# State# RealWorld, Int#, Int# #) {Asynchronously read bytes from specified file descriptor.} with - needs_wrapper = True has_side_effects = True out_of_line = True @@ -1414,7 +1450,6 @@ primop AsyncWriteOp "asyncWrite#" GenPrimOp Int# -> Int# -> Int# -> Addr# -> State# RealWorld-> (# State# RealWorld, Int#, Int# #) {Asynchronously write bytes from specified file descriptor.} with - needs_wrapper = True has_side_effects = True out_of_line = True @@ -1422,7 +1457,6 @@ primop AsyncDoProcOp "asyncDoProc#" GenPrimOp Addr# -> Addr# -> State# RealWorld-> (# State# RealWorld, Int#, Int# #) {Asynchronously perform procedure (first arg), passing it 2nd arg.} with - needs_wrapper = True has_side_effects = True out_of_line = True @@ -1539,6 +1573,7 @@ primop FinalizeWeakOp "finalizeWeak#" GenPrimOp primop TouchOp "touch#" GenPrimOp o -> State# RealWorld -> State# RealWorld with + code_size = { 0 } has_side_effects = True ------------------------------------------------------------------------ @@ -1558,7 +1593,6 @@ primop MakeStablePtrOp "makeStablePtr#" GenPrimOp primop DeRefStablePtrOp "deRefStablePtr#" GenPrimOp StablePtr# a -> State# RealWorld -> (# State# RealWorld, a #) with - needs_wrapper = True has_side_effects = True out_of_line = True @@ -1570,7 +1604,6 @@ primop EqStablePtrOp "eqStablePtr#" GenPrimOp primop MakeStableNameOp "makeStableName#" GenPrimOp a -> State# RealWorld -> (# State# RealWorld, StableName# a #) with - needs_wrapper = True has_side_effects = True out_of_line = True @@ -1598,6 +1631,7 @@ primop ParOp "par#" GenPrimOp -- Note that Par is lazy to avoid that the sparked thing -- gets evaluted strictly, which it should *not* be has_side_effects = True + code_size = { primOpCodeSizeForeignCall } primop GetSparkOp "getSpark#" GenPrimOp State# s -> (# State# s, Int#, a #) @@ -1687,6 +1721,8 @@ primtype BCO# primop AddrToHValueOp "addrToHValue#" GenPrimOp Addr# -> (# a #) {Convert an {\tt Addr\#} to a followable type.} + with + code_size = 0 primop MkApUpd0_Op "mkApUpd0#" GenPrimOp BCO# -> (# a #) diff --git a/utils/genprimopcode/Lexer.x b/utils/genprimopcode/Lexer.x index df710d7..6f48c02 100644 --- a/utils/genprimopcode/Lexer.x +++ b/utils/genprimopcode/Lexer.x @@ -54,6 +54,7 @@ words :- <0> "thats_all_folks" { mkT TThatsAllFolks } <0> [a-z][a-zA-Z0-9\#_]* { mkTv TLowerName } <0> [A-Z][a-zA-Z0-9\#_]* { mkTv TUpperName } + <0> [0-9][0-9]* { mkTv (TInteger . read) } <0> \" [^\"]* \" { mkTv (TString . tail . init) } [^\{\}]+ { mkTv TNoBraces } \n { mkTv TNoBraces } diff --git a/utils/genprimopcode/Main.hs b/utils/genprimopcode/Main.hs index 5b802bc..d9bfd21 100644 --- a/utils/genprimopcode/Main.hs +++ b/utils/genprimopcode/Main.hs @@ -46,13 +46,13 @@ main = getArgs >>= \args -> "commutable" "commutableOp" p_o_specs) - "--needs-wrapper" + "--code-size" -> putStr (gen_switch_from_attribs - "needs_wrapper" - "primOpNeedsWrapper" p_o_specs) + "code_size" + "primOpCodeSize" p_o_specs) - "--can-fail" - -> putStr (gen_switch_from_attribs + "--can-fail" + -> putStr (gen_switch_from_attribs "can_fail" "primOpCanFail" p_o_specs) @@ -91,7 +91,7 @@ known_args "--has-side-effects", "--out-of-line", "--commutable", - "--needs-wrapper", + "--code-size", "--can-fail", "--strictness", "--primop-primop-info", @@ -550,6 +550,7 @@ gen_switch_from_attribs attrib_name fn_name (Info defaults entries) getAltRhs (OptionFalse _) = "False" getAltRhs (OptionTrue _) = "True" + getAltRhs (OptionInteger _ i) = show i getAltRhs (OptionString _ s) = s mkAlt po diff --git a/utils/genprimopcode/Parser.y b/utils/genprimopcode/Parser.y index b20414d..5773abb 100644 --- a/utils/genprimopcode/Parser.y +++ b/utils/genprimopcode/Parser.y @@ -48,6 +48,7 @@ import Syntax lowerName { TLowerName $$ } upperName { TUpperName $$ } string { TString $$ } + integer { TInteger $$ } noBraces { TNoBraces $$ } %% @@ -66,6 +67,7 @@ pOption :: { Option } pOption : lowerName '=' false { OptionFalse $1 } | lowerName '=' true { OptionTrue $1 } | lowerName '=' pStuffBetweenBraces { OptionString $1 $3 } + | lowerName '=' integer { OptionInteger $1 $3 } pEntries :: { [Entry] } pEntries : pEntry pEntries { $1 : $2 } diff --git a/utils/genprimopcode/ParserM.hs b/utils/genprimopcode/ParserM.hs index edc300d..a2b39d7 100644 --- a/utils/genprimopcode/ParserM.hs +++ b/utils/genprimopcode/ParserM.hs @@ -81,6 +81,7 @@ data Token = TEOF | TUpperName String | TString String | TNoBraces String + | TInteger Int deriving Show -- Actions diff --git a/utils/genprimopcode/Syntax.hs b/utils/genprimopcode/Syntax.hs index 8094670..5fe4e0b 100644 --- a/utils/genprimopcode/Syntax.hs +++ b/utils/genprimopcode/Syntax.hs @@ -40,6 +40,7 @@ data Option = OptionFalse String -- name = False | OptionTrue String -- name = True | OptionString String String -- name = { ... unparsed stuff ... } + | OptionInteger String Int -- name = deriving Show -- categorises primops @@ -120,6 +121,7 @@ get_attrib_name :: Option -> String get_attrib_name (OptionFalse nm) = nm get_attrib_name (OptionTrue nm) = nm get_attrib_name (OptionString nm _) = nm +get_attrib_name (OptionInteger nm _) = nm lookup_attrib :: String -> [Option] -> Maybe Option lookup_attrib _ [] = Nothing -- 1.7.10.4