1 -----------------------------------------------------------------------------
5 -- (c) The University of Glasgow 2006
7 -----------------------------------------------------------------------------
15 #include "HsVersions.h"
18 import CmmUtils ( hasNoGlobalRegs )
19 import CLabel ( entryLblToInfoLbl )
21 import SMRep ( tablesNextToCode )
24 import Unique ( Unique )
25 import Panic ( panic )
35 -- -----------------------------------------------------------------------------
39 This pass inlines assignments to temporaries that are used just
40 once. It works as follows:
42 - count uses of each temporary
43 - for each temporary that occurs just once:
44 - attempt to push it forward to the statement that uses it
45 - only push forward past assignments to other temporaries
46 (assumes that temporaries are single-assignment)
47 - if we reach the statement that uses it, inline the rhs
48 and delete the original assignment.
50 Possible generalisations: here is an example from factorial
55 if (_smi != 0) goto cmK;
64 We want to inline _smi and _smn. To inline _smn:
66 - we must be able to push forward past assignments to global regs.
67 We can do this if the rhs of the assignment we are pushing
68 forward doesn't refer to the global reg being assigned to; easy
73 - It is a trivial replacement, reg for reg, but it occurs more than
75 - We can inline trivial assignments even if the temporary occurs
76 more than once, as long as we don't eliminate the original assignment
77 (this doesn't help much on its own).
78 - We need to be able to propagate the assignment forward through jumps;
79 if we did this, we would find that it can be inlined safely in all
83 cmmMiniInline :: [CmmBasicBlock] -> [CmmBasicBlock]
84 cmmMiniInline blocks = map do_inline blocks
86 blockUses (BasicBlock _ stmts)
87 = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
89 uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
91 do_inline (BasicBlock id stmts)
92 = BasicBlock id (cmmMiniInlineStmts uses stmts)
95 cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
96 cmmMiniInlineStmts uses [] = []
97 cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
98 | Just 1 <- lookupUFM uses u,
99 Just stmts' <- lookForInline u expr stmts
102 trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
104 cmmMiniInlineStmts uses stmts'
106 cmmMiniInlineStmts uses (stmt:stmts)
107 = stmt : cmmMiniInlineStmts uses stmts
110 -- Try to inline a temporary assignment. We can skip over assignments to
111 -- other tempoararies, because we know that expressions aren't side-effecting
112 -- and temporaries are single-assignment.
113 lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
115 = case lookupUFM (getExprUses rhs) u of
116 Just 1 -> Just (inlineStmt u expr stmt : rest)
117 _other -> case lookForInline u expr rest of
119 Just stmts -> Just (stmt:stmts)
121 lookForInline u expr (CmmNop : rest)
122 = lookForInline u expr rest
124 lookForInline u expr (stmt:stmts)
125 = case lookupUFM (getStmtUses stmt) u of
126 Just 1 | ok_to_inline -> Just (inlineStmt u expr stmt : stmts)
129 -- we don't inline into CmmCall if the expression refers to global
130 -- registers. This is a HACK to avoid global registers clashing with
131 -- C argument-passing registers, really the back-end ought to be able
132 -- to handle it properly, but currently neither PprC nor the NCG can
133 -- do it. See also CgForeignCall:load_args_into_temps.
134 ok_to_inline = case stmt of
135 CmmCall{} -> hasNoGlobalRegs expr
138 -- -----------------------------------------------------------------------------
139 -- Boring Cmm traversals for collecting usage info and substitutions.
141 getStmtUses :: CmmStmt -> UniqFM Int
142 getStmtUses (CmmAssign _ e) = getExprUses e
143 getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
144 getStmtUses (CmmCall target _ es _)
145 = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
146 where uses (CmmForeignCall e _) = getExprUses e
148 getStmtUses (CmmCondBranch e _) = getExprUses e
149 getStmtUses (CmmSwitch e _) = getExprUses e
150 getStmtUses (CmmJump e _) = getExprUses e
151 getStmtUses _ = emptyUFM
153 getExprUses :: CmmExpr -> UniqFM Int
154 getExprUses (CmmReg (CmmLocal (LocalReg u _))) = unitUFM u 1
155 getExprUses (CmmRegOff (CmmLocal (LocalReg u _)) _) = unitUFM u 1
156 getExprUses (CmmLoad e _) = getExprUses e
157 getExprUses (CmmMachOp _ es) = getExprsUses es
158 getExprUses _other = emptyUFM
160 getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es)
162 inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
163 inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
164 inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
165 inlineStmt u a (CmmCall target regs es vols)
166 = CmmCall (infn target) regs es' vols
167 where infn (CmmForeignCall fn cconv) = CmmForeignCall fn cconv
168 infn (CmmPrim p) = CmmPrim p
169 es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
170 inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
171 inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
172 inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
173 inlineStmt u a other_stmt = other_stmt
175 inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
176 inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
179 inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
180 | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
182 inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
183 inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
184 inlineExpr u a other_expr = other_expr
186 -- -----------------------------------------------------------------------------
187 -- MachOp constant folder
189 -- Now, try to constant-fold the MachOps. The arguments have already
190 -- been optimized and folded.
193 :: MachOp -- The operation from an CmmMachOp
194 -> [CmmExpr] -- The optimized arguments
197 cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
199 MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
200 MO_Not r -> CmmLit (CmmInt (complement x) rep)
202 -- these are interesting: we must first narrow to the
203 -- "from" type, in order to truncate to the correct size.
204 -- The final narrow/widen to the destination type
205 -- is implicit in the CmmLit.
207 | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to)
208 | otherwise -> CmmLit (CmmInt (narrowS from x) to)
209 MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
211 _ -> panic "cmmMachOpFold: unknown unary op"
214 -- Eliminate conversion NOPs
215 cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
216 cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
218 -- Eliminate nested conversions where possible
219 cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
220 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
221 Just (_, rep3,signed2) <- isIntConversion conv_outer
223 -- widen then narrow to the same size is a nop
224 _ | rep1 < rep2 && rep1 == rep3 -> x
225 -- Widen then narrow to different size: collapse to single conversion
226 -- but remember to use the signedness from the widening, just in case
227 -- the final conversion is a widen.
228 | rep1 < rep2 && rep2 > rep3 ->
229 cmmMachOpFold (intconv signed1 rep1 rep3) [x]
230 -- Nested widenings: collapse if the signedness is the same
231 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
232 cmmMachOpFold (intconv signed1 rep1 rep3) [x]
233 -- Nested narrowings: collapse
234 | rep1 > rep2 && rep2 > rep3 ->
235 cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
237 CmmMachOp conv_outer args
239 isIntConversion (MO_U_Conv rep1 rep2)
240 | not (isFloatingRep rep1) && not (isFloatingRep rep2)
241 = Just (rep1,rep2,False)
242 isIntConversion (MO_S_Conv rep1 rep2)
243 | not (isFloatingRep rep1) && not (isFloatingRep rep2)
244 = Just (rep1,rep2,True)
245 isIntConversion _ = Nothing
247 intconv True = MO_S_Conv
248 intconv False = MO_U_Conv
250 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
251 -- but what if the architecture only supports word-sized loads, should
252 -- we do the transformation anyway?
254 cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
256 -- for comparisons: don't forget to narrow the arguments before
257 -- comparing, since they might be out of range.
258 MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
259 MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)
261 MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep)
262 MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
263 MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep)
264 MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)
266 MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep)
267 MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
268 MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep)
269 MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)
271 MO_Add r -> CmmLit (CmmInt (x + y) r)
272 MO_Sub r -> CmmLit (CmmInt (x - y) r)
273 MO_Mul r -> CmmLit (CmmInt (x * y) r)
274 MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
275 MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)
277 MO_And r -> CmmLit (CmmInt (x .&. y) r)
278 MO_Or r -> CmmLit (CmmInt (x .|. y) r)
279 MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)
281 MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
282 MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
283 MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
285 other -> CmmMachOp mop args
294 -- When possible, shift the constants to the right-hand side, so that we
295 -- can match for strength reductions. Note that the code generator will
296 -- also assume that constants have been shifted to the right when
299 cmmMachOpFold op [x@(CmmLit _), y]
300 | not (isLit y) && isCommutableMachOp op
301 = cmmMachOpFold op [y, x]
303 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
304 -- moved to the right, it is more likely that we will find
305 -- opportunities for constant folding when the expression is
308 -- ToDo: this appears to introduce a quadratic behaviour due to the
309 -- nested cmmMachOpFold. Can we fix this?
311 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
312 -- is also a lit (otherwise arg1 would be on the right). If we
313 -- put arg1 on the left of the rearranged expression, we'll get into a
314 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
316 cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
317 | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1)
318 = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
320 -- Make a RegOff if we can
321 cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
322 = CmmRegOff reg (fromIntegral (narrowS rep n))
323 cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
324 = CmmRegOff reg (off + fromIntegral (narrowS rep n))
325 cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
326 = CmmRegOff reg (- fromIntegral (narrowS rep n))
327 cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
328 = CmmRegOff reg (off - fromIntegral (narrowS rep n))
330 -- Fold label(+/-)offset into a CmmLit where possible
332 cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
333 = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
334 cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
335 = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
336 cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
337 = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
339 -- We can often do something with constants of 0 and 1 ...
341 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
352 MO_Ne r | isComparisonExpr x -> x
353 MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x'
354 MO_U_Gt r | isComparisonExpr x -> x
355 MO_S_Gt r | isComparisonExpr x -> x
356 MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
357 MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
358 MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
359 MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
360 MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
361 MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
362 other -> CmmMachOp mop args
364 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
369 MO_S_Rem r -> CmmLit (CmmInt 0 rep)
370 MO_U_Rem r -> CmmLit (CmmInt 0 rep)
371 MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x'
372 MO_Eq r | isComparisonExpr x -> x
373 MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
374 MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
375 MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
376 MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
377 MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
378 MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
379 MO_U_Ge r | isComparisonExpr x -> x
380 MO_S_Ge r | isComparisonExpr x -> x
381 other -> CmmMachOp mop args
383 -- Now look for multiplication/division by powers of 2 (integers).
385 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
388 -> case exactLog2 n of
390 Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
392 -> case exactLog2 n of
394 Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
398 unchanged = CmmMachOp mop args
400 -- Anything else is just too hard.
402 cmmMachOpFold mop args = CmmMachOp mop args
404 -- -----------------------------------------------------------------------------
407 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
408 -- from GCC. It requires bit manipulation primitives, and we use GHC
409 -- extensions. Tough.
411 -- Used to be in MachInstrs --SDM.
412 -- ToDo: remove use of unboxery --SDM.
417 exactLog2 :: Integer -> Maybe Integer
419 = if (x <= 0 || x >= 2147483648) then
422 case fromInteger x of { I# x# ->
423 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
426 Just (toInteger (I# (pow2 x#)))
429 pow2 x# | x# ==# 1# = 0#
430 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
433 -- -----------------------------------------------------------------------------
434 -- widening / narrowing
436 narrowU :: MachRep -> Integer -> Integer
437 narrowU I8 x = fromIntegral (fromIntegral x :: Word8)
438 narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
439 narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
440 narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
441 narrowU _ _ = panic "narrowTo"
443 narrowS :: MachRep -> Integer -> Integer
444 narrowS I8 x = fromIntegral (fromIntegral x :: Int8)
445 narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
446 narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
447 narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
448 narrowS _ _ = panic "narrowTo"
450 -- -----------------------------------------------------------------------------
454 This is a simple pass that replaces tail-recursive functions like this:
469 the latter generates better C code, because the C compiler treats it
470 like a loop, and brings full loop optimisation to bear.
472 In my measurements this makes little or no difference to anything
473 except factorial, but what the hell.
476 cmmLoopifyForC :: CmmTop -> CmmTop
477 cmmLoopifyForC p@(CmmProc info entry_lbl [] blocks@(BasicBlock top_id _ : _))
478 | null info = p -- only if there's an info table, ignore case alts
480 -- pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
481 CmmProc info entry_lbl [] blocks'
482 where blocks' = [ BasicBlock id (map do_stmt stmts)
483 | BasicBlock id stmts <- blocks ]
485 do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
489 jump_lbl | tablesNextToCode = entryLblToInfoLbl entry_lbl
490 | otherwise = entry_lbl
492 cmmLoopifyForC top = top
494 -- -----------------------------------------------------------------------------
497 isLit (CmmLit _) = True
500 isComparisonExpr :: CmmExpr -> Bool
501 isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
502 isComparisonExpr _other = False
504 maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
505 maybeInvertConditionalExpr (CmmMachOp op args)
506 | Just op' <- maybeInvertComparison op = Just (CmmMachOp op' args)
507 maybeInvertConditionalExpr _ = Nothing