1 -----------------------------------------------------------------------------
5 -- (c) The University of Glasgow 2006
7 -----------------------------------------------------------------------------
15 #include "HsVersions.h"
33 -- -----------------------------------------------------------------------------
37 This pass inlines assignments to temporaries that are used just
38 once. It works as follows:
40 - count uses of each temporary
41 - for each temporary that occurs just once:
42 - attempt to push it forward to the statement that uses it
43 - only push forward past assignments to other temporaries
44 (assumes that temporaries are single-assignment)
45 - if we reach the statement that uses it, inline the rhs
46 and delete the original assignment.
48 Possible generalisations: here is an example from factorial
53 if (_smi != 0) goto cmK;
62 We want to inline _smi and _smn. To inline _smn:
64 - we must be able to push forward past assignments to global regs.
65 We can do this if the rhs of the assignment we are pushing
66 forward doesn't refer to the global reg being assigned to; easy
71 - It is a trivial replacement, reg for reg, but it occurs more than
73 - We can inline trivial assignments even if the temporary occurs
74 more than once, as long as we don't eliminate the original assignment
75 (this doesn't help much on its own).
76 - We need to be able to propagate the assignment forward through jumps;
77 if we did this, we would find that it can be inlined safely in all
81 cmmMiniInline :: [CmmBasicBlock] -> [CmmBasicBlock]
82 cmmMiniInline blocks = map do_inline blocks
84 blockUses (BasicBlock _ stmts)
85 = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
87 uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
89 do_inline (BasicBlock id stmts)
90 = BasicBlock id (cmmMiniInlineStmts uses stmts)
93 cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
94 cmmMiniInlineStmts uses [] = []
95 cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _ _)) expr) : stmts)
96 | Just 1 <- lookupUFM uses u,
97 Just stmts' <- lookForInline u expr stmts
100 trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
102 cmmMiniInlineStmts uses stmts'
104 cmmMiniInlineStmts uses (stmt:stmts)
105 = stmt : cmmMiniInlineStmts uses stmts
108 -- Try to inline a temporary assignment. We can skip over assignments to
109 -- other tempoararies, because we know that expressions aren't side-effecting
110 -- and temporaries are single-assignment.
111 lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _ _)) rhs) : rest)
113 = case lookupUFM (getExprUses rhs) u of
114 Just 1 -> Just (inlineStmt u expr stmt : rest)
115 _other -> case lookForInline u expr rest of
117 Just stmts -> Just (stmt:stmts)
119 lookForInline u expr (CmmNop : rest)
120 = lookForInline u expr rest
122 lookForInline u expr (stmt:stmts)
123 = case lookupUFM (getStmtUses stmt) u of
124 Just 1 | ok_to_inline -> Just (inlineStmt u expr stmt : stmts)
127 -- we don't inline into CmmCall if the expression refers to global
128 -- registers. This is a HACK to avoid global registers clashing with
129 -- C argument-passing registers, really the back-end ought to be able
130 -- to handle it properly, but currently neither PprC nor the NCG can
131 -- do it. See also CgForeignCall:load_args_into_temps.
132 ok_to_inline = case stmt of
133 CmmCall{} -> hasNoGlobalRegs expr
136 -- -----------------------------------------------------------------------------
137 -- Boring Cmm traversals for collecting usage info and substitutions.
139 getStmtUses :: CmmStmt -> UniqFM Int
140 getStmtUses (CmmAssign _ e) = getExprUses e
141 getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
142 getStmtUses (CmmCall target _ es _)
143 = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
144 where uses (CmmCallee e _) = getExprUses e
146 getStmtUses (CmmCondBranch e _) = getExprUses e
147 getStmtUses (CmmSwitch e _) = getExprUses e
148 getStmtUses (CmmJump e _) = getExprUses e
149 getStmtUses _ = emptyUFM
151 getExprUses :: CmmExpr -> UniqFM Int
152 getExprUses (CmmReg (CmmLocal (LocalReg u _ _))) = unitUFM u 1
153 getExprUses (CmmRegOff (CmmLocal (LocalReg u _ _)) _) = unitUFM u 1
154 getExprUses (CmmLoad e _) = getExprUses e
155 getExprUses (CmmMachOp _ es) = getExprsUses es
156 getExprUses _other = emptyUFM
158 getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es)
160 inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
161 inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
162 inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
163 inlineStmt u a (CmmCall target regs es srt)
164 = CmmCall (infn target) regs es' srt
165 where infn (CmmCallee fn cconv) = CmmCallee fn cconv
166 infn (CmmPrim p) = CmmPrim p
167 es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
168 inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
169 inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
170 inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
171 inlineStmt u a other_stmt = other_stmt
173 inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
174 inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _ _)))
177 inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep _)) off)
178 | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
180 inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
181 inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
182 inlineExpr u a other_expr = other_expr
184 -- -----------------------------------------------------------------------------
185 -- MachOp constant folder
187 -- Now, try to constant-fold the MachOps. The arguments have already
188 -- been optimized and folded.
191 :: MachOp -- The operation from an CmmMachOp
192 -> [CmmExpr] -- The optimized arguments
195 cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
197 MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
198 MO_Not r -> CmmLit (CmmInt (complement x) rep)
200 -- these are interesting: we must first narrow to the
201 -- "from" type, in order to truncate to the correct size.
202 -- The final narrow/widen to the destination type
203 -- is implicit in the CmmLit.
205 | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to)
206 | otherwise -> CmmLit (CmmInt (narrowS from x) to)
207 MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
209 _ -> panic "cmmMachOpFold: unknown unary op"
212 -- Eliminate conversion NOPs
213 cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
214 cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
216 -- Eliminate nested conversions where possible
217 cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
218 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
219 Just (_, rep3,signed2) <- isIntConversion conv_outer
221 -- widen then narrow to the same size is a nop
222 _ | rep1 < rep2 && rep1 == rep3 -> x
223 -- Widen then narrow to different size: collapse to single conversion
224 -- but remember to use the signedness from the widening, just in case
225 -- the final conversion is a widen.
226 | rep1 < rep2 && rep2 > rep3 ->
227 cmmMachOpFold (intconv signed1 rep1 rep3) [x]
228 -- Nested widenings: collapse if the signedness is the same
229 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
230 cmmMachOpFold (intconv signed1 rep1 rep3) [x]
231 -- Nested narrowings: collapse
232 | rep1 > rep2 && rep2 > rep3 ->
233 cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
235 CmmMachOp conv_outer args
237 isIntConversion (MO_U_Conv rep1 rep2)
238 | not (isFloatingRep rep1) && not (isFloatingRep rep2)
239 = Just (rep1,rep2,False)
240 isIntConversion (MO_S_Conv rep1 rep2)
241 | not (isFloatingRep rep1) && not (isFloatingRep rep2)
242 = Just (rep1,rep2,True)
243 isIntConversion _ = Nothing
245 intconv True = MO_S_Conv
246 intconv False = MO_U_Conv
248 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
249 -- but what if the architecture only supports word-sized loads, should
250 -- we do the transformation anyway?
252 cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
254 -- for comparisons: don't forget to narrow the arguments before
255 -- comparing, since they might be out of range.
256 MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
257 MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)
259 MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep)
260 MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
261 MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep)
262 MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)
264 MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep)
265 MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
266 MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep)
267 MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)
269 MO_Add r -> CmmLit (CmmInt (x + y) r)
270 MO_Sub r -> CmmLit (CmmInt (x - y) r)
271 MO_Mul r -> CmmLit (CmmInt (x * y) r)
272 MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
273 MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)
275 MO_And r -> CmmLit (CmmInt (x .&. y) r)
276 MO_Or r -> CmmLit (CmmInt (x .|. y) r)
277 MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)
279 MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
280 MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
281 MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
283 other -> CmmMachOp mop args
292 -- When possible, shift the constants to the right-hand side, so that we
293 -- can match for strength reductions. Note that the code generator will
294 -- also assume that constants have been shifted to the right when
297 cmmMachOpFold op [x@(CmmLit _), y]
298 | not (isLit y) && isCommutableMachOp op
299 = cmmMachOpFold op [y, x]
301 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
302 -- moved to the right, it is more likely that we will find
303 -- opportunities for constant folding when the expression is
306 -- ToDo: this appears to introduce a quadratic behaviour due to the
307 -- nested cmmMachOpFold. Can we fix this?
309 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
310 -- is also a lit (otherwise arg1 would be on the right). If we
311 -- put arg1 on the left of the rearranged expression, we'll get into a
312 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
314 -- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
315 -- PicBaseReg from the corresponding label (or label difference).
317 cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
318 | mop1 == mop2 && isAssociativeMachOp mop1
319 && not (isLit arg1) && not (isPicReg arg1)
320 = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
322 -- Make a RegOff if we can
323 cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
324 = CmmRegOff reg (fromIntegral (narrowS rep n))
325 cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
326 = CmmRegOff reg (off + fromIntegral (narrowS rep n))
327 cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
328 = CmmRegOff reg (- fromIntegral (narrowS rep n))
329 cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
330 = CmmRegOff reg (off - fromIntegral (narrowS rep n))
332 -- Fold label(+/-)offset into a CmmLit where possible
334 cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
335 = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
336 cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
337 = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
338 cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
339 = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
342 -- Comparison of literal with narrowed/widened operand: perform
343 -- the comparison at a different width, as long as the literal is
346 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
347 -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
349 cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
350 | Just (rep, narrow) <- maybe_conversion conv,
351 Just narrow_cmp <- maybe_comparison cmp rep,
352 let narrow_i = narrow rep i,
354 = cmmMachOpFold narrow_cmp [x, CmmLit (CmmInt narrow_i rep)]
356 maybe_conversion (MO_U_Conv from _) = Just (from, narrowU)
357 maybe_conversion (MO_S_Conv from _) = Just (from, narrowS)
358 maybe_conversion _ = Nothing
360 maybe_comparison (MO_U_Gt _) rep = Just (MO_U_Gt rep)
361 maybe_comparison (MO_U_Ge _) rep = Just (MO_U_Ge rep)
362 maybe_comparison (MO_U_Lt _) rep = Just (MO_U_Lt rep)
363 maybe_comparison (MO_U_Le _) rep = Just (MO_U_Le rep)
364 maybe_comparison (MO_S_Gt _) rep = Just (MO_S_Gt rep)
365 maybe_comparison (MO_S_Ge _) rep = Just (MO_S_Ge rep)
366 maybe_comparison (MO_S_Lt _) rep = Just (MO_S_Lt rep)
367 maybe_comparison (MO_S_Le _) rep = Just (MO_S_Le rep)
368 maybe_comparison (MO_Eq _) rep = Just (MO_Eq rep)
369 maybe_comparison _ _ = Nothing
373 -- We can often do something with constants of 0 and 1 ...
375 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
386 MO_Ne r | isComparisonExpr x -> x
387 MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x'
388 MO_U_Gt r | isComparisonExpr x -> x
389 MO_S_Gt r | isComparisonExpr x -> x
390 MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
391 MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
392 MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
393 MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
394 MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
395 MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
396 other -> CmmMachOp mop args
398 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
403 MO_S_Rem r -> CmmLit (CmmInt 0 rep)
404 MO_U_Rem r -> CmmLit (CmmInt 0 rep)
405 MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x'
406 MO_Eq r | isComparisonExpr x -> x
407 MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
408 MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
409 MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
410 MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
411 MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
412 MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
413 MO_U_Ge r | isComparisonExpr x -> x
414 MO_S_Ge r | isComparisonExpr x -> x
415 other -> CmmMachOp mop args
417 -- Now look for multiplication/division by powers of 2 (integers).
419 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
422 | Just p <- exactLog2 n ->
423 CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
425 | Just p <- exactLog2 n,
426 CmmReg _ <- x -> -- We duplicate x below, hence require
427 -- it is a reg. FIXME: remove this restriction.
428 -- shift right is not the same as quot, because it rounds
429 -- to minus infinity, whereasq uot rounds toward zero.
430 -- To fix this up, we add one less than the divisor to the
431 -- dividend if it is a negative number.
433 -- to avoid a test/jump, we use the following sequence:
434 -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
435 -- x2 = y & (divisor-1)
436 -- result = (x+x2) >>= log2(divisor)
437 -- this could be done a bit more simply using conditional moves,
438 -- but we're processor independent here.
440 -- we optimise the divide by 2 case slightly, generating
441 -- x1 = x >> word_size-1 (unsigned)
442 -- return = (x + x1) >>= log2(divisor)
444 bits = fromIntegral (machRepBitWidth rep) - 1
445 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
446 x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
447 x2 = if p == 1 then x1 else
448 CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
449 x3 = CmmMachOp (MO_Add rep) [x, x2]
451 CmmMachOp (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)]
455 unchanged = CmmMachOp mop args
457 -- Anything else is just too hard.
459 cmmMachOpFold mop args = CmmMachOp mop args
461 -- -----------------------------------------------------------------------------
464 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
465 -- from GCC. It requires bit manipulation primitives, and we use GHC
466 -- extensions. Tough.
468 -- Used to be in MachInstrs --SDM.
469 -- ToDo: remove use of unboxery --SDM.
474 exactLog2 :: Integer -> Maybe Integer
476 = if (x <= 0 || x >= 2147483648) then
479 case fromInteger x of { I# x# ->
480 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
483 Just (toInteger (I# (pow2 x#)))
486 pow2 x# | x# ==# 1# = 0#
487 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
490 -- -----------------------------------------------------------------------------
491 -- widening / narrowing
493 narrowU :: MachRep -> Integer -> Integer
494 narrowU I8 x = fromIntegral (fromIntegral x :: Word8)
495 narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
496 narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
497 narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
498 narrowU _ _ = panic "narrowTo"
500 narrowS :: MachRep -> Integer -> Integer
501 narrowS I8 x = fromIntegral (fromIntegral x :: Int8)
502 narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
503 narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
504 narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
505 narrowS _ _ = panic "narrowTo"
507 -- -----------------------------------------------------------------------------
511 This is a simple pass that replaces tail-recursive functions like this:
526 the latter generates better C code, because the C compiler treats it
527 like a loop, and brings full loop optimisation to bear.
529 In my measurements this makes little or no difference to anything
530 except factorial, but what the hell.
533 cmmLoopifyForC :: RawCmmTop -> RawCmmTop
534 cmmLoopifyForC p@(CmmProc info entry_lbl [] blocks@(BasicBlock top_id _ : _))
535 | null info = p -- only if there's an info table, ignore case alts
537 -- pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
538 CmmProc info entry_lbl [] blocks'
539 where blocks' = [ BasicBlock id (map do_stmt stmts)
540 | BasicBlock id stmts <- blocks ]
542 do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
546 jump_lbl | tablesNextToCode = entryLblToInfoLbl entry_lbl
547 | otherwise = entry_lbl
549 cmmLoopifyForC top = top
551 -- -----------------------------------------------------------------------------
554 isLit (CmmLit _) = True
557 isComparisonExpr :: CmmExpr -> Bool
558 isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
559 isComparisonExpr _other = False
561 maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
562 maybeInvertConditionalExpr (CmmMachOp op args)
563 | Just op' <- maybeInvertComparison op = Just (CmmMachOp op' args)
564 maybeInvertConditionalExpr _ = Nothing
566 isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True