1 -----------------------------------------------------------------------------
5 -- (c) The University of Glasgow 2006
7 -----------------------------------------------------------------------------
15 #include "HsVersions.h"
18 import CmmUtils ( hasNoGlobalRegs )
19 import CLabel ( entryLblToInfoLbl )
21 import SMRep ( tablesNextToCode )
24 import Unique ( Unique )
25 import Panic ( panic )
35 -- -----------------------------------------------------------------------------
38 -- This pass inlines assignments to temporaries that are used just
39 -- once in the very next statement only. Generalising this would be
40 -- quite difficult (have to take into account aliasing of memory
41 -- writes, and so on), but at the moment it catches a number of useful
42 -- cases and lets the code generator generate much better code.
44 -- NB. This assumes that temporaries are single-assignment.
46 cmmMiniInline :: [CmmBasicBlock] -> [CmmBasicBlock]
47 cmmMiniInline blocks = map do_inline blocks
49 blockUses (BasicBlock _ stmts)
50 = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
52 uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
54 do_inline (BasicBlock id stmts)
55 = BasicBlock id (cmmMiniInlineStmts uses stmts)
58 cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
59 cmmMiniInlineStmts uses [] = []
60 cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
61 | Just 1 <- lookupUFM uses u,
62 Just stmts' <- lookForInline u expr stmts
65 trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
67 cmmMiniInlineStmts uses stmts'
69 cmmMiniInlineStmts uses (stmt:stmts)
70 = stmt : cmmMiniInlineStmts uses stmts
73 -- Try to inline a temporary assignment. We can skip over assignments to
74 -- other tempoararies, because we know that expressions aren't side-effecting
75 -- and temporaries are single-assignment.
76 lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
78 = case lookupUFM (getExprUses rhs) u of
79 Just 1 -> Just (inlineStmt u expr stmt : rest)
80 _other -> case lookForInline u expr rest of
82 Just stmts -> Just (stmt:stmts)
84 lookForInline u expr (CmmNop : rest)
85 = lookForInline u expr rest
87 lookForInline u expr (stmt:stmts)
88 = case lookupUFM (getStmtUses stmt) u of
89 Just 1 | ok_to_inline -> Just (inlineStmt u expr stmt : stmts)
92 -- we don't inline into CmmCall if the expression refers to global
93 -- registers. This is a HACK to avoid global registers clashing with
94 -- C argument-passing registers, really the back-end ought to be able
95 -- to handle it properly, but currently neither PprC nor the NCG can
96 -- do it. See also CgForeignCall:load_args_into_temps.
97 ok_to_inline = case stmt of
98 CmmCall{} -> hasNoGlobalRegs expr
101 -- -----------------------------------------------------------------------------
102 -- Boring Cmm traversals for collecting usage info and substitutions.
104 getStmtUses :: CmmStmt -> UniqFM Int
105 getStmtUses (CmmAssign _ e) = getExprUses e
106 getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
107 getStmtUses (CmmCall target _ es _)
108 = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
109 where uses (CmmForeignCall e _) = getExprUses e
111 getStmtUses (CmmCondBranch e _) = getExprUses e
112 getStmtUses (CmmSwitch e _) = getExprUses e
113 getStmtUses (CmmJump e _) = getExprUses e
114 getStmtUses _ = emptyUFM
116 getExprUses :: CmmExpr -> UniqFM Int
117 getExprUses (CmmReg (CmmLocal (LocalReg u _))) = unitUFM u 1
118 getExprUses (CmmRegOff (CmmLocal (LocalReg u _)) _) = unitUFM u 1
119 getExprUses (CmmLoad e _) = getExprUses e
120 getExprUses (CmmMachOp _ es) = getExprsUses es
121 getExprUses _other = emptyUFM
123 getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es)
125 inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
126 inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
127 inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
128 inlineStmt u a (CmmCall target regs es vols)
129 = CmmCall (infn target) regs es' vols
130 where infn (CmmForeignCall fn cconv) = CmmForeignCall fn cconv
131 infn (CmmPrim p) = CmmPrim p
132 es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
133 inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
134 inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
135 inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
136 inlineStmt u a other_stmt = other_stmt
138 inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
139 inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
142 inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
143 | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
145 inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
146 inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
147 inlineExpr u a other_expr = other_expr
149 -- -----------------------------------------------------------------------------
150 -- MachOp constant folder
152 -- Now, try to constant-fold the MachOps. The arguments have already
153 -- been optimized and folded.
156 :: MachOp -- The operation from an CmmMachOp
157 -> [CmmExpr] -- The optimized arguments
160 cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
162 MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
163 MO_Not r -> CmmLit (CmmInt (complement x) rep)
165 -- these are interesting: we must first narrow to the
166 -- "from" type, in order to truncate to the correct size.
167 -- The final narrow/widen to the destination type
168 -- is implicit in the CmmLit.
170 | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to)
171 | otherwise -> CmmLit (CmmInt (narrowS from x) to)
172 MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
174 _ -> panic "cmmMachOpFold: unknown unary op"
177 -- Eliminate conversion NOPs
178 cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
179 cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
181 -- Eliminate nested conversions where possible
182 cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
183 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
184 Just (_, rep3,signed2) <- isIntConversion conv_outer
186 -- widen then narrow to the same size is a nop
187 _ | rep1 < rep2 && rep1 == rep3 -> x
188 -- Widen then narrow to different size: collapse to single conversion
189 -- but remember to use the signedness from the widening, just in case
190 -- the final conversion is a widen.
191 | rep1 < rep2 && rep2 > rep3 ->
192 cmmMachOpFold (intconv signed1 rep1 rep3) [x]
193 -- Nested widenings: collapse if the signedness is the same
194 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
195 cmmMachOpFold (intconv signed1 rep1 rep3) [x]
196 -- Nested narrowings: collapse
197 | rep1 > rep2 && rep2 > rep3 ->
198 cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
200 CmmMachOp conv_outer args
202 isIntConversion (MO_U_Conv rep1 rep2)
203 | not (isFloatingRep rep1) && not (isFloatingRep rep2)
204 = Just (rep1,rep2,False)
205 isIntConversion (MO_S_Conv rep1 rep2)
206 | not (isFloatingRep rep1) && not (isFloatingRep rep2)
207 = Just (rep1,rep2,True)
208 isIntConversion _ = Nothing
210 intconv True = MO_S_Conv
211 intconv False = MO_U_Conv
213 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
214 -- but what if the architecture only supports word-sized loads, should
215 -- we do the transformation anyway?
217 cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
219 -- for comparisons: don't forget to narrow the arguments before
220 -- comparing, since they might be out of range.
221 MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
222 MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)
224 MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep)
225 MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
226 MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep)
227 MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)
229 MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep)
230 MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
231 MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep)
232 MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)
234 MO_Add r -> CmmLit (CmmInt (x + y) r)
235 MO_Sub r -> CmmLit (CmmInt (x - y) r)
236 MO_Mul r -> CmmLit (CmmInt (x * y) r)
237 MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
238 MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)
240 MO_And r -> CmmLit (CmmInt (x .&. y) r)
241 MO_Or r -> CmmLit (CmmInt (x .|. y) r)
242 MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)
244 MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
245 MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
246 MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
248 other -> CmmMachOp mop args
257 -- When possible, shift the constants to the right-hand side, so that we
258 -- can match for strength reductions. Note that the code generator will
259 -- also assume that constants have been shifted to the right when
262 cmmMachOpFold op [x@(CmmLit _), y]
263 | not (isLit y) && isCommutableMachOp op
264 = cmmMachOpFold op [y, x]
266 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
267 -- moved to the right, it is more likely that we will find
268 -- opportunities for constant folding when the expression is
271 -- ToDo: this appears to introduce a quadratic behaviour due to the
272 -- nested cmmMachOpFold. Can we fix this?
274 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
275 -- is also a lit (otherwise arg1 would be on the right). If we
276 -- put arg1 on the left of the rearranged expression, we'll get into a
277 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
279 cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
280 | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1)
281 = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
283 -- Make a RegOff if we can
284 cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
285 = CmmRegOff reg (fromIntegral (narrowS rep n))
286 cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
287 = CmmRegOff reg (off + fromIntegral (narrowS rep n))
288 cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
289 = CmmRegOff reg (- fromIntegral (narrowS rep n))
290 cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
291 = CmmRegOff reg (off - fromIntegral (narrowS rep n))
293 -- Fold label(+/-)offset into a CmmLit where possible
295 cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
296 = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
297 cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
298 = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
299 cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
300 = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
302 -- We can often do something with constants of 0 and 1 ...
304 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
315 MO_Ne r | isComparisonExpr x -> x
316 MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x'
317 MO_U_Gt r | isComparisonExpr x -> x
318 MO_S_Gt r | isComparisonExpr x -> x
319 MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
320 MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
321 MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
322 MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
323 MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
324 MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
325 other -> CmmMachOp mop args
327 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
332 MO_S_Rem r -> CmmLit (CmmInt 0 rep)
333 MO_U_Rem r -> CmmLit (CmmInt 0 rep)
334 MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x'
335 MO_Eq r | isComparisonExpr x -> x
336 MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
337 MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
338 MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
339 MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
340 MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
341 MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
342 MO_U_Ge r | isComparisonExpr x -> x
343 MO_S_Ge r | isComparisonExpr x -> x
344 other -> CmmMachOp mop args
346 -- Now look for multiplication/division by powers of 2 (integers).
348 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
351 -> case exactLog2 n of
353 Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
355 -> case exactLog2 n of
357 Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
361 unchanged = CmmMachOp mop args
363 -- Anything else is just too hard.
365 cmmMachOpFold mop args = CmmMachOp mop args
367 -- -----------------------------------------------------------------------------
370 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
371 -- from GCC. It requires bit manipulation primitives, and we use GHC
372 -- extensions. Tough.
374 -- Used to be in MachInstrs --SDM.
375 -- ToDo: remove use of unboxery --SDM.
380 exactLog2 :: Integer -> Maybe Integer
382 = if (x <= 0 || x >= 2147483648) then
385 case fromInteger x of { I# x# ->
386 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
389 Just (toInteger (I# (pow2 x#)))
392 pow2 x# | x# ==# 1# = 0#
393 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
396 -- -----------------------------------------------------------------------------
397 -- widening / narrowing
399 narrowU :: MachRep -> Integer -> Integer
400 narrowU I8 x = fromIntegral (fromIntegral x :: Word8)
401 narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
402 narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
403 narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
404 narrowU _ _ = panic "narrowTo"
406 narrowS :: MachRep -> Integer -> Integer
407 narrowS I8 x = fromIntegral (fromIntegral x :: Int8)
408 narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
409 narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
410 narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
411 narrowS _ _ = panic "narrowTo"
413 -- -----------------------------------------------------------------------------
417 This is a simple pass that replaces tail-recursive functions like this:
432 the latter generates better C code, because the C compiler treats it
433 like a loop, and brings full loop optimisation to bear.
435 In my measurements this makes little or no difference to anything
436 except factorial, but what the hell.
439 cmmLoopifyForC :: CmmTop -> CmmTop
440 cmmLoopifyForC p@(CmmProc info entry_lbl [] blocks@(BasicBlock top_id _ : _))
441 | null info = p -- only if there's an info table, ignore case alts
443 -- pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
444 CmmProc info entry_lbl [] blocks'
445 where blocks' = [ BasicBlock id (map do_stmt stmts)
446 | BasicBlock id stmts <- blocks ]
448 do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
452 jump_lbl | tablesNextToCode = entryLblToInfoLbl entry_lbl
453 | otherwise = entry_lbl
455 cmmLoopifyForC top = top
457 -- -----------------------------------------------------------------------------
460 isLit (CmmLit _) = True
463 isComparisonExpr :: CmmExpr -> Bool
464 isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
465 isComparisonExpr _other = False
467 maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
468 maybeInvertConditionalExpr (CmmMachOp op args)
469 | Just op' <- maybeInvertComparison op = Just (CmmMachOp op' args)
470 maybeInvertConditionalExpr _ = Nothing