1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
20 import RegAllocInfo ( jumpDests )
22 import PositionIndependentCode
25 import PprCmm ( pprStmt, pprCmms )
27 import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
28 #if powerpc_TARGET_ARCH
29 import CLabel ( mkRtsCodeLabel )
33 import Unique ( Unique, getUnique )
36 import List ( groupBy, sortBy )
37 import CLabel ( pprCLabel )
38 import ErrUtils ( dumpIfSet_dyn )
39 import CmdLineOpts ( DynFlags, DynFlag(..), dopt, opt_Static,
40 opt_EnsureSplittableC, opt_PIC )
43 import qualified Pretty
51 import List ( intersperse )
60 The native-code generator has machine-independent and
61 machine-dependent modules.
63 This module ("AsmCodeGen") is the top-level machine-independent
64 module. Before entering machine-dependent land, we do some
65 machine-independent optimisations (defined below) on the
68 We convert to the machine-specific 'Instr' datatype with
69 'cmmCodeGen', assuming an infinite supply of registers. We then use
70 a machine-independent register allocator ('regAlloc') to rejoin
71 reality. Obviously, 'regAlloc' has machine-specific helper
72 functions (see about "RegAllocInfo" below).
74 Finally, we order the basic blocks of the function so as to minimise
75 the number of jumps between blocks, by utilising fallthrough wherever
78 The machine-dependent bits break down as follows:
80 * ["MachRegs"] Everything about the target platform's machine
81 registers (and immediate operands, and addresses, which tend to
82 intermingle/interact with registers).
84 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
85 have a module of its own), plus a miscellany of other things
86 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
88 * ["MachCodeGen"] is where 'Cmm' stuff turns into
91 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
94 * ["RegAllocInfo"] In the register allocator, we manipulate
95 'MRegsState's, which are 'BitSet's, one bit per machine register.
96 When we want to say something about a specific machine register
97 (e.g., ``it gets clobbered by this instruction''), we set/unset
98 its bit. Obviously, we do this 'BitSet' thing for efficiency
101 The 'RegAllocInfo' module collects together the machine-specific
102 info needed to do register allocation.
104 * ["RegisterAlloc"] The (machine-independent) register allocator.
107 -- -----------------------------------------------------------------------------
108 -- Top-level of the native codegen
110 -- NB. We *lazily* compile each block of code for space reasons.
-- | Top level of the native code generator: optimise every top-level Cmm
-- chunk, generate and render machine code, and append the stubs needed
-- for imported symbols.
-- NOTE(review): several lines of this binding are not visible in this
-- view; comments describe only the visible code.
nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
nativeCodeGen dflags cmms us
  = let ((ppr_cmms, insn_sdoc, imports), _) = initUs us $
          cgCmm (concat (map add_split cmms))

        -- Compile each chunk, collecting the optimised Cmm (for -ddump),
        -- the rendered instruction doc, and the imported labels.
        cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
          lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
          let (cmms,docs,imps) = unzip3 results in
          returnUs (Cmm cmms, my_vcat docs, concat imps)

        dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
        return (insn_sdoc Pretty.$$ dyld_stubs imports)

        -- When splittable C is required, prefix each chunk with a split
        -- marker procedure so the assembly can later be cut apart.
          | opt_EnsureSplittableC = split_marker : tops

        split_marker = CmmProc [] mkSplitMarkerLabel [] []

        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
{-      dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
                          map head $ group $ sort imps-}

        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
          | needImportedSymbols
            (pprGotDeclaration :) $
            map (pprImportedSymbol . fst . head) $
            groupBy (\(_,a) (_,b) -> a == b) $
            sortBy (\(_,a) (_,b) -> compare a b) $
          where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
                astyle = mkCodeStyle AsmStyle

        -- my_vcat: one definition per build flavour (the surrounding
        -- CPP conditionals are not visible in this view); the debug
        -- flavour brackets each chunk with ncg_debug markers.
        my_vcat sds = Pretty.vcat sds
        my_vcat sds = Pretty.vcat (
          Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
          Pretty.$$ Pretty.char ' '
170 -- Complete native code generation phase for a single top-level chunk
-- | Complete native code generation for one top-level chunk:
-- fix global-register assignments, run the generic Cmm optimiser,
-- select instructions, allocate registers, sequence the basic blocks
-- and apply the x86 FP kludge; returns the optimised Cmm (for -ddump),
-- the rendered assembly, and the labels imported along the way.
cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
cmmNativeGen dflags cmm
  = {-# SCC "fixAssigns" #-}
    fixAssignsTop cmm `thenUs` \ fixed_cmm ->
    {-# SCC "genericOpt" #-}
    cmmToCmm fixed_cmm `bind` \ (cmm, imports) ->
    -- Only keep the optimised Cmm around when it will be dumped.
    (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
      else CmmData Text []) `bind` \ ppr_cmm ->
    {-# SCC "genMachCode" #-}
    genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
    {-# SCC "regAlloc" #-}
    map regAlloc pre_regalloc `bind` \ with_regs ->
    {-# SCC "sequenceBlocks" #-}
    map sequenceTop with_regs `bind` \ sequenced ->
    {-# SCC "x86fp_kludge" #-}
    map x86fp_kludge sequenced `bind` \ final_mach_code ->
    Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
    returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)

-- NOTE(review): the two sets of x86fp_kludge equations below are
-- presumably selected by CPP target-arch conditionals that are not
-- visible in this view; otherwise the final equation is unreachable.
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge top@(CmmProc info lbl params code) =
    CmmProc info lbl params (map bb_i386_insert_ffrees code)
      bb_i386_insert_ffrees (BasicBlock id instrs) =
        BasicBlock id (i386_insert_ffrees instrs)
x86fp_kludge top = top
207 -- -----------------------------------------------------------------------------
208 -- Sequencing the basic blocks
210 -- Cmm BasicBlocks are self-contained entities: they always end in a
211 -- jump, either non-local or to another basic block in the same proc.
212 -- In this phase, we attempt to place the basic blocks in a sequence
213 -- such that as many of the local jumps as possible turn into
-- | Reorder the basic blocks of one procedure so that local jumps can
-- become fallthroughs; data sections pass through untouched.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop dat@(CmmData _ _) = dat
sequenceTop (CmmProc info lbl params body) =
    CmmProc info lbl params (sequenceBlocks body)
221 -- The algorithm is very simple (and stupid): we make a graph out of
222 -- the blocks where there is an edge from one block to another iff the
223 -- first block ends by jumping to the second. Then we topologically
224 -- sort this graph. Then traverse the list: for each block, we first
225 -- output the block, then if it has an out edge, we move the
226 -- destination of the out edge to the front of the list, and continue.
-- | Order the blocks of a procedure: the entry block stays first, the
-- rest are topologically sorted, then traversed pulling each block's
-- single jump target to the front so the jump becomes a fallthrough.
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks [] = []
sequenceBlocks (entry:blocks) =
  seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
  -- the first block is the entry point ==> it must remain at the start.

-- Strongly connected components of the block graph.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks blocks = stronglyConnCompR (map mkNode blocks)

-- Out-edges of a block: destinations of its final instruction.
-- NOTE(review): the fallback alternative of this case is not visible in
-- this view (presumably the empty list).
getOutEdges :: [Instr] -> [Unique]
getOutEdges instrs = case jumpDests (last instrs) [] of
                       [one] -> [getUnique one]
        -- we're only interested in the last instruction of
        -- the block, and only if it has a single destination.

-- Graph node: the block itself, its id's unique, and its out-edges.
mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)

-- Emit blocks in order; when a block ends in a single local jump whose
-- target can be brought next, drop the jump ('init instrs') and fall
-- through instead.
seqBlocks ((block,_,[]) : rest)
  = block : seqBlocks rest
seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
  | otherwise       = block : seqBlocks rest'
  -- NOTE(review): the 'where' introducing this binding is not visible
  -- in this view.
        (can_fallthrough, rest') = reorder next [] rest
        -- TODO: we should do a better job for cycles; try to maximise the
        -- fallthroughs within a loop.
seqBlocks _ = panic "AsmCodegen:seqBlocks"

-- Look for the block with the given id; if found, move it to the front
-- and report success so the caller can fall through to it.
reorder id accum [] = (False, reverse accum)
reorder id accum (b@(block,id',out) : rest)
  | id == id'  = (True, (block,id,out) : reverse accum ++ rest)
  | otherwise  = reorder id (b:accum) rest
263 -- -----------------------------------------------------------------------------
264 -- Instruction selection
266 -- Native code instruction selection for a chunk of stix code. For
267 -- this part of the computation, we switch from the UniqSM monad to
268 -- the NatM monad. The latter carries not only a Unique, but also an
269 -- Int denoting the current C stack pointer offset in the generated
270 -- code; this is needed for creating correct spill offsets on
271 -- architectures which don't offer, or for which it would be
272 -- prohibitively expensive to employ, a frame pointer register. Viz,
275 -- The offset is measured in bytes, and indicates the difference
276 -- between the current (simulated) C stack-ptr and the value it was at
277 -- the beginning of the block. For stacks which grow down, this value
278 -- should be either zero or negative.
280 -- Switching between the two monads whilst carrying along the same
281 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection for one chunk: run 'cmmTopCodeGen' in the
-- NatM monad (which threads the unique supply, the simulated C stack
-- delta and the imported symbols) and check that the stack delta is
-- zero again when the block is finished.
genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
-- NOTE(review): a line between the signature and this equation is not
-- visible; the extra 'initial_us' argument suggests UniqSM is a
-- function over the unique supply here — confirm against its definition.
genMachCode cmm_top initial_us
  = let initial_st = mkNatM_State initial_us 0
        (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
        final_us      = natm_us final_st
        final_delta   = natm_delta final_st
        final_imports = natm_imports final_st
        -- NOTE(review): the 'in' expression testing final_delta == 0 is
        -- only partially visible below.
        then ((new_tops, final_imports), final_us)
        else pprPanic "genMachCode: nonzero final delta"
297 -- -----------------------------------------------------------------------------
298 -- Fixup assignments to global registers so that they assign to
299 -- locations within the RegTable, if appropriate.
301 -- Note that we currently don't fixup reads here: they're done by
302 -- the generic optimiser below, to avoid having two separate passes
-- | Rewrite assignments to global registers throughout one procedure;
-- data sections are returned unchanged.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop dat@(CmmData _ _) = returnUs dat
fixAssignsTop (CmmProc info lbl params body) =
    mapUs fixAssignsBlock body `thenUs` \ fixed ->
    returnUs (CmmProc info lbl params fixed)
-- | Apply 'fixAssigns' to the statements of one basic block, keeping
-- the block's id.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock blockId body) =
    fixAssigns body `thenUs` \ fixed ->
    returnUs (BasicBlock blockId fixed)
-- | Fix up a statement list; each statement may expand to several.
fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
-- NOTE(review): the left-hand side of this equation is not visible in
-- this view.
    mapUs fixAssign stmts `thenUs` \ stmtss ->
    returnUs (concat stmtss)
-- | Rewrite one statement: assignments to global registers that live in
-- the register table become stores to the table; assignment to BaseReg
-- is always illegal; call results landing in table-resident globals are
-- routed through a fresh local register and a store.
fixAssign :: CmmStmt -> UniqSM [CmmStmt]
-- NOTE(review): this panic message names cmmStmtConFold, but it is
-- raised from fixAssign — presumably a stale copy/paste.
fixAssign (CmmAssign (CmmGlobal BaseReg) src)
  = panic "cmmStmtConFold: assignment to BaseReg";

fixAssign (CmmAssign (CmmGlobal reg) src)
  | Left realreg <- reg_or_addr
  = returnUs [CmmAssign (CmmGlobal reg) src]
  | Right baseRegAddr <- reg_or_addr
  = returnUs [CmmStore baseRegAddr src]
  -- Replace register leaves with appropriate StixTrees for
  -- the given target. GlobalRegs which map to a reg on this
  -- arch are left unchanged. Assigning to BaseReg is always
  -- illegal, so we check for that.
  -- NOTE(review): the 'where' introducing this binding is not visible.
    reg_or_addr = get_GlobalReg_reg_or_addr reg

fixAssign (CmmCall target results args vols)
  = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
    returnUs (CmmCall target results' args vols : concat stores)
  -- For a global result with no machine register, allocate a fresh
  -- local to receive the value, then store it into the register table.
  -- NOTE(review): the 'where' and the 'Right baseRegAddr ->' alternative
  -- lines are not visible in this view.
    fixResult g@(CmmGlobal reg,hint) =
      case get_GlobalReg_reg_or_addr reg of
        Left realreg -> returnUs (g, [])
          getUniqueUs `thenUs` \ uq ->
          let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
          returnUs ((local,hint),
            [CmmStore baseRegAddr (CmmReg local)])

fixAssign other_stmt = returnUs [other_stmt]
354 -- -----------------------------------------------------------------------------
355 -- Generic Cmm optimiser
361 (b) Simple inlining: a temporary which is assigned to and then
362 used, once, can be shorted.
363 (c) Replacement of references to GlobalRegs which do not have
364 machine registers by the appropriate memory load (eg.
365 Hp ==> *(BaseReg + 34) ).
366 (d) Position independent code and dynamic linking
367 (i) introduce the appropriate indirections
368 and position independent refs
369 (ii) compile a list of imported symbols
371 Ideas for other things we could do (ToDo):
373 - shortcut jumps-to-jumps
374 - eliminate dead code blocks
-- | Run the generic Cmm optimiser (mini-inlining plus constant folding)
-- over one top-level chunk, returning the optimised chunk together with
-- the imported labels collected while rewriting. Data sections are
-- left alone.
cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
cmmToCmm dat@(CmmData _ _) = (dat, [])
cmmToCmm (CmmProc info lbl params body) = runCmmOpt $ do
    folded <- mapM cmmBlockConFold (cmmPeep body)
    return (CmmProc info lbl params folded)
-- | A small state monad for the Cmm optimiser: threads the list of
-- imported labels collected so far (result pair is unboxed).
newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))
-- State-threading Monad instance for 'CmmOptM'.
instance Monad CmmOptM where
  return x = CmmOptM $ \imports -> (# x,imports #)
  -- NOTE(review): the left-hand side and part of the body of (>>=) are
  -- not visible in this view.
    CmmOptM $ \imports ->
          CmmOptM g' -> g' imports'
-- | Record one imported label in the optimiser's state.
addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl = CmmOptM (\imps -> (# (), lbl:imps #))
-- | Run a 'CmmOptM' computation starting from an empty import list;
-- return its result and every label recorded via 'addImportCmmOpt'.
runCmmOpt :: CmmOptM a -> (a, [CLabel])
runCmmOpt (CmmOptM body) =
    case body [] of
      (# res, imps #) -> (res, imps)
-- | Constant-fold every statement of one basic block, keeping its id.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock blockId body) = do
    folded <- mapM cmmStmtConFold body
    return (BasicBlock blockId folded)
-- Constant-fold the expressions inside one statement; a self-assignment
-- becomes CmmNop and a condition known at compile time deletes or
-- unconditionalises the branch.
-- NOTE(review): the function head ('cmmStmtConFold stmt = case ...')
-- and several alternative patterns are not visible in this view.
        -- CmmAssign: fold the source; drop assignments of a register
        -- to itself.
        -> do src' <- cmmExprConFold False src
              return $ case src' of
                CmmReg reg' | reg == reg' -> CmmNop
                new_src -> CmmAssign reg new_src
        -- CmmStore: fold both address and value.
        -> do addr' <- cmmExprConFold False addr
              src' <- cmmExprConFold False src
              return $ CmmStore addr' src'
        -- CmmJump: the target is folded as a jump target.
        -> do addr' <- cmmExprConFold True addr
              return $ CmmJump addr' regs
        CmmCall target regs args vols
          -> do target' <- case target of
                  -- A foreign-call callee is a jump target too.
                  CmmForeignCall e conv -> do
                    e' <- cmmExprConFold True e
                    return $ CmmForeignCall e' conv
                  other -> return other
                args' <- mapM (\(arg, hint) -> do
                                 arg' <- cmmExprConFold False arg
                                 return (arg', hint)) args
                return $ CmmCall target' regs args' vols
        CmmCondBranch test dest
          -> do test' <- cmmExprConFold False test
                return $ case test' of
                  -- Condition folded to zero: delete the branch, leaving
                  -- a comment recording what was removed.
                  CmmLit (CmmInt 0 _) ->
                    CmmComment (mkFastString ("deleted: " ++
                                              showSDoc (pprStmt stmt)))
                  -- Any other known constant: branch unconditionally.
                  CmmLit (CmmInt n _) -> CmmBranch dest
                  other -> CmmCondBranch test' dest
        -> do expr' <- cmmExprConFold False expr
              return $ CmmSwitch expr' ids
-- Constant-fold one expression; 'isJumpTarget' tells the dynamic-
-- reference machinery whether the label is used as a jump target.
-- NOTE(review): the 'case expr of' line and several alternative pattern
-- lines are not visible in this view.
cmmExprConFold isJumpTarget expr
        -- Load: fold the address.
        -> do addr' <- cmmExprConFold False addr
              return $ CmmLoad addr' rep

        -- For MachOps, we first optimize the children, and then we try
        -- our hand at some constant-folding.
        -> do args' <- mapM (cmmExprConFold False) args
              return $ cmmMachOpFold mop args'

        -- Labels may need to become position-independent / dynamic
        -- references; any import this creates is recorded in the monad.
        CmmLit (CmmLabel lbl)
          -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
        CmmLit (CmmLabelOff lbl off)
          -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
                return $ cmmMachOpFold (MO_Add wordRep) [
                  (CmmLit $ CmmInt (fromIntegral off) wordRep)

#if powerpc_TARGET_ARCH
        -- On powerpc (non-PIC), it's easier to jump directly to a label than
        -- to use the register table, so we replace these registers
        -- with the corresponding labels:
        CmmReg (CmmGlobal GCEnter1)
          -> cmmExprConFold isJumpTarget $
             CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
        CmmReg (CmmGlobal GCFun)
          -> cmmExprConFold isJumpTarget $
             CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))

        CmmReg (CmmGlobal mid)
          -- Replace register leaves with appropriate StixTrees for
          -- the given target. MagicIds which map to a reg on this
          -- arch are left unchanged. For the rest, BaseReg is taken
          -- to mean the address of the reg table in MainCapability,
          -- and for all others we generate an indirection to its
          -- location in the register table.
          -> case get_GlobalReg_reg_or_addr mid of
               Left realreg -> return expr
                 -- NOTE(review): the 'Right baseRegAddr -> case mid of'
                 -- lines are not visible in this view.
                 BaseReg -> cmmExprConFold False baseRegAddr
                 other -> cmmExprConFold False (CmmLoad baseRegAddr

        -- eliminate zero offsets
          -> cmmExprConFold False (CmmReg reg)

        CmmRegOff (CmmGlobal mid) offset
          -- RegOff leaves are just a shorthand form. If the reg maps
          -- to a real reg, we keep the shorthand, otherwise, we just
          -- expand it and defer to the above code.
          -> case get_GlobalReg_reg_or_addr mid of
               Left realreg -> return expr
                 -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [
                      CmmReg (CmmGlobal mid),
                      CmmLit (CmmInt (fromIntegral offset)
520 -- -----------------------------------------------------------------------------
521 -- MachOp constant folder
523 -- Now, try to constant-fold the MachOps. The arguments have already
524 -- been optimized and folded.
-- Constant folder for MachOps. The arguments have already been folded.
-- NOTE(review): the signature's head line and the '= case op of' line
-- are not visible in this view.
    :: MachOp       -- The operation from an CmmMachOp
    -> [CmmExpr]    -- The optimized arguments

-- Unary operation on an integer literal: fold it away.
cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
      MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
      MO_Not r   -> CmmLit (CmmInt (complement x) rep)

        -- these are interesting: we must first narrow to the
        -- "from" type, in order to truncate to the correct size.
        -- The final narrow/widen to the destination type
        -- is implicit in the CmmLit.
      MO_S_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
      MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)

      _ -> panic "cmmMachOpFold: unknown unary op"
-- Eliminate conversion NOPs: converting a value to its own rep is the
-- identity.
cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x

-- ToDo: eliminate multiple conversions. Be careful though: can't remove
-- a narrowing, and can't remove conversions to/from floating point types.

-- ToDo: eliminate nested comparisons:
--    CmmMachOp MO_Lt [CmmMachOp MO_Eq [x,y], CmmLit (CmmInt 0 _)]
-- turns into a simple equality test.
-- Binary operation on two integer literals: compute it now.
-- NOTE(review): the '= case mop of' line and the where-bindings for
-- x_u/y_u (unsigned-narrowed) and x_s/y_s (signed-narrowed) are not
-- visible in this view; the comparisons below rely on them.
cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
        -- for comparisons: don't forget to narrow the arguments before
        -- comparing, since they might be out of range.
      MO_Eq r   -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
      MO_Ne r   -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)

      MO_U_Gt r -> CmmLit (CmmInt (if x_u >  y_u then 1 else 0) wordRep)
      MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
      MO_U_Lt r -> CmmLit (CmmInt (if x_u <  y_u then 1 else 0) wordRep)
      MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)

      MO_S_Gt r -> CmmLit (CmmInt (if x_s >  y_s then 1 else 0) wordRep)
      MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
      MO_S_Lt r -> CmmLit (CmmInt (if x_s <  y_s then 1 else 0) wordRep)
      MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)

      MO_Add r -> CmmLit (CmmInt (x + y) r)
      MO_Sub r -> CmmLit (CmmInt (x - y) r)
      MO_Mul r -> CmmLit (CmmInt (x * y) r)
      -- division is only folded when the divisor is non-zero
      MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
      MO_S_Rem r  | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)

      MO_And r -> CmmLit (CmmInt (x .&. y) r)
      MO_Or r  -> CmmLit (CmmInt (x .|. y) r)
      MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)

      MO_Shl r   -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
      MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
      MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)

      other -> CmmMachOp mop args
-- When possible, shift the constants to the right-hand side, so that we
-- can match for strength reductions. Note that the code generator will
-- also assume that constants have been shifted to the right when
-- emitting code.
cmmMachOpFold op [x@(CmmLit _), y]
  | not (isLit y) && isCommutableMachOp op
  = cmmMachOpFold op [y, x]

-- Turn (a+b)+c into a+(b+c) where possible. Because literals are
-- moved to the right, it is more likely that we will find
-- opportunities for constant folding when the expression is
-- right-associated.
--
-- ToDo: this appears to introduce a quadratic behaviour due to the
-- nested cmmMachOpFold. Can we fix this?
--
-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
-- is also a lit (otherwise arg1 would be on the right). If we
-- put arg1 on the left of the rearranged expression, we'll get into a
-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
  | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1)
  = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]

-- Make a RegOff if we can: reg +/- constant collapses into the
-- register-plus-offset form (offset narrowed signed to the literal rep).
cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
  = CmmRegOff reg (fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
  = CmmRegOff reg (off + fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
  = CmmRegOff reg (- fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
  = CmmRegOff reg (off - fromIntegral (narrowS rep n))
-- Fold label(+/-)offset into a CmmLit where possible
-- (offset narrowed unsigned to the literal's rep before widening).
cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
-- We can often do something with constants of 0 and 1 ...
-- NOTE(review): the '= case mop of' lines and the arithmetic-identity
-- alternatives of both equations are not visible in this view; only
-- the comparison rewrites remain.

-- right operand is literal 0: comparisons against 0 simplify when the
-- left operand is itself a comparison (which yields 0 or 1).
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
      MO_Ne r   | isComparisonExpr x -> x
      MO_Eq r   | Just x' <- maybeInvertConditionalExpr x -> x'
      MO_U_Gt r | isComparisonExpr x -> x
      MO_S_Gt r | isComparisonExpr x -> x
      MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
      MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
      MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
      MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
      MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
      MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
      other -> CmmMachOp mop args

-- right operand is literal 1: remainder is 0; comparisons against 1
-- simplify when the left operand is a comparison.
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
      MO_S_Rem r -> CmmLit (CmmInt 0 rep)
      MO_U_Rem r -> CmmLit (CmmInt 0 rep)
      MO_Ne r   | Just x' <- maybeInvertConditionalExpr x -> x'
      MO_Eq r   | isComparisonExpr x -> x
      MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
      MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
      MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
      MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
      MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
      MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
      MO_U_Ge r | isComparisonExpr x -> x
      MO_S_Ge r | isComparisonExpr x -> x
      other -> CmmMachOp mop args
-- Now look for multiplication/division by powers of 2 (integers):
-- rewrite to shifts. NOTE(review): the guard lines selecting MO_Mul /
-- signed-division and the Nothing alternatives are not visible in this
-- view.
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
    -> case exactLog2 n of
         Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
    -> case exactLog2 n of
         Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
      -- no fold applies: return the expression unchanged
      unchanged = CmmMachOp mop args

-- Anything else is just too hard.
cmmMachOpFold mop args = CmmMachOp mop args
705 -- -----------------------------------------------------------------------------
708 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
709 -- from GCC. It requires bit manipulation primitives, and we use GHC
710 -- extensions. Tough.
712 -- Used to be in MachInstrs --SDM.
713 -- ToDo: remove use of unboxery --SDM.
-- | 'Just' the base-2 log of x when x is an exact power of 2 in
-- (0, 2^31), 'Nothing' otherwise. Uses GHC unboxed-int primitives;
-- the power-of-2 test is the classic (x .&. (-x)) == x trick.
-- NOTE(review): several lines of this definition (the equation head,
-- the Nothing branches, and the 'where') are not visible in this view.
exactLog2 :: Integer -> Maybe Integer
  = if (x <= 0 || x >= 2147483648) then
      case iUnbox (fromInteger x) of { x# ->
      if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
        Just (toInteger (iBox (pow2 x#)))
          -- count trailing shifts until the remaining value is 1
          pow2 x# | x# ==# 1# = 0#
                  | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
734 -- -----------------------------------------------------------------------------
735 -- widening / narrowing
-- | Truncate an integer to the unsigned range of the given integral
-- 'MachRep'. Panics on non-integral reps.
narrowU :: MachRep -> Integer -> Integer
narrowU I8  x = fromIntegral (fromIntegral x :: Word8)
narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
-- Fix: the panic previously said "narrowTo", which names neither this
-- function nor narrowS, making the two panics indistinguishable.
narrowU _ _ = panic "narrowU: non-integral rep"
-- | Truncate an integer to the signed range of the given integral
-- 'MachRep'. Panics on non-integral reps.
narrowS :: MachRep -> Integer -> Integer
narrowS I8  x = fromIntegral (fromIntegral x :: Int8)
narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
-- Fix: the panic previously said "narrowTo", which names neither this
-- function nor narrowU, making the two panics indistinguishable.
narrowS _ _ = panic "narrowS: non-integral rep"
751 -- -----------------------------------------------------------------------------
754 -- This pass inlines assignments to temporaries that are used just
755 -- once in the very next statement only. Generalising this would be
756 -- quite difficult (have to take into account aliasing of memory
757 -- writes, and so on), but at the moment it catches a number of useful
758 -- cases and lets the code generator generate much better code.
760 -- NB. This assumes that temporaries are single-assignment.
-- | Mini-inlining pass: count how often each local register is used
-- across the whole procedure, then inline single-use temporaries within
-- each block. NOTE(review): the 'where' keyword of this definition is
-- not visible in this view.
cmmPeep :: [CmmBasicBlock] -> [CmmBasicBlock]
cmmPeep blocks = map do_inline blocks
  -- per-block use counts for local registers
  blockUses (BasicBlock _ stmts)
    = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
  -- whole-procedure use counts
  uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
  do_inline (BasicBlock id stmts)
    = BasicBlock id (cmmMiniInline uses stmts)
-- | Walk a statement list; an assignment to a local register used
-- exactly once is inlined into its (nearby) use and dropped.
-- NOTE(review): the '=' line and the NCG_DEBUG CPP conditionals around
-- the trace call are not visible in this view.
cmmMiniInline :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
cmmMiniInline uses [] = []
cmmMiniInline uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
  | Just 1 <- lookupUFM uses u,
    Just stmts' <- lookForInline u expr stmts
      trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
    cmmMiniInline uses stmts'

cmmMiniInline uses (stmt:stmts)
  = stmt : cmmMiniInline uses stmts
-- Try to inline a temporary assignment. We can skip over assignments to
-- other temporaries, because we know that expressions aren't side-effecting
-- and temporaries are single-assignment.
-- NOTE(review): the guard on this first equation and some case
-- alternatives below are not visible in this view.
lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
  = case lookupUFM (getExprUses rhs) u of
      Just 1 -> Just (inlineStmt u expr stmt : rest)
      _other -> case lookForInline u expr rest of
                  Just stmts -> Just (stmt:stmts)

-- no-ops can be skipped over freely
lookForInline u expr (CmmNop : rest)
  = lookForInline u expr rest

-- any other statement: inline only if this statement is the single use
lookForInline u expr (stmt:stmts)
  = case lookupUFM (getStmtUses stmt) u of
      Just 1 -> Just (inlineStmt u expr stmt : stmts)
808 -- -----------------------------------------------------------------------------
809 -- Boring Cmm traversals for collecting usage info and substitutions.
-- | Per-unique counts of local-register reads within one statement.
-- NOTE(review): the fallback clause of the local 'uses' helper is not
-- visible in this view.
getStmtUses :: CmmStmt -> UniqFM Int
getStmtUses (CmmAssign _ e) = getExprUses e
getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
getStmtUses (CmmCall target _ es _)
  = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
  where uses (CmmForeignCall e _) = getExprUses e
getStmtUses (CmmCondBranch e _) = getExprUses e
getStmtUses (CmmSwitch e _) = getExprUses e
getStmtUses (CmmJump e _) = getExprUses e
getStmtUses _ = emptyUFM
-- | Per-unique counts of local-register reads within one expression.
getExprUses :: CmmExpr -> UniqFM Int
getExprUses expr =
    case expr of
      CmmReg    (CmmLocal (LocalReg u _))   -> unitUFM u 1
      CmmRegOff (CmmLocal (LocalReg u _)) _ -> unitUFM u 1
      CmmLoad sub _                         -> getExprUses sub
      CmmMachOp _ subs                      -> getExprsUses subs
      _                                     -> emptyUFM
-- | Combined local-register use counts over a list of expressions.
getExprsUses es = foldr (plusUFM_C (+)) emptyUFM [ getExprUses e | e <- es ]
-- | Substitute expression 'a' for every read of the local register with
-- unique 'u' inside one statement.
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a stmt = case stmt of
    CmmAssign r e     -> CmmAssign r (sub e)
    CmmStore dst src  -> CmmStore (sub dst) (sub src)
    CmmCall target regs es vols ->
        -- NB: the callee expression of a foreign call is visibly rebuilt
        -- unchanged here; only the arguments are substituted.
        let rebuilt (CmmForeignCall fn cconv) = CmmForeignCall fn cconv
            rebuilt (CmmPrim p)               = CmmPrim p
        in CmmCall (rebuilt target) regs
                   [ (sub e, hint) | (e,hint) <- es ] vols
    CmmCondBranch e d -> CmmCondBranch (sub e) d
    CmmSwitch e d     -> CmmSwitch (sub e) d
    CmmJump e d       -> CmmJump (sub e) d
    _                 -> stmt
  where
    sub = inlineExpr u a
-- | Substitute expression 'a' for reads of the local register with
-- unique 'u' inside one expression.
inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
-- NOTE(review): the guards/right-hand side of this first equation are
-- not visible in this view (presumably substituting 'a' when u == u').
inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
-- A RegOff of the target register becomes a + offset (in the reg's rep).
-- NOTE(review): the 'otherwise' fallback of this equation is not visible.
inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
  | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
inlineExpr u a other_expr = other_expr
856 -- -----------------------------------------------------------------------------
-- | True for literal expressions.
-- NOTE(review): the False equation is not visible in this view.
isLit (CmmLit _) = True
-- | Is this expression an application of a comparison MachOp?
isComparisonExpr :: CmmExpr -> Bool
isComparisonExpr expr =
    case expr of
      CmmMachOp op _ -> isComparisonMachOp op
      _              -> False
-- | Invert a comparison expression when its top-level operator has an
-- inverse; 'Nothing' for anything else.
maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
maybeInvertConditionalExpr expr =
    case expr of
      CmmMachOp op args ->
        case maybeInvertComparison op of
          Just op' -> Just (CmmMachOp op' args)
          Nothing  -> Nothing
      _ -> Nothing