1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
20 import RegAllocInfo ( jumpDests )
22 import PositionIndependentCode
25 import PprCmm ( pprStmt, pprCmms )
27 import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
28 #if powerpc_TARGET_ARCH
29 import CLabel ( mkRtsCodeLabel )
33 import Unique ( Unique, getUnique )
36 #if darwin_TARGET_OS || (powerpc_TARGET_ARCH && linux_TARGET_OS)
37 import List ( groupBy, sortBy )
38 import CLabel ( pprCLabel )
40 import ErrUtils ( dumpIfSet_dyn )
41 import CmdLineOpts ( DynFlags, DynFlag(..), dopt, opt_Static,
42 opt_EnsureSplittableC, opt_PIC )
45 import qualified Pretty
53 import List ( intersperse )
62 The native-code generator has machine-independent and
63 machine-dependent modules.
65 This module ("AsmCodeGen") is the top-level machine-independent
66 module. Before entering machine-dependent land, we do some
67 machine-independent optimisations (defined below) on the
70 We convert to the machine-specific 'Instr' datatype with
71 'cmmCodeGen', assuming an infinite supply of registers. We then use
72 a machine-independent register allocator ('regAlloc') to rejoin
73 reality. Obviously, 'regAlloc' has machine-specific helper
74 functions (see about "RegAllocInfo" below).
76 Finally, we order the basic blocks of the function so as to minimise
77 the number of jumps between blocks, by utilising fallthrough wherever
80 The machine-dependent bits break down as follows:
82 * ["MachRegs"] Everything about the target platform's machine
83 registers (and immediate operands, and addresses, which tend to
84 intermingle/interact with registers).
86 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
87 have a module of its own), plus a miscellany of other things
88 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
90 * ["MachCodeGen"] is where 'Cmm' stuff turns into
93 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
96 * ["RegAllocInfo"] In the register allocator, we manipulate
97 'MRegsState's, which are 'BitSet's, one bit per machine register.
98 When we want to say something about a specific machine register
99 (e.g., ``it gets clobbered by this instruction''), we set/unset
100 its bit. Obviously, we do this 'BitSet' thing for efficiency
103 The 'RegAllocInfo' module collects together the machine-specific
104 info needed to do register allocation.
106 * ["RegisterAlloc"] The (machine-independent) register allocator.
109 -- -----------------------------------------------------------------------------
110 -- Top-level of the native codegen
-- NB. We *lazily* compile each block of code for space reasons.
-- | Top level of the native code generator: run 'cmmNativeGen' over
-- every top-level Cmm chunk (lazily, for space), concatenate the
-- resulting assembly documents, and append dynamic-linking symbol
-- stubs where the platform needs them.
-- NOTE(review): this listing is an excerpt; several lines of this
-- definition (and its where-clause) are elided.
nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
nativeCodeGen dflags cmms us
= let ((ppr_cmms, insn_sdoc, imports), _) = initUs us $
cgCmm (concat (map add_split cmms))
-- Generate code for each chunk, collecting the optimised Cmm (for
-- dumping), the assembly doc, and the imported symbols of each.
cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
let (cmms,docs,imps) = unzip3 results in
returnUs (Cmm cmms, my_vcat docs, concat imps)
-- Dump the optimised Cmm when -ddump-opt-cmm is set.
dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
return (insn_sdoc Pretty.$$ dyld_stubs imports)
-- When object splitting is requested, put a split-marker procedure
-- between chunks.
| opt_EnsureSplittableC = split_marker : tops
split_marker = CmmProc [] mkSplitMarkerLabel [] []
#if darwin_TARGET_OS || (powerpc_TARGET_ARCH && linux_TARGET_OS)
-- Generate "symbol stubs" for all external symbols that might
-- come from a dynamic library.
{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
map head $ group $ sort imps-}
-- (Hack) sometimes two Labels pretty-print the same, but have
-- different uniques; so we compare their text versions...
| needImportedSymbols
(pprGotDeclaration :) $
map (pprImportedSymbol . fst . head) $
groupBy (\(_,a) (_,b) -> a == b) $
sortBy (\(_,a) (_,b) -> compare a b) $
where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
astyle = mkCodeStyle AsmStyle
-- Platforms with no dynamic-linking stubs emit nothing here.
dyld_stubs imps = Pretty.empty
my_vcat sds = Pretty.vcat sds
-- Debug variant: interleave ncg debug markers between chunks.
my_vcat sds = Pretty.vcat (
Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
Pretty.$$ Pretty.char ' '
-- Complete native code generation phase for a single top-level chunk
-- of Cmm.  Pipeline: fix global-register assignments, generic Cmm
-- optimisation, instruction selection ('genMachCode'), register
-- allocation, block sequencing, then the x86 FP kludge.
-- NOTE(review): some lines of this definition are elided in this
-- excerpt.
cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
cmmNativeGen dflags cmm
= {-# SCC "fixAssigns" #-}
fixAssignsTop cmm `thenUs` \ fixed_cmm ->
{-# SCC "genericOpt" #-}
cmmToCmm fixed_cmm `bind` \ (cmm, imports) ->
-- Only retain the optimised Cmm when it will actually be dumped,
-- otherwise substitute an empty chunk (space leak avoidance).
(if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
else CmmData Text []) `bind` \ ppr_cmm ->
{-# SCC "genMachCode" #-}
genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
{-# SCC "regAlloc" #-}
map regAlloc pre_regalloc `bind` \ with_regs ->
{-# SCC "sequenceBlocks" #-}
map sequenceTop with_regs `bind` \ sequenced ->
{-# SCC "x86fp_kludge" #-}
map x86fp_kludge sequenced `bind` \ final_mach_code ->
Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
-- | Post-pass over generated code: rewrites each basic block with
-- 'i386_insert_ffrees' (presumably to clean up the x87 FP stack —
-- TODO confirm against MachInstrs); data chunks pass through.
-- NOTE(review): the CPP lines selecting the identity version on
-- non-x86 targets appear to be elided from this excerpt.
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge top@(CmmProc info lbl params code) =
CmmProc info lbl params (map bb_i386_insert_ffrees code)
bb_i386_insert_ffrees (BasicBlock id instrs) =
BasicBlock id (i386_insert_ffrees instrs)
-- Identity on architectures that need no kludge.
x86fp_kludge top = top
213 -- -----------------------------------------------------------------------------
214 -- Sequencing the basic blocks
216 -- Cmm BasicBlocks are self-contained entities: they always end in a
217 -- jump, either non-local or to another basic block in the same proc.
218 -- In this phase, we attempt to place the basic blocks in a sequence
219 -- such that as many of the local jumps as possible turn into
-- | Reorder the basic blocks of one procedure to maximise fallthrough
-- (see 'sequenceBlocks'); data chunks are passed through untouched.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop top = case top of
  CmmData _ _                    -> top
  CmmProc info lbl params blocks ->
      CmmProc info lbl params (sequenceBlocks blocks)
227 -- The algorithm is very simple (and stupid): we make a graph out of
228 -- the blocks where there is an edge from one block to another iff the
229 -- first block ends by jumping to the second. Then we topologically
230 -- sort this graph. Then traverse the list: for each block, we first
231 -- output the block, then if it has an out edge, we move the
232 -- destination of the out edge to the front of the list, and continue.
-- | Lay out the blocks of one procedure.  The entry block must remain
-- first; the remaining blocks are topologically sorted (SCCs,
-- reversed) before 'seqBlocks' threads fallthroughs through them.
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks []             = []
sequenceBlocks (entry : rest) =
  seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks rest)))
-- | Build the block graph (one node per block, out-edges from
-- 'getOutEdges') and compute its strongly connected components.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks = stronglyConnCompR . map mkNode
-- | Successors of a block for graph building: inspect only the last
-- instruction, and record an edge only when it has exactly one jump
-- destination (the case a fallthrough could exploit).
-- NOTE(review): the catch-all alternative of this case expression is
-- elided in this excerpt (presumably returning []).
getOutEdges :: [Instr] -> [Unique]
getOutEdges instrs = case jumpDests (last instrs) [] of
[one] -> [getUnique one]
-- we're only interested in the last instruction of
-- the block, and only if it has a single destination.
250 mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
-- Emit blocks in order; whenever a block's single out-edge target can
-- be placed immediately after it, drop the final jump and fall through.
-- NOTE(review): the `where` line appears to be elided in this excerpt.
seqBlocks ((block,_,[]) : rest)
= block : seqBlocks rest
seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
-- 'init instrs' drops the now-redundant jump to 'next'.
| can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
| otherwise = block : seqBlocks rest'
(can_fallthrough, rest') = reorder next [] rest
-- TODO: we should do a better job for cycles; try to maximise the
-- fallthroughs within a loop.
seqBlocks _ = panic "AsmCodegen:seqBlocks"
-- | Search the block list for the node labelled @want@.  On success,
-- return (True, list with that node moved to the front of the
-- remainder); on failure, (False, reverse acc ++ untouched list).
-- @acc@ holds already-skipped nodes in reverse order.
reorder want acc blocks =
  case break (\(_, bid, _) -> bid == want) blocks of
    (_, [])                          -> (False, reverse acc ++ blocks)
    (before, (blk, _, outs) : after) ->
      (True, (blk, want, outs) : reverse acc ++ before ++ after)
269 -- -----------------------------------------------------------------------------
270 -- Instruction selection
272 -- Native code instruction selection for a chunk of stix code. For
273 -- this part of the computation, we switch from the UniqSM monad to
274 -- the NatM monad. The latter carries not only a Unique, but also an
275 -- Int denoting the current C stack pointer offset in the generated
276 -- code; this is needed for creating correct spill offsets on
277 -- architectures which don't offer, or for which it would be
278 -- prohibitively expensive to employ, a frame pointer register. Viz,
281 -- The offset is measured in bytes, and indicates the difference
282 -- between the current (simulated) C stack-ptr and the value it was at
283 -- the beginning of the block. For stacks which grow down, this value
284 -- should be either zero or negative.
286 -- Switching between the two monads whilst carrying along the same
287 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection for one top-level Cmm chunk: runs the NatM
-- monad (unique supply, simulated C-stack delta, collected imports)
-- and sanity-checks that the final stack delta is zero.
-- NOTE(review): some lines of this definition are elided in this
-- excerpt.
genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
genMachCode cmm_top initial_us
= let initial_st = mkNatM_State initial_us 0
(new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
final_us = natm_us final_st
final_delta = natm_delta final_st
final_imports = natm_imports final_st
-- A non-zero final delta indicates a spill-offset bookkeeping bug
-- in the machine-specific code generator.
then ((new_tops, final_imports), final_us)
else pprPanic "genMachCode: nonzero final delta"
303 -- -----------------------------------------------------------------------------
304 -- Fixup assignments to global registers so that they assign to
305 -- locations within the RegTable, if appropriate.
307 -- Note that we currently don't fixup reads here: they're done by
308 -- the generic optimiser below, to avoid having two separate passes
-- | Fix up assignments to global registers in every block of a
-- procedure; data chunks need no fixing.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop top = case top of
  CmmData _ _ -> returnUs top
  CmmProc info lbl params bs ->
      mapUs fixAssignsBlock bs `thenUs` \ bs' ->
      returnUs (CmmProc info lbl params bs')
-- | Fix the statements of a single basic block, preserving its id.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock bid ss) =
  fixAssigns ss `thenUs` \ ss' ->
  returnUs (BasicBlock bid ss')
-- | Fix a statement list; one statement may expand to several (e.g. a
-- call whose results must be stored into the register table).
-- NOTE(review): the equation's left-hand side is elided in this
-- excerpt.
fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
mapUs fixAssign stmts `thenUs` \ stmtss ->
returnUs (concat stmtss)
-- | Rewrite a single statement so that assignments to global registers
-- without a machine register become stores into the register table.
-- NOTE(review): some guards/where lines are elided in this excerpt.
fixAssign :: CmmStmt -> UniqSM [CmmStmt]
-- NOTE(review): the panic message names "cmmStmtConFold" but this is
-- 'fixAssign' — the message looks stale; consider updating it.
fixAssign (CmmAssign (CmmGlobal BaseReg) src)
= panic "cmmStmtConFold: assignment to BaseReg";
fixAssign (CmmAssign (CmmGlobal reg) src)
-- The register exists on this architecture: keep the assignment.
| Left realreg <- reg_or_addr
= returnUs [CmmAssign (CmmGlobal reg) src]
-- No machine register: store into its register-table slot instead.
| Right baseRegAddr <- reg_or_addr
= returnUs [CmmStore baseRegAddr src]
-- Replace register leaves with appropriate StixTrees for
-- the given target. GlobalRegs which map to a reg on this
-- arch are left unchanged. Assigning to BaseReg is always
-- illegal, so we check for that.
reg_or_addr = get_GlobalReg_reg_or_addr reg
-- Calls: each global-register result that lacks a machine register is
-- redirected through a fresh local register, followed by a store.
fixAssign (CmmCall target results args vols)
= mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
returnUs (CmmCall target results' args vols : concat stores)
fixResult g@(CmmGlobal reg,hint) =
case get_GlobalReg_reg_or_addr reg of
Left realreg -> returnUs (g, [])
getUniqueUs `thenUs` \ uq ->
let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
returnUs ((local,hint),
[CmmStore baseRegAddr (CmmReg local)])
-- Everything else is left alone.
fixAssign other_stmt = returnUs [other_stmt]
360 -- -----------------------------------------------------------------------------
361 -- Generic Cmm optimiser
367 (b) Simple inlining: a temporary which is assigned to and then
368 used, once, can be shorted.
369 (c) Replacement of references to GlobalRegs which do not have
370 machine registers by the appropriate memory load (eg.
371 Hp ==> *(BaseReg + 34) ).
372 (d) Position independent code and dynamic linking
373 (i) introduce the appropriate indirections
374 and position independent refs
375 (ii) compile a list of imported symbols
377 Ideas for other things we could do (ToDo):
379 - shortcut jumps-to-jumps
380 - eliminate dead code blocks
-- | The generic Cmm optimiser: run the mini-inliner ('cmmPeep') and
-- then constant-fold every block, collecting imported symbols along
-- the way.  Data chunks are returned unchanged with no imports.
cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
cmmToCmm top@(CmmData _ _) = (top, [])
cmmToCmm (CmmProc info lbl params blocks) =
  runCmmOpt (mapM cmmBlockConFold (cmmPeep blocks) >>= \ blocks' ->
             return (CmmProc info lbl params blocks'))
-- | A minimal state monad for the Cmm optimiser: threads the list of
-- imported symbols collected so far, using an unboxed result pair.
newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))
instance Monad CmmOptM where
return x = CmmOptM $ \imports -> (# x,imports #)
-- NOTE(review): the definition of (>>=) is partially elided in this
-- excerpt; the visible fragment threads the import list through.
CmmOptM $ \imports ->
CmmOptM g' -> g' imports'
-- | Record one imported symbol in the optimiser state.
addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl = CmmOptM (\imps -> (# (), lbl : imps #))
-- | Run an optimiser action with an empty import list; return the
-- result together with every import it recorded.
runCmmOpt :: CmmOptM a -> (a, [CLabel])
runCmmOpt (CmmOptM run) =
  case run [] of
    (# res, imps #) -> (res, imps)
-- | Constant-fold every statement of a basic block, keeping its id.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock bid ss) =
  mapM cmmStmtConFold ss >>= \ ss' ->
  return (BasicBlock bid ss')
-- Constant-fold the sub-expressions of one Cmm statement.
-- NOTE(review): the head of this definition (and some case patterns)
-- are elided in this excerpt; the visible alternatives handle
-- assignments, stores, jumps, calls, conditional branches, switches.
-> do src' <- cmmExprConFold False src
return $ case src' of
-- Assigning a register to itself folds to a no-op.
CmmReg reg' | reg == reg' -> CmmNop
new_src -> CmmAssign reg new_src
-> do addr' <- cmmExprConFold False addr
src' <- cmmExprConFold False src
return $ CmmStore addr' src'
-- Jump targets are folded with isJumpTarget = True (affects how
-- dynamic references are made; see cmmExprConFold).
-> do addr' <- cmmExprConFold True addr
return $ CmmJump addr' regs
CmmCall target regs args vols
-> do target' <- case target of
CmmForeignCall e conv -> do
e' <- cmmExprConFold True e
return $ CmmForeignCall e' conv
other -> return other
args' <- mapM (\(arg, hint) -> do
arg' <- cmmExprConFold False arg
return (arg', hint)) args
return $ CmmCall target' regs args' vols
CmmCondBranch test dest
-> do test' <- cmmExprConFold False test
return $ case test' of
-- A constant-false condition deletes the branch (kept as a
-- comment so dumps still show what happened) ...
CmmLit (CmmInt 0 _) ->
CmmComment (mkFastString ("deleted: " ++
showSDoc (pprStmt stmt)))
-- ... and any other integer constant makes it unconditional.
CmmLit (CmmInt n _) -> CmmBranch dest
other -> CmmCondBranch test' dest
-> do expr' <- cmmExprConFold False expr
return $ CmmSwitch expr' ids
-- Constant-fold one Cmm expression.  The Bool flag says whether the
-- expression is a jump target, which changes how label references are
-- rewritten for PIC/dynamic linking.
-- NOTE(review): the type signature, case scrutinee and several
-- patterns are elided in this excerpt.
cmmExprConFold isJumpTarget expr
-> do addr' <- cmmExprConFold False addr
return $ CmmLoad addr' rep
-- For MachOps, we first optimize the children, and then we try
-- our hand at some constant-folding.
-> do args' <- mapM (cmmExprConFold False) args
return $ cmmMachOpFold mop args'
-- Label references may need PIC/dynamic-linking indirections,
-- and record the label as an imported symbol.
CmmLit (CmmLabel lbl)
-> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
CmmLit (CmmLabelOff lbl off)
-> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
return $ cmmMachOpFold (MO_Add wordRep) [
(CmmLit $ CmmInt (fromIntegral off) wordRep)
#if powerpc_TARGET_ARCH
-- On powerpc (non-PIC), it's easier to jump directly to a label than
-- to use the register table, so we replace these registers
-- with the corresponding labels:
CmmReg (CmmGlobal GCEnter1)
-> cmmExprConFold isJumpTarget $
CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
CmmReg (CmmGlobal GCFun)
-> cmmExprConFold isJumpTarget $
CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
CmmReg (CmmGlobal mid)
-- Replace register leaves with appropriate StixTrees for
-- the given target. MagicIds which map to a reg on this
-- arch are left unchanged. For the rest, BaseReg is taken
-- to mean the address of the reg table in MainCapability,
-- and for all others we generate an indirection to its
-- location in the register table.
-> case get_GlobalReg_reg_or_addr mid of
Left realreg -> return expr
BaseReg -> cmmExprConFold False baseRegAddr
other -> cmmExprConFold False (CmmLoad baseRegAddr
-- eliminate zero offsets
-> cmmExprConFold False (CmmReg reg)
CmmRegOff (CmmGlobal mid) offset
-- RegOff leaves are just a shorthand form. If the reg maps
-- to a real reg, we keep the shorthand, otherwise, we just
-- expand it and defer to the above code.
-> case get_GlobalReg_reg_or_addr mid of
Left realreg -> return expr
-> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [
CmmReg (CmmGlobal mid),
CmmLit (CmmInt (fromIntegral offset)
526 -- -----------------------------------------------------------------------------
527 -- MachOp constant folder
529 -- Now, try to constant-fold the MachOps. The arguments have already
530 -- been optimized and folded.
-- Constant-fold one MachOp applied to already-optimised arguments.
-- NOTE(review): several case heads, guards and alternatives of this
-- definition are elided in this excerpt.
:: MachOp -- The operation from an CmmMachOp
-> [CmmExpr] -- The optimized arguments
-- Unary operations on an integer literal fold immediately.
cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
MO_Not r -> CmmLit (CmmInt (complement x) rep)
-- these are interesting: we must first narrow to the
-- "from" type, in order to truncate to the correct size.
-- The final narrow/widen to the destination type
-- is implicit in the CmmLit.
MO_S_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
_ -> panic "cmmMachOpFold: unknown unary op"
-- Eliminate conversion NOPs
cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
-- ToDo: eliminate multiple conversions. Be careful though: can't remove
-- a narrowing, and can't remove conversions to/from floating point types.
-- ToDo: eliminate nested comparisons:
-- CmmMachOp MO_Lt [CmmMachOp MO_Eq [x,y], CmmLit (CmmInt 0 _)]
-- turns into a simple equality test.
-- Binary operations on two integer literals fold immediately.
-- NOTE(review): the bindings of x_u/y_u/x_s/y_s (the narrowed
-- unsigned/signed views of x and y) are elided in this excerpt.
cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
-- for comparisons: don't forget to narrow the arguments before
-- comparing, since they might be out of range.
MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)
MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep)
MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep)
MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)
MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep)
MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep)
MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)
MO_Add r -> CmmLit (CmmInt (x + y) r)
MO_Sub r -> CmmLit (CmmInt (x - y) r)
MO_Mul r -> CmmLit (CmmInt (x * y) r)
-- Division folds only when the divisor is non-zero.
MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)
MO_And r -> CmmLit (CmmInt (x .&. y) r)
MO_Or r -> CmmLit (CmmInt (x .|. y) r)
MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)
MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
other -> CmmMachOp mop args
-- When possible, shift the constants to the right-hand side, so that we
-- can match for strength reductions. Note that the code generator will
-- also assume that constants have been shifted to the right when
cmmMachOpFold op [x@(CmmLit _), y]
| not (isLit y) && isCommutableMachOp op
= cmmMachOpFold op [y, x]
-- Turn (a+b)+c into a+(b+c) where possible. Because literals are
-- moved to the right, it is more likely that we will find
-- opportunities for constant folding when the expression is
-- ToDo: this appears to introduce a quadratic behaviour due to the
-- nested cmmMachOpFold. Can we fix this?
-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
-- is also a lit (otherwise arg1 would be on the right). If we
-- put arg1 on the left of the rearranged expression, we'll get into a
-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
| mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1)
= cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
-- Make a RegOff if we can
cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
= CmmRegOff reg (fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
= CmmRegOff reg (off + fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
= CmmRegOff reg (- fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
= CmmRegOff reg (off - fromIntegral (narrowS rep n))
-- Fold label(+/-)offset into a CmmLit where possible
cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
= CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
= CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
= CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
-- We can often do something with constants of 0 and 1 ...
-- Comparisons against 0: a comparison result is itself 0 or 1, so
-- e.g. (cmp /= 0) is just cmp, and (cmp < 0) is constant false.
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
MO_Ne r | isComparisonExpr x -> x
MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x'
MO_U_Gt r | isComparisonExpr x -> x
MO_S_Gt r | isComparisonExpr x -> x
MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
other -> CmmMachOp mop args
-- Constants of 1: rem folds to 0, and comparisons of a comparison
-- result against 1 simplify symmetrically to the 0 cases above.
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
MO_S_Rem r -> CmmLit (CmmInt 0 rep)
MO_U_Rem r -> CmmLit (CmmInt 0 rep)
MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x'
MO_Eq r | isComparisonExpr x -> x
MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
MO_U_Ge r | isComparisonExpr x -> x
MO_S_Ge r | isComparisonExpr x -> x
other -> CmmMachOp mop args
-- Now look for multiplication/division by powers of 2 (integers).
-- NOTE(review): the guards selecting multiply vs. signed-quot (and the
-- Nothing branches) are elided in this excerpt.
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
-> case exactLog2 n of
Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
-> case exactLog2 n of
Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
unchanged = CmmMachOp mop args
-- Anything else is just too hard.
cmmMachOpFold mop args = CmmMachOp mop args
711 -- -----------------------------------------------------------------------------
-- This algorithm for determining the $\log_2$ of exact powers of 2 comes
-- from GCC. It requires bit manipulation primitives, and we use GHC
-- extensions. Tough.
-- Used to be in MachInstrs --SDM.
-- ToDo: remove use of unboxery --SDM.
-- | Just (log2 x) when x is an exact power of 2 within the accepted
-- 32-bit range, Nothing otherwise.
-- NOTE(review): several lines of this definition (equation head,
-- else-branches) are elided in this excerpt.
exactLog2 :: Integer -> Maybe Integer
= if (x <= 0 || x >= 2147483648) then
case iUnbox (fromInteger x) of { x# ->
-- x .&. (-x) isolates the lowest set bit; if that differs from x,
-- x has more than one bit set, so it is not a power of 2.
if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
Just (toInteger (iBox (pow2 x#)))
-- Count right-shifts until the value reaches 1.
pow2 x# | x# ==# 1# = 0#
| otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
740 -- -----------------------------------------------------------------------------
741 -- widening / narrowing
-- | Truncate an integer to the given width, interpreting the result
-- as unsigned (e.g. @narrowU I8 (-1) == 255@).  Only integer reps are
-- supported.
narrowU :: MachRep -> Integer -> Integer
narrowU I8  x = fromIntegral (fromIntegral x :: Word8)
narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
-- Fix: the panic previously reported the stale name "narrowTo";
-- report the actual function so failures are attributable.
narrowU _ _ = panic "narrowU"
-- | Truncate an integer to the given width, interpreting the result
-- as signed (e.g. @narrowS I8 255 == -1@).  Only integer reps are
-- supported.
narrowS :: MachRep -> Integer -> Integer
narrowS I8  x = fromIntegral (fromIntegral x :: Int8)
narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
-- Fix: the panic previously reported the stale name "narrowTo";
-- report the actual function so failures are attributable.
narrowS _ _ = panic "narrowS"
757 -- -----------------------------------------------------------------------------
760 -- This pass inlines assignments to temporaries that are used just
761 -- once in the very next statement only. Generalising this would be
762 -- quite difficult (have to take into account aliasing of memory
763 -- writes, and so on), but at the moment it catches a number of useful
764 -- cases and lets the code generator generate much better code.
766 -- NB. This assumes that temporaries are single-assignment.
-- | The mini-inliner: count the uses of every local register across
-- the whole procedure, then inline single-use temporaries block by
-- block via 'cmmMiniInline'.
-- NOTE(review): the `where` line appears to be elided in this excerpt.
cmmPeep :: [CmmBasicBlock] -> [CmmBasicBlock]
cmmPeep blocks = map do_inline blocks
-- Per-block use counts, merged by addition.
blockUses (BasicBlock _ stmts)
= foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
-- Use counts for the whole procedure.
uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
do_inline (BasicBlock id stmts)
= BasicBlock id (cmmMiniInline uses stmts)
-- | Inline an assignment to a temporary when the temporary is used
-- exactly once (per the precomputed use counts) and 'lookForInline'
-- can locate and substitute that single use.
-- NOTE(review): some lines of this definition are elided in this
-- excerpt.
cmmMiniInline :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
cmmMiniInline uses [] = []
cmmMiniInline uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
| Just 1 <- lookupUFM uses u,
Just stmts' <- lookForInline u expr stmts
-- Debug trace of each successful inlining.
trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
cmmMiniInline uses stmts'
-- Any other statement is kept as-is.
cmmMiniInline uses (stmt:stmts)
= stmt : cmmMiniInline uses stmts
-- Try to inline a temporary assignment. We can skip over assignments to
-- other temporaries, because we know that expressions aren't side-effecting
-- and temporaries are single-assignment.
-- NOTE(review): some guards and fall-through alternatives of this
-- definition are elided in this excerpt.
lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
= case lookupUFM (getExprUses rhs) u of
-- The single use is in this RHS: substitute it here.
Just 1 -> Just (inlineStmt u expr stmt : rest)
-- Not used here: keep looking past this temporary assignment.
_other -> case lookForInline u expr rest of
Just stmts -> Just (stmt:stmts)
lookForInline u expr (stmt:stmts)
= case lookupUFM (getStmtUses stmt) u of
Just 1 -> Just (inlineStmt u expr stmt : stmts)
811 -- -----------------------------------------------------------------------------
812 -- Boring Cmm traversals for collecting usage info and substitutions.
-- | Count uses of each local register in one statement, as a UniqFM
-- from register unique to use count.
-- NOTE(review): the catch-all equation of the local 'uses' helper
-- appears to be elided in this excerpt.
getStmtUses :: CmmStmt -> UniqFM Int
getStmtUses (CmmAssign _ e) = getExprUses e
getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
-- Calls: count uses in the target (for foreign calls) and arguments.
getStmtUses (CmmCall target _ es _)
= plusUFM_C (+) (uses target) (getExprsUses (map fst es))
where uses (CmmForeignCall e _) = getExprUses e
getStmtUses (CmmCondBranch e _) = getExprUses e
getStmtUses (CmmSwitch e _) = getExprUses e
getStmtUses (CmmJump e _) = getExprUses e
getStmtUses _ = emptyUFM
-- | Count uses of each local register in one expression: a direct
-- reference or RegOff counts as one use; loads and MachOps recurse.
getExprUses :: CmmExpr -> UniqFM Int
getExprUses expr = case expr of
  CmmReg    (CmmLocal (LocalReg u _))   -> unitUFM u 1
  CmmRegOff (CmmLocal (LocalReg u _)) _ -> unitUFM u 1
  CmmLoad e _                           -> getExprUses e
  CmmMachOp _ es                        -> getExprsUses es
  _                                     -> emptyUFM
833 getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es)
-- | Substitute expression @a@ for every use of local register @u@
-- inside one statement.  Note that for foreign calls only the
-- arguments are substituted, not the call target.
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a stmt = case stmt of
  CmmAssign r e               -> CmmAssign r (subst e)
  CmmStore e1 e2              -> CmmStore (subst e1) (subst e2)
  CmmCall target regs es vols ->
      CmmCall (substTarget target) regs
              [ (subst e, hint) | (e, hint) <- es ] vols
  CmmCondBranch e d           -> CmmCondBranch (subst e) d
  CmmSwitch e d               -> CmmSwitch (subst e) d
  CmmJump e d                 -> CmmJump (subst e) d
  other                       -> other
  where
    subst = inlineExpr u a
    -- Call targets are left untouched (foreign-call expressions are
    -- not substituted into, matching the original behaviour).
    substTarget (CmmForeignCall fn cconv) = CmmForeignCall fn cconv
    substTarget (CmmPrim p)               = CmmPrim p
-- | Substitute expression @a@ for every use of local register @u@
-- inside an expression.
-- NOTE(review): some guards/equations of this definition are elided
-- in this excerpt.
inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
-- A RegOff of the inlined register becomes an explicit addition.
inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
| u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
inlineExpr u a other_expr = other_expr
859 -- -----------------------------------------------------------------------------
864 isLit (CmmLit _) = True
-- | Is this expression a comparison MachOp application (so its value
-- is known to be 0 or 1)?
isComparisonExpr :: CmmExpr -> Bool
isComparisonExpr e = case e of
  CmmMachOp op _ -> isComparisonMachOp op
  _              -> False
-- | Invert a comparison expression (e.g. < becomes >=) when the
-- operator has an inverse; Nothing otherwise.
maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
maybeInvertConditionalExpr (CmmMachOp op args) =
  case maybeInvertComparison op of
    Just op' -> Just (CmmMachOp op' args)
    Nothing  -> Nothing
maybeInvertConditionalExpr _ = Nothing