1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
20 import RegAllocInfo ( jumpDests )
22 import PositionIndependentCode
25 import PprCmm ( pprStmt, pprCmms )
27 import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
28 #if powerpc_TARGET_ARCH
29 import CLabel ( mkRtsCodeLabel )
33 import Unique ( Unique, getUnique )
36 import List ( groupBy, sortBy )
37 import CLabel ( pprCLabel )
38 import ErrUtils ( dumpIfSet_dyn )
39 import DynFlags ( DynFlags, DynFlag(..), dopt )
40 import StaticFlags ( opt_Static, opt_PIC )
43 import qualified Pretty
51 import List ( intersperse )
60 The native-code generator has machine-independent and
61 machine-dependent modules.
63 This module ("AsmCodeGen") is the top-level machine-independent
64 module. Before entering machine-dependent land, we do some
65 machine-independent optimisations (defined below) on the
68 We convert to the machine-specific 'Instr' datatype with
69 'cmmCodeGen', assuming an infinite supply of registers. We then use
70 a machine-independent register allocator ('regAlloc') to rejoin
71 reality. Obviously, 'regAlloc' has machine-specific helper
72 functions (see about "RegAllocInfo" below).
74 Finally, we order the basic blocks of the function so as to minimise
75 the number of jumps between blocks, by utilising fallthrough wherever
78 The machine-dependent bits break down as follows:
80 * ["MachRegs"] Everything about the target platform's machine
81 registers (and immediate operands, and addresses, which tend to
82 intermingle/interact with registers).
84 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
85 have a module of its own), plus a miscellany of other things
86 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
88 * ["MachCodeGen"] is where 'Cmm' stuff turns into
91 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
94 * ["RegAllocInfo"] In the register allocator, we manipulate
95 'MRegsState's, which are 'BitSet's, one bit per machine register.
96 When we want to say something about a specific machine register
97 (e.g., ``it gets clobbered by this instruction''), we set/unset
98 its bit. Obviously, we do this 'BitSet' thing for efficiency
101 The 'RegAllocInfo' module collects together the machine-specific
102 info needed to do register allocation.
104 * ["RegisterAlloc"] The (machine-independent) register allocator.
107 -- -----------------------------------------------------------------------------
108 -- Top-level of the native codegen
-- NB. We *lazily* compile each block of code for space reasons.
-- | Top level of the native code generator: split each Cmm chunk when
-- -split-objs is on, run 'cmmNativeGen' over every top-level chunk,
-- dump the optimised Cmm when requested, and glue the resulting
-- assembly documents together with any dynamic-linking symbol stubs.
--
-- NOTE(review): this listing is elided — several source lines (local
-- definition heads, guards, #endif markers) are missing between the
-- visible statements; only the visible fragments are annotated.
nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
nativeCodeGen dflags cmms us
  = let ((ppr_cmms, insn_sdoc, imports), _) = initUs us $
          cgCmm (concat (map add_split cmms))

        -- Code-generate each top lazily, collecting the pretty-printed
        -- Cmm, the assembly doc and the imported labels for each.
        cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
          -- (equation head elided)
          lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
          let (cmms,docs,imps) = unzip3 results in
          returnUs (Cmm cmms, my_vcat docs, concat imps)

    -- (intervening bindings elided)
    dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
    return (insn_sdoc Pretty.$$ dyld_stubs imports
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
            -- On recent versions of Darwin, the linker supports
            -- dead-stripping of code and data on a per-symbol basis.
            -- There's a hack to make this work in PprMach.pprNatCmmTop.
            Pretty.$$ Pretty.text ".subsections_via_symbols"
            -- (matching #endif elided from this view)

    -- add_split: with -split-objs, precede each chunk's tops with a
    -- split-marker procedure so the assembly can later be cut apart.
        | dopt Opt_SplitObjs dflags = split_marker : tops

    split_marker = CmmProc [] mkSplitMarkerLabel [] []

    -- Generate "symbol stubs" for all external symbols that might
    -- come from a dynamic library.
{-  dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
                      map head $ group $ sort imps-}

    -- (Hack) sometimes two Labels pretty-print the same, but have
    -- different uniques; so we compare their text versions...
        | needImportedSymbols
          -- emit the GOT declaration first, then one stub per distinct
          -- (by rendered text) imported label
          (pprGotDeclaration :) $
          map (pprImportedSymbol . fst . head) $
          groupBy (\(_,a) (_,b) -> a == b) $
          sortBy (\(_,a) (_,b) -> compare a b) $
        where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
              astyle = mkCodeStyle AsmStyle

    -- my_vcat: the two definitions below are presumably alternatives of
    -- a debug-only CPP conditional that is elided here — TODO confirm.
    my_vcat sds = Pretty.vcat sds
    my_vcat sds = Pretty.vcat (
                    -- debug build interleaves ncg markers between docs
                    Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
                    Pretty.$$ Pretty.char ' '
177 -- Complete native code generation phase for a single top-level chunk
-- | Complete native code generation for a single top-level chunk:
-- fix global-register assignments, run the generic Cmm optimiser,
-- select instructions, allocate registers, sequence the blocks for
-- fallthrough, apply the x87 kludge, and pretty-print the result.
-- Returns the (possibly dummy) optimised Cmm for dumping, the assembly
-- document, and the labels imported while optimising.
cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
cmmNativeGen dflags cmm
  = {-# SCC "fixAssigns" #-}
    fixAssignsTop cmm `thenUs` \ fixed_cmm ->
    {-# SCC "genericOpt" #-}
    cmmToCmm fixed_cmm `bind` \ (cmm, imports) ->
    -- keep the optimised Cmm only when it will actually be dumped
    (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
        -- (then-branch elided from this view)
        else CmmData Text []) `bind` \ ppr_cmm ->
    {-# SCC "genMachCode" #-}
    genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
    {-# SCC "regAlloc" #-}
    map regAlloc pre_regalloc `bind` \ with_regs ->
    {-# SCC "sequenceBlocks" #-}
    map sequenceTop with_regs `bind` \ sequenced ->
    {-# SCC "x86fp_kludge" #-}
    map x86fp_kludge sequenced `bind` \ final_mach_code ->
    -- (an intervening line is elided here)
    Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
    -- (an intervening line is elided here)
    returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
-- | On x86, insert ffree instructions so the x87 floating-point stack
-- is popped where required.  NOTE(review): the two overlapping sets of
-- equations below are presumably the alternatives of an elided
-- architecture CPP conditional (identity on non-x86) — TODO confirm.
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge top@(CmmProc info lbl params code) =
  CmmProc info lbl params (map bb_i386_insert_ffrees code)
  -- (a 'where' keyword line is elided here)
  -- rewrite one basic block's instruction stream
  bb_i386_insert_ffrees (BasicBlock id instrs) =
    BasicBlock id (i386_insert_ffrees instrs)

x86fp_kludge top = top
214 -- -----------------------------------------------------------------------------
215 -- Sequencing the basic blocks
217 -- Cmm BasicBlocks are self-contained entities: they always end in a
218 -- jump, either non-local or to another basic block in the same proc.
219 -- In this phase, we attempt to place the basic blocks in a sequence
220 -- such that as many of the local jumps as possible turn into
-- | Reorder the basic blocks of a procedure to maximise fallthroughs.
-- Data chunks are returned unchanged.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop top = case top of
  CmmData _ _ -> top
  CmmProc info lbl params blocks ->
    CmmProc info lbl params (sequenceBlocks blocks)
228 -- The algorithm is very simple (and stupid): we make a graph out of
229 -- the blocks where there is an edge from one block to another iff the
230 -- first block ends by jumping to the second. Then we topologically
231 -- sort this graph. Then traverse the list: for each block, we first
232 -- output the block, then if it has an out edge, we move the
233 -- destination of the out edge to the front of the list, and continue.
-- | Order the blocks of a procedure for fallthrough: keep the entry
-- block first, topologically sort the rest by their jump edges, and
-- thread the result through 'seqBlocks'.
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks [] = []
sequenceBlocks (entry : rest) =
  seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks rest)))
  -- the first block is the entry point ==> it must remain at the start.
-- | Strongly-connected components of the block graph, where each node
-- carries the block, its unique, and the uniques it may jump to.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks = stronglyConnCompR . map mkNode
-- | The out-edges of a block, taken from its final instruction.
-- NOTE(review): the alternative for multiple/zero destinations is
-- elided from this view — presumably it returns [].
getOutEdges :: [Instr] -> [Unique]
getOutEdges instrs = case jumpDests (last instrs) [] of
                       [one] -> [getUnique one]
-- we're only interested in the last instruction of
-- the block, and only if it has a single destination.

-- graph node for one block: (block, its unique, its out-edges)
mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
-- Emit blocks in order; whenever a block has a single out-edge whose
-- target can be pulled next, drop the final jump (fallthrough).
seqBlocks ((block,_,[]) : rest)
  = block : seqBlocks rest
seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
  -- 'init instrs' drops the trailing jump when we fall through
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
  | otherwise       = block : seqBlocks rest'
  -- (a 'where' keyword line is elided here)
  (can_fallthrough, rest') = reorder next [] rest
  -- TODO: we should do a better job for cycles; try to maximise the
  -- fallthroughs within a loop.
seqBlocks _ = panic "AsmCodegen:seqBlocks"
-- | Search the worklist for the node labelled @want@.  If found, move
-- it to the front of the remaining list (so the caller can fall
-- through into it) and report success; otherwise report failure with
-- the list order unchanged.
reorder want seen [] = (False, reverse seen)
reorder want seen (node@(block, ident, out) : rest)
  | want == ident = (True, (block, want, out) : reverse seen ++ rest)
  | otherwise     = reorder want (node : seen) rest
270 -- -----------------------------------------------------------------------------
271 -- Instruction selection
273 -- Native code instruction selection for a chunk of stix code. For
274 -- this part of the computation, we switch from the UniqSM monad to
275 -- the NatM monad. The latter carries not only a Unique, but also an
276 -- Int denoting the current C stack pointer offset in the generated
277 -- code; this is needed for creating correct spill offsets on
278 -- architectures which don't offer, or for which it would be
279 -- prohibitively expensive to employ, a frame pointer register. Viz,
282 -- The offset is measured in bytes, and indicates the difference
283 -- between the current (simulated) C stack-ptr and the value it was at
284 -- the beginning of the block. For stacks which grow down, this value
285 -- should be either zero or negative.
287 -- Switching between the two monads whilst carrying along the same
288 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection for one top-level chunk: run 'cmmTopCodeGen'
-- in the NatM monad, starting from a zero simulated-C-stack delta, and
-- return the generated tops plus any imports recorded along the way.
-- The final stack delta is checked (partially elided below) to be zero.
genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
-- (an intervening line is elided here)
genMachCode cmm_top initial_us
  = let initial_st            = mkNatM_State initial_us 0
        (new_tops, final_st)  = initNat initial_st (cmmTopCodeGen cmm_top)
        final_us              = natm_us final_st
        final_delta           = natm_delta final_st
        final_imports         = natm_imports final_st
    -- (the 'in if final_delta == 0' test is elided from this view)
        then ((new_tops, final_imports), final_us)
        else pprPanic "genMachCode: nonzero final delta"
304 -- -----------------------------------------------------------------------------
305 -- Fixup assignments to global registers so that they assign to
306 -- locations within the RegTable, if appropriate.
308 -- Note that we currently don't fixup reads here: they're done by
309 -- the generic optimiser below, to avoid having two separate passes
-- | Rewrite assignments to global registers throughout a top-level
-- chunk so that registers without a machine home go via the RegTable.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop top = case top of
  CmmData _ _ -> returnUs top
  CmmProc info lbl params blocks ->
    mapUs fixAssignsBlock blocks `thenUs` \ fixed ->
    returnUs (CmmProc info lbl params fixed)
-- | Apply 'fixAssigns' to the statements of one basic block.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock blk_id stmts) =
  fixAssigns stmts `thenUs` \ fixed ->
  returnUs (BasicBlock blk_id fixed)
-- | Apply 'fixAssign' to every statement and splice the resulting
-- statement lists back together.
fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
-- (the equation head 'fixAssigns stmts =' is elided from this view)
  mapUs fixAssign stmts `thenUs` \ stmtss ->
  returnUs (concat stmtss)
-- | Rewrite one statement: an assignment to a global register that has
-- no machine register becomes a store into its RegTable slot; call
-- results headed for such registers are routed through a fresh local.
fixAssign :: CmmStmt -> UniqSM [CmmStmt]
fixAssign (CmmAssign (CmmGlobal BaseReg) src)
  = panic "cmmStmtConFold: assignment to BaseReg";

fixAssign (CmmAssign (CmmGlobal reg) src)
  | Left realreg <- reg_or_addr
  = returnUs [CmmAssign (CmmGlobal reg) src]
  | Right baseRegAddr <- reg_or_addr
  = returnUs [CmmStore baseRegAddr src]
  -- Replace register leaves with appropriate StixTrees for
  -- the given target. GlobalRegs which map to a reg on this
  -- arch are left unchanged. Assigning to BaseReg is always
  -- illegal, so we check for that.
  -- (a 'where' keyword line is elided here)
  reg_or_addr = get_GlobalReg_reg_or_addr reg

fixAssign (CmmCall target results args vols)
  = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
    returnUs (CmmCall target results' args vols : concat stores)
  -- (a 'where' keyword line is elided here)
  -- fixResult: a result register with a machine home is left alone;
  -- otherwise the call writes a fresh local which is then stored into
  -- the register's table slot.
  fixResult g@(CmmGlobal reg,hint) =
    case get_GlobalReg_reg_or_addr reg of
      Left realreg -> returnUs (g, [])
      -- (the 'Right baseRegAddr ->' case header is elided here)
        getUniqueUs `thenUs` \ uq ->
        let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
        returnUs ((local,hint),
                  [CmmStore baseRegAddr (CmmReg local)])
  -- (an identity equation for non-global results is elided here)

fixAssign other_stmt = returnUs [other_stmt]
361 -- -----------------------------------------------------------------------------
362 -- Generic Cmm optimiser
368 (b) Simple inlining: a temporary which is assigned to and then
369 used, once, can be shorted.
370 (c) Replacement of references to GlobalRegs which do not have
371 machine registers by the appropriate memory load (eg.
372 Hp ==> *(BaseReg + 34) ).
373 (d) Position independent code and dynamic linking
374 (i) introduce the appropriate indirections
375 and position independent refs
376 (ii) compile a list of imported symbols
378 Ideas for other things we could do (ToDo):
380 - shortcut jumps-to-jumps
381 - eliminate dead code blocks
382 - simple CSE: if an expr is assigned to a temp, then replace later occs of
383 that expr with the temp, until the expr is no longer valid (can push through
384 temp assignments, and certain assigns to mem...)
-- | Generic Cmm optimiser for one top-level chunk: run the mini-inliner
-- ('cmmPeep') over the blocks and constant-fold every statement,
-- collecting imported labels along the way.  Data chunks are returned
-- untouched with no imports.
cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
cmmToCmm top@(CmmData _ _) = (top, [])
cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $ do
  folded <- mapM cmmBlockConFold (cmmPeep blocks)
  return (CmmProc info lbl params folded)
-- | A tiny state monad threading the list of imported labels collected
-- during Cmm optimisation; the unboxed-tuple result avoids allocation.
newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))

instance Monad CmmOptM where
  return x = CmmOptM $ \imports -> (# x,imports #)
  -- (the '>>=' equation head and the case scrutinising the first
  --  action's result are elided from this view)
      CmmOptM $ \imports ->
          CmmOptM g' -> g' imports'
-- | Record a label as imported so a symbol stub can be emitted for it.
addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl = CmmOptM (\imps -> (# (), lbl : imps #))
-- | Run a Cmm optimisation computation with no imports recorded yet,
-- returning its result together with the labels it collected.
runCmmOpt :: CmmOptM a -> (a, [CLabel])
runCmmOpt (CmmOptM run) =
  case run [] of
    (# res, imps #) -> (res, imps)
-- | Constant-fold every statement of one basic block.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock blk_id stmts) = do
  folded <- mapM cmmStmtConFold stmts
  return (BasicBlock blk_id folded)
-- NOTE(review): the head of cmmStmtConFold (its signature, equation and
-- the first case patterns, including the CmmAssign pattern the first
-- fragment belongs to) is elided from this view.
        -- CmmAssign: fold the RHS; a fold to a self-assignment
        -- (reg := reg) is replaced by a no-op.
        -> do src' <- cmmExprConFold False src
              return $ case src' of
                         CmmReg reg' | reg == reg' -> CmmNop
                         new_src -> CmmAssign reg new_src
        -- CmmStore: fold both the address and the stored value.
        -> do addr' <- cmmExprConFold False addr
              src' <- cmmExprConFold False src
              return $ CmmStore addr' src'
        -- CmmJump: the target is folded as a jump target (True).
        -> do addr' <- cmmExprConFold True addr
              return $ CmmJump addr' regs
        CmmCall target regs args vols
        -- Foreign-call targets count as jump targets; arguments do not.
        -> do target' <- case target of
                           CmmForeignCall e conv -> do
                             e' <- cmmExprConFold True e
                             return $ CmmForeignCall e' conv
                           other -> return other
              args' <- mapM (\(arg, hint) -> do
                               arg' <- cmmExprConFold False arg
                               return (arg', hint)) args
              return $ CmmCall target' regs args' vols
        CmmCondBranch test dest
        -- A condition folding to a constant removes the test: 0 deletes
        -- the branch (leaving a comment), any other constant makes it
        -- unconditional.
        -> do test' <- cmmExprConFold False test
              return $ case test' of
                         CmmLit (CmmInt 0 _) ->
                           CmmComment (mkFastString ("deleted: " ++
                                         showSDoc (pprStmt stmt)))
                         CmmLit (CmmInt n _) -> CmmBranch dest
                         other -> CmmCondBranch test' dest
        -- CmmSwitch: fold the scrutinee only.
        -> do expr' <- cmmExprConFold False expr
              return $ CmmSwitch expr' ids
-- | Constant-fold one expression.  The Bool argument says whether the
-- expression is used as a jump target, which matters when building
-- position-independent references.
-- NOTE(review): the signature and the 'case expr of' header, plus
-- several pattern lines, are elided from this view.
cmmExprConFold isJumpTarget expr
        -- CmmLoad: fold the address. (pattern line elided)
        -> do addr' <- cmmExprConFold False addr
              return $ CmmLoad addr' rep
        -- For MachOps, we first optimize the children, and then we try
        -- our hand at some constant-folding.
        -> do args' <- mapM (cmmExprConFold False) args
              return $ cmmMachOpFold mop args'
        CmmLit (CmmLabel lbl)
        -- Labels may need dynamic-reference indirections (PIC/dynamic
        -- linking); imported labels are recorded via addImportCmmOpt.
        -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
        CmmLit (CmmLabelOff lbl off)
        -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
              return $ cmmMachOpFold (MO_Add wordRep) [
                  -- (the dynRef operand line is elided here)
                  (CmmLit $ CmmInt (fromIntegral off) wordRep)
#if powerpc_TARGET_ARCH
        -- On powerpc (non-PIC), it's easier to jump directly to a label than
        -- to use the register table, so we replace these registers
        -- with the corresponding labels:
        CmmReg (CmmGlobal GCEnter1)
          -> cmmExprConFold isJumpTarget $
             CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
        CmmReg (CmmGlobal GCFun)
          -> cmmExprConFold isJumpTarget $
             CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
        -- (matching #endif elided from this view)
        CmmReg (CmmGlobal mid)
        -- Replace register leaves with appropriate StixTrees for
        -- the given target. MagicIds which map to a reg on this
        -- arch are left unchanged. For the rest, BaseReg is taken
        -- to mean the address of the reg table in MainCapability,
        -- and for all others we generate an indirection to its
        -- location in the register table.
        -> case get_GlobalReg_reg_or_addr mid of
             Left realreg -> return expr
             -- (the 'Right' case header is elided from this view)
               BaseReg -> cmmExprConFold False baseRegAddr
               other -> cmmExprConFold False (CmmLoad baseRegAddr
        -- eliminate zero offsets
        -> cmmExprConFold False (CmmReg reg)
        CmmRegOff (CmmGlobal mid) offset
        -- RegOff leaves are just a shorthand form. If the reg maps
        -- to a real reg, we keep the shorthand, otherwise, we just
        -- expand it and defer to the above code.
        -> case get_GlobalReg_reg_or_addr mid of
             Left realreg -> return expr
             -- (the 'Right' case header is elided from this view)
               -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [
                      CmmReg (CmmGlobal mid),
                      CmmLit (CmmInt (fromIntegral offset)
530 -- -----------------------------------------------------------------------------
531 -- MachOp constant folder
533 -- Now, try to constant-fold the MachOps. The arguments have already
534 -- been optimized and folded.
-- (the name line of the signature, 'cmmMachOpFold', is elided above)
   :: MachOp       -- The operation from a CmmMachOp
   -> [CmmExpr]    -- The optimized arguments

-- Unary operation on an integer literal: fold it completely.
cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
  -- (the 'case op of' header is elided from this view)
    MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
    MO_Not r   -> CmmLit (CmmInt (complement x) rep)

    -- these are interesting: we must first narrow to the
    -- "from" type, in order to truncate to the correct size.
    -- The final narrow/widen to the destination type
    -- is implicit in the CmmLit.
    -- (the MO_S_Conv pattern line is elided here)
      | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to)
      | otherwise        -> CmmLit (CmmInt (narrowS from x) to)
    MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)

    _ -> panic "cmmMachOpFold: unknown unary op"
-- Eliminate conversion NOPs
cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x

-- Eliminate nested conversions where possible
cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
  | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
    Just (_, rep3,signed2) <- isIntConversion conv_outer
    -- (the case header dispatching to the guards below is elided)
        -- widen then narrow to the same size is a nop
     _ | rep1 < rep2 && rep1 == rep3 -> x
        -- Widen then narrow to different size: collapse to single conversion
        -- but remember to use the signedness from the widening, just in case
        -- the final conversion is a widen.
       | rep1 < rep2 && rep2 > rep3 ->
            cmmMachOpFold (intconv signed1 rep1 rep3) [x]
        -- Nested widenings: collapse if the signedness is the same
       | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
            cmmMachOpFold (intconv signed1 rep1 rep3) [x]
        -- Nested narrowings: collapse
       | rep1 > rep2 && rep2 > rep3 ->
            cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
        -- otherwise: leave the nested conversion alone
            CmmMachOp conv_outer args
  -- (a 'where' keyword line is elided here)
    -- classify an integer<->integer conversion op as
    -- (from, to, is-signed); floating conversions give Nothing
    isIntConversion (MO_U_Conv rep1 rep2)
      | not (isFloatingRep rep1) && not (isFloatingRep rep2)
      = Just (rep1,rep2,False)
    isIntConversion (MO_S_Conv rep1 rep2)
      | not (isFloatingRep rep1) && not (isFloatingRep rep2)
      = Just (rep1,rep2,True)
    isIntConversion _ = Nothing

    -- pick the signed or unsigned conversion constructor
    intconv True  = MO_S_Conv
    intconv False = MO_U_Conv

-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
-- but what if the architecture only supports word-sized loads, should
-- we do the transformation anyway?
-- Binary operation on two integer literals: fold it completely.
-- NOTE(review): the 'case mop of' header and the bindings of
-- x_u/y_u/x_s/y_s — presumably the unsigned- and signed-narrowed views
-- of x and y at rep xrep — are elided from this view; TODO confirm.
cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
    -- for comparisons: don't forget to narrow the arguments before
    -- comparing, since they might be out of range.
    MO_Eq r   -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
    MO_Ne r   -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)

    MO_U_Gt r -> CmmLit (CmmInt (if x_u >  y_u then 1 else 0) wordRep)
    MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
    MO_U_Lt r -> CmmLit (CmmInt (if x_u <  y_u then 1 else 0) wordRep)
    MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)

    MO_S_Gt r -> CmmLit (CmmInt (if x_s >  y_s then 1 else 0) wordRep)
    MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
    MO_S_Lt r -> CmmLit (CmmInt (if x_s <  y_s then 1 else 0) wordRep)
    MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)

    MO_Add r -> CmmLit (CmmInt (x + y) r)
    MO_Sub r -> CmmLit (CmmInt (x - y) r)
    MO_Mul r -> CmmLit (CmmInt (x * y) r)
    -- division folds only when the divisor is provably non-zero
    MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
    MO_S_Rem  r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)

    MO_And r -> CmmLit (CmmInt (x .&. y) r)
    MO_Or  r -> CmmLit (CmmInt (x .|. y) r)
    MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)

    MO_Shl r   -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
    MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
    MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)

    other -> CmmMachOp mop args
638 -- When possible, shift the constants to the right-hand side, so that we
639 -- can match for strength reductions. Note that the code generator will
640 -- also assume that constants have been shifted to the right when
-- Move a sole literal operand of a commutable op to the right-hand
-- side, so later clauses (and the code generator) only have to look
-- for constants on the right.
cmmMachOpFold op [lit@(CmmLit _), other]
  | not (isLit other) && isCommutableMachOp op
  = cmmMachOpFold op [other, lit]
647 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
648 -- moved to the right, it is more likely that we will find
649 -- opportunities for constant folding when the expression is
652 -- ToDo: this appears to introduce a quadratic behaviour due to the
653 -- nested cmmMachOpFold. Can we fix this?
655 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
656 -- is also a lit (otherwise arg1 would be on the right). If we
657 -- put arg1 on the left of the rearranged expression, we'll get into a
658 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
-- Reassociate (a ? b) ? c into a ? (b ? c) for associative ops, so
-- literals (already shifted rightwards) can meet and fold.  The
-- 'not (isLit a)' guard prevents looping on all-literal chains.
cmmMachOpFold outer_op [CmmMachOp inner_op [a, b], c]
  | outer_op == inner_op && isAssociativeMachOp outer_op && not (isLit a)
  = cmmMachOpFold outer_op [a, cmmMachOpFold inner_op [b, c]]
-- Fold register +/- constant into the CmmRegOff shorthand.
cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt i rep)]
  = CmmRegOff reg (fromIntegral (narrowS rep i))
cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt i rep)]
  = CmmRegOff reg (off + fromIntegral (narrowS rep i))
cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt i rep)]
  = CmmRegOff reg (negate (fromIntegral (narrowS rep i)))
cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt i rep)]
  = CmmRegOff reg (off - fromIntegral (narrowS rep i))
-- Fold label plus/minus a constant offset into a single literal.
cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt off rep)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep off)))
cmmMachOpFold (MO_Add _) [CmmLit (CmmInt off rep), CmmLit (CmmLabel lbl)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep off)))
cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt off rep)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep off))))
-- We can often do something with constants of 0 and 1 ...
-- Right operand is literal 0: simplify comparisons against zero of
-- boolean-valued (comparison) expressions.
-- NOTE(review): the 'case mop of' header and several alternatives
-- (presumably the arithmetic identities, e.g. x+0, x-0) are elided
-- from this view.
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
    -- a comparison result is already 0-or-1, so e.g. (x /= 0) is x
    MO_Ne r   | isComparisonExpr x -> x
    MO_Eq r   | Just x' <- maybeInvertConditionalExpr x -> x'
    MO_U_Gt r | isComparisonExpr x -> x
    MO_S_Gt r | isComparisonExpr x -> x
    MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
    MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
    MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
    MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
    MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
    MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
    other -> CmmMachOp mop args
-- Right operand is literal 1: simplify comparisons of boolean-valued
-- (comparison) expressions against one, plus rem-by-1.
-- NOTE(review): the 'case mop of' header and some alternatives
-- (presumably the multiplicative/division identities) are elided from
-- this view.
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
    -- anything `rem` 1 is 0
    MO_S_Rem r -> CmmLit (CmmInt 0 rep)
    MO_U_Rem r -> CmmLit (CmmInt 0 rep)
    MO_Ne r   | Just x' <- maybeInvertConditionalExpr x -> x'
    MO_Eq r   | isComparisonExpr x -> x
    MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
    MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
    MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
    MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
    MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
    MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
    MO_U_Ge r | isComparisonExpr x -> x
    MO_S_Ge r | isComparisonExpr x -> x
    other -> CmmMachOp mop args
-- Now look for multiplication/division by powers of 2 (integers).
-- NOTE(review): the dispatch on mop (selecting multiply vs. signed
-- quotient) and the Nothing -> unchanged alternatives are elided from
-- this view.  Also NOTE(review): folding MO_S_Quot to an arithmetic
-- right shift rounds toward negative infinity for negative dividends,
-- whereas quot rounds toward zero — confirm the elided guards exclude
-- that case.
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
    -> case exactLog2 n of
         Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
    -> case exactLog2 n of
         Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
    -- the original expression, used when no strength reduction applies
    unchanged = CmmMachOp mop args

-- Anything else is just too hard.
cmmMachOpFold mop args = CmmMachOp mop args
748 -- -----------------------------------------------------------------------------
751 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
752 -- from GCC. It requires bit manipulation primitives, and we use GHC
753 -- extensions. Tough.
755 -- Used to be in MachInstrs --SDM.
756 -- ToDo: remove use of unboxery --SDM.
-- | @exactLog2 x@ is @Just (log2 x)@ when @x@ is an exact positive
-- power of 2 below 2^31, and Nothing otherwise.  Uses GHC unboxed
-- primitives; several lines (equation head, Nothing branches, closing
-- braces/where) are elided from this view.
exactLog2 :: Integer -> Maybe Integer
  = if (x <= 0 || x >= 2147483648) then
    case iUnbox (fromInteger x) of { x# ->
      -- x AND (-x) isolates the lowest set bit; if that differs from x,
      -- more than one bit is set and x is not a power of 2.
    if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
    Just (toInteger (iBox (pow2 x#)))
    -- pow2: the index of the single set bit, by repeated right shift
    pow2 x# | x# ==# 1# = 0#
            | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
777 -- -----------------------------------------------------------------------------
778 -- widening / narrowing
-- | Truncate an 'Integer' to the width of the given integer 'MachRep',
-- reinterpreting the low bits as an unsigned value.
narrowU :: MachRep -> Integer -> Integer
narrowU I8  x = fromIntegral (fromIntegral x :: Word8)
narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
-- fix: panic message previously said "narrowTo", which names no
-- function in this file and made the error untraceable
narrowU _ _ = panic "narrowU: no match"
-- | Truncate an 'Integer' to the width of the given integer 'MachRep',
-- reinterpreting the low bits as a signed (two's-complement) value.
narrowS :: MachRep -> Integer -> Integer
narrowS I8  x = fromIntegral (fromIntegral x :: Int8)
narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
-- fix: panic message previously said "narrowTo", which names no
-- function in this file and made the error untraceable
narrowS _ _ = panic "narrowS: no match"
794 -- -----------------------------------------------------------------------------
797 -- This pass inlines assignments to temporaries that are used just
798 -- once in the very next statement only. Generalising this would be
799 -- quite difficult (have to take into account aliasing of memory
800 -- writes, and so on), but at the moment it catches a number of useful
801 -- cases and lets the code generator generate much better code.
803 -- NB. This assumes that temporaries are single-assignment.
-- | The mini-inliner pass over a procedure's blocks: count how many
-- times each local register is used across all blocks, then let
-- 'cmmMiniInline' inline single-use temporaries within each block.
cmmPeep :: [CmmBasicBlock] -> [CmmBasicBlock]
cmmPeep blocks = map do_inline blocks
  -- (a 'where' keyword line is elided here)
  -- usage counts for one block
  blockUses (BasicBlock _ stmts)
    = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)

  -- usage counts summed over the whole procedure
  uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)

  do_inline (BasicBlock id stmts)
    = BasicBlock id (cmmMiniInline uses stmts)
-- | Inline assignments to temporaries that are used exactly once and
-- whose single use 'lookForInline' can locate; the assignment itself
-- is then dropped, since its RHS now lives at the use site.
cmmMiniInline :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
cmmMiniInline uses [] = []
cmmMiniInline uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
  | Just 1 <- lookupUFM uses u,
    Just stmts' <- lookForInline u expr stmts
  -- (the '=' line is elided; the trace below is presumably wrapped in
  --  a debug-only CPP conditional — TODO confirm)
    trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
    cmmMiniInline uses stmts'

cmmMiniInline uses (stmt:stmts)
  = stmt : cmmMiniInline uses stmts
-- Try to inline a temporary assignment. We can skip over assignments to
-- other temporaries, because we know that expressions aren't side-effecting
-- and temporaries are single-assignment.
-- NOTE(review): a guard on this first equation (presumably u /= u')
-- and the Nothing alternatives are elided from this view.
lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
  = case lookupUFM (getExprUses rhs) u of
      Just 1 -> Just (inlineStmt u expr stmt : rest)
      _other -> case lookForInline u expr rest of
                  Just stmts -> Just (stmt:stmts)

-- no-ops can always be skipped over
lookForInline u expr (CmmNop : rest)
  = lookForInline u expr rest

-- any other statement: inline here only if this statement is the
-- single use; otherwise give up (elided fallback presumably Nothing)
lookForInline u expr (stmt:stmts)
  = case lookupUFM (getStmtUses stmt) u of
      Just 1 -> Just (inlineStmt u expr stmt : stmts)
851 -- -----------------------------------------------------------------------------
852 -- Boring Cmm traversals for collecting usage info and substitutions.
-- | Count the uses of each local register within one statement.
getStmtUses :: CmmStmt -> UniqFM Int
getStmtUses (CmmAssign _ e) = getExprUses e
getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
getStmtUses (CmmCall target _ es _)
  -- note: foreign-call targets count as uses, so 'inlineStmt' must
  -- substitute into them as well
  = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
  where uses (CmmForeignCall e _) = getExprUses e
        -- (the catch-all equation for 'uses' is elided from this view)
getStmtUses (CmmCondBranch e _) = getExprUses e
getStmtUses (CmmSwitch e _) = getExprUses e
getStmtUses (CmmJump e _) = getExprUses e
getStmtUses _ = emptyUFM
-- | Count the uses of each local register within one expression.
getExprUses :: CmmExpr -> UniqFM Int
getExprUses expr = case expr of
  CmmReg    (CmmLocal (LocalReg u _))   -> unitUFM u 1
  CmmRegOff (CmmLocal (LocalReg u _)) _ -> unitUFM u 1
  CmmLoad e _    -> getExprUses e
  CmmMachOp _ es -> getExprsUses es
  _              -> emptyUFM

-- | Combined use counts over a list of expressions.
getExprsUses :: [CmmExpr] -> UniqFM Int
getExprsUses = foldr (plusUFM_C (+) . getExprUses) emptyUFM
-- | Substitute expression @a@ for local register @u@ throughout one
-- statement.
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
inlineStmt u a (CmmCall target regs es vols)
   = CmmCall (infn target) regs es' vols
   -- fix: getStmtUses counts uses inside a foreign-call target (its
   -- CmmForeignCall case feeds getExprUses), so we must substitute
   -- there too; previously the target was returned unchanged, which
   -- could drop a single-use temp's definition while leaving a
   -- reference to it in the call target.
   where infn (CmmForeignCall fn cconv) = CmmForeignCall (inlineExpr u a fn) cconv
         infn (CmmPrim p) = CmmPrim p
         es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
inlineStmt u a other_stmt = other_stmt
-- | Substitute expression @a@ for local register @u@ within an
-- expression.  NOTE(review): the guards of the CmmReg equation —
-- presumably @| u == u' = a@ plus an identity fallback — and the
-- otherwise-branch of the CmmRegOff equation are elided from this view.
inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
-- a RegOff of the substituted register expands to an explicit add
inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
  | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
inlineExpr u a other_expr = other_expr
899 -- -----------------------------------------------------------------------------
-- | True when the expression is a literal.  (The catch-all equation
-- returning False is not visible in this view — TODO confirm.)
isLit (CmmLit _) = True
-- | Is this expression an application of a comparison MachOp (and
-- therefore known to evaluate to 0 or 1)?
isComparisonExpr :: CmmExpr -> Bool
isComparisonExpr expr = case expr of
  CmmMachOp op _ -> isComparisonMachOp op
  _              -> False
-- | Invert a comparison expression (e.g. < becomes >=) when the
-- top-level operator has an inverse; Nothing otherwise.
maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
maybeInvertConditionalExpr (CmmMachOp op args) =
  fmap (\inv -> CmmMachOp inv args) (maybeInvertComparison op)
maybeInvertConditionalExpr _ = Nothing