1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
20 import RegAllocInfo ( jumpDests )
22 import PositionIndependentCode
25 import PprCmm ( pprStmt, pprCmms )
27 import CLabel ( CLabel, mkSplitMarkerLabel, mkAsmTempLabel )
28 #if powerpc_TARGET_ARCH
29 import CLabel ( mkRtsCodeLabel )
33 import Unique ( Unique, getUnique )
36 import List ( groupBy, sortBy )
37 import CLabel ( pprCLabel )
38 import ErrUtils ( dumpIfSet_dyn )
39 import DynFlags ( DynFlags, DynFlag(..), dopt )
40 import StaticFlags ( opt_Static, opt_PIC )
43 import qualified Pretty
51 import List ( intersperse )
60 The native-code generator has machine-independent and
61 machine-dependent modules.
63 This module ("AsmCodeGen") is the top-level machine-independent
64 module. Before entering machine-dependent land, we do some
65 machine-independent optimisations (defined below) on the
68 We convert to the machine-specific 'Instr' datatype with
69 'cmmCodeGen', assuming an infinite supply of registers. We then use
70 a machine-independent register allocator ('regAlloc') to rejoin
71 reality. Obviously, 'regAlloc' has machine-specific helper
72 functions (see about "RegAllocInfo" below).
74 Finally, we order the basic blocks of the function so as to minimise
75 the number of jumps between blocks, by utilising fallthrough wherever
78 The machine-dependent bits break down as follows:
80 * ["MachRegs"] Everything about the target platform's machine
81 registers (and immediate operands, and addresses, which tend to
82 intermingle/interact with registers).
84 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
85 have a module of its own), plus a miscellany of other things
86 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
88 * ["MachCodeGen"] is where 'Cmm' stuff turns into
91 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
94 * ["RegAllocInfo"] In the register allocator, we manipulate
95 'MRegsState's, which are 'BitSet's, one bit per machine register.
96 When we want to say something about a specific machine register
97 (e.g., ``it gets clobbered by this instruction''), we set/unset
98 its bit. Obviously, we do this 'BitSet' thing for efficiency
101 The 'RegAllocInfo' module collects together the machine-specific
102 info needed to do register allocation.
104 * ["RegisterAlloc"] The (machine-independent) register allocator.
107 -- -----------------------------------------------------------------------------
108 -- Top-level of the native codegen
110 -- NB. We *lazily* compile each block of code for space reasons.
-- | Top-level driver: run the whole native-code pipeline over a list of
-- Cmm groups under one unique supply, dump the optimised Cmm if asked,
-- and return the assembled output document plus dynamic-linking stubs.
-- NOTE(review): several lines of this definition appear to be missing
-- from this extraction (the `where`/`case` glue and parts of dyld_stubs
-- and my_vcat); do not infer behaviour from the gaps.
112 nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
113 nativeCodeGen dflags cmms us
114 = let (res, _) = initUs us $
115 cgCmm (concat (map add_split cmms))
117 cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [CLabel])
-- Compile each top lazily; collect (optimised Cmm, asm doc, imports).
119 lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
120 case unzip3 results of { (cmms,docs,imps) ->
121 returnUs (Cmm cmms, my_vcat docs, concat imps)
124 case res of { (ppr_cmms, insn_sdoc, imports) -> do
125 dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
126 return (insn_sdoc Pretty.$$ dyld_stubs imports
127 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
128 -- On recent versions of Darwin, the linker supports
129 -- dead-stripping of code and data on a per-symbol basis.
130 -- There's a hack to make this work in PprMach.pprNatCmmTop.
131 Pretty.$$ Pretty.text ".subsections_via_symbols"
-- When splitting objects, a split-marker proc is prepended to each group.
139 | dopt Opt_SplitObjs dflags = split_marker : tops
142 split_marker = CmmProc [] mkSplitMarkerLabel [] []
144 -- Generate "symbol stubs" for all external symbols that might
145 -- come from a dynamic library.
146 {- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
147 map head $ group $ sort imps-}
149 -- (Hack) sometimes two Labels pretty-print the same, but have
150 -- different uniques; so we compare their text versions...
152 | needImportedSymbols
154 (pprGotDeclaration :) $
155 map (pprImportedSymbol . fst . head) $
156 groupBy (\(_,a) (_,b) -> a == b) $
157 sortBy (\(_,a) (_,b) -> compare a b) $
163 where doPpr lbl = (lbl, Pretty.render $ pprCLabel lbl astyle)
164 astyle = mkCodeStyle AsmStyle
-- my_vcat: plain vcat normally; a debug variant (partially missing here)
-- interleaves "___ncg_debug_marker" comments between procedures.
167 my_vcat sds = Pretty.vcat sds
169 my_vcat sds = Pretty.vcat (
172 Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
173 Pretty.$$ Pretty.char ' '
180 -- Complete native code generation phase for a single top-level chunk
-- | Complete native-code pipeline for one top-level Cmm chunk:
-- fix global-register assignments, generic Cmm->Cmm optimisation,
-- instruction selection, register allocation, block sequencing, and
-- (on x86) the FP-kludge pass; returns the optimised Cmm (for dumping),
-- the rendered assembly, and the labels that must be imported.
-- NOTE(review): the `then`-branch of the dump expression (line 190) and
-- the definition of `bind` are missing from this extraction.
183 cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [CLabel])
184 cmmNativeGen dflags cmm
185 = {-# SCC "fixAssigns" #-}
186 fixAssignsTop cmm `thenUs` \ fixed_cmm ->
187 {-# SCC "genericOpt" #-}
188 cmmToCmm fixed_cmm `bind` \ (cmm, imports) ->
-- Only keep the optimised Cmm around if we will actually dump it.
189 (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
191 else CmmData Text []) `bind` \ ppr_cmm ->
192 {-# SCC "genMachCode" #-}
193 genMachCode cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
194 {-# SCC "regAlloc" #-}
195 map regAlloc pre_regalloc `bind` \ with_regs ->
196 {-# SCC "sequenceBlocks" #-}
197 map sequenceTop with_regs `bind` \ sequenced ->
198 {-# SCC "x86fp_kludge" #-}
199 map x86fp_kludge sequenced `bind` \ final_mach_code ->
201 Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
203 returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
-- | Insert x87 ffree instructions into each basic block of a proc
-- (via i386_insert_ffrees); data sections are left alone.
-- NOTE(review): the surrounding CPP conditional (presumably
-- i386_TARGET_ARCH) and the `where` line are missing here, which is why
-- both an arch-specific and an identity equation are visible.
205 x86fp_kludge :: NatCmmTop -> NatCmmTop
206 x86fp_kludge top@(CmmData _ _) = top
208 x86fp_kludge top@(CmmProc info lbl params code) =
209 CmmProc info lbl params (map bb_i386_insert_ffrees code)
211 bb_i386_insert_ffrees (BasicBlock id instrs) =
212 BasicBlock id (i386_insert_ffrees instrs)
-- Non-x86 path: identity.
214 x86fp_kludge top = top
217 -- -----------------------------------------------------------------------------
218 -- Sequencing the basic blocks
220 -- Cmm BasicBlocks are self-contained entities: they always end in a
221 -- jump, either non-local or to another basic block in the same proc.
222 -- In this phase, we attempt to place the basic blocks in a sequence
223 -- such that as many of the local jumps as possible turn into
-- | Reorder the basic blocks of a procedure to maximise fallthrough
-- (see 'sequenceBlocks').  Data sections pass through unchanged.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop top = case top of
    CmmData _ _ -> top
    CmmProc info lbl params blocks
        -> CmmProc info lbl params (sequenceBlocks blocks)
231 -- The algorithm is very simple (and stupid): we make a graph out of
232 -- the blocks where there is an edge from one block to another iff the
233 -- first block ends by jumping to the second. Then we topologically
234 -- sort this graph. Then traverse the list: for each block, we first
235 -- output the block, then if it has an out edge, we move the
236 -- destination of the out edge to the front of the list, and continue.
-- | Order a procedure's blocks for fallthrough.  The entry block must
-- stay first; the remaining blocks are strongly-connected-component
-- sorted and then threaded through 'seqBlocks'.
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks [] = []
sequenceBlocks (entry : rest) =
    seqBlocks (mkNode entry : otherNodes)
  where
    otherNodes = reverse (flattenSCCs (sccBlocks rest))
-- | Build the block graph (nodes keyed by block-label unique, edges
-- given by 'getOutEdges') and compute its strongly connected components.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks = stronglyConnCompR . map mkNode
-- | Successor uniques of a block: the jump destinations of its last
-- instruction, but only when there is exactly one destination.
-- NOTE(review): the non-singleton alternative (line 250) is missing
-- from this extraction — presumably it returns [].
247 getOutEdges :: [Instr] -> [Unique]
248 getOutEdges instrs = case jumpDests (last instrs) [] of
249 [one] -> [getUnique one]
251 -- we're only interested in the last instruction of
252 -- the block, and only if it has a single destination.
-- | Graph node for block sequencing: the block itself, the unique of
-- its label, and the uniques of its single-destination successors.
mkNode block@(BasicBlock blockId instrs) =
    (block, getUnique blockId, getOutEdges instrs)
-- | Thread the topologically-sorted node list into a block sequence:
-- a block with one out-edge tries to pull its destination directly
-- after it, dropping its final jump ('init instrs') when that works.
-- NOTE(review): the base equation (seqBlocks [] = ..., line ~256) and
-- the `where` keyword line (262) are missing from this extraction.
257 seqBlocks ((block,_,[]) : rest)
258 = block : seqBlocks rest
259 seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
260 | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
261 | otherwise = block : seqBlocks rest'
263 (can_fallthrough, rest') = reorder next [] rest
264 -- TODO: we should do a better job for cycles; try to maximise the
265 -- fallthroughs within a loop.
266 seqBlocks _ = panic "AsmCodegen:seqBlocks"
-- | Search the node list for the entry with the given id.  If found,
-- return True and the list with that node moved to the front (the
-- already-scanned prefix keeps its original order); otherwise return
-- False and the list unchanged.
reorder _ acc [] = (False, reverse acc)
reorder want acc (node@(blk, nid, outs) : rest)
    | want == nid = (True, (blk, want, outs) : reverse acc ++ rest)
    | otherwise   = reorder want (node : acc) rest
273 -- -----------------------------------------------------------------------------
274 -- Instruction selection
276 -- Native code instruction selection for a chunk of stix code. For
277 -- this part of the computation, we switch from the UniqSM monad to
278 -- the NatM monad. The latter carries not only a Unique, but also an
279 -- Int denoting the current C stack pointer offset in the generated
280 -- code; this is needed for creating correct spill offsets on
281 -- architectures which don't offer, or for which it would be
282 -- prohibitively expensive to employ, a frame pointer register. Viz,
285 -- The offset is measured in bytes, and indicates the difference
286 -- between the current (simulated) C stack-ptr and the value it was at
287 -- the beginning of the block. For stacks which grow down, this value
288 -- should be either zero or negative.
290 -- Switching between the two monads whilst carrying along the same
291 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection for one chunk: run 'cmmTopCodeGen' in the
-- NatM monad (unique supply + simulated C stack delta + imports), and
-- fail loudly if the final stack delta is non-zero.
-- NOTE(review): lines between the bindings and the `then`/`else`
-- (notably the `in if final_delta == 0` test, lines 301-302) are
-- missing from this extraction.
293 genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [CLabel])
295 genMachCode cmm_top initial_us
296 = let initial_st = mkNatM_State initial_us 0
297 (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
298 final_us = natm_us final_st
299 final_delta = natm_delta final_st
300 final_imports = natm_imports final_st
301 then ((new_tops, final_imports), final_us)
302 else pprPanic "genMachCode: nonzero final delta"
307 -- -----------------------------------------------------------------------------
308 -- Fixup assignments to global registers so that they assign to
309 -- locations within the RegTable, if appropriate.
311 -- Note that we currently don't fixup reads here: they're done by
312 -- the generic optimiser below, to avoid having two separate passes
-- | Rewrite assignments to global registers over every block of a
-- procedure (see 'fixAssign'); data sections pass through untouched.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop top =
    case top of
      CmmData _ _ -> returnUs top
      CmmProc info lbl params blocks ->
          mapUs fixAssignsBlock blocks `thenUs` \ fixedBlocks ->
          returnUs (CmmProc info lbl params fixedBlocks)
-- | Apply 'fixAssigns' to the statements of one basic block, keeping
-- the block id.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock blockId stmts) =
    fixAssigns stmts `thenUs` \ fixedStmts ->
    returnUs (BasicBlock blockId fixedStmts)
-- | Fix every statement of a list; each statement may expand into
-- several (e.g. caller-save wrapping around calls), hence the concat.
-- NOTE(review): the equation's left-hand side (line 327) is missing
-- from this extraction.
326 fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
328 mapUs fixAssign stmts `thenUs` \ stmtss ->
329 returnUs (concat stmtss)
-- | Rewrite a single statement: an assignment to a global register
-- that has no machine register on this arch becomes a store into its
-- slot in the register table; assigning to BaseReg is rejected.
-- NOTE(review): the `where` keyword line (344) is missing from this
-- extraction; reg_or_addr is its local binding.
331 fixAssign :: CmmStmt -> UniqSM [CmmStmt]
332 fixAssign (CmmAssign (CmmGlobal BaseReg) src)
333 = panic "cmmStmtConFold: assignment to BaseReg";
335 fixAssign (CmmAssign (CmmGlobal reg) src)
336 | Left realreg <- reg_or_addr
337 = returnUs [CmmAssign (CmmGlobal reg) src]
338 | Right baseRegAddr <- reg_or_addr
339 = returnUs [CmmStore baseRegAddr src]
340 -- Replace register leaves with appropriate StixTrees for
341 -- the given target. GlobalRegs which map to a reg on this
342 -- arch are left unchanged. Assigning to BaseReg is always
343 -- illegal, so we check for that.
345 reg_or_addr = get_GlobalReg_reg_or_addr reg
-- | Calls: results landing in table-resident global registers are
-- redirected through fresh local temporaries and stored back after the
-- call; caller-save/restore code is wrapped around the call.
-- NOTE(review): lines 351-353 (the stores/restore tail of the returned
-- list), 356 (`where`), and 360/365-367 (the Right branch header and
-- closing) are missing from this extraction.
347 fixAssign (CmmCall target results args vols)
348 = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
349 returnUs (caller_save ++
350 CmmCall target results' args vols :
354 -- we also save/restore any caller-saves STG registers here
355 (caller_save, caller_restore) = callerSaveVolatileRegs vols
357 fixResult g@(CmmGlobal reg,hint) =
358 case get_GlobalReg_reg_or_addr reg of
359 Left realreg -> returnUs (g, [])
361 getUniqueUs `thenUs` \ uq ->
362 let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
363 returnUs ((local,hint),
364 [CmmStore baseRegAddr (CmmReg local)])
-- All other statements are left alone.
368 fixAssign other_stmt = returnUs [other_stmt]
370 -- -----------------------------------------------------------------------------
371 -- Generic Cmm optimiser
377 (b) Simple inlining: a temporary which is assigned to and then
378 used, once, can be shorted.
379 (c) Replacement of references to GlobalRegs which do not have
380 machine registers by the appropriate memory load (eg.
381 Hp ==> *(BaseReg + 34) ).
382 (d) Position independent code and dynamic linking
383 (i) introduce the appropriate indirections
384 and position independent refs
385 (ii) compile a list of imported symbols
387 Ideas for other things we could do (ToDo):
389 - shortcut jumps-to-jumps
390 - eliminate dead code blocks
391 - simple CSE: if an expr is assigned to a temp, then replace later occs of
392 that expr with the temp, until the expr is no longer valid (can push through
393 temp assignments, and certain assigns to mem...)
-- | Machine-independent Cmm-to-Cmm optimisation: mini-inline single-use
-- temporaries ('cmmPeep'), then constant-fold every block, collecting
-- the imported labels introduced along the way.
cmmToCmm :: CmmTop -> (CmmTop, [CLabel])
cmmToCmm top@(CmmData _ _) = (top, [])
cmmToCmm (CmmProc info lbl params blocks) = runCmmOpt $
    mapM cmmBlockConFold (cmmPeep blocks) >>= \ blocks' ->
    return (CmmProc info lbl params blocks')
-- | A tiny state monad threading the list of imported labels through
-- the Cmm optimiser (unboxed-tuple result for speed).
-- NOTE(review): the (>>=) definition of the Monad instance is only
-- partially present (lines 406 and 408-410 missing).
402 newtype CmmOptM a = CmmOptM ([CLabel] -> (# a, [CLabel] #))
404 instance Monad CmmOptM where
405 return x = CmmOptM $ \imports -> (# x,imports #)
407 CmmOptM $ \imports ->
411 CmmOptM g' -> g' imports'
-- Record a label that must be imported (prepended to the state list).
413 addImportCmmOpt :: CLabel -> CmmOptM ()
414 addImportCmmOpt lbl = CmmOptM $ \imports -> (# (), lbl:imports #)
-- Run with an empty import list; return the result and all imports.
416 runCmmOpt :: CmmOptM a -> (a, [CLabel])
417 runCmmOpt (CmmOptM f) = case f [] of
418 (# result, imports #) -> (result, imports)
-- | Constant-fold every statement of one basic block, keeping its id.
cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock blockId stmts) =
    mapM cmmStmtConFold stmts >>= \ stmts' ->
    return (BasicBlock blockId stmts')
-- | (cmmStmtConFold) Fold the expressions inside one statement.
-- NOTE(review): the function head and the scrutinee patterns for the
-- first alternatives (lines 424-427, 433, 438, 452, 459, 462-463) are
-- missing from this extraction; only the alternative bodies survive.
-- Self-assignments become CmmNop; a condition folding to a constant
-- deletes or unconditionalises the branch; jump/call targets are
-- folded with isJumpTarget=True.
428 -> do src' <- cmmExprConFold False src
429 return $ case src' of
430 CmmReg reg' | reg == reg' -> CmmNop
431 new_src -> CmmAssign reg new_src
434 -> do addr' <- cmmExprConFold False addr
435 src' <- cmmExprConFold False src
436 return $ CmmStore addr' src'
439 -> do addr' <- cmmExprConFold True addr
440 return $ CmmJump addr' regs
442 CmmCall target regs args vols
443 -> do target' <- case target of
444 CmmForeignCall e conv -> do
445 e' <- cmmExprConFold True e
446 return $ CmmForeignCall e' conv
447 other -> return other
448 args' <- mapM (\(arg, hint) -> do
449 arg' <- cmmExprConFold False arg
450 return (arg', hint)) args
451 return $ CmmCall target' regs args' vols
453 CmmCondBranch test dest
454 -> do test' <- cmmExprConFold False test
455 return $ case test' of
456 CmmLit (CmmInt 0 _) ->
457 CmmComment (mkFastString ("deleted: " ++
458 showSDoc (pprStmt stmt)))
460 CmmLit (CmmInt n _) -> CmmBranch dest
461 other -> CmmCondBranch test' dest
464 -> do expr' <- cmmExprConFold False expr
465 return $ CmmSwitch expr' ids
-- | (cmmExprConFold) Recursively fold an expression: constant-fold
-- MachOps, turn labels into dynamic references where needed (PIC), and
-- replace global registers that have no machine register with loads
-- from the register table.  The Bool says whether the expression is a
-- jump target (affects how dynamic references are made).
-- NOTE(review): the type signature, the `case expr of` scrutinee line,
-- and several pattern/closing lines (472-473, 476-477, 482, 488,
-- 490-491, 497, 501, 504-505, 515-516, 519, 521, 523, 530, 534-537)
-- are missing from this extraction.
471 cmmExprConFold isJumpTarget expr
474 -> do addr' <- cmmExprConFold False addr
475 return $ CmmLoad addr' rep
478 -- For MachOps, we first optimize the children, and then we try
479 -- our hand at some constant-folding.
480 -> do args' <- mapM (cmmExprConFold False) args
481 return $ cmmMachOpFold mop args'
483 CmmLit (CmmLabel lbl)
484 -> cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
485 CmmLit (CmmLabelOff lbl off)
486 -> do dynRef <- cmmMakeDynamicReference addImportCmmOpt isJumpTarget lbl
487 return $ cmmMachOpFold (MO_Add wordRep) [
489 (CmmLit $ CmmInt (fromIntegral off) wordRep)
492 #if powerpc_TARGET_ARCH
493 -- On powerpc (non-PIC), it's easier to jump directly to a label than
494 -- to use the register table, so we replace these registers
495 -- with the corresponding labels:
496 CmmReg (CmmGlobal GCEnter1)
498 -> cmmExprConFold isJumpTarget $
499 CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
500 CmmReg (CmmGlobal GCFun)
502 -> cmmExprConFold isJumpTarget $
503 CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
506 CmmReg (CmmGlobal mid)
507 -- Replace register leaves with appropriate StixTrees for
508 -- the given target. MagicIds which map to a reg on this
509 -- arch are left unchanged. For the rest, BaseReg is taken
510 -- to mean the address of the reg table in MainCapability,
511 -- and for all others we generate an indirection to its
512 -- location in the register table.
513 -> case get_GlobalReg_reg_or_addr mid of
514 Left realreg -> return expr
517 BaseReg -> cmmExprConFold False baseRegAddr
518 other -> cmmExprConFold False (CmmLoad baseRegAddr
520 -- eliminate zero offsets
522 -> cmmExprConFold False (CmmReg reg)
524 CmmRegOff (CmmGlobal mid) offset
525 -- RegOff leaves are just a shorthand form. If the reg maps
526 -- to a real reg, we keep the shorthand, otherwise, we just
527 -- expand it and defer to the above code.
528 -> case get_GlobalReg_reg_or_addr mid of
529 Left realreg -> return expr
531 -> cmmExprConFold False (CmmMachOp (MO_Add wordRep) [
532 CmmReg (CmmGlobal mid),
533 CmmLit (CmmInt (fromIntegral offset)
539 -- -----------------------------------------------------------------------------
540 -- MachOp constant folder
542 -- Now, try to constant-fold the MachOps. The arguments have already
543 -- been optimized and folded.
-- NOTE(review): the signature's result line, the `cmmMachOpFold ::`
-- line itself, the `case op of` scrutinee, and the MO_S_Conv pattern
-- line (549, 551, 554, 559) are missing from this extraction.
546 :: MachOp -- The operation from an CmmMachOp
547 -> [CmmExpr] -- The optimized arguments
-- Unary operation applied to an integer literal: fold at compile time.
550 cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
552 MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
553 MO_Not r -> CmmLit (CmmInt (complement x) rep)
555 -- these are interesting: we must first narrow to the
556 -- "from" type, in order to truncate to the correct size.
557 -- The final narrow/widen to the destination type
558 -- is implicit in the CmmLit.
560 | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to)
561 | otherwise -> CmmLit (CmmInt (narrowS from x) to)
562 MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
564 _ -> panic "cmmMachOpFold: unknown unary op"
567 -- Eliminate conversion NOPs
-- A conversion between identical reps is the identity.
568 cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
569 cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
571 -- Eliminate nested conversions where possible
-- NOTE(review): the `= case () of` / fall-through / `where` scaffolding
-- (lines 575, 589, 591, 599) is missing from this extraction.
572 cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
573 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
574 Just (_, rep3,signed2) <- isIntConversion conv_outer
576 -- widen then narrow to the same size is a nop
577 _ | rep1 < rep2 && rep1 == rep3 -> x
578 -- Widen then narrow to different size: collapse to single conversion
579 -- but remember to use the signedness from the widening, just in case
580 -- the final conversion is a widen.
581 | rep1 < rep2 && rep2 > rep3 ->
582 cmmMachOpFold (intconv signed1 rep1 rep3) [x]
583 -- Nested widenings: collapse if the signedness is the same
584 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
585 cmmMachOpFold (intconv signed1 rep1 rep3) [x]
586 -- Nested narrowings: collapse
587 | rep1 > rep2 && rep2 > rep3 ->
588 cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
590 CmmMachOp conv_outer args
-- Classify a MachOp as an integer-only conversion (rep-from, rep-to,
-- signedness); floating conversions are excluded.
592 isIntConversion (MO_U_Conv rep1 rep2)
593 | not (isFloatingRep rep1) && not (isFloatingRep rep2)
594 = Just (rep1,rep2,False)
595 isIntConversion (MO_S_Conv rep1 rep2)
596 | not (isFloatingRep rep1) && not (isFloatingRep rep2)
597 = Just (rep1,rep2,True)
598 isIntConversion _ = Nothing
600 intconv True = MO_S_Conv
601 intconv False = MO_U_Conv
603 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
604 -- but what if the architecture only supports word-sized loads, should
605 -- we do the transformation anyway?
-- Binary operation on two integer literals: fold at compile time.
-- NOTE(review): the `= case mop of` line and the `where` clause that
-- binds the narrowed views x_u/y_u (unsigned) and x_s/y_s (signed)
-- (lines 608 and 639 onward) are missing from this extraction —
-- presumably narrowU/narrowS applied at xrep; confirm against upstream.
607 cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
609 -- for comparisons: don't forget to narrow the arguments before
610 -- comparing, since they might be out of range.
611 MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
612 MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)
614 MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep)
615 MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
616 MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep)
617 MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)
619 MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep)
620 MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
621 MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep)
622 MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)
624 MO_Add r -> CmmLit (CmmInt (x + y) r)
625 MO_Sub r -> CmmLit (CmmInt (x - y) r)
626 MO_Mul r -> CmmLit (CmmInt (x * y) r)
627 MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
628 MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)
630 MO_And r -> CmmLit (CmmInt (x .&. y) r)
631 MO_Or r -> CmmLit (CmmInt (x .|. y) r)
632 MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)
634 MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
635 MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
636 MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
638 other -> CmmMachOp mop args
647 -- When possible, shift the constants to the right-hand side, so that we
648 -- can match for strength reductions. Note that the code generator will
649 -- also assume that constants have been shifted to the right when
-- Commutable op with a literal on the left and a non-literal on the
-- right: swap the operands and re-fold.
652 cmmMachOpFold op [x@(CmmLit _), y]
653 | not (isLit y) && isCommutableMachOp op
654 = cmmMachOpFold op [y, x]
656 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
657 -- moved to the right, it is more likely that we will find
658 -- opportunities for constant folding when the expression is
661 -- ToDo: this appears to introduce a quadratic behaviour due to the
662 -- nested cmmMachOpFold. Can we fix this?
664 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
665 -- is also a lit (otherwise arg1 would be on the right). If we
666 -- put arg1 on the left of the rearranged expression, we'll get into a
667 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
669 cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
670 | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1)
671 = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
673 -- Make a RegOff if we can
-- Add/Sub of a register (or existing RegOff) and an integer literal
-- collapses into a single CmmRegOff; the literal is sign-narrowed to
-- its rep before use.
674 cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
675 = CmmRegOff reg (fromIntegral (narrowS rep n))
676 cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
677 = CmmRegOff reg (off + fromIntegral (narrowS rep n))
678 cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
679 = CmmRegOff reg (- fromIntegral (narrowS rep n))
680 cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
681 = CmmRegOff reg (off - fromIntegral (narrowS rep n))
683 -- Fold label(+/-)offset into a CmmLit where possible
-- Label plus/minus an integer literal becomes a CmmLabelOff literal;
-- the offset is narrowed unsigned to its rep first.
685 cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
686 = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
687 cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
688 = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
689 cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
690 = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
692 -- We can often do something with constants of 0 and 1 ...
-- Right operand is the literal 0: comparisons against 0 of an
-- expression already known to be a comparison (0/1-valued) simplify to
-- the expression itself, its inversion, or a constant.
-- NOTE(review): the `= case mop of` line and the arithmetic identities
-- (x+0, x-0, x*0, ... lines 695-704) are missing from this extraction.
694 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
705 MO_Ne r | isComparisonExpr x -> x
706 MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x'
707 MO_U_Gt r | isComparisonExpr x -> x
708 MO_S_Gt r | isComparisonExpr x -> x
709 MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
710 MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
711 MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
712 MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
713 MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
714 MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
715 other -> CmmMachOp mop args
-- Right operand is the literal 1: rem-by-1 is 0; comparisons of a
-- 0/1-valued expression against 1 simplify analogously to the 0 case.
-- NOTE(review): the `= case mop of` line and the multiplicative
-- identities (x*1, x/1, lines 718-721) are missing from this
-- extraction.
717 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
722 MO_S_Rem r -> CmmLit (CmmInt 0 rep)
723 MO_U_Rem r -> CmmLit (CmmInt 0 rep)
724 MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x'
725 MO_Eq r | isComparisonExpr x -> x
726 MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
727 MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
728 MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
729 MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
730 MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
731 MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
732 MO_U_Ge r | isComparisonExpr x -> x
733 MO_S_Ge r | isComparisonExpr x -> x
734 other -> CmmMachOp mop args
736 -- Now look for multiplication/division by powers of 2 (integers).
-- Strength reduction: multiply by 2^p becomes shift-left; signed
-- division is replaced by arithmetic shift-right.
-- NOTE(review): the `case mop of` scrutinee, the MO_Mul/MO_S_Quot
-- patterns, the `Nothing -> unchanged` alternatives, the correctness
-- caveat for negative dividends, and the `where` line (739-740, 742,
-- 744, 746, 748-750, 752, 754) are missing from this extraction.
738 cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
741 -> case exactLog2 n of
743 Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
745 -> case exactLog2 n of
747 Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
751 unchanged = CmmMachOp mop args
753 -- Anything else is just too hard.
755 cmmMachOpFold mop args = CmmMachOp mop args
757 -- -----------------------------------------------------------------------------
760 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
761 -- from GCC. It requires bit manipulation primitives, and we use GHC
762 -- extensions. Tough.
764 -- Used to be in MachInstrs --SDM.
765 -- ToDo: remove use of unboxery --SDM.
-- | Return Just (log2 x) when x is an exact power of two in
-- (0, 2^31), Nothing otherwise.  Uses the GCC x .&. (-x) == x trick on
-- unboxed ints.
-- NOTE(review): the equation's left-hand side and the Nothing branches
-- (lines 771, 773-774, 777-778, 780-781) are missing from this
-- extraction.
770 exactLog2 :: Integer -> Maybe Integer
772 = if (x <= 0 || x >= 2147483648) then
775 case iUnbox (fromInteger x) of { x# ->
776 if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
779 Just (toInteger (iBox (pow2 x#)))
-- pow2 counts shifts until the (single) set bit reaches position 0.
782 pow2 x# | x# ==# 1# = 0#
783 | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
786 -- -----------------------------------------------------------------------------
787 -- widening / narrowing
-- | Truncate an integer to the given width and reinterpret the bits as
-- an unsigned value.  Only the fixed integer reps up to 64 bits are
-- supported; anything else is a panic.
narrowU :: MachRep -> Integer -> Integer
narrowU I8  x = fromIntegral (fromIntegral x :: Word8)
narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
-- Fix: the panic message used to say "narrowTo", which names neither
-- this function nor narrowS; identify the actual function instead.
narrowU _ _ = panic "narrowU: no match"
-- | Truncate an integer to the given width and reinterpret the bits as
-- a signed value.  Only the fixed integer reps up to 64 bits are
-- supported; anything else is a panic.
narrowS :: MachRep -> Integer -> Integer
narrowS I8  x = fromIntegral (fromIntegral x :: Int8)
narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
-- Fix: the panic message used to say "narrowTo", which names neither
-- this function nor narrowU; identify the actual function instead.
narrowS _ _ = panic "narrowS: no match"
803 -- -----------------------------------------------------------------------------
806 -- This pass inlines assignments to temporaries that are used just
807 -- once in the very next statement only. Generalising this would be
808 -- quite difficult (have to take into account aliasing of memory
809 -- writes, and so on), but at the moment it catches a number of useful
810 -- cases and lets the code generator generate much better code.
812 -- NB. This assumes that temporaries are single-assignment.
-- | Mini-inliner driver: count every local-temporary use across all
-- blocks, then inline single-use temporaries within each block.
-- NOTE(review): the `where` keyword and blank separator lines (816,
-- 819, 821) are missing from this extraction.
814 cmmPeep :: [CmmBasicBlock] -> [CmmBasicBlock]
815 cmmPeep blocks = map do_inline blocks
817 blockUses (BasicBlock _ stmts)
818 = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
-- Global use-count map, summed over all blocks.
820 uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
822 do_inline (BasicBlock id stmts)
823 = BasicBlock id (cmmMiniInline uses stmts)
-- | Inline an assignment to a temporary used exactly once (per the
-- supplied use-count map) into the statement that uses it, dropping
-- the assignment; otherwise keep the statement and continue.
-- NOTE(review): lines 831-832, 834 and 836 (presumably a DEBUG-guarded
-- alternative around the trace call) are missing from this extraction.
826 cmmMiniInline :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
827 cmmMiniInline uses [] = []
828 cmmMiniInline uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
829 | Just 1 <- lookupUFM uses u,
830 Just stmts' <- lookForInline u expr stmts
833 trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
835 cmmMiniInline uses stmts'
837 cmmMiniInline uses (stmt:stmts)
838 = stmt : cmmMiniInline uses stmts
841 -- Try to inline a temporary assignment. We can skip over assignments to
842 -- other temporaries, because we know that expressions aren't side-effecting
843 -- and temporaries are single-assignment.
-- Returns Just the rewritten statement list when the single use of u
-- is found (and substituted), Nothing when inlining is not possible.
-- NOTE(review): the guard on the first clause (845), the Nothing
-- alternatives (849, 851), and the final fall-through (854, 858+) are
-- missing from this extraction.
844 lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
846 = case lookupUFM (getExprUses rhs) u of
847 Just 1 -> Just (inlineStmt u expr stmt : rest)
848 _other -> case lookForInline u expr rest of
850 Just stmts -> Just (stmt:stmts)
852 lookForInline u expr (CmmNop : rest)
853 = lookForInline u expr rest
855 lookForInline u expr (stmt:stmts)
856 = case lookupUFM (getStmtUses stmt) u of
857 Just 1 -> Just (inlineStmt u expr stmt : stmts)
860 -- -----------------------------------------------------------------------------
861 -- Boring Cmm traversals for collecting usage info and substitutions.
-- | Count local-temporary uses in one statement, as a unique->count
-- map.  Note that uses inside a foreign-call *target* are counted too.
-- NOTE(review): the fallback equation of the local `uses` helper
-- (line 869, presumably for CmmPrim) is missing from this extraction.
863 getStmtUses :: CmmStmt -> UniqFM Int
864 getStmtUses (CmmAssign _ e) = getExprUses e
865 getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
866 getStmtUses (CmmCall target _ es _)
867 = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
868 where uses (CmmForeignCall e _) = getExprUses e
870 getStmtUses (CmmCondBranch e _) = getExprUses e
871 getStmtUses (CmmSwitch e _) = getExprUses e
872 getStmtUses (CmmJump e _) = getExprUses e
873 getStmtUses _ = emptyUFM
-- | Count local-temporary uses in one expression: each CmmReg/CmmRegOff
-- leaf naming a local register counts as one use of its unique.
getExprUses :: CmmExpr -> UniqFM Int
getExprUses expr = case expr of
    CmmReg    (CmmLocal (LocalReg u _))   -> unitUFM u 1
    CmmRegOff (CmmLocal (LocalReg u _)) _ -> unitUFM u 1
    CmmLoad e _    -> getExprUses e
    CmmMachOp _ es -> getExprsUses es
    _              -> emptyUFM
-- | Sum the use-count maps of a list of expressions.
getExprsUses es = foldr (\e acc -> plusUFM_C (+) (getExprUses e) acc) emptyUFM es
-- | Substitute expression @a@ for every use of local temporary @u@
-- inside one statement.
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
inlineStmt u a (CmmCall target regs es vols)
   = CmmCall (infn target) regs es' vols
   where
         -- Fix: substitute inside the foreign-call target expression
         -- too.  getStmtUses counts uses in the call target, so the
         -- mini-inliner may pick a statement whose only use of the
         -- temporary is there; previously the target was returned
         -- unchanged, leaving that use un-substituted after the
         -- assignment had been dropped.
         infn (CmmForeignCall fn cconv) = CmmForeignCall (inlineExpr u a fn) cconv
         infn (CmmPrim p) = CmmPrim p
         es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
inlineStmt _ _ other_stmt = other_stmt
-- | Substitute expression @a@ for every CmmReg/CmmRegOff leaf naming
-- temporary @u@ inside one expression; a RegOff use becomes an MO_Add
-- of the substituted expression and the offset literal.
-- NOTE(review): the guard/fall-through lines of the first two clauses
-- (899-900, 903 — presumably `| u == u' = a` and `| otherwise = e`)
-- are missing from this extraction.
897 inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
898 inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
901 inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
902 | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
904 inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
905 inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
906 inlineExpr u a other_expr = other_expr
908 -- -----------------------------------------------------------------------------
-- | True only for literal expressions.
-- NOTE(review): the type signature and the catch-all False equation
-- (lines ~911-914) are missing from this extraction.
913 isLit (CmmLit _) = True
-- | Is this expression a comparison MachOp application (and therefore
-- known to evaluate to 0 or 1)?
isComparisonExpr :: CmmExpr -> Bool
isComparisonExpr expr = case expr of
    CmmMachOp op _ -> isComparisonMachOp op
    _              -> False
-- | Invert a comparison expression (e.g. Lt becomes Ge) when the
-- outermost MachOp has a known inversion; Nothing otherwise.
maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
maybeInvertConditionalExpr (CmmMachOp op args) =
    case maybeInvertComparison op of
        Just op' -> Just (CmmMachOp op' args)
        Nothing  -> Nothing
maybeInvertConditionalExpr _ = Nothing