-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
--
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------

module AsmCodeGen ( nativeCodeGen ) where

#include "HsVersions.h"
import RegAllocInfo	( jumpDests )
import PprCmm		( pprStmt, pprCmms )
import CLabel		( CLabel, mkSplitMarkerLabel )
#if powerpc_TARGET_ARCH
import CLabel		( mkRtsCodeLabel )
#endif

import Unique		( Unique, getUnique )
import PprMach		( pprDyldSymbolStub )
import List		( group, sort )
import ErrUtils		( dumpIfSet_dyn )
import CmdLineOpts	( DynFlags, DynFlag(..), dopt, opt_Static,
			  opt_EnsureSplittableC )

import qualified Pretty
import List		( intersperse )
{-
The native-code generator has machine-independent and
machine-dependent modules.

This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmt's.

We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.

The machine-dependent bits break down as follows:

  * ["MachRegs"]  Everything about the target platform's machine
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
    have a module of its own), plus a miscellany of other things
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)

  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
    machine instructions.

  * ["PprMach"]  'pprInstr' turns an 'Instr' into text (well, really
    a 'Doc').

  * ["RegAllocInfo"]  In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
    reasons.

    The 'RegAllocInfo' module collects together the machine-specific
    info needed to do register allocation.

  * ["RegisterAlloc"]  The (machine-independent) register allocator.
-}
-- -----------------------------------------------------------------------------
-- Top-level of the native codegen

-- NB. We *lazily* compile each block of code for space reasons.
nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
nativeCodeGen dflags cmms us
  | not opt_Static
  = panic "NCG does not handle dynamic libraries right now"
	-- ToDo: MachCodeGen used to have a derefDLL function which expanded
	-- dynamic CLabels (labelDynamic lbl == True) into the appropriate
	-- dereferences.  This should be done in the pre-NCG cmmToCmm pass instead.
	-- It doesn't apply to static data, of course.  There are hacks so that
	-- the RTS knows what to do for references to closures in a DLL in SRTs,
	-- and we never generate a reference to a closure in another DLL in a
	-- static constructor.
  | otherwise
  = let ((ppr_cmms, insn_sdoc, imports), _) = initUs us $
	   cgCmm (concat (map add_split cmms))

	cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [(Bool, CLabel)])
	cgCmm tops =
	   lazyMapUs (cmmNativeGen dflags) tops  `thenUs` \ results ->
	   let (cmms,docs,imps) = unzip3 results in
	   returnUs (Cmm cmms, my_vcat docs, concat imps)
    in do
    dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
    return (insn_sdoc Pretty.$$ dyld_stubs imports)
  where
    add_split (Cmm tops)
	| opt_EnsureSplittableC = split_marker : tops
	| otherwise		= tops

    split_marker = CmmProc [] mkSplitMarkerLabel [] []

	-- Generate "symbol stubs" for all external symbols that might
	-- come from a dynamic library.
#if darwin_TARGET_OS
    dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
				    map head $ group $ sort imps
#else
    dyld_stubs imps = Pretty.empty
#endif

#ifndef NCG_DEBUG
    my_vcat sds = Pretty.vcat sds
#else
    my_vcat sds = Pretty.vcat (
		      intersperse (
			 Pretty.char ' '
			    Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
			    Pretty.$$ Pretty.char ' '
		      )
		      sds
		   )
#endif
-- Complete native code generation phase for a single top-level chunk
-- of Cmm.
cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [(Bool,CLabel)])
cmmNativeGen dflags cmm
   = {-# SCC "fixAssigns"     #-}
	fixAssignsTop cmm	     `thenUs` \ fixed_cmm ->
     {-# SCC "genericOpt"     #-}
	cmmToCmm fixed_cmm	     `bind`   \ cmm ->
	(if dopt Opt_D_dump_opt_cmm dflags  -- space leak avoidance
	    then cmm
	    else CmmData Text [])    `bind`   \ ppr_cmm ->
     {-# SCC "genMachCode"    #-}
	genMachCode cmm		     `thenUs` \ (pre_regalloc, imports) ->
     {-# SCC "regAlloc"       #-}
	map regAlloc pre_regalloc    `bind`   \ with_regs ->
     {-# SCC "sequenceBlocks" #-}
	map sequenceTop with_regs    `bind`   \ sequenced ->
     {-# SCC "x86fp_kludge"   #-}
	map x86fp_kludge sequenced   `bind`   \ final_mach_code ->

	Pretty.vcat (map pprNatCmmTop final_mach_code)	`bind` \ final_sdoc ->

	returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", imports)
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
#if i386_TARGET_ARCH
x86fp_kludge top@(CmmProc info lbl params code) =
	CmmProc info lbl params (map bb_i386_insert_ffrees code)
	where
	  bb_i386_insert_ffrees (BasicBlock id instrs) =
		BasicBlock id (i386_insert_ffrees instrs)
#else
x86fp_kludge top = top
#endif
-- -----------------------------------------------------------------------------
-- Sequencing the basic blocks

-- Cmm BasicBlocks are self-contained entities: they always end in a
-- jump, either non-local or to another basic block in the same proc.
-- In this phase, we attempt to place the basic blocks in a sequence
-- such that as many of the local jumps as possible turn into
-- fallthroughs.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop top@(CmmData _ _) = top
sequenceTop (CmmProc info lbl params blocks) =
  CmmProc info lbl params (sequenceBlocks blocks)
-- The algorithm is very simple (and stupid): we make a graph out of
-- the blocks where there is an edge from one block to another iff the
-- first block ends by jumping to the second.  Then we topologically
-- sort this graph.  Then traverse the list: for each block, we first
-- output the block, then if it has an out edge, we move the
-- destination of the out edge to the front of the list, and continue.
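--
-- An illustrative example (not from the original source): suppose B1
-- ends with a jump to B3, B3 ends with a jump to B2, and B2 ends in a
-- non-local jump.  Starting from the sequence [B1,B2,B3], we emit B1,
-- pull its destination B3 to the front, emit B3, then emit B2:
--
--	[B1,B2,B3]  ==>  [B1, B3, B2]
--
-- so both local jumps become fallthroughs and are dropped.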
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks [] = []
sequenceBlocks (entry:blocks) =
  seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
  -- the first block is the entry point ==> it must remain at the start.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks blocks = stronglyConnCompR (map mkNode blocks)
getOutEdges :: [Instr] -> [Unique]
getOutEdges instrs = case jumpDests (last instrs) [] of
			[one] -> [getUnique one]
			_many -> []
		-- we're only interested in the last instruction of
		-- the block, and only if it has a single destination.

mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
seqBlocks [] = []
seqBlocks ((block,_,[]) : rest)
  = block : seqBlocks rest
seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
  | otherwise	    = block : seqBlocks rest'
  where
	(can_fallthrough, rest') = reorder next [] rest
	-- TODO: we should do a better job for cycles; try to maximise the
	-- fallthroughs within a loop.
seqBlocks _ = panic "AsmCodeGen:seqBlocks"

reorder id accum [] = (False, reverse accum)
reorder id accum (b@(block,id',out) : rest)
  | id == id'  = (True, (block,id,out) : reverse accum ++ rest)
  | otherwise  = reorder id (b:accum) rest
-- -----------------------------------------------------------------------------
-- Instruction selection

-- Native code instruction selection for a chunk of Cmm code.  For
-- this part of the computation, we switch from the UniqSM monad to
-- the NatM monad.  The latter carries not only a Unique, but also an
-- Int denoting the current C stack pointer offset in the generated
-- code; this is needed for creating correct spill offsets on
-- architectures which don't offer, or for which it would be
-- prohibitively expensive to employ, a frame pointer register.  Viz,
-- x86.
-- The offset is measured in bytes, and indicates the difference
-- between the current (simulated) C stack-ptr and the value it was at
-- the beginning of the block.  For stacks which grow down, this value
-- should be either zero or negative.
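--
-- For example (illustrative): if the code has pushed 16 bytes since the
-- start of the block, the simulated delta is -16, and any offset that
-- was computed relative to the original stack pointer must be biased by
-- +16 when it is addressed relative to the current one.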
-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction.  Is that bad?
genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [(Bool,CLabel)])

genMachCode cmm_top initial_us
  = let initial_st	       = mkNatM_State initial_us 0
	(new_tops, final_st)   = initNat initial_st (cmmTopCodeGen cmm_top)
	final_us	       = natm_us final_st
	final_delta	       = natm_delta final_st
	final_imports	       = natm_imports final_st
    in
	if   final_delta == 0
	then ((new_tops, final_imports), final_us)
	else pprPanic "genMachCode: nonzero final delta"
		      (int final_delta)
-- -----------------------------------------------------------------------------
-- Fixup assignments to global registers so that they assign to
-- locations within the RegTable, if appropriate.

-- Note that we currently don't fixup reads here: they're done by
-- the generic optimiser below, to avoid having two separate passes
-- over the Cmm.
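--
-- For example (illustrative): on a target where Hp does not live in a
-- machine register, an assignment
--
--	Hp = e	    becomes	*(BaseReg + offset_of_Hp) = e
--
-- i.e. a CmmStore into Hp's slot in the register table ("offset_of_Hp"
-- is hypothetical shorthand here, not a real identifier).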
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop top@(CmmData _ _) = returnUs top
fixAssignsTop (CmmProc info lbl params blocks) =
  mapUs fixAssignsBlock blocks	`thenUs` \ blocks' ->
  returnUs (CmmProc info lbl params blocks')

fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock id stmts) =
  fixAssigns stmts `thenUs` \ stmts' ->
  returnUs (BasicBlock id stmts')
fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
fixAssigns stmts =
  mapUs fixAssign stmts `thenUs` \ stmtss ->
  returnUs (concat stmtss)
fixAssign :: CmmStmt -> UniqSM [CmmStmt]
fixAssign (CmmAssign (CmmGlobal BaseReg) src)
   = panic "fixAssign: assignment to BaseReg"

fixAssign (CmmAssign (CmmGlobal reg) src)
  | Left  realreg <- reg_or_addr
  = returnUs [CmmAssign (CmmGlobal reg) (cmmExprConFold src)]
  | Right baseRegAddr <- reg_or_addr
  = returnUs [CmmStore baseRegAddr src]
	-- Replace register leaves with appropriate StixTrees for
	-- the given target.  GlobalRegs which map to a reg on this
	-- arch are left unchanged.  Assigning to BaseReg is always
	-- illegal, so we check for that.
  where
	reg_or_addr = get_GlobalReg_reg_or_addr reg
fixAssign (CmmCall target results args vols)
  = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
    returnUs (CmmCall target results' args vols : concat stores)
  where
	fixResult g@(CmmGlobal reg,hint) =
	  case get_GlobalReg_reg_or_addr reg of
	    Left realreg -> returnUs (g, [])
	    Right baseRegAddr ->
		getUniqueUs `thenUs` \ uq ->
		let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
		returnUs ((local,hint),
			  [CmmStore baseRegAddr (CmmReg local)])
	fixResult other =
	  returnUs (other, [])

fixAssign other_stmt = returnUs [other_stmt]
-- -----------------------------------------------------------------------------
-- Generic Cmm optimiser

{-
Here we do:

  (a) Constant folding
  (b) Simple inlining: a temporary which is assigned to and then
      used, once, can be shorted.
  (c) Replacement of references to GlobalRegs which do not have
      machine registers by the appropriate memory load (eg.
      Hp ==> *(BaseReg + 34) ).

Ideas for other things we could do (ToDo):

  - shortcut jumps-to-jumps
  - eliminate dead code blocks
-}
cmmToCmm :: CmmTop -> CmmTop
cmmToCmm top@(CmmData _ _) = top
cmmToCmm (CmmProc info lbl params blocks) =
  CmmProc info lbl params (map cmmBlockConFold (cmmPeep blocks))

cmmBlockConFold :: CmmBasicBlock -> CmmBasicBlock
cmmBlockConFold (BasicBlock id stmts) =
  BasicBlock id (map cmmStmtConFold stmts)
cmmStmtConFold :: CmmStmt -> CmmStmt
cmmStmtConFold stmt
   = case stmt of
	CmmAssign reg src
	   -> case cmmExprConFold src of
		CmmReg reg' | reg == reg' -> CmmNop
		new_src -> CmmAssign reg new_src

	CmmStore addr src
	   -> CmmStore (cmmExprConFold addr) (cmmExprConFold src)

	CmmJump addr regs
	   -> CmmJump (cmmExprConFold addr) regs

	CmmCall target regs args vols
	   -> CmmCall (case target of
			 CmmForeignCall e conv ->
				CmmForeignCall (cmmExprConFold e) conv
			 other -> other)
		regs
		[ (cmmExprConFold arg,hint) | (arg,hint) <- args ]
		vols

	CmmCondBranch test dest
	   -> let test_opt = cmmExprConFold test
	      in
	      case test_opt of
		CmmLit (CmmInt 0 _) ->
		    CmmComment (mkFastString ("deleted: " ++
					showSDoc (pprStmt stmt)))
		CmmLit (CmmInt n _) -> CmmBranch dest
		other -> CmmCondBranch test_opt dest

	CmmSwitch expr ids
	   -> CmmSwitch (cmmExprConFold expr) ids
	other
	   -> other

cmmExprConFold :: CmmExpr -> CmmExpr
cmmExprConFold expr
   = case expr of
	CmmLoad addr rep
	   -> CmmLoad (cmmExprConFold addr) rep
	CmmMachOp mop args
	   -- For MachOps, we first optimize the children, and then we try
	   -- our hand at some constant-folding.
	   -> cmmMachOpFold mop (map cmmExprConFold args)
#if powerpc_TARGET_ARCH
	-- On powerpc, it's easier to jump directly to a label than
	-- to use the register table, so we replace these registers
	-- with the corresponding labels:
	CmmReg (CmmGlobal GCEnter1)
	   -> CmmLit (CmmLabel (mkRtsCodeLabel SLIT("__stg_gc_enter_1")))
	CmmReg (CmmGlobal GCFun)
	   -> CmmLit (CmmLabel (mkRtsCodeLabel SLIT("__stg_gc_fun")))
#endif
	CmmReg (CmmGlobal mid)
	   -- Replace register leaves with appropriate StixTrees for
	   -- the given target.  MagicIds which map to a reg on this
	   -- arch are left unchanged.  For the rest, BaseReg is taken
	   -- to mean the address of the reg table in MainCapability,
	   -- and for all others we generate an indirection to its
	   -- location in the register table.
	   -> case get_GlobalReg_reg_or_addr mid of
		Left  realreg -> expr
		Right baseRegAddr
		   -> case mid of
			BaseReg -> cmmExprConFold baseRegAddr
			other	-> cmmExprConFold (CmmLoad baseRegAddr
							(globalRegRep mid))
	-- eliminate zero offsets
	CmmRegOff reg 0
	   -> cmmExprConFold (CmmReg reg)
	CmmRegOff (CmmGlobal mid) offset
	   -- RegOff leaves are just a shorthand form.  If the reg maps
	   -- to a real reg, we keep the shorthand, otherwise, we just
	   -- expand it and defer to the above code.
	   -> case get_GlobalReg_reg_or_addr mid of
		Left  realreg -> expr
		Right baseRegAddr
		   -> cmmExprConFold (CmmMachOp (MO_Add wordRep) [
					CmmReg (CmmGlobal mid),
					CmmLit (CmmInt (fromIntegral offset)
						       wordRep)])

	other
	   -> other
-- -----------------------------------------------------------------------------
-- MachOp constant folder

-- Now, try to constant-fold the MachOps.  The arguments have already
-- been optimized and folded.
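--
-- For example (illustrative; I32 is the 32-bit MachRep):
--
--	cmmMachOpFold (MO_Add I32) [CmmLit (CmmInt 2 I32), CmmLit (CmmInt 3 I32)]
--	  == CmmLit (CmmInt 5 I32)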
cmmMachOpFold
    :: MachOp		-- The operation from a CmmMachOp
    -> [CmmExpr]	-- The optimized arguments
    -> CmmExpr

cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
  = case op of
      MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
      MO_Not r	 -> CmmLit (CmmInt (complement x) rep)

	-- these are interesting: we must first narrow to the
	-- "from" type, in order to truncate to the correct size.
	-- The final narrow/widen to the destination type
	-- is implicit in the CmmLit.
      MO_S_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
      MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
      _ -> panic "cmmMachOpFold: unknown unary op"
-- Eliminate conversion NOPs
cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x

-- ToDo: eliminate multiple conversions.  Be careful though: can't remove
-- a narrowing, and can't remove conversions to/from floating point types.

-- ToDo: eliminate nested comparisons:
--    CmmMachOp MO_Lt [CmmMachOp MO_Eq [x,y], CmmLit (CmmInt 0 _)]
-- turns into a simple equality test.
cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
  = case mop of
	-- for comparisons: don't forget to narrow the arguments before
	-- comparing, since they might be out of range.
	MO_Eq r   -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
	MO_Ne r   -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)

	MO_U_Gt r -> CmmLit (CmmInt (if x_u >  y_u then 1 else 0) wordRep)
	MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
	MO_U_Lt r -> CmmLit (CmmInt (if x_u <  y_u then 1 else 0) wordRep)
	MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)

	MO_S_Gt r -> CmmLit (CmmInt (if x_s >  y_s then 1 else 0) wordRep)
	MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
	MO_S_Lt r -> CmmLit (CmmInt (if x_s <  y_s then 1 else 0) wordRep)
	MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)

	MO_Add r -> CmmLit (CmmInt (x + y) r)
	MO_Sub r -> CmmLit (CmmInt (x - y) r)
	MO_Mul r -> CmmLit (CmmInt (x * y) r)
	MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
	MO_S_Rem  r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)

	MO_And r   -> CmmLit (CmmInt (x .&. y) r)
	MO_Or r    -> CmmLit (CmmInt (x .|. y) r)
	MO_Xor r   -> CmmLit (CmmInt (x `xor` y) r)

	MO_Shl r   -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
	MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
	MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)

	other	   -> CmmMachOp mop args

   where
	x_u = narrowU xrep x
	y_u = narrowU xrep y
	x_s = narrowS xrep x
	y_s = narrowS xrep y
-- When possible, shift the constants to the right-hand side, so that we
-- can match for strength reductions.  Note that the code generator will
-- also assume that constants have been shifted to the right when
-- possible.

cmmMachOpFold op [x@(CmmLit _), y]
   | not (isLit y) && isCommutableMachOp op
   = cmmMachOpFold op [y, x]
-- Turn (a+b)+c into a+(b+c) where possible.  Because literals are
-- moved to the right, it is more likely that we will find
-- opportunities for constant folding when the expression is
-- right-associated.
--
-- ToDo: this appears to introduce a quadratic behaviour due to the
-- nested cmmMachOpFold.  Can we fix this?
--
-- Why do we check isLit arg1?  If arg1 is a lit, it means that arg2
-- is also a lit (otherwise arg1 would be on the right).  If we
-- put arg1 on the left of the rearranged expression, we'll get into a
-- loop:  (x1+x2)+x3 => x1+(x2+x3)  => (x2+x3)+x1 => x2+(x3+x1) ...

cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
   | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1)
   = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
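-- e.g. (illustrative): (x + 3) + 4 re-associates to x + (3 + 4), and the
-- nested fold reduces it to x + 7; the RegOff rule below may then turn
-- it into a CmmRegOff if x is a register.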
-- Make a RegOff if we can
cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
  = CmmRegOff reg (fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
  = CmmRegOff reg (off + fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
  = CmmRegOff reg (- fromIntegral (narrowS rep n))
cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
  = CmmRegOff reg (off - fromIntegral (narrowS rep n))
-- Fold label(+/-)offset into a CmmLit where possible

cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
  = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
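-- e.g. (illustrative): lbl + 8 folds to CmmLit (CmmLabelOff lbl 8), so the
-- addition disappears from the generated code entirely.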
-- We can often do something with constants of 0 and 1 ...

cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
  = case mop of
	MO_Add	 r -> x
	MO_Sub	 r -> x
	MO_Mul	 r -> y
	MO_And	 r -> y
	MO_Or	 r -> x
	MO_Xor	 r -> x
	MO_Shl	 r -> x
	MO_S_Shr r -> x
	MO_U_Shr r -> x
	MO_Ne	 r | isComparisonExpr x -> x
	MO_Eq	 r | Just x' <- maybeInvertConditionalExpr x -> x'
	MO_U_Gt	 r | isComparisonExpr x -> x
	MO_S_Gt	 r | isComparisonExpr x -> x
	MO_U_Lt	 r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
	MO_S_Lt	 r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
	MO_U_Ge	 r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
	MO_S_Ge	 r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
	MO_U_Le	 r | Just x' <- maybeInvertConditionalExpr x -> x'
	MO_S_Le	 r | Just x' <- maybeInvertConditionalExpr x -> x'
	other -> CmmMachOp mop args
cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
  = case mop of
	MO_Mul	  r -> x
	MO_S_Quot r -> x
	MO_U_Quot r -> x
	MO_S_Rem  r -> CmmLit (CmmInt 0 rep)
	MO_U_Rem  r -> CmmLit (CmmInt 0 rep)
	MO_Ne	 r | Just x' <- maybeInvertConditionalExpr x -> x'
	MO_Eq	 r | isComparisonExpr x -> x
	MO_U_Lt	 r | Just x' <- maybeInvertConditionalExpr x -> x'
	MO_S_Lt	 r | Just x' <- maybeInvertConditionalExpr x -> x'
	MO_U_Gt	 r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
	MO_S_Gt	 r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
	MO_U_Le	 r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
	MO_S_Le	 r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
	MO_U_Ge	 r | isComparisonExpr x -> x
	MO_S_Ge	 r | isComparisonExpr x -> x
	other -> CmmMachOp mop args
-- Now look for multiplication/division by powers of 2 (integers).

cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
  = case mop of
	MO_Mul rep
	   -> case exactLog2 n of
		Nothing -> unchanged
		Just p	-> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
	MO_S_Quot rep
	   -> case exactLog2 n of
		Nothing -> unchanged
		Just p	-> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
	other
	   -> unchanged
    where
	unchanged = CmmMachOp mop args
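-- e.g. (illustrative): x * 8 becomes x << 3, and x `quot` 8 becomes
-- x >> 3 (arithmetic shift).  NB: quot rounds towards zero but an
-- arithmetic right shift rounds towards negative infinity, so the
-- MO_S_Quot rule is only exact when x is known to be non-negative.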
-- Anything else is just too hard.

cmmMachOpFold mop args = CmmMachOp mop args
-- -----------------------------------------------------------------------------
-- exactLog2

-- This algorithm for determining the $\log_2$ of exact powers of 2 comes
-- from GCC.  It requires bit manipulation primitives, and we use GHC
-- extensions.  Tough.

-- Used to be in MachInstrs --SDM.
-- ToDo: remove use of unboxery --SDM.

w2i x = word2Int# x
i2w x = int2Word# x
exactLog2 :: Integer -> Maybe Integer
exactLog2 x
  = if (x <= 0 || x >= 2147483648) then
       Nothing
    else
       case iUnbox (fromInteger x) of { x# ->
       if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
	  Nothing
       else
	  Just (toInteger (iBox (pow2 x#)))
       }
  where
    pow2 x# | x# ==# 1# = 0#
	    | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
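-- e.g. exactLog2 64 == Just 6 and exactLog2 12 == Nothing; because only
-- positive values below 2^31 are handled, exactLog2 (2^31) == Nothing too.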
-- -----------------------------------------------------------------------------
-- widening / narrowing

narrowU :: MachRep -> Integer -> Integer
narrowU I8  x = fromIntegral (fromIntegral x :: Word8)
narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
narrowU _ _ = panic "narrowU"
narrowS :: MachRep -> Integer -> Integer
narrowS I8  x = fromIntegral (fromIntegral x :: Int8)
narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
narrowS _ _ = panic "narrowS"
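-- e.g. narrowU I8 260 == 4 (260 mod 256), while narrowS I8 128 == -128
-- (128 wraps to the sign bit of an 8-bit two's-complement value).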
-- -----------------------------------------------------------------------------
-- The mini-inliner

-- This pass inlines assignments to temporaries that are used just
-- once in the very next statement only.  Generalising this would be
-- quite difficult (have to take into account aliasing of memory
-- writes, and so on), but at the moment it catches a number of useful
-- cases and lets the code generator generate much better code.

-- NB. This assumes that temporaries are single-assignment.
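--
-- An illustrative example (not from the original source), with x a
-- temporary used exactly once in the next statement:
--
--	x = y + 1
--	z = x * 2	 ==>	z = (y + 1) * 2
--
-- The assignment to x is dropped and its right-hand side is substituted
-- at the use site.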
cmmPeep :: [CmmBasicBlock] -> [CmmBasicBlock]
cmmPeep blocks = map do_inline blocks
  where
	blockUses (BasicBlock _ stmts)
	 = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)

	uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)

	do_inline (BasicBlock id stmts)
	 = BasicBlock id (cmmMiniInline uses stmts)
cmmMiniInline :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
cmmMiniInline uses [] = []
cmmMiniInline uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
  | Just 1 <- lookupUFM uses u,
    Just stmts' <- lookForInline u expr stmts
  =
#ifdef NCG_DEBUG
     trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
#endif
     cmmMiniInline uses stmts'

cmmMiniInline uses (stmt:stmts)
  = stmt : cmmMiniInline uses stmts
-- Try to inline a temporary assignment.  We can skip over assignments to
-- other temporaries, because we know that expressions aren't side-effecting
-- and temporaries are single-assignment.
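--
-- e.g. (illustrative): x can still be inlined here, because the
-- intervening assignment writes a different temporary and its rhs does
-- not mention x:
--
--	x = e
--	y = f		 ==>	y = f
--	z = x + 1		z = e + 1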
lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
  | u /= u'
  = case lookupUFM (getExprUses rhs) u of
	Just 1 -> Just (inlineStmt u expr stmt : rest)
	_other -> case lookForInline u expr rest of
		     Nothing	-> Nothing
		     Just stmts -> Just (stmt:stmts)

lookForInline u expr (stmt:stmts)
  = case lookupUFM (getStmtUses stmt) u of
	Just 1 -> Just (inlineStmt u expr stmt : stmts)
	_other -> Nothing

lookForInline u expr [] = Nothing
-- -----------------------------------------------------------------------------
-- Boring Cmm traversals for collecting usage info and substitutions.

getStmtUses :: CmmStmt -> UniqFM Int
getStmtUses (CmmAssign _ e) = getExprUses e
getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
getStmtUses (CmmCall target _ es _)
  = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
  where uses (CmmForeignCall e _) = getExprUses e
	uses _ = emptyUFM
getStmtUses (CmmCondBranch e _) = getExprUses e
getStmtUses (CmmSwitch e _) = getExprUses e
getStmtUses (CmmJump e _) = getExprUses e
getStmtUses _ = emptyUFM
getExprUses :: CmmExpr -> UniqFM Int
getExprUses (CmmReg (CmmLocal (LocalReg u _))) = unitUFM u 1
getExprUses (CmmRegOff (CmmLocal (LocalReg u _)) _) = unitUFM u 1
getExprUses (CmmLoad e _) = getExprUses e
getExprUses (CmmMachOp _ es) = getExprsUses es
getExprUses _other = emptyUFM

getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es)
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
inlineStmt u a (CmmCall target regs es vols)
  = CmmCall (infn target) regs es' vols
	-- NB. we must substitute into the call target too, because its
	-- uses are counted by getStmtUses above.
  where infn (CmmForeignCall fn cconv) = CmmForeignCall (inlineExpr u a fn) cconv
	infn (CmmPrim p) = CmmPrim p
	es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
inlineStmt u a other_stmt = other_stmt
inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
  | u == u'   = a
  | otherwise = e
inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
  | u == u'   = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
  | otherwise = e
inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
inlineExpr u a other_expr = other_expr
-- -----------------------------------------------------------------------------
-- Utils

bind f x = x f

isLit :: CmmExpr -> Bool
isLit (CmmLit _) = True
isLit _		 = False

isComparisonExpr :: CmmExpr -> Bool
isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
isComparisonExpr _other = False
maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
maybeInvertConditionalExpr (CmmMachOp op args)
  | Just op' <- maybeInvertComparison op = Just (CmmMachOp op' args)
maybeInvertConditionalExpr _ = Nothing