1 -- -----------------------------------------------------------------------------
3 -- (c) The University of Glasgow 1993-2004
5 -- This is the top-level module in the native code generator.
7 -- -----------------------------------------------------------------------------
10 module AsmCodeGen ( nativeCodeGen ) where
12 #include "HsVersions.h"
20 import RegAllocInfo ( jumpDests )
24 import PprCmm ( pprStmt, pprCmms )
26 import CLabel ( CLabel, mkSplitMarkerLabel )
27 #if powerpc_TARGET_ARCH
28 import CLabel ( mkRtsCodeLabel )
32 import Unique ( Unique, getUnique )
36 import PprMach ( pprDyldSymbolStub )
37 import List ( group, sort )
39 import ErrUtils ( dumpIfSet_dyn )
40 import CmdLineOpts ( DynFlags, DynFlag(..), dopt, opt_Static,
41 opt_EnsureSplittableC )
44 import qualified Pretty
52 import List ( intersperse )
61 The native-code generator has machine-independent and
62 machine-dependent modules.
64 This module ("AsmCodeGen") is the top-level machine-independent
65 module. Before entering machine-dependent land, we do some
66 machine-independent optimisations (defined below) on the
69 We convert to the machine-specific 'Instr' datatype with
70 'cmmCodeGen', assuming an infinite supply of registers. We then use
71 a machine-independent register allocator ('regAlloc') to rejoin
72 reality. Obviously, 'regAlloc' has machine-specific helper
73 functions (see about "RegAllocInfo" below).
75 Finally, we order the basic blocks of the function so as to minimise
76 the number of jumps between blocks, by utilising fallthrough wherever
79 The machine-dependent bits break down as follows:
81 * ["MachRegs"] Everything about the target platform's machine
82 registers (and immediate operands, and addresses, which tend to
83 intermingle/interact with registers).
85 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
86 have a module of its own), plus a miscellany of other things
87 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
89 * ["MachCodeGen"] is where 'Cmm' stuff turns into
92 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
95 * ["RegAllocInfo"] In the register allocator, we manipulate
96 'MRegsState's, which are 'BitSet's, one bit per machine register.
97 When we want to say something about a specific machine register
98 (e.g., ``it gets clobbered by this instruction''), we set/unset
99 its bit. Obviously, we do this 'BitSet' thing for efficiency
102 The 'RegAllocInfo' module collects together the machine-specific
103 info needed to do register allocation.
105 * ["RegisterAlloc"] The (machine-independent) register allocator.
108 -- -----------------------------------------------------------------------------
109 -- Top-level of the native codegen
-- NB. We *lazily* compile each block of code for space reasons.
-- | Top-level of the native code generator: compile a list of 'Cmm'
-- groups into a single assembly 'Pretty.Doc' under the given unique supply.
-- NOTE(review): this region is missing several source lines (guards,
-- equation heads, 'where'/'in' keywords, CPP conditionals); the code
-- fragments below are annotated but otherwise left untouched.
nativeCodeGen :: DynFlags -> [Cmm] -> UniqSupply -> IO Pretty.Doc
nativeCodeGen dflags cmms us
 -- NOTE(review): a guard (presumably the dynamic-library configuration)
 -- is not visible before this equation body.
 = panic "NCG does not handle dynamic libraries right now"
  -- ToDo: MachCodeGen used to have derefDLL function which expanded
  -- dynamic CLabels (labelDynamic lbl == True) into the appropriate
  -- dereferences. This should be done in the pre-NCG cmmToCmm pass instead.
  -- It doesn't apply to static data, of course. There are hacks so that
  -- the RTS knows what to do for references to closures in a DLL in SRTs,
  -- and we never generate a reference to a closure in another DLL in a
  -- static constructor.
 -- NOTE(review): a second guard (the ordinary, static case) is elided here.
 = let ((ppr_cmms, insn_sdoc, imports), _) = initUs us $
         cgCmm (concat (map add_split cmms))

       -- Compile every top-level chunk lazily, then recombine the
       -- optimised Cmm, the assembly docs and the import lists.
       cgCmm :: [CmmTop] -> UniqSM (Cmm, Pretty.Doc, [(Bool, CLabel)])
       -- NOTE(review): the equation head of cgCmm is not visible here.
       lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
       let (cmms,docs,imps) = unzip3 results in
       returnUs (Cmm cmms, my_vcat docs, concat imps)

       -- Dump the optimised Cmm if requested, then emit the
       -- instructions followed by any dynamic-symbol stubs.
       dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
       return (insn_sdoc Pretty.$$ dyld_stubs imports)

       -- Insert a split marker before each chunk when split-object
       -- compilation is enabled.
       | opt_EnsureSplittableC = split_marker : tops
       split_marker = CmmProc [] mkSplitMarkerLabel [] []

       -- Generate "symbol stubs" for all external symbols that might
       -- come from a dynamic library.
       dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
                         map head $ group $ sort imps
       -- NOTE(review): this equation and the one above are presumably in
       -- different CPP branches; the #if/#else lines are not visible here.
       dyld_stubs imps = Pretty.empty

       my_vcat sds = Pretty.vcat sds
       -- NOTE(review): debug variant, wrapping each chunk in
       -- ncg-debug marker lines; its CPP guard is not visible here.
       my_vcat sds = Pretty.vcat (
                       Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
                       Pretty.$$ Pretty.char ' '
169 -- Complete native code generation phase for a single top-level chunk
-- | Complete native code generation for one top-level chunk of Cmm:
-- fix global-register assignments, run the generic Cmm optimiser,
-- select instructions, allocate registers, sequence the basic blocks
-- and pretty-print.  Returns the optimised Cmm (for dumping), the
-- assembly document, and the imported symbols encountered.
-- NOTE(review): some lines (the 'then' branch of the dump test, the
-- CPP guard around the x86 FP kludge, and the 'bind' helper) are not
-- visible in this view.
cmmNativeGen :: DynFlags -> CmmTop -> UniqSM (CmmTop, Pretty.Doc, [(Bool,CLabel)])
cmmNativeGen dflags cmm
   = {-# SCC "fixAssigns" #-}
     fixAssignsTop cmm `thenUs` \ fixed_cmm ->
     {-# SCC "genericOpt" #-}
     cmmToCmm fixed_cmm `bind` \ cmm ->
     -- retain the optimised Cmm only when it will actually be dumped
     (if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
        -- NOTE(review): the 'then cmm' line is not visible here.
        else CmmData Text []) `bind` \ ppr_cmm ->
     {-# SCC "genMachCode" #-}
     genMachCode cmm `thenUs` \ (pre_regalloc, imports) ->
     {-# SCC "regAlloc" #-}
     map regAlloc pre_regalloc `bind` \ with_regs ->
     {-# SCC "sequenceBlocks" #-}
     map sequenceTop with_regs `bind` \ sequenced ->
     {-# SCC "x86fp_kludge" #-}
     map x86fp_kludge sequenced `bind` \ final_mach_code ->
     Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
     returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", imports)
-- | Post-pass over generated code.  NOTE(review): the two sets of
-- equations below are presumably separated by a CPP conditional
-- (x86 vs other architectures), and the 'where' introducing
-- bb_i386_insert_ffrees is not visible in this view.
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge top@(CmmProc info lbl params code) =
        CmmProc info lbl params (map bb_i386_insert_ffrees code)
        -- run the ffree-insertion pass over every basic block
        bb_i386_insert_ffrees (BasicBlock id instrs) =
           BasicBlock id (i386_insert_ffrees instrs)
-- non-x86 case: identity
x86fp_kludge top = top
206 -- -----------------------------------------------------------------------------
207 -- Sequencing the basic blocks
209 -- Cmm BasicBlocks are self-contained entities: they always end in a
210 -- jump, either non-local or to another basic block in the same proc.
211 -- In this phase, we attempt to place the basic blocks in a sequence
212 -- such that as many of the local jumps as possible turn into
-- | Order the basic blocks of a procedure so as to maximise the use of
-- fallthrough instead of explicit jumps.  Data sections pass through
-- unchanged.
sequenceTop :: NatCmmTop -> NatCmmTop
sequenceTop top = case top of
  CmmData _ _                  -> top
  CmmProc info lbl params blks -> CmmProc info lbl params (sequenceBlocks blks)
220 -- The algorithm is very simple (and stupid): we make a graph out of
221 -- the blocks where there is an edge from one block to another iff the
222 -- first block ends by jumping to the second. Then we topologically
223 -- sort this graph. Then traverse the list: for each block, we first
224 -- output the block, then if it has an out edge, we move the
225 -- destination of the out edge to the front of the list, and continue.
-- | Order a procedure's basic blocks to maximise fallthrough.  The
-- entry block must remain first, so it is prepended unconditionally;
-- the remaining blocks are flattened out of their SCCs and chained
-- together by 'seqBlocks'.
sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
sequenceBlocks []             = []
sequenceBlocks (entry:others) = seqBlocks (mkNode entry : rest)
  where
    rest = reverse (flattenSCCs (sccBlocks others))
-- | Strongly-connected components of the block graph; an edge exists
-- from one block to another iff the first ends in a single-destination
-- jump to the second.
sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
sccBlocks = stronglyConnCompR . map mkNode
-- | Out-edges of a basic block for graph construction.
getOutEdges :: [Instr] -> [Unique]
getOutEdges instrs = case jumpDests (last instrs) [] of
                        [one] -> [getUnique one]
                -- we're only interested in the last instruction of
                -- the block, and only if it has a single destination.
                -- NOTE(review): the fall-through alternative (yielding
                -- an empty edge list) is not visible in this view.
-- | Graph node for a block: the block itself, keyed by the unique of
-- its label, together with its out-edges.
mkNode blk@(BasicBlock blockId is) = (blk, getUnique blockId, getOutEdges is)
-- Chain the sorted nodes together.  When a block's single jump target
-- is placed immediately after it, the final jump instruction is
-- dropped and control falls through.
seqBlocks ((block,_,[]) : rest)
  = block : seqBlocks rest
seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
  | otherwise       = block : seqBlocks rest'
        -- NOTE(review): the 'where' keyword introducing this binding
        -- is not visible in this view.
        (can_fallthrough, rest') = reorder next [] rest
        -- TODO: we should do a better job for cycles; try to maximise the
        -- fallthroughs within a loop.
seqBlocks _ = panic "AsmCodegen:seqBlocks"
-- | Search the node list for the node labelled @want@.  If found, move
-- it to the front of the remainder (skipped nodes keep their relative
-- order); the Bool reports whether it was found, i.e. whether the
-- caller may use fallthrough.
reorder want skipped [] = (False, reverse skipped)
reorder want skipped (node@(blk, lbl, outs) : rest)
  | lbl == want = (True, (blk, want, outs) : reverse skipped ++ rest)
  | otherwise   = reorder want (node : skipped) rest
262 -- -----------------------------------------------------------------------------
263 -- Instruction selection
265 -- Native code instruction selection for a chunk of stix code. For
266 -- this part of the computation, we switch from the UniqSM monad to
267 -- the NatM monad. The latter carries not only a Unique, but also an
268 -- Int denoting the current C stack pointer offset in the generated
269 -- code; this is needed for creating correct spill offsets on
270 -- architectures which don't offer, or for which it would be
271 -- prohibitively expensive to employ, a frame pointer register. Viz,
274 -- The offset is measured in bytes, and indicates the difference
275 -- between the current (simulated) C stack-ptr and the value it was at
276 -- the beginning of the block. For stacks which grow down, this value
277 -- should be either zero or negative.
279 -- Switching between the two monads whilst carrying along the same
280 -- Unique supply breaks abstraction. Is that bad?
-- | Instruction selection for one top-level chunk.  Runs 'cmmTopCodeGen'
-- in the NatM monad (a unique supply plus the simulated C stack-pointer
-- delta) and sanity-checks that the delta is back to zero at the end.
genMachCode :: CmmTop -> UniqSM ([NatCmmTop], [(Bool,CLabel)])

genMachCode cmm_top initial_us
  = let initial_st          = mkNatM_State initial_us 0
        (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
        final_us        = natm_us final_st
        final_delta     = natm_delta final_st
        final_imports   = natm_imports final_st
        -- NOTE(review): the 'in if (final_delta == 0)' lines are not
        -- visible in this view; the check below asserts the simulated
        -- C stack pointer ends where it started.
        then ((new_tops, final_imports), final_us)
        else pprPanic "genMachCode: nonzero final delta"
296 -- -----------------------------------------------------------------------------
297 -- Fixup assignments to global registers so that they assign to
298 -- locations within the RegTable, if appropriate.
300 -- Note that we currently don't fixup reads here: they're done by
301 -- the generic optimiser below, to avoid having two separate passes
-- | Walk a top-level chunk, fixing up assignments to global registers
-- in every basic block; data sections are returned unchanged.
fixAssignsTop :: CmmTop -> UniqSM CmmTop
fixAssignsTop top = case top of
  CmmData _ _ -> returnUs top
  CmmProc info lbl params blocks ->
    mapUs fixAssignsBlock blocks `thenUs` \ fixed ->
    returnUs (CmmProc info lbl params fixed)
-- | Fix up global-register assignments within one basic block.
fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock
fixAssignsBlock (BasicBlock blk_id ss) =
  fixAssigns ss `thenUs` \ ss' ->
  returnUs (BasicBlock blk_id ss')
-- | Apply 'fixAssign' to every statement, flattening the results (one
-- statement may expand into several).
fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt]
-- NOTE(review): the equation head ('fixAssigns stmts =') is not
-- visible in this view.
        mapUs fixAssign stmts `thenUs` \ stmtss ->
        returnUs (concat stmtss)
-- | Rewrite one statement so that an assignment to a global register
-- that has no machine register on this target becomes a store into its
-- slot in the register table.
fixAssign :: CmmStmt -> UniqSM [CmmStmt]
fixAssign (CmmAssign (CmmGlobal BaseReg) src)
  = panic "cmmStmtConFold: assignment to BaseReg";

fixAssign (CmmAssign (CmmGlobal reg) src)
  | Left realreg <- reg_or_addr
  = returnUs [CmmAssign (CmmGlobal reg) (cmmExprConFold src)]
  | Right baseRegAddr <- reg_or_addr
  = returnUs [CmmStore baseRegAddr src]
  -- Replace register leaves with appropriate StixTrees for
  -- the given target. GlobalRegs which map to a reg on this
  -- arch are left unchanged. Assigning to BaseReg is always
  -- illegal, so we check for that.
  -- NOTE(review): the 'where' keyword for this binding is not visible.
    reg_or_addr = get_GlobalReg_reg_or_addr reg

fixAssign (CmmCall target results args vols)
  = mapAndUnzipUs fixResult results `thenUs` \ (results',stores) ->
    returnUs (CmmCall target results' args vols : concat stores)
  -- NOTE(review): 'where' elided.  A result landing in a global
  -- register with no machine register is diverted through a fresh
  -- local temporary and then stored into the register table.
    fixResult g@(CmmGlobal reg,hint) =
      case get_GlobalReg_reg_or_addr reg of
        Left realreg -> returnUs (g, [])
        -- NOTE(review): the 'Right baseRegAddr ->' line is not visible.
          getUniqueUs `thenUs` \ uq ->
          let local = CmmLocal (LocalReg uq (globalRegRep reg)) in
          returnUs ((local,hint),
            [CmmStore baseRegAddr (CmmReg local)])

-- Any other statement needs no fixup.
fixAssign other_stmt = returnUs [other_stmt]
353 -- -----------------------------------------------------------------------------
354 -- Generic Cmm optimiser
360 (b) Simple inlining: a temporary which is assigned to and then
361 used, once, can be shorted.
362 (c) Replacement of references to GlobalRegs which do not have
363 machine registers by the appropriate memory load (eg.
364 Hp ==> *(BaseReg + 34) ).
366 Ideas for other things we could do (ToDo):
368 - shortcut jumps-to-jumps
369 - eliminate dead code blocks
-- | Generic Cmm optimiser for one top-level chunk: first mini-inline
-- single-use temporaries ('cmmPeep'), then constant-fold every block.
-- Data sections pass through unchanged.
cmmToCmm :: CmmTop -> CmmTop
cmmToCmm top = case top of
  CmmData _ _ -> top
  CmmProc info lbl params blocks
    -> CmmProc info lbl params (map cmmBlockConFold (cmmPeep blocks))
-- | Constant-fold every statement in a basic block.
cmmBlockConFold :: CmmBasicBlock -> CmmBasicBlock
cmmBlockConFold (BasicBlock blk_id ss) = BasicBlock blk_id (map cmmStmtConFold ss)
        -- Constant-fold the sub-expressions of each statement.
        -- NOTE(review): the enclosing 'cmmStmtConFold stmt = case stmt of'
        -- line and several pattern heads (CmmAssign, CmmStore, CmmJump,
        -- CmmSwitch, the non-foreign call target, the 'in case test_opt
        -- of') are not visible in this view; the alternatives below are
        -- fragments of that case expression.
        -> case cmmExprConFold src of
             -- a self-assignment folds to a no-op
             CmmReg reg' | reg == reg' -> CmmNop
             new_src -> CmmAssign reg new_src
        -> CmmStore (cmmExprConFold addr) (cmmExprConFold src)
        -> CmmJump (cmmExprConFold addr) regs
        CmmCall target regs args vols
          -> CmmCall (case target of
                CmmForeignCall e conv ->
                  CmmForeignCall (cmmExprConFold e) conv
                -- NOTE(review): the other target alternative and the
                -- close of this inner case are not visible here.
               [ (cmmExprConFold arg,hint) | (arg,hint) <- args ]
        CmmCondBranch test dest
          -> let test_opt = cmmExprConFold test
             -- a literally-false condition: delete the branch, leaving
             -- a comment behind for debugging
             CmmLit (CmmInt 0 _) ->
               CmmComment (mkFastString ("deleted: " ++
                        showSDoc (pprStmt stmt)))
             -- any other literal is true: branch unconditionally
             CmmLit (CmmInt n _) -> CmmBranch dest
             other -> CmmCondBranch (cmmExprConFold test) dest
          -> CmmSwitch (cmmExprConFold expr) ids
        -- Constant-fold one expression, bottom-up.
        -- NOTE(review): the equation head ('cmmExprConFold expr = case
        -- expr of') and several pattern heads/alternatives are not
        -- visible in this view; the lines below are fragments.
        -> CmmLoad (cmmExprConFold addr) rep

        -- For MachOps, we first optimize the children, and then we try
        -- our hand at some constant-folding.
        -> cmmMachOpFold mop (map cmmExprConFold args)

#if powerpc_TARGET_ARCH
        -- On powerpc, it's easier to jump directly to a label than
        -- to use the register table, so we replace these registers
        -- with the corresponding labels:
        CmmReg (CmmGlobal GCEnter1)
          -> CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_enter_1")))
        CmmReg (CmmGlobal GCFun)
          -> CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "__stg_gc_fun")))
        -- NOTE(review): the closing #endif is not visible here.

        CmmReg (CmmGlobal mid)
          -- Replace register leaves with appropriate StixTrees for
          -- the given target. MagicIds which map to a reg on this
          -- arch are left unchanged. For the rest, BaseReg is taken
          -- to mean the address of the reg table in MainCapability,
          -- and for all others we generate an indirection to its
          -- location in the register table.
          -> case get_GlobalReg_reg_or_addr mid of
               -- NOTE(review): the Left/Right alternatives introducing
               -- these lines are not visible in this view.
               BaseReg -> cmmExprConFold baseRegAddr
               other   -> cmmExprConFold (CmmLoad baseRegAddr

        -- eliminate zero offsets
        -- NOTE(review): the 'CmmRegOff reg 0' pattern head is elided.
          -> cmmExprConFold (CmmReg reg)

        CmmRegOff (CmmGlobal mid) offset
          -- RegOff leaves are just a shorthand form. If the reg maps
          -- to a real reg, we keep the shorthand, otherwise, we just
          -- expand it and defer to the above code.
          -> case get_GlobalReg_reg_or_addr mid of
               -- NOTE(review): the Left (unchanged) alternative and the
               -- 'Right baseRegAddr ->' line are not visible here.
               -> cmmExprConFold (CmmMachOp (MO_Add wordRep) [
                     CmmReg (CmmGlobal mid),
                     CmmLit (CmmInt (fromIntegral offset)
473 -- -----------------------------------------------------------------------------
474 -- MachOp constant folder
476 -- Now, try to constant-fold the MachOps. The arguments have already
477 -- been optimized and folded.
-- | Constant-fold one MachOp application whose arguments have already
-- been optimised.
-- NOTE(review): the signature line carrying the function name and the
-- result type is not visible in this view.
   :: MachOp       -- The operation from an CmmMachOp
   -> [CmmExpr]    -- The optimized arguments

-- Unary operation on a single integer literal.
-- NOTE(review): the '= case op of' line is elided here.
cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
      MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
      MO_Not r -> CmmLit (CmmInt (complement x) rep)

        -- these are interesting: we must first narrow to the
        -- "from" type, in order to truncate to the correct size.
        -- The final narrow/widen to the destination type
        -- is implicit in the CmmLit.
      MO_S_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
      MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
      _ -> panic "cmmMachOpFold: unknown unary op"

-- Eliminate conversion NOPs
cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x

-- ToDo: eliminate multiple conversions.  Be careful though: can't remove
-- a narrowing, and can't remove conversions to/from floating point types.

-- ToDo: eliminate nested comparisons:
--    CmmMachOp MO_Lt [CmmMachOp MO_Eq [x,y], CmmLit (CmmInt 0 _)]
-- turns into a simple equality test.

-- Both arguments are integer literals: evaluate the operation outright.
-- NOTE(review): the '= case mop of' line and the 'where' bindings for
-- the narrowed views (x_u/y_u unsigned, x_s/y_s signed) are not
-- visible in this view.
cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]

        -- for comparisons: don't forget to narrow the arguments before
        -- comparing, since they might be out of range.
        MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
        MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)

        MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep)
        MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
        MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep)
        MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)

        MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep)
        MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
        MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep)
        MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)

        MO_Add r -> CmmLit (CmmInt (x + y) r)
        MO_Sub r -> CmmLit (CmmInt (x - y) r)
        MO_Mul r -> CmmLit (CmmInt (x * y) r)
        -- guard against division by zero: fall through when y == 0
        MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
        MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)

        MO_And r -> CmmLit (CmmInt (x .&. y) r)
        MO_Or r -> CmmLit (CmmInt (x .|. y) r)
        MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)

        MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
        MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
        MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)

        other -> CmmMachOp mop args
-- When possible, shift the constants to the right-hand side, so that we
-- can match for strength reductions.  Note that the code generator will
-- also assume that constants have been shifted to the right when
-- possible.
cmmMachOpFold op [x@(CmmLit _), y]
   | not (isLit y) && isCommutableMachOp op
   = cmmMachOpFold op [y, x]
   -- NOTE(review): the 'where' keyword and the 'isLit _ = False'
   -- fall-through equation are not visible in this view.
    isLit (CmmLit _) = True
-- Turn (a+b)+c into a+(b+c) where possible.  Because literals are
-- moved to the right, it is more likely that we will find
-- opportunities for constant folding when the expression is
-- right-associated.
--
-- ToDo: this appears to introduce a quadratic behaviour due to the
-- nested cmmMachOpFold.  Can we fix this?
cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
   | mop1 == mop2 && isAssociative mop1
   = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
   -- NOTE(review): the 'where' keyword introducing isAssociative is
   -- not visible in this view.
   isAssociative (MO_Add _) = True
   isAssociative (MO_Mul _) = True
   isAssociative (MO_And _) = True
   isAssociative (MO_Or _) = True
   isAssociative (MO_Xor _) = True
   isAssociative _ = False
-- Fuse an addition/subtraction of an integer literal into a CmmRegOff
-- node where we can; the literal is narrowed (signed) to its rep first.
cmmMachOpFold (MO_Add _) [CmmReg r, CmmLit (CmmInt i w)]
  = CmmRegOff r (fromIntegral (narrowS w i))
cmmMachOpFold (MO_Add _) [CmmRegOff r off, CmmLit (CmmInt i w)]
  = CmmRegOff r (off + fromIntegral (narrowS w i))
cmmMachOpFold (MO_Sub _) [CmmReg r, CmmLit (CmmInt i w)]
  = CmmRegOff r (- fromIntegral (narrowS w i))
cmmMachOpFold (MO_Sub _) [CmmRegOff r off, CmmLit (CmmInt i w)]
  = CmmRegOff r (off - fromIntegral (narrowS w i))
-- Fold a label plus/minus a constant offset into a single CmmLabelOff
-- literal; the offset is narrowed (unsigned) to its rep first.
cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lab), CmmLit (CmmInt i w)]
  = CmmLit (CmmLabelOff lab (fromIntegral (narrowU w i)))
cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i w), CmmLit (CmmLabel lab)]
  = CmmLit (CmmLabelOff lab (fromIntegral (narrowU w i)))
cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lab), CmmLit (CmmInt i w)]
  = CmmLit (CmmLabelOff lab (fromIntegral (negate (narrowU w i))))
-- We can often do something with constants of 0 and 1 ...
-- Comparing a boolean-valued (comparison) expression against 0 or 1
-- either simplifies to the expression, its inverse, or a constant.
-- NOTE(review): the '= case mop of' lines and the arithmetic
-- alternatives (e.g. MO_Add/MO_Sub with 0, MO_Mul/quot with 1) are
-- not visible in this view.

cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
        MO_Ne r | isComparisonExpr x -> x
        MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x'
        MO_U_Gt r | isComparisonExpr x -> x
        MO_S_Gt r | isComparisonExpr x -> x
        MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
        MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
        MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
        MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
        MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
        MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
        other -> CmmMachOp mop args

cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
        MO_S_Rem r -> CmmLit (CmmInt 0 rep)
        MO_U_Rem r -> CmmLit (CmmInt 0 rep)
        MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x'
        MO_Eq r | isComparisonExpr x -> x
        MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
        MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
        MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
        MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
        MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
        MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
        MO_U_Ge r | isComparisonExpr x -> x
        MO_S_Ge r | isComparisonExpr x -> x
        other -> CmmMachOp mop args
-- Now look for multiplication/division by powers of 2 (integers).
-- NOTE(review): the '= case mop of' line, the MO_Mul/MO_S_Quot
-- pattern heads, the Nothing alternatives, and the 'where' keyword
-- are not visible in this view.

cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
    -- multiply by 2^p becomes a left shift
    -> case exactLog2 n of
         Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
    -- signed divide by 2^p becomes an arithmetic right shift
    -> case exactLog2 n of
         Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
    -- fall back to the unchanged application
    unchanged = CmmMachOp mop args

-- Anything else is just too hard.

cmmMachOpFold mop args = CmmMachOp mop args
663 -- -----------------------------------------------------------------------------
666 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
667 -- from GCC. It requires bit manipulation primitives, and we use GHC
668 -- extensions. Tough.
670 -- Used to be in MachInstrs --SDM.
671 -- ToDo: remove use of unboxery --SDM.
-- | @exactLog2 x@ is @Just (log2 x)@ when @x@ is an exact power of two
-- in (0, 2^31), and Nothing otherwise.  Uses GHC unboxed primitives;
-- the power-of-two test is the classic @x .&. (-x) == x@ trick.
-- NOTE(review): the equation head, the Nothing/else branches and the
-- 'where' keyword are not visible in this view.
exactLog2 :: Integer -> Maybe Integer
   = if (x <= 0 || x >= 2147483648) then
       case iUnbox (fromInteger x) of { x# ->
         if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
            Just (toInteger (iBox (pow2 x#)))
   -- count the shift steps needed to bring the (single) set bit to 1
   pow2 x# | x# ==# 1# = 0#
           | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
692 -- -----------------------------------------------------------------------------
693 -- widening / narrowing
-- | Narrow an integer to the given width, interpreting it as an
-- unsigned value: truncate through the corresponding Word type and
-- widen back to Integer.
narrowU :: MachRep -> Integer -> Integer
narrowU I8  x = fromIntegral (fromIntegral x :: Word8)
narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
-- Fixed: the panic message used to say "narrowTo", which names no
-- function in this module and was misleading when debugging.
narrowU _ _ = panic "narrowU: no match"
-- | Narrow an integer to the given width, interpreting it as a signed
-- value: truncate through the corresponding Int type (sign-extending)
-- and widen back to Integer.
narrowS :: MachRep -> Integer -> Integer
narrowS I8  x = fromIntegral (fromIntegral x :: Int8)
narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
-- Fixed: the panic message used to say "narrowTo", which names no
-- function in this module and was misleading when debugging.
narrowS _ _ = panic "narrowS: no match"
709 -- -----------------------------------------------------------------------------
712 -- This pass inlines assignments to temporaries that are used just
713 -- once in the very next statement only. Generalising this would be
714 -- quite difficult (have to take into account aliasing of memory
715 -- writes, and so on), but at the moment it catches a number of useful
716 -- cases and lets the code generator generate much better code.
718 -- NB. This assumes that temporaries are single-assignment.
-- | Mini-inlining pass: compute use-counts for every local register
-- across the whole procedure, then inline single-use temporaries
-- within each block.
cmmPeep :: [CmmBasicBlock] -> [CmmBasicBlock]
cmmPeep blocks = map do_inline blocks
  -- NOTE(review): the 'where' keyword introducing these bindings is
  -- not visible in this view.
  -- use-counts within a single block
  blockUses (BasicBlock _ stmts)
   = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
  -- combined use-counts across the whole procedure
  uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
  do_inline (BasicBlock id stmts)
   = BasicBlock id (cmmMiniInline uses stmts)
-- | Inline an assignment to a temporary that is used exactly once, when
-- the single use can be reached by skipping over other temporary
-- assignments; the inlined assignment itself is dropped.
cmmMiniInline :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
cmmMiniInline uses [] = []
cmmMiniInline uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
  | Just 1 <- lookupUFM uses u,
    Just stmts' <- lookForInline u expr stmts
  -- NOTE(review): the '=' of this guarded equation and the CPP
  -- conditional around the debug trace are not visible in this view.
    trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
    cmmMiniInline uses stmts'
-- anything else is kept and we carry on down the list
cmmMiniInline uses (stmt:stmts)
  = stmt : cmmMiniInline uses stmts
-- Try to inline a temporary assignment. We can skip over assignments to
-- other temporaries, because we know that expressions aren't side-effecting
-- and temporaries are single-assignment.
-- NOTE(review): several lines (a 'u /= u'' guard and the Nothing
-- alternatives of both cases) are not visible in this view.
lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
  = case lookupUFM (getExprUses rhs) u of
        Just 1 -> Just (inlineStmt u expr stmt : rest)
        _other -> case lookForInline u expr rest of
                     Just stmts -> Just (stmt:stmts)

lookForInline u expr (stmt:stmts)
  = case lookupUFM (getStmtUses stmt) u of
      Just 1 -> Just (inlineStmt u expr stmt : stmts)
763 -- -----------------------------------------------------------------------------
764 -- Boring Cmm traversals for collecting usage info and substitutions.
-- | Count uses of each local register's unique within one statement.
getStmtUses :: CmmStmt -> UniqFM Int
getStmtUses (CmmAssign _ e) = getExprUses e
getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
getStmtUses (CmmCall target _ es _)
  = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
  where uses (CmmForeignCall e _) = getExprUses e
        -- NOTE(review): the alternative for non-foreign targets
        -- (presumably CmmPrim, contributing no uses) is not visible
        -- in this view.
getStmtUses (CmmCondBranch e _) = getExprUses e
getStmtUses (CmmSwitch e _) = getExprUses e
getStmtUses (CmmJump e _) = getExprUses e
getStmtUses _ = emptyUFM
-- | Count uses of each local register's unique within one expression.
getExprUses :: CmmExpr -> UniqFM Int
getExprUses expr = case expr of
  CmmReg    (CmmLocal (LocalReg u _))   -> unitUFM u 1
  CmmRegOff (CmmLocal (LocalReg u _)) _ -> unitUFM u 1
  CmmLoad e _    -> getExprUses e
  CmmMachOp _ es -> getExprsUses es
  _other         -> emptyUFM
-- Combined use-counts over a list of expressions.
getExprsUses es = foldr combine emptyUFM es
  where combine e acc = plusUFM_C (+) (getExprUses e) acc
-- | Substitute the expression @a@ for every use of the local register
-- with unique @u@ within a single statement.
--
-- Bug fix: previously the target of a CmmForeignCall was left
-- untouched, even though 'getStmtUses' counts uses occurring in the
-- target; the mini-inliner could therefore drop an assignment whose
-- single use was in a call target.  We now substitute there too.
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
inlineStmt u a (CmmCall target regs es vols)
   = CmmCall (infn target) regs es' vols
   where infn (CmmForeignCall fn cconv) = CmmForeignCall (inlineExpr u a fn) cconv
         infn (CmmPrim p) = CmmPrim p
         es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
inlineStmt u a other_stmt = other_stmt
-- | Substitute expression @a@ for uses of local register @u@ within
-- one expression.  A RegOff use expands into an explicit addition.
inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
  -- NOTE(review): the guards ('| u == u'' returning @a@, and the
  -- otherwise case returning @e@) are not visible in this view.
inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
  | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
  -- NOTE(review): the '| otherwise = e' guard is not visible here.
inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
inlineExpr u a other_expr = other_expr
811 -- -----------------------------------------------------------------------------
-- | True when the expression is a MachOp application whose operator is
-- a comparison (and therefore evaluates to 0 or 1).
isComparisonExpr :: CmmExpr -> Bool
isComparisonExpr expr = case expr of
  CmmMachOp op _ -> isComparisonMachOp op
  _other         -> False
-- | Invert a conditional expression by inverting its comparison
-- operator, if the operator has an inverse; otherwise Nothing.
maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
maybeInvertConditionalExpr expr = case expr of
  CmmMachOp op args -> fmap (\op' -> CmmMachOp op' args) (maybeInvertComparison op)
  _other            -> Nothing