2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
9 #include "nativeGen/NCG.h"
11 import List ( intersperse )
18 import AbsCStixGen ( genCodeAbstractC )
19 import AbsCSyn ( AbstractC )
20 import AbsCUtils ( mkAbsCStmtList )
21 import AsmRegAlloc ( runRegAllocate )
22 import PrimOp ( commutableOp, PrimOp(..) )
23 import RegAllocInfo ( findReservedRegs )
24 import Stix ( StixTree(..), StixReg(..),
25 pprStixTrees, pprStixTree,
26 stixCountTempUses, stixSubst,
29 uniqOfNatM_State, deltaOfNatM_State )
30 import UniqSupply ( returnUs, thenUs, initUs,
33 import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
35 import OrdList ( concatOL )
40 The 96/03 native-code generator has machine-independent and
41 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
43 This module (@AsmCodeGen@) is the top-level machine-independent
44 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
45 (defined in module @Stix@), using support code from @StixInfo@ (info
46 tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C
47 macros), and @StixInteger@ (GMP arbitrary-precision operations).
49 Before entering machine-dependent land, we do some machine-independent
50 @genericOpt@imisations (defined below) on the @StixTree@s.
52 We convert to the machine-specific @Instr@ datatype with
53 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
54 use a machine-independent register allocator (@runRegAllocate@) to
55 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
56 helper functions (see about @RegAllocInfo@ below).
58 The machine-dependent bits break down as follows:
60 \item[@MachRegs@:] Everything about the target platform's machine
61 registers (and immediate operands, and addresses, which tend to
62 intermingle/interact with registers).
64 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
65 have a module of its own), plus a miscellany of other things
66 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
68 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
71 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
74 \item[@RegAllocInfo@:] In the register allocator, we manipulate
75 @MRegsState@s, which are @BitSet@s, one bit per machine register.
76 When we want to say something about a specific machine register
77 (e.g., ``it gets clobbered by this instruction''), we set/unset
78 its bit. Obviously, we do this @BitSet@ thing for efficiency
81 The @RegAllocInfo@ module collects together the machine-specific
82 info needed to do register allocation.
88 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc)
90 = let absCstmts = mkAbsCStmtList absC
91 (sdoc_pairs, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
92 stix_sdocs = map fst sdoc_pairs
93 insn_sdocs = map snd sdoc_pairs
95 insn_sdoc = my_vcat insn_sdocs
96 stix_sdoc = vcat stix_sdocs
99 my_trace m x = trace m x
100 my_vcat sds = vcat (intersperse (char ' '
101 $$ ptext SLIT("# ___ncg_debug_marker")
105 my_vcat sds = vcat sds
109 my_trace "nativeGen: begin"
110 (stix_sdoc, insn_sdoc)
113 absCtoNat :: AbstractC -> UniqSM (SDoc, SDoc)
115 = _scc_ "genCodeAbstractC" genCodeAbstractC absC `thenUs` \ stixRaw ->
116 _scc_ "genericOpt" genericOpt stixRaw `bind` \ stixOpt ->
117 _scc_ "genMachCode" genMachCode stixOpt `thenUs` \ pre_regalloc ->
118 _scc_ "regAlloc" regAlloc pre_regalloc `bind` \ almost_final ->
119 _scc_ "x86fp_kludge" x86fp_kludge almost_final `bind` \ final_mach_code ->
120 _scc_ "vcat" vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
121 _scc_ "pprStixTrees" pprStixTrees stixOpt `bind` \ stix_sdoc ->
122 returnUs (stix_sdoc, final_sdoc)
126 x86fp_kludge :: [Instr] -> [Instr]
127 x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
129 regAlloc :: InstrBlock -> [Instr]
130 regAlloc = runRegAllocate allocatableRegs findReservedRegs
133 Top level code generator for a chunk of stix code. For this part of
134 the computation, we switch from the UniqSM monad to the NatM monad.
135 The latter carries not only a Unique, but also an Int denoting the
136 current C stack pointer offset in the generated code; this is needed
137 for creating correct spill offsets on architectures which don't offer,
138 or for which it would be prohibitively expensive to employ, a frame
139 pointer register. Viz, x86.
141 The offset is measured in bytes, and indicates the difference between
142 the current (simulated) C stack-ptr and the value it was at the
143 beginning of the block. For stacks which grow down, this value should
144 be either zero or negative.
146 Switching between the two monads whilst carrying along the same Unique
147 supply breaks abstraction. Is that bad?
150 genMachCode :: [StixTree] -> UniqSM InstrBlock
152 genMachCode stmts initial_us
153 = let initial_st = mkNatM_State initial_us 0
154 (instr_list, final_st) = initNat initial_st (stmtsToInstrs stmts)
155 final_us = uniqOfNatM_State final_st
156 final_delta = deltaOfNatM_State final_st
159 then (instr_list, final_us)
160 else pprPanic "genMachCode: nonzero final delta"
164 %************************************************************************
166 \subsection[NCOpt]{The Generic Optimiser}
168 %************************************************************************
170 This is called between translating Abstract C to its Tree and actually
171 using the Native Code Generator to generate the annotations. It's a
172 chance to do some strength reductions.
174 ** Remember these all have to be machine independent ***
176 Note that constant-folding should have already happened, but we might
177 have introduced some new opportunities for constant-folding wrt
178 address manipulations.
181 genericOpt :: [StixTree] -> [StixTree]
182 genericOpt = map stixConFold . stixPeep
186 stixPeep :: [StixTree] -> [StixTree]
188 -- This transformation assumes that the temp assigned to in t1
189 -- is not assigned to in t2; for otherwise the target of the
190 -- second assignment would be substituted for, giving nonsense
191 -- code. As far as I can see, StixTemps are only ever assigned
192 -- to once. It would be nice to be sure!
194 stixPeep ( t1@(StAssign pka (StReg (StixTemp u pk)) rhs)
197 | stixCountTempUses u t2 == 1
198 && sum (map (stixCountTempUses u) ts) == 0
201 trace ("nativeGen: inlining " ++ showSDoc (pprStixTree rhs))
203 (stixPeep (stixSubst u rhs t2 : ts))
205 stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
209 -- disable stix inlining until we figure out how to fix the
210 -- latent bugs in the register allocator which are exposed by
215 For most nodes, just optimize the children.
218 stixConFold :: StixTree -> StixTree
220 stixConFold (StInd pk addr) = StInd pk (stixConFold addr)
222 stixConFold (StAssign pk dst src)
223 = StAssign pk (stixConFold dst) (stixConFold src)
225 stixConFold (StJump dsts addr) = StJump dsts (stixConFold addr)
227 stixConFold (StCondJump addr test)
228 = StCondJump addr (stixConFold test)
230 stixConFold (StCall fn cconv pk args)
231 = StCall fn cconv pk (map stixConFold args)
234 Fold indices together when the types match:
236 stixConFold (StIndex pk (StIndex pk' base off) off')
238 = StIndex pk (stixConFold base)
239 (stixConFold (StPrim IntAddOp [off, off']))
241 stixConFold (StIndex pk base off)
242 = StIndex pk (stixConFold base) (stixConFold off)
245 For PrimOps, we first optimize the children, and then we try our hand
246 at some constant-folding.
249 stixConFold (StPrim op args) = stixPrimFold op (map stixConFold args)
252 Replace register leaves with appropriate StixTrees for the given
256 stixConFold leaf@(StReg (StixMagicId id))
257 = case (stgReg id) of
258 Always tree -> stixConFold tree
261 stixConFold other = other
264 Now, try to constant-fold the PrimOps. The arguments have already
265 been optimized and folded.
269 :: PrimOp -- The operation from an StPrim
270 -> [StixTree] -- The optimized arguments
273 stixPrimFold op arg@[StInt x]
275 IntNegOp -> StInt (-x)
278 stixPrimFold op args@[StInt x, StInt y]
280 CharGtOp -> StInt (if x > y then 1 else 0)
281 CharGeOp -> StInt (if x >= y then 1 else 0)
282 CharEqOp -> StInt (if x == y then 1 else 0)
283 CharNeOp -> StInt (if x /= y then 1 else 0)
284 CharLtOp -> StInt (if x < y then 1 else 0)
285 CharLeOp -> StInt (if x <= y then 1 else 0)
286 IntAddOp -> StInt (x + y)
287 IntSubOp -> StInt (x - y)
288 IntMulOp -> StInt (x * y)
289 IntQuotOp -> StInt (x `quot` y)
290 IntRemOp -> StInt (x `rem` y)
291 IntGtOp -> StInt (if x > y then 1 else 0)
292 IntGeOp -> StInt (if x >= y then 1 else 0)
293 IntEqOp -> StInt (if x == y then 1 else 0)
294 IntNeOp -> StInt (if x /= y then 1 else 0)
295 IntLtOp -> StInt (if x < y then 1 else 0)
296 IntLeOp -> StInt (if x <= y then 1 else 0)
297 -- ToDo: WordQuotOp, WordRemOp.
301 When possible, shift the constants to the right-hand side, so that we
302 can match for strength reductions. Note that the code generator will
303 also assume that constants have been shifted to the right when
307 stixPrimFold op [x@(StInt _), y] | commutableOp op = stixPrimFold op [y, x]
310 We can often do something with constants of 0 and 1 ...
313 stixPrimFold op args@[x, y@(StInt 0)]
326 IntNeOp | is_comparison -> x
331 StPrim opp [_, _] -> opp `elem` comparison_ops
334 stixPrimFold op args@[x, y@(StInt 1)]
342 Now look for multiplication/division by powers of 2 (integers).
345 stixPrimFold op args@[x, y@(StInt n)]
347 IntMulOp -> case exactLog2 n of
348 Nothing -> StPrim op args
349 Just p -> StPrim ISllOp [x, StInt p]
350 IntQuotOp -> case exactLog2 n of
351 Nothing -> StPrim op args
352 Just p -> StPrim ISrlOp [x, StInt p]
356 Anything else is just too hard.
359 stixPrimFold op args = StPrim op args
364 = [ CharGtOp , CharGeOp , CharEqOp , CharNeOp , CharLtOp , CharLeOp,
365 IntGtOp , IntGeOp , IntEqOp , IntNeOp , IntLtOp , IntLeOp,
366 WordGtOp , WordGeOp , WordEqOp , WordNeOp , WordLtOp , WordLeOp,
367 AddrGtOp , AddrGeOp , AddrEqOp , AddrNeOp , AddrLtOp , AddrLeOp,
368 FloatGtOp , FloatGeOp , FloatEqOp , FloatNeOp , FloatLtOp , FloatLeOp,
369 DoubleGtOp, DoubleGeOp, DoubleEqOp, DoubleNeOp, DoubleLtOp, DoubleLeOp