2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
9 #include "nativeGen/NCG.h"
11 import List ( intersperse )
18 import AbsCStixGen ( genCodeAbstractC )
19 import AbsCSyn ( AbstractC, MagicId )
20 import AbsCUtils ( mkAbsCStmtList )
21 import AsmRegAlloc ( runRegAllocate )
22 import PrimOp ( commutableOp, PrimOp(..) )
23 import RegAllocInfo ( findReservedRegs )
24 import Stix ( StixTree(..), StixReg(..),
25 pprStixTrees, pprStixTree, CodeSegment(..),
26 stixCountTempUses, stixSubst,
27 NatM, initNat, mapNat,
28 NatM_State, mkNatM_State,
29 uniqOfNatM_State, deltaOfNatM_State )
30 import UniqSupply ( returnUs, thenUs, mapUs, initUs,
31 initUs_, UniqSM, UniqSupply,
32 lazyThenUs, lazyMapUs )
33 import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
35 import OrdList ( fromOL, concatOL )
40 The 96/03 native-code generator has machine-independent and
41 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
43 This module (@AsmCodeGen@) is the top-level machine-independent
44 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
45 (defined in module @Stix@), using support code from @StixInfo@ (info
46 tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C
47 macros), and @StixInteger@ (GMP arbitrary-precision operations).
49 Before entering machine-dependent land, we do some machine-independent
50 @genericOpt@imisations (defined below) on the @StixTree@s.
52 We convert to the machine-specific @Instr@ datatype with
53 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
54 use a machine-independent register allocator (@runRegAllocate@) to
55 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
56 helper functions (see about @RegAllocInfo@ below).
58 The machine-dependent bits break down as follows:
60 \item[@MachRegs@:] Everything about the target platform's machine
61 registers (and immediate operands, and addresses, which tend to
62 intermingle/interact with registers).
64 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
65 have a module of its own), plus a miscellany of other things
66 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
68 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
71 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
74 \item[@RegAllocInfo@:] In the register allocator, we manipulate
75 @MRegsState@s, which are @BitSet@s, one bit per machine register.
76 When we want to say something about a specific machine register
77 (e.g., ``it gets clobbered by this instruction''), we set/unset
78 its bit. Obviously, we do this @BitSet@ thing for efficiency
81 The @RegAllocInfo@ module collects together the machine-specific
82 info needed to do register allocation.
88 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc)
90 = let absCstmts = mkAbsCStmtList absC
91 (sdoc_pairs, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
92 stix_sdocs = map fst sdoc_pairs
93 insn_sdocs = map snd sdoc_pairs
95 insn_sdoc = my_vcat insn_sdocs
96 stix_sdoc = vcat stix_sdocs
99 my_trace m x = trace m x
100 my_vcat sds = vcat (intersperse
102 $$ ptext SLIT("# __debug_NCG_split_marker")
106 my_vcat sds = vcat sds
110 my_trace "nativeGen: begin"
111 (stix_sdoc, insn_sdoc)
114 absCtoNat :: AbstractC -> UniqSM (SDoc, SDoc)
116 = genCodeAbstractC absC `thenUs` \ stixRaw ->
117 genericOpt stixRaw `bind` \ stixOpt ->
118 genMachCode stixOpt `thenUs` \ pre_regalloc ->
119 regAlloc pre_regalloc `bind` \ almost_final ->
120 x86fp_kludge almost_final `bind` \ final_mach_code ->
121 vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
122 pprStixTrees stixOpt `bind` \ stix_sdoc ->
123 returnUs (stix_sdoc, final_sdoc)
127 x86fp_kludge :: [Instr] -> [Instr]
128 x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
130 regAlloc :: InstrBlock -> [Instr]
131 regAlloc = runRegAllocate allocatableRegs findReservedRegs
134 Top level code generator for a chunk of stix code. For this part of
135 the computation, we switch from the UniqSM monad to the NatM monad.
136 The latter carries not only a Unique, but also an Int denoting the
137 current C stack pointer offset in the generated code; this is needed
138 for creating correct spill offsets on architectures which don't offer,
139 or for which it would be prohibitively expensive to employ, a frame
140 pointer register. Viz, x86.
142 The offset is measured in bytes, and indicates the difference between
143 the current (simulated) C stack-ptr and the value it was at the
144 beginning of the block. For stacks which grow down, this value should
145 be either zero or negative.
147 Switching between the two monads whilst carrying along the same Unique
148 supply breaks abstraction. Is that bad?
151 genMachCode :: [StixTree] -> UniqSM InstrBlock
153 genMachCode stmts initial_us
154 = let initial_st = mkNatM_State initial_us 0
155 (blocks, final_st) = initNat initial_st
156 (mapNat stmt2Instrs stmts)
157 instr_list = concatOL blocks
158 final_us = uniqOfNatM_State final_st
159 final_delta = deltaOfNatM_State final_st
162 then (instr_list, final_us)
163 else pprPanic "genMachCode: nonzero final delta"
167 %************************************************************************
169 \subsection[NCOpt]{The Generic Optimiser}
171 %************************************************************************
173 This is called between translating Abstract C to its Tree and actually
174 using the Native Code Generator to generate the annotations. It's a
175 chance to do some strength reductions.
177 ** Remember these all have to be machine independent ***
179 Note that constant-folding should have already happened, but we might
180 have introduced some new opportunities for constant-folding wrt
181 address manipulations.
184 genericOpt :: [StixTree] -> [StixTree]
185 genericOpt = map stixConFold . stixPeep
189 stixPeep :: [StixTree] -> [StixTree]
191 -- This transformation assumes that the temp assigned to in t1
192 -- is not assigned to in t2; for otherwise the target of the
193 -- second assignment would be substituted for, giving nonsense
194 -- code. As far as I can see, StixTemps are only ever assigned
195 -- to once. It would be nice to be sure!
197 stixPeep ( t1@(StAssign pka (StReg (StixTemp u pk)) rhs)
200 | stixCountTempUses u t2 == 1
201 && sum (map (stixCountTempUses u) ts) == 0
204 trace ("nativeGen: inlining " ++ showSDoc (pprStixTree rhs))
206 (stixPeep (stixSubst u rhs t2 : ts))
208 stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
212 -- disable stix inlining until we figure out how to fix the
213 -- latent bugs in the register allocator which are exposed by
218 For most nodes, just optimize the children.
221 stixConFold :: StixTree -> StixTree
223 stixConFold (StInd pk addr) = StInd pk (stixConFold addr)
225 stixConFold (StAssign pk dst src)
226 = StAssign pk (stixConFold dst) (stixConFold src)
228 stixConFold (StJump addr) = StJump (stixConFold addr)
230 stixConFold (StCondJump addr test)
231 = StCondJump addr (stixConFold test)
233 stixConFold (StCall fn cconv pk args)
234 = StCall fn cconv pk (map stixConFold args)
237 Fold indices together when the types match:
239 stixConFold (StIndex pk (StIndex pk' base off) off')
241 = StIndex pk (stixConFold base)
242 (stixConFold (StPrim IntAddOp [off, off']))
244 stixConFold (StIndex pk base off)
245 = StIndex pk (stixConFold base) (stixConFold off)
248 For PrimOps, we first optimize the children, and then we try our hand
249 at some constant-folding.
252 stixConFold (StPrim op args) = stixPrimFold op (map stixConFold args)
255 Replace register leaves with appropriate StixTrees for the given
259 stixConFold leaf@(StReg (StixMagicId id))
260 = case (stgReg id) of
261 Always tree -> stixConFold tree
264 stixConFold other = other
267 Now, try to constant-fold the PrimOps. The arguments have already
268 been optimized and folded.
272 :: PrimOp -- The operation from an StPrim
273 -> [StixTree] -- The optimized arguments
276 stixPrimFold op arg@[StInt x]
278 IntNegOp -> StInt (-x)
281 stixPrimFold op args@[StInt x, StInt y]
283 CharGtOp -> StInt (if x > y then 1 else 0)
284 CharGeOp -> StInt (if x >= y then 1 else 0)
285 CharEqOp -> StInt (if x == y then 1 else 0)
286 CharNeOp -> StInt (if x /= y then 1 else 0)
287 CharLtOp -> StInt (if x < y then 1 else 0)
288 CharLeOp -> StInt (if x <= y then 1 else 0)
289 IntAddOp -> StInt (x + y)
290 IntSubOp -> StInt (x - y)
291 IntMulOp -> StInt (x * y)
292 IntQuotOp -> StInt (x `quot` y)
293 IntRemOp -> StInt (x `rem` y)
294 IntGtOp -> StInt (if x > y then 1 else 0)
295 IntGeOp -> StInt (if x >= y then 1 else 0)
296 IntEqOp -> StInt (if x == y then 1 else 0)
297 IntNeOp -> StInt (if x /= y then 1 else 0)
298 IntLtOp -> StInt (if x < y then 1 else 0)
299 IntLeOp -> StInt (if x <= y then 1 else 0)
300 -- ToDo: WordQuotOp, WordRemOp.
304 When possible, shift the constants to the right-hand side, so that we
305 can match for strength reductions. Note that the code generator will
306 also assume that constants have been shifted to the right when
310 stixPrimFold op [x@(StInt _), y] | commutableOp op = stixPrimFold op [y, x]
313 We can often do something with constants of 0 and 1 ...
316 stixPrimFold op args@[x, y@(StInt 0)]
329 IntNeOp | is_comparison -> x
334 StPrim opp [_, _] -> opp `elem` comparison_ops
337 stixPrimFold op args@[x, y@(StInt 1)]
345 Now look for multiplication/division by powers of 2 (integers).
348 stixPrimFold op args@[x, y@(StInt n)]
350 IntMulOp -> case exactLog2 n of
351 Nothing -> StPrim op args
352 Just p -> StPrim ISllOp [x, StInt p]
353 IntQuotOp -> case exactLog2 n of
354 Nothing -> StPrim op args
355 Just p -> StPrim ISrlOp [x, StInt p]
359 Anything else is just too hard.
362 stixPrimFold op args = StPrim op args
367 = [ CharGtOp , CharGeOp , CharEqOp , CharNeOp , CharLtOp , CharLeOp,
368 IntGtOp , IntGeOp , IntEqOp , IntNeOp , IntLtOp , IntLeOp,
369 WordGtOp , WordGeOp , WordEqOp , WordNeOp , WordLtOp , WordLeOp,
370 AddrGtOp , AddrGeOp , AddrEqOp , AddrNeOp , AddrLtOp , AddrLeOp,
371 FloatGtOp , FloatGeOp , FloatEqOp , FloatNeOp , FloatLtOp , FloatLeOp,
372 DoubleGtOp, DoubleGeOp, DoubleEqOp, DoubleNeOp, DoubleLtOp, DoubleLeOp