2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
9 #include "nativeGen/NCG.h"
12 import List ( intersperse )
19 import AbsCStixGen ( genCodeAbstractC )
20 import AbsCSyn ( AbstractC, MagicId )
21 import AbsCUtils ( mkAbsCStmtList )
22 import AsmRegAlloc ( runRegAllocate )
23 import PrimOp ( commutableOp, PrimOp(..) )
24 import RegAllocInfo ( findReservedRegs )
25 import Stix ( StixTree(..), StixReg(..),
26 pprStixTrees, pprStixTree, CodeSegment(..),
27 stixCountTempUses, stixSubst,
28 NatM, initNat, mapNat,
29 NatM_State, mkNatM_State,
30 uniqOfNatM_State, deltaOfNatM_State )
31 import PrimRep ( isFloatingRep, PrimRep(..) )
32 import UniqSupply ( returnUs, thenUs, mapUs, initUs,
33 initUs_, UniqSM, UniqSupply,
34 lazyThenUs, lazyMapUs )
35 import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
37 import OrdList ( fromOL, concatOL )
42 The 96/03 native-code generator has machine-independent and
43 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
45 This module (@AsmCodeGen@) is the top-level machine-independent
46 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
47 (defined in module @Stix@), using support code from @StixInfo@ (info
48 tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C
49 macros), and @StixInteger@ (GMP arbitrary-precision operations).
51 Before entering machine-dependent land, we do some machine-independent
52 @genericOpt@imisations (defined below) on the @StixTree@s.
54 We convert to the machine-specific @Instr@ datatype with
55 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
56 use a machine-independent register allocator (@runRegAllocate@) to
57 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
58 helper functions (see about @RegAllocInfo@ below).
60 The machine-dependent bits break down as follows:
62 \item[@MachRegs@:] Everything about the target platform's machine
63 registers (and immediate operands, and addresses, which tend to
64 intermingle/interact with registers).
66 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
67 have a module of its own), plus a miscellany of other things
68 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
70 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
73 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
76 \item[@RegAllocInfo@:] In the register allocator, we manipulate
77 @MRegsState@s, which are @BitSet@s, one bit per machine register.
78 When we want to say something about a specific machine register
79 (e.g., ``it gets clobbered by this instruction''), we set/unset
80 its bit. Obviously, we do this @BitSet@ thing for efficiency
83 The @RegAllocInfo@ module collects together the machine-specific
84 info needed to do register allocation.
90 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc)
92 = let absCstmts = mkAbsCStmtList absC
93 (sdoc_pairs, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
94 stix_sdocs = map fst sdoc_pairs
95 insn_sdocs = map snd sdoc_pairs
97 insn_sdoc = my_vcat insn_sdocs
98 stix_sdoc = vcat stix_sdocs
101 my_trace m x = trace m x
102 my_vcat sds = vcat (intersperse (char ' '
103 $$ ptext SLIT("# ___stg_split_marker")
107 my_vcat sds = vcat sds
111 my_trace "nativeGen: begin"
112 (stix_sdoc, insn_sdoc)
115 absCtoNat :: AbstractC -> UniqSM (SDoc, SDoc)
117 = genCodeAbstractC absC `thenUs` \ stixRaw ->
118 genericOpt stixRaw `bind` \ stixOpt ->
119 genMachCode stixOpt `thenUs` \ pre_regalloc ->
120 regAlloc pre_regalloc `bind` \ almost_final ->
121 x86fp_kludge almost_final `bind` \ final_mach_code ->
122 vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
123 pprStixTrees stixOpt `bind` \ stix_sdoc ->
124 returnUs (stix_sdoc, final_sdoc)
128 x86fp_kludge :: [Instr] -> [Instr]
129 x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
131 regAlloc :: InstrBlock -> [Instr]
132 regAlloc = runRegAllocate allocatableRegs findReservedRegs
135 Top level code generator for a chunk of stix code. For this part of
136 the computation, we switch from the UniqSM monad to the NatM monad.
137 The latter carries not only a Unique, but also an Int denoting the
138 current C stack pointer offset in the generated code; this is needed
139 for creating correct spill offsets on architectures which don't offer,
140 or for which it would be prohibitively expensive to employ, a frame
141 pointer register. Viz, x86.
143 The offset is measured in bytes, and indicates the difference between
144 the current (simulated) C stack-ptr and the value it was at the
145 beginning of the block. For stacks which grow down, this value should
146 be either zero or negative.
148 Switching between the two monads whilst carrying along the same Unique
149 supply breaks abstraction. Is that bad?
152 genMachCode :: [StixTree] -> UniqSM InstrBlock
154 genMachCode stmts initial_us
155 = let initial_st = mkNatM_State initial_us 0
156 (blocks, final_st) = initNat initial_st
157 (mapNat stmt2Instrs stmts)
158 instr_list = concatOL blocks
159 final_us = uniqOfNatM_State final_st
160 final_delta = deltaOfNatM_State final_st
163 then (instr_list, final_us)
164 else pprPanic "genMachCode: nonzero final delta"
168 %************************************************************************
170 \subsection[NCOpt]{The Generic Optimiser}
172 %************************************************************************
174 This is called between translating Abstract C to its Tree and actually
175 using the Native Code Generator to generate the annotations. It's a
176 chance to do some strength reductions.
178 ** Remember these all have to be machine independent ***
180 Note that constant-folding should have already happened, but we might
181 have introduced some new opportunities for constant-folding wrt
182 address manipulations.
185 genericOpt :: [StixTree] -> [StixTree]
186 genericOpt = map stixConFold . stixPeep
190 stixPeep :: [StixTree] -> [StixTree]
192 -- This transformation assumes that the temp assigned to in t1
193 -- is not assigned to in t2; for otherwise the target of the
194 -- second assignment would be substituted for, giving nonsense
195 -- code. As far as I can see, StixTemps are only ever assigned
196 -- to once. It would be nice to be sure!
198 stixPeep ( t1@(StAssign pka (StReg (StixTemp u pk)) rhs)
201 | stixCountTempUses u t2 == 1
202 && sum (map (stixCountTempUses u) ts) == 0
205 trace ("nativeGen: inlining " ++ showSDoc (pprStixTree rhs))
207 (stixPeep (stixSubst u rhs t2 : ts))
209 stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
213 -- disable stix inlining until we figure out how to fix the
214 -- latent bugs in the register allocator which are exposed by
219 For most nodes, just optimize the children.
222 stixConFold :: StixTree -> StixTree
224 stixConFold (StInd pk addr) = StInd pk (stixConFold addr)
226 stixConFold (StAssign pk dst src)
227 = StAssign pk (stixConFold dst) (stixConFold src)
229 stixConFold (StJump addr) = StJump (stixConFold addr)
231 stixConFold (StCondJump addr test)
232 = StCondJump addr (stixConFold test)
234 stixConFold (StCall fn cconv pk args)
235 = StCall fn cconv pk (map stixConFold args)
238 Fold indices together when the types match:
240 stixConFold (StIndex pk (StIndex pk' base off) off')
242 = StIndex pk (stixConFold base)
243 (stixConFold (StPrim IntAddOp [off, off']))
245 stixConFold (StIndex pk base off)
246 = StIndex pk (stixConFold base) (stixConFold off)
249 For PrimOps, we first optimize the children, and then we try our hand
250 at some constant-folding.
253 stixConFold (StPrim op args) = stixPrimFold op (map stixConFold args)
256 Replace register leaves with appropriate StixTrees for the given
260 stixConFold leaf@(StReg (StixMagicId id))
261 = case (stgReg id) of
262 Always tree -> stixConFold tree
265 stixConFold other = other
268 Now, try to constant-fold the PrimOps. The arguments have already
269 been optimized and folded.
273 :: PrimOp -- The operation from an StPrim
274 -> [StixTree] -- The optimized arguments
277 stixPrimFold op arg@[StInt x]
279 IntNegOp -> StInt (-x)
282 stixPrimFold op args@[StInt x, StInt y]
284 CharGtOp -> StInt (if x > y then 1 else 0)
285 CharGeOp -> StInt (if x >= y then 1 else 0)
286 CharEqOp -> StInt (if x == y then 1 else 0)
287 CharNeOp -> StInt (if x /= y then 1 else 0)
288 CharLtOp -> StInt (if x < y then 1 else 0)
289 CharLeOp -> StInt (if x <= y then 1 else 0)
290 IntAddOp -> StInt (x + y)
291 IntSubOp -> StInt (x - y)
292 IntMulOp -> StInt (x * y)
293 IntQuotOp -> StInt (x `quot` y)
294 IntRemOp -> StInt (x `rem` y)
295 IntGtOp -> StInt (if x > y then 1 else 0)
296 IntGeOp -> StInt (if x >= y then 1 else 0)
297 IntEqOp -> StInt (if x == y then 1 else 0)
298 IntNeOp -> StInt (if x /= y then 1 else 0)
299 IntLtOp -> StInt (if x < y then 1 else 0)
300 IntLeOp -> StInt (if x <= y then 1 else 0)
301 -- ToDo: WordQuotOp, WordRemOp.
305 When possible, shift the constants to the right-hand side, so that we
306 can match for strength reductions. Note that the code generator will
307 also assume that constants have been shifted to the right when
311 stixPrimFold op [x@(StInt _), y] | commutableOp op = stixPrimFold op [y, x]
314 We can often do something with constants of 0 and 1 ...
317 stixPrimFold op args@[x, y@(StInt 0)]
330 IntNeOp | is_comparison -> x
335 StPrim opp [_, _] -> opp `elem` comparison_ops
338 stixPrimFold op args@[x, y@(StInt 1)]
346 Now look for multiplication/division by powers of 2 (integers).
349 stixPrimFold op args@[x, y@(StInt n)]
351 IntMulOp -> case exactLog2 n of
352 Nothing -> StPrim op args
353 Just p -> StPrim ISllOp [x, StInt p]
354 IntQuotOp -> case exactLog2 n of
355 Nothing -> StPrim op args
356 Just p -> StPrim ISrlOp [x, StInt p]
360 Anything else is just too hard.
363 stixPrimFold op args = StPrim op args
368 = [ CharGtOp , CharGeOp , CharEqOp , CharNeOp , CharLtOp , CharLeOp,
369 IntGtOp , IntGeOp , IntEqOp , IntNeOp , IntLtOp , IntLeOp,
370 WordGtOp , WordGeOp , WordEqOp , WordNeOp , WordLtOp , WordLeOp,
371 AddrGtOp , AddrGeOp , AddrEqOp , AddrNeOp , AddrLtOp , AddrLeOp,
372 FloatGtOp , FloatGeOp , FloatEqOp , FloatNeOp , FloatLtOp , FloatLeOp,
373 DoubleGtOp, DoubleGeOp, DoubleEqOp, DoubleNeOp, DoubleLtOp, DoubleLeOp