2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
17 import AbsCStixGen ( genCodeAbstractC )
18 import AbsCSyn ( AbstractC, MagicId )
19 import AsmRegAlloc ( runRegAllocate )
20 import OrdList ( OrdList )
21 import PrimOp ( commutableOp, PrimOp(..) )
22 import RegAllocInfo ( mkMRegsState, MRegsState )
23 import Stix ( StixTree(..), StixReg(..) )
24 import PrimRep ( isFloatingRep )
25 import UniqSupply ( returnUs, thenUs, mapUs, initUs_, UniqSM, UniqSupply )
26 import UniqFM ( UniqFM, emptyUFM, addToUFM, lookupUFM )
29 import GlaExts (trace) --tmp
30 #include "nativeGen/NCG.h"
33 The 96/03 native-code generator has machine-independent and
34 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
36 This module (@AsmCodeGen@) is the top-level machine-independent
37 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
38 (defined in module @Stix@), using support code from @StixInfo@ (info
39 tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C
40 macros), and @StixInteger@ (GMP arbitrary-precision operations).
42 Before entering machine-dependent land, we do some machine-independent
43 @genericOpt@imisations (defined below) on the @StixTree@s.
45 We convert to the machine-specific @Instr@ datatype with
46 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
47 use a machine-independent register allocator (@runRegAllocate@) to
48 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
49 helper functions (see about @RegAllocInfo@ below).
51 The machine-dependent bits break down as follows:
53 \item[@MachRegs@:] Everything about the target platform's machine
54 registers (and immediate operands, and addresses, which tend to
55 intermingle/interact with registers).
57 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
58 have a module of its own), plus a miscellany of other things
59 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
61 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
64 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
67 \item[@RegAllocInfo@:] In the register allocator, we manipulate
68 @MRegsState@s, which are @BitSet@s, one bit per machine register.
69 When we want to say something about a specific machine register
70 (e.g., ``it gets clobbered by this instruction''), we set/unset
71 its bit. Obviously, we do this @BitSet@ thing for efficiency
74 The @RegAllocInfo@ module collects together the machine-specific
75 info needed to do register allocation.
80 nativeCodeGen :: AbstractC -> UniqSupply -> SDoc
81 nativeCodeGen absC us = initUs_ us (runNCG absC)
84 = genCodeAbstractC absC `thenUs` \ treelists ->
86 stix = map (map genericOpt) treelists
90 stix' = map floatFix stix
98 @codeGen@ is the top-level code-generation function:
100 codeGen :: [[StixTree]] -> UniqSM SDoc
103 = mapUs genMachCode trees `thenUs` \ dynamic_codes ->
105 static_instrs = scheduleMachCode dynamic_codes
107 returnUs (vcat (map pprInstr static_instrs))
110 Top level code generator for a chunk of stix code:
112 genMachCode :: [StixTree] -> UniqSM InstrList
115 = mapUs stmt2Instrs stmts `thenUs` \ blocks ->
116 returnUs (foldr (.) id blocks asmVoid)
119 The next bit does the code scheduling. The scheduler must also deal
120 with register allocation of temporaries. Much parallelism can be
121 exposed via the OrdList, but more might occur, so further analysis
125 scheduleMachCode :: [InstrList] -> [Instr]
128 = concat . map (runRegAllocate freeRegsState reservedRegs)
130 freeRegsState = mkMRegsState (extractMappedRegNos freeRegs)
133 %************************************************************************
135 \subsection[NCOpt]{The Generic Optimiser}
137 %************************************************************************
139 This is called between translating Abstract C to its Tree and actually
140 using the Native Code Generator to generate the annotations. It's a
141 chance to do some strength reductions.
143 ** Remember these all have to be machine independent ***
145 Note that constant-folding should have already happened, but we might
146 have introduced some new opportunities for constant-folding wrt
147 address manipulations.
150 genericOpt :: StixTree -> StixTree
153 For most nodes, just optimize the children.
156 genericOpt (StInd pk addr) = StInd pk (genericOpt addr)
158 genericOpt (StAssign pk dst src)
159 = StAssign pk (genericOpt dst) (genericOpt src)
161 genericOpt (StJump addr) = StJump (genericOpt addr)
163 genericOpt (StCondJump addr test)
164 = StCondJump addr (genericOpt test)
166 genericOpt (StCall fn cconv pk args)
167 = StCall fn cconv pk (map genericOpt args)
170 Fold indices together when the types match:
172 genericOpt (StIndex pk (StIndex pk' base off) off')
174 = StIndex pk (genericOpt base)
175 (genericOpt (StPrim IntAddOp [off, off']))
177 genericOpt (StIndex pk base off)
178 = StIndex pk (genericOpt base) (genericOpt off)
181 For PrimOps, we first optimize the children, and then we try our hand
182 at some constant-folding.
185 genericOpt (StPrim op args) = primOpt op (map genericOpt args)
188 Replace register leaves with appropriate StixTrees for the given
192 genericOpt leaf@(StReg (StixMagicId id))
193 = case (stgReg id) of
194 Always tree -> genericOpt tree
197 genericOpt other = other
200 Now, try to constant-fold the PrimOps. The arguments have already
201 been optimized and folded.
205 :: PrimOp -- The operation from an StPrim
206 -> [StixTree] -- The optimized arguments
209 primOpt op arg@[StInt x]
211 IntNegOp -> StInt (-x)
212 IntAbsOp -> StInt (abs x)
215 primOpt op args@[StInt x, StInt y]
217 CharGtOp -> StInt (if x > y then 1 else 0)
218 CharGeOp -> StInt (if x >= y then 1 else 0)
219 CharEqOp -> StInt (if x == y then 1 else 0)
220 CharNeOp -> StInt (if x /= y then 1 else 0)
221 CharLtOp -> StInt (if x < y then 1 else 0)
222 CharLeOp -> StInt (if x <= y then 1 else 0)
223 IntAddOp -> StInt (x + y)
224 IntSubOp -> StInt (x - y)
225 IntMulOp -> StInt (x * y)
226 IntQuotOp -> StInt (x `quot` y)
227 IntRemOp -> StInt (x `rem` y)
228 IntGtOp -> StInt (if x > y then 1 else 0)
229 IntGeOp -> StInt (if x >= y then 1 else 0)
230 IntEqOp -> StInt (if x == y then 1 else 0)
231 IntNeOp -> StInt (if x /= y then 1 else 0)
232 IntLtOp -> StInt (if x < y then 1 else 0)
233 IntLeOp -> StInt (if x <= y then 1 else 0)
234 -- ToDo: WordQuotOp, WordRemOp.
238 When possible, shift the constants to the right-hand side, so that we
239 can match for strength reductions. Note that the code generator will
240 also assume that constants have been shifted to the right when
244 primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x]
247 We can often do something with constants of 0 and 1 ...
250 primOpt op args@[x, y@(StInt 0)]
265 primOpt op args@[x, y@(StInt 1)]
273 Now look for multiplication/division by powers of 2 (integers).
276 primOpt op args@[x, y@(StInt n)]
278 IntMulOp -> case exactLog2 n of
279 Nothing -> StPrim op args
280 Just p -> StPrim ISllOp [x, StInt p]
281 IntQuotOp -> case exactLog2 n of
282 Nothing -> StPrim op args
283 Just p -> StPrim ISrlOp [x, StInt p]
287 Anything else is just too hard.
290 primOpt op args = StPrim op args
293 -----------------------------------------------------------------------------
294 Fix up floating point operations for x86.
296 The problem is that the code generator can't handle the weird register
297 naming scheme for floating point registers on the x86, so we have to
298 deal with memory-resident floating point values wherever possible.
300 We therefore can't stand references to floating-point kinded temporary
301 variables, and try to translate them into memory addresses wherever
305 floatFix :: [StixTree] -> [StixTree]
306 floatFix trees = fltFix emptyUFM trees
308 fltFix :: UniqFM StixTree -- mapping tmp vars to memory locations
313 -- The case we're interested in: loading a temporary from a memory
314 -- address. Eliminate the instruction and replace all future references
315 -- to the temporary with the memory address.
316 fltFix locs ((StAssign rep (StReg (StixTemp uq _)) loc) : trees)
317 | isFloatingRep rep = trace "found one" $ fltFix (addToUFM locs uq loc) trees
319 fltFix locs ((StAssign rep src dst) : trees)
320 = StAssign rep (fltFix1 locs src) (fltFix1 locs dst) : fltFix locs trees
322 fltFix locs (tree : trees)
323 = fltFix1 locs tree : fltFix locs trees
326 fltFix1 :: UniqFM StixTree -> StixTree -> StixTree
327 fltFix1 locs r@(StReg (StixTemp uq rep))
328 | isFloatingRep rep = case lookupUFM locs uq of
329 Nothing -> panic "fltFix1"
330 Just tree -> trace "substed" $ tree
332 fltFix1 locs (StIndex rep l r) =
333 StIndex rep (fltFix1 locs l) (fltFix1 locs r)
335 fltFix1 locs (StInd rep tree) =
336 StInd rep (fltFix1 locs tree)
338 fltFix1 locs (StAssign rep dst src) = panic "fltFix1: StAssign"
340 fltFix1 locs (StJump tree) = StJump (fltFix1 locs tree)
342 fltFix1 locs (StCondJump lbl tree) =
343 StCondJump lbl (fltFix1 locs tree)
345 fltFix1 locs (StPrim op trees) =
346 StPrim op (map (fltFix1 locs) trees)
348 fltFix1 locs (StCall f conv rep trees) =
349 StCall f conv rep (map (fltFix1 locs) trees)
351 fltFix1 locs tree = tree