2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( writeRealAsm, dumpRealAsm ) where
8 #include "HsVersions.h"
17 import AbsCStixGen ( genCodeAbstractC )
18 import AbsCSyn ( AbstractC, MagicId )
19 import AsmRegAlloc ( runRegAllocate )
20 import OrdList ( OrdList )
21 import PrimOp ( commutableOp, PrimOp(..) )
22 import RegAllocInfo ( mkMRegsState, MRegsState )
23 import Stix ( StixTree(..), StixReg(..) )
24 import PrimRep ( isFloatingRep )
25 import UniqSupply ( returnUs, thenUs, mapUs, initUs, UniqSM, UniqSupply )
26 import UniqFM ( UniqFM, emptyUFM, addToUFM, lookupUFM )
29 import GlaExts (trace) --tmp
30 #include "nativeGen/NCG.h"
33 The 96/03 native-code generator has machine-independent and
34 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
36 This module (@AsmCodeGen@) is the top-level machine-independent
37 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
38 (defined in module @Stix@), using support code from @StixInfo@ (info
39 tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C
40 macros), and @StixInteger@ (GMP arbitrary-precision operations).
42 Before entering machine-dependent land, we do some machine-independent
43 @genericOpt@imisations (defined below) on the @StixTree@s.
45 We convert to the machine-specific @Instr@ datatype with
46 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
47 use a machine-independent register allocator (@runRegAllocate@) to
48 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
49 helper functions (see about @RegAllocInfo@ below).
51 The machine-dependent bits break down as follows:
53 \item[@MachRegs@:] Everything about the target platform's machine
54 registers (and immediate operands, and addresses, which tend to
55 intermingle/interact with registers).
57 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
58 have a module of its own), plus a miscellany of other things
59 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
61 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
64 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
67 \item[@RegAllocInfo@:] In the register allocator, we manipulate
68 @MRegsState@s, which are @BitSet@s, one bit per machine register.
69 When we want to say something about a specific machine register
70 (e.g., ``it gets clobbered by this instruction''), we set/unset
71 its bit. Obviously, we do this @BitSet@ thing for efficiency
74 The @RegAllocInfo@ module collects together the machine-specific
75 info needed to do register allocation.
80 writeRealAsm :: Handle -> AbstractC -> UniqSupply -> IO ()
81 writeRealAsm handle absC us
82 = -- _scc_ "writeRealAsm"
83 printForAsm handle (initUs us (runNCG absC))
85 dumpRealAsm :: AbstractC -> UniqSupply -> SDoc
86 dumpRealAsm absC us = initUs us (runNCG absC)
89 = genCodeAbstractC absC `thenUs` \ treelists ->
91 stix = map (map genericOpt) treelists
95 stix' = map floatFix stix
103 @codeGen@ is the top-level code-generation function:
105 codeGen :: [[StixTree]] -> UniqSM SDoc
108 = mapUs genMachCode trees `thenUs` \ dynamic_codes ->
110 static_instrs = scheduleMachCode dynamic_codes
112 returnUs (vcat (map pprInstr static_instrs))
115 Top level code generator for a chunk of stix code:
117 genMachCode :: [StixTree] -> UniqSM InstrList
120 = mapUs stmt2Instrs stmts `thenUs` \ blocks ->
121 returnUs (foldr (.) id blocks asmVoid)
124 The next bit does the code scheduling. The scheduler must also deal
125 with register allocation of temporaries. Much parallelism can be
126 exposed via the OrdList, but more might occur, so further analysis
130 scheduleMachCode :: [InstrList] -> [Instr]
133 = concat . map (runRegAllocate freeRegsState reservedRegs)
135 freeRegsState = mkMRegsState (extractMappedRegNos freeRegs)
138 %************************************************************************
140 \subsection[NCOpt]{The Generic Optimiser}
142 %************************************************************************
144 This is called between translating Abstract C to its Tree and actually
145 using the Native Code Generator to generate the annotations. It's a
146 chance to do some strength reductions.
148 ** Remember these all have to be machine independent ***
150 Note that constant-folding should have already happened, but we might
151 have introduced some new opportunities for constant-folding wrt
152 address manipulations.
155 genericOpt :: StixTree -> StixTree
158 For most nodes, just optimize the children.
161 genericOpt (StInd pk addr) = StInd pk (genericOpt addr)
163 genericOpt (StAssign pk dst src)
164 = StAssign pk (genericOpt dst) (genericOpt src)
166 genericOpt (StJump addr) = StJump (genericOpt addr)
168 genericOpt (StCondJump addr test)
169 = StCondJump addr (genericOpt test)
171 genericOpt (StCall fn cconv pk args)
172 = StCall fn cconv pk (map genericOpt args)
175 Fold indices together when the types match:
177 genericOpt (StIndex pk (StIndex pk' base off) off')
179 = StIndex pk (genericOpt base)
180 (genericOpt (StPrim IntAddOp [off, off']))
182 genericOpt (StIndex pk base off)
183 = StIndex pk (genericOpt base) (genericOpt off)
186 For PrimOps, we first optimize the children, and then we try our hand
187 at some constant-folding.
190 genericOpt (StPrim op args) = primOpt op (map genericOpt args)
193 Replace register leaves with appropriate StixTrees for the given
197 genericOpt leaf@(StReg (StixMagicId id))
198 = case (stgReg id) of
199 Always tree -> genericOpt tree
202 genericOpt other = other
205 Now, try to constant-fold the PrimOps. The arguments have already
206 been optimized and folded.
210 :: PrimOp -- The operation from an StPrim
211 -> [StixTree] -- The optimized arguments
214 primOpt op arg@[StInt x]
216 IntNegOp -> StInt (-x)
217 IntAbsOp -> StInt (abs x)
220 primOpt op args@[StInt x, StInt y]
222 CharGtOp -> StInt (if x > y then 1 else 0)
223 CharGeOp -> StInt (if x >= y then 1 else 0)
224 CharEqOp -> StInt (if x == y then 1 else 0)
225 CharNeOp -> StInt (if x /= y then 1 else 0)
226 CharLtOp -> StInt (if x < y then 1 else 0)
227 CharLeOp -> StInt (if x <= y then 1 else 0)
228 IntAddOp -> StInt (x + y)
229 IntSubOp -> StInt (x - y)
230 IntMulOp -> StInt (x * y)
231 IntQuotOp -> StInt (x `quot` y)
232 IntRemOp -> StInt (x `rem` y)
233 IntGtOp -> StInt (if x > y then 1 else 0)
234 IntGeOp -> StInt (if x >= y then 1 else 0)
235 IntEqOp -> StInt (if x == y then 1 else 0)
236 IntNeOp -> StInt (if x /= y then 1 else 0)
237 IntLtOp -> StInt (if x < y then 1 else 0)
238 IntLeOp -> StInt (if x <= y then 1 else 0)
239 -- ToDo: WordQuotOp, WordRemOp.
243 When possible, shift the constants to the right-hand side, so that we
244 can match for strength reductions. Note that the code generator will
245 also assume that constants have been shifted to the right when
249 primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x]
252 We can often do something with constants of 0 and 1 ...
255 primOpt op args@[x, y@(StInt 0)]
270 primOpt op args@[x, y@(StInt 1)]
278 Now look for multiplication/division by powers of 2 (integers).
281 primOpt op args@[x, y@(StInt n)]
283 IntMulOp -> case exactLog2 n of
284 Nothing -> StPrim op args
285 Just p -> StPrim ISllOp [x, StInt p]
286 IntQuotOp -> case exactLog2 n of
287 Nothing -> StPrim op args
288 Just p -> StPrim ISrlOp [x, StInt p]
292 Anything else is just too hard.
295 primOpt op args = StPrim op args
298 -----------------------------------------------------------------------------
299 Fix up floating point operations for x86.
301 The problem is that the code generator can't handle the weird register
302 naming scheme for floating point registers on the x86, so we have to
303 deal with memory-resident floating point values wherever possible.
305 We therefore can't stand references to floating-point kinded temporary
306 variables, and try to translate them into memory addresses wherever
310 floatFix :: [StixTree] -> [StixTree]
311 floatFix trees = fltFix emptyUFM trees
313 fltFix :: UniqFM StixTree -- mapping tmp vars to memory locations
318 -- The case we're interested in: loading a temporary from a memory
319 -- address. Eliminate the instruction and replace all future references
320 -- to the temporary with the memory address.
321 fltFix locs ((StAssign rep (StReg (StixTemp uq _)) loc) : trees)
322 | isFloatingRep rep = trace "found one" $ fltFix (addToUFM locs uq loc) trees
324 fltFix locs ((StAssign rep src dst) : trees)
325 = StAssign rep (fltFix1 locs src) (fltFix1 locs dst) : fltFix locs trees
327 fltFix locs (tree : trees)
328 = fltFix1 locs tree : fltFix locs trees
331 fltFix1 :: UniqFM StixTree -> StixTree -> StixTree
332 fltFix1 locs r@(StReg (StixTemp uq rep))
333 | isFloatingRep rep = case lookupUFM locs uq of
334 Nothing -> panic "fltFix1"
335 Just tree -> trace "substed" $ tree
337 fltFix1 locs (StIndex rep l r) =
338 StIndex rep (fltFix1 locs l) (fltFix1 locs r)
340 fltFix1 locs (StInd rep tree) =
341 StInd rep (fltFix1 locs tree)
343 fltFix1 locs (StAssign rep dst src) = panic "fltFix1: StAssign"
345 fltFix1 locs (StJump tree) = StJump (fltFix1 locs tree)
347 fltFix1 locs (StCondJump label tree) =
348 StCondJump label (fltFix1 locs tree)
350 fltFix1 locs (StPrim op trees) =
351 StPrim op (map (fltFix1 locs) trees)
353 fltFix1 locs (StCall f conv rep trees) =
354 StCall f conv rep (map (fltFix1 locs) trees)
356 fltFix1 locs tree = tree