2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
17 import AbsCStixGen ( genCodeAbstractC )
18 import AbsCSyn ( AbstractC, MagicId )
19 import AsmRegAlloc ( runRegAllocate )
20 import OrdList ( OrdList )
21 import PrimOp ( commutableOp, PrimOp(..) )
22 import RegAllocInfo ( mkMRegsState, MRegsState )
23 import Stix ( StixTree(..), StixReg(..), pprStixTrees )
24 import PrimRep ( isFloatingRep )
25 import UniqSupply ( returnUs, thenUs, mapUs, initUs,
26 initUs_, UniqSM, UniqSupply )
27 import UniqFM ( UniqFM, emptyUFM, addToUFM, lookupUFM )
30 import GlaExts (trace) --tmp
31 #include "nativeGen/NCG.h"
34 The 96/03 native-code generator has machine-independent and
35 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
37 This module (@AsmCodeGen@) is the top-level machine-independent
38 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
39 (defined in module @Stix@), using support code from @StixInfo@ (info
40 tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C
41 macros), and @StixInteger@ (GMP arbitrary-precision operations).
43 Before entering machine-dependent land, we do some machine-independent
44 @genericOpt@imisations (defined below) on the @StixTree@s.
46 We convert to the machine-specific @Instr@ datatype with
47 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
48 use a machine-independent register allocator (@runRegAllocate@) to
49 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
50 helper functions (see about @RegAllocInfo@ below).
52 The machine-dependent bits break down as follows:
54 \item[@MachRegs@:] Everything about the target platform's machine
55 registers (and immediate operands, and addresses, which tend to
56 intermingle/interact with registers).
58 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
59 have a module of its own), plus a miscellany of other things
60 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
62 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
65 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
68 \item[@RegAllocInfo@:] In the register allocator, we manipulate
69 @MRegsState@s, which are @BitSet@s, one bit per machine register.
70 When we want to say something about a specific machine register
71 (e.g., ``it gets clobbered by this instruction''), we set/unset
72 its bit. Obviously, we do this @BitSet@ thing for efficiency
75 The @RegAllocInfo@ module collects together the machine-specific
76 info needed to do register allocation.
82 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc)
84 = let (stixRaw, us1) = initUs us (genCodeAbstractC absC)
85 stixOpt = map (map genericOpt) stixRaw
86 stixFinal = map x86floatFix stixOpt
87 insns = initUs_ us1 (codeGen stixFinal)
88 debug_stix = vcat (map pprStixTrees stixFinal)
93 x86floatFix = floatFix
100 @codeGen@ is the top-level code-generation function:
102 codeGen :: [[StixTree]] -> UniqSM SDoc
105 = mapUs genMachCode stixFinal `thenUs` \ dynamic_codes ->
107 static_instrs = scheduleMachCode dynamic_codes
109 returnUs (vcat (map pprInstr static_instrs))
112 Top level code generator for a chunk of stix code:
114 genMachCode :: [StixTree] -> UniqSM InstrList
117 = mapUs stmt2Instrs stmts `thenUs` \ blocks ->
118 returnUs (foldr (.) id blocks asmVoid)
121 The next bit does the code scheduling. The scheduler must also deal
122 with register allocation of temporaries. Much parallelism can be
123 exposed via the OrdList, but more might occur, so further analysis
127 scheduleMachCode :: [InstrList] -> [Instr]
130 = concat . map (runRegAllocate freeRegsState reservedRegs)
132 freeRegsState = mkMRegsState (extractMappedRegNos freeRegs)
135 %************************************************************************
137 \subsection[NCOpt]{The Generic Optimiser}
139 %************************************************************************
141 This is called between translating Abstract C to its Tree and actually
142 using the Native Code Generator to generate the annotations. It's a
143 chance to do some strength reductions.
145 ** Remember these all have to be machine independent ***
147 Note that constant-folding should have already happened, but we might
148 have introduced some new opportunities for constant-folding wrt
149 address manipulations.
152 genericOpt :: StixTree -> StixTree
155 For most nodes, just optimize the children.
158 genericOpt (StInd pk addr) = StInd pk (genericOpt addr)
160 genericOpt (StAssign pk dst src)
161 = StAssign pk (genericOpt dst) (genericOpt src)
163 genericOpt (StJump addr) = StJump (genericOpt addr)
165 genericOpt (StCondJump addr test)
166 = StCondJump addr (genericOpt test)
168 genericOpt (StCall fn cconv pk args)
169 = StCall fn cconv pk (map genericOpt args)
172 Fold indices together when the types match:
174 genericOpt (StIndex pk (StIndex pk' base off) off')
176 = StIndex pk (genericOpt base)
177 (genericOpt (StPrim IntAddOp [off, off']))
179 genericOpt (StIndex pk base off)
180 = StIndex pk (genericOpt base) (genericOpt off)
183 For PrimOps, we first optimize the children, and then we try our hand
184 at some constant-folding.
187 genericOpt (StPrim op args) = primOpt op (map genericOpt args)
190 Replace register leaves with appropriate StixTrees for the given
194 genericOpt leaf@(StReg (StixMagicId id))
195 = case (stgReg id) of
196 Always tree -> genericOpt tree
199 genericOpt other = other
202 Now, try to constant-fold the PrimOps. The arguments have already
203 been optimized and folded.
207 :: PrimOp -- The operation from an StPrim
208 -> [StixTree] -- The optimized arguments
211 primOpt op arg@[StInt x]
213 IntNegOp -> StInt (-x)
216 primOpt op args@[StInt x, StInt y]
218 CharGtOp -> StInt (if x > y then 1 else 0)
219 CharGeOp -> StInt (if x >= y then 1 else 0)
220 CharEqOp -> StInt (if x == y then 1 else 0)
221 CharNeOp -> StInt (if x /= y then 1 else 0)
222 CharLtOp -> StInt (if x < y then 1 else 0)
223 CharLeOp -> StInt (if x <= y then 1 else 0)
224 IntAddOp -> StInt (x + y)
225 IntSubOp -> StInt (x - y)
226 IntMulOp -> StInt (x * y)
227 IntQuotOp -> StInt (x `quot` y)
228 IntRemOp -> StInt (x `rem` y)
229 IntGtOp -> StInt (if x > y then 1 else 0)
230 IntGeOp -> StInt (if x >= y then 1 else 0)
231 IntEqOp -> StInt (if x == y then 1 else 0)
232 IntNeOp -> StInt (if x /= y then 1 else 0)
233 IntLtOp -> StInt (if x < y then 1 else 0)
234 IntLeOp -> StInt (if x <= y then 1 else 0)
235 -- ToDo: WordQuotOp, WordRemOp.
239 When possible, shift the constants to the right-hand side, so that we
240 can match for strength reductions. Note that the code generator will
241 also assume that constants have been shifted to the right when
245 primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x]
248 We can often do something with constants of 0 and 1 ...
251 primOpt op args@[x, y@(StInt 0)]
266 primOpt op args@[x, y@(StInt 1)]
274 Now look for multiplication/division by powers of 2 (integers).
277 primOpt op args@[x, y@(StInt n)]
279 IntMulOp -> case exactLog2 n of
280 Nothing -> StPrim op args
281 Just p -> StPrim ISllOp [x, StInt p]
282 IntQuotOp -> case exactLog2 n of
283 Nothing -> StPrim op args
284 Just p -> StPrim ISrlOp [x, StInt p]
288 Anything else is just too hard.
291 primOpt op args = StPrim op args
294 -----------------------------------------------------------------------------
295 Fix up floating point operations for x86.
297 The problem is that the code generator can't handle the weird register
298 naming scheme for floating point registers on the x86, so we have to
299 deal with memory-resident floating point values wherever possible.
301 We therefore can't stand references to floating-point kinded temporary
302 variables, and try to translate them into memory addresses wherever
306 floatFix :: [StixTree] -> [StixTree]
307 floatFix trees = fltFix emptyUFM trees
309 fltFix :: UniqFM StixTree -- mapping tmp vars to memory locations
314 -- The case we're interested in: loading a temporary from a memory
315 -- address. Eliminate the instruction and replace all future references
316 -- to the temporary with the memory address.
317 fltFix locs ((StAssign rep (StReg (StixTemp uq _)) loc) : trees)
318 | isFloatingRep rep = trace "found one" $ fltFix (addToUFM locs uq loc) trees
320 fltFix locs ((StAssign rep src dst) : trees)
321 = StAssign rep (fltFix1 locs src) (fltFix1 locs dst) : fltFix locs trees
323 fltFix locs (tree : trees)
324 = fltFix1 locs tree : fltFix locs trees
327 fltFix1 :: UniqFM StixTree -> StixTree -> StixTree
328 fltFix1 locs r@(StReg (StixTemp uq rep))
329 | isFloatingRep rep = case lookupUFM locs uq of
330 Nothing -> panic "fltFix1"
331 Just tree -> trace "substed" $ tree
333 fltFix1 locs (StIndex rep l r) =
334 StIndex rep (fltFix1 locs l) (fltFix1 locs r)
336 fltFix1 locs (StInd rep tree) =
337 StInd rep (fltFix1 locs tree)
339 fltFix1 locs (StAssign rep dst src) = panic "fltFix1: StAssign"
341 fltFix1 locs (StJump tree) = StJump (fltFix1 locs tree)
343 fltFix1 locs (StCondJump lbl tree) =
344 StCondJump lbl (fltFix1 locs tree)
346 fltFix1 locs (StPrim op trees) =
347 StPrim op (map (fltFix1 locs) trees)
349 fltFix1 locs (StCall f conv rep trees) =
350 StCall f conv rep (map (fltFix1 locs) trees)
352 fltFix1 locs tree = tree