2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
11 import List ( intersperse )
18 import AbsCStixGen ( genCodeAbstractC )
19 import AbsCSyn ( AbstractC, MagicId )
20 import AsmRegAlloc ( runRegAllocate )
21 import OrdList ( OrdList )
22 import PrimOp ( commutableOp, PrimOp(..) )
23 import RegAllocInfo ( mkMRegsState, MRegsState )
24 import Stix ( StixTree(..), StixReg(..), pprStixTrees )
25 import PrimRep ( isFloatingRep )
26 import UniqSupply ( returnUs, thenUs, mapUs, initUs,
27 initUs_, UniqSM, UniqSupply )
28 import UniqFM ( UniqFM, emptyUFM, addToUFM, lookupUFM )
31 import GlaExts (trace) --tmp
32 #include "nativeGen/NCG.h"
35 The 96/03 native-code generator has machine-independent and
36 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
38 This module (@AsmCodeGen@) is the top-level machine-independent
39 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
40 (defined in module @Stix@), using support code from @StixInfo@ (info
41 tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C
42 macros), and @StixInteger@ (GMP arbitrary-precision operations).
44 Before entering machine-dependent land, we do some machine-independent
45 @genericOpt@imisations (defined below) on the @StixTree@s.
47 We convert to the machine-specific @Instr@ datatype with
48 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
49 use a machine-independent register allocator (@runRegAllocate@) to
50 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
51 helper functions (see about @RegAllocInfo@ below).
53 The machine-dependent bits break down as follows:
55 \item[@MachRegs@:] Everything about the target platform's machine
56 registers (and immediate operands, and addresses, which tend to
57 intermingle/interact with registers).
59 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
60 have a module of its own), plus a miscellany of other things
61 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
63 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
66 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
69 \item[@RegAllocInfo@:] In the register allocator, we manipulate
70 @MRegsState@s, which are @BitSet@s, one bit per machine register.
71 When we want to say something about a specific machine register
72 (e.g., ``it gets clobbered by this instruction''), we set/unset
73 its bit. Obviously, we do this @BitSet@ thing for efficiency
76 The @RegAllocInfo@ module collects together the machine-specific
77 info needed to do register allocation.
83 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc)
85 = let (stixRaw, us1) = initUs us (genCodeAbstractC absC)
86 stixOpt = map (map genericOpt) stixRaw
87 stixFinal = map x86floatFix stixOpt
88 insns = initUs_ us1 (codeGen stixFinal)
89 debug_stix = vcat (map pprStixTrees stixFinal)
94 x86floatFix = floatFix
101 @codeGen@ is the top-level code-generation function:
103 codeGen :: [[StixTree]] -> UniqSM SDoc
106 = mapUs genMachCode stixFinal `thenUs` \ dynamic_codes ->
108 static_instrss = scheduleMachCode dynamic_codes
109 docs = map (vcat . map pprInstr) static_instrss
111 returnUs (vcat (intersperse (char ' ' $$ char ' ') docs))
114 Top level code generator for a chunk of stix code:
116 genMachCode :: [StixTree] -> UniqSM InstrList
119 = mapUs stmt2Instrs stmts `thenUs` \ blocks ->
120 returnUs (foldr (.) id blocks asmVoid)
123 The next bit does the code scheduling. The scheduler must also deal
124 with register allocation of temporaries. Much parallelism can be
125 exposed via the OrdList, but more might occur, so further analysis
129 scheduleMachCode :: [InstrList] -> [[Instr]]
132 = map (runRegAllocate freeRegsState reservedRegs)
134 freeRegsState = mkMRegsState (extractMappedRegNos freeRegs)
137 %************************************************************************
139 \subsection[NCOpt]{The Generic Optimiser}
141 %************************************************************************
143 This is called between translating Abstract C to its Tree and actually
144 using the Native Code Generator to generate the annotations. It's a
145 chance to do some strength reductions.
147 ** Remember these all have to be machine independent ***
149 Note that constant-folding should have already happened, but we might
150 have introduced some new opportunities for constant-folding wrt
151 address manipulations.
154 genericOpt :: StixTree -> StixTree
157 For most nodes, just optimize the children.
160 genericOpt (StInd pk addr) = StInd pk (genericOpt addr)
162 genericOpt (StAssign pk dst src)
163 = StAssign pk (genericOpt dst) (genericOpt src)
165 genericOpt (StJump addr) = StJump (genericOpt addr)
167 genericOpt (StCondJump addr test)
168 = StCondJump addr (genericOpt test)
170 genericOpt (StCall fn cconv pk args)
171 = StCall fn cconv pk (map genericOpt args)
174 Fold indices together when the types match:
176 genericOpt (StIndex pk (StIndex pk' base off) off')
178 = StIndex pk (genericOpt base)
179 (genericOpt (StPrim IntAddOp [off, off']))
181 genericOpt (StIndex pk base off)
182 = StIndex pk (genericOpt base) (genericOpt off)
185 For PrimOps, we first optimize the children, and then we try our hand
186 at some constant-folding.
189 genericOpt (StPrim op args) = primOpt op (map genericOpt args)
192 Replace register leaves with appropriate StixTrees for the given
196 genericOpt leaf@(StReg (StixMagicId id))
197 = case (stgReg id) of
198 Always tree -> genericOpt tree
201 genericOpt other = other
204 Now, try to constant-fold the PrimOps. The arguments have already
205 been optimized and folded.
209 :: PrimOp -- The operation from an StPrim
210 -> [StixTree] -- The optimized arguments
213 primOpt op arg@[StInt x]
215 IntNegOp -> StInt (-x)
218 primOpt op args@[StInt x, StInt y]
220 CharGtOp -> StInt (if x > y then 1 else 0)
221 CharGeOp -> StInt (if x >= y then 1 else 0)
222 CharEqOp -> StInt (if x == y then 1 else 0)
223 CharNeOp -> StInt (if x /= y then 1 else 0)
224 CharLtOp -> StInt (if x < y then 1 else 0)
225 CharLeOp -> StInt (if x <= y then 1 else 0)
226 IntAddOp -> StInt (x + y)
227 IntSubOp -> StInt (x - y)
228 IntMulOp -> StInt (x * y)
229 IntQuotOp -> StInt (x `quot` y)
230 IntRemOp -> StInt (x `rem` y)
231 IntGtOp -> StInt (if x > y then 1 else 0)
232 IntGeOp -> StInt (if x >= y then 1 else 0)
233 IntEqOp -> StInt (if x == y then 1 else 0)
234 IntNeOp -> StInt (if x /= y then 1 else 0)
235 IntLtOp -> StInt (if x < y then 1 else 0)
236 IntLeOp -> StInt (if x <= y then 1 else 0)
237 -- ToDo: WordQuotOp, WordRemOp.
241 When possible, shift the constants to the right-hand side, so that we
242 can match for strength reductions. Note that the code generator will
243 also assume that constants have been shifted to the right when
247 primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x]
250 We can often do something with constants of 0 and 1 ...
253 primOpt op args@[x, y@(StInt 0)]
268 primOpt op args@[x, y@(StInt 1)]
276 Now look for multiplication/division by powers of 2 (integers).
279 primOpt op args@[x, y@(StInt n)]
281 IntMulOp -> case exactLog2 n of
282 Nothing -> StPrim op args
283 Just p -> StPrim ISllOp [x, StInt p]
284 IntQuotOp -> case exactLog2 n of
285 Nothing -> StPrim op args
286 Just p -> StPrim ISrlOp [x, StInt p]
290 Anything else is just too hard.
293 primOpt op args = StPrim op args
296 -----------------------------------------------------------------------------
297 Fix up floating point operations for x86.
299 The problem is that the code generator can't handle the weird register
300 naming scheme for floating point registers on the x86, so we have to
301 deal with memory-resident floating point values wherever possible.
303 We therefore can't stand references to floating-point kinded temporary
304 variables, and try to translate them into memory addresses wherever
308 floatFix :: [StixTree] -> [StixTree]
309 floatFix trees = fltFix emptyUFM trees
311 fltFix :: UniqFM StixTree -- mapping tmp vars to memory locations
316 -- The case we're interested in: loading a temporary from a memory
317 -- address. Eliminate the instruction and replace all future references
318 -- to the temporary with the memory address.
319 fltFix locs ((StAssign rep (StReg (StixTemp uq _)) loc) : trees)
320 | isFloatingRep rep = trace "found one" $ fltFix (addToUFM locs uq loc) trees
322 fltFix locs ((StAssign rep src dst) : trees)
323 = StAssign rep (fltFix1 locs src) (fltFix1 locs dst) : fltFix locs trees
325 fltFix locs (tree : trees)
326 = fltFix1 locs tree : fltFix locs trees
329 fltFix1 :: UniqFM StixTree -> StixTree -> StixTree
330 fltFix1 locs r@(StReg (StixTemp uq rep))
331 | isFloatingRep rep = case lookupUFM locs uq of
332 Nothing -> panic "fltFix1"
333 Just tree -> trace "substed" $ tree
335 fltFix1 locs (StIndex rep l r) =
336 StIndex rep (fltFix1 locs l) (fltFix1 locs r)
338 fltFix1 locs (StInd rep tree) =
339 StInd rep (fltFix1 locs tree)
341 fltFix1 locs (StAssign rep dst src) = panic "fltFix1: StAssign"
343 fltFix1 locs (StJump tree) = StJump (fltFix1 locs tree)
345 fltFix1 locs (StCondJump lbl tree) =
346 StCondJump lbl (fltFix1 locs tree)
348 fltFix1 locs (StPrim op trees) =
349 StPrim op (map (fltFix1 locs) trees)
351 fltFix1 locs (StCall f conv rep trees) =
352 StCall f conv rep (map (fltFix1 locs) trees)
354 fltFix1 locs tree = tree