2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
16 import AbsCStixGen ( genCodeAbstractC )
17 import AbsCSyn ( AbstractC, MagicId(..) )
18 import AbsCUtils ( mkAbsCStmtList, magicIdPrimRep )
19 import AsmRegAlloc ( runRegAllocate )
20 import MachOp ( MachOp(..), isCommutableMachOp, isComparisonMachOp )
21 import RegAllocInfo ( findReservedRegs )
22 import Stix ( StixReg(..), StixStmt(..), StixExpr(..), StixVReg(..),
23 pprStixStmts, pprStixStmt,
24 stixStmt_CountTempUses, stixStmt_Subst,
28 uniqOfNatM_State, deltaOfNatM_State,
30 import UniqSupply ( returnUs, thenUs, initUs,
33 import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
35 import PprMach ( pprDyldSymbolStub )
36 import List ( group, sort )
39 import qualified Pretty
46 import List ( intersperse )
49 The 96/03 native-code generator has machine-independent and
50 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
52 This module (@AsmCodeGen@) is the top-level machine-independent
53 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
54 (defined in module @Stix@), using support code from @StixPrim@
55 (primitive operations), @StixMacro@ (Abstract C macros), and
56 @StixInteger@ (GMP arbitrary-precision operations).
58 Before entering machine-dependent land, we do some machine-independent
59 @genericOpt@imisations (defined below) on the @StixTree@s.
61 We convert to the machine-specific @Instr@ datatype with
62 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
63 use a machine-independent register allocator (@runRegAllocate@) to
64 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
65 helper functions (see about @RegAllocInfo@ below).
67 The machine-dependent bits break down as follows:
69 \item[@MachRegs@:] Everything about the target platform's machine
70 registers (and immediate operands, and addresses, which tend to
71 intermingle/interact with registers).
73 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
74 have a module of its own), plus a miscellany of other things
75 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
77 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
80 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
83 \item[@RegAllocInfo@:] In the register allocator, we manipulate
84 @MRegsState@s, which are @BitSet@s, one bit per machine register.
85 When we want to say something about a specific machine register
86 (e.g., ``it gets clobbered by this instruction''), we set/unset
87 its bit. Obviously, we do this @BitSet@ thing for efficiency
90 The @RegAllocInfo@ module collects together the machine-specific
91 info needed to do register allocation.
-- | Top-level entry point of the native code generator.  Takes the whole
-- Abstract C program plus a unique supply, and returns a pair of documents:
-- the intermediate Stix code (an SDoc, for -ddump style debugging) and the
-- final assembly output (a Pretty.Doc).
-- NOTE(review): several lines of this definition (the equation LHS, the
-- final 'in', and the debug-marker branches of my_vcat) are elided from
-- this excerpt.
97 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, Pretty.Doc)
-- Split the Abstract C into a list of top-level statements, and compile
-- each one independently (lazily, so output can stream).
99 = let absCstmts = mkAbsCStmtList absC
100 (results, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
-- Each result triple is (stix dump, instruction doc, imported symbols);
-- project out the three components separately.
101 stix_sdocs = [ stix | (stix, insn, imports) <- results ]
102 insn_sdocs = [ insn | (stix, insn, imports) <- results ]
103 imports = [ imports | (stix, insn, imports) <- results ]
-- On Darwin only (IF_OS_darwin CPP macro) the dyld symbol stubs are
-- appended after the main instruction stream.
105 insn_sdoc = my_vcat insn_sdocs IF_OS_darwin(Pretty.$$ dyld_stubs,)
106 stix_sdoc = vcat stix_sdocs
109 -- Generate "symbol stubs" for all external symbols that might
110 -- come from a dynamic library.
-- group/sort/head deduplicates the concatenated import lists before
-- emitting one stub per distinct symbol.
112 dyld_stubs = Pretty.vcat $ map pprDyldSymbolStub $
113 map head $ group $ sort $ concat imports
-- Debug helpers; my_vcat has a second, plain definition below --
-- presumably the two are selected by elided #ifdef NCG_DEBUG lines
-- (TODO confirm against the full source).
117 my_trace m x = trace m x
118 my_vcat sds = Pretty.vcat (
121 Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
122 Pretty.$$ Pretty.char ' '
127 my_vcat sds = Pretty.vcat sds
131 my_trace "nativeGen: begin"
132 (stix_sdoc, insn_sdoc)
-- | Compile one top-level Abstract C statement all the way to assembly.
-- Returns (Stix dump, instruction document, imported symbol names).
-- The pipeline is: generate Stix, run the generic (machine-independent)
-- optimiser, lift string literals, generate machine code, allocate
-- registers, apply the x86 FP kludge, then pretty-print.
-- NOTE(review): the equation LHS ('absCtoNat absC') and the definition of
-- 'bind' (presumably a strict/lazy application helper) are elided from
-- this excerpt.
135 absCtoNat :: AbstractC -> UniqSM (SDoc, Pretty.Doc, [FastString])
-- _scc_ annotations attribute cost-centre profiling time to each stage.
137 = _scc_ "genCodeAbstractC" genCodeAbstractC absC `thenUs` \ stixRaw ->
138 _scc_ "genericOpt" genericOpt stixRaw `bind` \ stixOpt ->
139 _scc_ "liftStrings" liftStrings stixOpt `thenUs` \ stixLifted ->
140 _scc_ "genMachCode" genMachCode stixLifted `thenUs` \ (pre_regalloc, imports) ->
141 _scc_ "regAlloc" regAlloc pre_regalloc `bind` \ almost_final ->
142 _scc_ "x86fp_kludge" x86fp_kludge almost_final `bind` \ final_mach_code ->
143 _scc_ "vcat" Pretty.vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
-- Note: the Stix dump shown is stixOpt (post-genericOpt, pre-liftStrings).
144 _scc_ "pprStixTrees" pprStixStmts stixOpt `bind` \ stix_sdoc ->
145 returnUs ({-\_ -> Pretty.vcat (map pprInstr almost_final),-}
146 stix_sdoc, final_sdoc, imports)
-- | Architecture-specific post-pass over the final instruction list.
-- The IF_ARCH_i386 CPP macro expands to 'i386_insert_ffrees' when
-- compiling for x86 and to 'id' (no-op) on every other architecture.
150 x86fp_kludge :: [Instr] -> [Instr]
151 x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
-- | Map the virtual registers in a block of instructions onto real
-- machine registers, using the platform's allocatable-register set and
-- its reserved-register policy.
regAlloc :: InstrBlock -> [Instr]
regAlloc instrs = runRegAllocate allocatableRegs findReservedRegs instrs
157 Top level code generator for a chunk of stix code. For this part of
158 the computation, we switch from the UniqSM monad to the NatM monad.
159 The latter carries not only a Unique, but also an Int denoting the
160 current C stack pointer offset in the generated code; this is needed
161 for creating correct spill offsets on architectures which don't offer,
162 or for which it would be prohibitively expensive to employ, a frame
163 pointer register. Viz, x86.
165 The offset is measured in bytes, and indicates the difference between
166 the current (simulated) C stack-ptr and the value it was at the
167 beginning of the block. For stacks which grow down, this value should
168 be either zero or negative.
170 Switching between the two monads whilst carrying along the same Unique
171 supply breaks abstraction. Is that bad?
-- | Turn optimised Stix statements into machine instructions, together
-- with the list of external symbols they reference.  Runs the NatM monad
-- (unique supply + simulated C stack-pointer delta) inside UniqSM.
-- NOTE(review): the 'in if final_delta == 0' line between the last
-- binding and the 'then'/'else' branches is elided from this excerpt.
174 genMachCode :: [StixStmt] -> UniqSM (InstrBlock, [FastString])
176 genMachCode stmts initial_us
-- Start with delta 0: the simulated C stack pointer is at its
-- block-entry position.
177 = let initial_st = mkNatM_State initial_us 0
178 (instr_list, final_st) = initNat initial_st (stmtsToInstrs stmts)
179 final_us = uniqOfNatM_State final_st
180 final_delta = deltaOfNatM_State final_st
181 final_imports = importsOfNatM_State final_st
-- A nonzero final delta means the generated code left the C stack
-- pointer displaced -- a code-generation bug, hence the panic.
184 then ((instr_list, final_imports), final_us)
185 else pprPanic "genMachCode: nonzero final delta"
189 %************************************************************************
191 \subsection[NCOpt]{The Generic Optimiser}
193 %************************************************************************
195 This is called between translating Abstract C to its Tree and actually
196 using the Native Code Generator to generate the annotations. It's a
197 chance to do some strength reductions.
199 *** Remember: these all have to be machine-independent ***
201 Note that constant-folding should have already happened, but we might
202 have introduced some new opportunities for constant-folding wrt
203 address manipulations.
-- | The machine-independent optimiser: first run the peephole pass
-- (inline single-use temporaries), then constant-fold each statement.
genericOpt :: [StixStmt] -> [StixStmt]
genericOpt stmts = map stixStmt_ConFold (stixPeep stmts)
-- | Peephole pass: when a statement assigns an expression to a virtual
-- temporary and the very next statement is that temp's only use, inline
-- the expression into the use site and drop the assignment.
-- NOTE(review): the pattern continuation, the inlining 'let', and the
-- base cases for empty/singleton lists are elided from this excerpt.
211 stixPeep :: [StixStmt] -> [StixStmt]
213 -- This transformation assumes that the temp assigned to in t1
214 -- is not assigned to in t2; for otherwise the target of the
215 -- second assignment would be substituted for, giving nonsense
216 -- code. As far as I can see, StixTemps are only ever assigned
217 -- to once. It would be nice to be sure!
219 stixPeep ( t1@(StAssignReg pka (StixTemp (StixVReg u pk)) rhs)
-- Guard: exactly one use in t2 and no uses in the rest of the list,
-- so substituting rhs for the temp is safe and removes it entirely.
222 | stixStmt_CountTempUses u t2 == 1
223 && sum (map (stixStmt_CountTempUses u) ts) == 0
-- Debug trace of each inlining -- presumably compiled in only under an
-- elided NCG_DEBUG #ifdef (TODO confirm against the full source).
226 trace ("nativeGen: inlining " ++ showSDoc (pprStixExpr rhs))
-- Recurse on the substituted tail: the inlined statement may itself
-- become eligible for further inlining.
228 (stixPeep (stixStmt_Subst u rhs t2 : ts))
-- No match: keep t1 and slide the two-statement window forward.
230 stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
235 For most nodes, just optimize the children.
-- Constant-folding over Stix: one traversal for expressions and one for
-- statements, each folding its children and simplifying where possible.
-- NOTE(review): several case-alternative LHS lines (e.g. for StVoidable,
-- StJump, StCondJump, StData) and the 'where' keyword are elided from
-- this excerpt.
238 stixExpr_ConFold :: StixExpr -> StixExpr
239 stixStmt_ConFold :: StixStmt -> StixStmt
241 stixStmt_ConFold stmt
-- Assignment to a virtual temp: just fold the RHS.
243 StAssignReg pk reg@(StixTemp _) src
244 -> StAssignReg pk reg (stixExpr_ConFold src)
245 StAssignReg pk reg@(StixMagicId mid) src
246 -- Replace register leaves with appropriate StixTrees for
247 -- the given target. MagicIds which map to a reg on this arch are left unchanged.
248 -- Assigning to BaseReg is always illegal, so we check for that.
250 BaseReg -> panic "stixStmt_ConFold: assignment to BaseReg";
252 case get_MagicId_reg_or_addr mid of
-- MagicId lives in a real register on this arch: keep the reg form.
254 -> StAssignReg pk reg (stixExpr_ConFold src)
-- Otherwise rewrite as a store into the register table and re-fold.
256 -> stixStmt_ConFold (StAssignMem pk baseRegAddr src)
258 StAssignMem pk addr src
259 -> StAssignMem pk (stixExpr_ConFold addr) (stixExpr_ConFold src)
261 -> StVoidable (stixExpr_ConFold expr)
263 -> StJump dsts (stixExpr_ConFold addr)
265 -> let test_opt = stixExpr_ConFold test
-- A conditional jump whose condition folds to the constant 0 can never
-- be taken; replace it with a comment recording what was deleted.
267 if manifestlyZero test_opt
268 then StComment (mkFastString ("deleted: " ++ showSDoc (pprStixStmt stmt)))
-- NOTE(review): this re-folds 'test' instead of reusing test_opt --
-- redundant work, though the result is the same.
269 else StCondJump addr (stixExpr_ConFold test)
271 -> StData pk (map stixExpr_ConFold datas)
-- True exactly for the literal integer zero.
275 manifestlyZero (StInt 0) = True
276 manifestlyZero other = False
-- | Constant-fold one Stix expression: fold children bottom-up, merge
-- nested StIndex nodes, hand machine-ops to stixMachOpFold, and expand
-- MagicId register references to their concrete location.
-- NOTE(review): the 'case expr of' line and some alternative LHS lines
-- are elided from this excerpt.
278 stixExpr_ConFold expr
281 -> StInd pk (stixExpr_ConFold addr)
282 StCall fn cconv pk args
283 -> StCall fn cconv pk (map stixExpr_ConFold args)
284 StIndex pk (StIndex pk' base off) off'
285 -- Fold indices together when the types match:
-- Collapses index-of-index into a single index whose offset is the sum
-- of the two offsets (itself a candidate for constant folding).
287 -> StIndex pk (stixExpr_ConFold base)
288 (stixExpr_ConFold (StMachOp MO_Nat_Add [off, off']))
290 -> StIndex pk (stixExpr_ConFold base) (stixExpr_ConFold off)
293 -- For PrimOps, we first optimize the children, and then we try
294 -- our hand at some constant-folding.
295 -> stixMachOpFold mop (map stixExpr_ConFold args)
296 StReg (StixMagicId mid)
297 -- Replace register leaves with appropriate StixTrees for
298 -- the given target. MagicIds which map to a reg on this arch are left unchanged.
299 -- For the rest, BaseReg is taken to mean the address of the reg table
300 -- in MainCapability, and for all others we generate an indirection to
301 -- its location in the register table.
302 -> case get_MagicId_reg_or_addr mid of
306 BaseReg -> stixExpr_ConFold baseRegAddr
307 other -> stixExpr_ConFold (StInd (magicIdPrimRep mid) baseRegAddr)
312 Now, try to constant-fold the PrimOps. The arguments have already
313 been optimized and folded.
-- | Constant-fold a machine operation whose arguments have already been
-- folded.  Falls through to rebuilding the StMachOp node unchanged when
-- no rule applies.
-- NOTE(review): the function-name line of the signature, the '= case mop
-- of' lines, and do_shl's base case are elided from this excerpt.
317 :: MachOp -- The operation from an StMachOp
318 -> [StixExpr] -- The optimized arguments
-- Unary op on an integer literal: only negation is folded here.
321 stixMachOpFold mop arg@[StInt x]
323 MO_NatS_Neg -> StInt (-x)
324 other -> StMachOp mop arg
-- Binary op on two integer literals: fold comparisons to 0/1 and
-- arithmetic to its result.
326 stixMachOpFold mop args@[StInt x, StInt y]
328 MO_32U_Gt -> StInt (if x > y then 1 else 0)
329 MO_32U_Ge -> StInt (if x >= y then 1 else 0)
330 MO_32U_Eq -> StInt (if x == y then 1 else 0)
331 MO_32U_Ne -> StInt (if x /= y then 1 else 0)
332 MO_32U_Lt -> StInt (if x < y then 1 else 0)
333 MO_32U_Le -> StInt (if x <= y then 1 else 0)
334 MO_Nat_Add -> StInt (x + y)
335 MO_Nat_Sub -> StInt (x - y)
336 MO_NatS_Mul -> StInt (x * y)
-- Guard against division by zero: leave the node alone when y == 0.
337 MO_NatS_Quot | y /= 0 -> StInt (x `quot` y)
338 MO_NatS_Rem | y /= 0 -> StInt (x `rem` y)
339 MO_NatS_Gt -> StInt (if x > y then 1 else 0)
340 MO_NatS_Ge -> StInt (if x >= y then 1 else 0)
341 MO_Nat_Eq -> StInt (if x == y then 1 else 0)
342 MO_Nat_Ne -> StInt (if x /= y then 1 else 0)
343 MO_NatS_Lt -> StInt (if x < y then 1 else 0)
344 MO_NatS_Le -> StInt (if x <= y then 1 else 0)
-- Shift amount must be in [0,32) for the fold to be meaningful.
345 MO_Nat_Shl | y >= 0 && y < 32 -> do_shl x y
346 other -> StMachOp mop args
-- Left shift by repeated doubling; the n == 0 base case returning the
-- accumulated value is elided from this excerpt.
348 do_shl :: Integer -> Integer -> StixExpr
350 do_shl v n | n > 0 = do_shl (v*2) (n-1)
353 When possible, shift the constants to the right-hand side, so that we
354 can match for strength reductions. Note that the code generator will
355 also assume that constants have been shifted to the right when
-- Canonicalise: for commutative ops, move a literal first argument to
-- the second position, so the rules below only need to match constants
-- on the right-hand side.
359 stixMachOpFold op [x@(StInt _), y] | isCommutableMachOp op
360 = stixMachOpFold op [y, x]
363 We can often do something with constants of 0 and 1 ...
-- Identities with a literal 0 on the right.  Most alternatives (add,
-- sub, shifts, ...) are elided from this excerpt; the visible one folds
-- 'x /= 0' to x when x is itself a comparison result, since comparisons
-- yield exactly 0 or 1.
366 stixMachOpFold mop args@[x, y@(StInt 0)]
378 MO_Nat_Ne | x_is_comparison -> x
379 other -> StMachOp mop args
-- x_is_comparison: x is a two-argument machine op that is a comparison.
-- NOTE(review): the 'where x_is_comparison = case x of' lines are elided.
383 StMachOp mopp [_, _] -> isComparisonMachOp mopp
-- Identities with a literal 1 on the right.  The multiply/quot
-- alternatives (x * 1 == x, x `quot` 1 == x, ...) are elided from this
-- excerpt; the visible rules fold remainder by one to zero.
386 stixMachOpFold mop args@[x, y@(StInt 1)]
392 MO_NatS_Rem -> StInt 0
393 MO_NatU_Rem -> StInt 0
394 other -> StMachOp mop args
397 Now look for multiplication/division by powers of 2 (integers).
-- Strength reduction: multiply by 2^p becomes shift-left by p, and
-- (presumably unsigned -- the guarding op patterns are elided, TODO
-- confirm) division by 2^p becomes shift-right by p.  exactLog2 returns
-- Just p only when n is exactly a power of two.
400 stixMachOpFold mop args@[x, y@(StInt n)]
403 -> case exactLog2 n of
405 Just p -> StMachOp MO_Nat_Shl [x, StInt p]
407 -> case exactLog2 n of
409 Just p -> StMachOp MO_Nat_Shr [x, StInt p]
-- Shared fallback for the Nothing branches of both cases above.
413 unchanged = StMachOp mop args
416 Anything else is just too hard.
-- Catch-all: no folding rule applies; rebuild the node unchanged.
419 stixMachOpFold mop args = StMachOp mop args