2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
16 import AbsCStixGen ( genCodeAbstractC )
17 import AbsCSyn ( AbstractC, MagicId(..) )
18 import AbsCUtils ( mkAbsCStmtList, magicIdPrimRep )
19 import AsmRegAlloc ( runRegAllocate )
20 import MachOp ( MachOp(..), isCommutableMachOp, isComparisonMachOp )
21 import RegAllocInfo ( findReservedRegs )
22 import Stix ( StixReg(..), StixStmt(..), StixExpr(..), StixVReg(..),
23 pprStixStmts, pprStixStmt,
24 stixStmt_CountTempUses, stixStmt_Subst,
28 uniqOfNatM_State, deltaOfNatM_State,
30 import UniqSupply ( returnUs, thenUs, initUs,
33 import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
35 import PprMach ( pprDyldSymbolStub )
36 import List ( group, sort )
39 import qualified Pretty
47 import List ( intersperse )
51 The 96/03 native-code generator has machine-independent and
52 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
54 This module (@AsmCodeGen@) is the top-level machine-independent
55 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
56 (defined in module @Stix@), using support code from @StixPrim@
57 (primitive operations), @StixMacro@ (Abstract C macros), and
58 @StixInteger@ (GMP arbitrary-precision operations).
60 Before entering machine-dependent land, we do some machine-independent
61 @genericOpt@imisations (defined below) on the @StixTree@s.
63 We convert to the machine-specific @Instr@ datatype with
64 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
65 use a machine-independent register allocator (@runRegAllocate@) to
66 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
67 helper functions (see about @RegAllocInfo@ below).
69 The machine-dependent bits break down as follows:
71 \item[@MachRegs@:] Everything about the target platform's machine
72 registers (and immediate operands, and addresses, which tend to
73 intermingle/interact with registers).
75 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
76 have a module of its own), plus a miscellany of other things
77 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
79 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
82 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
85 \item[@RegAllocInfo@:] In the register allocator, we manipulate
86 @MRegsState@s, which are @BitSet@s, one bit per machine register.
87 When we want to say something about a specific machine register
88 (e.g., ``it gets clobbered by this instruction''), we set/unset
89 its bit. Obviously, we do this @BitSet@ thing for efficiency
92 The @RegAllocInfo@ module collects together the machine-specific
93 info needed to do register allocation.
-- | Top-level native code generator.  Converts a whole Abstract C
-- program into (a) an SDoc of the intermediate Stix code (for
-- debugging dumps) and (b) a Pretty.Doc of the final assembly text.
99 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, Pretty.Doc)
100 nativeCodeGen absC us
-- Split the input into top-level chunks and lazily map the per-chunk
-- generator (absCtoNat) over them, threading the unique supply.
101 = let absCstmts = mkAbsCStmtList absC
102 (results, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
-- Unzip the per-chunk results: Stix dumps, instruction docs, and the
-- external symbols each chunk imports.
103 stix_sdocs = [ stix | (stix, insn, imports) <- results ]
104 insn_sdocs = [ insn | (stix, insn, imports) <- results ]
105 imports = [ imports | (stix, insn, imports) <- results ]
-- On Darwin only, append dynamic-linker symbol stubs to the assembly
-- (IF_OS_darwin expands to the extra Pretty.$$ dyld_stubs clause).
107 insn_sdoc = my_vcat insn_sdocs IF_OS_darwin(Pretty.$$ dyld_stubs,)
108 stix_sdoc = vcat stix_sdocs
111 -- Generate "symbol stubs" for all external symbols that might
112 -- come from a dynamic library.
-- Deduplicate the imported symbols (map head . group . sort) so that
-- exactly one stub is emitted per distinct symbol.
114 dyld_stubs = Pretty.vcat $ map pprDyldSymbolStub $
115 map head $ group $ sort $ concat imports
119 my_trace m x = trace m x
-- Debug variant of vcat: interleaves "___ncg_debug_marker" comment
-- lines between the per-chunk docs.
120 my_vcat sds = Pretty.vcat (
123 Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
124 Pretty.$$ Pretty.char ' '
-- Non-debug variant: plain vertical concatenation.
129 my_vcat sds = Pretty.vcat sds
133 my_trace "nativeGen: begin"
134 (stix_sdoc, insn_sdoc)
-- | Full code-generation pipeline for one Abstract C chunk:
--   AbstractC -> Stix -> generic optimisation -> string lifting ->
--   instruction selection -> register allocation -> (x86) fp fixup.
-- Returns the pretty-printed Stix (for dumps), the final assembly
-- doc, and the list of imported external symbols.
137 absCtoNat :: AbstractC -> UniqSM (SDoc, Pretty.Doc, [FastString])
139 = _scc_ "genCodeAbstractC" genCodeAbstractC absC `thenUs` \ stixRaw ->
-- Machine-independent constant folding / strength reduction.
140 _scc_ "genericOpt" genericOpt stixRaw `bind` \ stixOpt ->
141 _scc_ "liftStrings" liftStrings stixOpt `thenUs` \ stixLifted ->
-- Instruction selection, assuming an infinite supply of virtual regs;
-- also collects the chunk's imported symbols.
142 _scc_ "genMachCode" genMachCode stixLifted `thenUs` \ (pre_regalloc, imports) ->
143 _scc_ "regAlloc" regAlloc pre_regalloc `bind` \ almost_final ->
-- On i386 only: insert ffree instructions (identity on other arches).
144 _scc_ "x86fp_kludge" x86fp_kludge almost_final `bind` \ final_mach_code ->
145 _scc_ "vcat" Pretty.vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
-- Note: the dump shows the optimised Stix (stixOpt), pre string-lifting.
146 _scc_ "pprStixTrees" pprStixStmts stixOpt `bind` \ stix_sdoc ->
147 returnUs ({-\_ -> Pretty.vcat (map pprInstr almost_final),-}
148 stix_sdoc, final_sdoc, imports)
-- | Post-process the instruction stream for the x87 FP stack: on
-- i386 this is i386_insert_ffrees; on every other architecture the
-- IF_ARCH_i386 CPP macro makes it the identity.
152 x86fp_kludge :: [Instr] -> [Instr]
153 x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
-- | Run the machine-independent register allocator over one block of
-- instructions, using this target's allocatable and reserved
-- register sets.
regAlloc :: InstrBlock -> [Instr]
regAlloc block = runRegAllocate allocatableRegs findReservedRegs block
159 Top level code generator for a chunk of stix code. For this part of
160 the computation, we switch from the UniqSM monad to the NatM monad.
161 The latter carries not only a Unique, but also an Int denoting the
162 current C stack pointer offset in the generated code; this is needed
163 for creating correct spill offsets on architectures which don't offer,
164 or for which it would be prohibitively expensive to employ, a frame
165 pointer register. Viz, x86.
167 The offset is measured in bytes, and indicates the difference between
168 the current (simulated) C stack-ptr and the value it was at the
169 beginning of the block. For stacks which grow down, this value should
170 be either zero or negative.
172 Switching between the two monads whilst carrying along the same Unique
173 supply breaks abstraction. Is that bad?
-- | Instruction selection for a list of Stix statements.  Switches
-- from the UniqSM monad into NatM, which carries not just a unique
-- supply but also the simulated C stack-pointer delta and the
-- accumulated imported symbols.
176 genMachCode :: [StixStmt] -> UniqSM (InstrBlock, [FastString])
178 genMachCode stmts initial_us
-- Start with a zero stack delta and run the statement translator.
179 = let initial_st = mkNatM_State initial_us 0
180 (instr_list, final_st) = initNat initial_st (stmtsToInstrs stmts)
181 final_us = uniqOfNatM_State final_st
182 final_delta = deltaOfNatM_State final_st
183 final_imports = importsOfNatM_State final_st
-- Sanity check: the stack delta must be back to zero at the end of
-- the block; otherwise spill offsets would have been wrong.
186 then ((instr_list, final_imports), final_us)
187 else pprPanic "genMachCode: nonzero final delta"
191 %************************************************************************
193 \subsection[NCOpt]{The Generic Optimiser}
195 %************************************************************************
197 This is called between translating Abstract C to its Tree and actually
198 using the Native Code Generator to generate the annotations. It's a
199 chance to do some strength reductions.
201 ** Remember these all have to be machine independent ***
203 Note that constant-folding should have already happened, but we might
204 have introduced some new opportunities for constant-folding wrt
205 address manipulations.
-- | Machine-independent optimisation pass over Stix statements:
-- first inline single-use temporaries (stixPeep), then constant-fold
-- every statement (stixStmt_ConFold).
genericOpt :: [StixStmt] -> [StixStmt]
genericOpt stmts = map stixStmt_ConFold (stixPeep stmts)
-- | Peephole pass: when a StixTemp is assigned in one statement (t1)
-- and used exactly once, in the immediately following statement (t2),
-- inline its rhs into t2 and drop the assignment.
213 stixPeep :: [StixStmt] -> [StixStmt]
215 -- This transformation assumes that the temp assigned to in t1
216 -- is not assigned to in t2; for otherwise the target of the
217 -- second assignment would be substituted for, giving nonsense
218 -- code. As far as I can see, StixTemps are only ever assigned
219 -- to once. It would be nice to be sure!
221 stixPeep ( t1@(StAssignReg pka (StixTemp (StixVReg u pk)) rhs)
-- Guard: the temp u is used exactly once in t2 and nowhere in the
-- rest of the statement list, so substitution is safe.
224 | stixStmt_CountTempUses u t2 == 1
225 && sum (map (stixStmt_CountTempUses u) ts) == 0
228 trace ("nativeGen: inlining " ++ showSDoc (pprStixExpr rhs))
-- Substitute rhs for u in t2, then keep scanning from t2 onward.
230 (stixPeep (stixStmt_Subst u rhs t2 : ts))
-- No inlining opportunity: keep t1 and retry starting at t2.
232 stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
237 For most nodes, just optimize the children.
240 stixExpr_ConFold :: StixExpr -> StixExpr
241 stixStmt_ConFold :: StixStmt -> StixStmt
-- | Constant-fold one statement: fold its child expressions, rewrite
-- MagicId register assignments that have no real register on this
-- architecture into memory assignments, and delete conditional jumps
-- whose test folds to zero.
243 stixStmt_ConFold stmt
245 StAssignReg pk reg@(StixTemp _) src
246 -> StAssignReg pk reg (stixExpr_ConFold src)
247 StAssignReg pk reg@(StixMagicId mid) src
248 -- Replace register leaves with appropriate StixTrees for
249 -- the given target. MagicIds which map to a reg on this arch are left unchanged.
250 -- Assigning to BaseReg is always illegal, so we check for that.
252 BaseReg -> panic "stixStmt_ConFold: assignment to BaseReg";
254 case get_MagicId_reg_or_addr mid of
256 -> StAssignReg pk reg (stixExpr_ConFold src)
-- No real register for this MagicId: store via the register table.
258 -> stixStmt_ConFold (StAssignMem pk baseRegAddr src)
260 StAssignMem pk addr src
261 -> StAssignMem pk (stixExpr_ConFold addr) (stixExpr_ConFold src)
263 -> StVoidable (stixExpr_ConFold expr)
265 -> StJump dsts (stixExpr_ConFold addr)
-- A conditional jump whose test folds to the constant 0 can never be
-- taken: replace it with a comment recording the deleted statement.
267 -> let test_opt = stixExpr_ConFold test
269 if manifestlyZero test_opt
270 then StComment (mkFastString ("deleted: " ++ showSDoc (pprStixStmt stmt)))
271 else StCondJump addr (stixExpr_ConFold test)
273 -> StData pk (map stixExpr_ConFold datas)
-- True only for a literal integer zero.
277 manifestlyZero (StInt 0) = True
278 manifestlyZero other = False
-- | Constant-fold one expression: fold children, merge nested StIndex
-- nodes whose types match, hand StMachOp nodes to stixMachOpFold, and
-- expand MagicId register reads that have no real register into loads
-- from the register table.
280 stixExpr_ConFold expr
283 -> StInd pk (stixExpr_ConFold addr)
284 StCall fn cconv pk args
285 -> StCall fn cconv pk (map stixExpr_ConFold args)
286 StIndex pk (StIndex pk' base off) off'
287 -- Fold indices together when the types match:
-- i.e. collapse the two offsets into one MO_Nat_Add.
289 -> StIndex pk (stixExpr_ConFold base)
290 (stixExpr_ConFold (StMachOp MO_Nat_Add [off, off']))
292 -> StIndex pk (stixExpr_ConFold base) (stixExpr_ConFold off)
295 -- For PrimOps, we first optimize the children, and then we try
296 -- our hand at some constant-folding.
297 -> stixMachOpFold mop (map stixExpr_ConFold args)
298 StReg (StixMagicId mid)
299 -- Replace register leaves with appropriate StixTrees for
300 -- the given target. MagicIds which map to a reg on this arch are left unchanged.
301 -- For the rest, BaseReg is taken to mean the address of the reg table
302 -- in MainCapability, and for all others we generate an indirection to
303 -- its location in the register table.
304 -> case get_MagicId_reg_or_addr mid of
308 BaseReg -> stixExpr_ConFold baseRegAddr
309 other -> stixExpr_ConFold (StInd (magicIdPrimRep mid) baseRegAddr)
314 Now, try to constant-fold the PrimOps. The arguments have already
315 been optimized and folded.
-- (signature continues from the preceding line)
319 :: MachOp -- The operation from an StMachOp
320 -> [StixExpr] -- The optimized arguments
-- Unary op on a literal: only signed negation is folded here.
323 stixMachOpFold mop arg@[StInt x]
325 MO_NatS_Neg -> StInt (-x)
326 other -> StMachOp mop arg
-- Binary op on two literals: comparisons fold to 0/1, arithmetic
-- folds directly, quot/rem only when the divisor is non-zero, and
-- shifts only when the shift amount is in [0, 32).
328 stixMachOpFold mop args@[StInt x, StInt y]
330 MO_32U_Gt -> StInt (if x > y then 1 else 0)
331 MO_32U_Ge -> StInt (if x >= y then 1 else 0)
332 MO_32U_Eq -> StInt (if x == y then 1 else 0)
333 MO_32U_Ne -> StInt (if x /= y then 1 else 0)
334 MO_32U_Lt -> StInt (if x < y then 1 else 0)
335 MO_32U_Le -> StInt (if x <= y then 1 else 0)
336 MO_Nat_Add -> StInt (x + y)
337 MO_Nat_Sub -> StInt (x - y)
338 MO_NatS_Mul -> StInt (x * y)
339 MO_NatS_Quot | y /= 0 -> StInt (x `quot` y)
340 MO_NatS_Rem | y /= 0 -> StInt (x `rem` y)
341 MO_NatS_Gt -> StInt (if x > y then 1 else 0)
342 MO_NatS_Ge -> StInt (if x >= y then 1 else 0)
343 MO_Nat_Eq -> StInt (if x == y then 1 else 0)
344 MO_Nat_Ne -> StInt (if x /= y then 1 else 0)
345 MO_NatS_Lt -> StInt (if x < y then 1 else 0)
346 MO_NatS_Le -> StInt (if x <= y then 1 else 0)
347 MO_Nat_Shl | y >= 0 && y < 32 -> do_shl x y
348 other -> StMachOp mop args
-- Left shift on Integers by repeated doubling; only reached with
-- 0 <= n < 32 thanks to the guard on MO_Nat_Shl above.
350 do_shl :: Integer -> Integer -> StixExpr
352 do_shl v n | n > 0 = do_shl (v*2) (n-1)
355 When possible, shift the constants to the right-hand side, so that we
356 can match for strength reductions. Note that the code generator will
357 also assume that constants have been shifted to the right when
matching.
-- Canonicalise commutative ops: move a literal first argument into
-- second position, then retry folding on the swapped arguments.
361 stixMachOpFold op [x@(StInt _), y] | isCommutableMachOp op
362 = stixMachOpFold op [y, x]
365 We can often do something with constants of 0 and 1 ...
-- Simplifications when the second argument is the literal 0.
368 stixMachOpFold mop args@[x, y@(StInt 0)]
-- Ne-with-0 on a value already known to be a comparison result
-- (hence 0 or 1) is the identity.
380 MO_Nat_Ne | x_is_comparison -> x
381 other -> StMachOp mop args
-- x counts as a comparison if it is a 2-argument comparison MachOp.
385 StMachOp mopp [_, _] -> isComparisonMachOp mopp
-- Simplifications when the second argument is the literal 1:
-- remainder by 1 (signed or unsigned) is always 0.
388 stixMachOpFold mop args@[x, y@(StInt 1)]
394 MO_NatS_Rem -> StInt 0
395 MO_NatU_Rem -> StInt 0
396 other -> StMachOp mop args
399 Now look for multiplication/division by powers of 2 (integers).
-- Multiplication / division by an exact power of two becomes a shift
-- left / right by log2 of the constant (exactLog2 returns Nothing
-- when n is not an exact power of two).
402 stixMachOpFold mop args@[x, y@(StInt n)]
405 -> case exactLog2 n of
407 Just p -> StMachOp MO_Nat_Shl [x, StInt p]
409 -> case exactLog2 n of
411 Just p -> StMachOp MO_Nat_Shr [x, StInt p]
-- Shared fallback: rebuild the node as-is.
415 unchanged = StMachOp mop args
418 Anything else is just too hard.
-- Catch-all: no folding opportunity; rebuild the node unchanged.
421 stixMachOpFold mop args = StMachOp mop args