2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
11 import List ( intersperse )
18 import AbsCStixGen ( genCodeAbstractC )
19 import AbsCSyn ( AbstractC, MagicId(..) )
20 import AbsCUtils ( mkAbsCStmtList, magicIdPrimRep )
21 import AsmRegAlloc ( runRegAllocate )
22 import MachOp ( MachOp(..), isCommutableMachOp, isComparisonMachOp )
23 import RegAllocInfo ( findReservedRegs )
24 import Stix ( StixReg(..), StixStmt(..), StixExpr(..), StixVReg(..),
25 pprStixStmts, pprStixStmt,
26 stixStmt_CountTempUses, stixStmt_Subst,
30 uniqOfNatM_State, deltaOfNatM_State )
31 import UniqSupply ( returnUs, thenUs, initUs,
34 import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
36 import qualified Pretty
43 The 96/03 native-code generator has machine-independent and
44 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
46 This module (@AsmCodeGen@) is the top-level machine-independent
47 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
48 (defined in module @Stix@), using support code from @StixInfo@ (info
49 tables), @StixPrim@ (primitive operations), @StixMacro@ (Abstract C
50 macros), and @StixInteger@ (GMP arbitrary-precision operations).
52 Before entering machine-dependent land, we do some machine-independent
53 @genericOpt@imisations (defined below) on the @StixTree@s.
55 We convert to the machine-specific @Instr@ datatype with
56 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
57 use a machine-independent register allocator (@runRegAllocate@) to
58 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
59 helper functions (see about @RegAllocInfo@ below).
61 The machine-dependent bits break down as follows:
63 \item[@MachRegs@:] Everything about the target platform's machine
64 registers (and immediate operands, and addresses, which tend to
65 intermingle/interact with registers).
67 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
68 have a module of its own), plus a miscellany of other things
69 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
71 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
74 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
77 \item[@RegAllocInfo@:] In the register allocator, we manipulate
78 @MRegsState@s, which are @BitSet@s, one bit per machine register.
79 When we want to say something about a specific machine register
80 (e.g., ``it gets clobbered by this instruction''), we set/unset
81 its bit. Obviously, we do this @BitSet@ thing for efficiency
84 The @RegAllocInfo@ module collects together the machine-specific
85 info needed to do register allocation.
91 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, Pretty.Doc)
93 = let absCstmts = mkAbsCStmtList absC
94 (sdoc_pairs, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
95 stix_sdocs = map fst sdoc_pairs
96 insn_sdocs = map snd sdoc_pairs
98 insn_sdoc = my_vcat insn_sdocs
99 stix_sdoc = vcat stix_sdocs
102 my_trace m x = trace m x
103 my_vcat sds = Pretty.vcat (
106 Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
107 Pretty.$$ Pretty.char ' '
112 my_vcat sds = Pretty.vcat sds
116 my_trace "nativeGen: begin"
117 (stix_sdoc, insn_sdoc)
120 absCtoNat :: AbstractC -> UniqSM (SDoc, Pretty.Doc)
122 = _scc_ "genCodeAbstractC" genCodeAbstractC absC `thenUs` \ stixRaw ->
123 _scc_ "genericOpt" genericOpt stixRaw `bind` \ stixOpt ->
124 _scc_ "liftStrings" liftStrings stixOpt `thenUs` \ stixLifted ->
125 _scc_ "genMachCode" genMachCode stixLifted `thenUs` \ pre_regalloc ->
126 _scc_ "regAlloc" regAlloc pre_regalloc `bind` \ almost_final ->
127 _scc_ "x86fp_kludge" x86fp_kludge almost_final `bind` \ final_mach_code ->
128 _scc_ "vcat" Pretty.vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
129 _scc_ "pprStixTrees" pprStixStmts stixOpt `bind` \ stix_sdoc ->
130 returnUs ({-\_ -> Pretty.vcat (map pprInstr almost_final),-}
131 stix_sdoc, final_sdoc)
135 x86fp_kludge :: [Instr] -> [Instr]
136 x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
138 regAlloc :: InstrBlock -> [Instr]
139 regAlloc = runRegAllocate allocatableRegs findReservedRegs
142 Top level code generator for a chunk of stix code. For this part of
143 the computation, we switch from the UniqSM monad to the NatM monad.
144 The latter carries not only a Unique, but also an Int denoting the
145 current C stack pointer offset in the generated code; this is needed
146 for creating correct spill offsets on architectures which don't offer,
147 or for which it would be prohibitively expensive to employ, a frame
148 pointer register. Viz, x86.
150 The offset is measured in bytes, and indicates the difference between
151 the current (simulated) C stack-ptr and the value it was at the
152 beginning of the block. For stacks which grow down, this value should
153 be either zero or negative.
155 Switching between the two monads whilst carrying along the same Unique
156 supply breaks abstraction. Is that bad?
159 genMachCode :: [StixStmt] -> UniqSM InstrBlock
161 genMachCode stmts initial_us
162 = let initial_st = mkNatM_State initial_us 0
163 (instr_list, final_st) = initNat initial_st (stmtsToInstrs stmts)
164 final_us = uniqOfNatM_State final_st
165 final_delta = deltaOfNatM_State final_st
168 then (instr_list, final_us)
169 else pprPanic "genMachCode: nonzero final delta"
173 %************************************************************************
175 \subsection[NCOpt]{The Generic Optimiser}
177 %************************************************************************
179 This is called between translating Abstract C to its Tree and actually
180 using the Native Code Generator to generate the annotations. It's a
181 chance to do some strength reductions.
183 ** Remember these all have to be machine independent ***
185 Note that constant-folding should have already happened, but we might
186 have introduced some new opportunities for constant-folding wrt
187 address manipulations.
190 genericOpt :: [StixStmt] -> [StixStmt]
191 genericOpt = map stixStmt_ConFold . stixPeep
195 stixPeep :: [StixStmt] -> [StixStmt]
197 -- This transformation assumes that the temp assigned to in t1
198 -- is not assigned to in t2; for otherwise the target of the
199 -- second assignment would be substituted for, giving nonsense
200 -- code. As far as I can see, StixTemps are only ever assigned
201 -- to once. It would be nice to be sure!
203 stixPeep ( t1@(StAssignReg pka (StixTemp (StixVReg u pk)) rhs)
206 | stixStmt_CountTempUses u t2 == 1
207 && sum (map (stixStmt_CountTempUses u) ts) == 0
210 trace ("nativeGen: inlining " ++ showSDoc (pprStixExpr rhs))
212 (stixPeep (stixStmt_Subst u rhs t2 : ts))
214 stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
219 For most nodes, just optimize the children.
222 stixExpr_ConFold :: StixExpr -> StixExpr
223 stixStmt_ConFold :: StixStmt -> StixStmt
225 stixStmt_ConFold stmt
227 StAssignReg pk reg@(StixTemp _) src
228 -> StAssignReg pk reg (stixExpr_ConFold src)
229 StAssignReg pk reg@(StixMagicId mid) src
230 -- Replace register leaves with appropriate StixTrees for
231 -- the given target. MagicIds which map to a reg on this arch are left unchanged.
232 -- Assigning to BaseReg is always illegal, so we check for that.
234 BaseReg -> panic "stixStmt_ConFold: assignment to BaseReg";
236 case get_MagicId_reg_or_addr mid of
238 -> StAssignReg pk reg (stixExpr_ConFold src)
240 -> stixStmt_ConFold (StAssignMem pk baseRegAddr src)
242 StAssignMem pk addr src
243 -> StAssignMem pk (stixExpr_ConFold addr) (stixExpr_ConFold src)
245 -> StVoidable (stixExpr_ConFold expr)
247 -> StJump dsts (stixExpr_ConFold addr)
249 -> let test_opt = stixExpr_ConFold test
251 if manifestlyZero test_opt
252 then StComment (_PK_ ("deleted: " ++ showSDoc (pprStixStmt stmt)))
253 else StCondJump addr (stixExpr_ConFold test)
255 -> StData pk (map stixExpr_ConFold datas)
259 manifestlyZero (StInt 0) = True
260 manifestlyZero other = False
262 stixExpr_ConFold expr
265 -> StInd pk (stixExpr_ConFold addr)
266 StCall fn cconv pk args
267 -> StCall fn cconv pk (map stixExpr_ConFold args)
268 StIndex pk (StIndex pk' base off) off'
269 -- Fold indices together when the types match:
271 -> StIndex pk (stixExpr_ConFold base)
272 (stixExpr_ConFold (StMachOp MO_Nat_Add [off, off']))
274 -> StIndex pk (stixExpr_ConFold base) (stixExpr_ConFold off)
277 -- For PrimOps, we first optimize the children, and then we try
278 -- our hand at some constant-folding.
279 -> stixMachOpFold mop (map stixExpr_ConFold args)
280 StReg (StixMagicId mid)
281 -- Replace register leaves with appropriate StixTrees for
282 -- the given target. MagicIds which map to a reg on this arch are left unchanged.
283 -- For the rest, BaseReg is taken to mean the address of the reg table
284 -- in MainCapability, and for all others we generate an indirection to
285 -- its location in the register table.
286 -> case get_MagicId_reg_or_addr mid of
290 BaseReg -> stixExpr_ConFold baseRegAddr
291 other -> stixExpr_ConFold (StInd (magicIdPrimRep mid) baseRegAddr)
296 Now, try to constant-fold the PrimOps. The arguments have already
297 been optimized and folded.
301 :: MachOp -- The operation from an StMachOp
302 -> [StixExpr] -- The optimized arguments
305 stixMachOpFold mop arg@[StInt x]
307 MO_NatS_Neg -> StInt (-x)
308 other -> StMachOp mop arg
310 stixMachOpFold mop args@[StInt x, StInt y]
312 MO_32U_Gt -> StInt (if x > y then 1 else 0)
313 MO_32U_Ge -> StInt (if x >= y then 1 else 0)
314 MO_32U_Eq -> StInt (if x == y then 1 else 0)
315 MO_32U_Ne -> StInt (if x /= y then 1 else 0)
316 MO_32U_Lt -> StInt (if x < y then 1 else 0)
317 MO_32U_Le -> StInt (if x <= y then 1 else 0)
318 MO_Nat_Add -> StInt (x + y)
319 MO_Nat_Sub -> StInt (x - y)
320 MO_NatS_Mul -> StInt (x * y)
321 MO_NatS_Quot | y /= 0 -> StInt (x `quot` y)
322 MO_NatS_Rem | y /= 0 -> StInt (x `rem` y)
323 MO_NatS_Gt -> StInt (if x > y then 1 else 0)
324 MO_NatS_Ge -> StInt (if x >= y then 1 else 0)
325 MO_Nat_Eq -> StInt (if x == y then 1 else 0)
326 MO_Nat_Ne -> StInt (if x /= y then 1 else 0)
327 MO_NatS_Lt -> StInt (if x < y then 1 else 0)
328 MO_NatS_Le -> StInt (if x <= y then 1 else 0)
329 MO_Nat_Shl | y >= 0 && y < 32 -> do_shl x y
330 other -> StMachOp mop args
332 do_shl :: Integer -> Integer -> StixExpr
334 do_shl v n | n > 0 = do_shl (v*2) (n-1)
337 When possible, shift the constants to the right-hand side, so that we
338 can match for strength reductions. Note that the code generator will
339 also assume that constants have been shifted to the right when
343 stixMachOpFold op [x@(StInt _), y] | isCommutableMachOp op
344 = stixMachOpFold op [y, x]
347 We can often do something with constants of 0 and 1 ...
350 stixMachOpFold mop args@[x, y@(StInt 0)]
362 MO_Nat_Ne | x_is_comparison -> x
363 other -> StMachOp mop args
367 StMachOp mopp [_, _] -> isComparisonMachOp mopp
370 stixMachOpFold mop args@[x, y@(StInt 1)]
376 MO_NatS_Rem -> StInt 0
377 MO_NatU_Rem -> StInt 0
378 other -> StMachOp mop args
381 Now look for multiplication/division by powers of 2 (integers).
384 stixMachOpFold mop args@[x, y@(StInt n)]
387 -> case exactLog2 n of
389 Just p -> StMachOp MO_Nat_Shl [x, StInt p]
391 -> case exactLog2 n of
393 Just p -> StMachOp MO_Nat_Shr [x, StInt p]
397 unchanged = StMachOp mop args
400 Anything else is just too hard.
403 stixMachOpFold mop args = StMachOp mop args