2 % (c) The AQUA Project, Glasgow University, 1993-1998
6 module AsmCodeGen ( nativeCodeGen ) where
8 #include "HsVersions.h"
16 import AbsCStixGen ( genCodeAbstractC )
17 import AbsCSyn ( AbstractC, MagicId(..) )
18 import AbsCUtils ( mkAbsCStmtList, magicIdPrimRep )
19 import AsmRegAlloc ( runRegAllocate )
20 import MachOp ( MachOp(..), isCommutableMachOp, isComparisonMachOp )
21 import RegAllocInfo ( findReservedRegs )
22 import Stix ( StixReg(..), StixStmt(..), StixExpr(..), StixVReg(..),
23 pprStixStmts, pprStixStmt,
24 stixStmt_CountTempUses, stixStmt_Subst,
28 uniqOfNatM_State, deltaOfNatM_State )
29 import UniqSupply ( returnUs, thenUs, initUs,
32 import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
34 import qualified Pretty
41 import List ( intersperse )
44 The 96/03 native-code generator has machine-independent and
45 machine-dependent modules (those \tr{#include}'ing \tr{NCG.h}).
47 This module (@AsmCodeGen@) is the top-level machine-independent
48 module. It uses @AbsCStixGen.genCodeAbstractC@ to produce @StixTree@s
49 (defined in module @Stix@), using support code from @StixPrim@
50 (primitive operations), @StixMacro@ (Abstract C macros), and
51 @StixInteger@ (GMP arbitrary-precision operations).
53 Before entering machine-dependent land, we do some machine-independent
54 @genericOpt@imisations (defined below) on the @StixTree@s.
56 We convert to the machine-specific @Instr@ datatype with
57 @stmt2Instrs@, assuming an ``infinite'' supply of registers. We then
58 use a machine-independent register allocator (@runRegAllocate@) to
59 rejoin reality. Obviously, @runRegAllocate@ has machine-specific
60 helper functions (see about @RegAllocInfo@ below).
62 The machine-dependent bits break down as follows:
64 \item[@MachRegs@:] Everything about the target platform's machine
65 registers (and immediate operands, and addresses, which tend to
66 intermingle/interact with registers).
68 \item[@MachMisc@:] Includes the @Instr@ datatype (possibly should
69 have a module of its own), plus a miscellany of other things
70 (e.g., @targetDoubleSize@, @smStablePtrTable@, ...)
72 \item[@MachCode@:] @stmt2Instrs@ is where @Stix@ stuff turns into
75 \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
78 \item[@RegAllocInfo@:] In the register allocator, we manipulate
79 @MRegsState@s, which are @BitSet@s, one bit per machine register.
80 When we want to say something about a specific machine register
81 (e.g., ``it gets clobbered by this instruction''), we set/unset
82 its bit. Obviously, we do this @BitSet@ thing for efficiency
85 The @RegAllocInfo@ module collects together the machine-specific
86 info needed to do register allocation.
92 nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, Pretty.Doc)
94 = let absCstmts = mkAbsCStmtList absC
95 (sdoc_pairs, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
96 stix_sdocs = map fst sdoc_pairs
97 insn_sdocs = map snd sdoc_pairs
99 insn_sdoc = my_vcat insn_sdocs
100 stix_sdoc = vcat stix_sdocs
103 my_trace m x = trace m x
104 my_vcat sds = Pretty.vcat (
107 Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
108 Pretty.$$ Pretty.char ' '
113 my_vcat sds = Pretty.vcat sds
117 my_trace "nativeGen: begin"
118 (stix_sdoc, insn_sdoc)
121 absCtoNat :: AbstractC -> UniqSM (SDoc, Pretty.Doc)
123 = _scc_ "genCodeAbstractC" genCodeAbstractC absC `thenUs` \ stixRaw ->
124 _scc_ "genericOpt" genericOpt stixRaw `bind` \ stixOpt ->
125 _scc_ "liftStrings" liftStrings stixOpt `thenUs` \ stixLifted ->
126 _scc_ "genMachCode" genMachCode stixLifted `thenUs` \ pre_regalloc ->
127 _scc_ "regAlloc" regAlloc pre_regalloc `bind` \ almost_final ->
128 _scc_ "x86fp_kludge" x86fp_kludge almost_final `bind` \ final_mach_code ->
129 _scc_ "vcat" Pretty.vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
130 _scc_ "pprStixTrees" pprStixStmts stixOpt `bind` \ stix_sdoc ->
131 returnUs ({-\_ -> Pretty.vcat (map pprInstr almost_final),-}
132 stix_sdoc, final_sdoc)
136 x86fp_kludge :: [Instr] -> [Instr]
137 x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
139 regAlloc :: InstrBlock -> [Instr]
140 regAlloc = runRegAllocate allocatableRegs findReservedRegs
143 Top level code generator for a chunk of stix code. For this part of
144 the computation, we switch from the UniqSM monad to the NatM monad.
145 The latter carries not only a Unique, but also an Int denoting the
146 current C stack pointer offset in the generated code; this is needed
147 for creating correct spill offsets on architectures which don't offer,
148 or for which it would be prohibitively expensive to employ, a frame
149 pointer register. Viz, x86.
151 The offset is measured in bytes, and indicates the difference between
152 the current (simulated) C stack-ptr and the value it was at the
153 beginning of the block. For stacks which grow down, this value should
154 be either zero or negative.
156 Switching between the two monads whilst carrying along the same Unique
157 supply breaks abstraction. Is that bad?
160 genMachCode :: [StixStmt] -> UniqSM InstrBlock
162 genMachCode stmts initial_us
163 = let initial_st = mkNatM_State initial_us 0
164 (instr_list, final_st) = initNat initial_st (stmtsToInstrs stmts)
165 final_us = uniqOfNatM_State final_st
166 final_delta = deltaOfNatM_State final_st
169 then (instr_list, final_us)
170 else pprPanic "genMachCode: nonzero final delta"
174 %************************************************************************
176 \subsection[NCOpt]{The Generic Optimiser}
178 %************************************************************************
180 This is called between translating Abstract C to its Tree and actually
181 using the Native Code Generator to generate the annotations. It's a
182 chance to do some strength reductions.
184 ** Remember these all have to be machine independent ***
186 Note that constant-folding should have already happened, but we might
187 have introduced some new opportunities for constant-folding wrt
188 address manipulations.
191 genericOpt :: [StixStmt] -> [StixStmt]
192 genericOpt = map stixStmt_ConFold . stixPeep
196 stixPeep :: [StixStmt] -> [StixStmt]
198 -- This transformation assumes that the temp assigned to in t1
199 -- is not assigned to in t2; for otherwise the target of the
200 -- second assignment would be substituted for, giving nonsense
201 -- code. As far as I can see, StixTemps are only ever assigned
202 -- to once. It would be nice to be sure!
204 stixPeep ( t1@(StAssignReg pka (StixTemp (StixVReg u pk)) rhs)
207 | stixStmt_CountTempUses u t2 == 1
208 && sum (map (stixStmt_CountTempUses u) ts) == 0
211 trace ("nativeGen: inlining " ++ showSDoc (pprStixExpr rhs))
213 (stixPeep (stixStmt_Subst u rhs t2 : ts))
215 stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
220 For most nodes, just optimize the children.
223 stixExpr_ConFold :: StixExpr -> StixExpr
224 stixStmt_ConFold :: StixStmt -> StixStmt
226 stixStmt_ConFold stmt
228 StAssignReg pk reg@(StixTemp _) src
229 -> StAssignReg pk reg (stixExpr_ConFold src)
230 StAssignReg pk reg@(StixMagicId mid) src
231 -- Replace register leaves with appropriate StixTrees for
232 -- the given target. MagicIds which map to a reg on this arch are left unchanged.
233 -- Assigning to BaseReg is always illegal, so we check for that.
235 BaseReg -> panic "stixStmt_ConFold: assignment to BaseReg";
237 case get_MagicId_reg_or_addr mid of
239 -> StAssignReg pk reg (stixExpr_ConFold src)
241 -> stixStmt_ConFold (StAssignMem pk baseRegAddr src)
243 StAssignMem pk addr src
244 -> StAssignMem pk (stixExpr_ConFold addr) (stixExpr_ConFold src)
246 -> StVoidable (stixExpr_ConFold expr)
248 -> StJump dsts (stixExpr_ConFold addr)
250 -> let test_opt = stixExpr_ConFold test
252 if manifestlyZero test_opt
253 then StComment (mkFastString ("deleted: " ++ showSDoc (pprStixStmt stmt)))
254 else StCondJump addr (stixExpr_ConFold test)
256 -> StData pk (map stixExpr_ConFold datas)
260 manifestlyZero (StInt 0) = True
261 manifestlyZero other = False
263 stixExpr_ConFold expr
266 -> StInd pk (stixExpr_ConFold addr)
267 StCall fn cconv pk args
268 -> StCall fn cconv pk (map stixExpr_ConFold args)
269 StIndex pk (StIndex pk' base off) off'
270 -- Fold indices together when the types match:
272 -> StIndex pk (stixExpr_ConFold base)
273 (stixExpr_ConFold (StMachOp MO_Nat_Add [off, off']))
275 -> StIndex pk (stixExpr_ConFold base) (stixExpr_ConFold off)
278 -- For PrimOps, we first optimize the children, and then we try
279 -- our hand at some constant-folding.
280 -> stixMachOpFold mop (map stixExpr_ConFold args)
281 StReg (StixMagicId mid)
282 -- Replace register leaves with appropriate StixTrees for
283 -- the given target. MagicIds which map to a reg on this arch are left unchanged.
284 -- For the rest, BaseReg is taken to mean the address of the reg table
285 -- in MainCapability, and for all others we generate an indirection to
286 -- its location in the register table.
287 -> case get_MagicId_reg_or_addr mid of
291 BaseReg -> stixExpr_ConFold baseRegAddr
292 other -> stixExpr_ConFold (StInd (magicIdPrimRep mid) baseRegAddr)
297 Now, try to constant-fold the PrimOps. The arguments have already
298 been optimized and folded.
302 :: MachOp -- The operation from an StMachOp
303 -> [StixExpr] -- The optimized arguments
306 stixMachOpFold mop arg@[StInt x]
308 MO_NatS_Neg -> StInt (-x)
309 other -> StMachOp mop arg
311 stixMachOpFold mop args@[StInt x, StInt y]
313 MO_32U_Gt -> StInt (if x > y then 1 else 0)
314 MO_32U_Ge -> StInt (if x >= y then 1 else 0)
315 MO_32U_Eq -> StInt (if x == y then 1 else 0)
316 MO_32U_Ne -> StInt (if x /= y then 1 else 0)
317 MO_32U_Lt -> StInt (if x < y then 1 else 0)
318 MO_32U_Le -> StInt (if x <= y then 1 else 0)
319 MO_Nat_Add -> StInt (x + y)
320 MO_Nat_Sub -> StInt (x - y)
321 MO_NatS_Mul -> StInt (x * y)
322 MO_NatS_Quot | y /= 0 -> StInt (x `quot` y)
323 MO_NatS_Rem | y /= 0 -> StInt (x `rem` y)
324 MO_NatS_Gt -> StInt (if x > y then 1 else 0)
325 MO_NatS_Ge -> StInt (if x >= y then 1 else 0)
326 MO_Nat_Eq -> StInt (if x == y then 1 else 0)
327 MO_Nat_Ne -> StInt (if x /= y then 1 else 0)
328 MO_NatS_Lt -> StInt (if x < y then 1 else 0)
329 MO_NatS_Le -> StInt (if x <= y then 1 else 0)
330 MO_Nat_Shl | y >= 0 && y < 32 -> do_shl x y
331 other -> StMachOp mop args
333 do_shl :: Integer -> Integer -> StixExpr
335 do_shl v n | n > 0 = do_shl (v*2) (n-1)
338 When possible, shift the constants to the right-hand side, so that we
339 can match for strength reductions. Note that the code generator will
340 also assume that constants have been shifted to the right when
344 stixMachOpFold op [x@(StInt _), y] | isCommutableMachOp op
345 = stixMachOpFold op [y, x]
348 We can often do something with constants of 0 and 1 ...
351 stixMachOpFold mop args@[x, y@(StInt 0)]
363 MO_Nat_Ne | x_is_comparison -> x
364 other -> StMachOp mop args
368 StMachOp mopp [_, _] -> isComparisonMachOp mopp
371 stixMachOpFold mop args@[x, y@(StInt 1)]
377 MO_NatS_Rem -> StInt 0
378 MO_NatU_Rem -> StInt 0
379 other -> StMachOp mop args
382 Now look for multiplication/division by powers of 2 (integers).
385 stixMachOpFold mop args@[x, y@(StInt n)]
388 -> case exactLog2 n of
390 Just p -> StMachOp MO_Nat_Shl [x, StInt p]
392 -> case exactLog2 n of
394 Just p -> StMachOp MO_Nat_Shr [x, StInt p]
398 unchanged = StMachOp mop args
401 Anything else is just too hard.
404 stixMachOpFold mop args = StMachOp mop args