%
-% (c) The AQUA Project, Glasgow University, 1993-1996
+% (c) The AQUA Project, Glasgow University, 1993-1998
%
\begin{code}
-#include "HsVersions.h"
+module AsmCodeGen ( nativeCodeGen ) where
-module AsmCodeGen ( writeRealAsm, dumpRealAsm ) where
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
-import Ubiq{-uitous-}
+import List ( intersperse )
import MachMisc
import MachRegs
import AbsCStixGen ( genCodeAbstractC )
import AbsCSyn ( AbstractC, MagicId )
+import AbsCUtils ( mkAbsCStmtList )
import AsmRegAlloc ( runRegAllocate )
-import OrdList ( OrdList )
import PrimOp ( commutableOp, PrimOp(..) )
-import PrimRep ( PrimRep{-instance Eq-} )
-import RegAllocInfo ( mkMRegsState, MRegsState )
-import Stix ( StixTree(..), StixReg(..), CodeSegment )
-import UniqSupply ( returnUs, thenUs, mapUs, UniqSM(..) )
-import Unpretty ( uppAppendFile, uppShow, uppAboves, Unpretty(..) )
+import RegAllocInfo ( findReservedRegs )
+import Stix ( StixTree(..), StixReg(..),
+ pprStixTrees, pprStixTree, CodeSegment(..),
+ stixCountTempUses, stixSubst,
+ NatM, initNat, mapNat,
+ NatM_State, mkNatM_State,
+ uniqOfNatM_State, deltaOfNatM_State )
+import UniqSupply ( returnUs, thenUs, mapUs, initUs,
+ initUs_, UniqSM, UniqSupply,
+ lazyThenUs, lazyMapUs )
+import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
+
+import OrdList ( fromOL, concatOL )
+import Outputable
+
\end{code}
The 96/03 native-code generator has machine-independent and
machine instructions.
\item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really
- an @Unpretty@).
+ an @Doc@).
\item[@RegAllocInfo@:] In the register allocator, we manipulate
@MRegsState@s, which are @BitSet@s, one bit per machine register.
\end{description}
So, here we go:
-\begin{code}
-writeRealAsm :: _FILE -> AbstractC -> UniqSupply -> IO ()
-
-writeRealAsm file absC us
- = uppAppendFile file 80 (runNCG absC us)
-
-dumpRealAsm :: AbstractC -> UniqSupply -> String
-
-dumpRealAsm absC us = uppShow 80 (runNCG absC us)
-
-runNCG absC
- = genCodeAbstractC absC `thenUs` \ treelists ->
- let
- stix = map (map genericOpt) treelists
- in
- codeGen stix
-\end{code}
-@codeGen@ is the top-level code-generation function:
\begin{code}
-codeGen :: [[StixTree]] -> UniqSM Unpretty
-
-codeGen trees
- = mapUs genMachCode trees `thenUs` \ dynamic_codes ->
- let
- static_instrs = scheduleMachCode dynamic_codes
- in
- returnUs (uppAboves (map pprInstr static_instrs))
+nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc)
+nativeCodeGen absC us
+ = let absCstmts = mkAbsCStmtList absC
+ (sdoc_pairs, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
+ stix_sdocs = map fst sdoc_pairs
+ insn_sdocs = map snd sdoc_pairs
+
+ insn_sdoc = my_vcat insn_sdocs
+ stix_sdoc = vcat stix_sdocs
+
+# if 1 /* ifdef NCG_DEBUG */
+ my_trace m x = trace m x
+ my_vcat sds = vcat (intersperse (char ' '
+ $$ ptext SLIT("# ___ncg_debug_marker")
+ $$ char ' ')
+ sds)
+# else
+ my_vcat sds = vcat sds
+ my_trace m x = x
+# endif
+ in
+ my_trace "nativeGen: begin"
+ (stix_sdoc, insn_sdoc)
+
+
+absCtoNat :: AbstractC -> UniqSM (SDoc, SDoc)
+absCtoNat absC
+ = genCodeAbstractC absC `thenUs` \ stixRaw ->
+ genericOpt stixRaw `bind` \ stixOpt ->
+ genMachCode stixOpt `thenUs` \ pre_regalloc ->
+ regAlloc pre_regalloc `bind` \ almost_final ->
+ x86fp_kludge almost_final `bind` \ final_mach_code ->
+ vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
+ pprStixTrees stixOpt `bind` \ stix_sdoc ->
+ returnUs (stix_sdoc, final_sdoc)
+ where
+ bind f x = x f
+
+ x86fp_kludge :: [Instr] -> [Instr]
+ x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
+
+ regAlloc :: InstrBlock -> [Instr]
+ regAlloc = runRegAllocate allocatableRegs findReservedRegs
\end{code}
-Top level code generator for a chunk of stix code:
-\begin{code}
-genMachCode :: [StixTree] -> UniqSM InstrList
+Top level code generator for a chunk of stix code. For this part of
+the computation, we switch from the UniqSM monad to the NatM monad.
+The latter carries not only a Unique, but also an Int denoting the
+current C stack pointer offset in the generated code; this is needed
+for creating correct spill offsets on architectures which don't offer,
+or for which it would be prohibitively expensive to employ, a frame
+pointer register. Viz, x86.
-genMachCode stmts
- = mapUs stmt2Instrs stmts `thenUs` \ blocks ->
- returnUs (foldr (.) id blocks asmVoid)
-\end{code}
+The offset is measured in bytes, and indicates the difference between
+the current (simulated) C stack-ptr and the value it was at the
+beginning of the block. For stacks which grow down, this value should
+be either zero or negative.
-The next bit does the code scheduling. The scheduler must also deal
-with register allocation of temporaries. Much parallelism can be
-exposed via the OrdList, but more might occur, so further analysis
-might be needed.
+Switching between the two monads whilst carrying along the same Unique
+supply breaks abstraction. Is that bad?
\begin{code}
-scheduleMachCode :: [InstrList] -> [Instr]
-
-scheduleMachCode
- = concat . map (runRegAllocate freeRegsState reservedRegs)
- where
- freeRegsState = mkMRegsState (extractMappedRegNos freeRegs)
+genMachCode :: [StixTree] -> UniqSM InstrBlock
+
+genMachCode stmts initial_us
+ = let initial_st = mkNatM_State initial_us 0
+ (blocks, final_st) = initNat initial_st
+ (mapNat stmt2Instrs stmts)
+ instr_list = concatOL blocks
+ final_us = uniqOfNatM_State final_st
+ final_delta = deltaOfNatM_State final_st
+ in
+ if final_delta == 0
+ then (instr_list, final_us)
+ else pprPanic "genMachCode: nonzero final delta"
+ (int final_delta)
\end{code}
%************************************************************************
address manipulations.
\begin{code}
-genericOpt :: StixTree -> StixTree
+genericOpt :: [StixTree] -> [StixTree]
+genericOpt = map stixConFold . stixPeep
+
+
+
+stixPeep :: [StixTree] -> [StixTree]
+
+-- This transformation assumes that the temp assigned to in t1
+-- is not assigned to in t2; for otherwise the target of the
+-- second assignment would be substituted for, giving nonsense
+-- code. As far as I can see, StixTemps are only ever assigned
+-- to once. It would be nice to be sure!
+
+stixPeep ( t1@(StAssign pka (StReg (StixTemp u pk)) rhs)
+ : t2
+ : ts )
+ | stixCountTempUses u t2 == 1
+ && sum (map (stixCountTempUses u) ts) == 0
+ =
+# ifdef NCG_DEBUG
+ trace ("nativeGen: inlining " ++ showSDoc (pprStixTree rhs))
+# endif
+ (stixPeep (stixSubst u rhs t2 : ts))
+
+stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
+stixPeep [t1] = [t1]
+stixPeep [] = []
+
+-- disable stix inlining until we figure out how to fix the
+-- latent bugs in the register allocator which are exposed by
+-- the inliner.
+--stixPeep = id
\end{code}
For most nodes, just optimize the children.
\begin{code}
-genericOpt (StInd pk addr) = StInd pk (genericOpt addr)
+stixConFold :: StixTree -> StixTree
+
+stixConFold (StInd pk addr) = StInd pk (stixConFold addr)
-genericOpt (StAssign pk dst src)
- = StAssign pk (genericOpt dst) (genericOpt src)
+stixConFold (StAssign pk dst src)
+ = StAssign pk (stixConFold dst) (stixConFold src)
-genericOpt (StJump addr) = StJump (genericOpt addr)
+stixConFold (StJump addr) = StJump (stixConFold addr)
-genericOpt (StCondJump addr test)
- = StCondJump addr (genericOpt test)
+stixConFold (StCondJump addr test)
+ = StCondJump addr (stixConFold test)
-genericOpt (StCall fn pk args)
- = StCall fn pk (map genericOpt args)
+stixConFold (StCall fn cconv pk args)
+ = StCall fn cconv pk (map stixConFold args)
\end{code}
Fold indices together when the types match:
\begin{code}
-genericOpt (StIndex pk (StIndex pk' base off) off')
+stixConFold (StIndex pk (StIndex pk' base off) off')
| pk == pk'
- = StIndex pk (genericOpt base)
- (genericOpt (StPrim IntAddOp [off, off']))
+ = StIndex pk (stixConFold base)
+ (stixConFold (StPrim IntAddOp [off, off']))
-genericOpt (StIndex pk base off)
- = StIndex pk (genericOpt base) (genericOpt off)
+stixConFold (StIndex pk base off)
+ = StIndex pk (stixConFold base) (stixConFold off)
\end{code}
For PrimOps, we first optimize the children, and then we try our hand
at some constant-folding.
\begin{code}
-genericOpt (StPrim op args) = primOpt op (map genericOpt args)
+stixConFold (StPrim op args) = stixPrimFold op (map stixConFold args)
\end{code}
Replace register leaves with appropriate StixTrees for the given
target.
\begin{code}
-genericOpt leaf@(StReg (StixMagicId id))
+stixConFold leaf@(StReg (StixMagicId id))
= case (stgReg id) of
- Always tree -> genericOpt tree
+ Always tree -> stixConFold tree
Save _ -> leaf
-genericOpt other = other
+stixConFold other = other
\end{code}
Now, try to constant-fold the PrimOps. The arguments have already
been optimized and folded.
\begin{code}
-primOpt
+stixPrimFold
:: PrimOp -- The operation from an StPrim
-> [StixTree] -- The optimized arguments
-> StixTree
-primOpt op arg@[StInt x]
+stixPrimFold op arg@[StInt x]
= case op of
IntNegOp -> StInt (-x)
- IntAbsOp -> StInt (abs x)
_ -> StPrim op arg
-primOpt op args@[StInt x, StInt y]
+stixPrimFold op args@[StInt x, StInt y]
= case op of
CharGtOp -> StInt (if x > y then 1 else 0)
CharGeOp -> StInt (if x >= y then 1 else 0)
IntNeOp -> StInt (if x /= y then 1 else 0)
IntLtOp -> StInt (if x < y then 1 else 0)
IntLeOp -> StInt (if x <= y then 1 else 0)
+ -- ToDo: WordQuotOp, WordRemOp.
_ -> StPrim op args
\end{code}
possible.
\begin{code}
-primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x]
+stixPrimFold op [x@(StInt _), y] | commutableOp op = stixPrimFold op [y, x]
\end{code}
We can often do something with constants of 0 and 1 ...
\begin{code}
-primOpt op args@[x, y@(StInt 0)]
+stixPrimFold op args@[x, y@(StInt 0)]
= case op of
IntAddOp -> x
IntSubOp -> x
IntMulOp -> y
AndOp -> y
OrOp -> x
+ XorOp -> x
SllOp -> x
- SraOp -> x
SrlOp -> x
ISllOp -> x
ISraOp -> x
ISrlOp -> x
+ IntNeOp | is_comparison -> x
_ -> StPrim op args
+ where
+ is_comparison
+ = case x of
+ StPrim opp [_, _] -> opp `elem` comparison_ops
+ _ -> False
-primOpt op args@[x, y@(StInt 1)]
+stixPrimFold op args@[x, y@(StInt 1)]
= case op of
IntMulOp -> x
IntQuotOp -> x
Now look for multiplication/division by powers of 2 (integers).
\begin{code}
-primOpt op args@[x, y@(StInt n)]
+stixPrimFold op args@[x, y@(StInt n)]
= case op of
IntMulOp -> case exactLog2 n of
Nothing -> StPrim op args
- Just p -> StPrim SllOp [x, StInt p]
+ Just p -> StPrim ISllOp [x, StInt p]
IntQuotOp -> case exactLog2 n of
Nothing -> StPrim op args
- Just p -> StPrim SraOp [x, StInt p]
+ Just p -> StPrim ISrlOp [x, StInt p]
_ -> StPrim op args
\end{code}
Anything else is just too hard.
\begin{code}
-primOpt op args = StPrim op args
+stixPrimFold op args = StPrim op args
+\end{code}
+
+\begin{code}
+comparison_ops
+ = [ CharGtOp , CharGeOp , CharEqOp , CharNeOp , CharLtOp , CharLeOp,
+ IntGtOp , IntGeOp , IntEqOp , IntNeOp , IntLtOp , IntLeOp,
+ WordGtOp , WordGeOp , WordEqOp , WordNeOp , WordLtOp , WordLeOp,
+ AddrGtOp , AddrGeOp , AddrEqOp , AddrNeOp , AddrLtOp , AddrLeOp,
+ FloatGtOp , FloatGeOp , FloatEqOp , FloatNeOp , FloatLtOp , FloatLeOp,
+ DoubleGtOp, DoubleGeOp, DoubleEqOp, DoubleNeOp, DoubleLtOp, DoubleLeOp
+ ]
\end{code}