%
\begin{code}
-module AsmCodeGen ( writeRealAsm, dumpRealAsm ) where
+module AsmCodeGen ( nativeCodeGen ) where
#include "HsVersions.h"
-
-import IO ( Handle )
+#include "NCG.h"
import MachMisc
import MachRegs
import PprMach
import AbsCStixGen ( genCodeAbstractC )
-import AbsCSyn ( AbstractC, MagicId )
+import AbsCSyn ( AbstractC, MagicId(..) )
+import AbsCUtils ( mkAbsCStmtList, magicIdPrimRep )
import AsmRegAlloc ( runRegAllocate )
-import OrdList ( OrdList )
-import PrimOp ( commutableOp, PrimOp(..) )
-import RegAllocInfo ( mkMRegsState, MRegsState )
-import Stix ( StixTree(..), StixReg(..) )
-import PrimRep ( isFloatingRep )
-import UniqSupply ( returnUs, thenUs, mapUs, initUs_, UniqSM, UniqSupply )
-import UniqFM ( UniqFM, emptyUFM, addToUFM, lookupUFM )
-import Outputable
-
-import GlaExts (trace) --tmp
-#include "nativeGen/NCG.h"
+import MachOp ( MachOp(..), isCommutableMachOp, isComparisonMachOp )
+import RegAllocInfo ( findReservedRegs )
+import Stix ( StixReg(..), StixStmt(..), StixExpr(..), StixVReg(..),
+ pprStixStmts, pprStixStmt,
+ stixStmt_CountTempUses, stixStmt_Subst,
+ liftStrings,
+ initNat,
+ mkNatM_State,
+ uniqOfNatM_State, deltaOfNatM_State )
+import UniqSupply ( returnUs, thenUs, initUs,
+ UniqSM, UniqSupply,
+ lazyMapUs )
+import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) )
+
+import qualified Pretty
+import Outputable
+import FastString
+
+-- DEBUGGING ONLY
+--import OrdList
+
+import List ( intersperse )
\end{code}
The 96/03 native-code generator has machine-independent and
\end{description}
So, here we go:
-\begin{code}
-writeRealAsm :: Handle -> AbstractC -> UniqSupply -> IO ()
-writeRealAsm handle absC us
- = -- _scc_ "writeRealAsm"
- printForAsm handle (initUs_ us (runNCG absC))
-
-dumpRealAsm :: AbstractC -> UniqSupply -> SDoc
-dumpRealAsm absC us = initUs_ us (runNCG absC)
-
-runNCG absC
- = genCodeAbstractC absC `thenUs` \ treelists ->
- let
- stix = map (map genericOpt) treelists
- in
-#if i386_TARGET_ARCH
- let
- stix' = map floatFix stix
- in
- codeGen stix'
-#else
- codeGen stix
-#endif
-\end{code}
-@codeGen@ is the top-level code-generation function:
\begin{code}
-codeGen :: [[StixTree]] -> UniqSM SDoc
-
-codeGen trees
- = mapUs genMachCode trees `thenUs` \ dynamic_codes ->
- let
- static_instrs = scheduleMachCode dynamic_codes
- in
- returnUs (vcat (map pprInstr static_instrs))
+nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, Pretty.Doc)
+nativeCodeGen absC us
+ = let absCstmts = mkAbsCStmtList absC
+ (sdoc_pairs, us1) = initUs us (lazyMapUs absCtoNat absCstmts)
+ stix_sdocs = map fst sdoc_pairs
+ insn_sdocs = map snd sdoc_pairs
+
+ insn_sdoc = my_vcat insn_sdocs
+ stix_sdoc = vcat stix_sdocs
+
+# ifdef NCG_DEBUG
+ my_trace m x = trace m x
+ my_vcat sds = Pretty.vcat (
+ intersperse (
+ Pretty.char ' '
+ Pretty.$$ Pretty.ptext SLIT("# ___ncg_debug_marker")
+ Pretty.$$ Pretty.char ' '
+ )
+ sds
+ )
+# else
+ my_vcat sds = Pretty.vcat sds
+ my_trace m x = x
+# endif
+ in
+ my_trace "nativeGen: begin"
+ (stix_sdoc, insn_sdoc)
+
+
+absCtoNat :: AbstractC -> UniqSM (SDoc, Pretty.Doc)
+absCtoNat absC
+ = _scc_ "genCodeAbstractC" genCodeAbstractC absC `thenUs` \ stixRaw ->
+ _scc_ "genericOpt" genericOpt stixRaw `bind` \ stixOpt ->
+ _scc_ "liftStrings" liftStrings stixOpt `thenUs` \ stixLifted ->
+ _scc_ "genMachCode" genMachCode stixLifted `thenUs` \ pre_regalloc ->
+ _scc_ "regAlloc" regAlloc pre_regalloc `bind` \ almost_final ->
+ _scc_ "x86fp_kludge" x86fp_kludge almost_final `bind` \ final_mach_code ->
+ _scc_ "vcat" Pretty.vcat (map pprInstr final_mach_code) `bind` \ final_sdoc ->
+ _scc_ "pprStixTrees" pprStixStmts stixOpt `bind` \ stix_sdoc ->
+ returnUs ({-\_ -> Pretty.vcat (map pprInstr almost_final),-}
+ stix_sdoc, final_sdoc)
+ where
+ bind f x = x f
+
+ x86fp_kludge :: [Instr] -> [Instr]
+ x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id)
+
+ regAlloc :: InstrBlock -> [Instr]
+ regAlloc = runRegAllocate allocatableRegs findReservedRegs
\end{code}
-Top level code generator for a chunk of stix code:
-\begin{code}
-genMachCode :: [StixTree] -> UniqSM InstrList
+Top level code generator for a chunk of stix code. For this part of
+the computation, we switch from the UniqSM monad to the NatM monad.
+The latter carries not only a Unique, but also an Int denoting the
+current C stack pointer offset in the generated code; this is needed
+for creating correct spill offsets on architectures which don't offer,
+or for which it would be prohibitively expensive to employ, a frame
+pointer register. Viz, x86.
-genMachCode stmts
- = mapUs stmt2Instrs stmts `thenUs` \ blocks ->
- returnUs (foldr (.) id blocks asmVoid)
-\end{code}
+The offset is measured in bytes, and indicates the difference between
+the current (simulated) C stack-ptr and the value it was at the
+beginning of the block. For stacks which grow down, this value should
+be either zero or negative.
-The next bit does the code scheduling. The scheduler must also deal
-with register allocation of temporaries. Much parallelism can be
-exposed via the OrdList, but more might occur, so further analysis
-might be needed.
+Switching between the two monads whilst carrying along the same Unique
+supply breaks abstraction. Is that bad?
\begin{code}
-scheduleMachCode :: [InstrList] -> [Instr]
+genMachCode :: [StixStmt] -> UniqSM InstrBlock
-scheduleMachCode
- = concat . map (runRegAllocate freeRegsState reservedRegs)
- where
- freeRegsState = mkMRegsState (extractMappedRegNos freeRegs)
+genMachCode stmts initial_us
+ = let initial_st = mkNatM_State initial_us 0
+ (instr_list, final_st) = initNat initial_st (stmtsToInstrs stmts)
+ final_us = uniqOfNatM_State final_st
+ final_delta = deltaOfNatM_State final_st
+ in
+ if final_delta == 0
+ then (instr_list, final_us)
+ else pprPanic "genMachCode: nonzero final delta"
+ (int final_delta)
\end{code}
%************************************************************************
address manipulations.
\begin{code}
-genericOpt :: StixTree -> StixTree
-\end{code}
+genericOpt :: [StixStmt] -> [StixStmt]
+genericOpt = map stixStmt_ConFold . stixPeep
-For most nodes, just optimize the children.
-
-\begin{code}
-genericOpt (StInd pk addr) = StInd pk (genericOpt addr)
-
-genericOpt (StAssign pk dst src)
- = StAssign pk (genericOpt dst) (genericOpt src)
-
-genericOpt (StJump addr) = StJump (genericOpt addr)
-genericOpt (StCondJump addr test)
- = StCondJump addr (genericOpt test)
-genericOpt (StCall fn cconv pk args)
- = StCall fn cconv pk (map genericOpt args)
-\end{code}
+stixPeep :: [StixStmt] -> [StixStmt]
-Fold indices together when the types match:
-\begin{code}
-genericOpt (StIndex pk (StIndex pk' base off) off')
- | pk == pk'
- = StIndex pk (genericOpt base)
- (genericOpt (StPrim IntAddOp [off, off']))
+-- This transformation assumes that the temp assigned to in t1
+-- is not assigned to in t2; for otherwise the target of the
+-- second assignment would be substituted for, giving nonsense
+-- code. As far as I can see, StixTemps are only ever assigned
+-- to once. It would be nice to be sure!
-genericOpt (StIndex pk base off)
- = StIndex pk (genericOpt base) (genericOpt off)
-\end{code}
+stixPeep ( t1@(StAssignReg pka (StixTemp (StixVReg u pk)) rhs)
+ : t2
+ : ts )
+ | stixStmt_CountTempUses u t2 == 1
+ && sum (map (stixStmt_CountTempUses u) ts) == 0
+ =
+# ifdef NCG_DEBUG
+ trace ("nativeGen: inlining " ++ showSDoc (pprStixExpr rhs))
+# endif
+ (stixPeep (stixStmt_Subst u rhs t2 : ts))
-For PrimOps, we first optimize the children, and then we try our hand
-at some constant-folding.
-
-\begin{code}
-genericOpt (StPrim op args) = primOpt op (map genericOpt args)
+stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts)
+stixPeep [t1] = [t1]
+stixPeep [] = []
\end{code}
-Replace register leaves with appropriate StixTrees for the given
-target.
+For most nodes, just optimize the children.
\begin{code}
-genericOpt leaf@(StReg (StixMagicId id))
- = case (stgReg id) of
- Always tree -> genericOpt tree
- Save _ -> leaf
-
-genericOpt other = other
+stixExpr_ConFold :: StixExpr -> StixExpr
+stixStmt_ConFold :: StixStmt -> StixStmt
+
+stixStmt_ConFold stmt
+ = case stmt of
+ StAssignReg pk reg@(StixTemp _) src
+ -> StAssignReg pk reg (stixExpr_ConFold src)
+ StAssignReg pk reg@(StixMagicId mid) src
+ -- Replace register leaves with appropriate StixTrees for
+ -- the given target. MagicIds which map to a reg on this arch are left unchanged.
+ -- Assigning to BaseReg is always illegal, so we check for that.
+ -> case mid of {
+ BaseReg -> panic "stixStmt_ConFold: assignment to BaseReg";
+ other ->
+ case get_MagicId_reg_or_addr mid of
+ Left realreg
+ -> StAssignReg pk reg (stixExpr_ConFold src)
+ Right baseRegAddr
+ -> stixStmt_ConFold (StAssignMem pk baseRegAddr src)
+ }
+ StAssignMem pk addr src
+ -> StAssignMem pk (stixExpr_ConFold addr) (stixExpr_ConFold src)
+ StVoidable expr
+ -> StVoidable (stixExpr_ConFold expr)
+ StJump dsts addr
+ -> StJump dsts (stixExpr_ConFold addr)
+ StCondJump addr test
+ -> let test_opt = stixExpr_ConFold test
+ in
+ if manifestlyZero test_opt
+ then StComment (mkFastString ("deleted: " ++ showSDoc (pprStixStmt stmt)))
+ else StCondJump addr (stixExpr_ConFold test)
+ StData pk datas
+ -> StData pk (map stixExpr_ConFold datas)
+ other
+ -> other
+ where
+ manifestlyZero (StInt 0) = True
+ manifestlyZero other = False
+
+stixExpr_ConFold expr
+ = case expr of
+ StInd pk addr
+ -> StInd pk (stixExpr_ConFold addr)
+ StCall fn cconv pk args
+ -> StCall fn cconv pk (map stixExpr_ConFold args)
+ StIndex pk (StIndex pk' base off) off'
+ -- Fold indices together when the types match:
+ | pk == pk'
+ -> StIndex pk (stixExpr_ConFold base)
+ (stixExpr_ConFold (StMachOp MO_Nat_Add [off, off']))
+ StIndex pk base off
+ -> StIndex pk (stixExpr_ConFold base) (stixExpr_ConFold off)
+
+ StMachOp mop args
+ -- For PrimOps, we first optimize the children, and then we try
+ -- our hand at some constant-folding.
+ -> stixMachOpFold mop (map stixExpr_ConFold args)
+ StReg (StixMagicId mid)
+ -- Replace register leaves with appropriate StixTrees for
+ -- the given target. MagicIds which map to a reg on this arch are left unchanged.
+ -- For the rest, BaseReg is taken to mean the address of the reg table
+ -- in MainCapability, and for all others we generate an indirection to
+ -- its location in the register table.
+ -> case get_MagicId_reg_or_addr mid of
+ Left realreg -> expr
+ Right baseRegAddr
+ -> case mid of
+ BaseReg -> stixExpr_ConFold baseRegAddr
+ other -> stixExpr_ConFold (StInd (magicIdPrimRep mid) baseRegAddr)
+ other
+ -> other
\end{code}
Now, try to constant-fold the PrimOps. The arguments have already
been optimized and folded.
\begin{code}
-primOpt
- :: PrimOp -- The operation from an StPrim
- -> [StixTree] -- The optimized arguments
- -> StixTree
-
-primOpt op arg@[StInt x]
- = case op of
- IntNegOp -> StInt (-x)
- IntAbsOp -> StInt (abs x)
- _ -> StPrim op arg
-
-primOpt op args@[StInt x, StInt y]
- = case op of
- CharGtOp -> StInt (if x > y then 1 else 0)
- CharGeOp -> StInt (if x >= y then 1 else 0)
- CharEqOp -> StInt (if x == y then 1 else 0)
- CharNeOp -> StInt (if x /= y then 1 else 0)
- CharLtOp -> StInt (if x < y then 1 else 0)
- CharLeOp -> StInt (if x <= y then 1 else 0)
- IntAddOp -> StInt (x + y)
- IntSubOp -> StInt (x - y)
- IntMulOp -> StInt (x * y)
- IntQuotOp -> StInt (x `quot` y)
- IntRemOp -> StInt (x `rem` y)
- IntGtOp -> StInt (if x > y then 1 else 0)
- IntGeOp -> StInt (if x >= y then 1 else 0)
- IntEqOp -> StInt (if x == y then 1 else 0)
- IntNeOp -> StInt (if x /= y then 1 else 0)
- IntLtOp -> StInt (if x < y then 1 else 0)
- IntLeOp -> StInt (if x <= y then 1 else 0)
- -- ToDo: WordQuotOp, WordRemOp.
- _ -> StPrim op args
+stixMachOpFold
+ :: MachOp -- The operation from an StMachOp
+ -> [StixExpr] -- The optimized arguments
+ -> StixExpr
+
+stixMachOpFold mop arg@[StInt x]
+ = case mop of
+ MO_NatS_Neg -> StInt (-x)
+ other -> StMachOp mop arg
+
+stixMachOpFold mop args@[StInt x, StInt y]
+ = case mop of
+ MO_32U_Gt -> StInt (if x > y then 1 else 0)
+ MO_32U_Ge -> StInt (if x >= y then 1 else 0)
+ MO_32U_Eq -> StInt (if x == y then 1 else 0)
+ MO_32U_Ne -> StInt (if x /= y then 1 else 0)
+ MO_32U_Lt -> StInt (if x < y then 1 else 0)
+ MO_32U_Le -> StInt (if x <= y then 1 else 0)
+ MO_Nat_Add -> StInt (x + y)
+ MO_Nat_Sub -> StInt (x - y)
+ MO_NatS_Mul -> StInt (x * y)
+ MO_NatS_Quot | y /= 0 -> StInt (x `quot` y)
+ MO_NatS_Rem | y /= 0 -> StInt (x `rem` y)
+ MO_NatS_Gt -> StInt (if x > y then 1 else 0)
+ MO_NatS_Ge -> StInt (if x >= y then 1 else 0)
+ MO_Nat_Eq -> StInt (if x == y then 1 else 0)
+ MO_Nat_Ne -> StInt (if x /= y then 1 else 0)
+ MO_NatS_Lt -> StInt (if x < y then 1 else 0)
+ MO_NatS_Le -> StInt (if x <= y then 1 else 0)
+ MO_Nat_Shl | y >= 0 && y < 32 -> do_shl x y
+ other -> StMachOp mop args
+ where
+ do_shl :: Integer -> Integer -> StixExpr
+ do_shl v 0 = StInt v
+ do_shl v n | n > 0 = do_shl (v*2) (n-1)
\end{code}
When possible, shift the constants to the right-hand side, so that we
possible.
\begin{code}
-primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x]
+stixMachOpFold op [x@(StInt _), y] | isCommutableMachOp op
+ = stixMachOpFold op [y, x]
\end{code}
We can often do something with constants of 0 and 1 ...
\begin{code}
-primOpt op args@[x, y@(StInt 0)]
- = case op of
- IntAddOp -> x
- IntSubOp -> x
- IntMulOp -> y
- AndOp -> y
- OrOp -> x
- XorOp -> x
- SllOp -> x
- SrlOp -> x
- ISllOp -> x
- ISraOp -> x
- ISrlOp -> x
- _ -> StPrim op args
-
-primOpt op args@[x, y@(StInt 1)]
- = case op of
- IntMulOp -> x
- IntQuotOp -> x
- IntRemOp -> StInt 0
- _ -> StPrim op args
+stixMachOpFold mop args@[x, y@(StInt 0)]
+ = case mop of
+ MO_Nat_Add -> x
+ MO_Nat_Sub -> x
+ MO_NatS_Mul -> y
+ MO_NatU_Mul -> y
+ MO_Nat_And -> y
+ MO_Nat_Or -> x
+ MO_Nat_Xor -> x
+ MO_Nat_Shl -> x
+ MO_Nat_Shr -> x
+ MO_Nat_Sar -> x
+ MO_Nat_Ne | x_is_comparison -> x
+ other -> StMachOp mop args
+ where
+ x_is_comparison
+ = case x of
+ StMachOp mopp [_, _] -> isComparisonMachOp mopp
+ _ -> False
+
+stixMachOpFold mop args@[x, y@(StInt 1)]
+ = case mop of
+ MO_NatS_Mul -> x
+ MO_NatU_Mul -> x
+ MO_NatS_Quot -> x
+ MO_NatU_Quot -> x
+ MO_NatS_Rem -> StInt 0
+ MO_NatU_Rem -> StInt 0
+ other -> StMachOp mop args
\end{code}
Now look for multiplication/division by powers of 2 (integers).
\begin{code}
-primOpt op args@[x, y@(StInt n)]
- = case op of
- IntMulOp -> case exactLog2 n of
- Nothing -> StPrim op args
- Just p -> StPrim ISllOp [x, StInt p]
- IntQuotOp -> case exactLog2 n of
- Nothing -> StPrim op args
- Just p -> StPrim ISrlOp [x, StInt p]
- _ -> StPrim op args
+stixMachOpFold mop args@[x, y@(StInt n)]
+ = case mop of
+ MO_NatS_Mul
+ -> case exactLog2 n of
+ Nothing -> unchanged
+ Just p -> StMachOp MO_Nat_Shl [x, StInt p]
+ MO_NatS_Quot
+ -> case exactLog2 n of
+ Nothing -> unchanged
+ Just p -> StMachOp MO_Nat_Shr [x, StInt p]
+ other
+ -> unchanged
+ where
+ unchanged = StMachOp mop args
\end{code}
Anything else is just too hard.
\begin{code}
-primOpt op args = StPrim op args
-\end{code}
-
------------------------------------------------------------------------------
-Fix up floating point operations for x86.
-
-The problem is that the code generator can't handle the weird register
-naming scheme for floating point registers on the x86, so we have to
-deal with memory-resident floating point values wherever possible.
-
-We therefore can't stand references to floating-point kinded temporary
-variables, and try to translate them into memory addresses wherever
-possible.
-
-\begin{code}
-floatFix :: [StixTree] -> [StixTree]
-floatFix trees = fltFix emptyUFM trees
-
-fltFix :: UniqFM StixTree -- mapping tmp vars to memory locations
- -> [StixTree]
- -> [StixTree]
-fltFix locs [] = []
-
--- The case we're interested in: loading a temporary from a memory
--- address. Eliminate the instruction and replace all future references
--- to the temporary with the memory address.
-fltFix locs ((StAssign rep (StReg (StixTemp uq _)) loc) : trees)
- | isFloatingRep rep = trace "found one" $ fltFix (addToUFM locs uq loc) trees
-
-fltFix locs ((StAssign rep src dst) : trees)
- = StAssign rep (fltFix1 locs src) (fltFix1 locs dst) : fltFix locs trees
-
-fltFix locs (tree : trees)
- = fltFix1 locs tree : fltFix locs trees
-
-
-fltFix1 :: UniqFM StixTree -> StixTree -> StixTree
-fltFix1 locs r@(StReg (StixTemp uq rep))
- | isFloatingRep rep = case lookupUFM locs uq of
- Nothing -> panic "fltFix1"
- Just tree -> trace "substed" $ tree
-
-fltFix1 locs (StIndex rep l r) =
- StIndex rep (fltFix1 locs l) (fltFix1 locs r)
-
-fltFix1 locs (StInd rep tree) =
- StInd rep (fltFix1 locs tree)
-
-fltFix1 locs (StAssign rep dst src) = panic "fltFix1: StAssign"
-
-fltFix1 locs (StJump tree) = StJump (fltFix1 locs tree)
-
-fltFix1 locs (StCondJump label tree) =
- StCondJump label (fltFix1 locs tree)
-
-fltFix1 locs (StPrim op trees) =
- StPrim op (map (fltFix1 locs) trees)
-
-fltFix1 locs (StCall f conv rep trees) =
- StCall f conv rep (map (fltFix1 locs) trees)
-
-fltFix1 locs tree = tree
+stixMachOpFold mop args = StMachOp mop args
\end{code}