X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FnativeGen%2FAsmCodeGen.lhs;h=17f184ad6231af668765c403992fe3b078d4f0d0;hb=6254fd4ab7c5798599e58b48896c9e284222f26f;hp=73c2935eee8cb68b1d8dc74a0a60425f88fd8cf2;hpb=b7b61f608b38752327aadfa9af8e9eb053a5942f;p=ghc-hetmet.git diff --git a/ghc/compiler/nativeGen/AsmCodeGen.lhs b/ghc/compiler/nativeGen/AsmCodeGen.lhs index 73c2935..17f184a 100644 --- a/ghc/compiler/nativeGen/AsmCodeGen.lhs +++ b/ghc/compiler/nativeGen/AsmCodeGen.lhs @@ -6,8 +6,8 @@ module AsmCodeGen ( nativeCodeGen ) where #include "HsVersions.h" +#include "nativeGen/NCG.h" -import IO ( Handle ) import List ( intersperse ) import MachMisc @@ -17,19 +17,24 @@ import PprMach import AbsCStixGen ( genCodeAbstractC ) import AbsCSyn ( AbstractC, MagicId ) +import AbsCUtils ( mkAbsCStmtList ) import AsmRegAlloc ( runRegAllocate ) -import OrdList ( OrdList ) import PrimOp ( commutableOp, PrimOp(..) ) -import RegAllocInfo ( mkMRegsState, MRegsState ) -import Stix ( StixTree(..), StixReg(..), pprStixTrees ) -import PrimRep ( isFloatingRep ) +import RegAllocInfo ( findReservedRegs ) +import Stix ( StixTree(..), StixReg(..), + pprStixTrees, pprStixTree, CodeSegment(..), + stixCountTempUses, stixSubst, + NatM, initNat, mapNat, + NatM_State, mkNatM_State, + uniqOfNatM_State, deltaOfNatM_State ) import UniqSupply ( returnUs, thenUs, mapUs, initUs, - initUs_, UniqSM, UniqSupply ) -import UniqFM ( UniqFM, emptyUFM, addToUFM, lookupUFM ) -import Outputable + initUs_, UniqSM, UniqSupply, + lazyThenUs, lazyMapUs ) +import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) ) + +import OrdList ( fromOL, concatOL ) +import Outputable -import GlaExts (trace) --tmp -#include "nativeGen/NCG.h" \end{code} The 96/03 native-code generator has machine-independent and @@ -82,56 +87,80 @@ So, here we go: \begin{code} nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc) nativeCodeGen absC us - = let (stixRaw, us1) = initUs us (genCodeAbstractC absC) - stixOpt = map (map genericOpt) stixRaw - stixFinal = map x86floatFix stixOpt - insns = initUs_ us1 (codeGen stixFinal) - debug_stix = vcat (map pprStixTrees stixFinal) - in - (debug_stix, insns) - -#if i386_TARGET_ARCH -x86floatFix = floatFix -#else -x86floatFix = id -#endif - + = let absCstmts = mkAbsCStmtList absC + (sdoc_pairs, us1) = initUs us (lazyMapUs absCtoNat absCstmts) + stix_sdocs = map fst sdoc_pairs + insn_sdocs = map snd sdoc_pairs + + insn_sdoc = my_vcat insn_sdocs + stix_sdoc = vcat stix_sdocs + +# if 1 /* ifdef NCG_DEBUG */ + my_trace m x = trace m x + my_vcat sds = vcat (intersperse (char ' ' + $$ ptext SLIT("# ___ncg_debug_marker") + $$ char ' ') + sds) +# else + my_vcat sds = vcat sds + my_trace m x = x +# endif + in + my_trace "nativeGen: begin" + (stix_sdoc, insn_sdoc) + + +absCtoNat :: AbstractC -> UniqSM (SDoc, SDoc) +absCtoNat absC + = genCodeAbstractC absC `thenUs` \ stixRaw -> + genericOpt stixRaw `bind` \ stixOpt -> + genMachCode stixOpt `thenUs` \ pre_regalloc -> + regAlloc pre_regalloc `bind` \ almost_final -> + x86fp_kludge almost_final `bind` \ final_mach_code -> + vcat (map pprInstr final_mach_code) `bind` \ final_sdoc -> + pprStixTrees stixOpt `bind` \ stix_sdoc -> + returnUs (stix_sdoc, final_sdoc) + where + bind f x = x f + + x86fp_kludge :: [Instr] -> [Instr] + x86fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id) + + regAlloc :: InstrBlock -> [Instr] + regAlloc = runRegAllocate allocatableRegs findReservedRegs \end{code} -@codeGen@ is the top-level code-generation function: -\begin{code} -codeGen :: [[StixTree]] -> UniqSM SDoc +Top level code generator for a chunk of stix code. For this part of +the computation, we switch from the UniqSM monad to the NatM monad. +The latter carries not only a Unique, but also an Int denoting the +current C stack pointer offset in the generated code; this is needed +for creating correct spill offsets on architectures which don't offer, +or for which it would be prohibitively expensive to employ, a frame +pointer register. Viz, x86. -codeGen stixFinal - = mapUs genMachCode stixFinal `thenUs` \ dynamic_codes -> - let - static_instrss = scheduleMachCode dynamic_codes - docs = map (vcat . map pprInstr) static_instrss - in - returnUs (vcat (intersperse (char ' ' $$ char ' ') docs)) -\end{code} +The offset is measured in bytes, and indicates the difference between +the current (simulated) C stack-ptr and the value it was at the +beginning of the block. For stacks which grow down, this value should +be either zero or negative. -Top level code generator for a chunk of stix code: -\begin{code} -genMachCode :: [StixTree] -> UniqSM InstrList - -genMachCode stmts - = mapUs stmt2Instrs stmts `thenUs` \ blocks -> - returnUs (foldr (.) id blocks asmVoid) -\end{code} - -The next bit does the code scheduling. The scheduler must also deal -with register allocation of temporaries. Much parallelism can be -exposed via the OrdList, but more might occur, so further analysis -might be needed. +Switching between the two monads whilst carrying along the same Unique +supply breaks abstraction. Is that bad? \begin{code} -scheduleMachCode :: [InstrList] -> [[Instr]] - -scheduleMachCode - = map (runRegAllocate freeRegsState reservedRegs) - where - freeRegsState = mkMRegsState (extractMappedRegNos freeRegs) +genMachCode :: [StixTree] -> UniqSM InstrBlock + +genMachCode stmts initial_us + = let initial_st = mkNatM_State initial_us 0 + (blocks, final_st) = initNat initial_st + (mapNat stmt2Instrs stmts) + instr_list = concatOL blocks + final_us = uniqOfNatM_State final_st + final_delta = deltaOfNatM_State final_st + in + if final_delta == 0 + then (instr_list, final_us) + else pprPanic "genMachCode: nonzero final delta" + (int final_delta) \end{code} %************************************************************************ @@ -151,71 +180,104 @@ have introduced some new opportunities for constant-folding wrt address manipulations. \begin{code} -genericOpt :: StixTree -> StixTree +genericOpt :: [StixTree] -> [StixTree] +genericOpt = map stixConFold . stixPeep + + + +stixPeep :: [StixTree] -> [StixTree] + +-- This transformation assumes that the temp assigned to in t1 +-- is not assigned to in t2; for otherwise the target of the +-- second assignment would be substituted for, giving nonsense +-- code. As far as I can see, StixTemps are only ever assigned +-- to once. It would be nice to be sure! + +stixPeep ( t1@(StAssign pka (StReg (StixTemp u pk)) rhs) + : t2 + : ts ) + | stixCountTempUses u t2 == 1 + && sum (map (stixCountTempUses u) ts) == 0 + = +# ifdef NCG_DEBUG + trace ("nativeGen: inlining " ++ showSDoc (pprStixTree rhs)) +# endif + (stixPeep (stixSubst u rhs t2 : ts)) + +stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts) +stixPeep [t1] = [t1] +stixPeep [] = [] + +-- disable stix inlining until we figure out how to fix the +-- latent bugs in the register allocator which are exposed by +-- the inliner. +--stixPeep = id \end{code} For most nodes, just optimize the children. \begin{code} -genericOpt (StInd pk addr) = StInd pk (genericOpt addr) +stixConFold :: StixTree -> StixTree + +stixConFold (StInd pk addr) = StInd pk (stixConFold addr) -genericOpt (StAssign pk dst src) - = StAssign pk (genericOpt dst) (genericOpt src) +stixConFold (StAssign pk dst src) + = StAssign pk (stixConFold dst) (stixConFold src) -genericOpt (StJump addr) = StJump (genericOpt addr) +stixConFold (StJump addr) = StJump (stixConFold addr) -genericOpt (StCondJump addr test) - = StCondJump addr (genericOpt test) +stixConFold (StCondJump addr test) + = StCondJump addr (stixConFold test) -genericOpt (StCall fn cconv pk args) - = StCall fn cconv pk (map genericOpt args) +stixConFold (StCall fn cconv pk args) + = StCall fn cconv pk (map stixConFold args) \end{code} Fold indices together when the types match: \begin{code} -genericOpt (StIndex pk (StIndex pk' base off) off') +stixConFold (StIndex pk (StIndex pk' base off) off') | pk == pk' - = StIndex pk (genericOpt base) - (genericOpt (StPrim IntAddOp [off, off'])) + = StIndex pk (stixConFold base) + (stixConFold (StPrim IntAddOp [off, off'])) -genericOpt (StIndex pk base off) - = StIndex pk (genericOpt base) (genericOpt off) +stixConFold (StIndex pk base off) + = StIndex pk (stixConFold base) (stixConFold off) \end{code} For PrimOps, we first optimize the children, and then we try our hand at some constant-folding. \begin{code} -genericOpt (StPrim op args) = primOpt op (map genericOpt args) +stixConFold (StPrim op args) = stixPrimFold op (map stixConFold args) \end{code} Replace register leaves with appropriate StixTrees for the given target. \begin{code} -genericOpt leaf@(StReg (StixMagicId id)) +stixConFold leaf@(StReg (StixMagicId id)) = case (stgReg id) of - Always tree -> genericOpt tree + Always tree -> stixConFold tree Save _ -> leaf -genericOpt other = other +stixConFold other = other \end{code} Now, try to constant-fold the PrimOps. The arguments have already been optimized and folded. \begin{code} -primOpt +stixPrimFold :: PrimOp -- The operation from an StPrim -> [StixTree] -- The optimized arguments -> StixTree -primOpt op arg@[StInt x] +stixPrimFold op arg@[StInt x] = case op of IntNegOp -> StInt (-x) _ -> StPrim op arg -primOpt op args@[StInt x, StInt y] +stixPrimFold op args@[StInt x, StInt y] = case op of CharGtOp -> StInt (if x > y then 1 else 0) CharGeOp -> StInt (if x >= y then 1 else 0) @@ -244,13 +306,13 @@ also assume that constants have been shifted to the right when possible. \begin{code} -primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x] +stixPrimFold op [x@(StInt _), y] | commutableOp op = stixPrimFold op [y, x] \end{code} We can often do something with constants of 0 and 1 ... \begin{code} -primOpt op args@[x, y@(StInt 0)] +stixPrimFold op args@[x, y@(StInt 0)] = case op of IntAddOp -> x IntSubOp -> x @@ -263,9 +325,15 @@ primOpt op args@[x, y@(StInt 0)] ISllOp -> x ISraOp -> x ISrlOp -> x + IntNeOp | is_comparison -> x _ -> StPrim op args + where + is_comparison + = case x of + StPrim opp [_, _] -> opp `elem` comparison_ops + _ -> False -primOpt op args@[x, y@(StInt 1)] +stixPrimFold op args@[x, y@(StInt 1)] = case op of IntMulOp -> x IntQuotOp -> x @@ -276,7 +344,7 @@ primOpt op args@[x, y@(StInt 1)] Now look for multiplication/division by powers of 2 (integers). \begin{code} -primOpt op args@[x, y@(StInt n)] +stixPrimFold op args@[x, y@(StInt n)] = case op of IntMulOp -> case exactLog2 n of Nothing -> StPrim op args @@ -290,66 +358,16 @@ primOpt op args@[x, y@(StInt n)] Anything else is just too hard. \begin{code} -primOpt op args = StPrim op args +stixPrimFold op args = StPrim op args \end{code} ------------------------------------------------------------------------------ -Fix up floating point operations for x86. - -The problem is that the code generator can't handle the weird register -naming scheme for floating point registers on the x86, so we have to -deal with memory-resident floating point values wherever possible. - -We therefore can't stand references to floating-point kinded temporary -variables, and try to translate them into memory addresses wherever -possible. - \begin{code} -floatFix :: [StixTree] -> [StixTree] -floatFix trees = fltFix emptyUFM trees - -fltFix :: UniqFM StixTree -- mapping tmp vars to memory locations - -> [StixTree] - -> [StixTree] -fltFix locs [] = [] - --- The case we're interested in: loading a temporary from a memory --- address. Eliminate the instruction and replace all future references --- to the temporary with the memory address. -fltFix locs ((StAssign rep (StReg (StixTemp uq _)) loc) : trees) - | isFloatingRep rep = trace "found one" $ fltFix (addToUFM locs uq loc) trees - -fltFix locs ((StAssign rep src dst) : trees) - = StAssign rep (fltFix1 locs src) (fltFix1 locs dst) : fltFix locs trees - -fltFix locs (tree : trees) - = fltFix1 locs tree : fltFix locs trees - - -fltFix1 :: UniqFM StixTree -> StixTree -> StixTree -fltFix1 locs r@(StReg (StixTemp uq rep)) - | isFloatingRep rep = case lookupUFM locs uq of - Nothing -> panic "fltFix1" - Just tree -> trace "substed" $ tree - -fltFix1 locs (StIndex rep l r) = - StIndex rep (fltFix1 locs l) (fltFix1 locs r) - -fltFix1 locs (StInd rep tree) = - StInd rep (fltFix1 locs tree) - -fltFix1 locs (StAssign rep dst src) = panic "fltFix1: StAssign" - -fltFix1 locs (StJump tree) = StJump (fltFix1 locs tree) - -fltFix1 locs (StCondJump lbl tree) = - StCondJump lbl (fltFix1 locs tree) - -fltFix1 locs (StPrim op trees) = - StPrim op (map (fltFix1 locs) trees) - -fltFix1 locs (StCall f conv rep trees) = - StCall f conv rep (map (fltFix1 locs) trees) - -fltFix1 locs tree = tree +comparison_ops + = [ CharGtOp , CharGeOp , CharEqOp , CharNeOp , CharLtOp , CharLeOp, + IntGtOp , IntGeOp , IntEqOp , IntNeOp , IntLtOp , IntLeOp, + WordGtOp , WordGeOp , WordEqOp , WordNeOp , WordLtOp , WordLeOp, + AddrGtOp , AddrGeOp , AddrEqOp , AddrNeOp , AddrLtOp , AddrLeOp, + FloatGtOp , FloatGeOp , FloatEqOp , FloatNeOp , FloatLtOp , FloatLeOp, + DoubleGtOp, DoubleGeOp, DoubleEqOp, DoubleNeOp, DoubleLtOp, DoubleLeOp + ] \end{code}