X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FnativeGen%2FAsmCodeGen.lhs;h=e82bc8ec3da9077919cd3a292e8b5a66346ee73d;hb=4070b105490709e2fbc40ef926853fc93595b7a6;hp=d88986893c0e2638534456ff34e59380d87b4e3c;hpb=9d4c03805bafb6b1e1d47306b6a6c591c998e517;p=ghc-hetmet.git diff --git a/ghc/compiler/nativeGen/AsmCodeGen.lhs b/ghc/compiler/nativeGen/AsmCodeGen.lhs index d889868..e82bc8e 100644 --- a/ghc/compiler/nativeGen/AsmCodeGen.lhs +++ b/ghc/compiler/nativeGen/AsmCodeGen.lhs @@ -1,14 +1,15 @@ % -% (c) The AQUA Project, Glasgow University, 1993-1996 +% (c) The AQUA Project, Glasgow University, 1993-1998 % \begin{code} -#include "HsVersions.h" +module AsmCodeGen ( nativeCodeGen ) where -module AsmCodeGen ( writeRealAsm, dumpRealAsm ) where +#include "HsVersions.h" +#include "nativeGen/NCG.h" -IMP_Ubiq(){-uitous-} -IMPORT_1_3(IO(Handle)) +import IO ( Handle ) +import List ( intersperse ) import MachMisc import MachRegs @@ -18,13 +19,22 @@ import PprMach import AbsCStixGen ( genCodeAbstractC ) import AbsCSyn ( AbstractC, MagicId ) import AsmRegAlloc ( runRegAllocate ) -import OrdList ( OrdList ) import PrimOp ( commutableOp, PrimOp(..) ) -import PrimRep ( PrimRep{-instance Eq-} ) -import RegAllocInfo ( mkMRegsState, MRegsState ) -import Stix ( StixTree(..), StixReg(..), CodeSegment ) -import UniqSupply ( returnUs, thenUs, mapUs, SYN_IE(UniqSM) ) -import Unpretty ( uppPutStr, uppShow, uppAboves, SYN_IE(Unpretty) ) +import RegAllocInfo ( mkMRegsState, MRegsState, findReservedRegs ) +import Stix ( StixTree(..), StixReg(..), + pprStixTrees, ppStixTree, CodeSegment(..), + stixCountTempUses, stixSubst, + NatM, initNat, mapNat, + NatM_State, mkNatM_State, + uniqOfNatM_State, deltaOfNatM_State ) +import PrimRep ( isFloatingRep, PrimRep(..) ) +import UniqSupply ( returnUs, thenUs, mapUs, initUs, + initUs_, UniqSM, UniqSupply ) +import MachMisc ( IF_ARCH_i386(i386_insert_ffrees,) ) + +import OrdList ( fromOL, concatOL ) +import Outputable + \end{code} The 96/03 native-code generator has machine-independent and @@ -59,7 +69,7 @@ The machine-dependent bits break down as follows: machine instructions. \item[@PprMach@:] @pprInstr@ turns an @Instr@ into text (well, really - an @Unpretty@). + an @Doc@). \item[@RegAllocInfo@:] In the register allocator, we manipulate @MRegsState@s, which are @BitSet@s, one bit per machine register. @@ -73,43 +83,77 @@ The machine-dependent bits break down as follows: \end{description} So, here we go: -\begin{code} -writeRealAsm :: Handle -> AbstractC -> UniqSupply -> IO () - -writeRealAsm handle absC us - = uppPutStr handle 80 (runNCG absC us) - -dumpRealAsm :: AbstractC -> UniqSupply -> String - -dumpRealAsm absC us = uppShow 80 (runNCG absC us) -runNCG absC - = genCodeAbstractC absC `thenUs` \ treelists -> - let - stix = map (map genericOpt) treelists - in - codeGen stix +\begin{code} +nativeCodeGen :: AbstractC -> UniqSupply -> (SDoc, SDoc) +nativeCodeGen absC us + = let (stixRaw, us1) = initUs us (genCodeAbstractC absC) + stixOpt = map genericOpt stixRaw + insns = initUs_ us1 (codeGen stixOpt) + debug_stix = vcat (map pprStixTrees stixOpt) + in + trace "nativeGen: begin" + (debug_stix, insns) \end{code} @codeGen@ is the top-level code-generation function: \begin{code} -codeGen :: [[StixTree]] -> UniqSM Unpretty +codeGen :: [[StixTree]] -> UniqSM SDoc -codeGen trees - = mapUs genMachCode trees `thenUs` \ dynamic_codes -> +codeGen stixFinal + = mapUs genMachCode stixFinal `thenUs` \ dynamic_codes -> let - static_instrs = scheduleMachCode dynamic_codes + fp_kludge :: [Instr] -> [Instr] + fp_kludge = IF_ARCH_i386(i386_insert_ffrees,id) + + static_instrss :: [[Instr]] + static_instrss = map fp_kludge (scheduleMachCode dynamic_codes) + docs = map (vcat . map pprInstr) static_instrss + + -- for debugging only + docs_prealloc = map (vcat . map pprInstr . fromOL) + dynamic_codes + text_prealloc = vcat (intersperse (char ' ' $$ char ' ') docs_prealloc) in - returnUs (uppAboves (map pprInstr static_instrs)) + -- trace (showSDoc text_prealloc) ( + returnUs (vcat (intersperse (char ' ' + $$ ptext SLIT("# ___stg_split_marker") + $$ char ' ') + docs)) + -- ) \end{code} -Top level code generator for a chunk of stix code: -\begin{code} -genMachCode :: [StixTree] -> UniqSM InstrList +Top level code generator for a chunk of stix code. For this part of +the computation, we switch from the UniqSM monad to the NatM monad. +The latter carries not only a Unique, but also an Int denoting the +current C stack pointer offset in the generated code; this is needed +for creating correct spill offsets on architectures which don't offer, +or for which it would be prohibitively expensive to employ, a frame +pointer register. Viz, x86. -genMachCode stmts - = mapUs stmt2Instrs stmts `thenUs` \ blocks -> - returnUs (foldr (.) id blocks asmVoid) +The offset is measured in bytes, and indicates the difference between +the current (simulated) C stack-ptr and the value it was at the +beginning of the block. For stacks which grow down, this value should +be either zero or negative. + +Switching between the two monads whilst carrying along the same Unique +supply breaks abstraction. Is that bad? + +\begin{code} +genMachCode :: [StixTree] -> UniqSM InstrBlock + +genMachCode stmts initial_us + = let initial_st = mkNatM_State initial_us 0 + (blocks, final_st) = initNat initial_st + (mapNat stmt2Instrs stmts) + instr_list = concatOL blocks + final_us = uniqOfNatM_State final_st + final_delta = deltaOfNatM_State final_st + in + if final_delta == 0 + then (instr_list, final_us) + else pprPanic "genMachCode: nonzero final delta" + (int final_delta) \end{code} The next bit does the code scheduling. The scheduler must also deal @@ -118,10 +162,10 @@ exposed via the OrdList, but more might occur, so further analysis might be needed. \begin{code} -scheduleMachCode :: [InstrList] -> [Instr] +scheduleMachCode :: [InstrBlock] -> [[Instr]] scheduleMachCode - = concat . map (runRegAllocate freeRegsState reservedRegs) + = map (runRegAllocate freeRegsState findReservedRegs) where freeRegsState = mkMRegsState (extractMappedRegNos freeRegs) \end{code} @@ -143,72 +187,95 @@ have introduced some new opportunities for constant-folding wrt address manipulations. \begin{code} -genericOpt :: StixTree -> StixTree +genericOpt :: [StixTree] -> [StixTree] +genericOpt = map stixConFold . stixPeep + + + +stixPeep :: [StixTree] -> [StixTree] + +-- This transformation assumes that the temp assigned to in t1 +-- is not assigned to in t2; for otherwise the target of the +-- second assignment would be substituted for, giving nonsense +-- code. As far as I can see, StixTemps are only ever assigned +-- to once. It would be nice to be sure! +stixPeep ( t1@(StAssign pka (StReg (StixTemp u pk)) rhs) + : t2 + : ts ) + | stixCountTempUses u t2 == 1 + && sum (map (stixCountTempUses u) ts) == 0 + = trace ("nativeGen: stixInline: " ++ showSDoc (ppStixTree rhs)) + (stixPeep (stixSubst u rhs t2 : ts)) + +stixPeep (t1:t2:ts) = t1 : stixPeep (t2:ts) +stixPeep [t1] = [t1] +stixPeep [] = [] \end{code} For most nodes, just optimize the children. \begin{code} -genericOpt (StInd pk addr) = StInd pk (genericOpt addr) +stixConFold :: StixTree -> StixTree -genericOpt (StAssign pk dst src) - = StAssign pk (genericOpt dst) (genericOpt src) +stixConFold (StInd pk addr) = StInd pk (stixConFold addr) -genericOpt (StJump addr) = StJump (genericOpt addr) +stixConFold (StAssign pk dst src) + = StAssign pk (stixConFold dst) (stixConFold src) -genericOpt (StCondJump addr test) - = StCondJump addr (genericOpt test) +stixConFold (StJump addr) = StJump (stixConFold addr) -genericOpt (StCall fn pk args) - = StCall fn pk (map genericOpt args) +stixConFold (StCondJump addr test) + = StCondJump addr (stixConFold test) + +stixConFold (StCall fn cconv pk args) + = StCall fn cconv pk (map stixConFold args) \end{code} Fold indices together when the types match: \begin{code} -genericOpt (StIndex pk (StIndex pk' base off) off') +stixConFold (StIndex pk (StIndex pk' base off) off') | pk == pk' - = StIndex pk (genericOpt base) - (genericOpt (StPrim IntAddOp [off, off'])) + = StIndex pk (stixConFold base) + (stixConFold (StPrim IntAddOp [off, off'])) -genericOpt (StIndex pk base off) - = StIndex pk (genericOpt base) (genericOpt off) +stixConFold (StIndex pk base off) + = StIndex pk (stixConFold base) (stixConFold off) \end{code} For PrimOps, we first optimize the children, and then we try our hand at some constant-folding. \begin{code} -genericOpt (StPrim op args) = primOpt op (map genericOpt args) +stixConFold (StPrim op args) = stixPrimFold op (map stixConFold args) \end{code} Replace register leaves with appropriate StixTrees for the given target. \begin{code} -genericOpt leaf@(StReg (StixMagicId id)) +stixConFold leaf@(StReg (StixMagicId id)) = case (stgReg id) of - Always tree -> genericOpt tree + Always tree -> stixConFold tree Save _ -> leaf -genericOpt other = other +stixConFold other = other \end{code} Now, try to constant-fold the PrimOps. The arguments have already been optimized and folded. \begin{code} -primOpt +stixPrimFold :: PrimOp -- The operation from an StPrim -> [StixTree] -- The optimized arguments -> StixTree -primOpt op arg@[StInt x] +stixPrimFold op arg@[StInt x] = case op of IntNegOp -> StInt (-x) - IntAbsOp -> StInt (abs x) _ -> StPrim op arg -primOpt op args@[StInt x, StInt y] +stixPrimFold op args@[StInt x, StInt y] = case op of CharGtOp -> StInt (if x > y then 1 else 0) CharGeOp -> StInt (if x >= y then 1 else 0) @@ -227,6 +294,7 @@ primOpt op args@[StInt x, StInt y] IntNeOp -> StInt (if x /= y then 1 else 0) IntLtOp -> StInt (if x < y then 1 else 0) IntLeOp -> StInt (if x <= y then 1 else 0) + -- ToDo: WordQuotOp, WordRemOp. _ -> StPrim op args \end{code} @@ -236,28 +304,34 @@ also assume that constants have been shifted to the right when possible. \begin{code} -primOpt op [x@(StInt _), y] | commutableOp op = primOpt op [y, x] +stixPrimFold op [x@(StInt _), y] | commutableOp op = stixPrimFold op [y, x] \end{code} We can often do something with constants of 0 and 1 ... \begin{code} -primOpt op args@[x, y@(StInt 0)] +stixPrimFold op args@[x, y@(StInt 0)] = case op of IntAddOp -> x IntSubOp -> x IntMulOp -> y AndOp -> y OrOp -> x + XorOp -> x SllOp -> x - SraOp -> x SrlOp -> x ISllOp -> x ISraOp -> x ISrlOp -> x + IntNeOp | is_comparison -> x _ -> StPrim op args + where + is_comparison + = case x of + StPrim opp [_, _] -> opp `elem` comparison_ops + _ -> False -primOpt op args@[x, y@(StInt 1)] +stixPrimFold op args@[x, y@(StInt 1)] = case op of IntMulOp -> x IntQuotOp -> x @@ -268,19 +342,30 @@ primOpt op args@[x, y@(StInt 1)] Now look for multiplication/division by powers of 2 (integers). \begin{code} -primOpt op args@[x, y@(StInt n)] +stixPrimFold op args@[x, y@(StInt n)] = case op of IntMulOp -> case exactLog2 n of Nothing -> StPrim op args - Just p -> StPrim SllOp [x, StInt p] + Just p -> StPrim ISllOp [x, StInt p] IntQuotOp -> case exactLog2 n of Nothing -> StPrim op args - Just p -> StPrim SraOp [x, StInt p] + Just p -> StPrim ISrlOp [x, StInt p] _ -> StPrim op args \end{code} Anything else is just too hard. \begin{code} -primOpt op args = StPrim op args +stixPrimFold op args = StPrim op args \end{code} + +\begin{code} +comparison_ops + = [ CharGtOp , CharGeOp , CharEqOp , CharNeOp , CharLtOp , CharLeOp, + IntGtOp , IntGeOp , IntEqOp , IntNeOp , IntLtOp , IntLeOp, + WordGtOp , WordGeOp , WordEqOp , WordNeOp , WordLtOp , WordLeOp, + AddrGtOp , AddrGeOp , AddrEqOp , AddrNeOp , AddrLtOp , AddrLeOp, + FloatGtOp , FloatGeOp , FloatEqOp , FloatNeOp , FloatLtOp , FloatLeOp, + DoubleGtOp, DoubleGeOp, DoubleEqOp, DoubleNeOp, DoubleLtOp, DoubleLeOp + ] +\end{code} \ No newline at end of file