X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FnativeGen%2FMachMisc.lhs;h=f7538bb3c30b7e2a6bd2c5c971209a4d4ec0d2b9;hb=9fe0837c3a1c9d986fe8b5b581083829bdcc52f5;hp=b6ba84fa0f5c7bba0765a0a40fd3524b3b71f0fd;hpb=5adf2314bfe7329e57cc956f02d0e566ae9569c9;p=ghc-hetmet.git diff --git a/ghc/compiler/nativeGen/MachMisc.lhs b/ghc/compiler/nativeGen/MachMisc.lhs index b6ba84f..f7538bb 100644 --- a/ghc/compiler/nativeGen/MachMisc.lhs +++ b/ghc/compiler/nativeGen/MachMisc.lhs @@ -20,40 +20,42 @@ module MachMisc ( fmtAsmLbl, exactLog2, - stixFor_stdout, stixFor_stderr, stixFor_stdin, - Instr(..), IF_ARCH_i386(Operand(..) COMMA,) Cond(..), - Size(..) - + Size(..), + IF_ARCH_i386(i386_insert_ffrees COMMA,) + #if alpha_TARGET_ARCH , RI(..) #endif #if i386_TARGET_ARCH #endif #if sparc_TARGET_ARCH - , RI(..), riZero + RI(..), riZero, fpRelEA, moveSp, fPair #endif ) where #include "HsVersions.h" --- #include "config.h" +#include "../includes/config.h" import AbsCSyn ( MagicId(..) ) import AbsCUtils ( magicIdPrimRep ) -import CLabel ( CLabel ) -import Const ( mkMachInt, Literal(..) ) +import CLabel ( CLabel, isAsmTemp ) +import Literal ( mkMachInt, Literal(..) ) import MachRegs ( stgReg, callerSaves, RegLoc(..), Imm(..), Reg(..), MachRegsAddr(..) +# if sparc_TARGET_ARCH + ,fp, sp +# endif ) import PrimRep ( PrimRep(..) ) -import SMRep ( SMRep(..) ) -import Stix ( StixTree(..), StixReg(..), CodeSegment ) +import Stix ( StixTree(..), StixReg(..), CodeSegment, DestInfo(..) ) import Panic ( panic ) -import Char ( isDigit ) import GlaExts ( word2Int#, int2Word#, shiftRL#, and#, (/=#) ) -import Outputable ( text ) +import Outputable ( pprPanic, ppr ) +import IOExts ( trace ) +import FastTypes \end{code} \begin{code} @@ -76,55 +78,8 @@ fmtAsmLbl s -} '$' : s ,{-otherwise-} - s + '.':'L':s ) - ---------------------------- -stixFor_stdout, stixFor_stderr, stixFor_stdin :: StixTree -#if i386_TARGET_ARCH --- Linux glibc 2 / libc6 -stixFor_stdout = StInd PtrRep (StLitLbl (text "stdout")) -stixFor_stderr = StInd PtrRep (StLitLbl (text "stderr")) -stixFor_stdin = StInd PtrRep (StLitLbl (text "stdin")) -#endif - -#if alpha_TARGET_ARCH -stixFor_stdout = error "stixFor_stdout: not implemented for Alpha" -stixFor_stderr = error "stixFor_stderr: not implemented for Alpha" -stixFor_stdin = error "stixFor_stdin: not implemented for Alpha" -#endif - -#if sparc_TARGET_ARCH -stixFor_stdout = error "stixFor_stdout: not implemented for Sparc" -stixFor_stderr = error "stixFor_stderr: not implemented for Sparc" -stixFor_stdin = error "stixFor_stdin: not implemented for Sparc" -#endif - -#if 0 -Here's some old stuff from which it shouldn't be too hard to -implement the above for Alpha/Sparc. - -cvtLitLit :: String -> String - --- --- Rather than relying on guessing, use FILE_SIZE to compute the --- _iob offsets. --- -cvtLitLit "stdin" = IF_ARCH_alpha("_iob+0" {-probably OK...-} - ,IF_ARCH_i386("stdin" - ,IF_ARCH_sparc("__iob+0x0"{-probably OK...-} - ,))) - -cvtLitLit "stdout" = IF_ARCH_alpha("_iob+"++show (``FILE_SIZE''::Int) - ,IF_ARCH_i386("stdout" - ,IF_ARCH_sparc("__iob+"++show (``FILE_SIZE''::Int) - ,))) -cvtLitLit "stderr" = IF_ARCH_alpha("_iob+"++show (2*(``FILE_SIZE''::Int)) - ,IF_ARCH_i386("stderr" - ,IF_ARCH_sparc("__iob+"++show (2*(``FILE_SIZE''::Int)) - ,))) -#endif - \end{code} % ---------------------------------------------------------------- @@ -146,9 +101,9 @@ sizeOf :: PrimRep -> Integer{-in bytes-} -- the result is an Integer only because it's more convenient sizeOf pr = case (primRepToSize pr) of - IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2; L -> 4; SF -> 4;-} _ -> 8},) - IF_ARCH_sparc({B -> 1; BU -> 1; {-HW -> 2; HWU -> 2;-} W -> 4; {-D -> 8;-} F -> 4; DF -> 8},) - IF_ARCH_i386( {B -> 1; {-S -> 2;-} L -> 4; F -> 4; DF -> 8 },) + IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2;-} L -> 4; {-SF -> 4;-} _ -> 8},) + IF_ARCH_sparc({B -> 1; BU -> 1; W -> 4; F -> 4; DF -> 8},) + IF_ARCH_i386( {B -> 1; BU -> 1; L -> 4; F -> 4; DF -> 8 },) \end{code} % - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -214,11 +169,11 @@ exactLog2 x = if (x <= 0 || x >= 2147483648) then Nothing else - case (fromInteger x) of { I# x# -> + case iUnbox (fromInteger x) of { x# -> if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then Nothing else - Just (toInteger (I# (pow2 x#))) + Just (toInteger (iBox (pow2 x#))) } where shiftr x y = shiftRL# x y @@ -285,7 +240,7 @@ data Size | BU -- | W -- word (2 bytes): UNUSED -- | WU -- : UNUSED --- | L -- longword (4 bytes): UNUSED + | L -- longword (4 bytes) | Q -- quadword (8 bytes) -- | FF -- VAX F-style floating pt: UNUSED -- | GF -- VAX G-style floating pt: UNUSED @@ -294,20 +249,17 @@ data Size | TF -- IEEE double-precision floating pt #endif #if i386_TARGET_ARCH - = B -- byte (lower) --- | HB -- higher byte **UNUSED** --- | S -- : UNUSED - | L + = B -- byte (signed, JRS:??lower??) + | BU -- byte, unsigned + | L -- word32 | F -- IEEE single-precision floating pt | DF -- IEEE single-precision floating pt + | F80 -- Intel 80-bit internal FP format; only used for spilling #endif #if sparc_TARGET_ARCH = B -- byte (signed) | BU -- byte (unsigned) --- | HW -- halfword, 2 bytes (signed): UNUSED --- | HWU -- halfword, 2 bytes (unsigned): UNUSED | W -- word, 4 bytes --- | D -- doubleword, 8 bytes: UNUSED | F -- IEEE single-precision floating pt | DF -- IEEE single-precision floating pt #endif @@ -319,7 +271,11 @@ primRepToSize CodePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( primRepToSize DataPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize RetRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize CostCentreRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) -primRepToSize CharRep = IF_ARCH_alpha( BU, IF_ARCH_i386( L, IF_ARCH_sparc( BU,))) +primRepToSize CharRep = IF_ARCH_alpha( L, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) + +primRepToSize Int8Rep = IF_ARCH_alpha( B, IF_ARCH_i386( B, IF_ARCH_sparc( B ,))) +primRepToSize Word8Rep = IF_ARCH_alpha( BU, IF_ARCH_i386( BU, IF_ARCH_sparc( BU,))) + primRepToSize IntRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize WordRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize AddrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) @@ -327,9 +283,15 @@ primRepToSize FloatRep = IF_ARCH_alpha( TF, IF_ARCH_i386( F, IF_ARCH_sparc( primRepToSize DoubleRep = IF_ARCH_alpha( TF, IF_ARCH_i386( DF,IF_ARCH_sparc( DF,))) primRepToSize ArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize ByteArrayRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) +primRepToSize PrimPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize WeakPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) -primRepToSize ForeignObjRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) +primRepToSize ForeignObjRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) +primRepToSize BCORep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize StablePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) +primRepToSize ThreadIdRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) +-- SUP: Wrong!!! Only for testing the rest of the NCG +primRepToSize Word64Rep = trace "primRepToSize: Word64Rep not handled" B +primRepToSize Int64Rep = trace "primRepToSize: Int64Rep not handled" B \end{code} %************************************************************************ @@ -350,6 +312,8 @@ data Instr String -- the literal string | DATA Size [Imm] + | DELTA Int -- specify current stack offset for + -- benefit of subsequent passes \end{code} \begin{code} @@ -435,6 +399,53 @@ data RI #endif {- alpha_TARGET_ARCH -} \end{code} +Intel, in their infinite wisdom, selected a stack model for floating +point registers on x86. That might have made sense back in 1979 -- +nowadays we can see it for the nonsense it really is. A stack model +fits poorly with the existing nativeGen infrastructure, which assumes +flat integer and FP register sets. Prior to this commit, nativeGen +could not generate correct x86 FP code -- to do so would have meant +somehow working the register-stack paradigm into the register +allocator and spiller, which sounds very difficult. + +We have decided to cheat, and go for a simple fix which requires no +infrastructure modifications, at the expense of generating ropey but +correct FP code. All notions of the x86 FP stack and its insns have +been removed. Instead, we pretend (to the instruction selector and +register allocator) that x86 has six floating point registers, %fake0 +.. %fake5, which can be used in the usual flat manner. We further +claim that x86 has floating point instructions very similar to SPARC +and Alpha, that is, a simple 3-operand register-register arrangement. +Code generation and register allocation proceed on this basis. + +When we come to print out the final assembly, our convenient fiction +is converted to dismal reality. Each fake instruction is +independently converted to a series of real x86 instructions. +%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg +arithmetic operations, the two operands are pushed onto the top of the +FP stack, the operation done, and the result copied back into the +relevant register. There are only six %fake registers because 2 are +needed for the translation, and x86 has 8 in total. + +The translation is inefficient but is simple and it works. A cleverer +translation would handle a sequence of insns, simulating the FP stack +contents, would not impose a fixed mapping from %fake to %st regs, and +hopefully could avoid most of the redundant reg-reg moves of the +current translation. + +We might as well make use of whatever unique FP facilities Intel have +chosen to bless us with (let's not be churlish, after all). +Hence GLDZ and GLD1. Bwahahahahahahaha! + +LATER (10 Nov 2000): idiv gives problems with the register spiller, +because the spiller is simpleminded and because idiv has fixed uses of +%eax and %edx. Rather than make the spiller cleverer, we do away with +idiv, and instead have iquot and irem fake (integer) insns, which have +no operand register constraints -- ie, they behave like add, sub, mul. +The printer-outer transforms them to a sequence of real insns which does +the Right Thing (tm). As with the FP stuff, this gives ropey code, +but we don't care, since it doesn't get used much. We hope. + \begin{code} #if i386_TARGET_ARCH @@ -443,8 +454,8 @@ data RI -- Moves. | MOV Size Operand Operand - | MOVZX Size Operand Operand -- size is the size of operand 2 - | MOVSX Size Operand Operand -- size is the size of operand 2 + | MOVZxL Size Operand Operand -- size is the size of operand 1 + | MOVSxL Size Operand Operand -- size is the size of operand 1 -- Load effective address (also a very useful three-operand add instruction :-) @@ -454,12 +465,14 @@ data RI | ADD Size Operand Operand | SUB Size Operand Operand + | IMUL Size Operand Operand --- Multiplication (signed and unsigned), Division (signed and unsigned), --- result in %eax, %edx. +-- Quotient and remainder. SEE comment above -- these are not +-- real x86 insns; instead they are expanded when printed +-- into a sequence of real insns. - | IMUL Size Operand Operand - | IDIV Size Operand + | IQUOT Size Operand Operand + | IREM Size Operand Operand -- Simple bit-twiddling. @@ -468,57 +481,51 @@ data RI | XOR Size Operand Operand | NOT Size Operand | NEGI Size Operand -- NEG instruction (name clash with Cond) - | SHL Size Operand Operand -- 1st operand must be an Imm or CL - | SAR Size Operand Operand -- 1st operand must be an Imm or CL - | SHR Size Operand Operand -- 1st operand must be an Imm or CL + | SHL Size Imm Operand -- Only immediate shifts allowed + | SAR Size Imm Operand -- Only immediate shifts allowed + | SHR Size Imm Operand -- Only immediate shifts allowed + | BT Size Imm Operand | NOP --- Float Arithmetic. -- ToDo for 386 +-- Float Arithmetic. --- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single instructions --- right up until we spit them out. +-- Note that we cheat by treating G{ABS,MOV,NEG} of doubles +-- as single instructions right up until we spit them out. + + -- all the 3-operand fake fp insns are src1 src2 dst + -- and furthermore are constrained to be fp regs only. + -- IMPORTANT: keep is_G_insn up to date with any changes here + | GMOV Reg Reg -- src(fpreg), dst(fpreg) + | GLD Size MachRegsAddr Reg -- src, dst(fpreg) + | GST Size Reg MachRegsAddr -- src(fpreg), dst - | SAHF -- stores ah into flags - | FABS - | FADD Size Operand -- src - | FADDP - | FIADD Size MachRegsAddr -- src - | FCHS - | FCOM Size Operand -- src - | FCOS - | FDIV Size Operand -- src - | FDIVP - | FIDIV Size MachRegsAddr -- src - | FDIVR Size Operand -- src - | FDIVRP - | FIDIVR Size MachRegsAddr -- src - | FICOM Size MachRegsAddr -- src - | FILD Size MachRegsAddr Reg -- src, dst - | FIST Size MachRegsAddr -- dst - | FLD Size Operand -- src - | FLD1 - | FLDZ - | FMUL Size Operand -- src - | FMULP - | FIMUL Size MachRegsAddr -- src - | FRNDINT - | FSIN - | FSQRT - | FST Size Operand -- dst - | FSTP Size Operand -- dst - | FSUB Size Operand -- src - | FSUBP - | FISUB Size MachRegsAddr -- src - | FSUBR Size Operand -- src - | FSUBRP - | FISUBR Size MachRegsAddr -- src - | FTST - | FCOMP Size Operand -- src - | FUCOMPP - | FXCH - | FNSTSW - | FNOP + | GLDZ Reg -- dst(fpreg) + | GLD1 Reg -- dst(fpreg) + | GFTOD Reg Reg -- src(fpreg), dst(fpreg) + | GFTOI Reg Reg -- src(fpreg), dst(intreg) + + | GDTOF Reg Reg -- src(fpreg), dst(fpreg) + | GDTOI Reg Reg -- src(fpreg), dst(intreg) + + | GITOF Reg Reg -- src(intreg), dst(fpreg) + | GITOD Reg Reg -- src(intreg), dst(fpreg) + + | GADD Size Reg Reg Reg -- src1, src2, dst + | GDIV Size Reg Reg Reg -- src1, src2, dst + | GSUB Size Reg Reg Reg -- src1, src2, dst + | GMUL Size Reg Reg Reg -- src1, src2, dst + + | GCMP Size Reg Reg -- src1, src2 + + | GABS Size Reg Reg -- src, dst + | GNEG Size Reg Reg -- src, dst + | GSQRT Size Reg Reg -- src, dst + | GSIN Size Reg Reg -- src, dst + | GCOS Size Reg Reg -- src, dst + | GTAN Size Reg Reg -- src, dst + + | GFREE -- do ffree on all x86 regs; an ugly hack -- Comparison | TEST Size Operand Operand @@ -529,10 +536,12 @@ data RI | PUSH Size Operand | POP Size Operand + | PUSHA + | POPA -- Jumping around. - | JMP Operand -- target + | JMP DestInfo Operand -- possible dests, target | JXX Cond CLabel -- target | CALL Imm @@ -545,6 +554,44 @@ data Operand | OpImm Imm -- immediate value | OpAddr MachRegsAddr -- memory reference + +i386_insert_ffrees :: [Instr] -> [Instr] +i386_insert_ffrees insns + | any is_G_instr insns + = concatMap ffree_before_nonlocal_transfers insns + | otherwise + = insns + +ffree_before_nonlocal_transfers insn + = case insn of + CALL _ -> [GFREE, insn] + -- Jumps to immediate labels are local + JMP _ (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn] + -- If a jump mentions dests, it is a local jump thru + -- a case table. + JMP (DestInfo _) _ -> [insn] + JMP _ _ -> [GFREE, insn] + other -> [insn] + + +-- if you ever add a new FP insn to the fake x86 FP insn set, +-- you must update this too +is_G_instr :: Instr -> Bool +is_G_instr instr + = case instr of + GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True; + GLDZ _ -> True; GLD1 _ -> True; + GFTOD _ _ -> True; GFTOI _ _ -> True; + GDTOF _ _ -> True; GDTOI _ _ -> True; + GITOF _ _ -> True; GITOD _ _ -> True; + GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True + GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True + GCMP _ _ _ -> True; GABS _ _ _ -> True + GNEG _ _ _ -> True; GSQRT _ _ _ -> True + GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True; + GFREE -> panic "is_G_instr: GFREE (!)" + other -> False + #endif {- i386_TARGET_ARCH -} \end{code} @@ -598,7 +645,7 @@ data Operand | BI Cond Bool Imm -- cond, annul?, target | BF Cond Bool Imm -- cond, annul?, target - | JMP MachRegsAddr -- target + | JMP DestInfo MachRegsAddr -- target | CALL Imm Int Bool -- target, args, terminal data RI = RIReg Reg @@ -608,8 +655,24 @@ riZero :: RI -> Bool riZero (RIImm (ImmInt 0)) = True riZero (RIImm (ImmInteger 0)) = True -riZero (RIReg (FixedReg ILIT(0))) = True +riZero (RIReg (RealReg 0)) = True riZero _ = False +-- Calculate the effective address which would be used by the +-- corresponding fpRel sequence. fpRel is in MachRegs.lhs, +-- alas -- can't have fpRelEA here because of module dependencies. +fpRelEA :: Int -> Reg -> Instr +fpRelEA n dst + = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst + +-- Code to shift the stack pointer by n words. +moveSp :: Int -> Instr +moveSp n + = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp + +-- Produce the second-half-of-a-double register given the first half. +fPair :: Reg -> Reg +fPair (RealReg n) | n >= 32 && n `mod` 2 == 0 = RealReg (n+1) +fPair other = pprPanic "fPair(sparc NCG)" (ppr other) #endif {- sparc_TARGET_ARCH -} \end{code}