From 92ee78e03c3670f56ebbbbfb0f67a00f9ea1305f Mon Sep 17 00:00:00 2001 From: "Ben.Lippmeier@anu.edu.au" Date: Tue, 3 Feb 2009 08:13:27 +0000 Subject: [PATCH] NCG: Split MachInstrs into arch specific modules --- compiler/ghc.cabal.in | 4 + compiler/nativeGen/Alpha/Instr.hs | 140 ++++++++ compiler/nativeGen/MachInstrs.hs | 706 ++----------------------------------- compiler/nativeGen/PPC/Instr.hs | 166 +++++++++ compiler/nativeGen/SPARC/Instr.hs | 188 ++++++++++ compiler/nativeGen/X86/Instr.hs | 335 ++++++++++++++++++ 6 files changed, 867 insertions(+), 672 deletions(-) create mode 100644 compiler/nativeGen/Alpha/Instr.hs create mode 100644 compiler/nativeGen/PPC/Instr.hs create mode 100644 compiler/nativeGen/SPARC/Instr.hs create mode 100644 compiler/nativeGen/X86/Instr.hs diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 47b321a..8fd470c 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -456,6 +456,10 @@ Library AsmCodeGen MachCodeGen MachInstrs + Alpha.Instr + X86.Instr + PPC.Instr + SPARC.Instr MachRegs NCGMonad PositionIndependentCode diff --git a/compiler/nativeGen/Alpha/Instr.hs b/compiler/nativeGen/Alpha/Instr.hs new file mode 100644 index 0000000..16ec53f --- /dev/null +++ b/compiler/nativeGen/Alpha/Instr.hs @@ -0,0 +1,140 @@ +----------------------------------------------------------------------------- +-- +-- Machine-dependent assembly language +-- +-- (c) The University of Glasgow 1993-2004 +-- +----------------------------------------------------------------------------- + +#include "HsVersions.h" +#include "nativeGen/NCG.h" + +module Alpha.Instr ( + Cond(..), + Instr(..), + RI(..) 
+) + +where + +import BlockId +import MachRegs +import Cmm +import FastString +import CLabel + +data Cond + = ALWAYS -- For BI (same as BR) + | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name) + | GE -- For BI only + | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name) + | LE -- For CMP and BI + | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name) + | NE -- For BI only + | NEVER -- For BI (null instruction) + | ULE -- For CMP only + | ULT -- For CMP only + deriving Eq + + +-- ----------------------------------------------------------------------------- +-- Machine's assembly language + +-- We have a few common "instructions" (nearly all the pseudo-ops) but +-- mostly all of 'Instr' is machine-specific. + +-- Register or immediate +data RI + = RIReg Reg + | RIImm Imm + +data Instr + -- comment pseudo-op + = COMMENT FastString + + -- some static data spat out during code + -- generation. Will be extracted before + -- pretty-printing. + | LDATA Section [CmmStatic] + + -- start a new basic block. Useful during + -- codegen, removed later. Preceding + -- instruction should be a jump, as per the + -- invariants for a BasicBlock (see Cmm). + | NEWBLOCK BlockId + + -- specify current stack offset for + -- benefit of subsequent passes + | DELTA Int + + -- | spill this reg to a stack slot + | SPILL Reg Int + + -- | reload this reg from a stack slot + | RELOAD Int Reg + + -- Loads and stores. + | LD Size Reg AddrMode -- size, dst, src + | LDA Reg AddrMode -- dst, src + | LDAH Reg AddrMode -- dst, src + | LDGP Reg AddrMode -- dst, src + | LDI Size Reg Imm -- size, dst, src + | ST Size Reg AddrMode -- size, src, dst + + -- Int Arithmetic. 
+ | CLR Reg -- dst + | ABS Size RI Reg -- size, src, dst + | NEG Size Bool RI Reg -- size, overflow, src, dst + | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst + | SADD Size Size Reg RI Reg -- size, scale, src, src, dst + | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst + | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst + | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst + | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst + | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst + + -- Simple bit-twiddling. + | NOT RI Reg + | AND Reg RI Reg + | ANDNOT Reg RI Reg + | OR Reg RI Reg + | ORNOT Reg RI Reg + | XOR Reg RI Reg + | XORNOT Reg RI Reg + | SLL Reg RI Reg + | SRL Reg RI Reg + | SRA Reg RI Reg + + | ZAP Reg RI Reg + | ZAPNOT Reg RI Reg + + | NOP + + -- Comparison + | CMP Cond Reg RI Reg + + -- Float Arithmetic. + | FCLR Reg + | FABS Reg Reg + | FNEG Size Reg Reg + | FADD Size Reg Reg Reg + | FDIV Size Reg Reg Reg + | FMUL Size Reg Reg Reg + | FSUB Size Reg Reg Reg + | CVTxy Size Size Reg Reg + | FCMP Size Cond Reg Reg Reg + | FMOV Reg Reg + + -- Jumping around. + | BI Cond Reg Imm + | BF Cond Reg Imm + | BR Imm + | JMP Reg AddrMode Int + | BSR Imm Int + | JSR Reg AddrMode Int + + -- Alpha-specific pseudo-ops. 
+ | FUNBEGIN CLabel + | FUNEND CLabel + + diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs index 529da0d..a3e2d2b 100644 --- a/compiler/nativeGen/MachInstrs.hs +++ b/compiler/nativeGen/MachInstrs.hs @@ -15,28 +15,29 @@ #include "nativeGen/NCG.h" -module MachInstrs ( - -- * Cmm instantiations - NatCmm, NatCmmTop, NatBasicBlock, - -- * Machine instructions - Instr(..), - Cond(..), condUnsigned, condToSigned, condToUnsigned, -#if powerpc_TARGET_ARCH - condNegate, +module MachInstrs ( + NatCmm, + NatCmmTop, + NatBasicBlock, + condUnsigned, + condToSigned, + condToUnsigned, + +#if alpha_TARGET_ARCH + module Alpha.Instr +#elif powerpc_TARGET_ARCH + module PPC.Instr +#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH + module X86.Instr +#elif sparc_TARGET_ARCH + module SPARC.Instr +#else +#error "MachInstrs: not defined for this architecture" #endif - RI(..), +) -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - Operand(..), -#endif -#if i386_TARGET_ARCH - i386_insert_ffrees, -#endif -#if sparc_TARGET_ARCH - riZero, fpRelEA, moveSp, fPair, -#endif - ) where +where #include "HsVersions.h" @@ -51,83 +52,28 @@ import Constants ( wORD_SIZE ) import GHC.Exts +#if alpha_TARGET_ARCH +import Alpha.Instr +#elif powerpc_TARGET_ARCH +import PPC.Instr +#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH +import X86.Instr +#elif sparc_TARGET_ARCH +import SPARC.Instr +#else +#error "MachInstrs: not defined for this architecture" +#endif --- ----------------------------------------------------------------------------- --- Our flavours of the Cmm types +-- Our flavours of the Cmm types -- Type synonyms for Cmm populated with native code + type NatCmm = GenCmm CmmStatic [CmmStatic] (ListGraph Instr) type NatCmmTop = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr) type NatBasicBlock = GenBasicBlock Instr --- ----------------------------------------------------------------------------- --- Conditions on this architecture - -data Cond -#if alpha_TARGET_ARCH - = ALWAYS -- For 
BI (same as BR) - | EQQ -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name) - | GE -- For BI only - | GTT -- For BI only (NB: "GT" is a 1.3 Prelude name) - | LE -- For CMP and BI - | LTT -- For CMP and BI (NB: "LT" is a 1.3 Prelude name) - | NE -- For BI only - | NEVER -- For BI (null instruction) - | ULE -- For CMP only - | ULT -- For CMP only -#endif -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - = ALWAYS -- What's really used? ToDo - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - | NEG - | POS - | CARRY - | OFLO - | PARITY - | NOTPARITY -#endif -#if sparc_TARGET_ARCH - = ALWAYS -- What's really used? ToDo - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - | NEG - | NEVER - | POS - | VC - | VS -#endif -#if powerpc_TARGET_ARCH - = ALWAYS - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE -#endif - deriving Eq -- to make an assertion work +-- Condition utils condUnsigned GU = True condUnsigned LU = True condUnsigned GEU = True @@ -146,590 +92,6 @@ condToUnsigned GE = GEU condToUnsigned LE = LEU condToUnsigned x = x -#if powerpc_TARGET_ARCH -condNegate ALWAYS = panic "condNegate: ALWAYS" -condNegate EQQ = NE -condNegate GE = LTT -condNegate GEU = LU -condNegate GTT = LE -condNegate GU = LEU -condNegate LE = GTT -condNegate LEU = GU -condNegate LTT = GE -condNegate LU = GEU -condNegate NE = EQQ -#endif - --- ----------------------------------------------------------------------------- --- Register or immediate (a handy type on some platforms) - -data RI = RIReg Reg - | RIImm Imm - - --- ----------------------------------------------------------------------------- --- Machine's assembly language - --- We have a few common "instructions" (nearly all the pseudo-ops) but --- mostly all of 'Instr' is machine-specific. - -data Instr - = COMMENT FastString -- comment pseudo-op - - | LDATA Section [CmmStatic] -- some static data spat out during code - -- generation. 
Will be extracted before - -- pretty-printing. - - | NEWBLOCK BlockId -- start a new basic block. Useful during - -- codegen, removed later. Preceding - -- instruction should be a jump, as per the - -- invariants for a BasicBlock (see Cmm). - - | DELTA Int -- specify current stack offset for - -- benefit of subsequent passes - - | SPILL Reg Int -- ^ spill this reg to a stack slot - | RELOAD Int Reg -- ^ reload this reg from a stack slot - --- ----------------------------------------------------------------------------- --- Alpha instructions - -#if alpha_TARGET_ARCH - --- data Instr continues... - --- Loads and stores. - | LD Size Reg AddrMode -- size, dst, src - | LDA Reg AddrMode -- dst, src - | LDAH Reg AddrMode -- dst, src - | LDGP Reg AddrMode -- dst, src - | LDI Size Reg Imm -- size, dst, src - | ST Size Reg AddrMode -- size, src, dst - --- Int Arithmetic. - | CLR Reg -- dst - | ABS Size RI Reg -- size, src, dst - | NEG Size Bool RI Reg -- size, overflow, src, dst - | ADD Size Bool Reg RI Reg -- size, overflow, src, src, dst - | SADD Size Size Reg RI Reg -- size, scale, src, src, dst - | SUB Size Bool Reg RI Reg -- size, overflow, src, src, dst - | SSUB Size Size Reg RI Reg -- size, scale, src, src, dst - | MUL Size Bool Reg RI Reg -- size, overflow, src, src, dst - | DIV Size Bool Reg RI Reg -- size, unsigned, src, src, dst - | REM Size Bool Reg RI Reg -- size, unsigned, src, src, dst - --- Simple bit-twiddling. - | NOT RI Reg - | AND Reg RI Reg - | ANDNOT Reg RI Reg - | OR Reg RI Reg - | ORNOT Reg RI Reg - | XOR Reg RI Reg - | XORNOT Reg RI Reg - | SLL Reg RI Reg - | SRL Reg RI Reg - | SRA Reg RI Reg - - | ZAP Reg RI Reg - | ZAPNOT Reg RI Reg - - | NOP - --- Comparison - | CMP Cond Reg RI Reg - --- Float Arithmetic. 
- | FCLR Reg - | FABS Reg Reg - | FNEG Size Reg Reg - | FADD Size Reg Reg Reg - | FDIV Size Reg Reg Reg - | FMUL Size Reg Reg Reg - | FSUB Size Reg Reg Reg - | CVTxy Size Size Reg Reg - | FCMP Size Cond Reg Reg Reg - | FMOV Reg Reg - --- Jumping around. - | BI Cond Reg Imm - | BF Cond Reg Imm - | BR Imm - | JMP Reg AddrMode Int - | BSR Imm Int - | JSR Reg AddrMode Int - --- Alpha-specific pseudo-ops. - | FUNBEGIN CLabel - | FUNEND CLabel - -data RI - = RIReg Reg - | RIImm Imm - -#endif /* alpha_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Intel x86 instructions - -{- -Intel, in their infinite wisdom, selected a stack model for floating -point registers on x86. That might have made sense back in 1979 -- -nowadays we can see it for the nonsense it really is. A stack model -fits poorly with the existing nativeGen infrastructure, which assumes -flat integer and FP register sets. Prior to this commit, nativeGen -could not generate correct x86 FP code -- to do so would have meant -somehow working the register-stack paradigm into the register -allocator and spiller, which sounds very difficult. - -We have decided to cheat, and go for a simple fix which requires no -infrastructure modifications, at the expense of generating ropey but -correct FP code. All notions of the x86 FP stack and its insns have -been removed. Instead, we pretend (to the instruction selector and -register allocator) that x86 has six floating point registers, %fake0 -.. %fake5, which can be used in the usual flat manner. We further -claim that x86 has floating point instructions very similar to SPARC -and Alpha, that is, a simple 3-operand register-register arrangement. -Code generation and register allocation proceed on this basis. - -When we come to print out the final assembly, our convenient fiction -is converted to dismal reality. Each fake instruction is -independently converted to a series of real x86 instructions. -%fake0 .. 
%fake5 are mapped to %st(0) .. %st(5). To do reg-reg -arithmetic operations, the two operands are pushed onto the top of the -FP stack, the operation done, and the result copied back into the -relevant register. There are only six %fake registers because 2 are -needed for the translation, and x86 has 8 in total. - -The translation is inefficient but is simple and it works. A cleverer -translation would handle a sequence of insns, simulating the FP stack -contents, would not impose a fixed mapping from %fake to %st regs, and -hopefully could avoid most of the redundant reg-reg moves of the -current translation. - -We might as well make use of whatever unique FP facilities Intel have -chosen to bless us with (let's not be churlish, after all). -Hence GLDZ and GLD1. Bwahahahahahahaha! --} - -{- -MORE FLOATING POINT MUSINGS... - -Intel's internal floating point registers are by default 80 bit -extended precision. This means that all operations done on values in -registers are done at 80 bits, and unless the intermediate values are -truncated to the appropriate size (32 or 64 bits) by storing in -memory, calculations in registers will give different results from -calculations which pass intermediate values in memory (eg. via -function calls). - -One solution is to set the FPU into 64 bit precision mode. Some OSs -do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is -that this will only affect 64-bit precision arithmetic; 32-bit -calculations will still be done at 64-bit precision in registers. So -it doesn't solve the whole problem. - -There's also the issue of what the C library is expecting in terms of -precision. It seems to be the case that glibc on Linux expects the -FPU to be set to 80 bit precision, so setting it to 64 bit could have -unexpected effects. 
Changing the default could have undesirable -effects on other 3rd-party library code too, so the right thing would -be to save/restore the FPU control word across Haskell code if we were -to do this. - -gcc's -ffloat-store gives consistent results by always storing the -results of floating-point calculations in memory, which works for both -32 and 64-bit precision. However, it only affects the values of -user-declared floating point variables in C, not intermediate results. -GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision -flag). - -Another problem is how to spill floating point registers in the -register allocator. Should we spill the whole 80 bits, or just 64? -On an OS which is set to 64 bit precision, spilling 64 is fine. On -Linux, spilling 64 bits will round the results of some operations. -This is what gcc does. Spilling at 80 bits requires taking up a full -128 bit slot (so we get alignment). We spill at 80-bits and ignore -the alignment problems. - -In the future, we'll use the SSE registers for floating point. This -requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit -precision float ops), which means P4 or Xeon and above. Using SSE -will solve all these problems, because the SSE registers use fixed 32 -bit or 64 bit precision. - ---SDM 1/2003 --} - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- data Instr continues... - --- Moves. - | MOV Size Operand Operand - | MOVZxL Size Operand Operand -- size is the size of operand 1 - | MOVSxL Size Operand Operand -- size is the size of operand 1 - -- x86_64 note: plain mov into a 32-bit register always zero-extends - -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which - -- don't affect the high bits of the register. - --- Load effective address (also a very useful three-operand add instruction :-) - | LEA Size Operand Operand - --- Int Arithmetic. 
- | ADD Size Operand Operand - | ADC Size Operand Operand - | SUB Size Operand Operand - - | MUL Size Operand Operand - | IMUL Size Operand Operand -- signed int mul - | IMUL2 Size Operand -- %edx:%eax = operand * %eax - - | DIV Size Operand -- eax := eax:edx/op, edx := eax:edx%op - | IDIV Size Operand -- ditto, but signed - --- Simple bit-twiddling. - | AND Size Operand Operand - | OR Size Operand Operand - | XOR Size Operand Operand - | NOT Size Operand - | NEGI Size Operand -- NEG instruction (name clash with Cond) - --- Shifts (amount may be immediate or %cl only) - | SHL Size Operand{-amount-} Operand - | SAR Size Operand{-amount-} Operand - | SHR Size Operand{-amount-} Operand - - | BT Size Imm Operand - | NOP - -#if i386_TARGET_ARCH --- Float Arithmetic. - --- Note that we cheat by treating G{ABS,MOV,NEG} of doubles --- as single instructions right up until we spit them out. - -- all the 3-operand fake fp insns are src1 src2 dst - -- and furthermore are constrained to be fp regs only. - -- IMPORTANT: keep is_G_insn up to date with any changes here - | GMOV Reg Reg -- src(fpreg), dst(fpreg) - | GLD Size AddrMode Reg -- src, dst(fpreg) - | GST Size Reg AddrMode -- src(fpreg), dst - - | GLDZ Reg -- dst(fpreg) - | GLD1 Reg -- dst(fpreg) - - | GFTOI Reg Reg -- src(fpreg), dst(intreg) - | GDTOI Reg Reg -- src(fpreg), dst(intreg) - - | GITOF Reg Reg -- src(intreg), dst(fpreg) - | GITOD Reg Reg -- src(intreg), dst(fpreg) - - | GADD Size Reg Reg Reg -- src1, src2, dst - | GDIV Size Reg Reg Reg -- src1, src2, dst - | GSUB Size Reg Reg Reg -- src1, src2, dst - | GMUL Size Reg Reg Reg -- src1, src2, dst - - -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT] - -- Compare src1 with src2; set the Zero flag iff the numbers are - -- comparable and the comparison is True. Subsequent code must - -- test the %eflags zero flag regardless of the supplied Cond. 
- | GCMP Cond Reg Reg -- src1, src2 - - | GABS Size Reg Reg -- src, dst - | GNEG Size Reg Reg -- src, dst - | GSQRT Size Reg Reg -- src, dst - | GSIN Size CLabel CLabel Reg Reg -- src, dst - | GCOS Size CLabel CLabel Reg Reg -- src, dst - | GTAN Size CLabel CLabel Reg Reg -- src, dst - - | GFREE -- do ffree on all x86 regs; an ugly hack -#endif - -#if x86_64_TARGET_ARCH --- SSE2 floating point: we use a restricted set of the available SSE2 --- instructions for floating-point. - - -- use MOV for moving (either movss or movsd (movlpd better?)) - - | CVTSS2SD Reg Reg -- F32 to F64 - | CVTSD2SS Reg Reg -- F64 to F32 - | CVTTSS2SIQ Operand Reg -- F32 to I32/I64 (with truncation) - | CVTTSD2SIQ Operand Reg -- F64 to I32/I64 (with truncation) - | CVTSI2SS Operand Reg -- I32/I64 to F32 - | CVTSI2SD Operand Reg -- I32/I64 to F64 - - -- use ADD & SUB for arithmetic. In both cases, operands - -- are Operand Reg. - - -- SSE2 floating-point division: - | FDIV Size Operand Operand -- divisor, dividend(dst) - - -- use CMP for comparisons. ucomiss and ucomisd instructions - -- compare single/double prec floating point respectively. - - | SQRT Size Operand Reg -- src, dst -#endif - --- Comparison - | TEST Size Operand Operand - | CMP Size Operand Operand - | SETCC Cond Operand - --- Stack Operations. - | PUSH Size Operand - | POP Size Operand - -- both unused (SDM): - -- | PUSHA - -- | POPA - --- Jumping around. - | JMP Operand - | JXX Cond BlockId -- includes unconditional branches - | JXX_GBL Cond Imm -- non-local version of JXX - | JMP_TBL Operand [BlockId] -- table jump - | CALL (Either Imm Reg) [Reg] - --- Other things. 
- | CLTD Size -- sign extend %eax into %edx:%eax - - | FETCHGOT Reg -- pseudo-insn for ELF position-independent code - -- pretty-prints as - -- call 1f - -- 1: popl %reg - -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg - | FETCHPC Reg -- pseudo-insn for Darwin position-independent code - -- pretty-prints as - -- call 1f - -- 1: popl %reg - - -data Operand - = OpReg Reg -- register - | OpImm Imm -- immediate value - | OpAddr AddrMode -- memory reference - -#endif /* i386 or x86_64 */ - -#if i386_TARGET_ARCH -i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr] -i386_insert_ffrees blocks - | or (map (any is_G_instr) [ instrs | BasicBlock id instrs <- blocks ]) - = map ffree_before_nonlocal_transfers blocks - | otherwise - = blocks - where - ffree_before_nonlocal_transfers (BasicBlock id insns) - = BasicBlock id (foldr p [] insns) - where p insn r = case insn of - CALL _ _ -> GFREE : insn : r - JMP _ -> GFREE : insn : r - other -> insn : r - --- if you ever add a new FP insn to the fake x86 FP insn set, --- you must update this too -is_G_instr :: Instr -> Bool -is_G_instr instr - = case instr of - GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True - GLDZ _ -> True; GLD1 _ -> True - GFTOI _ _ -> True; GDTOI _ _ -> True - GITOF _ _ -> True; GITOD _ _ -> True - GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True - GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True - GCMP _ _ _ -> True; GABS _ _ _ -> True - GNEG _ _ _ -> True; GSQRT _ _ _ -> True - GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True - GFREE -> panic "is_G_instr: GFREE (!)" - other -> False -#endif /* i386_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Sparc instructions - -#if sparc_TARGET_ARCH - --- data Instr continues... - --- Loads and stores. - | LD Size AddrMode Reg -- size, src, dst - | ST Size Reg AddrMode -- size, src, dst - --- Int Arithmetic. 
- | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst - | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst - - | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst - | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst - - - -- The SPARC divide instructions perform 64bit by 32bit division - -- The Y register is xored into the first operand. - - -- On _some implementations_ the Y register is overwritten by - -- the remainder, so we have to make sure it is 0 each time. - - -- dst <- ((Y `shiftL` 32) `or` src1) `div` src2 - | UDIV Bool Reg RI Reg -- cc?, src1, src2, dst - | SDIV Bool Reg RI Reg -- cc?, src1, src2, dst - - | RDY Reg -- move contents of Y register to reg - | WRY Reg Reg -- Y <- src1 `xor` src2 - --- Simple bit-twiddling. - | AND Bool Reg RI Reg -- cc?, src1, src2, dst - | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst - | OR Bool Reg RI Reg -- cc?, src1, src2, dst - | ORN Bool Reg RI Reg -- cc?, src1, src2, dst - | XOR Bool Reg RI Reg -- cc?, src1, src2, dst - | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst - | SLL Reg RI Reg -- src1, src2, dst - | SRL Reg RI Reg -- src1, src2, dst - | SRA Reg RI Reg -- src1, src2, dst - | SETHI Imm Reg -- src, dst - | NOP -- Really SETHI 0, %g0, but worth an alias - --- Float Arithmetic. - --- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single --- instructions right up until we spit them out. - | FABS Size Reg Reg -- src dst - | FADD Size Reg Reg Reg -- src1, src2, dst - | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst - | FDIV Size Reg Reg Reg -- src1, src2, dst - | FMOV Size Reg Reg -- src, dst - | FMUL Size Reg Reg Reg -- src1, src2, dst - | FNEG Size Reg Reg -- src, dst - | FSQRT Size Reg Reg -- src, dst - | FSUB Size Reg Reg Reg -- src1, src2, dst - | FxTOy Size Size Reg Reg -- src, dst - --- Jumping around. - | BI Cond Bool BlockId -- cond, annul?, target - | BF Cond Bool BlockId -- cond, annul?, target - - | JMP AddrMode -- target - - -- With a tabled jump we know all the possible destinations. 
Tabled - -- jump includes its list of destinations so we can work out what regs - -- are live across the jump. - -- - | JMP_TBL AddrMode [BlockId] - - | CALL (Either Imm Reg) Int Bool -- target, args, terminal - -riZero :: RI -> Bool - -riZero (RIImm (ImmInt 0)) = True -riZero (RIImm (ImmInteger 0)) = True -riZero (RIReg (RealReg 0)) = True -riZero _ = False - --- Calculate the effective address which would be used by the --- corresponding fpRel sequence. fpRel is in MachRegs.lhs, --- alas -- can't have fpRelEA here because of module dependencies. -fpRelEA :: Int -> Reg -> Instr -fpRelEA n dst - = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst - --- Code to shift the stack pointer by n words. -moveSp :: Int -> Instr -moveSp n - = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp - --- Produce the second-half-of-a-double register given the first half. -fPair :: Reg -> Maybe Reg -fPair (RealReg n) - | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1)) - -fPair (VirtualRegD u) - = Just (VirtualRegHi u) - -fPair other - = trace ("MachInstrs.fPair: can't get high half of supposed double reg " ++ show other) - Nothing - -#endif /* sparc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- PowerPC instructions -#ifdef powerpc_TARGET_ARCH --- data Instr continues... --- Loads and stores. 
- | LD Size Reg AddrMode -- Load size, dst, src - | LA Size Reg AddrMode -- Load arithmetic size, dst, src - | ST Size Reg AddrMode -- Store size, src, dst - | STU Size Reg AddrMode -- Store with Update size, src, dst - | LIS Reg Imm -- Load Immediate Shifted dst, src - | LI Reg Imm -- Load Immediate dst, src - | MR Reg Reg -- Move Register dst, src -- also for fmr - - | CMP Size Reg RI --- size, src1, src2 - | CMPL Size Reg RI --- size, src1, src2 - - | BCC Cond BlockId - | BCCFAR Cond BlockId - | JMP CLabel -- same as branch, - -- but with CLabel instead of block ID - | MTCTR Reg - | BCTR [BlockId] -- with list of local destinations - | BL CLabel [Reg] -- with list of argument regs - | BCTRL [Reg] - - | ADD Reg Reg RI -- dst, src1, src2 - | ADDC Reg Reg Reg -- (carrying) dst, src1, src2 - | ADDE Reg Reg Reg -- (extend) dst, src1, src2 - | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2 - | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1 - | MULLW Reg Reg RI - | DIVW Reg Reg Reg - | DIVWU Reg Reg Reg - | MULLW_MayOflo Reg Reg Reg - -- dst = 1 if src1 * src2 overflows - -- pseudo-instruction; pretty-printed as: - -- mullwo. dst, src1, src2 - -- mfxer dst - -- rlwinm dst, dst, 2, 31,31 - - | AND Reg Reg RI -- dst, src1, src2 - | OR Reg Reg RI -- dst, src1, src2 - | XOR Reg Reg RI -- dst, src1, src2 - | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2 - - | EXTS Size Reg Reg - - | NEG Reg Reg - | NOT Reg Reg - - | SLW Reg Reg RI -- shift left word - | SRW Reg Reg RI -- shift right word - | SRAW Reg Reg RI -- shift right arithmetic word - - -- Rotate Left Word Immediate then AND with Mask - | RLWINM Reg Reg Int Int Int - - | FADD Size Reg Reg Reg - | FSUB Size Reg Reg Reg - | FMUL Size Reg Reg Reg - | FDIV Size Reg Reg Reg - | FNEG Reg Reg -- negate is the same for single and double prec. 
- - | FCMP Reg Reg - - | FCTIWZ Reg Reg -- convert to integer word - | FRSP Reg Reg -- reduce to single precision - -- (but destination is a FP register) - - | CRNOR Int Int Int -- condition register nor - | MFCR Reg -- move from condition register - - | MFLR Reg -- move from link register - | FETCHPC Reg -- pseudo-instruction: - -- bcl to next insn, mflr reg - - | LWSYNC -- memory barrier -#endif /* powerpc_TARGET_ARCH */ diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs new file mode 100644 index 0000000..beb9e15 --- /dev/null +++ b/compiler/nativeGen/PPC/Instr.hs @@ -0,0 +1,166 @@ +----------------------------------------------------------------------------- +-- +-- Machine-dependent assembly language +-- +-- (c) The University of Glasgow 1993-2004 +-- +----------------------------------------------------------------------------- + +#include "HsVersions.h" +#include "nativeGen/NCG.h" + +module PPC.Instr ( + Cond(..), + condNegate, + RI(..), + Instr(..) +) + +where + +import BlockId +import MachRegs +import Cmm +import Outputable +import FastString +import CLabel + +data Cond + = ALWAYS + | EQQ + | GE + | GEU + | GTT + | GU + | LE + | LEU + | LTT + | LU + | NE + deriving Eq + + +condNegate :: Cond -> Cond +condNegate ALWAYS = panic "condNegate: ALWAYS" +condNegate EQQ = NE +condNegate GE = LTT +condNegate GEU = LU +condNegate GTT = LE +condNegate GU = LEU +condNegate LE = GTT +condNegate LEU = GU +condNegate LTT = GE +condNegate LU = GEU +condNegate NE = EQQ + + +-- ----------------------------------------------------------------------------- +-- Machine's assembly language + +-- We have a few common "instructions" (nearly all the pseudo-ops) but +-- mostly all of 'Instr' is machine-specific. + +-- Register or immediate +data RI + = RIReg Reg + | RIImm Imm + +data Instr + -- comment pseudo-op + = COMMENT FastString + + -- some static data spat out during code + -- generation. Will be extracted before + -- pretty-printing. 
+ | LDATA Section [CmmStatic] + + -- start a new basic block. Useful during + -- codegen, removed later. Preceding + -- instruction should be a jump, as per the + -- invariants for a BasicBlock (see Cmm). + | NEWBLOCK BlockId + + -- specify current stack offset for + -- benefit of subsequent passes + | DELTA Int + + -- | spill this reg to a stack slot + | SPILL Reg Int + + -- | reload this reg from a stack slot + | RELOAD Int Reg + + -- Loads and stores. + | LD Size Reg AddrMode -- Load size, dst, src + | LA Size Reg AddrMode -- Load arithmetic size, dst, src + | ST Size Reg AddrMode -- Store size, src, dst + | STU Size Reg AddrMode -- Store with Update size, src, dst + | LIS Reg Imm -- Load Immediate Shifted dst, src + | LI Reg Imm -- Load Immediate dst, src + | MR Reg Reg -- Move Register dst, src -- also for fmr + + | CMP Size Reg RI --- size, src1, src2 + | CMPL Size Reg RI --- size, src1, src2 + + | BCC Cond BlockId + | BCCFAR Cond BlockId + | JMP CLabel -- same as branch, + -- but with CLabel instead of block ID + | MTCTR Reg + | BCTR [BlockId] -- with list of local destinations + | BL CLabel [Reg] -- with list of argument regs + | BCTRL [Reg] + + | ADD Reg Reg RI -- dst, src1, src2 + | ADDC Reg Reg Reg -- (carrying) dst, src1, src2 + | ADDE Reg Reg Reg -- (extend) dst, src1, src2 + | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2 + | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1 + | MULLW Reg Reg RI + | DIVW Reg Reg Reg + | DIVWU Reg Reg Reg + + | MULLW_MayOflo Reg Reg Reg + -- dst = 1 if src1 * src2 overflows + -- pseudo-instruction; pretty-printed as: + -- mullwo. 
dst, src1, src2 + -- mfxer dst + -- rlwinm dst, dst, 2, 31,31 + + | AND Reg Reg RI -- dst, src1, src2 + | OR Reg Reg RI -- dst, src1, src2 + | XOR Reg Reg RI -- dst, src1, src2 + | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2 + + | EXTS Size Reg Reg + + | NEG Reg Reg + | NOT Reg Reg + + | SLW Reg Reg RI -- shift left word + | SRW Reg Reg RI -- shift right word + | SRAW Reg Reg RI -- shift right arithmetic word + + -- Rotate Left Word Immediate then AND with Mask + | RLWINM Reg Reg Int Int Int + + | FADD Size Reg Reg Reg + | FSUB Size Reg Reg Reg + | FMUL Size Reg Reg Reg + | FDIV Size Reg Reg Reg + | FNEG Reg Reg -- negate is the same for single and double prec. + + | FCMP Reg Reg + + | FCTIWZ Reg Reg -- convert to integer word + | FRSP Reg Reg -- reduce to single precision + -- (but destination is a FP register) + + | CRNOR Int Int Int -- condition register nor + | MFCR Reg -- move from condition register + + | MFLR Reg -- move from link register + | FETCHPC Reg -- pseudo-instruction: + -- bcl to next insn, mflr reg + + | LWSYNC -- memory barrier diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs new file mode 100644 index 0000000..7f782c9 --- /dev/null +++ b/compiler/nativeGen/SPARC/Instr.hs @@ -0,0 +1,188 @@ +----------------------------------------------------------------------------- +-- +-- Machine-dependent assembly language +-- +-- (c) The University of Glasgow 1993-2004 +-- +----------------------------------------------------------------------------- + +#include "HsVersions.h" +#include "nativeGen/NCG.h" + +module SPARC.Instr ( + Cond(..), + RI(..), + Instr(..), + riZero, + fpRelEA, + moveSp, + fPair, +) + +where + +import BlockId +import MachRegs +import Cmm +import Outputable +import Constants ( wORD_SIZE ) +import FastString + +import GHC.Exts + + +data Cond + = ALWAYS + | EQQ + | GE + | GEU + | GTT + | GU + | LE + | LEU + | LTT + | LU + | NE + | NEG + | NEVER + | POS + | VC + | VS + deriving Eq + +-- 
-----------------------------------------------------------------------------
+-- Machine's assembly language
+
+-- We have a few common "instructions" (nearly all the pseudo-ops) but
+-- mostly all of 'Instr' is machine-specific.
+
+-- Register or immediate
+data RI
+        = RIReg Reg
+        | RIImm Imm
+
+data Instr
+        -- comment pseudo-op
+        = COMMENT FastString
+
+        -- some static data spat out during code
+        -- generation. Will be extracted before
+        -- pretty-printing.
+        | LDATA Section [CmmStatic]
+
+        -- start a new basic block. Useful during
+        -- codegen, removed later. Preceding
+        -- instruction should be a jump, as per the
+        -- invariants for a BasicBlock (see Cmm).
+        | NEWBLOCK BlockId
+
+        -- specify current stack offset for
+        -- benefit of subsequent passes
+        | DELTA Int
+
+        -- | spill this reg to a stack slot
+        | SPILL Reg Int
+
+        -- | reload this reg from a stack slot
+        | RELOAD Int Reg
+
+
+        -- Loads and stores.
+        | LD Size AddrMode Reg          -- size, src, dst
+        | ST Size Reg AddrMode          -- size, src, dst
+
+        -- Int Arithmetic.
+        | ADD Bool Bool Reg RI Reg      -- x?, cc?, src1, src2, dst
+        | SUB Bool Bool Reg RI Reg      -- x?, cc?, src1, src2, dst
+
+        | UMUL Bool Reg RI Reg          -- cc?, src1, src2, dst
+        | SMUL Bool Reg RI Reg          -- cc?, src1, src2, dst
+
+
+        -- The SPARC divide instructions perform 64bit by 32bit division.
+        -- The Y register supplies the high 32 bits of the dividend (see below).
+
+        -- On _some implementations_ the Y register is overwritten by
+        -- the remainder, so we have to make sure it is 0 each time.
+
+        -- dst <- ((Y `shiftL` 32) `or` src1) `div` src2
+        | UDIV Bool Reg RI Reg          -- cc?, src1, src2, dst
+        | SDIV Bool Reg RI Reg          -- cc?, src1, src2, dst
+
+        | RDY Reg                       -- move contents of Y register to reg
+        | WRY Reg Reg                   -- Y <- src1 `xor` src2
+
+        -- Simple bit-twiddling.
+        | AND Bool Reg RI Reg           -- cc?, src1, src2, dst
+        | ANDN Bool Reg RI Reg          -- cc?, src1, src2, dst
+        | OR Bool Reg RI Reg            -- cc?, src1, src2, dst
+        | ORN Bool Reg RI Reg           -- cc?, src1, src2, dst
+        | XOR Bool Reg RI Reg           -- cc?, src1, src2, dst
+        | XNOR Bool Reg RI Reg          -- cc?, src1, src2, dst
+        | SLL Reg RI Reg                -- src1, src2, dst
+        | SRL Reg RI Reg                -- src1, src2, dst
+        | SRA Reg RI Reg                -- src1, src2, dst
+        | SETHI Imm Reg                 -- src, dst
+        | NOP                           -- Really SETHI 0, %g0, but worth an alias
+
+        -- Float Arithmetic.
+        -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
+        -- instructions right up until we spit them out.
+        | FABS Size Reg Reg             -- src, dst
+        | FADD Size Reg Reg Reg         -- src1, src2, dst
+        | FCMP Bool Size Reg Reg        -- exception?, src1, src2 (no dst operand)
+        | FDIV Size Reg Reg Reg         -- src1, src2, dst
+        | FMOV Size Reg Reg             -- src, dst
+        | FMUL Size Reg Reg Reg         -- src1, src2, dst
+        | FNEG Size Reg Reg             -- src, dst
+        | FSQRT Size Reg Reg            -- src, dst
+        | FSUB Size Reg Reg Reg         -- src1, src2, dst
+        | FxTOy Size Size Reg Reg       -- src, dst
+
+        -- Jumping around.
+        | BI Cond Bool BlockId          -- cond, annul?, target
+        | BF Cond Bool BlockId          -- cond, annul?, target
+
+        | JMP AddrMode                  -- target
+
+        -- With a tabled jump we know all the possible destinations. Tabled
+        -- jump includes its list of destinations so we can work out what regs
+        -- are live across the jump.
+        --
+        | JMP_TBL AddrMode [BlockId]
+
+        | CALL (Either Imm Reg) Int Bool -- target, args, terminal
+
+
+riZero :: RI -> Bool    -- True for an immediate 0 or for real register 0
+riZero (RIImm (ImmInt 0)) = True
+riZero (RIImm (ImmInteger 0)) = True
+riZero (RIReg (RealReg 0)) = True
+riZero _ = False
+
+
+-- | Calculate the effective address which would be used by the
+--   corresponding fpRel sequence.  fpRel itself is in MachRegs; presumably
+--   fpRelEA cannot live there because of module dependencies (TODO confirm).
+fpRelEA :: Int -> Reg -> Instr
+fpRelEA n dst
+        = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+
+
+-- | Code to shift the stack pointer by n words.
+moveSp :: Int -> Instr
+moveSp n        -- n counts words; it is multiplied by wORD_SIZE below
+        = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+
+
+-- | Produce the second-half-of-a-double register given the first half.
+fPair :: Reg -> Maybe Reg
+fPair (RealReg n)       -- a failed guard falls through to the catch-all clause
+        | n >= 32 && n `mod` 2 == 0  = Just (RealReg (n+1))
+
+fPair (VirtualRegD u)
+        = Just (VirtualRegHi u)
+
+fPair other             -- anything else: trace a diagnostic and give Nothing
+        = trace ("SPARC.Instr.fPair: can't get high half of supposed double reg " ++ show other)
+                Nothing
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
new file mode 100644
index 0000000..0944e92
--- /dev/null
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -0,0 +1,335 @@
+-----------------------------------------------------------------------------
+--
+-- Machine-dependent assembly language
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-----------------------------------------------------------------------------
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+
+module X86.Instr
+where
+
+import BlockId
+import MachRegs
+import Cmm
+import FastString
+
+data Cond       -- condition codes carried by JXX, JXX_GBL, SETCC and GCMP
+        = ALWAYS        -- What's really used? ToDo
+        | EQQ
+        | GE
+        | GEU
+        | GTT
+        | GU
+        | LE
+        | LEU
+        | LTT
+        | LU
+        | NE
+        | NEG
+        | POS
+        | CARRY
+        | OFLO
+        | PARITY
+        | NOTPARITY
+        deriving Eq     -- `elem` tests on Cond (cf. GCMP) need Eq; the Alpha and SPARC Cond derive it too
+
+
+-- -----------------------------------------------------------------------------
+-- Intel x86 instructions
+
+{-
+Intel, in their infinite wisdom, selected a stack model for floating
+point registers on x86. That might have made sense back in 1979 --
+nowadays we can see it for the nonsense it really is. A stack model
+fits poorly with the existing nativeGen infrastructure, which assumes
+flat integer and FP register sets. Prior to this commit, nativeGen
+could not generate correct x86 FP code -- to do so would have meant
+somehow working the register-stack paradigm into the register
+allocator and spiller, which sounds very difficult.
+ +We have decided to cheat, and go for a simple fix which requires no +infrastructure modifications, at the expense of generating ropey but +correct FP code. All notions of the x86 FP stack and its insns have +been removed. Instead, we pretend (to the instruction selector and +register allocator) that x86 has six floating point registers, %fake0 +.. %fake5, which can be used in the usual flat manner. We further +claim that x86 has floating point instructions very similar to SPARC +and Alpha, that is, a simple 3-operand register-register arrangement. +Code generation and register allocation proceed on this basis. + +When we come to print out the final assembly, our convenient fiction +is converted to dismal reality. Each fake instruction is +independently converted to a series of real x86 instructions. +%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg +arithmetic operations, the two operands are pushed onto the top of the +FP stack, the operation done, and the result copied back into the +relevant register. There are only six %fake registers because 2 are +needed for the translation, and x86 has 8 in total. + +The translation is inefficient but is simple and it works. A cleverer +translation would handle a sequence of insns, simulating the FP stack +contents, would not impose a fixed mapping from %fake to %st regs, and +hopefully could avoid most of the redundant reg-reg moves of the +current translation. + +We might as well make use of whatever unique FP facilities Intel have +chosen to bless us with (let's not be churlish, after all). +Hence GLDZ and GLD1. Bwahahahahahahaha! +-} + +{- +MORE FLOATING POINT MUSINGS... + +Intel's internal floating point registers are by default 80 bit +extended precision. 
This means that all operations done on values in +registers are done at 80 bits, and unless the intermediate values are +truncated to the appropriate size (32 or 64 bits) by storing in +memory, calculations in registers will give different results from +calculations which pass intermediate values in memory (eg. via +function calls). + +One solution is to set the FPU into 64 bit precision mode. Some OSs +do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is +that this will only affect 64-bit precision arithmetic; 32-bit +calculations will still be done at 64-bit precision in registers. So +it doesn't solve the whole problem. + +There's also the issue of what the C library is expecting in terms of +precision. It seems to be the case that glibc on Linux expects the +FPU to be set to 80 bit precision, so setting it to 64 bit could have +unexpected effects. Changing the default could have undesirable +effects on other 3rd-party library code too, so the right thing would +be to save/restore the FPU control word across Haskell code if we were +to do this. + +gcc's -ffloat-store gives consistent results by always storing the +results of floating-point calculations in memory, which works for both +32 and 64-bit precision. However, it only affects the values of +user-declared floating point variables in C, not intermediate results. +GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision +flag). + +Another problem is how to spill floating point registers in the +register allocator. Should we spill the whole 80 bits, or just 64? +On an OS which is set to 64 bit precision, spilling 64 is fine. On +Linux, spilling 64 bits will round the results of some operations. +This is what gcc does. Spilling at 80 bits requires taking up a full +128 bit slot (so we get alignment). We spill at 80-bits and ignore +the alignment problems. + +In the future, we'll use the SSE registers for floating point. 
This
+requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
+precision float ops), which means P4 or Xeon and above. Using SSE
+will solve all these problems, because the SSE registers use fixed 32
+bit or 64 bit precision.
+
+--SDM 1/2003
+-}
+
+
+data Instr
+        -- comment pseudo-op
+        = COMMENT FastString
+
+        -- some static data spat out during code
+        -- generation. Will be extracted before
+        -- pretty-printing.
+        | LDATA Section [CmmStatic]
+
+        -- start a new basic block. Useful during
+        -- codegen, removed later. Preceding
+        -- instruction should be a jump, as per the
+        -- invariants for a BasicBlock (see Cmm).
+        | NEWBLOCK BlockId
+
+        -- specify current stack offset for
+        -- benefit of subsequent passes
+        | DELTA Int
+
+        -- | spill this reg to a stack slot
+        | SPILL Reg Int
+
+        -- | reload this reg from a stack slot
+        | RELOAD Int Reg
+
+
+        -- Moves.
+        | MOV Size Operand Operand
+        | MOVZxL Size Operand Operand   -- size is the size of operand 1
+        | MOVSxL Size Operand Operand   -- size is the size of operand 1
+        -- x86_64 note: plain mov into a 32-bit register always zero-extends
+        -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
+        -- don't affect the high bits of the register.
+
+        -- Load effective address (also a very useful three-operand add instruction :-)
+        | LEA Size Operand Operand
+
+        -- Int Arithmetic.
+        | ADD Size Operand Operand
+        | ADC Size Operand Operand
+        | SUB Size Operand Operand
+
+        | MUL Size Operand Operand
+        | IMUL Size Operand Operand     -- signed int mul
+        | IMUL2 Size Operand            -- %edx:%eax = operand * %eax
+
+        | DIV Size Operand              -- eax := edx:eax / op, edx := edx:eax % op
+        | IDIV Size Operand             -- ditto, but signed
+
+        -- Simple bit-twiddling.
+        | AND Size Operand Operand
+        | OR Size Operand Operand
+        | XOR Size Operand Operand
+        | NOT Size Operand
+        | NEGI Size Operand             -- NEG instruction (name clash with Cond)
+
+        -- Shifts (amount may be immediate or %cl only)
+        | SHL Size Operand{-amount-} Operand
+        | SAR Size Operand{-amount-} Operand
+        | SHR Size Operand{-amount-} Operand
+
+        | BT Size Imm Operand
+        | NOP
+
+#if i386_TARGET_ARCH
+        -- Float Arithmetic.
+
+        -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
+        -- as single instructions right up until we spit them out.
+        -- all the 3-operand fake fp insns are src1 src2 dst
+        -- and furthermore are constrained to be fp regs only.
+        -- IMPORTANT: keep is_G_instr up to date with any changes here
+        | GMOV Reg Reg                  -- src(fpreg), dst(fpreg)
+        | GLD Size AddrMode Reg         -- src, dst(fpreg)
+        | GST Size Reg AddrMode         -- src(fpreg), dst
+
+        | GLDZ Reg                      -- dst(fpreg)
+        | GLD1 Reg                      -- dst(fpreg)
+
+        | GFTOI Reg Reg                 -- src(fpreg), dst(intreg)
+        | GDTOI Reg Reg                 -- src(fpreg), dst(intreg)
+
+        | GITOF Reg Reg                 -- src(intreg), dst(fpreg)
+        | GITOD Reg Reg                 -- src(intreg), dst(fpreg)
+
+        | GADD Size Reg Reg Reg         -- src1, src2, dst
+        | GDIV Size Reg Reg Reg         -- src1, src2, dst
+        | GSUB Size Reg Reg Reg         -- src1, src2, dst
+        | GMUL Size Reg Reg Reg         -- src1, src2, dst
+
+        -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
+        -- Compare src1 with src2; set the Zero flag iff the numbers are
+        -- comparable and the comparison is True. Subsequent code must
+        -- test the %eflags zero flag regardless of the supplied Cond.
+        | GCMP Cond Reg Reg             -- src1, src2
+
+        | GABS Size Reg Reg             -- src, dst
+        | GNEG Size Reg Reg             -- src, dst
+        | GSQRT Size Reg Reg            -- src, dst
+        | GSIN Size CLabel CLabel Reg Reg -- label, label, src, dst (NOTE(review): say what the labels are for)
+        | GCOS Size CLabel CLabel Reg Reg -- label, label, src, dst (as for GSIN)
+        | GTAN Size CLabel CLabel Reg Reg -- label, label, src, dst (as for GSIN)
+
+        | GFREE                         -- do ffree on all x86 regs; an ugly hack
+#endif
+
+#if x86_64_TARGET_ARCH
+-- SSE2 floating point: we use a restricted set of the available SSE2
+-- instructions for floating-point.
+
+        -- use MOV for moving (either movss or movsd (movlpd better?))
+
+        | CVTSS2SD Reg Reg              -- F32 to F64
+        | CVTSD2SS Reg Reg              -- F64 to F32
+        | CVTTSS2SIQ Operand Reg        -- F32 to I32/I64 (with truncation)
+        | CVTTSD2SIQ Operand Reg        -- F64 to I32/I64 (with truncation)
+        | CVTSI2SS Operand Reg          -- I32/I64 to F32
+        | CVTSI2SD Operand Reg          -- I32/I64 to F64
+
+        -- use ADD & SUB for arithmetic. In both cases, operands
+        -- are Operand Reg.
+
+        -- SSE2 floating-point division:
+        | FDIV Size Operand Operand     -- divisor, dividend(dst)
+
+        -- use CMP for comparisons. ucomiss and ucomisd instructions
+        -- compare single/double prec floating point respectively.
+
+        | SQRT Size Operand Reg         -- src, dst
+#endif
+
+        -- Comparison
+        | TEST Size Operand Operand
+        | CMP Size Operand Operand
+        | SETCC Cond Operand
+
+        -- Stack Operations.
+        | PUSH Size Operand
+        | POP Size Operand
+        -- both unused (SDM):
+        -- | PUSHA
+        -- | POPA
+
+        -- Jumping around.
+        | JMP Operand
+        | JXX Cond BlockId              -- includes unconditional branches
+        | JXX_GBL Cond Imm              -- non-local version of JXX
+        | JMP_TBL Operand [BlockId]     -- table jump
+        | CALL (Either Imm Reg) [Reg]
+
+        -- Other things.
+        | CLTD Size                     -- sign extend %eax into %edx:%eax
+
+        | FETCHGOT Reg                  -- pseudo-insn for ELF position-independent code
+                                        -- pretty-prints as
+                                        --      call 1f
+                                        -- 1:   popl %reg
+                                        --      addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
+        | FETCHPC Reg                   -- pseudo-insn for Darwin position-independent code
+                                        -- pretty-prints as
+                                        --      call 1f
+                                        -- 1:   popl %reg
+
+
+data Operand
+        = OpReg Reg                     -- register
+        | OpImm Imm                     -- immediate value
+        | OpAddr AddrMode               -- memory reference
+
+
+#if i386_TARGET_ARCH
+i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr]
+i386_insert_ffrees blocks
+   | or (map (any is_G_instr) [ instrs | BasicBlock id instrs <- blocks ])  -- some block holds a fake-FP (G*) insn
+   = map ffree_before_nonlocal_transfers blocks
+   | otherwise                          -- no G* insns anywhere: blocks are returned unchanged
+   = blocks
+  where
+    ffree_before_nonlocal_transfers (BasicBlock id insns)
+        = BasicBlock id (foldr p [] insns)
+        where p insn r = case insn of   -- put a GFREE in front of every CALL and JMP
+                CALL _ _ -> GFREE : insn : r
+                JMP _ -> GFREE : insn : r
+                other -> insn : r
+
+-- True for the fake x86 FP (G*) insns.  If you ever add a new FP insn to
+-- the fake x86 FP insn set, you must update this too
+is_G_instr :: Instr -> Bool
+is_G_instr instr
+   = case instr of
+        GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
+        GLDZ _ -> True; GLD1 _ -> True
+        GFTOI _ _ -> True; GDTOI _ _ -> True
+        GITOF _ _ -> True; GITOD _ _ -> True
+        GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
+        GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
+        GCMP _ _ _ -> True; GABS _ _ _ -> True
+        GNEG _ _ _ -> True; GSQRT _ _ _ -> True
+        GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True
+        GFREE -> panic "is_G_instr: GFREE (!)"  -- a GFREE in the input is a panic, not False
+        other -> False
+#endif /* i386_TARGET_ARCH */
-- 
1.7.10.4