From aff2e3f0c86af7f6cfbda20b672e9172c439ad55 Mon Sep 17 00:00:00 2001
From: simonmar
Date: Fri, 8 Apr 2005 09:54:54 +0000
Subject: [PATCH] [project @ 2005-04-08 09:54:54 by simonmar]

x86_64 hacking:

  - use %rip-relative addressing in a couple of places
  - floating-point comparisons handle NaN properly

I believe the x86_64 NCG is now ready for prime time.  It is
successfully bootstrapping the compiler, and I think this fixes the
last of the test failures.
---
 ghc/compiler/nativeGen/MachCodeGen.hs  | 136 ++++++++++++++++++++++----------
 ghc/compiler/nativeGen/MachInstrs.hs   |   2 +
 ghc/compiler/nativeGen/MachRegs.lhs    |  19 +++--
 ghc/compiler/nativeGen/PprMach.hs      |  26 +++---
 ghc/compiler/nativeGen/RegAllocInfo.hs |  27 ++++---
 5 files changed, 140 insertions(+), 70 deletions(-)

diff --git a/ghc/compiler/nativeGen/MachCodeGen.hs b/ghc/compiler/nativeGen/MachCodeGen.hs
index f782577..3ab97cb 100644
--- a/ghc/compiler/nativeGen/MachCodeGen.hs
+++ b/ghc/compiler/nativeGen/MachCodeGen.hs
@@ -809,8 +809,7 @@ getRegister (CmmLit (CmmFloat f rep)) = do
 	LDATA ReadOnlyData
 		[CmmDataLabel lbl,
 		 CmmStaticLit (CmmFloat f rep)],
-	MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
-		-- ToDo: should use %rip-relative
+	MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 	]
     -- in
     return (Any rep code)
@@ -882,7 +881,7 @@ getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 		 CmmStaticLit (CmmInt 0 I32),
 		 CmmStaticLit (CmmInt 0 I32)
 		],
-	XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+	XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 		-- xorps, so we need the 128-bit constant
 		-- ToDo: rip-relative
 	]
@@ -902,9 +901,8 @@ getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 		 CmmStaticLit (CmmInt 0 I64)
 		],
 	-- gcc puts an unpck here. Wonder if we need it.
-	XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+	XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 		-- xorpd, so we need the 128-bit constant
-		-- ToDo: rip-relative
 	]
     --
     return (Any F64 code)
@@ -1155,7 +1153,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 	code dst = x_code `snocOL`
 		LEA rep
-		(OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
+		(OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
 		(OpReg dst)
     --
     return (Any rep code)
@@ -1841,14 +1839,14 @@ getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
   -- ASSERT(rep == I32)???
   = do (x_reg, x_code) <- getSomeReg x
        let off = ImmInt (-(fromInteger i))
-       return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
 
 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
   | not (is64BitLit lit)
   -- ASSERT(rep == I32)???
   = do (x_reg, x_code) <- getSomeReg x
        let off = ImmInt (fromInteger i)
-       return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
 
 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
 -- recognised by the next rule.
@@ -1866,7 +1864,7 @@ getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
   let
 	code = x_code `appOL` y_code
 	base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-  return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
+  return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
 	code)
 
 getAmode (CmmLit lit) | not (is64BitLit lit)
@@ -1874,7 +1872,7 @@ getAmode (CmmLit lit) | not (is64BitLit lit)
 
 getAmode expr = do
   (reg,code) <- getSomeReg expr
-  return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
+  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
 
 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
 
@@ -2008,7 +2006,7 @@ getNonClobberedOperand (CmmLoad mem pk)
     if (amodeCouldBeClobbered src)
     then do
       tmp <- getNewRegNat wordRep
-      return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
+      return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
 	      unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
     else
       return (src, nilOL)
@@ -2238,10 +2236,10 @@ condFltCode cond x y = do
 
 	code = x_code `appOL` y_code `snocOL`
 		  CMP (cmmExprRep x) y_op (OpReg x_reg)
-  -- in
-  return (CondCode False (condToUnsigned cond) code)
-	-- we need to use the unsigned comparison operators on the
+	-- NB(1): we need to use the unsigned comparison operators on the
 	-- result of this comparison.
+  -- in
+  return (CondCode True (condToUnsigned cond) code)
 
 #endif
 
@@ -2888,14 +2886,51 @@ genCondJump lbl (StPrim op [x, y])
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+#if i386_TARGET_ARCH
 
 genCondJump id bool = do
   CondCode _ cond code <- getCondCode bool
   return (code `snocOL` JXX cond id)
 
-#endif /* i386_TARGET_ARCH */
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCondJump id bool = do
+  CondCode is_float cond cond_code <- getCondCode bool
+  if not is_float
+    then
+	return (cond_code `snocOL` JXX cond id)
+    else do
+	lbl <- getBlockIdNat
+
+	-- see comment with condFltReg
+	let code = case cond of
+			NE  -> or_unordered
+			GU  -> plain_test
+			GEU -> plain_test
+			_   -> and_ordered
+
+	    plain_test = unitOL (
+		  JXX cond id
+		)
+	    or_unordered = toOL [
+		  JXX cond id,
+		  JXX PARITY id
+		]
+	    and_ordered = toOL [
+		  JXX PARITY lbl,
+		  JXX cond id,
+		  JXX ALWAYS lbl,
+		  NEWBLOCK lbl
+		]
+	return (cond_code `appOL` code)
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
 #if sparc_TARGET_ARCH
@@ -3762,7 +3797,7 @@ genSwitch expr ids = do
   lbl <- getNewLabelNat
   let
     jumpTable = map jumpTableEntry ids
-    op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
+    op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
     code = e_code `appOL` toOL [
	    LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
	    JMP_TBL op [ id | Just id <- ids ]
@@ -3854,38 +3889,57 @@ condIntReg cond x y = do
     let
	code dst = cond_code `appOL` toOL [
	    SETCC cond (OpReg tmp),
-	    MOV I32 (OpReg tmp) (OpReg dst),
-	    AND I32 (OpImm (ImmInt 1)) (OpReg dst)
+	    MOVZxL I8 (OpReg tmp) (OpReg dst)
	   ]
-	-- NB. (1) Tha AND is needed here because the x86 only
-	-- sets the low byte in the SETCC instruction.
-	-- NB. (2) The extra temporary register is a hack to
-	-- work around the fact that the setcc instructions only
-	-- accept byte registers.  dst might not be a byte-able reg,
-	-- but currently all free registers are byte-able, so we're
-	-- guaranteed that a new temporary is byte-able.
     -- in
     return (Any I32 code)
 
-
 condFltReg cond x y = do
-  lbl1 <- getBlockIdNat
-  lbl2 <- getBlockIdNat
   CondCode _ cond cond_code <- condFltCode cond x y
-  let
-    code dst = cond_code `appOL` toOL [
-	JXX cond lbl1,
-	MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
-	JXX ALWAYS lbl2,
-	NEWBLOCK lbl1,
-	MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
-	JXX ALWAYS lbl2,
-	NEWBLOCK lbl2]
-	-- SIGH, have to split up this block somehow...
+  tmp1 <- getNewRegNat wordRep
+  tmp2 <- getNewRegNat wordRep
+  let
+	-- We have to worry about unordered operands (eg. comparisons
+	-- against NaN).  If the operands are unordered, the comparison
+	-- sets the parity flag, carry flag and zero flag.
+	-- All comparisons are supposed to return false for unordered
+	-- operands except for !=, which returns true.
+	--
+	-- Optimisation: we don't have to test the parity flag if we
+	-- know the test has already excluded the unordered case: eg >
+	-- and >= test for a zero carry flag, which can only occur for
+	-- ordered operands.
+	--
+	-- ToDo: by reversing comparisons we could avoid testing the
+	-- parity flag in more cases.
+
+	code dst =
+	   cond_code `appOL`
+	     (case cond of
+		NE  -> or_unordered dst
+		GU  -> plain_test dst
+		GEU -> plain_test dst
+		_   -> and_ordered dst)
+
+	plain_test dst = toOL [
+		    SETCC cond (OpReg tmp1),
+		    MOVZxL I8 (OpReg tmp1) (OpReg dst)
+		 ]
+	or_unordered dst = toOL [
+		    SETCC cond (OpReg tmp1),
+		    SETCC PARITY (OpReg tmp2),
+		    OR I8 (OpReg tmp1) (OpReg tmp2),
+		    MOVZxL I8 (OpReg tmp2) (OpReg dst)
+		  ]
+	and_ordered dst = toOL [
+		    SETCC cond (OpReg tmp1),
+		    SETCC NOTPARITY (OpReg tmp2),
+		    AND I8 (OpReg tmp1) (OpReg tmp2),
+		    MOVZxL I8 (OpReg tmp2) (OpReg dst)
+		  ]
     -- in
     return (Any I32 code)
-
-#endif /* i386_TARGET_ARCH */
+#endif
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
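The recurring change in MachCodeGen.hs above is the literal-pool addressing: `ImmAddr (ImmCLbl lbl) 0` printed an absolute displacement, which only works while code and data sit in the low 32-bit address range, whereas `ripRel (ImmCLbl lbl)` prints a signed 32-bit offset from the instruction pointer. A toy rendering of the difference (the types and names here are illustrative, not GHC's):

    -- Illustrative only: why a label used as a memory operand on x86_64
    -- is printed as  lbl(%rip)  rather than bare  lbl.
    data ToyAddr
      = Absolute String         -- absolute displacement: breaks above 4GB
      | RipRelative String      -- signed 32-bit offset from the next instruction

    renderToy :: ToyAddr -> String
    renderToy (Absolute lbl)    = lbl              -- e.g.  movsd lbl, %xmm0
    renderToy (RipRelative lbl) = lbl ++ "(%rip)"  -- e.g.  movsd lbl(%rip), %xmm0

    main :: IO ()
    main = mapM_ (putStrLn . renderToy) [Absolute "lbl", RipRelative "lbl"]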
diff --git a/ghc/compiler/nativeGen/MachInstrs.hs b/ghc/compiler/nativeGen/MachInstrs.hs
index 40d1766..c86f3d1 100644
--- a/ghc/compiler/nativeGen/MachInstrs.hs
+++ b/ghc/compiler/nativeGen/MachInstrs.hs
@@ -85,6 +85,8 @@ data Cond
 	| POS
 	| CARRY
 	| OFLO
+	| PARITY
+	| NOTPARITY
 #endif
 #if sparc_TARGET_ARCH
 	= ALWAYS	-- What's really used? ToDo
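PARITY and NOTPARITY are added to Cond because SSE's ucomiss/ucomisd report an unordered result (either operand NaN) by setting ZF, PF and CF all at once, so the parity flag is the only thing that distinguishes "equal" from "unordered". A small model of the flag patterns and of how the new setp/setnp bytes combine with the primary test (our sketch, not GHC code):

    -- ZF PF CF after ucomisd:
    --   greater    0  0  0
    --   less       0  0  1
    --   equal      1  0  0
    --   unordered  1  1  1   (either operand is NaN)
    data Flags = Flags { zf, pf, cf :: Bool }

    -- setp / setnp read PF, exactly what PARITY/NOTPARITY print as.
    setp, setnp :: Flags -> Bool
    setp  = pf
    setnp = not . pf

    -- or_unordered: the setne byte OR'd with setp, so unordered makes (/=) True.
    neResult :: Flags -> Bool
    neResult f = not (zf f) || setp f

    -- and_ordered: AND with setnp, so unordered makes (<) come out False.
    ltResult :: Flags -> Bool
    ltResult f = cf f && setnp f

    main :: IO ()
    main = print (neResult unordered, ltResult unordered)  -- (True,False)
      where unordered = Flags True True True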
diff --git a/ghc/compiler/nativeGen/MachRegs.lhs b/ghc/compiler/nativeGen/MachRegs.lhs
index d82922c..0d048e3 100644
--- a/ghc/compiler/nativeGen/MachRegs.lhs
+++ b/ghc/compiler/nativeGen/MachRegs.lhs
@@ -46,11 +46,13 @@ module MachRegs (
 	gp, pv, ra, sp, t9, t10, t11, t12, v0, f0, zeroh,
 #endif
 #if i386_TARGET_ARCH
+	EABase(..), EAIndex(..),
 	eax, ebx, ecx, edx, esi, edi, ebp, esp,
 	fake0, fake1, fake2, fake3, fake4, fake5,
 	addrModeRegs,
 #endif
 #if x86_64_TARGET_ARCH
+	EABase(..), EAIndex(..), ripRel,
 	rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
 	eax, ebx, ecx, edx, esi, edi, ebp, esp,
 	r8, r9, r10, r11, r12, r13, r14, r15,
@@ -150,11 +152,11 @@ data AddrMode
 #endif
 
 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-      = AddrBaseIndex	Base Index Displacement
+      = AddrBaseIndex	EABase EAIndex Displacement
 	| ImmAddr Imm Int
 
-type Base         = Maybe Reg
-type Index        = Maybe (Reg, Int)	-- Int is 2, 4 or 8
+data EABase       = EABaseNone | EABaseReg Reg | EABaseRip
+data EAIndex      = EAIndexNone | EAIndex Reg Int
 type Displacement = Imm
 #endif
@@ -172,8 +174,8 @@ type Displacement = Imm
 addrModeRegs :: AddrMode -> [Reg]
 addrModeRegs (AddrBaseIndex b i _) =  b_regs ++ i_regs
   where
-   b_regs = case b of { Just r -> [r]; _ -> [] }
-   i_regs = case i of { Just (r,_) -> [r]; _ -> [] }
+   b_regs = case b of { EABaseReg r -> [r]; _ -> [] }
+   i_regs = case i of { EAIndex r _ -> [r]; _ -> [] }
 addrModeRegs _ = []
 #endif
@@ -289,9 +291,9 @@
 spRel :: Int	-- desired stack offset in words, positive or negative
 spRel n
 #if defined(i386_TARGET_ARCH)
-  = AddrBaseIndex (Just esp) Nothing (ImmInt (n * wORD_SIZE))
+  = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE))
 #elif defined(x86_64_TARGET_ARCH)
-  = AddrBaseIndex (Just rsp) Nothing (ImmInt (n * wORD_SIZE))
+  = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE))
 #else
   = AddrRegImm sp (ImmInt (n * wORD_SIZE))
 #endif
@@ -304,6 +306,9 @@ fpRel n
 	= AddrRegImm fp (ImmInt (n * wORD_SIZE))
 #endif
 
+#if x86_64_TARGET_ARCH
+ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm
+#endif
 
 -- -----------------------------------------------------------------------------
 -- Global registers
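Replacing `Maybe Reg` / `Maybe (Reg, Int)` with dedicated sum types is what makes `ripRel` possible: %rip becomes a base register that exists at the syntax level but carries no allocatable register. A self-contained mirror of the idea (Reg simplified to String; this is our sketch, not the MachRegs code):

    type Reg = String
    data EABase  = EABaseNone | EABaseReg Reg | EABaseRip
    data EAIndex = EAIndexNone | EAIndex Reg Int

    -- Analogue of addrModeRegs: the register allocator only ever sees
    -- real registers; EABaseRip falls through the wildcard exactly like
    -- EABaseNone, which Maybe Reg could not express without a sentinel.
    eaRegs :: EABase -> EAIndex -> [Reg]
    eaRegs b i = b_regs ++ i_regs
      where b_regs = case b of { EABaseReg r -> [r]; _ -> [] }
            i_regs = case i of { EAIndex r _ -> [r]; _ -> [] }

    main :: IO ()
    main = print (eaRegs EABaseRip (EAIndex "rbx" 8))   -- ["rbx"]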
diff --git a/ghc/compiler/nativeGen/PprMach.hs b/ghc/compiler/nativeGen/PprMach.hs
index c6501a0..51f5f47 100644
--- a/ghc/compiler/nativeGen/PprMach.hs
+++ b/ghc/compiler/nativeGen/PprMach.hs
@@ -446,6 +446,7 @@ pprCond c = ptext (case c of {
 	LEU	-> SLIT("be");	NE	-> SLIT("ne");
 	NEG	-> SLIT("s");	POS	-> SLIT("ns");
 	CARRY	-> SLIT("c");	OFLO	-> SLIT("o");
+	PARITY	-> SLIT("p");	NOTPARITY -> SLIT("np");
 	ALWAYS	-> SLIT("mp")	-- hack
 #endif
 #if sparc_TARGET_ARCH
@@ -480,8 +481,8 @@ pprImm (ImmCLbl l)    = pprCLabel_asm l
 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
 pprImm (ImmLit s)     = s
 
-pprImm (ImmFloat _) = panic "pprImm:ImmFloat"
-pprImm (ImmDouble _) = panic "pprImm:ImmDouble"
+pprImm (ImmFloat _)  = ptext SLIT("naughty float immediate")
+pprImm (ImmDouble _) = ptext SLIT("naughty double immediate")
 
 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
@@ -560,10 +561,11 @@ pprAddr (AddrBaseIndex base index displacement)
 	pp_reg r  = pprReg wordRep r
     in
     case (base,index) of
-      (Nothing, Nothing)    -> pp_disp
-      (Just b,  Nothing)    -> pp_off (pp_reg b)
-      (Nothing, Just (r,i)) -> pp_off (comma <> pp_reg r <> comma <> int i)
-      (Just b,  Just (r,i)) -> pp_off (pp_reg b <> comma <> pp_reg r
+      (EABaseNone,  EAIndexNone) -> pp_disp
+      (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
+      (EABaseRip,   EAIndexNone) -> pp_off (ptext SLIT("%rip"))
+      (EABaseNone,  EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
+      (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
 				       <> comma <> int i)
   where
     ppr_disp (ImmInt 0) = empty
@@ -1202,18 +1204,22 @@ pprInstr (MOVZxL I32 src dst) = pprSizeOpOp SLIT("mov") I32 src dst
 	-- the reg alloc would tend to throw away a plain reg-to-reg
 	-- move, and we still want it to do that.
 
-pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes wordRep src dst
+pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes I32 src dst
+	-- zero-extension only needs to extend to 32 bits: on x86_64,
+	-- the remaining zero-extension to 64 bits is automatic, and the 32-bit
+	-- instruction is shorter.
+
 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce SLIT("movs") sizes wordRep src dst
 
 -- here we do some patching, since the physical registers are only set late
 -- in the code generation.
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) (Just (reg2,1)) (ImmInt 0))) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
   | reg1 == reg3
   = pprSizeOpOp SLIT("add") size (OpReg reg2) dst
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) (Just (reg2,1)) (ImmInt 0))) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
  | reg2 == reg3
  = pprSizeOpOp SLIT("add") size (OpReg reg1) dst
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) Nothing displ)) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
   | reg1 == reg3
   = pprInstr (ADD size (OpImm displ) dst)
 
 pprInstr (LEA size src dst) = pprSizeOpOp SLIT("lea") size src dst
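The `MOVZxL sizes src dst` change relies on an x86_64 architectural rule: any write to a 32-bit register implicitly zeroes bits 63-32, so zero-extending to I32 already yields a zero-extended 64-bit value, and the 32-bit form avoids a REX prefix byte. A sketch of the mnemonic choice (our function, not pprInstr itself):

    -- Why MOVZxL can always target a 32-bit destination on x86_64:
    -- "movzbl %al, %eax" already zero-extends to 64 bits and is one
    -- byte shorter than "movzbq %al, %rax".
    movzName :: Int -> String
    movzName fromBits = "movz" ++ suffix fromBits ++ "l"   -- always extend to 32
      where suffix 8  = "b"
            suffix 16 = "w"
            suffix _  = error "movz: source must be 8 or 16 bits"

    main :: IO ()
    main = mapM_ (putStrLn . movzName) [8, 16]   -- movzbl, movzwl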
diff --git a/ghc/compiler/nativeGen/RegAllocInfo.hs b/ghc/compiler/nativeGen/RegAllocInfo.hs
index f6b3131..86630cf 100644
--- a/ghc/compiler/nativeGen/RegAllocInfo.hs
+++ b/ghc/compiler/nativeGen/RegAllocInfo.hs
@@ -258,11 +258,13 @@ regUsage instr = case instr of
     use_R (OpAddr ea)  = use_EA ea
 
     -- Registers used to compute an effective address.
-    use_EA (ImmAddr _ _) = []
-    use_EA (AddrBaseIndex Nothing Nothing _) = []
-    use_EA (AddrBaseIndex (Just b) Nothing _) = [b]
-    use_EA (AddrBaseIndex Nothing (Just (i,_)) _) = [i]
-    use_EA (AddrBaseIndex (Just b) (Just (i,_)) _) = [b,i]
+    use_EA (ImmAddr _ _) = []
+    use_EA (AddrBaseIndex base index _) =
+	use_base base $! use_index index
+	where use_base (EABaseReg r) x = r : x
+	      use_base _ x             = x
+	      use_index EAIndexNone   = []
+	      use_index (EAIndex i _) = [i]
 
     mkRU src dst = RU (filter interesting src)
 		      (filter interesting dst)
@@ -555,19 +557,20 @@ patchRegs instr env = case instr of
     patch1 insn op      = insn $! patchOp op
     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 
-    patchOp (OpReg  reg) = OpReg (env reg)
+    patchOp (OpReg  reg) = OpReg $! env reg
     patchOp (OpImm  imm) = OpImm imm
-    patchOp (OpAddr ea)  = OpAddr (lookupAddr ea)
+    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 
     lookupAddr (ImmAddr imm off) = ImmAddr imm off
     lookupAddr (AddrBaseIndex base index disp)
-      = AddrBaseIndex (lookupBase base) (lookupIndex index) disp
+      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
       where
-	lookupBase Nothing       = Nothing
-	lookupBase (Just r)      = Just (env r)
+	lookupBase EABaseNone       = EABaseNone
+	lookupBase EABaseRip        = EABaseRip
+	lookupBase (EABaseReg r)    = EABaseReg (env r)
 
-	lookupIndex Nothing      = Nothing
-	lookupIndex (Just (r,i)) = Just (env r, i)
+	lookupIndex EAIndexNone     = EAIndexNone
+	lookupIndex (EAIndex r i)   = EAIndex (env r) i
 
 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH*/
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
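The `$!` applications threaded through patchOp and lookupAddr force each rewritten register while the patching environment is still at hand, instead of leaving `env reg` thunks inside the rebuilt addressing modes. A cut-down illustration of the same idea (our names, not GHC's):

    -- Forcing the new register with ($!) evaluates `env r` during
    -- patching, so the allocation-time register map is not retained
    -- inside an unevaluated EABaseReg field.
    data EABase r = EABaseNone | EABaseReg r | EABaseRip

    patchBase :: (r -> r) -> EABase r -> EABase r
    patchBase env (EABaseReg r) = EABaseReg $! env r   -- force now
    patchBase _   EABaseNone    = EABaseNone
    patchBase _   EABaseRip     = EABaseRip

    main :: IO ()
    main = case patchBase (+ (1 :: Int)) (EABaseReg 41) of
             EABaseReg r -> print r                    -- 42
             _           -> return ()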
-- 
1.7.10.4
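As a closing check, the jump sequences added in genCondJump encode the same NaN-correct booleans as the setcc sequences in condFltReg. The harness below is entirely ours; it replays the flag model against the Prelude's Double comparisons:

    -- (zf, pf, cf) as SSE ucomisd sets them.
    flags :: Double -> Double -> (Bool, Bool, Bool)
    flags x y
      | isNaN x || isNaN y = (True, True, True)     -- unordered
      | x < y              = (False, False, True)
      | x > y              = (False, False, False)
      | otherwise          = (True, False, False)   -- equal

    ne, gt, lt :: Double -> Double -> Bool
    ne x y = let (z, p, _) = flags x y in not z || p    -- or_unordered: jne id; jp id
    gt x y = let (z, _, c) = flags x y in not (z || c)  -- plain ja: CF=1 already excludes NaN
    lt x y = let (z, p, c) = flags x y in c && not p    -- and_ordered: jp skips the branch

    main :: IO ()
    main = do
      let xs = [0 / 0, 1, 2]                               -- 0/0 :: Double is NaN
      print $ and [ ne a b == (a /= b) | a <- xs, b <- xs ]  -- True
      print $ and [ gt a b == (a >  b) | a <- xs, b <- xs ]  -- True
      print $ and [ lt a b == (a <  b) | a <- xs, b <- xs ]  -- True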