From aff2e3f0c86af7f6cfbda20b672e9172c439ad55 Mon Sep 17 00:00:00 2001
From: simonmar
Date: Fri, 8 Apr 2005 09:54:54 +0000
Subject: [PATCH] [project @ 2005-04-08 09:54:54 by simonmar]

x86_64 hacking:

  - use %rip-relative addressing in a couple of places
  - floating-point comparisons handle NaN properly

I believe the x86_64 NCG is now ready for prime time.  It is
successfully bootstrapping the compiler, and I think this fixes the
last of the test failures.
---
 ghc/compiler/nativeGen/MachCodeGen.hs  | 136 ++++++++++++++++++++++----------
 ghc/compiler/nativeGen/MachInstrs.hs   |   2 +
 ghc/compiler/nativeGen/MachRegs.lhs    |  19 +++--
 ghc/compiler/nativeGen/PprMach.hs      |  26 +++---
 ghc/compiler/nativeGen/RegAllocInfo.hs |  27 ++++---
 5 files changed, 140 insertions(+), 70 deletions(-)

diff --git a/ghc/compiler/nativeGen/MachCodeGen.hs b/ghc/compiler/nativeGen/MachCodeGen.hs
index f782577..3ab97cb 100644
--- a/ghc/compiler/nativeGen/MachCodeGen.hs
+++ b/ghc/compiler/nativeGen/MachCodeGen.hs
@@ -809,8 +809,7 @@ getRegister (CmmLit (CmmFloat f rep)) = do
 	LDATA ReadOnlyData
 		[CmmDataLabel lbl,
 		 CmmStaticLit (CmmFloat f rep)],
-	MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
-		-- ToDo: should use %rip-relative
+	MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 	]
     -- in
     return (Any rep code)
@@ -882,7 +881,7 @@ getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
 		 CmmStaticLit (CmmInt 0 I32),
 		 CmmStaticLit (CmmInt 0 I32)
 		],
-	XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+	XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 		-- xorps, so we need the 128-bit constant
 		-- ToDo: rip-relative
 	]
@@ -902,9 +901,8 @@ getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
 		 CmmStaticLit (CmmInt 0 I64)
 		],
 	-- gcc puts an unpck here. Wonder if we need it.
-	XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+	XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
 		-- xorpd, so we need the 128-bit constant
-		-- ToDo: rip-relative
 	]
     --
     return (Any F64 code)
@@ -1155,7 +1153,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 	code dst = x_code `snocOL`
 		LEA rep
-		(OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
+		(OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
 		(OpReg dst)
     --
     return (Any rep code)
@@ -1841,14 +1839,14 @@ getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
   -- ASSERT(rep == I32)???
   = do (x_reg, x_code) <- getSomeReg x
        let off = ImmInt (-(fromInteger i))
-       return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
 
 getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
   | not (is64BitLit lit)
   -- ASSERT(rep == I32)???
   = do (x_reg, x_code) <- getSomeReg x
        let off = ImmInt (fromInteger i)
-       return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
 
 -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
 -- recognised by the next rule.
@@ -1866,7 +1864,7 @@ getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
   let
 	code = x_code `appOL` y_code
 	base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-  return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
+  return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
 	code)
 
 getAmode (CmmLit lit) | not (is64BitLit lit)
@@ -1874,7 +1872,7 @@ getAmode (CmmLit lit) | not (is64BitLit lit)
 
 getAmode expr = do
   (reg,code) <- getSomeReg expr
-  return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
+  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
 
 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
 
@@ -2008,7 +2006,7 @@ getNonClobberedOperand (CmmLoad mem pk)
     if (amodeCouldBeClobbered src)
     then do
       tmp <- getNewRegNat wordRep
-      return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
+      return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
 	      unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
     else
       return (src, nilOL)
@@ -2238,10 +2236,10 @@ condFltCode cond x y = do
 
 	code = x_code `appOL` y_code `snocOL`
 		  CMP (cmmExprRep x) y_op (OpReg x_reg)
-  -- in
-  return (CondCode False (condToUnsigned cond) code)
-	-- we need to use the unsigned comparison operators on the
+	-- NB(1): we need to use the unsigned comparison operators on the
 	-- result of this comparison.
+  -- in
+  return (CondCode True (condToUnsigned cond) code)
 
 #endif
 
@@ -2888,14 +2886,51 @@ genCondJump lbl (StPrim op [x, y])
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+#if i386_TARGET_ARCH
 
 genCondJump id bool = do
   CondCode _ cond code <- getCondCode bool
   return (code `snocOL` JXX cond id)
 
-#endif /* i386_TARGET_ARCH */
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCondJump id bool = do
+  CondCode is_float cond cond_code <- getCondCode bool
+  if not is_float
+    then
+	return (cond_code `snocOL` JXX cond id)
+    else do
+	lbl <- getBlockIdNat
+
+	-- see comment with condFltReg
+	let code = case cond of
+			NE  -> or_unordered
+			GU  -> plain_test
+			GEU -> plain_test
+			_   -> and_ordered
+
+	    plain_test = unitOL (
+		  JXX cond id
+		)
+	    or_unordered = toOL [
+		  JXX cond id,
+		  JXX PARITY id
+		]
+	    and_ordered = toOL [
+		  JXX PARITY lbl,
+		  JXX cond id,
+		  JXX ALWAYS lbl,
+		  NEWBLOCK lbl
+		]
+	return (cond_code `appOL` code)
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
 #if sparc_TARGET_ARCH
@@ -3762,7 +3797,7 @@ genSwitch expr ids = do
   lbl <- getNewLabelNat
   let
     jumpTable = map jumpTableEntry ids
-    op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
+    op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
     code = e_code `appOL` toOL [
	    LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
	    JMP_TBL op [ id | Just id <- ids ]
@@ -3854,38 +3889,57 @@ condIntReg cond x y = do
     let
	code dst = cond_code `appOL` toOL [
	    SETCC cond (OpReg tmp),
-	    MOV I32 (OpReg tmp) (OpReg dst),
-	    AND I32 (OpImm (ImmInt 1)) (OpReg dst)
+	    MOVZxL I8 (OpReg tmp) (OpReg dst)
	   ]
-	-- NB. (1) Tha AND is needed here because the x86 only
-	-- sets the low byte in the SETCC instruction.
-	-- NB. (2) The extra temporary register is a hack to
-	-- work around the fact that the setcc instructions only
-	-- accept byte registers.  dst might not be a byte-able reg,
-	-- but currently all free registers are byte-able, so we're
-	-- guaranteed that a new temporary is byte-able.
     -- in
     return (Any I32 code)
 
-
 condFltReg cond x y = do
-  lbl1 <- getBlockIdNat
-  lbl2 <- getBlockIdNat
   CondCode _ cond cond_code <- condFltCode cond x y
-  let
-    code dst = cond_code `appOL` toOL [
-	JXX cond lbl1,
-	MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
-	JXX ALWAYS lbl2,
-	NEWBLOCK lbl1,
-	MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
-	JXX ALWAYS lbl2,
-	NEWBLOCK lbl2]
-	-- SIGH, have to split up this block somehow...
+  tmp1 <- getNewRegNat wordRep
+  tmp2 <- getNewRegNat wordRep
+  let
+	-- We have to worry about unordered operands (eg. comparisons
+	-- against NaN).  If the operands are unordered, the comparison
+	-- sets the parity flag, carry flag and zero flag.
+	-- All comparisons are supposed to return false for unordered
+	-- operands except for !=, which returns true.
+	--
+	-- Optimisation: we don't have to test the parity flag if we
+	-- know the test has already excluded the unordered case: eg >
+	-- and >= test for a zero carry flag, which can only occur for
+	-- ordered operands.
+	--
+	-- ToDo: by reversing comparisons we could avoid testing the
+	-- parity flag in more cases.
+
+	code dst =
+	   cond_code `appOL`
+	     (case cond of
+		NE  -> or_unordered dst
+		GU  -> plain_test dst
+		GEU -> plain_test dst
+		_   -> and_ordered dst)
+
+	plain_test dst = toOL [
+		    SETCC cond (OpReg tmp1),
+		    MOVZxL I8 (OpReg tmp1) (OpReg dst)
+		 ]
+	or_unordered dst = toOL [
+		    SETCC cond (OpReg tmp1),
+		    SETCC PARITY (OpReg tmp2),
+		    OR I8 (OpReg tmp1) (OpReg tmp2),
+		    MOVZxL I8 (OpReg tmp2) (OpReg dst)
+		  ]
+	and_ordered dst = toOL [
+		    SETCC cond (OpReg tmp1),
+		    SETCC NOTPARITY (OpReg tmp2),
+		    AND I8 (OpReg tmp1) (OpReg tmp2),
+		    MOVZxL I8 (OpReg tmp2) (OpReg dst)
+		  ]
     -- in
     return (Any I32 code)
-
-#endif /* i386_TARGET_ARCH */
+#endif
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
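The recurring change in MachCodeGen.hs above is the literal-pool addressing: `ImmAddr (ImmCLbl lbl) 0` printed an absolute displacement, which only works while code and data sit in the low 32-bit address range, whereas `ripRel (ImmCLbl lbl)` prints a signed 32-bit offset from the instruction pointer. A toy rendering of the difference (the types and names here are illustrative, not GHC's):

    -- Illustrative only: why a label used as a memory operand on x86_64
    -- is printed as  lbl(%rip)  rather than bare  lbl.
    data ToyAddr
      = Absolute String         -- absolute displacement: breaks above 4GB
      | RipRelative String      -- signed 32-bit offset from the next instruction

    renderToy :: ToyAddr -> String
    renderToy (Absolute lbl)    = lbl              -- e.g.  movsd lbl, %xmm0
    renderToy (RipRelative lbl) = lbl ++ "(%rip)"  -- e.g.  movsd lbl(%rip), %xmm0

    main :: IO ()
    main = mapM_ (putStrLn . renderToy) [Absolute "lbl", RipRelative "lbl"]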
diff --git a/ghc/compiler/nativeGen/MachInstrs.hs b/ghc/compiler/nativeGen/MachInstrs.hs
index 40d1766..c86f3d1 100644
--- a/ghc/compiler/nativeGen/MachInstrs.hs
+++ b/ghc/compiler/nativeGen/MachInstrs.hs
@@ -85,6 +85,8 @@ data Cond
 	| POS
 	| CARRY
 	| OFLO
+	| PARITY
+	| NOTPARITY
 #endif
 #if sparc_TARGET_ARCH
 	= ALWAYS	-- What's really used? ToDo
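PARITY and NOTPARITY are added to Cond because SSE's ucomiss/ucomisd report an unordered result (either operand NaN) by setting ZF, PF and CF all at once, so the parity flag is the only thing that distinguishes "equal" from "unordered". A small model of the flag patterns and of how the new setp/setnp bytes combine with the primary test (our sketch, not GHC code):

    -- ZF PF CF after ucomisd:
    --   greater    0  0  0
    --   less       0  0  1
    --   equal      1  0  0
    --   unordered  1  1  1   (either operand is NaN)
    data Flags = Flags { zf, pf, cf :: Bool }

    -- setp / setnp read PF, exactly what PARITY/NOTPARITY print as.
    setp, setnp :: Flags -> Bool
    setp  = pf
    setnp = not . pf

    -- or_unordered: the setne byte OR'd with setp, so unordered makes (/=) True.
    neResult :: Flags -> Bool
    neResult f = not (zf f) || setp f

    -- and_ordered: AND with setnp, so unordered makes (<) come out False.
    ltResult :: Flags -> Bool
    ltResult f = cf f && setnp f

    main :: IO ()
    main = print (neResult unordered, ltResult unordered)  -- (True,False)
      where unordered = Flags True True True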
diff --git a/ghc/compiler/nativeGen/MachRegs.lhs b/ghc/compiler/nativeGen/MachRegs.lhs
index d82922c..0d048e3 100644
--- a/ghc/compiler/nativeGen/MachRegs.lhs
+++ b/ghc/compiler/nativeGen/MachRegs.lhs
@@ -46,11 +46,13 @@ module MachRegs (
 	gp, pv, ra, sp, t9, t10, t11, t12, v0, f0, zeroh,
 #endif
 #if i386_TARGET_ARCH
+	EABase(..), EAIndex(..),
 	eax, ebx, ecx, edx, esi, edi, ebp, esp,
 	fake0, fake1, fake2, fake3, fake4, fake5,
 	addrModeRegs,
 #endif
 #if x86_64_TARGET_ARCH
+	EABase(..), EAIndex(..), ripRel,
 	rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
 	eax, ebx, ecx, edx, esi, edi, ebp, esp,
 	r8, r9, r10, r11, r12, r13, r14, r15,
@@ -150,11 +152,11 @@ data AddrMode
 #endif
 
 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-      = AddrBaseIndex	Base Index Displacement
+      = AddrBaseIndex	EABase EAIndex Displacement
 	| ImmAddr Imm Int
 
-type Base         = Maybe Reg
-type Index        = Maybe (Reg, Int)	-- Int is 2, 4 or 8
+data EABase       = EABaseNone | EABaseReg Reg | EABaseRip
+data EAIndex      = EAIndexNone | EAIndex Reg Int
 type Displacement = Imm
 #endif
@@ -172,8 +174,8 @@ type Displacement = Imm
 addrModeRegs :: AddrMode -> [Reg]
 addrModeRegs (AddrBaseIndex b i _) =  b_regs ++ i_regs
   where
-   b_regs = case b of { Just r -> [r]; _ -> [] }
-   i_regs = case i of { Just (r,_) -> [r]; _ -> [] }
+   b_regs = case b of { EABaseReg r -> [r]; _ -> [] }
+   i_regs = case i of { EAIndex r _ -> [r]; _ -> [] }
 addrModeRegs _ = []
 #endif
@@ -289,9 +291,9 @@
 spRel :: Int	-- desired stack offset in words, positive or negative
 spRel n
 #if defined(i386_TARGET_ARCH)
-  = AddrBaseIndex (Just esp) Nothing (ImmInt (n * wORD_SIZE))
+  = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE))
 #elif defined(x86_64_TARGET_ARCH)
-  = AddrBaseIndex (Just rsp) Nothing (ImmInt (n * wORD_SIZE))
+  = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE))
 #else
   = AddrRegImm sp (ImmInt (n * wORD_SIZE))
 #endif
@@ -304,6 +306,9 @@ fpRel n
 	= AddrRegImm fp (ImmInt (n * wORD_SIZE))
 #endif
 
+#if x86_64_TARGET_ARCH
+ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm
+#endif
 
 -- -----------------------------------------------------------------------------
 -- Global registers
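Replacing `Maybe Reg` / `Maybe (Reg, Int)` with dedicated sum types is what makes `ripRel` possible: %rip becomes a base register that exists at the syntax level but carries no allocatable register. A self-contained mirror of the idea (Reg simplified to String; this is our sketch, not the MachRegs code):

    type Reg = String
    data EABase  = EABaseNone | EABaseReg Reg | EABaseRip
    data EAIndex = EAIndexNone | EAIndex Reg Int

    -- Analogue of addrModeRegs: the register allocator only ever sees
    -- real registers; EABaseRip falls through the wildcard exactly like
    -- EABaseNone, which Maybe Reg could not express without a sentinel.
    eaRegs :: EABase -> EAIndex -> [Reg]
    eaRegs b i = b_regs ++ i_regs
      where b_regs = case b of { EABaseReg r -> [r]; _ -> [] }
            i_regs = case i of { EAIndex r _ -> [r]; _ -> [] }

    main :: IO ()
    main = print (eaRegs EABaseRip (EAIndex "rbx" 8))   -- ["rbx"]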
diff --git a/ghc/compiler/nativeGen/PprMach.hs b/ghc/compiler/nativeGen/PprMach.hs
index c6501a0..51f5f47 100644
--- a/ghc/compiler/nativeGen/PprMach.hs
+++ b/ghc/compiler/nativeGen/PprMach.hs
@@ -446,6 +446,7 @@ pprCond c = ptext (case c of {
 	LEU	-> SLIT("be");	NE	-> SLIT("ne");
 	NEG	-> SLIT("s");	POS	-> SLIT("ns");
 	CARRY	-> SLIT("c");	OFLO	-> SLIT("o");
+	PARITY	-> SLIT("p");	NOTPARITY -> SLIT("np");
 	ALWAYS	-> SLIT("mp")	-- hack
 #endif
 #if sparc_TARGET_ARCH
@@ -480,8 +481,8 @@ pprImm (ImmCLbl l)    = pprCLabel_asm l
 pprImm (ImmIndex l i) = pprCLabel_asm l <> char '+' <> int i
 pprImm (ImmLit s)     = s
 
-pprImm (ImmFloat _) = panic "pprImm:ImmFloat"
-pprImm (ImmDouble _) = panic "pprImm:ImmDouble"
+pprImm (ImmFloat _)  = ptext SLIT("naughty float immediate")
+pprImm (ImmDouble _) = ptext SLIT("naughty double immediate")
 
 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
@@ -560,10 +561,11 @@ pprAddr (AddrBaseIndex base index displacement)
 	pp_reg r  = pprReg wordRep r
     in
     case (base,index) of
-      (Nothing, Nothing)    -> pp_disp
-      (Just b,  Nothing)    -> pp_off (pp_reg b)
-      (Nothing, Just (r,i)) -> pp_off (comma <> pp_reg r <> comma <> int i)
-      (Just b,  Just (r,i)) -> pp_off (pp_reg b <> comma <> pp_reg r
+      (EABaseNone,  EAIndexNone) -> pp_disp
+      (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
+      (EABaseRip,   EAIndexNone) -> pp_off (ptext SLIT("%rip"))
+      (EABaseNone,  EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
+      (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
 				       <> comma <> int i)
   where
     ppr_disp (ImmInt 0) = empty
@@ -1202,18 +1204,22 @@ pprInstr (MOVZxL I32 src dst) = pprSizeOpOp SLIT("mov") I32 src dst
 	-- the reg alloc would tend to throw away a plain reg-to-reg
 	-- move, and we still want it to do that.
 
-pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes wordRep src dst
+pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes I32 src dst
+	-- zero-extension only needs to extend to 32 bits: on x86_64,
+	-- the remaining zero-extension to 64 bits is automatic, and the 32-bit
+	-- instruction is shorter.
+
 pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce SLIT("movs") sizes wordRep src dst
 
 -- here we do some patching, since the physical registers are only set late
 -- in the code generation.
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) (Just (reg2,1)) (ImmInt 0))) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
   | reg1 == reg3
   = pprSizeOpOp SLIT("add") size (OpReg reg2) dst
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) (Just (reg2,1)) (ImmInt 0))) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
  | reg2 == reg3
  = pprSizeOpOp SLIT("add") size (OpReg reg1) dst
-pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(Just reg1) Nothing displ)) dst@(OpReg reg3))
+pprInstr (LEA size (OpAddr (AddrBaseIndex src1@(EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
   | reg1 == reg3
   = pprInstr (ADD size (OpImm displ) dst)
 
 pprInstr (LEA size src dst) = pprSizeOpOp SLIT("lea") size src dst
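The `MOVZxL sizes src dst` change relies on an x86_64 architectural rule: any write to a 32-bit register implicitly zeroes bits 63-32, so zero-extending to I32 already yields a zero-extended 64-bit value, and the 32-bit form avoids a REX prefix byte. A sketch of the mnemonic choice (our function, not pprInstr itself):

    -- Why MOVZxL can always target a 32-bit destination on x86_64:
    -- "movzbl %al, %eax" already zero-extends to 64 bits and is one
    -- byte shorter than "movzbq %al, %rax".
    movzName :: Int -> String
    movzName fromBits = "movz" ++ suffix fromBits ++ "l"   -- always extend to 32
      where suffix 8  = "b"
            suffix 16 = "w"
            suffix _  = error "movz: source must be 8 or 16 bits"

    main :: IO ()
    main = mapM_ (putStrLn . movzName) [8, 16]   -- movzbl, movzwl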
diff --git a/ghc/compiler/nativeGen/RegAllocInfo.hs b/ghc/compiler/nativeGen/RegAllocInfo.hs
index f6b3131..86630cf 100644
--- a/ghc/compiler/nativeGen/RegAllocInfo.hs
+++ b/ghc/compiler/nativeGen/RegAllocInfo.hs
@@ -258,11 +258,13 @@ regUsage instr = case instr of
     use_R (OpAddr ea)  = use_EA ea
 
     -- Registers used to compute an effective address.
-    use_EA (ImmAddr _ _) = []
-    use_EA (AddrBaseIndex Nothing Nothing _) = []
-    use_EA (AddrBaseIndex (Just b) Nothing _) = [b]
-    use_EA (AddrBaseIndex Nothing (Just (i,_)) _) = [i]
-    use_EA (AddrBaseIndex (Just b) (Just (i,_)) _) = [b,i]
+    use_EA (ImmAddr _ _) = []
+    use_EA (AddrBaseIndex base index _) =
+	use_base base $! use_index index
+	where use_base (EABaseReg r) x = r : x
+	      use_base _ x             = x
+	      use_index EAIndexNone   = []
+	      use_index (EAIndex i _) = [i]
 
     mkRU src dst = RU (filter interesting src)
 		      (filter interesting dst)
@@ -555,19 +557,20 @@ patchRegs instr env = case instr of
     patch1 insn op      = insn $! patchOp op
     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 
-    patchOp (OpReg  reg) = OpReg (env reg)
+    patchOp (OpReg  reg) = OpReg $! env reg
     patchOp (OpImm  imm) = OpImm imm
-    patchOp (OpAddr ea)  = OpAddr (lookupAddr ea)
+    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 
     lookupAddr (ImmAddr imm off) = ImmAddr imm off
     lookupAddr (AddrBaseIndex base index disp)
-      = AddrBaseIndex (lookupBase base) (lookupIndex index) disp
+      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
       where
-	lookupBase Nothing       = Nothing
-	lookupBase (Just r)      = Just (env r)
+	lookupBase EABaseNone       = EABaseNone
+	lookupBase EABaseRip        = EABaseRip
+	lookupBase (EABaseReg r)    = EABaseReg (env r)
 
-	lookupIndex Nothing      = Nothing
-	lookupIndex (Just (r,i)) = Just (env r, i)
+	lookupIndex EAIndexNone     = EAIndexNone
+	lookupIndex (EAIndex r i)   = EAIndex (env r) i
 
 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH*/
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
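The `$!` applications threaded through patchOp and lookupAddr force each rewritten register while the patching environment is still at hand, instead of leaving `env reg` thunks inside the rebuilt addressing modes. A cut-down illustration of the same idea (our names, not GHC's):

    -- Forcing the new register with ($!) evaluates `env r` during
    -- patching, so the allocation-time register map is not retained
    -- inside an unevaluated EABaseReg field.
    data EABase r = EABaseNone | EABaseReg r | EABaseRip

    patchBase :: (r -> r) -> EABase r -> EABase r
    patchBase env (EABaseReg r) = EABaseReg $! env r   -- force now
    patchBase _   EABaseNone    = EABaseNone
    patchBase _   EABaseRip     = EABaseRip

    main :: IO ()
    main = case patchBase (+ (1 :: Int)) (EABaseReg 41) of
             EABaseReg r -> print r                    -- 42
             _           -> return ()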
-- 
1.7.10.4
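As a closing check, the jump sequences added in genCondJump encode the same NaN-correct booleans as the setcc sequences in condFltReg. The harness below is entirely ours; it replays the flag model against the Prelude's Double comparisons:

    -- (zf, pf, cf) as SSE ucomisd sets them.
    flags :: Double -> Double -> (Bool, Bool, Bool)
    flags x y
      | isNaN x || isNaN y = (True, True, True)     -- unordered
      | x < y              = (False, False, True)
      | x > y              = (False, False, False)
      | otherwise          = (True, False, False)   -- equal

    ne, gt, lt :: Double -> Double -> Bool
    ne x y = let (z, p, _) = flags x y in not z || p    -- or_unordered: jne id; jp id
    gt x y = let (z, _, c) = flags x y in not (z || c)  -- plain ja: CF=1 already excludes NaN
    lt x y = let (z, p, c) = flags x y in c && not p    -- and_ordered: jp skips the branch

    main :: IO ()
    main = do
      let xs = [0 / 0, 1, 2]                               -- 0/0 :: Double is NaN
      print $ and [ ne a b == (a /= b) | a <- xs, b <- xs ]  -- True
      print $ and [ gt a b == (a >  b) | a <- xs, b <- xs ]  -- True
      print $ and [ lt a b == (a <  b) | a <- xs, b <- xs ]  -- True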