From 0df5099f0c2088e2ccbb5b8974a7eae4d77eaa1c Mon Sep 17 00:00:00 2001
From: "Ben.Lippmeier@anu.edu.au" <unknown>
Date: Tue, 20 Jan 2009 05:21:13 +0000
Subject: [PATCH] SPARC NCG: Add support for hardware divide

---
 compiler/nativeGen/MachCodeGen.hs  |  123 +++++++++++++++++++++++++++++++++---
 compiler/nativeGen/MachInstrs.hs   |   16 ++++-
 compiler/nativeGen/PprMach.hs      |   16 ++++-
 compiler/nativeGen/RegAllocInfo.hs |    6 ++
 4 files changed, 148 insertions(+), 13 deletions(-)
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
index 0c9aec6..93f31fb 100644
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -1519,14 +1519,13 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
       MO_Sub W32 -> trivialCode W32 (SUB False False) x y
 
       MO_S_MulMayOflo rep -> imulMayOflo rep x y
-{-
-      -- ToDo: teach about V8+ SPARC div instructions
-      MO_S_Quot W32 -> idiv FSLIT(".div")   x y
-      MO_S_Rem W32  -> idiv FSLIT(".rem")   x y
-      MO_U_Quot W32 -> idiv FSLIT(".udiv")  x y
-      MO_U_Rem W32  -> idiv FSLIT(".urem")  x y
--}
 
+      MO_S_Quot W32 	-> idiv True  False x y
+      MO_U_Quot W32 	-> idiv False False x y
+       
+      MO_S_Rem  W32	-> irem True  x y
+      MO_U_Rem	W32	-> irem False x y
+       
       MO_F_Eq w -> condFltReg EQQ x y
       MO_F_Ne w -> condFltReg NE x y
 
@@ -1559,9 +1558,115 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
 -}
       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
   where
-    --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+    -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+    -- | Generate an integer division: @idiv signed cc x y@ divides x by y; cc is passed on as the instruction's cc? flag.
+    idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+	
+    -- For unsigned division with a 32 bit numerator,
+    --		we can just clear the Y register before dividing.
+    idiv False cc x y = do
+	(a_reg, a_code)		<- getSomeReg x
+       	(b_reg, b_code)		<- getSomeReg y
+	
+	let code dst
+		= 	a_code 
+		`appOL`	b_code  
+		`appOL`	toOL
+			[ WRY  g0 g0				-- Y <- g0 `xor` g0, ie zero
+			, UDIV cc a_reg (RIReg b_reg) dst]	-- dst <- a_reg / b_reg
+			
+	return (Any II32 code)
+    	
+
+    -- For _signed_ division with a 32 bit numerator,
+    --		we have to sign extend the numerator into the Y register.
+    idiv True cc x y = do
+	(a_reg, a_code)		<- getSomeReg x
+       	(b_reg, b_code)		<- getSomeReg y
+	
+	tmp			<- getNewRegNat II32
+	
+	let code dst
+		= 	a_code 
+		`appOL`	b_code  
+		`appOL`	toOL
+			[ SRA  a_reg (RIImm (ImmInt 16)) tmp		-- tmp <- a_reg's sign bit copied to every
+			, SRA  tmp   (RIImm (ImmInt 16)) tmp		--   bit (arithmetic shift by 16, twice)
+
+			, WRY  tmp g0				-- Y <- tmp `xor` g0
+			, SDIV cc a_reg (RIReg b_reg) dst]	-- dst <- a_reg / b_reg
+			
+	return (Any II32 code)
+
+
+    -- | Do an integer remainder: @irem signed x y@ yields x - (x / y) * y.
+    --
+    --	 NOTE:	The SPARC v8 architecture manual says that integer division
+    --		instructions _may_ generate a remainder, depending on the implementation.
+    --		If so it is _recommended_ that the remainder is placed in the Y register.
+    --
+    --          The UltraSparc 2007 manual says Y is _undefined_ after division.
+    --
+    --		The SPARC T2 doesn't store the remainder, not sure about the others.
+    --		It's probably best not to worry about it, and just generate our own
+    --		remainders with a divide, a multiply and a subtract.
+    --
+    irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+    -- For unsigned operands:
+    --		Division is between a 64 bit numerator and a 32 bit denominator,
+    --		so we still have to clear the Y register.
+    irem False x y = do
+    	(a_reg, a_code)	<- getSomeReg x
+	(b_reg, b_code)	<- getSomeReg y
+
+	tmp_reg		<- getNewRegNat II32
+
+	let code dst
+		= 	a_code
+		`appOL`	b_code
+		`appOL`	toOL
+			[ WRY	g0 g0						-- Y   <- g0 `xor` g0, ie zero
+			, UDIV  False         a_reg (RIReg b_reg) tmp_reg	-- tmp <- a / b
+			, UMUL  False       tmp_reg (RIReg b_reg) tmp_reg	-- tmp <- tmp * b
+			, SUB   False False   a_reg (RIReg tmp_reg) dst]	-- dst <- a - tmp
+    
+    	return	(Any II32 code)
+
+    
+    -- For signed operands:
+    --		Make sure to sign extend into the Y register, or the remainder
+    --		will have the wrong sign when the numerator is negative.
+    --
+    --	TODO:	When sign extending, GCC only shifts the a_reg right by 17 bits,
+    --		not the full 32. Not sure why this is, something to do with overflow?
+    --		If anyone cares enough about the speed of signed remainder they
+    --		can work it out themselves (then tell me). -- BL 2009/01/20
+    
+    irem True x y = do
+    	(a_reg, a_code)	<- getSomeReg x
+	(b_reg, b_code)	<- getSomeReg y
+	
+	tmp1_reg	<- getNewRegNat II32
+	tmp2_reg	<- getNewRegNat II32
+		
+	let code dst
+		=	a_code
+		`appOL`	b_code
+		`appOL`	toOL
+			[ SRA	a_reg      (RIImm (ImmInt 16)) tmp1_reg	-- tmp1 <- a's sign bit copied to every
+			, SRA	tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg	--   bit (arithmetic shift by 16, twice)
+			, WRY	tmp1_reg g0				-- Y <- tmp1 `xor` g0
+
+			, SDIV  False          a_reg (RIReg b_reg)    tmp2_reg	-- tmp2 <- a / b
+			, SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg	-- tmp2 <- tmp2 * b
+			, SUB   False False    a_reg (RIReg tmp2_reg) dst]	-- dst  <- a - tmp2
+			
+	return (Any II32 code)
+   
 
-    --------------------
     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
     imulMayOflo rep a b = do
          (a_reg, a_code) <- getSomeReg a
diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs
index 7b319af..e16dbf3 100644
--- a/compiler/nativeGen/MachInstrs.hs
+++ b/compiler/nativeGen/MachInstrs.hs
@@ -558,9 +558,23 @@ is_G_instr instr
 -- Int Arithmetic.
 	      | ADD	      Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 	      | SUB	      Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
+
 	      | UMUL	           Bool Reg RI Reg --     cc?, src1, src2, dst
 	      | SMUL	           Bool Reg RI Reg --     cc?, src1, src2, dst
-              | RDY           Reg	-- move contents of Y register to reg
+
+
+              -- The SPARC divide instructions perform 64bit by 32bit division
+	      --   The Y register supplies the upper 32 bits of the numerator.
+
+	      --   On _some implementations_ the Y register is overwritten by
+              --   the remainder, so we have to set it again before each divide.
+
+              --   dst <- ((Y `shiftL` 32) `or` src1) `div` src2
+              | UDIV               Bool Reg RI Reg --     cc?, src1, src2, dst
+	      | SDIV               Bool Reg RI Reg --     cc?, src1, src2, dst
+
+              | RDY           Reg                  -- move contents of Y register to reg
+              | WRY           Reg  Reg             -- Y <- src1 `xor` src2
 
 -- Simple bit-twiddling.
 	      | AND	      Bool Reg RI Reg -- cc?, src1, src2, dst
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs
index eb880fc..199fd36 100644
--- a/compiler/nativeGen/PprMach.hs
+++ b/compiler/nativeGen/PprMach.hs
@@ -2002,9 +2002,19 @@ pprInstr (SLL reg1 ri reg2) = pprRegRIReg (sLit "sll") False reg1 ri reg2
 pprInstr (SRL reg1 ri reg2) = pprRegRIReg (sLit "srl") False reg1 ri reg2
 pprInstr (SRA reg1 ri reg2) = pprRegRIReg (sLit "sra") False reg1 ri reg2
 
-pprInstr (RDY rd) = ptext (sLit "\trd\t%y,") <> pprReg rd
-pprInstr (SMUL b reg1 ri reg2) = pprRegRIReg (sLit "smul")  b reg1 ri reg2
-pprInstr (UMUL b reg1 ri reg2) = pprRegRIReg (sLit "umul")  b reg1 ri reg2
+pprInstr (RDY rd) 		= ptext (sLit "\trd\t%y,") <> pprReg rd
+pprInstr (WRY reg1 reg2) 	
+	= ptext (sLit "\twr\t") 
+		<> pprReg reg1 
+		<> char ','
+		<> pprReg reg2
+		<> char ','
+		<> ptext (sLit "%y") 
+
+pprInstr (SMUL b reg1 ri reg2)	= pprRegRIReg (sLit "smul")  b reg1 ri reg2
+pprInstr (UMUL b reg1 ri reg2)	= pprRegRIReg (sLit "umul")  b reg1 ri reg2
+pprInstr (SDIV b reg1 ri reg2)	= pprRegRIReg (sLit "sdiv")  b reg1 ri reg2
+pprInstr (UDIV b reg1 ri reg2)	= pprRegRIReg (sLit "udiv")  b reg1 ri reg2
 
 pprInstr (SETHI imm reg)
   = hcat [
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs
index 1b8bdb6..deb5f34 100644
--- a/compiler/nativeGen/RegAllocInfo.hs
+++ b/compiler/nativeGen/RegAllocInfo.hs
@@ -295,7 +295,10 @@ regUsage instr = case instr of
     SUB   x cc r1 ar r2	-> usage (r1 : regRI ar, [r2])
     UMUL    cc r1 ar r2	-> usage (r1 : regRI ar, [r2])
     SMUL    cc r1 ar r2	-> usage (r1 : regRI ar, [r2])
+    UDIV    cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+    SDIV    cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
     RDY   rd            -> usage ([], [rd])
+    WRY   r1 r2         -> usage ([r1, r2], [])
     AND   b r1 ar r2  	-> usage (r1 : regRI ar, [r2])
     ANDN  b r1 ar r2 	-> usage (r1 : regRI ar, [r2])
     OR    b r1 ar r2   	-> usage (r1 : regRI ar, [r2])
@@ -669,7 +672,10 @@ patchRegs instr env = case instr of
     SUB   x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
     UMUL    cc r1 ar r2	-> UMUL cc (env r1) (fixRI ar) (env r2)
     SMUL    cc r1 ar r2	-> SMUL cc (env r1) (fixRI ar) (env r2)
+    UDIV    cc r1 ar r2	-> UDIV cc (env r1) (fixRI ar) (env r2)
+    SDIV    cc r1 ar r2	-> SDIV cc (env r1) (fixRI ar) (env r2)
     RDY   rd            -> RDY (env rd)
+    WRY   r1 r2		-> WRY (env r1) (env r2)
     AND   b r1 ar r2    -> AND b (env r1) (fixRI ar) (env r2)
     ANDN  b r1 ar r2    -> ANDN b (env r1) (fixRI ar) (env r2)
     OR    b r1 ar r2    -> OR b (env r1) (fixRI ar) (env r2)
-- 
1.7.10.4