From 0df5099f0c2088e2ccbb5b8974a7eae4d77eaa1c Mon Sep 17 00:00:00 2001 From: "Ben.Lippmeier@anu.edu.au" Date: Tue, 20 Jan 2009 05:21:13 +0000 Subject: [PATCH] SPARC NCG: Add support for hardware divide --- compiler/nativeGen/MachCodeGen.hs | 123 +++++++++++++++++++++++++++++++++--- compiler/nativeGen/MachInstrs.hs | 16 ++++- compiler/nativeGen/PprMach.hs | 16 ++++- compiler/nativeGen/RegAllocInfo.hs | 6 ++ 4 files changed, 148 insertions(+), 13 deletions(-) diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 0c9aec6..93f31fb 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -1519,14 +1519,13 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps MO_Sub W32 -> trivialCode W32 (SUB False False) x y MO_S_MulMayOflo rep -> imulMayOflo rep x y -{- - -- ToDo: teach about V8+ SPARC div instructions - MO_S_Quot W32 -> idiv FSLIT(".div") x y - MO_S_Rem W32 -> idiv FSLIT(".rem") x y - MO_U_Quot W32 -> idiv FSLIT(".udiv") x y - MO_U_Rem W32 -> idiv FSLIT(".urem") x y --} + MO_S_Quot W32 -> idiv True False x y + MO_U_Quot W32 -> idiv False False x y + + MO_S_Rem W32 -> irem True x y + MO_U_Rem W32 -> irem False x y + MO_F_Eq w -> condFltReg EQQ x y MO_F_Ne w -> condFltReg NE x y @@ -1559,9 +1558,115 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps -} other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) where - --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) + -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) + + + -- | Generate an integer division instruction. + idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register + + -- For unsigned division with a 32 bit numerator, + -- we can just clear the Y register. + idiv False cc x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ WRY g0 g0 + , UDIV cc a_reg (RIReg b_reg) dst] + + return (Any II32 code) + + + -- For _signed_ division with a 32 bit numerator, + -- we have to sign extend the numerator into the Y register. + idiv True cc x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend + , SRA tmp (RIImm (ImmInt 16)) tmp + + , WRY tmp g0 + , SDIV cc a_reg (RIReg b_reg) dst] + + return (Any II32 code) + + + -- | Do an integer remainder. + -- + -- NOTE: The SPARC v8 architecture manual says that integer division + -- instructions _may_ generate a remainder, depending on the implementation. + -- If so it is _recommended_ that the remainder is placed in the Y register. + -- + -- The UltraSparc 2007 manual says Y is _undefined_ after division. + -- + -- The SPARC T2 doesn't store the remainder, not sure about the others. + -- It's probably best not to worry about it, and just generate our own + -- remainders. + -- + irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register + + -- For unsigned operands: + -- Division is between a 64 bit numerator and a 32 bit denominator, + -- so we still have to clear the Y register. + irem False x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp_reg <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ WRY g0 g0 + , UDIV False a_reg (RIReg b_reg) tmp_reg + , UMUL False tmp_reg (RIReg b_reg) tmp_reg + , SUB False False a_reg (RIReg tmp_reg) dst] + + return (Any II32 code) + + + -- For signed operands: + -- Make sure to sign extend into the Y register, or the remainder + -- will have the wrong sign when the numerator is negative. + -- + -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits, + -- not the full 32. Not sure why this is, something to do with overflow? + -- If anyone cares enough about the speed of signed remainder they + -- can work it out themselves (then tell me). -- BL 2009/01/20 + + irem True x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp1_reg <- getNewRegNat II32 + tmp2_reg <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend + , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend + , WRY tmp1_reg g0 + + , SDIV False a_reg (RIReg b_reg) tmp2_reg + , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg + , SUB False False a_reg (RIReg tmp2_reg) dst] + + return (Any II32 code) + - -------------------- imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register imulMayOflo rep a b = do (a_reg, a_code) <- getSomeReg a diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs index 7b319af..e16dbf3 100644 --- a/compiler/nativeGen/MachInstrs.hs +++ b/compiler/nativeGen/MachInstrs.hs @@ -558,9 +558,23 @@ is_G_instr instr -- Int Arithmetic. | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst + | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst - | RDY Reg -- move contents of Y register to reg + + + -- The SPARC divide instructions perform 64bit by 32bit division + -- The Y register is xored into the first operand. + + -- On _some implementations_ the Y register is overwritten by + -- the remainder, so we have to make sure it is 0 each time. + + -- dst <- ((Y `shiftL` 32) `or` src1) `div` src2 + | UDIV Bool Reg RI Reg -- cc?, src1, src2, dst + | SDIV Bool Reg RI Reg -- cc?, src1, src2, dst + + | RDY Reg -- move contents of Y register to reg + | WRY Reg Reg -- Y <- src1 `xor` src2 -- Simple bit-twiddling. | AND Bool Reg RI Reg -- cc?, src1, src2, dst diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs index eb880fc..199fd36 100644 --- a/compiler/nativeGen/PprMach.hs +++ b/compiler/nativeGen/PprMach.hs @@ -2002,9 +2002,19 @@ pprInstr (SLL reg1 ri reg2) = pprRegRIReg (sLit "sll") False reg1 ri reg2 pprInstr (SRL reg1 ri reg2) = pprRegRIReg (sLit "srl") False reg1 ri reg2 pprInstr (SRA reg1 ri reg2) = pprRegRIReg (sLit "sra") False reg1 ri reg2 -pprInstr (RDY rd) = ptext (sLit "\trd\t%y,") <> pprReg rd -pprInstr (SMUL b reg1 ri reg2) = pprRegRIReg (sLit "smul") b reg1 ri reg2 -pprInstr (UMUL b reg1 ri reg2) = pprRegRIReg (sLit "umul") b reg1 ri reg2 +pprInstr (RDY rd) = ptext (sLit "\trd\t%y,") <> pprReg rd +pprInstr (WRY reg1 reg2) + = ptext (sLit "\twr\t") + <> pprReg reg1 + <> char ',' + <> pprReg reg2 + <> char ',' + <> ptext (sLit "%y") + +pprInstr (SMUL b reg1 ri reg2) = pprRegRIReg (sLit "smul") b reg1 ri reg2 +pprInstr (UMUL b reg1 ri reg2) = pprRegRIReg (sLit "umul") b reg1 ri reg2 +pprInstr (SDIV b reg1 ri reg2) = pprRegRIReg (sLit "sdiv") b reg1 ri reg2 +pprInstr (UDIV b reg1 ri reg2) = pprRegRIReg (sLit "udiv") b reg1 ri reg2 pprInstr (SETHI imm reg) = hcat [ diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs index 1b8bdb6..deb5f34 100644 --- a/compiler/nativeGen/RegAllocInfo.hs +++ b/compiler/nativeGen/RegAllocInfo.hs @@ -295,7 +295,10 @@ regUsage instr = case instr of SUB x cc r1 ar r2 -> usage (r1 : regRI ar, [r2]) UMUL cc r1 ar r2 -> usage (r1 : regRI ar, [r2]) SMUL cc r1 ar r2 -> usage (r1 : regRI ar, [r2]) + UDIV cc r1 ar r2 -> usage (r1 : regRI ar, [r2]) + SDIV cc r1 ar r2 -> usage (r1 : regRI ar, [r2]) RDY rd -> usage ([], [rd]) + WRY r1 r2 -> usage ([r1, r2], []) AND b r1 ar r2 -> usage (r1 : regRI ar, [r2]) ANDN b r1 ar r2 -> usage (r1 : regRI ar, [r2]) OR b r1 ar r2 -> usage (r1 : regRI ar, [r2]) @@ -669,7 +672,10 @@ patchRegs instr env = case instr of SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2) UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2) SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2) + UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2) + SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2) RDY rd -> RDY (env rd) + WRY r1 r2 -> WRY (env r1) (env r2) AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2) ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2) OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2) -- 1.7.10.4