SPARC NCG: Fix generation of 64 bit ops on 32 bit sparc

[ghc-hetmet.git] / compiler / nativeGen / MachCodeGen.hs
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs

index 6d16da8..e90b40c 100644 (file)
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -323,21 +323,53 @@ assignReg_I64Code lvalue valueTree
     = panic "assignReg_I64Code(sparc): invalid lvalue"
  
  
--- Don't delete this -- it's very handy for debugging.
---iselExpr64 expr 
---   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
---   = panic "iselExpr64(???)"
+-- Load a 64 bit word
+iselExpr64 (CmmLoad addrTree ty) 
+ | isWord64 ty
+ = do  Amode amode addr_code   <- getAmode addrTree
+       let result
+
+               | AddrRegReg r1 r2      <- amode
+               = do    rlo     <- getNewRegNat II32
+                       tmp     <- getNewRegNat II32
+                       let rhi = getHiVRegFromLo rlo
+
+                       return  $ ChildCode64 
+                               (        addr_code 
+                               `appOL`  toOL
+                                        [ ADD False False r1 (RIReg r2) tmp
+                                        , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
+                                        , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
+                               rlo
+
+               | AddrRegImm r1 (ImmInt i) <- amode
+               = do    rlo     <- getNewRegNat II32
+                       let rhi = getHiVRegFromLo rlo
+                       
+                       return  $ ChildCode64 
+                               (        addr_code 
+                               `appOL`  toOL
+                                        [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
+                                        , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
+                               rlo
+               
+       result
+
+
+-- Add a literal to a 64 bit integer
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) 
+ = do  ChildCode64 code1 r1_lo <- iselExpr64 e1
+       let r1_hi       = getHiVRegFromLo r1_lo
+       
+       r_dst_lo        <- getNewRegNat II32
+       let r_dst_hi    =  getHiVRegFromLo r_dst_lo 
+       
+       return  $ ChildCode64
+                       ( toOL
+                       [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
+                       , ADD True  False r1_hi (RIReg g0)         r_dst_hi ])
+                       r_dst_lo
  
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
-     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
-     rlo <- getNewRegNat II32
-     let rhi = getHiVRegFromLo rlo
-         mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
-         mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
-     return (
-            ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) 
-                         rlo
-          )
  
  iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
       r_dst_lo <-  getNewRegNat II32
@@ -1451,23 +1483,46 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
        MO_UU_Conv W32 to -> conversionNop (intSize to) x
        MO_SS_Conv W32 to -> conversionNop (intSize to) x
  
-      -- widenings
-      MO_UU_Conv W8 W32  -> integerExtend False W8 W32  x
-      MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x
-      MO_UU_Conv W8 W16  -> integerExtend False W8 W16  x
-      MO_SS_Conv W16 W32 -> integerExtend True  W16 W32 x
+      MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
+
+      -- sign extension
+      MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
+      MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
+      MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
  
-      other_op -> panic "Unknown unary mach op"
+      other_op -> panic ("Unknown unary mach op: " ++ show mop)
      where
-        -- XXX SLL/SRL?
-        integerExtend signed from to expr = do
-           (reg, e_code) <- getSomeReg expr
-          let
-              code dst =
-                  e_code `snocOL` 
-                  ((if signed then SRA else SRL)
-                         reg (RIImm (ImmInt 0)) dst)
-          return (Any (intSize to) code)
+
+       -- | sign extend and widen
+       integerExtend 
+               :: Width                -- ^ width of source expression
+               -> Width                -- ^ width of result
+               -> CmmExpr              -- ^ source expression
+               -> NatM Register        
+
+       integerExtend from to expr
+        = do   -- load the expr into some register
+               (reg, e_code)   <- getSomeReg expr
+               tmp             <- getNewRegNat II32
+               let bitCount
+                       = case (from, to) of
+                               (W8,  W32)      -> 24
+                               (W16, W32)      -> 16
+                               (W8,  W16)      -> 24
+               let code dst
+                       = e_code        
+
+                       -- local shift word left to load the sign bit
+                       `snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
+                       
+                       -- arithmetic shift right to sign extend
+                       `snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
+                       
+               return (Any (intSize to) code)
+                               
+
          conversionNop new_rep expr
              = do e_code <- getRegister expr
                   return (swizzleRegisterRep e_code new_rep)
@@ -1496,14 +1551,13 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
        MO_Sub W32 -> trivialCode W32 (SUB False False) x y
  
        MO_S_MulMayOflo rep -> imulMayOflo rep x y
-{-
-      -- ToDo: teach about V8+ SPARC div instructions
-      MO_S_Quot W32 -> idiv FSLIT(".div")   x y
-      MO_S_Rem W32  -> idiv FSLIT(".rem")   x y
-      MO_U_Quot W32 -> idiv FSLIT(".udiv")  x y
-      MO_U_Rem W32  -> idiv FSLIT(".urem")  x y
--}
  
+      MO_S_Quot W32    -> idiv True  False x y
+      MO_U_Quot W32    -> idiv False False x y
+       
+      MO_S_Rem  W32    -> irem True  x y
+      MO_U_Rem W32     -> irem False x y
+       
        MO_F_Eq w -> condFltReg EQQ x y
        MO_F_Ne w -> condFltReg NE x y
  
@@ -1536,9 +1590,115 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
  -}
        other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
    where
-    --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+    -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+    -- | Generate an integer division instruction.
+    idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+       
+    -- For unsigned division with a 32 bit numerator, 
+    --         we can just clear the Y register.
+    idiv False cc x y = do
+       (a_reg, a_code)         <- getSomeReg x
+               (b_reg, b_code)         <- getSomeReg y
+       
+       let code dst
+               =       a_code 
+               `appOL` b_code  
+               `appOL` toOL
+                       [ WRY  g0 g0
+                       , UDIV cc a_reg (RIReg b_reg) dst]
+                       
+       return (Any II32 code)
+       
+
+    -- For _signed_ division with a 32 bit numerator,
+    --         we have to sign extend the numerator into the Y register.
+    idiv True cc x y = do
+       (a_reg, a_code)         <- getSomeReg x
+               (b_reg, b_code)         <- getSomeReg y
+       
+       tmp                     <- getNewRegNat II32
+       
+       let code dst
+               =       a_code 
+               `appOL` b_code  
+               `appOL` toOL
+                       [ SRA  a_reg (RIImm (ImmInt 16)) tmp            -- sign extend
+                       , SRA  tmp   (RIImm (ImmInt 16)) tmp
+
+                       , WRY  tmp g0                           
+                       , SDIV cc a_reg (RIReg b_reg) dst]
+                       
+       return (Any II32 code)
+
+
+    -- | Do an integer remainder.
+    --
+    --  NOTE:  The SPARC v8 architecture manual says that integer division
+    --         instructions _may_ generate a remainder, depending on the implementation.
+    --         If so it is _recommended_ that the remainder is placed in the Y register.
+    --
+    --          The UltraSparc 2007 manual says Y is _undefined_ after division.
+    --
+    --         The SPARC T2 doesn't store the remainder, not sure about the others. 
+    --         It's probably best not to worry about it, and just generate our own
+    --         remainders. 
+    --
+    irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+    -- For unsigned operands: 
+    --         Division is between a 64 bit numerator and a 32 bit denominator, 
+    --         so we still have to clear the Y register.
+    irem False x y = do
+       (a_reg, a_code) <- getSomeReg x
+       (b_reg, b_code) <- getSomeReg y
+
+       tmp_reg         <- getNewRegNat II32
+
+       let code dst
+               =       a_code
+               `appOL` b_code
+               `appOL` toOL
+                       [ WRY   g0 g0
+                       , UDIV  False         a_reg (RIReg b_reg) tmp_reg
+                       , UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
+                       , SUB   False False   a_reg (RIReg tmp_reg) dst]
+    
+       return  (Any II32 code)
+
+    
+    -- For signed operands:
+    --         Make sure to sign extend into the Y register, or the remainder
+    --         will have the wrong sign when the numerator is negative.
+    --
+    -- TODO:   When sign extending, GCC only shifts the a_reg right by 17 bits,
+    --         not the full 32. Not sure why this is, something to do with overflow?
+    --         If anyone cares enough about the speed of signed remainder they
+    --         can work it out themselves (then tell me). -- BL 2009/01/20
+    
+    irem True x y = do
+       (a_reg, a_code) <- getSomeReg x
+       (b_reg, b_code) <- getSomeReg y
+       
+       tmp1_reg        <- getNewRegNat II32
+       tmp2_reg        <- getNewRegNat II32
+               
+       let code dst
+               =       a_code
+               `appOL` b_code
+               `appOL` toOL
+                       [ SRA   a_reg      (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+                       , SRA   tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+                       , WRY   tmp1_reg g0
+
+                       , SDIV  False          a_reg (RIReg b_reg)    tmp2_reg  
+                       , SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
+                       , SUB   False False    a_reg (RIReg tmp2_reg) dst]
+                       
+       return (Any II32 code)
+   
  
-    --------------------
      imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
      imulMayOflo rep a b = do
           (a_reg, a_code) <- getSomeReg a
@@ -1930,15 +2090,16 @@ getAmode (CmmMachOp (MO_Add rep) [x, y])
         code = codeX `appOL` codeY
      return (Amode (AddrRegReg regX regY) code)
  
--- XXX Is this same as "leaf" in Stix?
  getAmode (CmmLit lit)
    = do
-      tmp <- getNewRegNat II32
-      let
-       code = unitOL (SETHI (HI imm__2) tmp)
-      return (Amode (AddrRegImm tmp (LO imm__2)) code)
-      where
-         imm__2 = litToImm lit
+       let imm__2      = litToImm lit
+       tmp1    <- getNewRegNat II32
+       tmp2    <- getNewRegNat II32
+
+       let code = toOL [ SETHI (HI imm__2) tmp1
+                       , OR    False tmp1 (RIImm (LO imm__2)) tmp2]
+               
+       return (Amode (AddrRegReg tmp2 g0) code)
  
  getAmode other
    = do