SPARC NCG: Reenable out of line 32 bit float ops

[ghc-hetmet.git] / compiler / nativeGen / MachCodeGen.hs
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs

index 90285bf..d9b2028 100644 (file)
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -31,6 +31,7 @@ import NCGMonad
  import PositionIndependentCode
  import RegAllocInfo    ( mkBranchInstr, mkRegRegMoveInstr )
  import MachRegs
  import PositionIndependentCode
  import RegAllocInfo    ( mkBranchInstr, mkRegRegMoveInstr )
  import MachRegs
+import PprMach
  
  -- Our intermediate code:
  import BlockId
  
  -- Our intermediate code:
  import BlockId
@@ -58,6 +59,7 @@ import Data.Bits
  import Data.Word
  import Data.Int
  
  import Data.Word
  import Data.Int
  
+
  -- -----------------------------------------------------------------------------
  -- Top-level of the instruction selector
  
  -- -----------------------------------------------------------------------------
  -- Top-level of the instruction selector
  
@@ -321,21 +323,53 @@ assignReg_I64Code lvalue valueTree
     = panic "assignReg_I64Code(sparc): invalid lvalue"
  
  
     = panic "assignReg_I64Code(sparc): invalid lvalue"
  
  
--- Don't delete this -- it's very handy for debugging.
---iselExpr64 expr 
---   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
---   = panic "iselExpr64(???)"
+-- Load a 64 bit word
+iselExpr64 (CmmLoad addrTree ty) 
+ | isWord64 ty
+ = do  Amode amode addr_code   <- getAmode addrTree
+       let result
+
+               | AddrRegReg r1 r2      <- amode
+               = do    rlo     <- getNewRegNat II32
+                       tmp     <- getNewRegNat II32
+                       let rhi = getHiVRegFromLo rlo
+
+                       return  $ ChildCode64 
+                               (        addr_code 
+                               `appOL`  toOL
+                                        [ ADD False False r1 (RIReg r2) tmp
+                                        , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
+                                        , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
+                               rlo
+
+               | AddrRegImm r1 (ImmInt i) <- amode
+               = do    rlo     <- getNewRegNat II32
+                       let rhi = getHiVRegFromLo rlo
+                       
+                       return  $ ChildCode64 
+                               (        addr_code 
+                               `appOL`  toOL
+                                        [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
+                                        , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
+                               rlo
+               
+       result
+
+
+-- Add a literal to a 64 bit integer
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) 
+ = do  ChildCode64 code1 r1_lo <- iselExpr64 e1
+       let r1_hi       = getHiVRegFromLo r1_lo
+       
+       r_dst_lo        <- getNewRegNat II32
+       let r_dst_hi    =  getHiVRegFromLo r_dst_lo 
+       
+       return  $ ChildCode64
+                       ( toOL
+                       [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
+                       , ADD True  False r1_hi (RIReg g0)         r_dst_hi ])
+                       r_dst_lo
  
  
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
-     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
-     rlo <- getNewRegNat II32
-     let rhi = getHiVRegFromLo rlo
-         mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
-         mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
-     return (
-            ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) 
-                         rlo
-          )
  
  iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
       r_dst_lo <-  getNewRegNat II32
  
  iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
       r_dst_lo <-  getNewRegNat II32
@@ -1449,23 +1483,46 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
        MO_UU_Conv W32 to -> conversionNop (intSize to) x
        MO_SS_Conv W32 to -> conversionNop (intSize to) x
  
        MO_UU_Conv W32 to -> conversionNop (intSize to) x
        MO_SS_Conv W32 to -> conversionNop (intSize to) x
  
-      -- widenings
-      MO_UU_Conv W8 W32  -> integerExtend False W8 W32  x
-      MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x
-      MO_UU_Conv W8 W16  -> integerExtend False W8 W16  x
-      MO_SS_Conv W16 W32 -> integerExtend True  W16 W32 x
+      MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
+
+      -- sign extension
+      MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
+      MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
+      MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
  
  
-      other_op -> panic "Unknown unary mach op"
+      other_op -> panic ("Unknown unary mach op: " ++ show mop)
      where
      where
-        -- XXX SLL/SRL?
-        integerExtend signed from to expr = do
-           (reg, e_code) <- getSomeReg expr
-          let
-              code dst =
-                  e_code `snocOL` 
-                  ((if signed then SRA else SRL)
-                         reg (RIImm (ImmInt 0)) dst)
-          return (Any (intSize to) code)
+
+       -- | sign extend and widen
+       integerExtend 
+               :: Width                -- ^ width of source expression
+               -> Width                -- ^ width of result
+               -> CmmExpr              -- ^ source expression
+               -> NatM Register        
+
+       integerExtend from to expr
+        = do   -- load the expr into some register
+               (reg, e_code)   <- getSomeReg expr
+               tmp             <- getNewRegNat II32
+               let bitCount
+                       = case (from, to) of
+                               (W8,  W32)      -> 24
+                               (W16, W32)      -> 16
+                               (W8,  W16)      -> 24
+               let code dst
+                       = e_code        
+
+                       -- local shift word left to load the sign bit
+                       `snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
+                       
+                       -- arithmetic shift right to sign extend
+                       `snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
+                       
+               return (Any (intSize to) code)
+                               
+
          conversionNop new_rep expr
              = do e_code <- getRegister expr
                   return (swizzleRegisterRep e_code new_rep)
          conversionNop new_rep expr
              = do e_code <- getRegister expr
                   return (swizzleRegisterRep e_code new_rep)
@@ -1494,14 +1551,13 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
        MO_Sub W32 -> trivialCode W32 (SUB False False) x y
  
        MO_S_MulMayOflo rep -> imulMayOflo rep x y
        MO_Sub W32 -> trivialCode W32 (SUB False False) x y
  
        MO_S_MulMayOflo rep -> imulMayOflo rep x y
-{-
-      -- ToDo: teach about V8+ SPARC div instructions
-      MO_S_Quot W32 -> idiv FSLIT(".div")   x y
-      MO_S_Rem W32  -> idiv FSLIT(".rem")   x y
-      MO_U_Quot W32 -> idiv FSLIT(".udiv")  x y
-      MO_U_Rem W32  -> idiv FSLIT(".urem")  x y
--}
  
  
+      MO_S_Quot W32    -> idiv True  False x y
+      MO_U_Quot W32    -> idiv False False x y
+       
+      MO_S_Rem  W32    -> irem True  x y
+      MO_U_Rem W32     -> irem False x y
+       
        MO_F_Eq w -> condFltReg EQQ x y
        MO_F_Ne w -> condFltReg NE x y
  
        MO_F_Eq w -> condFltReg EQQ x y
        MO_F_Ne w -> condFltReg NE x y
  
@@ -1534,9 +1590,115 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
  -}
        other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
    where
  -}
        other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
    where
-    --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+    -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+    -- | Generate an integer division instruction.
+    idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+       
+    -- For unsigned division with a 32 bit numerator, 
+    --         we can just clear the Y register.
+    idiv False cc x y = do
+       (a_reg, a_code)         <- getSomeReg x
+               (b_reg, b_code)         <- getSomeReg y
+       
+       let code dst
+               =       a_code 
+               `appOL` b_code  
+               `appOL` toOL
+                       [ WRY  g0 g0
+                       , UDIV cc a_reg (RIReg b_reg) dst]
+                       
+       return (Any II32 code)
+       
+
+    -- For _signed_ division with a 32 bit numerator,
+    --         we have to sign extend the numerator into the Y register.
+    idiv True cc x y = do
+       (a_reg, a_code)         <- getSomeReg x
+               (b_reg, b_code)         <- getSomeReg y
+       
+       tmp                     <- getNewRegNat II32
+       
+       let code dst
+               =       a_code 
+               `appOL` b_code  
+               `appOL` toOL
+                       [ SRA  a_reg (RIImm (ImmInt 16)) tmp            -- sign extend
+                       , SRA  tmp   (RIImm (ImmInt 16)) tmp
+
+                       , WRY  tmp g0                           
+                       , SDIV cc a_reg (RIReg b_reg) dst]
+                       
+       return (Any II32 code)
+
+
+    -- | Do an integer remainder.
+    --
+    --  NOTE:  The SPARC v8 architecture manual says that integer division
+    --         instructions _may_ generate a remainder, depending on the implementation.
+    --         If so it is _recommended_ that the remainder is placed in the Y register.
+    --
+    --          The UltraSparc 2007 manual says Y is _undefined_ after division.
+    --
+    --         The SPARC T2 doesn't store the remainder, not sure about the others. 
+    --         It's probably best not to worry about it, and just generate our own
+    --         remainders. 
+    --
+    irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+    -- For unsigned operands: 
+    --         Division is between a 64 bit numerator and a 32 bit denominator, 
+    --         so we still have to clear the Y register.
+    irem False x y = do
+       (a_reg, a_code) <- getSomeReg x
+       (b_reg, b_code) <- getSomeReg y
+
+       tmp_reg         <- getNewRegNat II32
+
+       let code dst
+               =       a_code
+               `appOL` b_code
+               `appOL` toOL
+                       [ WRY   g0 g0
+                       , UDIV  False         a_reg (RIReg b_reg) tmp_reg
+                       , UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
+                       , SUB   False False   a_reg (RIReg tmp_reg) dst]
+    
+       return  (Any II32 code)
+
+    
+    -- For signed operands:
+    --         Make sure to sign extend into the Y register, or the remainder
+    --         will have the wrong sign when the numerator is negative.
+    --
+    -- TODO:   When sign extending, GCC only shifts the a_reg right by 17 bits,
+    --         not the full 32. Not sure why this is, something to do with overflow?
+    --         If anyone cares enough about the speed of signed remainder they
+    --         can work it out themselves (then tell me). -- BL 2009/01/20
+    
+    irem True x y = do
+       (a_reg, a_code) <- getSomeReg x
+       (b_reg, b_code) <- getSomeReg y
+       
+       tmp1_reg        <- getNewRegNat II32
+       tmp2_reg        <- getNewRegNat II32
+               
+       let code dst
+               =       a_code
+               `appOL` b_code
+               `appOL` toOL
+                       [ SRA   a_reg      (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+                       , SRA   tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+                       , WRY   tmp1_reg g0
+
+                       , SDIV  False          a_reg (RIReg b_reg)    tmp2_reg  
+                       , SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
+                       , SUB   False False    a_reg (RIReg tmp2_reg) dst]
+                       
+       return (Any II32 code)
+   
  
  
-    --------------------
      imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
      imulMayOflo rep a b = do
           (a_reg, a_code) <- getSomeReg a
      imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
      imulMayOflo rep a b = do
           (a_reg, a_code) <- getSomeReg a
@@ -1928,15 +2090,16 @@ getAmode (CmmMachOp (MO_Add rep) [x, y])
         code = codeX `appOL` codeY
      return (Amode (AddrRegReg regX regY) code)
  
         code = codeX `appOL` codeY
      return (Amode (AddrRegReg regX regY) code)
  
--- XXX Is this same as "leaf" in Stix?
  getAmode (CmmLit lit)
    = do
  getAmode (CmmLit lit)
    = do
-      tmp <- getNewRegNat II32
-      let
-       code = unitOL (SETHI (HI imm__2) tmp)
-      return (Amode (AddrRegImm tmp (LO imm__2)) code)
-      where
-         imm__2 = litToImm lit
+       let imm__2      = litToImm lit
+       tmp1    <- getNewRegNat II32
+       tmp2    <- getNewRegNat II32
+
+       let code = toOL [ SETHI (HI imm__2) tmp1
+                       , OR    False tmp1 (RIImm (LO imm__2)) tmp2]
+               
+       return (Amode (AddrRegReg tmp2 g0) code)
  
  getAmode other
    = do
  
  getAmode other
    = do
@@ -2592,15 +2755,13 @@ assignMem_FltCode pk addr src = do
      return code__2
  
  -- Floating point assignment to a register/temporary
      return code__2
  
  -- Floating point assignment to a register/temporary
--- ToDo: Verify correctness
-assignReg_FltCode pk reg src = do
-    r <- getRegister src
-    v1 <- getNewRegNat pk
-    return $ case r of
-        Any _ code         -> code dst
-       Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
-    where
-      dst = getRegisterReg reg
+assignReg_FltCode pk dstCmmReg srcCmmExpr = do
+    srcRegister <- getRegister srcCmmExpr
+    let dstReg = getRegisterReg dstCmmReg
+
+    return $ case srcRegister of
+        Any _ code                 -> code dstReg
+       Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
  
  #endif /* sparc_TARGET_ARCH */
  
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -3445,185 +3606,247 @@ genCCall target dest_regs args = do
     in preparation for the outer call.  Upshot: we need to calculate the
     args into temporary regs, and move those to arg regs or onto the
     stack only immediately prior to the call proper.  Sigh.
     in preparation for the outer call.  Upshot: we need to calculate the
     args into temporary regs, and move those to arg regs or onto the
     stack only immediately prior to the call proper.  Sigh.
+
+genCCall
+    :: CmmCallTarget           -- function to call
+    -> HintedCmmFormals                -- where to put the result
+    -> HintedCmmActuals                -- arguments (of mixed type)
+    -> NatM InstrBlock
+
  -}
  
  -}
  
-genCCall target dest_regs argsAndHints = do
-    let
-        args = map hintlessCmm argsAndHints
-    argcode_and_vregs <- mapM arg_to_int_vregs args
-    let 
-        (argcodes, vregss) = unzip argcode_and_vregs
-        n_argRegs          = length allArgRegs
-        n_argRegs_used     = min (length vregs) n_argRegs
-        vregs              = concat vregss
-    -- deal with static vs dynamic call targets
-    callinsns <- (case target of
-        CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
-               return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-        CmmCallee expr conv -> do
-                (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
-                return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-       CmmPrim mop -> do
-                 (res, reduce) <- outOfLineFloatOp mop
-                 lblOrMopExpr <- case res of
-                      Left lbl -> do
-                           return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-                      Right mopExpr -> do
-                           (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
-                           return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-                 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
-
-      )
-    let
-        argcode = concatOL argcodes
-        (move_sp_down, move_sp_up)
-           = let diff = length vregs - n_argRegs
-                 nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
-             in  if   nn <= 0
-                 then (nilOL, nilOL)
-                 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+genCCall target dest_regs argsAndHints 
+ = do          
+       -- strip hints from the arg regs
+       let args :: [CmmExpr]
+           args  = map hintlessCmm argsAndHints
  
  
-        transfer_code
-           = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
  
  
-       -- assign the results, if necessary
-       assign_code []  = nilOL
-       
-       assign_code [CmmHinted dest _hint]      
-        = let  rep     = localRegType dest
-               width   = typeWidth rep
-               r_dest  = getRegisterReg (CmmLocal dest)
-
-               result
-                       | isFloatType rep 
-                       , W32   <- width
-                       = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
-                       
-                       | isFloatType rep
-                       , W64   <- width
-                       = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+       -- work out the arguments, and assign them to integer regs
+       argcode_and_vregs       <- mapM arg_to_int_vregs args
+       let (argcodes, vregss)  = unzip argcode_and_vregs
+       let vregs               = concat vregss
+
+       let n_argRegs           = length allArgRegs
+       let n_argRegs_used      = min (length vregs) n_argRegs
+
+
+       -- deal with static vs dynamic call targets
+       callinsns <- case target of
+               CmmCallee (CmmLit (CmmLabel lbl)) conv -> 
+                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+               CmmCallee expr conv 
+                -> do  (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+               CmmPrim mop 
+                -> do  res     <- outOfLineFloatOp mop
+                       lblOrMopExpr <- case res of
+                               Left lbl -> do
+                                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+                               Right mopExpr -> do
+                                       (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+                                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+                       return lblOrMopExpr
+
+       let argcode = concatOL argcodes
+
+       let (move_sp_down, move_sp_up)
+                  = let diff = length vregs - n_argRegs
+                        nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+                    in  if   nn <= 0
+                        then (nilOL, nilOL)
+                        else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+
+        let transfer_code
+               = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+                               
+       return 
+        $      argcode                 `appOL`
+               move_sp_down            `appOL`
+               transfer_code           `appOL`
+               callinsns               `appOL`
+               unitOL NOP              `appOL`
+               move_sp_up              `appOL`
+               assign_code dest_regs
+
+
+-- | Generate code to calculate an argument, and move it into one
+--     or two integer vregs.
+arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+arg_to_int_vregs arg
+
+       -- If the expr produces a 64 bit int, then we can just use iselExpr64
+       | isWord64 (cmmExprType arg)
+       = do    (ChildCode64 code r_lo) <- iselExpr64 arg
+               let r_hi                = getHiVRegFromLo r_lo
+               return (code, [r_hi, r_lo])
+
+       | otherwise
+       = do    (src, code)     <- getSomeReg arg
+               tmp             <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
+               let pk          = cmmExprType arg
+
+               case cmmTypeSize pk of
+
+                -- Load a 64 bit float return value into two integer regs.
+                FF64 -> do
+                       v1 <- getNewRegNat II32
+                       v2 <- getNewRegNat II32
+
+                       let Just f0_high = fPair f0
                         
                         
-                       | not $ isFloatType rep
-                       , W32   <- width
-                       = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+                       let code2 = 
+                               code                            `snocOL`
+                               FMOV FF64 src f0                `snocOL`
+                               ST   FF32  f0 (spRel 16)        `snocOL`
+                               LD   II32  (spRel 16) v1        `snocOL`
+                               ST   FF32  f0_high (spRel 16)   `snocOL`
+                               LD   II32  (spRel 16) v2
+
+                       return  (code2, [v1,v2])
+
+                -- Load a 32 bit float return value into an integer reg
+                FF32 -> do
+                       v1 <- getNewRegNat II32
                         
                         
-          in   result
+                       let code2 =
+                               code                            `snocOL`
+                               ST   FF32  src (spRel 16)       `snocOL`
+                               LD   II32  (spRel 16) v1
                                 
                                 
-    return (argcode       `appOL`
-            move_sp_down  `appOL`
-            transfer_code `appOL`
-            callinsns     `appOL`
-            unitOL NOP    `appOL`
-            move_sp_up   `appOL`
-           assign_code dest_regs)
-  where
-     -- move args from the integer vregs into which they have been 
-     -- marshalled, into %o0 .. %o5, and the rest onto the stack.
-     move_final :: [Reg] -> [Reg] -> Int -> [Instr]
-
-     move_final [] _ offset          -- all args done
-        = []
-
-     move_final (v:vs) [] offset     -- out of aregs; move to stack
-        = ST II32 v (spRel offset)
-          : move_final vs [] (offset+1)
-
-     move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
-        = OR False g0 (RIReg v) a
-          : move_final vs az offset
-
-     -- generate code to calculate an argument, and move it into one
-     -- or two integer vregs.
-     arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-     arg_to_int_vregs arg
-        | isWord64 (cmmExprType arg)
-        = do
-         (ChildCode64 code r_lo) <- iselExpr64 arg
-          let 
-              r_hi = getHiVRegFromLo r_lo
-          return (code, [r_hi, r_lo])
-        | otherwise
-        = do
-         (src, code) <- getSomeReg arg
-          tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
-          let
-              pk       = cmmExprType arg
-              Just f0_high = fPair f0
-          case cmmTypeSize pk of
-             FF64 -> do
-                      v1 <- getNewRegNat II32
-                      v2 <- getNewRegNat II32
-                      return (
-                        code                          `snocOL`
-                        FMOV FF64 src f0                `snocOL`
-                        ST   FF32  f0 (spRel 16)         `snocOL`
-                        LD   II32  (spRel 16) v1         `snocOL`
-                        ST   FF32  f0_high (spRel 16) `snocOL`
-                        LD   II32  (spRel 16) v2
-                        ,
-                        [v1,v2]
-                       )
-             FF32 -> do
-                      v1 <- getNewRegNat II32
-                      return (
-                        code                    `snocOL`
-                        ST   FF32  src (spRel 16)  `snocOL`
-                        LD   II32  (spRel 16) v1
-                        ,
-                        [v1]
-                       )
-             other -> do
-                        v1 <- getNewRegNat II32
-                        return (
-                          code `snocOL` OR False g0 (RIReg src) v1
-                          , 
-                          [v1]
-                         )
-outOfLineFloatOp mop =
-    do
-      dflags <- getDynFlagsNat
-      mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
-                 mkForeignLabel functionName Nothing True
-      let mopLabelOrExpr = case mopExpr of
-                       CmmLit (CmmLabel lbl) -> Left lbl
-                        _ -> Right mopExpr
-      return (mopLabelOrExpr, reduce)
-            where
-                (reduce, functionName) = case mop of
-                 MO_F32_Exp    -> (True,  fsLit "exp")
-                 MO_F32_Log    -> (True,  fsLit "log")
-                 MO_F32_Sqrt   -> (True,  fsLit "sqrt")
+                       return (code2, [v1])
+
+                -- Move an integer return value into its destination reg.
+                other -> do
+                       v1 <- getNewRegNat II32
+                       
+                       let code2 = 
+                               code                            `snocOL`
+                               OR False g0 (RIReg src) v1
+                       
+                       return (code2, [v1])
  
  
-                 MO_F32_Sin    -> (True,  fsLit "sin")
-                 MO_F32_Cos    -> (True,  fsLit "cos")
-                 MO_F32_Tan    -> (True,  fsLit "tan")
  
  
-                 MO_F32_Asin   -> (True,  fsLit "asin")
-                 MO_F32_Acos   -> (True,  fsLit "acos")
-                 MO_F32_Atan   -> (True,  fsLit "atan")
+-- | Move args from the integer vregs into which they have been 
+--     marshalled, into %o0 .. %o5, and the rest onto the stack.
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ offset          
+       = []
  
  
-                 MO_F32_Sinh   -> (True,  fsLit "sinh")
-                 MO_F32_Cosh   -> (True,  fsLit "cosh")
-                 MO_F32_Tanh   -> (True,  fsLit "tanh")
+-- out of aregs; move to stack
+move_final (v:vs) [] offset     
+       = ST II32 v (spRel offset)
+       : move_final vs [] (offset+1)
  
  
-                 MO_F64_Exp    -> (False, fsLit "exp")
-                 MO_F64_Log    -> (False, fsLit "log")
-                 MO_F64_Sqrt   -> (False, fsLit "sqrt")
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset 
+       = OR False g0 (RIReg v) a
+       : move_final vs az offset
  
  
-                 MO_F64_Sin    -> (False, fsLit "sin")
-                 MO_F64_Cos    -> (False, fsLit "cos")
-                 MO_F64_Tan    -> (False, fsLit "tan")
  
  
-                 MO_F64_Asin   -> (False, fsLit "asin")
-                 MO_F64_Acos   -> (False, fsLit "acos")
-                 MO_F64_Atan   -> (False, fsLit "atan")
+-- | Assign results returned from the call into their 
+--     desination regs.
+--
+assign_code :: [CmmHinted LocalReg] -> OrdList Instr
+assign_code [] = nilOL
+
+assign_code [CmmHinted dest _hint]     
+ = let rep     = localRegType dest
+       width   = typeWidth rep
+       r_dest  = getRegisterReg (CmmLocal dest)
+
+       result
+               | isFloatType rep 
+               , W32   <- width
+               = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+
+               | isFloatType rep
+               , W64   <- width
+               = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+
+               | not $ isFloatType rep
+               , W32   <- width
+               = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+
+               | not $ isFloatType rep
+               , W64           <- width
+               , r_dest_hi     <- getHiVRegFromLo r_dest
+               = toOL  [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
+                       , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
+   in  result
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineFloatOp 
+       :: CallishMachOp 
+       -> NatM (Either CLabel CmmExpr)
+
+outOfLineFloatOp mop 
+ = do  let functionName
+               = outOfLineFloatOp_table mop
+       
+       dflags  <- getDynFlagsNat
+       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference 
+               $  mkForeignLabel functionName Nothing True
  
  
-                 MO_F64_Sinh   -> (False, fsLit "sinh")
-                 MO_F64_Cosh   -> (False, fsLit "cosh")
-                 MO_F64_Tanh   -> (False, fsLit "tanh")
+       let mopLabelOrExpr 
+               = case mopExpr of
+                       CmmLit (CmmLabel lbl)   -> Left lbl
+                        _                      -> Right mopExpr
+
+       return mopLabelOrExpr
+
+
+-- | Decide what C function to use to implement a CallishMachOp
+--
+outOfLineFloatOp_table 
+       :: CallishMachOp
+       -> FastString
+       
+outOfLineFloatOp_table mop
+ = case mop of
+       MO_F32_Exp    -> fsLit "expf"
+       MO_F32_Log    -> fsLit "logf"
+       MO_F32_Sqrt   -> fsLit "sqrtf"
+
+       MO_F32_Sin    -> fsLit "sinf"
+       MO_F32_Cos    -> fsLit "cosf"
+       MO_F32_Tan    -> fsLit "tanf"
+
+       MO_F32_Asin   -> fsLit "asinf"
+       MO_F32_Acos   -> fsLit "acosf"
+       MO_F32_Atan   -> fsLit "atanf"
+
+       MO_F32_Sinh   -> fsLit "sinhf"
+       MO_F32_Cosh   -> fsLit "coshf"
+       MO_F32_Tanh   -> fsLit "tanhf"
+
+       MO_F64_Exp    -> fsLit "exp"
+       MO_F64_Log    -> fsLit "log"
+       MO_F64_Sqrt   -> fsLit "sqrt"
+
+       MO_F64_Sin    -> fsLit "sin"
+       MO_F64_Cos    -> fsLit "cos"
+       MO_F64_Tan    -> fsLit "tan"
+
+       MO_F64_Asin   -> fsLit "asin"
+       MO_F64_Acos   -> fsLit "acos"
+       MO_F64_Atan   -> fsLit "atan"
+
+       MO_F64_Sinh   -> fsLit "sinh"
+       MO_F64_Cosh   -> fsLit "cosh"
+       MO_F64_Tanh   -> fsLit "tanh"
+
+       other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
+                       (pprCallishMachOp mop)
  
  
-                  other -> pprPanic "outOfLineFloatOp(sparc) "
-                                (pprCallishMachOp mop)
  
  #endif /* sparc_TARGET_ARCH */
  
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -4012,15 +4235,43 @@ genSwitch expr ids
          return code
  #elif sparc_TARGET_ARCH
  genSwitch expr ids
          return code
  #elif sparc_TARGET_ARCH
  genSwitch expr ids
-  | opt_PIC
-  = error "MachCodeGen: sparc genSwitch PIC not finished\n"
+       | opt_PIC
+       = error "MachCodeGen: sparc genSwitch PIC not finished\n"
    
    
-  | otherwise
-  = error "MachCodeGen: sparc genSwitch non-PIC not finished\n"
+       | otherwise
+       = do    (e_reg, e_code) <- getSomeReg expr
+
+               base_reg        <- getNewRegNat II32
+               offset_reg      <- getNewRegNat II32
+               dst             <- getNewRegNat II32
+
+               label           <- getNewLabelNat
+               let jumpTable   = map jumpTableEntry ids
+
+               return $ e_code `appOL`
+                toOL   
+                       -- the jump table
+                       [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
+
+                       -- load base of jump table
+                       , SETHI (HI (ImmCLbl label)) base_reg
+                       , OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+                       
+                       -- the addrs in the table are 32 bits wide..
+                       , SLL   e_reg (RIImm $ ImmInt 2) offset_reg
+
+                       -- load and jump to the destination
+                       , LD    II32 (AddrRegReg base_reg offset_reg) dst
+                       , JMP   (AddrRegImm dst (ImmInt 0)) 
+                       , NOP ]
+
  #else
  #error "ToDo: genSwitch"
  #endif
  
  #else
  #error "ToDo: genSwitch"
  #endif
  
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
  jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
  jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
      where blockLabel = mkAsmTempLabel id
  jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
  jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
      where blockLabel = mkAsmTempLabel id
@@ -4793,21 +5044,33 @@ coerceInt2FP width1 width2 x = do
             FxTOy (intSize width1) (floatSize width2) dst dst]
      return (Any (floatSize $ width2) code__2)
  
             FxTOy (intSize width1) (floatSize width2) dst dst]
      return (Any (floatSize $ width2) code__2)
  
-------------
-coerceFP2Int width1 width2 x = do
-    let pk     = intSize width1
-        fprep  = floatSize width2
  
  
-    (src, code) <- getSomeReg x
-    reg <- getNewRegNat fprep
-    tmp <- getNewRegNat pk
-    let
-       code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
-           code `appOL` toOL [
-           FxTOy fprep pk src tmp,
-           ST pk tmp (spRel (-2)),
-           LD pk (spRel (-2)) dst]
-    return (Any pk code__2)
+-- | Coerce a floating point value to integer
+--
+--   NOTE: On sparc v9 there are no instructions to move a value from an
+--        FP register directly to an int register, so we have to use a load/store.
+--
+coerceFP2Int width1 width2 x 
+ = do  let fsize1      = floatSize width1
+           fsize2      = floatSize width2
+       
+            isize2     = intSize   width2
+
+       (fsrc, code)    <- getSomeReg x
+       fdst            <- getNewRegNat fsize2
+    
+       let code2 dst   
+               =       code
+               `appOL` toOL
+                       -- convert float to int format, leaving it in a float reg.
+                       [ FxTOy fsize1 isize2 fsrc fdst
+
+                       -- store the int into mem, then load it back to move
+                       --      it into an actual int reg.
+                       , ST    fsize2 fdst (spRel (-2))
+                       , LD    isize2 (spRel (-2)) dst]
+
+       return (Any isize2 code2)
  
  ------------
  coerceDbl2Flt x = do
  
  ------------
  coerceDbl2Flt x = do