SPARC NCG: Clean up formatting and add comments in genCCall

[ghc-hetmet.git] / compiler / nativeGen / MachCodeGen.hs
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs

index af8408a..c340b9d 100644 (file)
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -29,7 +29,9 @@ import MachInstrs
  import MachRegs
  import NCGMonad
  import PositionIndependentCode
-import RegAllocInfo ( mkBranchInstr )
+import RegAllocInfo    ( mkBranchInstr, mkRegRegMoveInstr )
+import MachRegs
+import PprMach
  
  -- Our intermediate code:
  import BlockId
@@ -57,6 +59,7 @@ import Data.Bits
  import Data.Word
  import Data.Int
  
+
  -- -----------------------------------------------------------------------------
  -- Top-level of the instruction selector
  
@@ -320,21 +323,53 @@ assignReg_I64Code lvalue valueTree
     = panic "assignReg_I64Code(sparc): invalid lvalue"
  
  
--- Don't delete this -- it's very handy for debugging.
---iselExpr64 expr 
---   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
---   = panic "iselExpr64(???)"
+-- Load a 64 bit word
+iselExpr64 (CmmLoad addrTree ty) 
+ | isWord64 ty
+ = do  Amode amode addr_code   <- getAmode addrTree
+       let result
+
+               | AddrRegReg r1 r2      <- amode
+               = do    rlo     <- getNewRegNat II32
+                       tmp     <- getNewRegNat II32
+                       let rhi = getHiVRegFromLo rlo
+
+                       return  $ ChildCode64 
+                               (        addr_code 
+                               `appOL`  toOL
+                                        [ ADD False False r1 (RIReg r2) tmp
+                                        , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
+                                        , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
+                               rlo
+
+               | AddrRegImm r1 (ImmInt i) <- amode
+               = do    rlo     <- getNewRegNat II32
+                       let rhi = getHiVRegFromLo rlo
+                       
+                       return  $ ChildCode64 
+                               (        addr_code 
+                               `appOL`  toOL
+                                        [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
+                                        , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
+                               rlo
+               
+       result
+
+
+-- Add a literal to a 64 bit integer
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) 
+ = do  ChildCode64 code1 r1_lo <- iselExpr64 e1
+       let r1_hi       = getHiVRegFromLo r1_lo
+       
+       r_dst_lo        <- getNewRegNat II32
+       let r_dst_hi    =  getHiVRegFromLo r_dst_lo 
+       
+       return  $ ChildCode64
+                       ( toOL
+                       [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
+                       , ADD True  False r1_hi (RIReg g0)         r_dst_hi ])
+                       r_dst_lo
  
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
-     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
-     rlo <- getNewRegNat II32
-     let rhi = getHiVRegFromLo rlo
-         mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
-         mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
-     return (
-            ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) 
-                         rlo
-          )
  
  iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
       r_dst_lo <-  getNewRegNat II32
@@ -1393,24 +1428,38 @@ reg2reg size src dst
  
  #if sparc_TARGET_ARCH
  
+-- getRegister :: CmmExpr -> NatM Register
+
+-- Load a literal float into a float register.
+--     The actual literal is stored in a new data area, and we load it 
+--     at runtime.
  getRegister (CmmLit (CmmFloat f W32)) = do
+
+    -- a label for the new data area
      lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
+
      let code dst = toOL [
+            -- the data area         
             LDATA ReadOnlyData
                         [CmmDataLabel lbl,
                          CmmStaticLit (CmmFloat f W32)],
-           SETHI (HI (ImmCLbl lbl)) dst,
-           LD FF32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] 
+
+            -- load the literal
+           SETHI (HI (ImmCLbl lbl)) tmp,
+           LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
+
      return (Any FF32 code)
  
  getRegister (CmmLit (CmmFloat d W64)) = do
      lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
      let code dst = toOL [
             LDATA ReadOnlyData
                         [CmmDataLabel lbl,
                          CmmStaticLit (CmmFloat d W64)],
-           SETHI (HI (ImmCLbl lbl)) dst,
-           LD FF64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] 
+           SETHI (HI (ImmCLbl lbl)) tmp,
+           LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
      return (Any FF64 code)
  
  getRegister (CmmMachOp mop [x]) -- unary MachOps
@@ -1434,23 +1483,46 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
        MO_UU_Conv W32 to -> conversionNop (intSize to) x
        MO_SS_Conv W32 to -> conversionNop (intSize to) x
  
-      -- widenings
-      MO_UU_Conv W8 W32  -> integerExtend False W8 W32  x
-      MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x
-      MO_UU_Conv W8 W16  -> integerExtend False W8 W16  x
-      MO_SS_Conv W16 W32 -> integerExtend True  W16 W32 x
+      MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
  
-      other_op -> panic "Unknown unary mach op"
+      -- sign extension
+      MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
+      MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
+      MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
+
+      other_op -> panic ("Unknown unary mach op: " ++ show mop)
      where
-        -- XXX SLL/SRL?
-        integerExtend signed from to expr = do
-           (reg, e_code) <- getSomeReg expr
-          let
-              code dst =
-                  e_code `snocOL` 
-                  ((if signed then SRA else SRL)
-                         reg (RIImm (ImmInt 0)) dst)
-          return (Any (intSize to) code)
+
+       -- | sign extend and widen
+       integerExtend 
+               :: Width                -- ^ width of source expression
+               -> Width                -- ^ width of result
+               -> CmmExpr              -- ^ source expression
+               -> NatM Register        
+
+       integerExtend from to expr
+        = do   -- load the expr into some register
+               (reg, e_code)   <- getSomeReg expr
+               tmp             <- getNewRegNat II32
+               let bitCount
+                       = case (from, to) of
+                               (W8,  W32)      -> 24
+                               (W16, W32)      -> 16
+                               (W8,  W16)      -> 24
+               let code dst
+                       = e_code        
+
+                       -- local shift word left to load the sign bit
+                       `snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
+                       
+                       -- arithmetic shift right to sign extend
+                       `snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
+                       
+               return (Any (intSize to) code)
+                               
+
          conversionNop new_rep expr
              = do e_code <- getRegister expr
                   return (swizzleRegisterRep e_code new_rep)
@@ -1479,14 +1551,13 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
        MO_Sub W32 -> trivialCode W32 (SUB False False) x y
  
        MO_S_MulMayOflo rep -> imulMayOflo rep x y
-{-
-      -- ToDo: teach about V8+ SPARC div instructions
-      MO_S_Quot W32 -> idiv FSLIT(".div")   x y
-      MO_S_Rem W32  -> idiv FSLIT(".rem")   x y
-      MO_U_Quot W32 -> idiv FSLIT(".udiv")  x y
-      MO_U_Rem W32  -> idiv FSLIT(".urem")  x y
--}
  
+      MO_S_Quot W32    -> idiv True  False x y
+      MO_U_Quot W32    -> idiv False False x y
+       
+      MO_S_Rem  W32    -> irem True  x y
+      MO_U_Rem W32     -> irem False x y
+       
        MO_F_Eq w -> condFltReg EQQ x y
        MO_F_Ne w -> condFltReg NE x y
  
@@ -1519,9 +1590,115 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
  -}
        other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
    where
-    --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+    -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+    -- | Generate an integer division instruction.
+    idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+       
+    -- For unsigned division with a 32 bit numerator, 
+    --         we can just clear the Y register.
+    idiv False cc x y = do
+       (a_reg, a_code)         <- getSomeReg x
+               (b_reg, b_code)         <- getSomeReg y
+       
+       let code dst
+               =       a_code 
+               `appOL` b_code  
+               `appOL` toOL
+                       [ WRY  g0 g0
+                       , UDIV cc a_reg (RIReg b_reg) dst]
+                       
+       return (Any II32 code)
+       
+
+    -- For _signed_ division with a 32 bit numerator,
+    --         we have to sign extend the numerator into the Y register.
+    idiv True cc x y = do
+       (a_reg, a_code)         <- getSomeReg x
+               (b_reg, b_code)         <- getSomeReg y
+       
+       tmp                     <- getNewRegNat II32
+       
+       let code dst
+               =       a_code 
+               `appOL` b_code  
+               `appOL` toOL
+                       [ SRA  a_reg (RIImm (ImmInt 16)) tmp            -- sign extend
+                       , SRA  tmp   (RIImm (ImmInt 16)) tmp
+
+                       , WRY  tmp g0                           
+                       , SDIV cc a_reg (RIReg b_reg) dst]
+                       
+       return (Any II32 code)
+
+
+    -- | Do an integer remainder.
+    --
+    --  NOTE:  The SPARC v8 architecture manual says that integer division
+    --         instructions _may_ generate a remainder, depending on the implementation.
+    --         If so it is _recommended_ that the remainder is placed in the Y register.
+    --
+    --          The UltraSparc 2007 manual says Y is _undefined_ after division.
+    --
+    --         The SPARC T2 doesn't store the remainder, not sure about the others. 
+    --         It's probably best not to worry about it, and just generate our own
+    --         remainders. 
+    --
+    irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+    -- For unsigned operands: 
+    --         Division is between a 64 bit numerator and a 32 bit denominator, 
+    --         so we still have to clear the Y register.
+    irem False x y = do
+       (a_reg, a_code) <- getSomeReg x
+       (b_reg, b_code) <- getSomeReg y
+
+       tmp_reg         <- getNewRegNat II32
+
+       let code dst
+               =       a_code
+               `appOL` b_code
+               `appOL` toOL
+                       [ WRY   g0 g0
+                       , UDIV  False         a_reg (RIReg b_reg) tmp_reg
+                       , UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
+                       , SUB   False False   a_reg (RIReg tmp_reg) dst]
+    
+       return  (Any II32 code)
+
+    
+    -- For signed operands:
+    --         Make sure to sign extend into the Y register, or the remainder
+    --         will have the wrong sign when the numerator is negative.
+    --
+    -- TODO:   When sign extending, GCC only shifts the a_reg right by 17 bits,
+    --         not the full 32. Not sure why this is, something to do with overflow?
+    --         If anyone cares enough about the speed of signed remainder they
+    --         can work it out themselves (then tell me). -- BL 2009/01/20
+    
+    irem True x y = do
+       (a_reg, a_code) <- getSomeReg x
+       (b_reg, b_code) <- getSomeReg y
+       
+       tmp1_reg        <- getNewRegNat II32
+       tmp2_reg        <- getNewRegNat II32
+               
+       let code dst
+               =       a_code
+               `appOL` b_code
+               `appOL` toOL
+                       [ SRA   a_reg      (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+                       , SRA   tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+                       , WRY   tmp1_reg g0
+
+                       , SDIV  False          a_reg (RIReg b_reg)    tmp2_reg  
+                       , SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
+                       , SUB   False False    a_reg (RIReg tmp2_reg) dst]
+                       
+       return (Any II32 code)
+   
  
-    --------------------
      imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
      imulMayOflo rep a b = do
           (a_reg, a_code) <- getSomeReg a
@@ -1913,15 +2090,16 @@ getAmode (CmmMachOp (MO_Add rep) [x, y])
         code = codeX `appOL` codeY
      return (Amode (AddrRegReg regX regY) code)
  
--- XXX Is this same as "leaf" in Stix?
  getAmode (CmmLit lit)
    = do
-      tmp <- getNewRegNat II32
-      let
-       code = unitOL (SETHI (HI imm__2) tmp)
-      return (Amode (AddrRegImm tmp (LO imm__2)) code)
-      where
-         imm__2 = litToImm lit
+       let imm__2      = litToImm lit
+       tmp1    <- getNewRegNat II32
+       tmp2    <- getNewRegNat II32
+
+       let code = toOL [ SETHI (HI imm__2) tmp1
+                       , OR    False tmp1 (RIImm (LO imm__2)) tmp2]
+               
+       return (Amode (AddrRegReg tmp2 g0) code)
  
  getAmode other
    = do
@@ -2475,7 +2653,7 @@ assignReg_IntCode pk reg src = do
      r <- getRegister src
      return $ case r of
         Any _ code         -> code dst
-       Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
+       Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
      where
        dst = getRegisterReg reg
  
@@ -2577,15 +2755,13 @@ assignMem_FltCode pk addr src = do
      return code__2
  
  -- Floating point assignment to a register/temporary
--- ToDo: Verify correctness
-assignReg_FltCode pk reg src = do
-    r <- getRegister src
-    v1 <- getNewRegNat pk
-    return $ case r of
-        Any _ code         -> code dst
-       Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
-    where
-      dst = getRegisterReg reg
+assignReg_FltCode pk dstCmmReg srcCmmExpr = do
+    srcRegister <- getRegister srcCmmExpr
+    let dstReg = getRegisterReg dstCmmReg
+
+    return $ case srcRegister of
+        Any _ code                 -> code dstReg
+       Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -2907,14 +3083,14 @@ genCondJump id bool = do
  
  #if sparc_TARGET_ARCH
  
-genCondJump (BlockId id) bool = do
+genCondJump bid bool = do
    CondCode is_float cond code <- getCondCode bool
    return (
         code `appOL` 
         toOL (
           if   is_float
-         then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
-         else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+         then [NOP, BF cond False bid, NOP]
+         else [BI cond False bid, NOP]
         )
      )
  
@@ -3430,158 +3606,247 @@ genCCall target dest_regs args = do
     in preparation for the outer call.  Upshot: we need to calculate the
     args into temporary regs, and move those to arg regs or onto the
     stack only immediately prior to the call proper.  Sigh.
+
+genCCall
+    :: CmmCallTarget           -- function to call
+    -> HintedCmmFormals                -- where to put the result
+    -> HintedCmmActuals                -- arguments (of mixed type)
+    -> NatM InstrBlock
+
  -}
  
-genCCall target dest_regs argsAndHints = do
-    let
-        args = map hintlessCmm argsAndHints
-    argcode_and_vregs <- mapM arg_to_int_vregs args
-    let 
-        (argcodes, vregss) = unzip argcode_and_vregs
-        n_argRegs          = length allArgRegs
-        n_argRegs_used     = min (length vregs) n_argRegs
-        vregs              = concat vregss
-    -- deal with static vs dynamic call targets
-    callinsns <- (case target of
-        CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
-               return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-        CmmCallee expr conv -> do
-                (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
-                return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-       CmmPrim mop -> do
-                 (res, reduce) <- outOfLineFloatOp mop
-                 lblOrMopExpr <- case res of
-                      Left lbl -> do
-                           return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-                      Right mopExpr -> do
-                           (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
-                           return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-                 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
-
-      )
-    let
-        argcode = concatOL argcodes
-        (move_sp_down, move_sp_up)
-           = let diff = length vregs - n_argRegs
-                 nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
-             in  if   nn <= 0
-                 then (nilOL, nilOL)
-                 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
-        transfer_code
-           = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
-    return (argcode       `appOL`
-            move_sp_down  `appOL`
-            transfer_code `appOL`
-            callinsns     `appOL`
-            unitOL NOP    `appOL`
-            move_sp_up)
-  where
-     -- move args from the integer vregs into which they have been 
-     -- marshalled, into %o0 .. %o5, and the rest onto the stack.
-     move_final :: [Reg] -> [Reg] -> Int -> [Instr]
-
-     move_final [] _ offset          -- all args done
-        = []
-
-     move_final (v:vs) [] offset     -- out of aregs; move to stack
-        = ST II32 v (spRel offset)
-          : move_final vs [] (offset+1)
-
-     move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
-        = OR False g0 (RIReg v) a
-          : move_final vs az offset
-
-     -- generate code to calculate an argument, and move it into one
-     -- or two integer vregs.
-     arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-     arg_to_int_vregs arg
-        | isWord64 (cmmExprType arg)
-        = do
-         (ChildCode64 code r_lo) <- iselExpr64 arg
-          let 
-              r_hi = getHiVRegFromLo r_lo
-          return (code, [r_hi, r_lo])
-        | otherwise
-        = do
-         (src, code) <- getSomeReg arg
-          tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
-          let
-              pk   = cmmExprType arg
-          case cmmTypeSize pk of
-             FF64 -> do
-                      v1 <- getNewRegNat II32
-                      v2 <- getNewRegNat II32
-                      return (
-                        code                          `snocOL`
-                        FMOV FF64 src f0                `snocOL`
-                        ST   FF32  f0 (spRel 16)         `snocOL`
-                        LD   II32  (spRel 16) v1         `snocOL`
-                        ST   FF32  (fPair f0) (spRel 16) `snocOL`
-                        LD   II32  (spRel 16) v2
-                        ,
-                        [v1,v2]
-                       )
-             FF32 -> do
-                      v1 <- getNewRegNat II32
-                      return (
-                        code                    `snocOL`
-                        ST   FF32  src (spRel 16)  `snocOL`
-                        LD   II32  (spRel 16) v1
-                        ,
-                        [v1]
-                       )
-             other -> do
-                        v1 <- getNewRegNat II32
-                        return (
-                          code `snocOL` OR False g0 (RIReg src) v1
-                          , 
-                          [v1]
-                         )
-outOfLineFloatOp mop =
-    do
-      dflags <- getDynFlagsNat
-      mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
-                 mkForeignLabel functionName Nothing True
-      let mopLabelOrExpr = case mopExpr of
-                       CmmLit (CmmLabel lbl) -> Left lbl
-                        _ -> Right mopExpr
-      return (mopLabelOrExpr, reduce)
-            where
-                (reduce, functionName) = case mop of
-                 MO_F32_Exp    -> (True,  fsLit "exp")
-                 MO_F32_Log    -> (True,  fsLit "log")
-                 MO_F32_Sqrt   -> (True,  fsLit "sqrt")
+genCCall target dest_regs argsAndHints 
+ = do          
+       -- strip hints from the arg regs
+       let args :: [CmmExpr]
+           args  = map hintlessCmm argsAndHints
+
+
+       -- work out the arguments, and assign them to integer regs
+       argcode_and_vregs       <- mapM arg_to_int_vregs args
+       let (argcodes, vregss)  = unzip argcode_and_vregs
+       let vregs               = concat vregss
+
+       let n_argRegs           = length allArgRegs
+       let n_argRegs_used      = min (length vregs) n_argRegs
+
+
+       -- deal with static vs dynamic call targets
+       callinsns <- case target of
+               CmmCallee (CmmLit (CmmLabel lbl)) conv -> 
+                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+               CmmCallee expr conv 
+                -> do  (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+               CmmPrim mop 
+                -> do  (res, reduce) <- outOfLineFloatOp mop
+                       lblOrMopExpr <- case res of
+                               Left lbl -> do
+                                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+                               Right mopExpr -> do
+                                       (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+                                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+                       if reduce 
+                               then panic ("genCCall(sparc): can not reduce mach op " ++ show mop)
+                               else return lblOrMopExpr
+
+       let argcode = concatOL argcodes
+
+       let (move_sp_down, move_sp_up)
+                  = let diff = length vregs - n_argRegs
+                        nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+                    in  if   nn <= 0
+                        then (nilOL, nilOL)
+                        else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+
+        let transfer_code
+               = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+                               
+       return 
+        $      argcode                 `appOL`
+               move_sp_down            `appOL`
+               transfer_code           `appOL`
+               callinsns               `appOL`
+               unitOL NOP              `appOL`
+               move_sp_up              `appOL`
+               assign_code dest_regs
+
+
+-- | Generate code to calculate an argument, and move it into one
+--     or two integer vregs.
+arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+arg_to_int_vregs arg
+
+       -- If the expr produces a 64 bit int, then we can just use iselExpr64
+       | isWord64 (cmmExprType arg)
+       = do    (ChildCode64 code r_lo) <- iselExpr64 arg
+               let r_hi                = getHiVRegFromLo r_lo
+               return (code, [r_hi, r_lo])
+
+       | otherwise
+       = do    (src, code)     <- getSomeReg arg
+               tmp             <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
+               let pk          = cmmExprType arg
+
+               case cmmTypeSize pk of
+
+                -- Load a 64 bit float return value into two integer regs.
+                FF64 -> do
+                       v1 <- getNewRegNat II32
+                       v2 <- getNewRegNat II32
+
+                       let Just f0_high = fPair f0
+                       
+                       let code2 = 
+                               code                            `snocOL`
+                               FMOV FF64 src f0                `snocOL`
+                               ST   FF32  f0 (spRel 16)        `snocOL`
+                               LD   II32  (spRel 16) v1        `snocOL`
+                               ST   FF32  f0_high (spRel 16)   `snocOL`
+                               LD   II32  (spRel 16) v2
+
+                       return  (code2, [v1,v2])
+
+                -- Load a 32 bit float return value into an integer reg
+                FF32 -> do
+                       v1 <- getNewRegNat II32
+                       
+                       let code2 =
+                               code                            `snocOL`
+                               ST   FF32  src (spRel 16)       `snocOL`
+                               LD   II32  (spRel 16) v1
+                               
+                       return (code2, [v1])
+
+                -- Move an integer return value into its destination reg.
+                other -> do
+                       v1 <- getNewRegNat II32
+                       
+                       let code2 = 
+                               code                            `snocOL`
+                               OR False g0 (RIReg src) v1
+                       
+                       return (code2, [v1])
+
+
+-- | Move args from the integer vregs into which they have been 
+--     marshalled, into %o0 .. %o5, and the rest onto the stack.
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ offset          
+       = []
+
+-- out of aregs; move to stack
+move_final (v:vs) [] offset     
+       = ST II32 v (spRel offset)
+       : move_final vs [] (offset+1)
+
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset 
+       = OR False g0 (RIReg v) a
+       : move_final vs az offset
+
+
+-- | Assign results returned from the call into their 
+--     desination regs.
+--
+assign_code :: [CmmHinted LocalReg] -> OrdList Instr
+assign_code [] = nilOL
+
+assign_code [CmmHinted dest _hint]     
+ = let rep     = localRegType dest
+       width   = typeWidth rep
+       r_dest  = getRegisterReg (CmmLocal dest)
+
+       result
+               | isFloatType rep 
+               , W32   <- width
+               = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+
+               | isFloatType rep
+               , W64   <- width
+               = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+
+               | not $ isFloatType rep
+               , W32   <- width
+               = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+
+               | not $ isFloatType rep
+               , W64           <- width
+               , r_dest_hi     <- getHiVRegFromLo r_dest
+               = toOL  [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
+                       , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
+   in  result
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineFloatOp 
+       :: CallishMachOp 
+       -> NatM ( Either CLabel CmmExpr
+               , Bool)
+
+outOfLineFloatOp mop 
+ = do  let (reduce, functionName)
+               = outOfLineFloatOp_table mop
+       
+       dflags  <- getDynFlagsNat
+       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference 
+               $  mkForeignLabel functionName Nothing True
+
+       let mopLabelOrExpr 
+               = case mopExpr of
+                       CmmLit (CmmLabel lbl)   -> Left lbl
+                        _                      -> Right mopExpr
+
+       return (mopLabelOrExpr, reduce)
+
+
+outOfLineFloatOp_table 
+       :: CallishMachOp
+       -> (Bool, FastString)
+       
+outOfLineFloatOp_table mop
+ = case mop of
+       MO_F32_Exp    -> (True,  fsLit "exp")
+       MO_F32_Log    -> (True,  fsLit "log")
+       MO_F32_Sqrt   -> (True,  fsLit "sqrt")
+
+       MO_F32_Sin    -> (True,  fsLit "sin")
+       MO_F32_Cos    -> (True,  fsLit "cos")
+       MO_F32_Tan    -> (True,  fsLit "tan")
  
-                 MO_F32_Sin    -> (True,  fsLit "sin")
-                 MO_F32_Cos    -> (True,  fsLit "cos")
-                 MO_F32_Tan    -> (True,  fsLit "tan")
+       MO_F32_Asin   -> (True,  fsLit "asin")
+       MO_F32_Acos   -> (True,  fsLit "acos")
+       MO_F32_Atan   -> (True,  fsLit "atan")
  
-                 MO_F32_Asin   -> (True,  fsLit "asin")
-                 MO_F32_Acos   -> (True,  fsLit "acos")
-                 MO_F32_Atan   -> (True,  fsLit "atan")
+       MO_F32_Sinh   -> (True,  fsLit "sinh")
+       MO_F32_Cosh   -> (True,  fsLit "cosh")
+       MO_F32_Tanh   -> (True,  fsLit "tanh")
  
-                 MO_F32_Sinh   -> (True,  fsLit "sinh")
-                 MO_F32_Cosh   -> (True,  fsLit "cosh")
-                 MO_F32_Tanh   -> (True,  fsLit "tanh")
+       MO_F64_Exp    -> (False, fsLit "exp")
+       MO_F64_Log    -> (False, fsLit "log")
+       MO_F64_Sqrt   -> (False, fsLit "sqrt")
  
-                 MO_F64_Exp    -> (False, fsLit "exp")
-                 MO_F64_Log    -> (False, fsLit "log")
-                 MO_F64_Sqrt   -> (False, fsLit "sqrt")
+       MO_F64_Sin    -> (False, fsLit "sin")
+       MO_F64_Cos    -> (False, fsLit "cos")
+       MO_F64_Tan    -> (False, fsLit "tan")
  
-                 MO_F64_Sin    -> (False, fsLit "sin")
-                 MO_F64_Cos    -> (False, fsLit "cos")
-                 MO_F64_Tan    -> (False, fsLit "tan")
+       MO_F64_Asin   -> (False, fsLit "asin")
+       MO_F64_Acos   -> (False, fsLit "acos")
+       MO_F64_Atan   -> (False, fsLit "atan")
  
-                 MO_F64_Asin   -> (False, fsLit "asin")
-                 MO_F64_Acos   -> (False, fsLit "acos")
-                 MO_F64_Atan   -> (False, fsLit "atan")
+       MO_F64_Sinh   -> (False, fsLit "sinh")
+       MO_F64_Cosh   -> (False, fsLit "cosh")
+       MO_F64_Tanh   -> (False, fsLit "tanh")
  
-                 MO_F64_Sinh   -> (False, fsLit "sinh")
-                 MO_F64_Cosh   -> (False, fsLit "cosh")
-                 MO_F64_Tanh   -> (False, fsLit "tanh")
+       other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
+                       (pprCallishMachOp mop)
  
-                  other -> pprPanic "outOfLineFloatOp(sparc) "
-                                (pprCallishMachOp mop)
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -3970,15 +4235,43 @@ genSwitch expr ids
          return code
  #elif sparc_TARGET_ARCH
  genSwitch expr ids
-  | opt_PIC
-  = error "MachCodeGen: sparc genSwitch PIC not finished\n"
+       | opt_PIC
+       = error "MachCodeGen: sparc genSwitch PIC not finished\n"
    
-  | otherwise
-  = error "MachCodeGen: sparc genSwitch non-PIC not finished\n"
+       | otherwise
+       = do    (e_reg, e_code) <- getSomeReg expr
+
+               base_reg        <- getNewRegNat II32
+               offset_reg      <- getNewRegNat II32
+               dst             <- getNewRegNat II32
+
+               label           <- getNewLabelNat
+               let jumpTable   = map jumpTableEntry ids
+
+               return $ e_code `appOL`
+                toOL   
+                       -- the jump table
+                       [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
+
+                       -- load base of jump table
+                       , SETHI (HI (ImmCLbl label)) base_reg
+                       , OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+                       
+                       -- the addrs in the table are 32 bits wide..
+                       , SLL   e_reg (RIImm $ ImmInt 2) offset_reg
+
+                       -- load and jump to the destination
+                       , LD    II32 (AddrRegReg base_reg offset_reg) dst
+                       , JMP   (AddrRegImm dst (ImmInt 0)) 
+                       , NOP ]
+
  #else
  #error "ToDo: genSwitch"
  #endif
  
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
  jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
  jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
      where blockLabel = mkAsmTempLabel id
@@ -4135,32 +4428,32 @@ condIntReg NE x y = do
      return (Any II32 code__2)
  
  condIntReg cond x y = do
-    BlockId lbl1 <- getBlockIdNat
-    BlockId lbl2 <- getBlockIdNat
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
      CondCode _ cond cond_code <- condIntCode cond x y
      let
         code__2 dst = cond_code `appOL` toOL [
-           BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+           BI cond False bid1, NOP,
             OR False g0 (RIImm (ImmInt 0)) dst,
-           BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
-           NEWBLOCK (BlockId lbl1),
+           BI ALWAYS False bid2, NOP,
+           NEWBLOCK bid1,
             OR False g0 (RIImm (ImmInt 1)) dst,
-           NEWBLOCK (BlockId lbl2)]
+           NEWBLOCK bid2]
      return (Any II32 code__2)
  
  condFltReg cond x y = do
-    BlockId lbl1 <- getBlockIdNat
-    BlockId lbl2 <- getBlockIdNat
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
      CondCode _ cond cond_code <- condFltCode cond x y
      let
         code__2 dst = cond_code `appOL` toOL [ 
             NOP,
-           BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+           BF cond False bid1, NOP,
             OR False g0 (RIImm (ImmInt 0)) dst,
-           BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
-           NEWBLOCK (BlockId lbl1),
+           BI ALWAYS False bid2, NOP,
+           NEWBLOCK bid1,
             OR False g0 (RIImm (ImmInt 1)) dst,
-           NEWBLOCK (BlockId lbl2)]
+           NEWBLOCK bid2]
      return (Any II32 code__2)
  
  #endif /* sparc_TARGET_ARCH */
@@ -4748,24 +5041,36 @@ coerceInt2FP width1 width2 x = do
         code__2 dst = code `appOL` toOL [
             ST (intSize width1) src (spRel (-2)),
             LD (intSize width1) (spRel (-2)) dst,
-           FxTOy (intSize width1) (floatSize width1) dst dst]
+           FxTOy (intSize width1) (floatSize width2) dst dst]
      return (Any (floatSize $ width2) code__2)
  
-------------
-coerceFP2Int width1 width2 x = do
-    let pk     = intSize width1
-        fprep  = floatSize width2
  
-    (src, code) <- getSomeReg x
-    reg <- getNewRegNat fprep
-    tmp <- getNewRegNat pk
-    let
-       code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
-           code `appOL` toOL [
-           FxTOy fprep pk src tmp,
-           ST pk tmp (spRel (-2)),
-           LD pk (spRel (-2)) dst]
-    return (Any pk code__2)
+-- | Coerce a floating point value to integer
+--
+--   NOTE: On sparc v9 there are no instructions to move a value from an
+--        FP register directly to an int register, so we have to use a load/store.
+--
+coerceFP2Int width1 width2 x 
+ = do  let fsize1      = floatSize width1
+           fsize2      = floatSize width2
+       
+            isize2     = intSize   width2
+
+       (fsrc, code)    <- getSomeReg x
+       fdst            <- getNewRegNat fsize2
+    
+       let code2 dst   
+               =       code
+               `appOL` toOL
+                       -- convert float to int format, leaving it in a float reg.
+                       [ FxTOy fsize1 isize2 fsrc fdst
+
+                       -- store the int into mem, then load it back to move
+                       --      it into an actual int reg.
+                       , ST    fsize2 fdst (spRel (-2))
+                       , LD    isize2 (spRel (-2)) dst]
+
+       return (Any isize2 code2)
  
  ------------
  coerceDbl2Flt x = do