X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FMachCodeGen.hs;h=c340b9d8d0fa254fdced5cda116996c0d26e2ec7;hb=2922c9ae951271a60db6fd6b2488f9d8111e442e;hp=af8408af362f5fc24232a53586a68a8a9b653bd8;hpb=598d761c769316dc4550028285f6508538b8a99c;p=ghc-hetmet.git

diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
index af8408a..c340b9d 100644
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -29,7 +29,9 @@ import MachInstrs
 import MachRegs
 import NCGMonad
 import PositionIndependentCode
-import RegAllocInfo ( mkBranchInstr )
+import RegAllocInfo 	( mkBranchInstr, mkRegRegMoveInstr )
+import MachRegs
+import PprMach
 
 -- Our intermediate code:
 import BlockId
@@ -57,6 +59,7 @@ import Data.Bits
 import Data.Word
 import Data.Int
 
+
 -- -----------------------------------------------------------------------------
 -- Top-level of the instruction selector
 
@@ -320,21 +323,53 @@ assignReg_I64Code lvalue valueTree
    = panic "assignReg_I64Code(sparc): invalid lvalue"
 
 
--- Don't delete this -- it's very handy for debugging.
---iselExpr64 expr 
---   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
---   = panic "iselExpr64(???)"
+-- Load a 64 bit word
+iselExpr64 (CmmLoad addrTree ty) 
+ | isWord64 ty
+ = do	Amode amode addr_code	<- getAmode addrTree
+ 	let result
+
+		| AddrRegReg r1 r2	<- amode
+		= do	rlo 	<- getNewRegNat II32
+			tmp	<- getNewRegNat II32
+			let rhi = getHiVRegFromLo rlo
+
+			return	$ ChildCode64 
+				(	 addr_code 
+				`appOL`	 toOL
+					 [ ADD False False r1 (RIReg r2) tmp
+					 , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
+					 , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
+				rlo
+
+		| AddrRegImm r1 (ImmInt i) <- amode
+		= do	rlo	<- getNewRegNat II32
+			let rhi = getHiVRegFromLo rlo
+			
+			return	$ ChildCode64 
+				(	 addr_code 
+				`appOL`	 toOL
+					 [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
+					 , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
+				rlo
+		
+	result
+
+
+-- Add a literal to a 64 bit integer
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) 
+ = do	ChildCode64 code1 r1_lo <- iselExpr64 e1
+ 	let r1_hi	= getHiVRegFromLo r1_lo
+	
+	r_dst_lo	<- getNewRegNat II32
+	let r_dst_hi	=  getHiVRegFromLo r_dst_lo 
+	
+	return	$ ChildCode64
+			( toOL
+			[ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
+			, ADD True  False r1_hi (RIReg g0)	   r_dst_hi ])
+			r_dst_lo
 
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
-     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
-     rlo <- getNewRegNat II32
-     let rhi = getHiVRegFromLo rlo
-         mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi
-         mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo
-     return (
-            ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) 
-                         rlo
-          )
 
 iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
      r_dst_lo <-  getNewRegNat II32
@@ -1393,24 +1428,38 @@ reg2reg size src dst
 
 #if sparc_TARGET_ARCH
 
+-- getRegister :: CmmExpr -> NatM Register
+
+-- Load a literal float into a float register.
+--	The actual literal is stored in a new data area, and we load it 
+--	at runtime.
 getRegister (CmmLit (CmmFloat f W32)) = do
+
+    -- a label for the new data area
     lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
+
     let code dst = toOL [
+            -- the data area         
 	    LDATA ReadOnlyData
 	                [CmmDataLabel lbl,
 			 CmmStaticLit (CmmFloat f W32)],
-	    SETHI (HI (ImmCLbl lbl)) dst,
-	    LD FF32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] 
+
+            -- load the literal
+	    SETHI (HI (ImmCLbl lbl)) tmp,
+	    LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
+
     return (Any FF32 code)
 
 getRegister (CmmLit (CmmFloat d W64)) = do
     lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
     let code dst = toOL [
 	    LDATA ReadOnlyData
 	                [CmmDataLabel lbl,
 			 CmmStaticLit (CmmFloat d W64)],
-	    SETHI (HI (ImmCLbl lbl)) dst,
-	    LD FF64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] 
+	    SETHI (HI (ImmCLbl lbl)) tmp,
+	    LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
     return (Any FF64 code)
 
 getRegister (CmmMachOp mop [x]) -- unary MachOps
@@ -1434,23 +1483,46 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
       MO_UU_Conv W32 to -> conversionNop (intSize to) x
       MO_SS_Conv W32 to -> conversionNop (intSize to) x
 
-      -- widenings
-      MO_UU_Conv W8 W32  -> integerExtend False W8 W32  x
-      MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x
-      MO_UU_Conv W8 W16  -> integerExtend False W8 W16  x
-      MO_SS_Conv W16 W32 -> integerExtend True  W16 W32 x
+      MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
 
-      other_op -> panic "Unknown unary mach op"
+      -- sign extension
+      MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
+      MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
+      MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
+
+      other_op -> panic ("Unknown unary mach op: " ++ show mop)
     where
-        -- XXX SLL/SRL?
-        integerExtend signed from to expr = do
-           (reg, e_code) <- getSomeReg expr
-	   let
-	       code dst =
-		   e_code `snocOL` 
-		   ((if signed then SRA else SRL)
-		          reg (RIImm (ImmInt 0)) dst)
-	   return (Any (intSize to) code)
+
+	-- | sign extend and widen
+	integerExtend 
+		:: Width 		-- ^ width of source expression
+		-> Width 		-- ^ width of result
+		-> CmmExpr 		-- ^ source expression
+		-> NatM Register	
+
+	integerExtend from to expr
+	 = do	-- load the expr into some register
+	 	(reg, e_code) 	<- getSomeReg expr
+		tmp		<- getNewRegNat II32
+		let bitCount
+			= case (from, to) of
+				(W8,  W32)	-> 24
+				(W16, W32)	-> 16
+				(W8,  W16)	-> 24
+	 	let code dst
+			= e_code 	
+
+			-- local shift word left to load the sign bit
+			`snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
+			
+			-- arithmetic shift right to sign extend
+			`snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
+			
+		return (Any (intSize to) code)
+				
+
         conversionNop new_rep expr
             = do e_code <- getRegister expr
                  return (swizzleRegisterRep e_code new_rep)
@@ -1479,14 +1551,13 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
       MO_Sub W32 -> trivialCode W32 (SUB False False) x y
 
       MO_S_MulMayOflo rep -> imulMayOflo rep x y
-{-
-      -- ToDo: teach about V8+ SPARC div instructions
-      MO_S_Quot W32 -> idiv FSLIT(".div")   x y
-      MO_S_Rem W32  -> idiv FSLIT(".rem")   x y
-      MO_U_Quot W32 -> idiv FSLIT(".udiv")  x y
-      MO_U_Rem W32  -> idiv FSLIT(".urem")  x y
--}
 
+      MO_S_Quot W32 	-> idiv True  False x y
+      MO_U_Quot W32 	-> idiv False False x y
+       
+      MO_S_Rem  W32	-> irem True  x y
+      MO_U_Rem	W32	-> irem False x y
+       
       MO_F_Eq w -> condFltReg EQQ x y
       MO_F_Ne w -> condFltReg NE x y
 
@@ -1519,9 +1590,115 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
 -}
       other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
   where
-    --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+    -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+    -- | Generate an integer division instruction.
+    idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+	
+    -- For unsigned division with a 32 bit numerator, 
+    --		we can just clear the Y register.
+    idiv False cc x y = do
+	(a_reg, a_code)		<- getSomeReg x
+       	(b_reg, b_code)		<- getSomeReg y
+	
+	let code dst
+		= 	a_code 
+		`appOL`	b_code  
+		`appOL`	toOL
+			[ WRY  g0 g0
+			, UDIV cc a_reg (RIReg b_reg) dst]
+			
+	return (Any II32 code)
+    	
+
+    -- For _signed_ division with a 32 bit numerator,
+    --		we have to sign extend the numerator into the Y register.
+    idiv True cc x y = do
+	(a_reg, a_code)		<- getSomeReg x
+       	(b_reg, b_code)		<- getSomeReg y
+	
+	tmp			<- getNewRegNat II32
+	
+	let code dst
+		= 	a_code 
+		`appOL`	b_code  
+		`appOL`	toOL
+			[ SRA  a_reg (RIImm (ImmInt 16)) tmp		-- sign extend
+			, SRA  tmp   (RIImm (ImmInt 16)) tmp
+
+			, WRY  tmp g0				
+			, SDIV cc a_reg (RIReg b_reg) dst]
+			
+	return (Any II32 code)
+
+
+    -- | Do an integer remainder.
+    --
+    --	 NOTE:	The SPARC v8 architecture manual says that integer division
+    --		instructions _may_ generate a remainder, depending on the implementation.
+    --		If so it is _recommended_ that the remainder is placed in the Y register.
+    --
+    --          The UltraSparc 2007 manual says Y is _undefined_ after division.
+    --
+    --		The SPARC T2 doesn't store the remainder, not sure about the others. 
+    --		It's probably best not to worry about it, and just generate our own
+    --		remainders. 
+    --
+    irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+    -- For unsigned operands: 
+    --		Division is between a 64 bit numerator and a 32 bit denominator, 
+    --		so we still have to clear the Y register.
+    irem False x y = do
+    	(a_reg, a_code)	<- getSomeReg x
+	(b_reg, b_code)	<- getSomeReg y
+
+	tmp_reg		<- getNewRegNat II32
+
+	let code dst
+		= 	a_code
+		`appOL`	b_code
+		`appOL`	toOL
+			[ WRY	g0 g0
+			, UDIV  False         a_reg (RIReg b_reg) tmp_reg
+			, UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
+			, SUB   False False   a_reg (RIReg tmp_reg) dst]
+    
+    	return	(Any II32 code)
+
+    
+    -- For signed operands:
+    --		Make sure to sign extend into the Y register, or the remainder
+    --		will have the wrong sign when the numerator is negative.
+    --
+    --	TODO:	When sign extending, GCC only shifts the a_reg right by 17 bits,
+    --		not the full 32. Not sure why this is, something to do with overflow?
+    --		If anyone cares enough about the speed of signed remainder they
+    --		can work it out themselves (then tell me). -- BL 2009/01/20
+    
+    irem True x y = do
+    	(a_reg, a_code)	<- getSomeReg x
+	(b_reg, b_code)	<- getSomeReg y
+	
+	tmp1_reg	<- getNewRegNat II32
+	tmp2_reg	<- getNewRegNat II32
+		
+	let code dst
+		=	a_code
+		`appOL`	b_code
+		`appOL`	toOL
+			[ SRA	a_reg      (RIImm (ImmInt 16)) tmp1_reg	-- sign extend
+			, SRA	tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg	-- sign extend
+			, WRY	tmp1_reg g0
+
+			, SDIV  False          a_reg (RIReg b_reg)    tmp2_reg	
+			, SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
+			, SUB   False False    a_reg (RIReg tmp2_reg) dst]
+			
+	return (Any II32 code)
+   
 
-    --------------------
     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
     imulMayOflo rep a b = do
          (a_reg, a_code) <- getSomeReg a
@@ -1913,15 +2090,16 @@ getAmode (CmmMachOp (MO_Add rep) [x, y])
     	code = codeX `appOL` codeY
     return (Amode (AddrRegReg regX regY) code)
 
--- XXX Is this same as "leaf" in Stix?
 getAmode (CmmLit lit)
   = do
-      tmp <- getNewRegNat II32
-      let
-    	code = unitOL (SETHI (HI imm__2) tmp)
-      return (Amode (AddrRegImm tmp (LO imm__2)) code)
-      where
-         imm__2 = litToImm lit
+	let imm__2	= litToImm lit
+	tmp1 	<- getNewRegNat II32
+	tmp2	<- getNewRegNat II32
+
+	let code = toOL	[ SETHI (HI imm__2) tmp1
+			, OR    False tmp1 (RIImm (LO imm__2)) tmp2]
+		
+	return (Amode (AddrRegReg tmp2 g0) code)
 
 getAmode other
   = do
@@ -2475,7 +2653,7 @@ assignReg_IntCode pk reg src = do
     r <- getRegister src
     return $ case r of
 	Any _ code         -> code dst
-	Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
+	Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
     where
       dst = getRegisterReg reg
 
@@ -2577,15 +2755,13 @@ assignMem_FltCode pk addr src = do
     return code__2
 
 -- Floating point assignment to a register/temporary
--- ToDo: Verify correctness
-assignReg_FltCode pk reg src = do
-    r <- getRegister src
-    v1 <- getNewRegNat pk
-    return $ case r of
-        Any _ code         -> code dst
-	Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
-    where
-      dst = getRegisterReg reg
+assignReg_FltCode pk dstCmmReg srcCmmExpr = do
+    srcRegister <- getRegister srcCmmExpr
+    let dstReg	= getRegisterReg dstCmmReg
+
+    return $ case srcRegister of
+        Any _ code         	    -> code dstReg
+	Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
 
 #endif /* sparc_TARGET_ARCH */
 
@@ -2907,14 +3083,14 @@ genCondJump id bool = do
 
 #if sparc_TARGET_ARCH
 
-genCondJump (BlockId id) bool = do
+genCondJump bid bool = do
   CondCode is_float cond code <- getCondCode bool
   return (
        code `appOL` 
        toOL (
          if   is_float
-         then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
-         else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+         then [NOP, BF cond False bid, NOP]
+         else [BI cond False bid, NOP]
        )
     )
 
@@ -3430,158 +3606,247 @@ genCCall target dest_regs args = do
    in preparation for the outer call.  Upshot: we need to calculate the
    args into temporary regs, and move those to arg regs or onto the
    stack only immediately prior to the call proper.  Sigh.
+
+genCCall
+    :: CmmCallTarget		-- function to call
+    -> HintedCmmFormals		-- where to put the result
+    -> HintedCmmActuals		-- arguments (of mixed type)
+    -> NatM InstrBlock
+
 -}
 
-genCCall target dest_regs argsAndHints = do
-    let
-        args = map hintlessCmm argsAndHints
-    argcode_and_vregs <- mapM arg_to_int_vregs args
-    let 
-        (argcodes, vregss) = unzip argcode_and_vregs
-        n_argRegs          = length allArgRegs
-        n_argRegs_used     = min (length vregs) n_argRegs
-        vregs              = concat vregss
-    -- deal with static vs dynamic call targets
-    callinsns <- (case target of
-        CmmCallee (CmmLit (CmmLabel lbl)) conv -> do
-		return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-        CmmCallee expr conv -> do
-                (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
-                return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-	CmmPrim mop -> do
-		  (res, reduce) <- outOfLineFloatOp mop
-		  lblOrMopExpr <- case res of
-		       Left lbl -> do
-		            return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-		       Right mopExpr -> do
-			    (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
-			    return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-		  if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
-
-      )
-    let
-        argcode = concatOL argcodes
-        (move_sp_down, move_sp_up)
-           = let diff = length vregs - n_argRegs
-                 nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
-             in  if   nn <= 0
-                 then (nilOL, nilOL)
-                 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
-        transfer_code
-           = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
-    return (argcode       `appOL`
-            move_sp_down  `appOL`
-            transfer_code `appOL`
-            callinsns     `appOL`
-            unitOL NOP    `appOL`
-            move_sp_up)
-  where
-     -- move args from the integer vregs into which they have been 
-     -- marshalled, into %o0 .. %o5, and the rest onto the stack.
-     move_final :: [Reg] -> [Reg] -> Int -> [Instr]
-
-     move_final [] _ offset          -- all args done
-        = []
-
-     move_final (v:vs) [] offset     -- out of aregs; move to stack
-        = ST II32 v (spRel offset)
-          : move_final vs [] (offset+1)
-
-     move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
-        = OR False g0 (RIReg v) a
-          : move_final vs az offset
-
-     -- generate code to calculate an argument, and move it into one
-     -- or two integer vregs.
-     arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-     arg_to_int_vregs arg
-        | isWord64 (cmmExprType arg)
-        = do
-	  (ChildCode64 code r_lo) <- iselExpr64 arg
-          let 
-              r_hi = getHiVRegFromLo r_lo
-          return (code, [r_hi, r_lo])
-        | otherwise
-        = do
-	  (src, code) <- getSomeReg arg
-          tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
-          let
-              pk   = cmmExprType arg
-          case cmmTypeSize pk of
-             FF64 -> do
-                      v1 <- getNewRegNat II32
-                      v2 <- getNewRegNat II32
-                      return (
-                        code                          `snocOL`
-                        FMOV FF64 src f0                `snocOL`
-                        ST   FF32  f0 (spRel 16)         `snocOL`
-                        LD   II32  (spRel 16) v1         `snocOL`
-                        ST   FF32  (fPair f0) (spRel 16) `snocOL`
-                        LD   II32  (spRel 16) v2
-                        ,
-                        [v1,v2]
-                       )
-             FF32 -> do
-                      v1 <- getNewRegNat II32
-                      return (
-                        code                    `snocOL`
-                        ST   FF32  src (spRel 16)  `snocOL`
-                        LD   II32  (spRel 16) v1
-                        ,
-                        [v1]
-                       )
-             other -> do
-                        v1 <- getNewRegNat II32
-                        return (
-                          code `snocOL` OR False g0 (RIReg src) v1
-                          , 
-                          [v1]
-                         )
-outOfLineFloatOp mop =
-    do
-      dflags <- getDynFlagsNat
-      mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
-		  mkForeignLabel functionName Nothing True
-      let mopLabelOrExpr = case mopExpr of
-			CmmLit (CmmLabel lbl) -> Left lbl
-                        _ -> Right mopExpr
-      return (mopLabelOrExpr, reduce)
-            where
-                (reduce, functionName) = case mop of
-	          MO_F32_Exp    -> (True,  fsLit "exp")
-	          MO_F32_Log    -> (True,  fsLit "log")
-	          MO_F32_Sqrt   -> (True,  fsLit "sqrt")
+genCCall target dest_regs argsAndHints 
+ = do	 	
+	-- strip hints from the arg regs
+	let args :: [CmmExpr]
+	    args  = map hintlessCmm argsAndHints
+
+
+	-- work out the arguments, and assign them to integer regs
+	argcode_and_vregs	<- mapM arg_to_int_vregs args
+	let (argcodes, vregss)	= unzip argcode_and_vregs
+	let vregs		= concat vregss
+
+	let n_argRegs		= length allArgRegs
+	let n_argRegs_used 	= min (length vregs) n_argRegs
+
+
+	-- deal with static vs dynamic call targets
+	callinsns <- case target of
+		CmmCallee (CmmLit (CmmLabel lbl)) conv -> 
+			return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+		CmmCallee expr conv 
+		 -> do	(dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+			return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+		CmmPrim mop 
+		 -> do	(res, reduce) <- outOfLineFloatOp mop
+			lblOrMopExpr <- case res of
+				Left lbl -> do
+					return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+		       		Right mopExpr -> do
+					(dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+					return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+			if reduce 
+			  	then panic ("genCCall(sparc): can not reduce mach op " ++ show mop)
+				else return lblOrMopExpr
+
+	let argcode = concatOL argcodes
+
+	let (move_sp_down, move_sp_up)
+	           = let diff = length vregs - n_argRegs
+	                 nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+	             in  if   nn <= 0
+	                 then (nilOL, nilOL)
+	                 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+
+        let transfer_code
+           	= toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+				
+	return 
+	 $ 	argcode			`appOL`
+		move_sp_down		`appOL`
+		transfer_code		`appOL`
+		callinsns		`appOL`
+		unitOL NOP		`appOL`
+		move_sp_up		`appOL`
+		assign_code dest_regs
+
+
+-- | Generate code to calculate an argument, and move it into one
+-- 	or two integer vregs.
+arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+arg_to_int_vregs arg
+
+	-- If the expr produces a 64 bit int, then we can just use iselExpr64
+	| isWord64 (cmmExprType arg)
+	= do	(ChildCode64 code r_lo) <- iselExpr64 arg
+	  	let r_hi 		= getHiVRegFromLo r_lo
+		return (code, [r_hi, r_lo])
+
+	| otherwise
+	= do	(src, code) 	<- getSomeReg arg
+		tmp 		<- getNewRegNat (cmmTypeSize $ cmmExprType arg)
+		let pk   	= cmmExprType arg
+
+		case cmmTypeSize pk of
+
+		 -- Load a 64 bit float return value into two integer regs.
+		 FF64 -> do
+			v1 <- getNewRegNat II32
+			v2 <- getNewRegNat II32
+
+			let Just f0_high = fPair f0
+			
+			let code2 = 
+				code				`snocOL`
+		                FMOV FF64 src f0		`snocOL`
+		                ST   FF32  f0 (spRel 16)	`snocOL`
+		                LD   II32  (spRel 16) v1	`snocOL`
+		                ST   FF32  f0_high (spRel 16)	`snocOL`
+		                LD   II32  (spRel 16) v2
+
+			return	(code2, [v1,v2])
+
+		 -- Load a 32 bit float return value into an integer reg
+		 FF32 -> do
+			v1 <- getNewRegNat II32
+			
+			let code2 =
+				code                    	`snocOL`
+				ST   FF32  src (spRel 16)  	`snocOL`
+				LD   II32  (spRel 16) v1
+				
+			return (code2, [v1])
+
+		 -- Move an integer return value into its destination reg.
+		 other -> do
+			v1 <- getNewRegNat II32
+	                
+			let code2 = 
+				code				`snocOL`
+				OR False g0 (RIReg src) v1
+			
+			return (code2, [v1])
+
+
+-- | Move args from the integer vregs into which they have been 
+-- 	marshalled, into %o0 .. %o5, and the rest onto the stack.
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ offset          
+	= []
+
+-- out of aregs; move to stack
+move_final (v:vs) [] offset     
+	= ST II32 v (spRel offset)
+	: move_final vs [] (offset+1)
+
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset 
+	= OR False g0 (RIReg v) a
+	: move_final vs az offset
+
+
+-- | Assign results returned from the call into their 
+--	desination regs.
+--
+assign_code :: [CmmHinted LocalReg] -> OrdList Instr
+assign_code []	= nilOL
+
+assign_code [CmmHinted dest _hint]	
+ = let	rep	= localRegType dest
+	width	= typeWidth rep
+	r_dest 	= getRegisterReg (CmmLocal dest)
+
+	result
+		| isFloatType rep 
+		, W32	<- width
+		= unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+
+		| isFloatType rep
+		, W64	<- width
+		= unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+
+		| not $ isFloatType rep
+		, W32	<- width
+		= unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+
+		| not $ isFloatType rep
+		, W64		<- width
+		, r_dest_hi	<- getHiVRegFromLo r_dest
+		= toOL 	[ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
+			, mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
+   in	result
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineFloatOp 
+	:: CallishMachOp 
+	-> NatM ( Either CLabel CmmExpr
+		, Bool)
+
+outOfLineFloatOp mop 
+ = do	let (reduce, functionName)
+ 		= outOfLineFloatOp_table mop
+	
+ 	dflags	<- getDynFlagsNat
+	mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference 
+		$  mkForeignLabel functionName Nothing True
+
+	let mopLabelOrExpr 
+		= case mopExpr of
+			CmmLit (CmmLabel lbl) 	-> Left lbl
+                        _ 			-> Right mopExpr
+
+	return (mopLabelOrExpr, reduce)
+
+
+outOfLineFloatOp_table 
+	:: CallishMachOp
+	-> (Bool, FastString)
+	
+outOfLineFloatOp_table mop
+ = case mop of
+	MO_F32_Exp    -> (True,  fsLit "exp")
+	MO_F32_Log    -> (True,  fsLit "log")
+	MO_F32_Sqrt   -> (True,  fsLit "sqrt")
+
+	MO_F32_Sin    -> (True,  fsLit "sin")
+	MO_F32_Cos    -> (True,  fsLit "cos")
+	MO_F32_Tan    -> (True,  fsLit "tan")
 
-	          MO_F32_Sin    -> (True,  fsLit "sin")
-	          MO_F32_Cos    -> (True,  fsLit "cos")
-	          MO_F32_Tan    -> (True,  fsLit "tan")
+	MO_F32_Asin   -> (True,  fsLit "asin")
+	MO_F32_Acos   -> (True,  fsLit "acos")
+	MO_F32_Atan   -> (True,  fsLit "atan")
 
-	          MO_F32_Asin   -> (True,  fsLit "asin")
-	          MO_F32_Acos   -> (True,  fsLit "acos")
-	          MO_F32_Atan   -> (True,  fsLit "atan")
+	MO_F32_Sinh   -> (True,  fsLit "sinh")
+	MO_F32_Cosh   -> (True,  fsLit "cosh")
+	MO_F32_Tanh   -> (True,  fsLit "tanh")
 
-	          MO_F32_Sinh   -> (True,  fsLit "sinh")
-	          MO_F32_Cosh   -> (True,  fsLit "cosh")
-	          MO_F32_Tanh   -> (True,  fsLit "tanh")
+	MO_F64_Exp    -> (False, fsLit "exp")
+	MO_F64_Log    -> (False, fsLit "log")
+	MO_F64_Sqrt   -> (False, fsLit "sqrt")
 
-	          MO_F64_Exp    -> (False, fsLit "exp")
-	          MO_F64_Log    -> (False, fsLit "log")
-	          MO_F64_Sqrt   -> (False, fsLit "sqrt")
+	MO_F64_Sin    -> (False, fsLit "sin")
+	MO_F64_Cos    -> (False, fsLit "cos")
+	MO_F64_Tan    -> (False, fsLit "tan")
 
-	          MO_F64_Sin    -> (False, fsLit "sin")
-	          MO_F64_Cos    -> (False, fsLit "cos")
-	          MO_F64_Tan    -> (False, fsLit "tan")
+	MO_F64_Asin   -> (False, fsLit "asin")
+	MO_F64_Acos   -> (False, fsLit "acos")
+	MO_F64_Atan   -> (False, fsLit "atan")
 
-	          MO_F64_Asin   -> (False, fsLit "asin")
-	          MO_F64_Acos   -> (False, fsLit "acos")
-	          MO_F64_Atan   -> (False, fsLit "atan")
+	MO_F64_Sinh   -> (False, fsLit "sinh")
+	MO_F64_Cosh   -> (False, fsLit "cosh")
+	MO_F64_Tanh   -> (False, fsLit "tanh")
 
-	          MO_F64_Sinh   -> (False, fsLit "sinh")
-	          MO_F64_Cosh   -> (False, fsLit "cosh")
-	          MO_F64_Tanh   -> (False, fsLit "tanh")
+	other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
+              		(pprCallishMachOp mop)
 
-                  other -> pprPanic "outOfLineFloatOp(sparc) "
-                                (pprCallishMachOp mop)
 
 #endif /* sparc_TARGET_ARCH */
 
@@ -3970,15 +4235,43 @@ genSwitch expr ids
         return code
 #elif sparc_TARGET_ARCH
 genSwitch expr ids
-  | opt_PIC
-  = error "MachCodeGen: sparc genSwitch PIC not finished\n"
+	| opt_PIC
+	= error "MachCodeGen: sparc genSwitch PIC not finished\n"
   
-  | otherwise
-  = error "MachCodeGen: sparc genSwitch non-PIC not finished\n"
+	| otherwise
+	= do	(e_reg, e_code) <- getSomeReg expr
+
+		base_reg	<- getNewRegNat II32
+		offset_reg	<- getNewRegNat II32
+		dst		<- getNewRegNat II32
+
+		label 		<- getNewLabelNat
+		let jumpTable	= map jumpTableEntry ids
+
+		return $ e_code `appOL`
+		 toOL	
+		 	-- the jump table
+			[ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
+
+			-- load base of jump table
+			, SETHI (HI (ImmCLbl label)) base_reg
+			, OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+			
+			-- the addrs in the table are 32 bits wide..
+			, SLL   e_reg (RIImm $ ImmInt 2) offset_reg
+
+			-- load and jump to the destination
+			, LD 	II32 (AddrRegReg base_reg offset_reg) dst
+			, JMP   (AddrRegImm dst (ImmInt 0)) 
+			, NOP ]
+
 #else
 #error "ToDo: genSwitch"
 #endif
 
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
 jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
     where blockLabel = mkAsmTempLabel id
@@ -4135,32 +4428,32 @@ condIntReg NE x y = do
     return (Any II32 code__2)
 
 condIntReg cond x y = do
-    BlockId lbl1 <- getBlockIdNat
-    BlockId lbl2 <- getBlockIdNat
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
     CondCode _ cond cond_code <- condIntCode cond x y
     let
 	code__2 dst = cond_code `appOL` toOL [
-	    BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+	    BI cond False bid1, NOP,
 	    OR False g0 (RIImm (ImmInt 0)) dst,
-	    BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
-	    NEWBLOCK (BlockId lbl1),
+	    BI ALWAYS False bid2, NOP,
+	    NEWBLOCK bid1,
 	    OR False g0 (RIImm (ImmInt 1)) dst,
-	    NEWBLOCK (BlockId lbl2)]
+	    NEWBLOCK bid2]
     return (Any II32 code__2)
 
 condFltReg cond x y = do
-    BlockId lbl1 <- getBlockIdNat
-    BlockId lbl2 <- getBlockIdNat
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
     CondCode _ cond cond_code <- condFltCode cond x y
     let
     	code__2 dst = cond_code `appOL` toOL [ 
     	    NOP,
-	    BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+	    BF cond False bid1, NOP,
 	    OR False g0 (RIImm (ImmInt 0)) dst,
-	    BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
-	    NEWBLOCK (BlockId lbl1),
+	    BI ALWAYS False bid2, NOP,
+	    NEWBLOCK bid1,
 	    OR False g0 (RIImm (ImmInt 1)) dst,
-	    NEWBLOCK (BlockId lbl2)]
+	    NEWBLOCK bid2]
     return (Any II32 code__2)
 
 #endif /* sparc_TARGET_ARCH */
@@ -4748,24 +5041,36 @@ coerceInt2FP width1 width2 x = do
     	code__2 dst = code `appOL` toOL [
     	    ST (intSize width1) src (spRel (-2)),
     	    LD (intSize width1) (spRel (-2)) dst,
-    	    FxTOy (intSize width1) (floatSize width1) dst dst]
+    	    FxTOy (intSize width1) (floatSize width2) dst dst]
     return (Any (floatSize $ width2) code__2)
 
-------------
-coerceFP2Int width1 width2 x = do
-    let pk	= intSize width1
-        fprep	= floatSize width2
 
-    (src, code) <- getSomeReg x
-    reg <- getNewRegNat fprep
-    tmp <- getNewRegNat pk
-    let
-    	code__2 dst = ASSERT(fprep == FF64 || fprep == FF32)
-	    code `appOL` toOL [
-    	    FxTOy fprep pk src tmp,
-    	    ST pk tmp (spRel (-2)),
-    	    LD pk (spRel (-2)) dst]
-    return (Any pk code__2)
+-- | Coerce a floating point value to integer
+--
+--   NOTE: On sparc v9 there are no instructions to move a value from an
+--	   FP register directly to an int register, so we have to use a load/store.
+--
+coerceFP2Int width1 width2 x 
+ = do	let fsize1	= floatSize width1
+	    fsize2	= floatSize width2
+	
+            isize2	= intSize   width2
+
+	(fsrc, code)	<- getSomeReg x
+	fdst		<- getNewRegNat fsize2
+    
+	let code2 dst	
+		= 	code
+		`appOL` toOL
+			-- convert float to int format, leaving it in a float reg.
+			[ FxTOy fsize1 isize2 fsrc fdst
+
+			-- store the int into mem, then load it back to move
+			--	it into an actual int reg.
+			, ST    fsize2 fdst (spRel (-2))
+			, LD	isize2 (spRel (-2)) dst]
+
+	return (Any isize2 code2)
 
 ------------
 coerceDbl2Flt x = do