X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FnativeGen%2FMachCodeGen.hs;h=f700fbcb7184ab7c2e5098b95cbd479bf6c0cc13;hb=baf9d8508f3fd2d042dcbe19b68f4b49c12f4769;hp=3a0dfad9e7dfa0832f24cf3d6ebea22739c03e49;hpb=14a5aadb84c34dbe2bee129ed80fdfa1fb12e3e0;p=ghc-hetmet.git diff --git a/ghc/compiler/nativeGen/MachCodeGen.hs b/ghc/compiler/nativeGen/MachCodeGen.hs index 3a0dfad..f700fbc 100644 --- a/ghc/compiler/nativeGen/MachCodeGen.hs +++ b/ghc/compiler/nativeGen/MachCodeGen.hs @@ -513,17 +513,17 @@ getRegisterReg (CmmGlobal mid) getRegister :: CmmExpr -> NatM Register +getRegister (CmmReg (CmmGlobal PicBaseReg)) + = do + reg <- getPicBaseNat wordRep + return (Fixed wordRep reg nilOL) + getRegister (CmmReg reg) = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL) getRegister tree@(CmmRegOff _ _) = getRegister (mangleIndexTree tree) -getRegister CmmPicBaseReg - = do - reg <- getPicBaseNat wordRep - return (Fixed wordRep reg nilOL) - -- end of machine-"independent" bit; here we go on the rest... #if alpha_TARGET_ARCH @@ -809,8 +809,7 @@ getRegister (CmmLit (CmmFloat f rep)) = do LDATA ReadOnlyData [CmmDataLabel lbl, CmmStaticLit (CmmFloat f rep)], - MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst) - -- ToDo: should use %rip-relative + MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) ] -- in return (Any rep code) @@ -869,9 +868,10 @@ getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do #if x86_64_TARGET_ARCH getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do + x_code <- getAnyReg x lbl <- getNewLabelNat let - code dst = toOL [ + code dst = x_code dst `appOL` toOL [ -- This is how gcc does it, so it can't be that bad: LDATA ReadOnlyData16 [ CmmAlign 16, @@ -881,7 +881,7 @@ getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do CmmStaticLit (CmmInt 0 I32), CmmStaticLit (CmmInt 0 I32) ], - XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst) + XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) -- xorps, so we need the 128-bit constant -- ToDo: rip-relative ] @@ -889,10 +889,11 @@ getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do return (Any F32 code) getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do + x_code <- getAnyReg x lbl <- getNewLabelNat let -- This is how gcc does it, so it can't be that bad: - code dst = toOL [ + code dst = x_code dst `appOL` toOL [ LDATA ReadOnlyData16 [ CmmAlign 16, CmmDataLabel lbl, @@ -900,9 +901,8 @@ getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do CmmStaticLit (CmmInt 0 I64) ], -- gcc puts an unpck here. Wonder if we need it. - XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst) + XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) -- xorpd, so we need the 128-bit constant - -- ToDo: rip-relative ] -- return (Any F64 code) @@ -1153,7 +1153,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps code dst = x_code `snocOL` LEA rep - (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm)) + (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) (OpReg dst) -- return (Any rep code) @@ -1839,14 +1839,14 @@ getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)]) -- ASSERT(rep == I32)??? = do (x_reg, x_code) <- getSomeReg x let off = ImmInt (-(fromInteger i)) - return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code) + return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)]) | not (is64BitLit lit) -- ASSERT(rep == I32)??? 
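-- A note on the addressing-mode change running through the hunks above
-- and below: the (Maybe Reg) base and (Maybe (Reg,Int)) index fields of
-- AddrBaseIndex are replaced by explicit constructors.  A minimal sketch
-- of the shape these hunks assume (the real definitions live elsewhere
-- in the NCG; EABaseRip and the body of ripRel are inferred from their
-- uses, not shown in this diff):
--
--     data EABase  = EABaseNone | EABaseReg Reg | EABaseRip
--     data EAIndex = EAIndexNone | EAIndex Reg Int
--
--     ripRel :: Imm -> AddrMode
--     ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm
--
-- The extra base constructor is what lets ripRel express the
-- %rip-relative addressing now used for the floating-point constants in
-- getRegister above; the old Maybe encoding had no way to say
-- "base = instruction pointer", hence the retired
-- "ToDo: should use %rip-relative" comments.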
= do (x_reg, x_code) <- getSomeReg x let off = ImmInt (fromInteger i) - return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code) + return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be -- recognised by the next rule. @@ -1864,7 +1864,7 @@ getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) let code = x_code `appOL` y_code base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8 - return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0)) + return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0)) code) getAmode (CmmLit lit) | not (is64BitLit lit) @@ -1872,7 +1872,7 @@ getAmode (CmmLit lit) | not (is64BitLit lit) getAmode expr = do (reg,code) <- getSomeReg expr - return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code) + return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code) #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ @@ -1996,6 +1996,14 @@ getAmode other #if i386_TARGET_ARCH || x86_64_TARGET_ARCH getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) +#if x86_64_TARGET_ARCH +getNonClobberedOperand (CmmLit lit) + | isSuitableFloatingPointLit lit = do + lbl <- getNewLabelNat + let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit lit]) + return (OpAddr (ripRel (ImmCLbl lbl)), code) +#endif getNonClobberedOperand (CmmLit lit) | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = return (OpImm (litToImm lit), nilOL) @@ -2006,7 +2014,7 @@ getNonClobberedOperand (CmmLoad mem pk) if (amodeCouldBeClobbered src) then do tmp <- getNewRegNat wordRep - return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0), + return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), unitOL (LEA I32 (OpAddr src) (OpReg tmp))) else return (src, nilOL) @@ -2024,23 +2032,38 @@ regClobbered _ = False -- getOperand: the operand is not required to remain valid across the -- computation of an arbitrary expression. getOperand :: CmmExpr -> NatM (Operand, InstrBlock) +#if x86_64_TARGET_ARCH getOperand (CmmLit lit) - | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = + | isSuitableFloatingPointLit lit = do + lbl <- getNewLabelNat + let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit lit]) + return (OpAddr (ripRel (ImmCLbl lbl)), code) +#endif +getOperand (CmmLit lit) + | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do return (OpImm (litToImm lit), nilOL) getOperand (CmmLoad mem pk) | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do Amode src mem_code <- getAmode mem return (OpAddr src, mem_code) getOperand e = do - (reg, code) <- getNonClobberedReg e + (reg, code) <- getSomeReg e return (OpReg reg, code) isOperand :: CmmExpr -> Bool isOperand (CmmLoad _ _) = True -isOperand (CmmLit lit) = not (is64BitLit lit) && - not (isFloatingRep (cmmLitRep lit)) +isOperand (CmmLit lit) = not (is64BitLit lit) + || isSuitableFloatingPointLit lit isOperand _ = False +-- if we want a floating-point literal as an operand, we can +-- use it directly from memory. However, if the literal is +-- zero, we're better off generating it into a register using +-- xor. 
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 +isSuitableFloatingPointLit _ = False + getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) getRegOrMem (CmmLoad mem pk) | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do @@ -2236,10 +2259,10 @@ condFltCode cond x y = do code = x_code `appOL` y_code `snocOL` CMP (cmmExprRep x) y_op (OpReg x_reg) - -- in - return (CondCode False (condToUnsigned cond) code) - -- we need to use the unsigned comparison operators on the + -- NB(1): we need to use the unsigned comparison operators on the -- result of this comparison. + -- in + return (CondCode True (condToUnsigned cond) code) #endif -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2886,14 +2909,51 @@ genCondJump lbl (StPrim op [x, y]) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH +#if i386_TARGET_ARCH genCondJump id bool = do CondCode _ cond code <- getCondCode bool return (code `snocOL` JXX cond id) -#endif /* i386_TARGET_ARCH */ +#endif +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if x86_64_TARGET_ARCH + +genCondJump id bool = do + CondCode is_float cond cond_code <- getCondCode bool + if not is_float + then + return (cond_code `snocOL` JXX cond id) + else do + lbl <- getBlockIdNat + + -- see comment with condFltReg + let code = case cond of + NE -> or_unordered + GU -> plain_test + GEU -> plain_test + _ -> and_ordered + + plain_test = unitOL ( + JXX cond id + ) + or_unordered = toOL [ + JXX cond id, + JXX PARITY id + ] + and_ordered = toOL [ + JXX PARITY lbl, + JXX cond id, + JXX ALWAYS lbl, + NEWBLOCK lbl + ] + return (cond_code `appOL` code) + +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if sparc_TARGET_ARCH @@ -3048,12 +3108,12 @@ genCCall target dest_regs args vols = do -- CmmPrim -> ... 
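-- The CondCode change in condFltCode above (False -> True) records that
-- the condition came from a floating-point comparison, and the x86_64
-- genCondJump above matches on that flag to decide whether the
-- unordered (NaN) case needs special treatment.  A worked example,
-- assuming the float CMP here is an SSE ucomiss/ucomisd, which sets
-- ZF = PF = CF = 1 for unordered operands:
--
--   x /= y (NE):  JNE alone would read ZF=1 as "equal", so or_unordered
--                 also takes the jump when PF=1, making NaN /= x True.
--   x >  y (GU):  JA tests CF=0 && ZF=0, and CF=1 whenever the operands
--                 are unordered, so plain_test needs no parity check
--                 (likewise GEU/JAE, which tests CF=0 alone).
--   x == y (EQ):  JE would wrongly jump for NaNs; and_ordered sends the
--                 PF=1 case around the conditional jump via lbl.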
CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm)), conv) + return (unitOL (CALL (Left fn_imm) []), conv) where fn_imm = ImmCLbl lbl CmmForeignCall expr conv -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr ASSERT(dyn_rep == I32) - return (dyn_c `snocOL` CALL (Right dyn_r), conv) + return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) let push_code = concatOL push_codes call = callinsns `appOL` @@ -3120,8 +3180,8 @@ genCCall target dest_regs args vols = do code `appOL` toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp), DELTA (delta-size), - GST sz reg (AddrBaseIndex (Just esp) - Nothing + GST sz reg (AddrBaseIndex (EABaseReg esp) + EAIndexNone (ImmInt 0))] ) else return (size, @@ -3138,6 +3198,9 @@ genCCall target dest_regs args vols = do (reg,code) <- getSomeReg op return (code, reg, cmmExprRep op) +#endif /* i386_TARGET_ARCH */ + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)] -> Maybe [GlobalReg] -> NatM InstrBlock @@ -3161,6 +3224,10 @@ outOfLineFloatOp mop res args vols lbl = CmmLabel (mkForeignLabel fn Nothing False) fn = case mop of + MO_F32_Sqrt -> FSLIT("sqrt") + MO_F32_Sin -> FSLIT("sin") + MO_F32_Cos -> FSLIT("cos") + MO_F32_Tan -> FSLIT("tan") MO_F32_Exp -> FSLIT("exp") MO_F32_Log -> FSLIT("log") @@ -3173,6 +3240,10 @@ outOfLineFloatOp mop res args vols MO_F32_Tanh -> FSLIT("tanh") MO_F32_Pwr -> FSLIT("pow") + MO_F64_Sqrt -> FSLIT("sqrt") + MO_F64_Sin -> FSLIT("sin") + MO_F64_Cos -> FSLIT("cos") + MO_F64_Tan -> FSLIT("tan") MO_F64_Exp -> FSLIT("exp") MO_F64_Log -> FSLIT("log") @@ -3187,22 +3258,29 @@ outOfLineFloatOp mop res args vols other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop) -#endif /* i386_TARGET_ARCH */ +#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if x86_64_TARGET_ARCH genCCall (CmmPrim op) [(r,_)] args vols = - panic "genCCall(CmmPrim)(x86_64)" + outOfLineFloatOp op r args vols genCCall target dest_regs args vols = do -- load up the register arguments - (stack_args, sse_regs, load_args_code) - <- load_args args allArgRegs allFPArgRegs 0 nilOL + (stack_args, aregs, fregs, load_args_code) + <- load_args args allArgRegs allFPArgRegs nilOL let + fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs)) + int_regs_used = reverse (drop (length aregs) (reverse allArgRegs)) + arg_regs = int_regs_used ++ fp_regs_used + -- for annotating the call instruction with + + sse_regs = length fp_regs_used + tot_arg_size = arg_size * length stack_args -- On entry to the called function, %rsp should be aligned @@ -3234,11 +3312,11 @@ genCCall target dest_regs args vols = do -- CmmPrim -> ... 
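-- CALL now carries a second field: the list of registers used to pass
-- arguments ([] on i386 above, where every argument goes on the stack).
-- Presumably this is so the register allocator sees those registers as
-- live across the call.  On x86_64 the used registers are recovered from
-- the leftover lists that load_args returns: if allFPArgRegs is, say,
-- [xmm0 .. xmm7] and five registers come back unused, then
--
--     fp_regs_used = reverse (drop 5 (reverse allFPArgRegs))
--                  = [xmm0, xmm1, xmm2]
--
-- i.e. exactly the prefix that was consumed, and sse_regs (the count
-- that must end up in %al for varargs calls, per the ABI comment below)
-- is just its length.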
CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm)), conv) + return (unitOL (CALL (Left fn_imm) arg_regs), conv) where fn_imm = ImmCLbl lbl CmmForeignCall expr conv -> do (dyn_r, dyn_c) <- getSomeReg expr - return (dyn_c `snocOL` CALL (Right dyn_r), conv) + return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) let -- The x86_64 ABI requires us to set %al to the number of SSE @@ -3289,31 +3367,31 @@ genCCall target dest_regs args vols = do load_args :: [(CmmExpr,MachHint)] -> [Reg] -- int regs avail for args -> [Reg] -- FP regs avail for args - -> Int -> InstrBlock - -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock) - load_args args [] [] sse_regs code = return (args, sse_regs, code) + -> InstrBlock + -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock) + load_args args [] [] code = return (args, [], [], code) -- no more regs to use - load_args [] aregs fregs sse_regs code = return ([],sse_regs,code) + load_args [] aregs fregs code = return ([], aregs, fregs, code) -- no more args to push - load_args ((arg,hint) : rest) aregs fregs sse_regs code + load_args ((arg,hint) : rest) aregs fregs code | isFloatingRep arg_rep = case fregs of [] -> push_this_arg (r:rs) -> do arg_code <- getAnyReg arg - load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r) + load_args rest aregs rs (code `appOL` arg_code r) | otherwise = case aregs of [] -> push_this_arg (r:rs) -> do arg_code <- getAnyReg arg - load_args rest rs fregs sse_regs (code `appOL` arg_code r) + load_args rest rs fregs (code `appOL` arg_code r) where arg_rep = cmmExprRep arg push_this_arg = do - (args',sse',code') <- load_args rest aregs fregs sse_regs code - return ((arg,hint):args', sse', code') + (args',ars,frs,code') <- load_args rest aregs fregs code + return ((arg,hint):args', ars, frs, code') push_args [] code = return code push_args ((arg,hint):rest) code @@ -3749,7 +3827,7 @@ genSwitch expr ids = do lbl <- getNewLabelNat let jumpTable = map jumpTableEntry ids - op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl)) + op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl)) code = e_code `appOL` toOL [ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), JMP_TBL op [ id | Just id <- ids ] @@ -3841,38 +3919,77 @@ condIntReg cond x y = do let code dst = cond_code `appOL` toOL [ SETCC cond (OpReg tmp), - MOV I32 (OpReg tmp) (OpReg dst), - AND I32 (OpImm (ImmInt 1)) (OpReg dst) + MOVZxL I8 (OpReg tmp) (OpReg dst) ] - -- NB. (1) Tha AND is needed here because the x86 only - -- sets the low byte in the SETCC instruction. - -- NB. (2) The extra temporary register is a hack to - -- work around the fact that the setcc instructions only - -- accept byte registers. dst might not be a byte-able reg, - -- but currently all free registers are byte-able, so we're - -- guaranteed that a new temporary is byte-able. -- in return (Any I32 code) +#endif + +#if i386_TARGET_ARCH condFltReg cond x y = do - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat CondCode _ cond cond_code <- condFltCode cond x y - let - code dst = cond_code `appOL` toOL [ - JXX cond lbl1, - MOV I32 (OpImm (ImmInt 0)) (OpReg dst), - JXX ALWAYS lbl2, - NEWBLOCK lbl1, - MOV I32 (OpImm (ImmInt 1)) (OpReg dst), - JXX ALWAYS lbl2, - NEWBLOCK lbl2] - -- SIGH, have to split up this block somehow... 
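-- SETcc writes only the low byte of its operand, which is why the old
-- condIntReg needed the MOV + AND pair (NB (1) above).  The rewritten
-- versions zero-extend that byte with a single MOVZxL I8 instead; the
-- temporary survives because SETcc also only accepts byte-addressable
-- registers (old NB (2)), and the new i386 condFltReg below requests it
-- at I8, presumably to make the byte-register requirement visible to the
-- register allocator rather than relying on the old "all free registers
-- are byte-able" assumption.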
+ tmp <- getNewRegNat I8 + let + code dst = cond_code `appOL` toOL [ + SETCC cond (OpReg tmp), + MOVZxL I8 (OpReg tmp) (OpReg dst) + ] -- in return (Any I32 code) -#endif /* i386_TARGET_ARCH */ +#endif + +#if x86_64_TARGET_ARCH + +condFltReg cond x y = do + CondCode _ cond cond_code <- condFltCode cond x y + tmp1 <- getNewRegNat wordRep + tmp2 <- getNewRegNat wordRep + let + -- We have to worry about unordered operands (eg. comparisons + -- against NaN). If the operands are unordered, the comparison + -- sets the parity flag, carry flag and zero flag. + -- All comparisons are supposed to return false for unordered + -- operands except for !=, which returns true. + -- + -- Optimisation: we don't have to test the parity flag if we + -- know the test has already excluded the unordered case: eg > + -- and >= test for a zero carry flag, which can only occur for + -- ordered operands. + -- + -- ToDo: by reversing comparisons we could avoid testing the + -- parity flag in more cases. + + code dst = + cond_code `appOL` + (case cond of + NE -> or_unordered dst + GU -> plain_test dst + GEU -> plain_test dst + _ -> and_ordered dst) + + plain_test dst = toOL [ + SETCC cond (OpReg tmp1), + MOVZxL I8 (OpReg tmp1) (OpReg dst) + ] + or_unordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC PARITY (OpReg tmp2), + OR I8 (OpReg tmp1) (OpReg tmp2), + MOVZxL I8 (OpReg tmp2) (OpReg dst) + ] + and_ordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC NOTPARITY (OpReg tmp2), + AND I8 (OpReg tmp1) (OpReg tmp2), + MOVZxL I8 (OpReg tmp2) (OpReg dst) + ] + -- in + return (Any I32 code) + +#endif -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -4210,7 +4327,10 @@ trivialCode rep instr (Just revinstr) (CmmLit lit_a) b -- in return (Any rep code) -trivialCode rep instr maybe_revinstr a b = do +trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b + +-- This is re-used for floating pt instructions too. +genTrivialCode rep instr a b = do (b_op, b_code) <- getNonClobberedOperand b a_code <- getAnyReg a tmp <- getNewRegNat rep @@ -4222,7 +4342,7 @@ trivialCode rep instr maybe_revinstr a b = do -- as the destination reg. In this case, we have to save b in a -- new temporary across the computation of a. code dst - | dst `clashesWith` b_op = + | dst `regClashesWithOp` b_op = b_code `appOL` unitOL (MOV rep b_op (OpReg tmp)) `appOL` a_code dst `snocOL` @@ -4233,10 +4353,10 @@ trivialCode rep instr maybe_revinstr a b = do instr b_op (OpReg dst) -- in return (Any rep code) - where - reg `clashesWith` OpReg reg2 = reg == reg2 - reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode) - reg `clashesWith` _ = False + +reg `regClashesWithOp` OpReg reg2 = reg == reg2 +reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode) +reg `regClashesWithOp` _ = False ----------- @@ -4268,19 +4388,7 @@ trivialFCode pk instr x y = do #if x86_64_TARGET_ARCH --- We use the 2-operand SSE2 floating pt instructions. ToDo: improve on --- this by using some of the special cases in trivialCode above. 
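-- Factoring the body of trivialCode out as genTrivialCode pays off just
-- below: the two-operand SSE2 floating-point instructions overwrite
-- their destination exactly like the integer ones, so the x86_64
-- trivialFCode collapses to
--
--     trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
--
-- and inherits the destination-clash handling (regClashesWithOp) for
-- free; that is precisely what the removed "ToDo: improve on this by
-- using some of the special cases in trivialCode" was asking for.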
-trivialFCode pk instr x y = do - (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too - x_code <- getAnyReg x - let - code dst = - y_code `appOL` - x_code dst `snocOL` - instr pk (IF_ARCH_x86_64(OpReg,) y_reg) - (IF_ARCH_x86_64(OpReg,) dst) - -- in - return (Any pk code) +trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y #endif @@ -4547,7 +4655,7 @@ coerceFP2Int from to x = do coerceFP2Int from to x = do (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand let - opc = case to of F32 -> CVTSS2SI; F64 -> CVTSD2SI + opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI code dst = x_code `snocOL` opc x_op dst -- in return (Any to code) -- works even if the destination rep is
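-- The coerceFP2Int change above is a straight bug fix: CVTSS2SI converts
-- from single precision and CVTSD2SI from double, so the opcode is
-- determined by the *source* rep.  The old `case to of` scrutinised the
-- integer destination rep (I32/I64) against F32/F64 patterns, which
-- could never match, so any float-to-int coercion on x86_64 would
-- presumably have died with a pattern-match failure.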