X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FMachCodeGen.hs;h=b29b59e8bbc69fe167fbf1f81e9a324f32ce5f05;hb=6e1e37433632160a73d30ad785476a1ef0ba14a8;hp=90ce6b5bf8cca25cfb2bbe5ca2ebaa16d7ecbb96;hpb=0065d5ab628975892cea1ec7303f968c3338cbe1;p=ghc-hetmet.git

diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
index 90ce6b5..b29b59e 100644
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -21,7 +21,7 @@ module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
 import MachInstrs
 import MachRegs
 import NCGMonad
-import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
+import PositionIndependentCode
 import RegAllocInfo ( mkBranchInstr )
 
 -- Our intermediate code:
@@ -29,6 +29,7 @@ import PprCmm		( pprExpr )
 import Cmm
 import MachOp
 import CLabel
+import ClosureInfo	( C_SRT(..) )
 
 -- The rest:
 import StaticFlags	( opt_PIC )
@@ -41,14 +42,14 @@ import FastTypes	( isFastTrue )
 import Constants	( wORD_SIZE )
 
 #ifdef DEBUG
-import Outputable	( assertPanic )
-import TRACE		( trace )
+import Debug.Trace	( trace )
 #endif
 
 import Control.Monad	( mapAndUnzipM )
-import Maybe		( fromJust )
-import DATA_BITS
-import DATA_WORD
+import Data.Maybe	( fromJust )
+import Data.Bits
+import Data.Word
+import Data.Int
 
 -- -----------------------------------------------------------------------------
 -- Top-level of the instruction selector
@@ -60,7 +61,7 @@ import DATA_WORD
 
 type InstrBlock = OrdList Instr
 
-cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
+cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
 cmmTopCodeGen (CmmProc info lab params blocks) = do
   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
   picBaseMb <- getPicBaseMaybeNat
@@ -118,8 +119,8 @@ stmtToInstrs stmt = case stmt of
       | otherwise	 -> assignMem_IntCode kind addr src
 	where kind = cmmExprRep src
 
-    CmmCall target result_regs args vols
-       -> genCCall target result_regs args vols
+    CmmCall target result_regs args _
+       -> genCCall target result_regs args
 
     CmmBranch id	  -> genBranch id
     CmmCondBranch arg id  -> genCondJump id arg
@@ -187,7 +188,7 @@ assignMem_I64Code addrTree valueTree = do
   return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 
 
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
    let 
          r_dst_lo = mkVReg u_dst I32
@@ -229,7 +230,7 @@ iselExpr64 (CmmLoad addrTree I64) = do
                         rlo
      )
 
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
    = return (ChildCode64 nilOL (mkVReg vu I32))
          
 -- we handle addition, but rather badly
@@ -264,6 +265,17 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
    -- in
    return (ChildCode64 code rlo)
 
+iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
+     fn <- getAnyReg expr
+     r_dst_lo <-  getNewRegNat I32
+     let r_dst_hi = getHiVRegFromLo r_dst_lo
+         code = fn r_dst_lo
+     return (
+             ChildCode64 (code `snocOL` 
+                          MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
+                          r_dst_lo
+            )
+
 iselExpr64 expr
    = pprPanic "iselExpr64(i386)" (ppr expr)
 
@@ -357,7 +369,7 @@ assignMem_I64Code addrTree valueTree = do
 	-- in
 	return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
 
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do
    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
    let 
          r_dst_lo = mkVReg u_dst I32
@@ -387,7 +399,7 @@ iselExpr64 (CmmLoad addrTree I64) = do
     return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
                          rlo
 
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64)))
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
    = return (ChildCode64 nilOL (mkVReg vu I32))
 
 iselExpr64 (CmmLit (CmmInt i _)) = do
@@ -464,7 +476,7 @@ getSomeReg expr = do
 
 getRegisterReg :: CmmReg -> Reg
 
-getRegisterReg (CmmLocal (LocalReg u pk))
+getRegisterReg (CmmLocal (LocalReg u pk _))
   = mkVReg u pk
 
 getRegisterReg (CmmGlobal mid)
@@ -486,10 +498,14 @@ getRegisterReg (CmmGlobal mid)
 
 getRegister :: CmmExpr -> NatM Register
 
+#if !x86_64_TARGET_ARCH
+    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+    -- register, it can only be used for rip-relative addressing.
 getRegister (CmmReg (CmmGlobal PicBaseReg))
   = do
       reg <- getPicBaseNat wordRep
       return (Fixed wordRep reg nilOL)
+#endif
 
 getRegister (CmmReg reg) 
   = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
@@ -497,6 +513,31 @@ getRegister (CmmReg reg)
 getRegister tree@(CmmRegOff _ _) 
   = getRegister (mangleIndexTree tree)
 
+
+#if WORD_SIZE_IN_BITS==32
+    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
+    -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_U_Conv I64 I32)
+             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed I32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_S_Conv I64 I32)
+             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed I32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed I32 rlo code
+
+getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed I32 rlo code       
+
+#endif
+
 -- end of machine-"independent" bit; here we go on the rest...
 
 #if alpha_TARGET_ARCH
@@ -736,7 +777,7 @@ getRegister leaf
 
 getRegister (CmmLit (CmmFloat f F32)) = do
     lbl <- getNewLabelNat
-    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
     Amode addr addr_code <- getAmode dynRef
     let code dst =
 	    LDATA ReadOnlyData
@@ -759,7 +800,7 @@ getRegister (CmmLit (CmmFloat d F64))
 
   | otherwise = do
     lbl <- getNewLabelNat
-    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
     Amode addr addr_code <- getAmode dynRef
     let code dst =
 	    LDATA ReadOnlyData
@@ -844,6 +885,13 @@ getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
 #endif
 
 #if x86_64_TARGET_ARCH
+getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
+                                     CmmLit displacement])
+    = return $ Any I64 (\dst -> unitOL $
+        LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
+#endif
+
+#if x86_64_TARGET_ARCH
 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
   x_code <- getAnyReg x
   lbl <- getNewLabelNat
@@ -898,21 +946,19 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
       MO_Not rep   -> trivialUCode rep (NOT  rep) x
 
       -- Nop conversions
-      -- TODO: these are only nops if the arg is not a fixed register that
-      -- can't be byte-addressed.
-      MO_U_Conv I32 I8  -> conversionNop I32 x
-      MO_S_Conv I32 I8  -> conversionNop I32 x
-      MO_U_Conv I16 I8  -> conversionNop I16 x
-      MO_S_Conv I16 I8  -> conversionNop I16 x
-      MO_U_Conv I32 I16 -> conversionNop I32 x
-      MO_S_Conv I32 I16 -> conversionNop I32 x
+      MO_U_Conv I32 I8  -> toI8Reg  I32 x
+      MO_S_Conv I32 I8  -> toI8Reg  I32 x
+      MO_U_Conv I16 I8  -> toI8Reg  I16 x
+      MO_S_Conv I16 I8  -> toI8Reg  I16 x
+      MO_U_Conv I32 I16 -> toI16Reg I32 x
+      MO_S_Conv I32 I16 -> toI16Reg I32 x
 #if x86_64_TARGET_ARCH
       MO_U_Conv I64 I32 -> conversionNop I64 x
       MO_S_Conv I64 I32 -> conversionNop I64 x
-      MO_U_Conv I64 I16 -> conversionNop I64 x
-      MO_S_Conv I64 I16 -> conversionNop I64 x
-      MO_U_Conv I64 I8  -> conversionNop I64 x
-      MO_S_Conv I64 I8  -> conversionNop I64 x
+      MO_U_Conv I64 I16 -> toI16Reg I64 x
+      MO_S_Conv I64 I16 -> toI16Reg I64 x
+      MO_U_Conv I64 I8  -> toI8Reg  I64 x
+      MO_S_Conv I64 I8  -> toI8Reg  I64 x
 #endif
 
       MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
@@ -964,6 +1010,18 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
 		  instr from (OpReg reg) (OpReg dst)
 	    return (Any to code)
 
+	toI8Reg new_rep expr
+            = do codefn <- getAnyReg expr
+		 return (Any new_rep codefn)
+		-- HACK: use getAnyReg to get a byte-addressable register.
+		-- If the source was a Fixed register, this will add the
+		-- mov instruction to put it into the desired destination.
+		-- We're assuming that the destination won't be a fixed
+		-- non-byte-addressable register; it won't be, because all
+		-- fixed registers are word-sized.
+
+	toI16Reg = toI8Reg -- for now
+
         conversionNop new_rep expr
             = do e_code <- getRegister expr
                  return (swizzleRegisterRep e_code new_rep)
@@ -1101,16 +1159,30 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
 	  -- in
 	  return (Any rep code)
         
-    {- Case2: shift length is complex (non-immediate) -}
+    {- Case2: shift length is complex (non-immediate)
+      * y must go in %ecx.
+      * we cannot do y first *and* put its result in %ecx, because
+        %ecx might be clobbered by x.
+      * if we do y second, then x cannot be 
+        in a clobbered reg.  Also, we cannot clobber x's reg
+        with the instruction itself.
+      * so we can either:
+        - do y first, put its result in a fresh tmp, then copy it to %ecx later
+        - do y second and put its result into %ecx.  x gets placed in a fresh
+          tmp.  This is likely to be better, becuase the reg alloc can
+          eliminate this reg->reg move here (it won't eliminate the other one,
+          because the move is into the fixed %ecx).
+    -}
     shift_code rep instr x y{-amount-} = do
-        (x_reg, x_code) <- getNonClobberedReg x
+        x_code <- getAnyReg x
+	tmp <- getNewRegNat rep
         y_code <- getAnyReg y
 	let 
-	   code = x_code `appOL`
+	   code = x_code tmp `appOL`
 		  y_code ecx `snocOL`
-		  instr (OpReg ecx) (OpReg x_reg)
+		  instr (OpReg ecx) (OpReg tmp)
         -- in
-        return (Fixed rep x_reg code)
+        return (Fixed rep tmp code)
 
     --------------------
     add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
@@ -1648,7 +1720,7 @@ getRegister (CmmLit (CmmInt i rep))
 
 getRegister (CmmLit (CmmFloat f frep)) = do
     lbl <- getNewLabelNat
-    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
     Amode addr addr_code <- getAmode dynRef
     let code dst = 
 	    LDATA ReadOnlyData  [CmmDataLabel lbl,
@@ -1747,6 +1819,14 @@ getAmode other
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
+#if x86_64_TARGET_ARCH
+
+getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
+                                     CmmLit displacement])
+    = return $ Amode (ripRel (litToImm displacement)) nilOL
+
+#endif
+
 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 
 -- This is all just ridiculous, since it carefully undoes 
@@ -1774,15 +1854,18 @@ getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
 getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) 
 					[y, CmmLit (CmmInt shift _)]])
   | shift == 0 || shift == 1 || shift == 2 || shift == 3
-  = do (x_reg, x_code) <- getNonClobberedReg x
-	-- x must be in a temp, because it has to stay live over y_code
-	-- we could compre x_reg and y_reg and do something better here...
-       (y_reg, y_code) <- getSomeReg y
-       let
-    	   code = x_code `appOL` y_code
-           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
-               code)
+  = x86_complex_amode x y shift 0
+
+getAmode (CmmMachOp (MO_Add rep) 
+                [x, CmmMachOp (MO_Add _)
+                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
+                         CmmLit (CmmInt offset _)]])
+  | shift == 0 || shift == 1 || shift == 2 || shift == 3
+  && not (is64BitInteger offset)
+  = x86_complex_amode x y shift offset
+
+getAmode (CmmMachOp (MO_Add rep) [x,y])
+  = x86_complex_amode x y 0 0
 
 getAmode (CmmLit lit) | not (is64BitLit lit)
   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
@@ -1791,6 +1874,19 @@ getAmode expr = do
   (reg,code) <- getSomeReg expr
   return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
 
+
+x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
+x86_complex_amode base index shift offset
+  = do (x_reg, x_code) <- getNonClobberedReg base
+	-- x must be in a temp, because it has to stay live over y_code
+	-- we could compre x_reg and y_reg and do something better here...
+       (y_reg, y_code) <- getSomeReg index
+       let
+    	   code = x_code `appOL` y_code
+           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
+               code)
+
 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -1983,7 +2079,12 @@ is64BitLit x = False
 #endif
 
 is64BitInteger :: Integer -> Bool
-is64BitInteger i = i > 0x7fffffff || i < -0x80000000
+is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
+  where i64 = fromIntegral i :: Int64
+  -- a CmmInt is intended to be truncated to the appropriate 
+  -- number of bits, so here we truncate it to Int64.  This is
+  -- important because e.g. -1 as a CmmInt might be either
+  -- -1 or 18446744073709551615.
 
 -- -----------------------------------------------------------------------------
 --  The 'CondCode' type:  Condition codes passed up the tree.
@@ -2006,7 +2107,7 @@ getCondCode = panic "MachCode.getCondCode: not on Alphas"
 -- yes, they really do seem to want exactly the same!
 
 getCondCode (CmmMachOp mop [x, y])
-  = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
+  = 
     case mop of
       MO_Eq F32 -> condFltCode EQQ x y
       MO_Ne F32 -> condFltCode NE  x y
@@ -2037,7 +2138,7 @@ getCondCode (CmmMachOp mop [x, y])
       MO_U_Lt rep -> condIntCode LU   x y
       MO_U_Le rep -> condIntCode LEU  x y
 
-      other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
+      other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
 
 getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
 
@@ -2305,6 +2406,25 @@ assignIntCode pk dst src
 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 
 -- integer assignment to memory
+
+-- specific case of adding/subtracting an integer to a particular address.
+-- ToDo: catch other cases where we can use an operation directly on a memory 
+-- address.
+assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
+                                                 CmmLit (CmmInt i _)])
+   | addr == addr2, pk /= I64 || not (is64BitInteger i),
+     Just instr <- check op
+   = do Amode amode code_addr <- getAmode addr
+        let code = code_addr `snocOL`
+                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
+        return code
+   where
+        check (MO_Add _) = Just ADD
+        check (MO_Sub _) = Just SUB
+        check _ = Nothing
+        -- ToDo: more?
+
+-- general case
 assignMem_IntCode pk addr src = do
     Amode addr code_addr <- getAmode addr
     (code_src, op_src)   <- get_op_RI src
@@ -2818,9 +2938,8 @@ genCondJump id bool = do
 
 genCCall
     :: CmmCallTarget		-- function to call
-    -> [(CmmReg,MachHint)]	-- where to put the result
-    -> [(CmmExpr,MachHint)]	-- arguments (of mixed type)
-    -> Maybe [GlobalReg]	-- volatile regs to save
+    -> CmmHintFormals		-- where to put the result
+    -> CmmActuals		-- arguments (of mixed type)
     -> NatM InstrBlock
 
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -2899,8 +3018,12 @@ genCCall fn cconv result_regs args
 
 #if i386_TARGET_ARCH
 
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
+	-- write barrier compiles to no code on x86/x86-64; 
+	-- we keep it this long in order to prevent earlier optimisations.
+
 -- we only cope with a single result for foreign calls
-genCCall (CmmPrim op) [(r,_)] args vols = do
+genCCall (CmmPrim op) [(r,_)] args = do
   case op of
 	MO_F32_Sqrt -> actuallyInlineFloatOp F32  (GSQRT F32) args
 	MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
@@ -2914,14 +3037,14 @@ genCCall (CmmPrim op) [(r,_)] args vols = do
 	MO_F32_Tan  -> actuallyInlineFloatOp F32  (GTAN F32) args
 	MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args
 	
-	other_op    -> outOfLineFloatOp op r args vols
+	other_op    -> outOfLineFloatOp op r args
  where
   actuallyInlineFloatOp rep instr [(x,_)]
 	= do res <- trivialUFCode rep instr x
 	     any <- anyReg res
- 	     return (any (getRegisterReg r))
+ 	     return (any (getRegisterReg (CmmLocal r)))
 
-genCCall target dest_regs args vols = do
+genCCall target dest_regs args = do
     let
         sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
 #if !darwin_TARGET_OS        
@@ -2984,8 +3107,8 @@ genCCall target dest_regs args vols = do
 		rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
 	  where 
 		r_dest_hi = getHiVRegFromLo r_dest
-		rep = cmmRegRep dest
-		r_dest = getRegisterReg dest
+		rep = localRegRep dest
+		r_dest = getRegisterReg (CmmLocal dest)
 	assign_code many = panic "genCCall.assign_code many"
 
     return (push_code `appOL` 
@@ -3049,26 +3172,26 @@ genCCall target dest_regs args vols = do
 
 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 
-outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
-  -> Maybe [GlobalReg] -> NatM InstrBlock
-outOfLineFloatOp mop res args vols
+outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
+  -> NatM InstrBlock
+outOfLineFloatOp mop res args
   = do
-      targetExpr <- cmmMakeDynamicReference addImportNat True lbl
+      targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
       let target = CmmForeignCall targetExpr CCallConv
         
-      if cmmRegRep res == F64
+      if localRegRep res == F64
         then
-          stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)  
+          stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe)
         else do
           uq <- getUniqueNat
           let 
-            tmp = CmmLocal (LocalReg uq F64)
+            tmp = LocalReg uq F64 KindNonPtr
           -- in
-          code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols)
-          code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
+          code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe)
+          code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
           return (code1 `appOL` code2)
   where
-	lbl = mkForeignLabel fn Nothing True
+	lbl = mkForeignLabel fn Nothing False
 
 	fn = case mop of
 	      MO_F32_Sqrt  -> FSLIT("sqrtf")
@@ -3109,10 +3232,14 @@ outOfLineFloatOp mop res args vols
 
 #if x86_64_TARGET_ARCH
 
-genCCall (CmmPrim op) [(r,_)] args vols = 
-  outOfLineFloatOp op r args vols
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
+	-- write barrier compiles to no code on x86/x86-64; 
+	-- we keep it this long in order to prevent earlier optimisations.
+
+genCCall (CmmPrim op) [(r,_)] args = 
+  outOfLineFloatOp op r args
 
-genCCall target dest_regs args vols = do
+genCCall target dest_regs args = do
 
 	-- load up the register arguments
     (stack_args, aregs, fregs, load_args_code)
@@ -3121,7 +3248,7 @@ genCCall target dest_regs args vols = do
     let
 	fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
 	int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
-	arg_regs = int_regs_used ++ fp_regs_used
+	arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
 		-- for annotating the call instruction with
 
 	sse_regs = length fp_regs_used
@@ -3195,8 +3322,8 @@ genCCall target dest_regs args vols = do
 		F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
 		rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
 	  where 
-		rep = cmmRegRep dest
-		r_dest = getRegisterReg dest
+		rep = localRegRep dest
+		r_dest = getRegisterReg (CmmLocal dest)
 	assign_code many = panic "genCCall.assign_code many"
 
     return (load_args_code 	`appOL` 
@@ -3244,10 +3371,10 @@ genCCall target dest_regs args vols = do
 	 (arg_reg, arg_code) <- getSomeReg arg
          delta <- getDeltaNat
          setDeltaNat (delta-arg_size)
-	 let code' = code `appOL` toOL [
-			MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
+	 let code' = code `appOL` arg_code `appOL` toOL [
 			SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
-	 		DELTA (delta-arg_size)]
+	 		DELTA (delta-arg_size),
+			MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
 	 push_args rest code'
 
        | otherwise = do
@@ -3298,7 +3425,7 @@ genCCall target dest_regs args vols = do
    stack only immediately prior to the call proper.  Sigh.
 -}
 
-genCCall target dest_regs argsAndHints vols = do
+genCCall target dest_regs argsAndHints = do
     let
         args = map fst argsAndHints
     argcode_and_vregs <- mapM arg_to_int_vregs args
@@ -3405,7 +3532,7 @@ genCCall target dest_regs argsAndHints vols = do
                          )
 outOfLineFloatOp mop =
     do
-      mopExpr <- cmmMakeDynamicReference addImportNat True $
+      mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
 		  mkForeignLabel functionName Nothing True
       let mopLabelOrExpr = case mopExpr of
 			CmmLit (CmmLabel lbl) -> Left lbl
@@ -3490,7 +3617,11 @@ outOfLineFloatOp mop =
     frame just before ccalling.
 -}
 
-genCCall target dest_regs argsAndHints vols
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ 
+ = return $ unitOL LWSYNC
+
+genCCall target dest_regs argsAndHints
   = ASSERT (not $ any (`elem` [I8,I16]) argReps)
         -- we rely on argument promotion in the codeGen
     do
@@ -3651,12 +3782,12 @@ genCCall target dest_regs argsAndHints vols
                     | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
                                           MR r_dest r4]
                     | otherwise -> unitOL (MR r_dest r3)
-                    where rep = cmmRegRep dest
-                          r_dest = getRegisterReg dest
+                    where rep = cmmRegRep (CmmLocal dest)
+                          r_dest = getRegisterReg (CmmLocal dest)
                           
         outOfLineFloatOp mop =
             do
-                mopExpr <- cmmMakeDynamicReference addImportNat True $
+                mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                               mkForeignLabel functionName Nothing True
                 let mopLabelOrExpr = case mopExpr of
                         CmmLit (CmmLabel lbl) -> Left lbl
@@ -3716,7 +3847,7 @@ genSwitch expr ids
   = do
         (reg,e_code) <- getSomeReg expr
         lbl <- getNewLabelNat
-        dynRef <- cmmMakeDynamicReference addImportNat False lbl
+        dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
         (tableReg,t_code) <- getSomeReg $ dynRef
         let
             jumpTable = map jumpTableEntryRel ids
@@ -3730,11 +3861,25 @@ genSwitch expr ids
             op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
                                        (EAIndex reg wORD_SIZE) (ImmInt 0))
 
+#if x86_64_TARGET_ARCH && darwin_TARGET_OS
+    -- on Mac OS X/x86_64, put the jump table in the text section
+    -- to work around a limitation of the linker.
+    -- ld64 is unable to handle the relocations for
+    --     .quad L1 - L0
+    -- if L0 is not preceded by a non-anonymous label in its section.
+    
+            code = e_code `appOL` t_code `appOL` toOL [
+                            ADD wordRep op (OpReg tableReg),
+                            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
+                            LDATA Text (CmmDataLabel lbl : jumpTable)
+                    ]
+#else
             code = e_code `appOL` t_code `appOL` toOL [
                             LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                             ADD wordRep op (OpReg tableReg),
                             JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
                     ]
+#endif
         return code
   | otherwise
   = do
@@ -3756,7 +3901,7 @@ genSwitch expr ids
         (reg,e_code) <- getSomeReg expr
         tmp <- getNewRegNat I32
         lbl <- getNewLabelNat
-        dynRef <- cmmMakeDynamicReference addImportNat False lbl
+        dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
         (tableReg,t_code) <- getSomeReg $ dynRef
         let
             jumpTable = map jumpTableEntryRel ids
@@ -4442,6 +4587,8 @@ remainderCode rep div x y = do
 -- -----------------------------------------------------------------------------
 --  Coercing to/from integer/floating-point...
 
+-- When going to integer, we truncate (round towards 0).
+
 -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
 -- conversions.  We have to store temporaries in memory to move
 -- between the integer and the floating point register sets.
@@ -4527,7 +4674,7 @@ coerceFP2Int from to x = do
 coerceFP2Int from to x = do
   (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
   let
-        opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+        opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
         code dst = x_code `snocOL` opc x_op dst
   -- in
   return (Any to code) -- works even if the destination rep is <I32
@@ -4595,7 +4742,7 @@ coerceInt2FP fromRep toRep x = do
     lbl <- getNewLabelNat
     itmp <- getNewRegNat I32
     ftmp <- getNewRegNat F64
-    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
     Amode addr addr_code <- getAmode dynRef
     let
     	code' dst = code `appOL` maybe_exts `appOL` toOL [