floating-point fix for x86_64

[ghc-hetmet.git] / ghc / compiler / nativeGen / MachCodeGen.hs
diff --git a/ghc/compiler/nativeGen/MachCodeGen.hs b/ghc/compiler/nativeGen/MachCodeGen.hs

index 9285518..32dad13 100644 (file)
--- a/ghc/compiler/nativeGen/MachCodeGen.hs
+++ b/ghc/compiler/nativeGen/MachCodeGen.hs
@@ -15,11 +15,13 @@ module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  
  #include "HsVersions.h"
  #include "nativeGen/NCG.h"
+#include "MachDeps.h"
  
  -- NCG stuff:
  import MachInstrs
  import MachRegs
  import NCGMonad
+import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
  
  -- Our intermediate code:
  import PprCmm          ( pprExpr )
@@ -28,14 +30,14 @@ import MachOp
  import CLabel
  
  -- The rest:
-import CmdLineOpts     ( opt_Static )
+import StaticFlags     ( opt_PIC )
  import ForeignCall     ( CCallConv(..) )
  import OrdList
  import Pretty
  import Outputable
-import qualified Outputable
  import FastString
  import FastTypes       ( isFastTrue )
+import Constants       ( wORD_SIZE )
  
  #ifdef DEBUG
  import Outputable      ( assertPanic )
@@ -60,7 +62,13 @@ type InstrBlock = OrdList Instr
  cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop]
  cmmTopCodeGen (CmmProc info lab params blocks) = do
    (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
-  return (CmmProc info lab params (concat nat_blocks) : concat statics)
+  picBaseMb <- getPicBaseMaybeNat
+  let proc = CmmProc info lab params (concat nat_blocks)
+      tops = proc : concat statics
+  case picBaseMb of
+      Just picBase -> initializePicBase picBase tops
+      Nothing -> return tops
+  
  cmmTopCodeGen (CmmData sec dat) = do
    return [CmmData sec dat]  -- no translation, we just use CmmStatic
  
@@ -95,15 +103,17 @@ stmtToInstrs stmt = case stmt of
  
      CmmAssign reg src
        | isFloatingRep kind -> assignReg_FltCode kind reg src
-      | wordRep == I32 && kind == I64
-                          -> assignReg_I64Code      reg src
+#if WORD_SIZE_IN_BITS==32
+      | kind == I64       -> assignReg_I64Code      reg src
+#endif
        | otherwise         -> assignReg_IntCode kind reg src
         where kind = cmmRegRep reg
  
      CmmStore addr src
        | isFloatingRep kind -> assignMem_FltCode kind addr src
-      | wordRep == I32 && kind == I64
-                        -> assignMem_I64Code      addr src
+#if WORD_SIZE_IN_BITS==32
+      | kind == I64     -> assignMem_I64Code      addr src
+#endif
        | otherwise       -> assignMem_IntCode kind addr src
         where kind = cmmExprRep src
  
@@ -150,9 +160,14 @@ data ChildCode64   -- a.k.a "Register64"
                         -- selection game are therefore that the returned
                         -- Reg may be modified
  
+#if WORD_SIZE_IN_BITS==32
  assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
  assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
+#endif
+
+#ifndef x86_64_TARGET_ARCH
  iselExpr64        :: CmmExpr -> NatM ChildCode64
+#endif
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
@@ -257,88 +272,61 @@ iselExpr64 expr
  
  #if sparc_TARGET_ARCH
  
-assignMem_I64Code addrTree valueTree
-   = iselExpr64 valueTree              `thenNat` \ (ChildCode64 vcode vrlo) ->
-     getRegister addrTree              `thenNat` \ register_addr ->
-     getNewRegNat IntRep               `thenNat` \ t_addr ->
-     let rlo = VirtualRegI vrlo
+assignMem_I64Code addrTree valueTree = do
+     Amode addr addr_code <- getAmode addrTree
+     ChildCode64 vcode rlo <- iselExpr64 valueTree  
+     (src, code) <- getSomeReg addrTree
+     let 
           rhi = getHiVRegFromLo rlo
-         code_addr = registerCode register_addr t_addr
-         reg_addr  = registerName register_addr t_addr
           -- Big-endian store
-         mov_hi = ST W rhi (AddrRegImm reg_addr (ImmInt 0))
-         mov_lo = ST W rlo (AddrRegImm reg_addr (ImmInt 4))
-     in
-         return (vcode `appOL` code_addr `snocOL` mov_hi `snocOL` mov_lo)
+         mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
+         mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
+     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
  
-
-assignReg_I64Code (StixTemp (StixVReg u_dst pk)) valueTree
-   = iselExpr64 valueTree              `thenNat` \ (ChildCode64 vcode vr_src_lo) ->
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree    
       let 
-         r_dst_lo = mkVReg u_dst IntRep
-         r_src_lo = VirtualRegI vr_src_lo
+         r_dst_lo = mkVReg u_dst pk
           r_dst_hi = getHiVRegFromLo r_dst_lo
           r_src_hi = getHiVRegFromLo r_src_lo
           mov_lo = mkMOV r_src_lo r_dst_lo
           mov_hi = mkMOV r_src_hi r_dst_hi
           mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
-     in
-         return (
-            vcode `snocOL` mov_hi `snocOL` mov_lo
-         )
+     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
  assignReg_I64Code lvalue valueTree
-   = pprPanic "assignReg_I64Code(sparc): invalid lvalue"
-              (pprStixReg lvalue)
+   = panic "assignReg_I64Code(sparc): invalid lvalue"
  
  
  -- Don't delete this -- it's very handy for debugging.
  --iselExpr64 expr 
---   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
+--   | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
  --   = panic "iselExpr64(???)"
  
-iselExpr64 (CmmLoad I64 addrTree)
-   = getRegister addrTree              `thenNat` \ register_addr ->
-     getNewRegNat IntRep               `thenNat` \ t_addr ->
-     getNewRegNat IntRep               `thenNat` \ rlo ->
+iselExpr64 (CmmLoad addrTree I64) = do
+     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
+     rlo <- getNewRegNat I32
       let rhi = getHiVRegFromLo rlo
-         code_addr = registerCode register_addr t_addr
-         reg_addr  = registerName register_addr t_addr
-         mov_hi = LD W (AddrRegImm reg_addr (ImmInt 0)) rhi
-         mov_lo = LD W (AddrRegImm reg_addr (ImmInt 4)) rlo
-     in
-         return (
-            ChildCode64 (code_addr `snocOL` mov_hi `snocOL` mov_lo) 
-                        (getVRegUnique rlo)
-         )
+         mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
+         mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
+     return (
+            ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) 
+                         rlo
+          )
  
-iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64)))
-   = getNewRegNat IntRep               `thenNat` \ r_dst_lo ->
+iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
+     r_dst_lo <-  getNewRegNat I32
       let r_dst_hi = getHiVRegFromLo r_dst_lo
-         r_src_lo = mkVReg vu IntRep
+         r_src_lo = mkVReg uq I32
           r_src_hi = getHiVRegFromLo r_src_lo
           mov_lo = mkMOV r_src_lo r_dst_lo
           mov_hi = mkMOV r_src_hi r_dst_hi
           mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
-     in
-         return (
-            ChildCode64 (toOL [mov_hi, mov_lo]) (getVRegUnique r_dst_lo)
+     return (
+            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
           )
  
-iselExpr64 (StCall fn cconv I64 args)
-  = genCCall fn cconv kind args                        `thenNat` \ call ->
-    getNewRegNat IntRep                                `thenNat` \ r_dst_lo ->
-    let r_dst_hi = getHiVRegFromLo r_dst_lo
-        mov_lo = mkMOV o0 r_dst_lo
-        mov_hi = mkMOV o1 r_dst_hi
-        mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
-    in
-    return (
-       ChildCode64 (call `snocOL` mov_hi `snocOL` mov_lo) 
-                   (getVRegUnique r_dst_lo)
-    )
-
  iselExpr64 expr
-   = pprPanic "iselExpr64(sparc)" (pprCmmExpr expr)
+   = pprPanic "iselExpr64(sparc)" (ppr expr)
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -456,6 +444,21 @@ swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
  
  
  -- -----------------------------------------------------------------------------
+-- Utils based on getRegister, below
+
+-- The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+       tmp <- getNewRegNat rep
+       return (tmp, code tmp)
+    Fixed _ reg code -> 
+       return (reg, code)
+
+-- -----------------------------------------------------------------------------
  -- Grab the Reg for a CmmReg
  
  getRegisterReg :: CmmReg -> Reg
@@ -482,6 +485,11 @@ getRegisterReg (CmmGlobal mid)
  
  getRegister :: CmmExpr -> NatM Register
  
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+  = do
+      reg <- getPicBaseNat wordRep
+      return (Fixed wordRep reg nilOL)
+
  getRegister (CmmReg reg) 
    = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
  
@@ -727,12 +735,14 @@ getRegister leaf
  
  getRegister (CmmLit (CmmFloat f F32)) = do
      lbl <- getNewLabelNat
-    let code dst = toOL [
+    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    Amode addr addr_code <- getAmode dynRef
+    let code dst =
             LDATA ReadOnlyData
                         [CmmDataLabel lbl,
-                        CmmStaticLit (CmmFloat f F32)],
-           GLD F32 (ImmAddr (ImmCLbl lbl) 0) dst
-           ]
+                        CmmStaticLit (CmmFloat f F32)]
+           `consOL` (addr_code `snocOL`
+           GLD F32 addr dst)
      -- in
      return (Any F32 code)
  
@@ -748,15 +758,41 @@ getRegister (CmmLit (CmmFloat d F64))
  
    | otherwise = do
      lbl <- getNewLabelNat
+    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    Amode addr addr_code <- getAmode dynRef
+    let code dst =
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat d F64)]
+           `consOL` (addr_code `snocOL`
+           GLD F64 addr dst)
+    -- in
+    return (Any F64 code)
+
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat 0.0 rep)) = do
+   let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
+       -- I don't know why there are xorpd, xorps, and pxor instructions.
+       -- They all appear to do the same thing --SDM
+   return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f rep)) = do
+    lbl <- getNewLabelNat
      let code dst = toOL [
             LDATA ReadOnlyData
                         [CmmDataLabel lbl,
-                        CmmStaticLit (CmmFloat d F64)],
-           GLD F64 (ImmAddr (ImmCLbl lbl) 0) dst
+                        CmmStaticLit (CmmFloat f rep)],
+           MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
             ]
      -- in
-    return (Any F64 code)
+    return (Any rep code)
  
+#endif /* x86_64_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- catch simple cases of zero- or sign-extended load
  getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
@@ -775,11 +811,87 @@ getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
    code <- intLoadCode (MOVSxL I16) addr
    return (Any I32 code)
  
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I32) addr
+  return (Any I64 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+    code dst = x_code dst `appOL` toOL [
+       -- This is how gcc does it, so it can't be that bad:
+       LDATA ReadOnlyData16 [
+               CmmAlign 16,
+               CmmDataLabel lbl,
+               CmmStaticLit (CmmInt 0x80000000 I32),
+               CmmStaticLit (CmmInt 0 I32),
+               CmmStaticLit (CmmInt 0 I32),
+               CmmStaticLit (CmmInt 0 I32)
+       ],
+       XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+               -- xorps, so we need the 128-bit constant
+               -- ToDo: rip-relative
+       ]
+  --
+  return (Any F32 code)
+
+getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+       -- This is how gcc does it, so it can't be that bad:
+    code dst = x_code dst `appOL` toOL [
+       LDATA ReadOnlyData16 [
+               CmmAlign 16,
+               CmmDataLabel lbl,
+               CmmStaticLit (CmmInt 0x8000000000000000 I64),
+               CmmStaticLit (CmmInt 0 I64)
+       ],
+               -- gcc puts an unpck here.  Wonder if we need it.
+       XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+               -- xorpd, so we need the 128-bit constant
+       ]
+  --
+  return (Any F64 code)
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  getRegister (CmmMachOp mop [x]) -- unary MachOps
    = case mop of
+#if i386_TARGET_ARCH
        MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
        MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
+#endif
  
        MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
        MO_Not rep   -> trivialUCode rep (NOT  rep) x
@@ -793,6 +905,15 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
        MO_S_Conv I16 I8  -> conversionNop I16 x
        MO_U_Conv I32 I16 -> conversionNop I32 x
        MO_S_Conv I32 I16 -> conversionNop I32 x
+#if x86_64_TARGET_ARCH
+      MO_U_Conv I64 I32 -> conversionNop I64 x
+      MO_S_Conv I64 I32 -> conversionNop I64 x
+      MO_U_Conv I64 I16 -> conversionNop I64 x
+      MO_S_Conv I64 I16 -> conversionNop I64 x
+      MO_U_Conv I64 I8  -> conversionNop I64 x
+      MO_S_Conv I64 I8  -> conversionNop I64 x
+#endif
+
        MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
        MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
  
@@ -805,12 +926,32 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
        MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
        MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
  
+#if x86_64_TARGET_ARCH
+      MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
+      MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
+      MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
+      MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
+      MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
+      MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
+       -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+       -- However, we don't want the register allocator to throw it
+       -- away as an unnecessary reg-to-reg move, so we keep it in
+       -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
        MO_S_Conv F32 F64 -> conversionNop F64 x
        MO_S_Conv F64 F32 -> conversionNop F32 x
+#else
+      MO_S_Conv F32 F64 -> coerceFP2FP F64 x
+      MO_S_Conv F64 F32 -> coerceFP2FP F32 x
+#endif
+
        MO_S_Conv from to
         | isFloatingRep from -> coerceFP2Int from to x
         | isFloatingRep to   -> coerceInt2FP from to x
  
+      other -> pprPanic "getRegister" (pprMachOp mop)
     where
         -- signed or unsigned extension.
         integerExtend from to instr expr = do
@@ -857,14 +998,27 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
        MO_U_Lt rep -> condIntReg LU  x y
        MO_U_Le rep -> condIntReg LEU x y
  
-      MO_Add F32 -> trivialFCode F32  GADD x y
-      MO_Sub F32 -> trivialFCode F32  GSUB x y
+#if i386_TARGET_ARCH
+      MO_Add F32 -> trivialFCode F32 GADD x y
+      MO_Sub F32 -> trivialFCode F32 GSUB x y
  
        MO_Add F64 -> trivialFCode F64 GADD x y
        MO_Sub F64 -> trivialFCode F64 GSUB x y
  
-      MO_S_Quot F32 -> trivialFCode  F32  GDIV x y
+      MO_S_Quot F32 -> trivialFCode F32 GDIV x y
        MO_S_Quot F64 -> trivialFCode F64 GDIV x y
+#endif
+
+#if x86_64_TARGET_ARCH
+      MO_Add F32 -> trivialFCode F32 ADD x y
+      MO_Sub F32 -> trivialFCode F32 SUB x y
+
+      MO_Add F64 -> trivialFCode F64 ADD x y
+      MO_Sub F64 -> trivialFCode F64 SUB x y
+
+      MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+      MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+#endif
  
        MO_Add rep -> add_code rep x y
        MO_Sub rep -> sub_code rep x y
@@ -874,8 +1028,16 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
        MO_U_Quot rep -> div_code rep False True  x y
        MO_U_Rem  rep -> div_code rep False False x y
  
+#if i386_TARGET_ARCH
        MO_Mul   F32 -> trivialFCode F32 GMUL x y
        MO_Mul   F64 -> trivialFCode F64 GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+      MO_Mul   F32 -> trivialFCode F32 MUL x y
+      MO_Mul   F64 -> trivialFCode F64 MUL x y
+#endif
+
        MO_Mul   rep -> let op = IMUL rep in 
                       trivialCode rep op (Just op) x y
  
@@ -900,24 +1062,26 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
    where
      --------------------
      imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
-    imulMayOflo I32 a b = do
-         res_lo <- getNewRegNat I32
-         res_hi <- getNewRegNat I32
+    imulMayOflo rep a b = do
           (a_reg, a_code) <- getNonClobberedReg a
-         (b_reg, b_code) <- getSomeReg   b
+         b_code <- getAnyReg b
           let 
-             code dst = a_code `appOL` b_code `appOL`
+            shift_amt  = case rep of
+                          I32 -> 31
+                          I64 -> 63
+                          _ -> panic "shift_amt"
+
+             code = a_code `appOL` b_code eax `appOL`
                          toOL [
-                           MOV I32 (OpReg a_reg) (OpReg res_hi),
-                           MOV I32 (OpReg b_reg) (OpReg res_lo),
-                           IMUL64 res_hi res_lo,               -- result in res_hi:res_lo
-                           SAR I32 (OpImm (ImmInt 31)) (OpReg res_lo), -- sign extend lower part
-                           SUB I32 (OpReg res_hi) (OpReg res_lo),      -- compare against upper
-                           MOV I32 (OpReg res_lo) (OpReg dst)
-                           -- dst==0 if high part == sign extended low part
+                          IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
+                           SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
+                               -- sign extend lower part
+                           SUB rep (OpReg edx) (OpReg eax)
+                               -- compare against upper
+                           -- eax==0 if high part == sign extended low part
                          ]
           -- in
-        return (Any I32 code)
+        return (Fixed rep eax code)
  
      --------------------
      shift_code :: MachRep
@@ -949,12 +1113,14 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
  
      --------------------
      add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
-    add_code rep x (CmmLit (CmmInt y _)) = add_int rep x y
+    add_code rep x (CmmLit (CmmInt y _))
+       | not (is64BitInteger y) = add_int rep x y
      add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y
  
      --------------------
      sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
-    sub_code rep x (CmmLit (CmmInt y _)) = add_int rep x (-y)
+    sub_code rep x (CmmLit (CmmInt y _))
+       | not (is64BitInteger (-y)) = add_int rep x (-y)
      sub_code rep x y = trivialCode rep (SUB rep) Nothing x y
  
      -- our three-operand add instruction:
@@ -965,17 +1131,17 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
             code dst
                 = x_code `snocOL`
                  LEA rep
-                       (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm))
+                       (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                          (OpReg dst)
         -- 
         return (Any rep code)
  
      ----------------------
      div_code rep signed quotient x y = do
-          (y_op, y_code) <- getOperand y -- cannot be clobbered
+          (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
            x_code <- getAnyReg x
            let
-            widen | signed    = CLTD
+            widen | signed    = CLTD rep
                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
  
              instr | signed    = IDIV
@@ -992,17 +1158,18 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
             return (Fixed rep result code)
  
  
-
  getRegister (CmmLoad mem pk)
    | isFloatingRep pk
    = do
      Amode src mem_code <- getAmode mem
      let
         code dst = mem_code `snocOL` 
-                  GLD pk src dst
+                  IF_ARCH_i386(GLD pk src dst,
+                               MOV pk (OpAddr src) (OpReg dst))
      --
      return (Any pk code)
  
+#if i386_TARGET_ARCH
  getRegister (CmmLoad mem pk)
    | pk /= I64
    = do 
@@ -1017,14 +1184,47 @@ getRegister (CmmLoad mem pk)
         -- we can't guarantee access to an 8-bit variant of every register
         -- (esi and edi don't have 8-bit variants), so to make things
         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+  = do 
+    code <- intLoadCode (MOV pk) mem
+    return (Any pk code)
+#endif
  
  getRegister (CmmLit (CmmInt 0 rep))
    = let
+       -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+       adj_rep = case rep of I64 -> I32; _ -> rep
+       rep1 = IF_ARCH_i386( rep, adj_rep ) 
         code dst 
-           = unitOL (XOR rep (OpReg dst) (OpReg dst))
+           = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
      in
         return (Any rep code)
  
+#if x86_64_TARGET_ARCH
+  -- optimisation for loading small literals on x86_64: take advantage
+  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+  -- instruction forms are shorter.
+getRegister (CmmLit lit) 
+  | I64 <- cmmLitRep lit, not (isBigLit lit)
+  = let 
+       imm = litToImm lit
+       code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
+    in
+       return (Any I64 code)
+  where
+   isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
+   isBigLit _ = False
+       -- note1: not the same as is64BitLit, because that checks for
+       -- signed literals that fit in 32 bits, but we want unsigned
+       -- literals here.
+       -- note2: all labels are small, because we're assuming the
+       -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
  getRegister (CmmLit lit)
    = let 
         rep = cmmLitRep lit
@@ -1033,7 +1233,7 @@ getRegister (CmmLit lit)
      in
         return (Any rep code)
  
-getRegister other = panic "getRegister(x86)"
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
  
  
  intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
@@ -1053,22 +1253,13 @@ anyReg :: Register -> NatM (Reg -> InstrBlock)
  anyReg (Any _ code)          = return code
  anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
  
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
-  r <- getRegister expr
-  case r of
-    Any rep code -> do
-       tmp <- getNewRegNat rep
-       return (tmp, code tmp)
-    Fixed _ reg code -> 
-       return (reg, code)
-
  -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
  -- Fixed registers might not be byte-addressable, so we make sure we've
  -- got a temporary, inserting an extra reg copy if necessary.
  getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
  getByteReg expr = do
    r <- getRegister expr
    case r of
@@ -1082,6 +1273,7 @@ getByteReg expr = do
             return (tmp, code `snocOL` reg2reg rep reg tmp)
         -- ToDo: could optimise slightly by checking for byte-addressable
         -- real registers, but that will happen very rarely if at all.
+#endif
  
  -- Another variant: this time we want the result in a register that cannot
  -- be modified by code to evaluate an arbitrary expression.
@@ -1102,262 +1294,202 @@ getNonClobberedReg expr = do
  
  reg2reg :: MachRep -> Reg -> Reg -> Instr
  reg2reg rep src dst 
+#if i386_TARGET_ARCH
    | isFloatingRep rep = GMOV src dst
+#endif
    | otherwise        = MOV rep (OpReg src) (OpReg dst)
  
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
  #if sparc_TARGET_ARCH
  
-getRegister (StFloat d)
-  = getBlockIdNat                  `thenNat` \ lbl ->
-    getNewRegNat PtrRep            `thenNat` \ tmp ->
+getRegister (CmmLit (CmmFloat f F32)) = do
+    lbl <- getNewLabelNat
      let code dst = toOL [
-           SEGMENT DataSegment,
-           NEWBLOCK lbl,
-           DATA F [ImmFloat d],
-           SEGMENT TextSegment,
-           SETHI (HI (ImmCLbl lbl)) tmp,
-           LD F (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
-    in
-       return (Any F32 code)
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat f F32)],
+           SETHI (HI (ImmCLbl lbl)) dst,
+           LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] 
+    return (Any F32 code)
  
-getRegister (StDouble d)
-  = getBlockIdNat                  `thenNat` \ lbl ->
-    getNewRegNat PtrRep            `thenNat` \ tmp ->
+getRegister (CmmLit (CmmFloat d F64)) = do
+    lbl <- getNewLabelNat
      let code dst = toOL [
-           SEGMENT DataSegment,
-           NEWBLOCK lbl,
-           DATA DF [ImmDouble d],
-           SEGMENT TextSegment,
-           SETHI (HI (ImmCLbl lbl)) tmp,
-           LD DF (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
-    in
-       return (Any F64 code)
-
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat d F64)],
+           SETHI (HI (ImmCLbl lbl)) dst,
+           LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] 
+    return (Any F64 code)
  
-getRegister (CmmMachOp mop [x]) -- unary PrimOps
+getRegister (CmmMachOp mop [x]) -- unary MachOps
    = case mop of
-      MO_NatS_Neg      -> trivialUCode (SUB False False g0) x
-      MO_Nat_Not       -> trivialUCode (XNOR False g0) x
-      MO_32U_to_8U     -> trivialCode (AND False) x (StInt 255)
+      MO_S_Neg F32     -> trivialUFCode F32 (FNEG F32) x
+      MO_S_Neg F64     -> trivialUFCode F64 (FNEG F64) x
  
-      MO_F32_Neg       -> trivialUFCode F32 (FNEG F) x
-      MO_F64_Neg       -> trivialUFCode F64 (FNEG DF) x
+      MO_S_Neg rep     -> trivialUCode rep (SUB False False g0) x
+      MO_Not rep       -> trivialUCode rep (XNOR False g0) x
  
-      MO_F64_to_Flt    -> coerceDbl2Flt x
-      MO_F32_to_Dbl    -> coerceFlt2Dbl x
+      MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))
  
-      MO_F32_to_NatS   -> coerceFP2Int F32 x
-      MO_NatS_to_Flt   -> coerceInt2FP F32 x
-      MO_F64_to_NatS   -> coerceFP2Int F64 x
-      MO_NatS_to_Dbl   -> coerceInt2FP F64 x
+      MO_U_Conv F64 F32-> coerceDbl2Flt x
+      MO_U_Conv F32 F64-> coerceFlt2Dbl x
  
-      -- Conversions which are a nop on sparc
-      MO_32U_to_NatS   -> conversionNop IntRep   x
-      MO_32S_to_NatS  -> conversionNop IntRep   x
-      MO_NatS_to_32U   -> conversionNop WordRep  x
-      MO_32U_to_NatU   -> conversionNop WordRep  x
-
-      MO_NatU_to_NatS -> conversionNop IntRep    x
-      MO_NatS_to_NatU -> conversionNop WordRep   x
-      MO_NatP_to_NatU -> conversionNop WordRep   x
-      MO_NatU_to_NatP -> conversionNop PtrRep    x
-      MO_NatS_to_NatP -> conversionNop PtrRep    x
-      MO_NatP_to_NatS -> conversionNop IntRep    x
-
-      -- sign-extending widenings
-      MO_8U_to_32U    -> integerExtend False 24 x
-      MO_8U_to_NatU   -> integerExtend False 24 x
-      MO_8S_to_NatS   -> integerExtend True  24 x
-      MO_16U_to_NatU  -> integerExtend False 16 x
-      MO_16S_to_NatS  -> integerExtend True  16 x
-
-      other_op ->
-        let fixed_x = if   is_float_op  -- promote to double
-                      then CmmMachOp MO_F32_to_Dbl [x]
-                      else x
-       in
-       getRegister (StCall (Left fn) CCallConv F64 [fixed_x])
-    where
-        integerExtend signed nBits x
-           = getRegister (
-                CmmMachOp (if signed then MO_Nat_Sar else MO_Nat_Shr) 
-                         [CmmMachOp MO_Nat_Shl [x, StInt nBits], StInt nBits]
-             )
-        conversionNop new_rep expr
-            = getRegister expr         `thenNat` \ e_code ->
-              return (swizzleRegisterRep e_code new_rep)
+      MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x
+      MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x
+      MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x
+      MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x
  
-       (is_float_op, fn)
-         = case mop of
-             MO_F32_Exp    -> (True,  FSLIT("exp"))
-             MO_F32_Log    -> (True,  FSLIT("log"))
-             MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
+      -- Conversions which are a nop on sparc
+      MO_U_Conv from to
+       | from == to   -> conversionNop to   x
+      MO_U_Conv I32 to -> conversionNop to   x
+      MO_S_Conv I32 to -> conversionNop to   x
  
-             MO_F32_Sin    -> (True,  FSLIT("sin"))
-             MO_F32_Cos    -> (True,  FSLIT("cos"))
-             MO_F32_Tan    -> (True,  FSLIT("tan"))
+      -- widenings
+      MO_U_Conv I8 I32  -> integerExtend False I8 I32  x
+      MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
+      MO_U_Conv I8 I16  -> integerExtend False I8 I16  x
+      MO_S_Conv I16 I32 -> integerExtend True I16 I32  x
  
-             MO_F32_Asin   -> (True,  FSLIT("asin"))
-             MO_F32_Acos   -> (True,  FSLIT("acos"))
-             MO_F32_Atan   -> (True,  FSLIT("atan"))
+      other_op -> panic "Unknown unary mach op"
+    where
+        -- XXX SLL/SRL?
+        integerExtend signed from to expr = do
+           (reg, e_code) <- getSomeReg expr
+          let
+              code dst =
+                  e_code `snocOL` 
+                  ((if signed then SRA else SRL)
+                         reg (RIImm (ImmInt 0)) dst)
+          return (Any to code)
+        conversionNop new_rep expr
+            = do e_code <- getRegister expr
+                 return (swizzleRegisterRep e_code new_rep)
  
-             MO_F32_Sinh   -> (True,  FSLIT("sinh"))
-             MO_F32_Cosh   -> (True,  FSLIT("cosh"))
-             MO_F32_Tanh   -> (True,  FSLIT("tanh"))
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+  = case mop of
+      MO_Eq F32 -> condFltReg EQQ x y
+      MO_Ne F32 -> condFltReg NE x y
  
-             MO_F64_Exp    -> (False, FSLIT("exp"))
-             MO_F64_Log    -> (False, FSLIT("log"))
-             MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
+      MO_S_Gt F32 -> condFltReg GTT x y
+      MO_S_Ge F32 -> condFltReg GE x y 
+      MO_S_Lt F32 -> condFltReg LTT x y
+      MO_S_Le F32 -> condFltReg LE x y
  
-             MO_F64_Sin    -> (False, FSLIT("sin"))
-             MO_F64_Cos    -> (False, FSLIT("cos"))
-             MO_F64_Tan    -> (False, FSLIT("tan"))
+      MO_Eq F64 -> condFltReg EQQ x y
+      MO_Ne F64 -> condFltReg NE x y
  
-             MO_F64_Asin   -> (False, FSLIT("asin"))
-             MO_F64_Acos   -> (False, FSLIT("acos"))
-             MO_F64_Atan   -> (False, FSLIT("atan"))
+      MO_S_Gt F64 -> condFltReg GTT x y
+      MO_S_Ge F64 -> condFltReg GE x y
+      MO_S_Lt F64 -> condFltReg LTT x y
+      MO_S_Le F64 -> condFltReg LE x y
  
-             MO_F64_Sinh   -> (False, FSLIT("sinh"))
-             MO_F64_Cosh   -> (False, FSLIT("cosh"))
-             MO_F64_Tanh   -> (False, FSLIT("tanh"))
+      MO_Eq rep -> condIntReg EQQ x y
+      MO_Ne rep -> condIntReg NE x y
  
-              other -> pprPanic "getRegister(sparc) - binary CmmMachOp (2)" 
-                                (pprMachOp mop)
+      MO_S_Gt rep -> condIntReg GTT x y
+      MO_S_Ge rep -> condIntReg GE x y
+      MO_S_Lt rep -> condIntReg LTT x y
+      MO_S_Le rep -> condIntReg LE x y
+             
+      MO_U_Gt I32  -> condIntReg GTT x y
+      MO_U_Ge I32  -> condIntReg GE x y
+      MO_U_Lt I32  -> condIntReg LTT x y
+      MO_U_Le I32  -> condIntReg LE x y
  
+      MO_U_Gt I16 -> condIntReg GU  x y
+      MO_U_Ge I16 -> condIntReg GEU x y
+      MO_U_Lt I16 -> condIntReg LU  x y
+      MO_U_Le I16 -> condIntReg LEU x y
  
-getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
-  = case mop of
-      MO_32U_Gt  -> condIntReg GTT x y
-      MO_32U_Ge  -> condIntReg GE x y
-      MO_32U_Eq  -> condIntReg EQQ x y
-      MO_32U_Ne  -> condIntReg NE x y
-      MO_32U_Lt  -> condIntReg LTT x y
-      MO_32U_Le  -> condIntReg LE x y
-
-      MO_Nat_Eq   -> condIntReg EQQ x y
-      MO_Nat_Ne   -> condIntReg NE x y
-
-      MO_NatS_Gt  -> condIntReg GTT x y
-      MO_NatS_Ge  -> condIntReg GE x y
-      MO_NatS_Lt  -> condIntReg LTT x y
-      MO_NatS_Le  -> condIntReg LE x y
-
-      MO_NatU_Gt  -> condIntReg GU  x y
-      MO_NatU_Ge  -> condIntReg GEU x y
-      MO_NatU_Lt  -> condIntReg LU  x y
-      MO_NatU_Le  -> condIntReg LEU x y
-
-      MO_F32_Gt -> condFltReg GTT x y
-      MO_F32_Ge -> condFltReg GE x y
-      MO_F32_Eq -> condFltReg EQQ x y
-      MO_F32_Ne -> condFltReg NE x y
-      MO_F32_Lt -> condFltReg LTT x y
-      MO_F32_Le -> condFltReg LE x y
-
-      MO_F64_Gt -> condFltReg GTT x y
-      MO_F64_Ge -> condFltReg GE x y
-      MO_F64_Eq -> condFltReg EQQ x y
-      MO_F64_Ne -> condFltReg NE x y
-      MO_F64_Lt -> condFltReg LTT x y
-      MO_F64_Le -> condFltReg LE x y
-
-      MO_Nat_Add -> trivialCode (ADD False False) x y
-      MO_Nat_Sub -> trivialCode (SUB False False) x y
-
-      MO_NatS_Mul  -> trivialCode (SMUL False) x y
-      MO_NatU_Mul  -> trivialCode (UMUL False) x y
-      MO_NatS_MulMayOflo -> imulMayOflo x y
+      MO_Add I32 -> trivialCode I32 (ADD False False) x y
+      MO_Sub I32 -> trivialCode I32 (SUB False False) x y
  
+      MO_S_MulMayOflo rep -> imulMayOflo rep x y
+{-
        -- ToDo: teach about V8+ SPARC div instructions
-      MO_NatS_Quot -> idiv FSLIT(".div")  x y
-      MO_NatS_Rem  -> idiv FSLIT(".rem")  x y
-      MO_NatU_Quot -> idiv FSLIT(".udiv")  x y
-      MO_NatU_Rem  -> idiv FSLIT(".urem")  x y
+      MO_S_Quot I32 -> idiv FSLIT(".div")  x y
+      MO_S_Rem I32  -> idiv FSLIT(".rem")  x y
+      MO_U_Quot I32 -> idiv FSLIT(".udiv")  x y
+      MO_U_Rem I32  -> idiv FSLIT(".urem")  x y
+-}
+      MO_Add F32  -> trivialFCode F32 FADD  x y
+      MO_Sub F32   -> trivialFCode F32  FSUB x y
+      MO_Mul F32   -> trivialFCode F32  FMUL  x y
+      MO_S_Quot F32   -> trivialFCode F32  FDIV x y
  
-      MO_F32_Add   -> trivialFCode F32  FADD x y
-      MO_F32_Sub   -> trivialFCode F32  FSUB x y
-      MO_F32_Mul   -> trivialFCode F32  FMUL x y
-      MO_F32_Div   -> trivialFCode F32  FDIV x y
+      MO_Add F64   -> trivialFCode F64 FADD  x y
+      MO_Sub F64   -> trivialFCode F64  FSUB x y
+      MO_Mul F64   -> trivialFCode F64  FMUL x y
+      MO_S_Quot F64   -> trivialFCode F64  FDIV x y
  
-      MO_F64_Add   -> trivialFCode F64 FADD x y
-      MO_F64_Sub   -> trivialFCode F64 FSUB x y
-      MO_F64_Mul   -> trivialFCode F64 FMUL x y
-      MO_F64_Div   -> trivialFCode F64 FDIV x y
+      MO_And rep   -> trivialCode rep (AND False) x y
+      MO_Or rep    -> trivialCode rep (OR  False) x y
+      MO_Xor rep   -> trivialCode rep (XOR False) x y
  
-      MO_Nat_And   -> trivialCode (AND False) x y
-      MO_Nat_Or    -> trivialCode (OR  False) x y
-      MO_Nat_Xor   -> trivialCode (XOR False) x y
+      MO_Mul rep -> trivialCode rep (SMUL False) x y
  
-      MO_Nat_Shl   -> trivialCode SLL x y
-      MO_Nat_Shr   -> trivialCode SRL x y
-      MO_Nat_Sar   -> trivialCode SRA x y
+      MO_Shl rep   -> trivialCode rep SLL  x y
+      MO_U_Shr rep   -> trivialCode rep SRL x y
+      MO_S_Shr rep   -> trivialCode rep SRA x y
  
+{-
        MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64 
                                           [promote x, promote y])
                        where promote x = CmmMachOp MO_F32_to_Dbl [x]
        MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64 
                                          [x, y])
-
+-}
        other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
    where
-    idiv fn x y = getRegister (StCall (Left fn) CCallConv IntRep [x, y])
+    --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])
  
      --------------------
-    imulMayOflo :: CmmExpr -> CmmExpr -> NatM Register
-    imulMayOflo a1 a2
-       = getNewRegNat IntRep           `thenNat` \ t1 ->
-         getNewRegNat IntRep           `thenNat` \ t2 ->
-         getNewRegNat IntRep           `thenNat` \ res_lo ->
-         getNewRegNat IntRep           `thenNat` \ res_hi ->
-         getRegister a1                        `thenNat` \ reg1 ->
-         getRegister a2                `thenNat` \ reg2 ->
-         let code1 = registerCode reg1 t1
-             code2 = registerCode reg2 t2
-             src1  = registerName reg1 t1
-             src2  = registerName reg2 t2
-             code dst = code1 `appOL` code2 `appOL`
-                        toOL [
-                           SMUL False src1 (RIReg src2) res_lo,
+    imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
+    imulMayOflo rep a b = do
+         (a_reg, a_code) <- getSomeReg a
+        (b_reg, b_code) <- getSomeReg b
+        res_lo <- getNewRegNat I32
+        res_hi <- getNewRegNat I32
+        let
+           shift_amt  = case rep of
+                         I32 -> 31
+                         I64 -> 63
+                         _ -> panic "shift_amt"
+           code dst = a_code `appOL` b_code `appOL`
+                       toOL [
+                           SMUL False a_reg (RIReg b_reg) res_lo,
                             RDY res_hi,
-                           SRA res_lo (RIImm (ImmInt 31)) res_lo,
+                           SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
                             SUB False False res_lo (RIReg res_hi) dst
                          ]
-         in
-            return (Any IntRep code)
+         return (Any I32 code)
  
-getRegister (CmmLoad pk mem) = do
+getRegister (CmmLoad mem pk) = do
      Amode src code <- getAmode mem
      let
-       size = primRepToSize pk
-       code__2 dst = code `snocOL` LD size src dst
-    --
+       code__2 dst = code `snocOL` LD pk src dst
      return (Any pk code__2)
  
-getRegister (StInt i)
+getRegister (CmmLit (CmmInt i _))
    | fits13Bits i
    = let
         src = ImmInt (fromInteger i)
         code dst = unitOL (OR False g0 (RIImm src) dst)
      in
-       return (Any IntRep code)
+       return (Any I32 code)
  
-getRegister leaf
-  | isJust imm
-  = let
+getRegister (CmmLit lit)
+  = let rep = cmmLitRep lit
+       imm = litToImm lit
         code dst = toOL [
-           SETHI (HI imm__2) dst,
-           OR False dst (RIImm (LO imm__2)) dst]
-    in
-       return (Any PtrRep code)
-  | otherwise
-  = ncgPrimopMoan "getRegister(sparc)" (pprCmmExpr leaf)
-  where
-    imm = maybeImm leaf
-    imm__2 = case imm of Just x -> x
+           SETHI (HI imm) dst,
+           OR False dst (RIImm (LO imm)) dst]
+    in return (Any I32 code)
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -1461,6 +1593,23 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
        MO_Mul F64   -> trivialCodeNoImm F64 (FMUL F64) x y
        MO_S_Quot F64   -> trivialCodeNoImm F64 (FDIV F64) x y
  
+         -- optimize addition with 32-bit immediate
+         -- (needed for PIC)
+      MO_Add I32 ->
+        case y of
+          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
+            -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
+          CmmLit lit
+            -> do
+                (src, srcCode) <- getSomeReg x
+                let imm = litToImm lit
+                    code dst = srcCode `appOL` toOL [
+                                    ADDIS dst src (HA imm),
+                                    ADD dst dst (RIImm (LO imm))
+                                ]
+                return (Any I32 code)
+          _ -> trivialCode I32 True ADD x y
+
        MO_Add rep -> trivialCode rep True ADD x y
        MO_Sub rep ->
          case y of    -- subfi ('substract from' with immediate) doesn't exist
@@ -1496,53 +1645,25 @@ getRegister (CmmLit (CmmInt i rep))
      in
         return (Any rep code)
  
-getRegister (CmmLit (CmmFloat f F32)) = do
-    lbl <- getNewLabelNat
-    tmp <- getNewRegNat I32
-    let code dst = toOL [
-           LDATA ReadOnlyData  [CmmDataLabel lbl,
-                                CmmStaticLit (CmmFloat f F32)],
-           LIS tmp (HA (ImmCLbl lbl)),
-           LD F32 dst (AddrRegImm tmp (LO (ImmCLbl lbl)))
-           ]
-    -- in
-    return (Any F32 code)
-
-getRegister (CmmLit (CmmFloat d F64)) = do
+getRegister (CmmLit (CmmFloat f frep)) = do
      lbl <- getNewLabelNat
-    tmp <- getNewRegNat I32
-    let code dst = toOL [
+    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    Amode addr addr_code <- getAmode dynRef
+    let code dst = 
             LDATA ReadOnlyData  [CmmDataLabel lbl,
-                                CmmStaticLit (CmmFloat d F64)],
-           LIS tmp (HA (ImmCLbl lbl)),
-           LD F64 dst (AddrRegImm tmp (LO (ImmCLbl lbl)))
-           ]
-    -- in
-    return (Any F32 code)
-
-#if darwin_TARGET_OS
-getRegister (CmmLit (CmmLabel lbl))
-    | labelCouldBeDynamic lbl
-    = do
-        addImportNat False lbl
-       let imm = ImmDyldNonLazyPtr lbl
-           code dst = toOL [
-                    LIS dst (HA imm),
-                    LD  I32 dst (AddrRegImm dst (LO imm))
-                ]
-        return (Any I32 code)
-#endif
+                                CmmStaticLit (CmmFloat f frep)]
+            `consOL` (addr_code `snocOL` LD frep dst addr)
+    return (Any frep code)
  
  getRegister (CmmLit lit)
-  = let 
-       rep = cmmLitRep lit
-       imm = litToImm lit
-       code dst = toOL [
-                LIS dst (HI imm),
-                OR dst dst (RIImm (LO imm))
-            ]
-    in
-       return (Any rep code)
+  = let rep = cmmLitRep lit
+        imm = litToImm lit
+        code dst = toOL [
+              LIS dst (HI imm),
+              OR dst dst (RIImm (LO imm))
+          ]
+    in return (Any rep code)
+
  getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
      
      -- extend?Rep: wrap integer expression of type rep
@@ -1552,19 +1673,6 @@ extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
  extendUExpr I32 x = x
  extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
  
--- ###FIXME: exact code duplication from x86 case
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
-  r <- getRegister expr
-  case r of
-    Any rep code -> do
-       tmp <- getNewRegNat rep
-       return (tmp, code tmp)
-    Fixed _ reg code -> 
-       return (reg, code)
-
  #endif /* powerpc_TARGET_ARCH */
  
  
@@ -1638,21 +1746,23 @@ getAmode other
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- This is all just ridiculous, since it carefully undoes 
  -- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+  | not (is64BitLit lit)
    -- ASSERT(rep == I32)???
    = do (x_reg, x_code) <- getSomeReg x
         let off = ImmInt (-(fromInteger i))
-       return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
    
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+  | not (is64BitLit lit)
    -- ASSERT(rep == I32)???
    = do (x_reg, x_code) <- getSomeReg x
         let off = ImmInt (fromInteger i)
-       return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
  
  -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be 
  -- recognised by the next rule.
@@ -1670,79 +1780,63 @@ getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
         let
            code = x_code `appOL` y_code
             base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-       return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
                 code)
  
-getAmode (CmmLit lit)
+getAmode (CmmLit lit) | not (is64BitLit lit)
    = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
  
  getAmode expr = do
    (reg,code) <- getSomeReg expr
-  return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
+  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
  
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
  #if sparc_TARGET_ARCH
  
-getAmode (CmmMachOp MO_Nat_Sub [x, StInt i])
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
    | fits13Bits (-i)
-  = getNewRegNat PtrRep                `thenNat` \ tmp ->
-    getRegister x              `thenNat` \ register ->
-    let
-       code = registerCode register tmp
-       reg  = registerName register tmp
-       off  = ImmInt (-(fromInteger i))
-    in
-    return (Amode (AddrRegImm reg off) code)
+  = do
+       (reg, code) <- getSomeReg x
+       let
+         off  = ImmInt (-(fromInteger i))
+       return (Amode (AddrRegImm reg off) code)
  
  
-getAmode (CmmMachOp MO_Nat_Add [x, StInt i])
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
    | fits13Bits i
-  = getNewRegNat PtrRep                `thenNat` \ tmp ->
-    getRegister x              `thenNat` \ register ->
-    let
-       code = registerCode register tmp
-       reg  = registerName register tmp
-       off  = ImmInt (fromInteger i)
-    in
-    return (Amode (AddrRegImm reg off) code)
+  = do
+       (reg, code) <- getSomeReg x
+       let
+        off  = ImmInt (fromInteger i)
+       return (Amode (AddrRegImm reg off) code)
  
-getAmode (CmmMachOp MO_Nat_Add [x, y])
-  = getNewRegNat PtrRep        `thenNat` \ tmp1 ->
-    getNewRegNat IntRep        `thenNat` \ tmp2 ->
-    getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
+getAmode (CmmMachOp (MO_Add rep) [x, y])
+  = do
+    (regX, codeX) <- getSomeReg x
+    (regY, codeY) <- getSomeReg y
      let
-       code1 = registerCode register1 tmp1
-       reg1  = registerName register1 tmp1
-       code2 = registerCode register2 tmp2
-       reg2  = registerName register2 tmp2
-       code__2 = code1 `appOL` code2
-    in
-    return (Amode (AddrRegReg reg1 reg2) code__2)
+       code = codeX `appOL` codeY
+    return (Amode (AddrRegReg regX regY) code)
  
-getAmode leaf
-  | isJust imm
-  = getNewRegNat PtrRep            `thenNat` \ tmp ->
-    let
+-- XXX Is this same as "leaf" in Stix?
+getAmode (CmmLit lit)
+  = do
+      tmp <- getNewRegNat I32
+      let
         code = unitOL (SETHI (HI imm__2) tmp)
-    in
-    return (Amode (AddrRegImm tmp (LO imm__2)) code)
-  where
-    imm    = maybeImm leaf
-    imm__2 = case imm of Just x -> x
+      return (Amode (AddrRegImm tmp (LO imm__2)) code)
+      where
+         imm__2 = litToImm lit
  
  getAmode other
-  = getNewRegNat PtrRep                `thenNat` \ tmp ->
-    getRegister other          `thenNat` \ register ->
-    let
-       code = registerCode register tmp
-       reg  = registerName register tmp
-       off  = ImmInt 0
-    in
-    return (Amode (AddrRegImm reg off) code)
+  = do
+       (reg, code) <- getSomeReg other
+       let
+           off  = ImmInt 0
+       return (Amode (AddrRegImm reg off) code)
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -1760,14 +1854,22 @@ getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
          (reg, code) <- getSomeReg x
          return (Amode (AddrRegImm reg off) code)
  
+   -- optimize addition with 32-bit immediate
+   -- (needed for PIC)
+getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
+  = do
+        tmp <- getNewRegNat I32
+        (src, srcCode) <- getSomeReg x
+        let imm = litToImm lit
+            code = srcCode `snocOL` ADDIS tmp src (HA imm)
+        return (Amode (AddrRegImm tmp (LO imm)) code)
+
  getAmode (CmmLit lit)
    = do
          tmp <- getNewRegNat I32
-        let
+        let imm = litToImm lit
              code = unitOL (LIS tmp (HA imm))
          return (Amode (AddrRegImm tmp (LO imm)) code)
-    where
-        imm = litToImm lit
      
  getAmode (CmmMachOp (MO_Add I32) [x, y])
    = do
@@ -1786,29 +1888,38 @@ getAmode other
  -- -----------------------------------------------------------------------------
  -- getOperand: sometimes any operand will do.
  
--- getOperand gets a *safe* operand; that is, the value of the operand
--- will remain valid across the computation of an arbitrary expression,
--- unless the expression is computed directly into a register which
--- the operand refers to (see trivialCode where this function is used
--- for an example).
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
  
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-getOperand (CmmLoad mem pk) 
-  | not (isFloatingRep pk) && pk /= I64 = do
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+  | isSuitableFloatingPointLit lit = do
+    lbl <- getNewLabelNat
+    let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
+                                          CmmStaticLit lit])
+    return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+    return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk) 
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
      Amode src mem_code <- getAmode mem
      (src',save_code) <- 
         if (amodeCouldBeClobbered src) 
                 then do
                    tmp <- getNewRegNat wordRep
-                  return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0),
+                  return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                            unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
                 else
                    return (src, nilOL)
      return (OpAddr src', save_code `appOL` mem_code)
-
-getOperand e = do
+getNonClobberedOperand e = do
      (reg, code) <- getNonClobberedReg e
      return (OpReg reg, code)
  
@@ -1818,7 +1929,60 @@ amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
  regClobbered (RealReg rr) = isFastTrue (freeReg rr)
  regClobbered _ = False
  
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getOperand (CmmLit lit)
+  | isSuitableFloatingPointLit lit = do
+    lbl <- getNewLabelNat
+    let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
+                                          CmmStaticLit lit])
+    return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
+    return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getOperand e = do
+    (reg, code) <- getSomeReg e
+    return (OpReg reg, code)
+
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit)  = not (is64BitLit lit)
+                         || isSuitableFloatingPointLit lit
+isOperand _             = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory.  However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
+
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getRegOrMem e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+#if x86_64_TARGET_ARCH
+is64BitLit (CmmInt i I64) = is64BitInteger i
+   -- assume that labels are in the range 0-2^31-1: this assumes the
+   -- small memory model (see gcc docs, -mcmodel=small).
  #endif
+is64BitLit x = False
+#endif
+
+is64BitInteger :: Integer -> Bool
+is64BitInteger i = i > 0x7fffffff || i < -0x80000000
  
  -- -----------------------------------------------------------------------------
  --  The 'CondCode' type:  Condition codes passed up the tree.
@@ -1837,7 +2001,7 @@ getCondCode = panic "MachCode.getCondCode: not on Alphas"
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH || sparc_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
  -- yes, they really do seem to want exactly the same!
  
  getCondCode (CmmMachOp mop [x, y])
@@ -1931,10 +2095,10 @@ condFltCode = panic "MachCode.condFltCode: not on Alphas"
  #endif /* alpha_TARGET_ARCH */
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- memory vs immediate
-condIntCode cond (CmmLoad x pk) (CmmLit lit) = do
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
      Amode x_addr x_code <- getAmode x
      let
         imm  = litToImm lit
@@ -1952,50 +2116,29 @@ condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
      --
      return (CondCode False cond code)
  
--- anything vs immediate
-condIntCode cond x (CmmLit lit) = do
-    (x_reg, x_code) <- getSomeReg x
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+    (x_reg, x_code) <- getNonClobberedReg x
+    (y_op,  y_code) <- getOperand y    
      let
-       imm  = litToImm lit
-       code = x_code `snocOL`
-                  CMP (cmmLitRep lit) (OpImm imm) (OpReg x_reg)
-    -- in
-    return (CondCode False cond code)
-
--- memory vs anything
-condIntCode cond (CmmLoad x pk) y = do
-    (y_reg, y_code) <- getNonClobberedReg y
-    Amode x_addr x_code <- getAmode x
-    let
-       code = y_code `appOL`
-               x_code `snocOL`
-                 CMP pk (OpReg y_reg) (OpAddr x_addr)
-    -- in
-    return (CondCode False cond code)
-
--- anything vs memory
-condIntCode cond y (CmmLoad x pk) = do
-    (y_reg, y_code) <- getNonClobberedReg y
-    Amode x_addr x_code <- getAmode x
-    let
-       code = y_code `appOL`
-               x_code `snocOL`
-                 CMP pk (OpAddr x_addr) (OpReg y_reg)
+       code = x_code `appOL` y_code `snocOL`
+                  CMP (cmmExprRep x) y_op (OpReg x_reg)
      -- in
      return (CondCode False cond code)
  
  -- anything vs anything
  condIntCode cond x y = do
-  (x_op, x_code) <- getOperand x
-  (y_reg, y_code) <- getSomeReg y
+  (y_reg, y_code) <- getNonClobberedReg y
+  (x_op, x_code) <- getRegOrMem x
    let
-       code = x_code `appOL`
-              y_code `snocOL`
+       code = y_code `appOL`
+              x_code `snocOL`
                   CMP (cmmExprRep x) (OpReg y_reg) x_op
    -- in
    return (CondCode False cond code)
+#endif
  
------------
+#if i386_TARGET_ARCH
  condFltCode cond x y 
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
    (x_reg, x_code) <- getNonClobberedReg x
@@ -2006,77 +2149,73 @@ condFltCode cond x y
    -- The GCMP insn does the test and sets the zero flag if comparable
    -- and true.  Hence we always supply EQQ as the condition to test.
    return (CondCode True EQQ code)
-
  #endif /* i386_TARGET_ARCH */
  
+#if x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg.  We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+  (x_reg, x_code) <- getNonClobberedReg x
+  (y_op, y_code) <- getOperand y
+  let
+       code = x_code `appOL`
+              y_code `snocOL`
+                 CMP (cmmExprRep x) y_op (OpReg x_reg)
+       -- NB(1): we need to use the unsigned comparison operators on the
+       -- result of this comparison.
+  -- in
+  return (CondCode True (condToUnsigned cond) code)
+#endif
+
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
  #if sparc_TARGET_ARCH
  
-condIntCode cond x (StInt y)
+condIntCode cond x (CmmLit (CmmInt y rep))
    | fits13Bits y
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep                `thenNat` \ tmp ->
-    let
-       code = registerCode register tmp
-       src1 = registerName register tmp
-       src2 = ImmInt (fromInteger y)
-       code__2 = code `snocOL` SUB False True src1 (RIImm src2) g0
-    in
-    return (CondCode False cond code__2)
+  = do
+       (src1, code) <- getSomeReg x
+       let
+           src2 = ImmInt (fromInteger y)
+           code' = code `snocOL` SUB False True src1 (RIImm src2) g0
+       return (CondCode False cond code')
  
-condIntCode cond x y
-  = getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
-    getNewRegNat IntRep                `thenNat` \ tmp1 ->
-    getNewRegNat IntRep                `thenNat` \ tmp2 ->
+condIntCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
      let
-       code1 = registerCode register1 tmp1
-       src1  = registerName register1 tmp1
-       code2 = registerCode register2 tmp2
-       src2  = registerName register2 tmp2
         code__2 = code1 `appOL` code2 `snocOL`
                   SUB False True src1 (RIReg src2) g0
-    in
      return (CondCode False cond code__2)
  
  -----------
-condFltCode cond x y
-  = getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
-    getNewRegNat (registerRep register1)
-                               `thenNat` \ tmp1 ->
-    getNewRegNat (registerRep register2)
-                               `thenNat` \ tmp2 ->
-    getNewRegNat F64   `thenNat` \ tmp ->
+condFltCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp <- getNewRegNat F64
      let
-       promote x = FxTOy F DF x tmp
-
-       pk1   = registerRep register1
-       code1 = registerCode register1 tmp1
-       src1  = registerName register1 tmp1
+       promote x = FxTOy F32 F64 x tmp
  
-       pk2   = registerRep register2
-       code2 = registerCode register2 tmp2
-       src2  = registerName register2 tmp2
+       pk1   = cmmExprRep x
+       pk2   = cmmExprRep y
  
         code__2 =
                 if pk1 == pk2 then
                     code1 `appOL` code2 `snocOL`
-                   FCMP True (primRepToSize pk1) src1 src2
+                   FCMP True pk1 src1 src2
                 else if pk1 == F32 then
                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
-                   FCMP True DF tmp src2
+                   FCMP True F64 tmp src2
                 else
                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
-                   FCMP True DF src1 tmp
-    in
+                   FCMP True F64 src1 tmp
      return (CondCode True cond code__2)
  
  #endif /* sparc_TARGET_ARCH */
  
  #if powerpc_TARGET_ARCH
--- ###FIXME: I16 and I8!
+--  ###FIXME: I16 and I8!
  condIntCode cond x (CmmLit (CmmInt y rep))
    | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
    = do
@@ -2162,7 +2301,7 @@ assignIntCode pk dst src
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- integer assignment to memory
  assignMem_IntCode pk addr src = do
@@ -2180,7 +2319,7 @@ assignMem_IntCode pk addr src = do
      return code
    where
      get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)  -- code, operator
-    get_op_RI (CmmLit lit)
+    get_op_RI (CmmLit lit) | not (is64BitLit lit)
        = return (nilOL, OpImm (litToImm lit))
      get_op_RI op
        = do (reg,code) <- getNonClobberedReg op
@@ -2203,33 +2342,19 @@ assignReg_IntCode pk reg src = do
  
  #if sparc_TARGET_ARCH
  
-assignMem_IntCode pk addr src
-  = getNewRegNat IntRep                    `thenNat` \ tmp ->
-    getAmode addr                          `thenNat` \ amode ->
-    getRegister src                        `thenNat` \ register ->
-    let
-       code1   = amodeCode amode
-       dst__2  = amodeAddr amode
-       code2   = registerCode register tmp
-       src__2  = registerName register tmp
-       sz      = primRepToSize pk
-       code__2 = code1 `appOL` code2 `snocOL` ST sz src__2 dst__2
-    in
-    return code__2
+assignMem_IntCode pk addr src = do
+    (srcReg, code) <- getSomeReg src
+    Amode dstAddr addr_code <- getAmode addr
+    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+assignReg_IntCode pk reg src = do
+    r <- getRegister src
+    return $ case r of
+       Any _ code         -> code dst
+       Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg dst) freg
+    where
+      dst = getRegisterReg reg
  
-assignReg_IntCode pk reg src
-  = getRegister src                        `thenNat` \ register2 ->
-    getRegisterReg reg                     `thenNat` \ register1 ->
-    getNewRegNat IntRep                    `thenNat` \ tmp ->
-    let
-       dst__2  = registerName register1 tmp
-       code    = registerCode register2 dst__2
-       src__2  = registerName register2 dst__2
-       code__2 = if isFixed register2
-                 then code `snocOL` OR False g0 (RIReg src__2) dst__2
-                 else code
-    in
-    return code__2
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -2289,7 +2414,7 @@ assignFltCode pk dst src
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- Floating point assignment to memory
  assignMem_FltCode pk addr src = do
@@ -2298,7 +2423,8 @@ assignMem_FltCode pk addr src = do
    let
         code = src_code `appOL`
                addr_code `snocOL`
-                GST pk src_reg addr
+                IF_ARCH_i386(GST pk src_reg addr,
+                            MOV pk (OpReg src_reg) (OpAddr addr))
    return code
  
  -- Floating point assignment to a register/temporary
@@ -2313,53 +2439,28 @@ assignReg_FltCode pk reg src = do
  #if sparc_TARGET_ARCH
  
  -- Floating point assignment to memory
-assignMem_FltCode pk addr src
-  = getNewRegNat pk                `thenNat` \ tmp1 ->
-    getAmode addr                  `thenNat` \ amode ->
-    getRegister src                `thenNat` \ register ->
+assignMem_FltCode pk addr src = do
+    Amode dst__2 code1 <- getAmode addr
+    (src__2, code2) <- getSomeReg src
+    tmp1 <- getNewRegNat pk
      let
-       sz      = primRepToSize pk
-       dst__2  = amodeAddr amode
-
-       code1   = amodeCode amode
-       code2   = registerCode register tmp1
-
-       src__2  = registerName register tmp1
-       pk__2   = registerRep register
-       sz__2   = primRepToSize pk__2
-
+       pk__2   = cmmExprRep src
         code__2 = code1 `appOL` code2 `appOL`
             if   pk == pk__2 
-            then unitOL (ST sz src__2 dst__2)
-           else toOL [FxTOy sz__2 sz src__2 tmp1, ST sz tmp1 dst__2]
-    in
+            then unitOL (ST pk src__2 dst__2)
+           else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
      return code__2
  
  -- Floating point assignment to a register/temporary
--- Why is this so bizarrely ugly?
-assignReg_FltCode pk reg src
-  = getRegisterReg reg                     `thenNat` \ register1 ->
-    getRegister src                        `thenNat` \ register2 ->
-    let 
-        pk__2   = registerRep register2 
-        sz__2   = primRepToSize pk__2
-    in
-    getNewRegNat pk__2                      `thenNat` \ tmp ->
-    let
-       sz      = primRepToSize pk
-       dst__2  = registerName register1 g0    -- must be Fixed
-       reg__2  = if pk /= pk__2 then tmp else dst__2
-       code    = registerCode register2 reg__2
-       src__2  = registerName register2 reg__2
-       code__2 = 
-               if pk /= pk__2 then
-                    code `snocOL` FxTOy sz__2 sz src__2 dst__2
-               else if isFixed register2 then
-                    code `snocOL` FMOV sz src__2 dst__2
-               else
-                    code
-    in
-    return code__2
+-- ToDo: Verify correctness
+assignReg_FltCode pk reg src = do
+    r <- getRegister src
+    v1 <- getNewRegNat pk
+    return $ case r of
+        Any _ code         -> code dst
+       Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1
+    where
+      dst = getRegisterReg reg
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -2407,7 +2508,7 @@ genJump tree
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  genJump (CmmLoad mem pk) = do
    Amode target code <- getAmode mem
@@ -2426,19 +2527,15 @@ genJump expr = do
  
  #if sparc_TARGET_ARCH
  
-genJump (CmmLabel lbl)
+genJump (CmmLit (CmmLabel lbl))
    = return (toOL [CALL (Left target) 0 True, NOP])
    where
      target = ImmCLbl lbl
  
  genJump tree
-  = getRegister tree                       `thenNat` \ register ->
-    getNewRegNat PtrRep            `thenNat` \ tmp ->
-    let
-       code   = registerCode register tmp
-       target = registerName register tmp
-    in
-    return (code `snocOL` JMP dsts (AddrRegReg target g0) `snocOL` NOP)
+  = do
+        (target, code) <- getSomeReg tree
+       return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -2462,12 +2559,12 @@ genBranch :: BlockId -> NatM InstrBlock
  genBranch id = return (unitOL (BR id))
  #endif
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  genBranch id = return (unitOL (JXX ALWAYS id))
  #endif
  
  #if sparc_TARGET_ARCH
-genBranch id = return (toOL [BI ALWAYS False id, NOP])
+genBranch (BlockId id) = return (toOL [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP])
  #endif
  
  #if powerpc_TARGET_ARCH
@@ -2658,19 +2755,56 @@ genCondJump id bool = do
    CondCode _ cond code <- getCondCode bool
    return (code `snocOL` JXX cond id)
  
-#endif /* i386_TARGET_ARCH */
+#endif
  
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if sparc_TARGET_ARCH
+#if x86_64_TARGET_ARCH
  
  genCondJump id bool = do
+  CondCode is_float cond cond_code <- getCondCode bool
+  if not is_float
+    then
+       return (cond_code `snocOL` JXX cond id)
+    else do
+       lbl <- getBlockIdNat
+
+       -- see comment with condFltReg
+       let code = case cond of
+                       NE  -> or_unordered
+                       GU  -> plain_test
+                       GEU -> plain_test
+                       _   -> and_ordered
+
+           plain_test = unitOL (
+                 JXX cond id
+               )
+           or_unordered = toOL [
+                 JXX cond id,
+                 JXX PARITY id
+               ]
+           and_ordered = toOL [
+                 JXX PARITY lbl,
+                 JXX cond id,
+                 JXX ALWAYS lbl,
+                 NEWBLOCK lbl
+               ]
+       return (cond_code `appOL` code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if sparc_TARGET_ARCH
+
+genCondJump (BlockId id) bool = do
    CondCode is_float cond code <- getCondCode bool
    return (
         code `appOL` 
         toOL (
           if   is_float
-         then [NOP, BF cond False id, NOP]
-         else [BI cond False id, NOP]
+         then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+         else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
         )
      )
  
@@ -2802,11 +2936,21 @@ genCCall (CmmPrim op) [(r,_)] args vols = do
              return (any (getRegisterReg r))
  
  genCCall target dest_regs args vols = do
-    sizes_n_codes <- mapM push_arg (reverse args)
-    delta <- getDeltaNat
-    let 
-       (sizes, push_codes) = unzip sizes_n_codes
+    let
+        sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
+#if !darwin_TARGET_OS        
          tot_arg_size        = sum sizes
+#else
+        raw_arg_size        = sum sizes
+        tot_arg_size        = roundTo 16 raw_arg_size
+        arg_pad_size        = tot_arg_size - raw_arg_size
+    delta0 <- getDeltaNat
+    setDeltaNat (delta0 - arg_pad_size)
+#endif
+
+    push_codes <- mapM push_arg (reverse args)
+    delta <- getDeltaNat
+
      -- in
      -- deal with static vs dynamic call targets
      (callinsns,cconv) <-
@@ -2814,19 +2958,27 @@ genCCall target dest_regs args vols = do
         -- CmmPrim -> ...
          CmmForeignCall (CmmLit (CmmLabel lbl)) conv
             -> -- ToDo: stdcall arg sizes
-             return (unitOL (CALL (Left fn_imm)), conv)
+             return (unitOL (CALL (Left fn_imm) []), conv)
            where fn_imm = ImmCLbl lbl
          CmmForeignCall expr conv
             -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
                   ASSERT(dyn_rep == I32)
-                  return (dyn_c `snocOL` CALL (Right dyn_r), conv)
+                  return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)
  
-    let        push_code = concatOL push_codes
+    let        push_code
+#if darwin_TARGET_OS
+            | arg_pad_size /= 0
+            = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
+                    DELTA (delta0 - arg_pad_size)]
+              `appOL` concatOL push_codes
+            | otherwise
+#endif
+            = concatOL push_codes
         call = callinsns `appOL`
                 toOL (
                         -- Deallocate parameters after call for ccall;
                         -- but not for stdcall (callee does it)
-                  (if cconv == StdCallConv then [] else 
+                  (if cconv == StdCallConv || tot_arg_size==0 then [] else 
                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
                    ++
                    [DELTA (delta + tot_arg_size)]
@@ -2857,10 +3009,15 @@ genCCall target dest_regs args vols = do
    where
      arg_size F64 = 8
      arg_size F32 = 4
+    arg_size I64 = 8
      arg_size _   = 4
  
+    roundTo a x | x `mod` a == 0 = x
+                | otherwise = x + a - (x `mod` a)
+
+
      push_arg :: (CmmExpr,MachHint){-current argument-}
-                    -> NatM (Int, InstrBlock)  -- argsz, code
+                    -> NatM InstrBlock  -- code
  
      push_arg (arg,_hint) -- we don't need the hints on x86
        | arg_rep == I64 = do
@@ -2870,7 +3027,7 @@ genCCall target dest_regs args vols = do
          let 
              r_hi = getHiVRegFromLo r_lo
          -- in
-       return (8,     code `appOL`
+       return (       code `appOL`
                         toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
                               PUSH I32 (OpReg r_lo), DELTA (delta - 8),
                              DELTA (delta-8)]
@@ -2882,16 +3039,14 @@ genCCall target dest_regs args vols = do
          let size = arg_size sz
          setDeltaNat (delta-size)
          if (case sz of F64 -> True; F32 -> True; _ -> False)
-           then return (size,
-                        code `appOL`
+           then return (code `appOL`
                          toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
                                DELTA (delta-size),
-                              GST sz reg (AddrBaseIndex (Just esp) 
-                                                        Nothing 
+                              GST sz reg (AddrBaseIndex (EABaseReg esp) 
+                                                        EAIndexNone
                                                          (ImmInt 0))]
                         )
-           else return (size,
-                        code `snocOL`
+           else return (code `snocOL`
                          PUSH I32 (OpReg reg) `snocOL`
                          DELTA (delta-size)
                         )
@@ -2904,41 +3059,61 @@ genCCall target dest_regs args vols = do
          (reg,code) <- getSomeReg op
         return (code, reg, cmmExprRep op)
  
+#endif /* i386_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
    -> Maybe [GlobalReg] -> NatM InstrBlock
  outOfLineFloatOp mop res args vols
-  | cmmRegRep res == F64
-  = stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)
-
-  | otherwise
-  = do uq <- getUniqueNat
-       let 
-        tmp = CmmLocal (LocalReg uq F64)
-       -- in
-       code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] (map promote args) vols)
-       code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
-       return (code1 `appOL` code2)
+  = do
+      targetExpr <- cmmMakeDynamicReference addImportNat True lbl
+      let target = CmmForeignCall targetExpr CCallConv
+        
+      if cmmRegRep res == F64
+        then
+          stmtToInstrs (CmmCall target [(res,FloatHint)] args vols)  
+        else do
+          uq <- getUniqueNat
+          let 
+            tmp = CmmLocal (LocalReg uq F64)
+          -- in
+          code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)]
+                                         (map promote args) vols)
+          code2 <- stmtToInstrs (CmmAssign res (demote (CmmReg tmp)))
+          return (code1 `appOL` code2)
    where
+#if i386_TARGET_ARCH
          promote (x,hint) = (CmmMachOp (MO_S_Conv F32 F64) [x], hint)
          demote  x = CmmMachOp (MO_S_Conv F64 F32) [x]
+#else
+        promote (x,hint) = (x,hint)
+        demote  x = x
+#endif
  
-       target = CmmForeignCall (CmmLit lbl) CCallConv
-       lbl = CmmLabel (mkForeignLabel fn Nothing False)
+       lbl = mkForeignLabel fn Nothing True
  
         fn = case mop of
-             MO_F32_Exp   -> FSLIT("exp")
-             MO_F32_Log   -> FSLIT("log")
-
-             MO_F32_Asin  -> FSLIT("asin")
-             MO_F32_Acos  -> FSLIT("acos")
-             MO_F32_Atan  -> FSLIT("atan")
-
-             MO_F32_Sinh  -> FSLIT("sinh")
-             MO_F32_Cosh  -> FSLIT("cosh")
-             MO_F32_Tanh  -> FSLIT("tanh")
-             MO_F32_Pwr   -> FSLIT("pow")
-
+             MO_F32_Sqrt  -> FSLIT("sqrtf")
+             MO_F32_Sin   -> FSLIT("sinf")
+             MO_F32_Cos   -> FSLIT("cosf")
+             MO_F32_Tan   -> FSLIT("tanf")
+             MO_F32_Exp   -> FSLIT("expf")
+             MO_F32_Log   -> FSLIT("logf")
+
+             MO_F32_Asin  -> FSLIT("asinf")
+             MO_F32_Acos  -> FSLIT("acosf")
+             MO_F32_Atan  -> FSLIT("atanf")
+
+             MO_F32_Sinh  -> FSLIT("sinhf")
+             MO_F32_Cosh  -> FSLIT("coshf")
+             MO_F32_Tanh  -> FSLIT("tanhf")
+             MO_F32_Pwr   -> FSLIT("powf")
+
+             MO_F64_Sqrt  -> FSLIT("sqrt")
+             MO_F64_Sin   -> FSLIT("sin")
+             MO_F64_Cos   -> FSLIT("cos")
+             MO_F64_Tan   -> FSLIT("tan")
               MO_F64_Exp   -> FSLIT("exp")
               MO_F64_Log   -> FSLIT("log")
  
@@ -2951,9 +3126,167 @@ outOfLineFloatOp mop res args vols
               MO_F64_Tanh  -> FSLIT("tanh")
               MO_F64_Pwr   -> FSLIT("pow")
  
-              other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop)
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
  
-#endif /* i386_TARGET_ARCH */
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if x86_64_TARGET_ARCH
+
+genCCall (CmmPrim op) [(r,_)] args vols = 
+  outOfLineFloatOp op r args vols
+
+genCCall target dest_regs args vols = do
+
+       -- load up the register arguments
+    (stack_args, aregs, fregs, load_args_code)
+        <- load_args args allArgRegs allFPArgRegs nilOL
+
+    let
+       fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
+       int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+       arg_regs = int_regs_used ++ fp_regs_used
+               -- for annotating the call instruction with
+
+       sse_regs = length fp_regs_used
+
+       tot_arg_size = arg_size * length stack_args
+
+       -- On entry to the called function, %rsp should be aligned
+       -- on a 16-byte boundary +8 (i.e. the first stack arg after
+       -- the return address is 16-byte aligned).  In STG land
+       -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+       -- need to make sure we push a multiple of 16-bytes of args,
+       -- plus the return address, to get the correct alignment.
+       -- Urg, this is hard.  We need to feed the delta back into
+       -- the arg pushing code.
+    (real_size, adjust_rsp) <-
+       if tot_arg_size `rem` 16 == 0
+           then return (tot_arg_size, nilOL)
+           else do -- we need to adjust...
+               delta <- getDeltaNat
+               setDeltaNat (delta-8)
+               return (tot_arg_size+8, toOL [
+                               SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
+                               DELTA (delta-8)
+                       ])
+
+       -- push the stack args, right to left
+    push_code <- push_args (reverse stack_args) nilOL
+    delta <- getDeltaNat
+
+    -- deal with static vs dynamic call targets
+    (callinsns,cconv) <-
+      case target of
+       -- CmmPrim -> ...
+        CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+           -> -- ToDo: stdcall arg sizes
+             return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+          where fn_imm = ImmCLbl lbl
+        CmmForeignCall expr conv
+           -> do (dyn_r, dyn_c) <- getSomeReg expr
+                return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+    let
+       -- The x86_64 ABI requires us to set %al to the number of SSE
+       -- registers that contain arguments, if the called routine
+       -- is a varargs function.  We don't know whether it's a
+       -- varargs function or not, so we have to assume it is.
+       --
+       -- It's not safe to omit this assignment, even if the number
+       -- of SSE regs in use is zero.  If %al is larger than 8
+       -- on entry to a varargs function, seg faults ensue.
+       assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
+
+    let call = callinsns `appOL`
+               toOL (
+                       -- Deallocate parameters after call for ccall;
+                       -- but not for stdcall (callee does it)
+                  (if cconv == StdCallConv || real_size==0 then [] else 
+                  [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
+                  ++
+                  [DELTA (delta + real_size)]
+               )
+    -- in
+    setDeltaNat (delta + real_size)
+
+    let
+       -- assign the results, if necessary
+       assign_code []     = nilOL
+       assign_code [(dest,_hint)] = 
+         case rep of
+               F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+               F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+               rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
+         where 
+               rep = cmmRegRep dest
+               r_dest = getRegisterReg dest
+       assign_code many = panic "genCCall.assign_code many"
+
+    return (load_args_code     `appOL` 
+           adjust_rsp          `appOL`
+           push_code           `appOL`
+           assign_eax sse_regs `appOL`
+           call                `appOL` 
+           assign_code dest_regs)
+
+  where
+    arg_size = 8 -- always, at the mo
+
+    load_args :: [(CmmExpr,MachHint)]
+             -> [Reg]                  -- int regs avail for args
+             -> [Reg]                  -- FP regs avail for args
+             -> InstrBlock
+             -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
+    load_args args [] [] code     =  return (args, [], [], code)
+       -- no more regs to use
+    load_args [] aregs fregs code =  return ([], aregs, fregs, code)
+       -- no more args to push
+    load_args ((arg,hint) : rest) aregs fregs code
+       | isFloatingRep arg_rep = 
+       case fregs of
+         [] -> push_this_arg
+         (r:rs) -> do
+            arg_code <- getAnyReg arg
+            load_args rest aregs rs (code `appOL` arg_code r)
+       | otherwise =
+       case aregs of
+         [] -> push_this_arg
+         (r:rs) -> do
+            arg_code <- getAnyReg arg
+            load_args rest rs fregs (code `appOL` arg_code r)
+       where
+         arg_rep = cmmExprRep arg
+
+         push_this_arg = do
+           (args',ars,frs,code') <- load_args rest aregs fregs code
+           return ((arg,hint):args', ars, frs, code')
+
+    push_args [] code = return code
+    push_args ((arg,hint):rest) code
+       | isFloatingRep arg_rep = do
+        (arg_reg, arg_code) <- getSomeReg arg
+         delta <- getDeltaNat
+         setDeltaNat (delta-arg_size)
+        let code' = code `appOL` toOL [
+                       MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
+                       SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+                       DELTA (delta-arg_size)]
+        push_args rest code'
+
+       | otherwise = do
+       -- we only ever generate word-sized function arguments.  Promotion
+       -- has already happened: our Int8# type is kept sign-extended
+       -- in an Int#, for example.
+        ASSERT(arg_rep == I64) return ()
+        (arg_op, arg_code) <- getOperand arg
+         delta <- getDeltaNat
+         setDeltaNat (delta-arg_size)
+        let code' = code `appOL` toOL [PUSH I64 arg_op, 
+                                       DELTA (delta-arg_size)]
+        push_args rest code'
+       where
+         arg_rep = cmmExprRep arg
+#endif
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
@@ -2988,23 +3321,33 @@ outOfLineFloatOp mop res args vols
     stack only immediately prior to the call proper.  Sigh.
  -}
  
-genCCall fn cconv kind args
-  = mapNat arg_to_int_vregs args `thenNat` \ argcode_and_vregs ->
+genCCall target dest_regs argsAndHints vols = do
+    let
+        args = map fst argsAndHints
+    argcode_and_vregs <- mapM arg_to_int_vregs args
      let 
          (argcodes, vregss) = unzip argcode_and_vregs
          n_argRegs          = length allArgRegs
          n_argRegs_used     = min (length vregs) n_argRegs
          vregs              = concat vregss
-    in
      -- deal with static vs dynamic call targets
-    (case fn of
-        Left t_static
-           -> return (unitOL (CALL (Left fn__2) n_argRegs_used False))
-        Right dyn
-           -> arg_to_int_vregs dyn `thenNat` \ (dyn_c, [dyn_r]) ->
-              return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-    )
-                               `thenNat` \ callinsns ->
+    callinsns <- (case target of
+        CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
+               return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+        CmmForeignCall expr conv -> do
+                (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+                return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+       CmmPrim mop -> do
+                 (res, reduce) <- outOfLineFloatOp mop
+                 lblOrMopExpr <- case res of
+                      Left lbl -> do
+                           return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+                      Right mopExpr -> do
+                           (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+                           return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+                 if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr
+
+      )
      let
          argcode = concatOL argcodes
          (move_sp_down, move_sp_up)
@@ -3015,23 +3358,13 @@ genCCall fn cconv kind args
                   else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
          transfer_code
             = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
-    in
-        return (argcode       `appOL`
-                   move_sp_down  `appOL`
-                   transfer_code `appOL`
-                   callinsns     `appOL`
-                   unitOL NOP    `appOL`
-                   move_sp_up)
+    return (argcode       `appOL`
+            move_sp_down  `appOL`
+            transfer_code `appOL`
+            callinsns     `appOL`
+            unitOL NOP    `appOL`
+            move_sp_up)
    where
-     -- function names that begin with '.' are assumed to be special
-     -- internally generated names like '.mul,' which don't get an
-     -- underscore prefix
-     -- ToDo:needed (WDP 96/03) ???
-     fn_static = unLeft fn
-     fn__2 = case (headFS fn_static) of
-               '.' -> ImmLit (ftext fn_static)
-               _   -> ImmCLbl (mkForeignLabel fn_static False)
-
       -- move args from the integer vregs into which they have been 
       -- marshalled, into %o0 .. %o5, and the rest onto the stack.
       move_final :: [Reg] -> [Reg] -> Int -> [Instr]
@@ -3040,7 +3373,7 @@ genCCall fn cconv kind args
          = []
  
       move_final (v:vs) [] offset     -- out of aregs; move to stack
-        = ST W v (spRel offset)
+        = ST I32 v (spRel offset)
            : move_final vs [] (offset+1)
  
       move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
@@ -3051,49 +3384,93 @@ genCCall fn cconv kind args
       -- or two integer vregs.
       arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
       arg_to_int_vregs arg
-        | is64BitRep (repOfCmmExpr arg)
-        = iselExpr64 arg               `thenNat` \ (ChildCode64 code vr_lo) ->
-          let r_lo = VirtualRegI vr_lo
+        | (cmmExprRep arg) == I64
+        = do
+         (ChildCode64 code r_lo) <- iselExpr64 arg
+          let 
                r_hi = getHiVRegFromLo r_lo
-          in  return (code, [r_hi, r_lo])
+          return (code, [r_hi, r_lo])
          | otherwise
-        = getRegister arg                     `thenNat` \ register ->
-          getNewRegNat (registerRep register) `thenNat` \ tmp ->
-          let code = registerCode register tmp
-              src  = registerName register tmp
-              pk   = registerRep register
-          in
-          -- the value is in src.  Get it into 1 or 2 int vregs.
+        = do
+         (src, code) <- getSomeReg arg
+          tmp <- getNewRegNat (cmmExprRep arg)
+          let
+              pk   = cmmExprRep arg
            case pk of
-             F64 -> 
-                getNewRegNat WordRep  `thenNat` \ v1 ->
-                getNewRegNat WordRep  `thenNat` \ v2 ->
-                return (
-                   code                          `snocOL`
-                   FMOV DF src f0                `snocOL`
-                   ST   F  f0 (spRel 16)         `snocOL`
-                   LD   W  (spRel 16) v1         `snocOL`
-                   ST   F  (fPair f0) (spRel 16) `snocOL`
-                   LD   W  (spRel 16) v2
-                   ,
-                   [v1,v2]
-                )
-             F32 -> 
-                getNewRegNat WordRep  `thenNat` \ v1 ->
-                return (
-                   code                    `snocOL`
-                   ST   F  src (spRel 16)  `snocOL`
-                   LD   W  (spRel 16) v1
-                   ,
-                   [v1]
-                )
-             other ->
-                getNewRegNat WordRep  `thenNat` \ v1 ->
-                return (
-                   code `snocOL` OR False g0 (RIReg src) v1
-                   , 
-                   [v1]
-                )
+             F64 -> do
+                      v1 <- getNewRegNat I32
+                      v2 <- getNewRegNat I32
+                      return (
+                        code                          `snocOL`
+                        FMOV F64 src f0                `snocOL`
+                        ST   F32  f0 (spRel 16)         `snocOL`
+                        LD   I32  (spRel 16) v1         `snocOL`
+                        ST   F32  (fPair f0) (spRel 16) `snocOL`
+                        LD   I32  (spRel 16) v2
+                        ,
+                        [v1,v2]
+                       )
+             F32 -> do
+                      v1 <- getNewRegNat I32
+                      return (
+                        code                    `snocOL`
+                        ST   F32  src (spRel 16)  `snocOL`
+                        LD   I32  (spRel 16) v1
+                        ,
+                        [v1]
+                       )
+             other -> do
+                        v1 <- getNewRegNat I32
+                        return (
+                          code `snocOL` OR False g0 (RIReg src) v1
+                          , 
+                          [v1]
+                         )
+outOfLineFloatOp mop =
+    do
+      mopExpr <- cmmMakeDynamicReference addImportNat True $
+                 mkForeignLabel functionName Nothing True
+      let mopLabelOrExpr = case mopExpr of
+                       CmmLit (CmmLabel lbl) -> Left lbl
+                        _ -> Right mopExpr
+      return (mopLabelOrExpr, reduce)
+            where
+                (reduce, functionName) = case mop of
+                 MO_F32_Exp    -> (True,  FSLIT("exp"))
+                 MO_F32_Log    -> (True,  FSLIT("log"))
+                 MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))
+
+                 MO_F32_Sin    -> (True,  FSLIT("sin"))
+                 MO_F32_Cos    -> (True,  FSLIT("cos"))
+                 MO_F32_Tan    -> (True,  FSLIT("tan"))
+
+                 MO_F32_Asin   -> (True,  FSLIT("asin"))
+                 MO_F32_Acos   -> (True,  FSLIT("acos"))
+                 MO_F32_Atan   -> (True,  FSLIT("atan"))
+
+                 MO_F32_Sinh   -> (True,  FSLIT("sinh"))
+                 MO_F32_Cosh   -> (True,  FSLIT("cosh"))
+                 MO_F32_Tanh   -> (True,  FSLIT("tanh"))
+
+                 MO_F64_Exp    -> (False, FSLIT("exp"))
+                 MO_F64_Log    -> (False, FSLIT("log"))
+                 MO_F64_Sqrt   -> (False, FSLIT("sqrt"))
+
+                 MO_F64_Sin    -> (False, FSLIT("sin"))
+                 MO_F64_Cos    -> (False, FSLIT("cos"))
+                 MO_F64_Tan    -> (False, FSLIT("tan"))
+
+                 MO_F64_Asin   -> (False, FSLIT("asin"))
+                 MO_F64_Acos   -> (False, FSLIT("acos"))
+                 MO_F64_Atan   -> (False, FSLIT("atan"))
+
+                 MO_F64_Sinh   -> (False, FSLIT("sinh"))
+                 MO_F64_Cosh   -> (False, FSLIT("cosh"))
+                 MO_F64_Tanh   -> (False, FSLIT("tanh"))
+
+                  other -> pprPanic "outOfLineFloatOp(sparc) "
+                                (pprCallishMachOp mop)
+
  #endif /* sparc_TARGET_ARCH */
  
  #if powerpc_TARGET_ARCH
@@ -3123,6 +3500,10 @@ genCCall fn cconv kind args
      * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
        starting with an odd-numbered GPR. It may skip a GPR to achieve this.
        Darwin just treats an I64 like two separate I32s (high word first).
+    * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
+      4-byte aligned like everything else on Darwin.
+    * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
+      PowerPC Linux does not agree, so neither do we.
        
      According to both conventions, The parameter area should be part of the
      caller's stack frame, allocated in the caller's prologue code (large enough
@@ -3142,12 +3523,16 @@ genCCall target dest_regs argsAndHints vols
                                                          initialStackOffset
                                                          (toOL []) []
                                                  
+        (labelOrExpr, reduceToF32) <- case target of
+            CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+            CmmForeignCall expr conv -> return  (Right expr, False)
+            CmmPrim mop -> outOfLineFloatOp mop
+                                                        
          let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
-            codeAfter = move_sp_up finalStack `appOL` moveResult
+            codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32
  
          case labelOrExpr of
              Left lbl -> do
-               addImportNat True lbl
                 return (         codeBefore
                          `snocOL` BL lbl usedRegs
                          `appOL`         codeAfter)
@@ -3162,17 +3547,17 @@ genCCall target dest_regs argsAndHints vols
  #if darwin_TARGET_OS
          initialStackOffset = 24
             -- size of linkage area + size of arguments, in bytes       
-       stackDelta _finalStack = roundTo16 $ (24 +) $ max 32 $ sum $
+       stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
                                        map machRepByteWidth argReps
  #elif linux_TARGET_OS
          initialStackOffset = 8
-        stackDelta finalStack = roundTo16 finalStack
+        stackDelta finalStack = roundTo 16 finalStack
  #endif
         args = map fst argsAndHints
         argReps = map cmmExprRep args
  
-       roundTo16 x | x `mod` 16 == 0 = x
-                   | otherwise = x + 16 - (x `mod` 16)
+       roundTo a x | x `mod` a == 0 = x
+                   | otherwise = x + a - (x `mod` a)
  
          move_sp_down finalStack
                 | delta > 64 =
@@ -3209,9 +3594,10 @@ genCCall target dest_regs argsAndHints vols
                  storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))
                  
  #elif linux_TARGET_OS
-                let stackCode = accumCode `appOL` code
-                        `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset))
-                        `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset+4)))
+                let stackOffset' = roundTo 8 stackOffset
+                    stackCode = accumCode `appOL` code
+                        `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+                        `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
                      regCode hireg loreg =
                          accumCode `appOL` code
                              `snocOL` MR hireg vr_hi
@@ -3225,7 +3611,7 @@ genCCall target dest_regs argsAndHints vols
                          passArguments args regs fprs stackOffset
                                        (regCode hireg loreg) (hireg : loreg : accumUsed)
                      _ -> -- only one or no regs left
-                        passArguments args [] fprs (stackOffset+8)
+                        passArguments args [] fprs (stackOffset'+8)
                                        stackCode accumUsed
  #endif
          
@@ -3252,11 +3638,20 @@ genCCall target dest_regs argsAndHints vols
                  passArguments args
                                (drop nGprs gprs)
                                (drop nFprs fprs)
-                              (stackOffset + stackBytes)
+                              (stackOffset' + stackBytes)
                                (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
                                accumUsed
              where
-                stackSlot = AddrRegImm sp (ImmInt stackOffset)
+#if darwin_TARGET_OS
+        -- stackOffset is at least 4-byte aligned
+        -- The Darwin ABI is happy with that.
+                stackOffset' = stackOffset
+#else
+        -- ... the SysV ABI requires 8-byte alignment for doubles.
+                stackOffset' | rep == F64 = roundTo 8 stackOffset
+                             | otherwise  =           stackOffset
+#endif
+                stackSlot = AddrRegImm sp (ImmInt stackOffset')
                  (nGprs, nFprs, stackBytes, regs) = case rep of
                      I32 -> (1, 0, 4, gprs)
  #if darwin_TARGET_OS
@@ -3270,7 +3665,7 @@ genCCall target dest_regs argsAndHints vols
                      F64 -> (0, 1, 8, fprs)
  #endif
          
-        moveResult =
+        moveResult reduceToF32 =
              case dest_regs of
                  [] -> nilOL
                  [(dest, _hint)]
@@ -3282,47 +3677,51 @@ genCCall target dest_regs argsAndHints vols
                      where rep = cmmRegRep dest
                            r_dest = getRegisterReg dest
                            
-        (labelOrExpr, reduceToF32) = case target of
-            CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> (Left lbl, False)
-            CmmForeignCall expr conv -> (Right expr, False)
-            CmmPrim mop -> (Left $ mkForeignLabel label Nothing False, reduce)
-                where
-                    (label, reduce) = case mop of
-                        MO_F32_Exp   -> (FSLIT("exp"), True)
-                        MO_F32_Log   -> (FSLIT("log"), True)
-                        MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
-                        
-                        MO_F32_Sin   -> (FSLIT("sin"), True)
-                        MO_F32_Cos   -> (FSLIT("cos"), True)
-                        MO_F32_Tan   -> (FSLIT("tan"), True)
-                        
-                        MO_F32_Asin  -> (FSLIT("asin"), True)
-                        MO_F32_Acos  -> (FSLIT("acos"), True)
-                        MO_F32_Atan  -> (FSLIT("atan"), True)
-                        
-                        MO_F32_Sinh  -> (FSLIT("sinh"), True)
-                        MO_F32_Cosh  -> (FSLIT("cosh"), True)
-                        MO_F32_Tanh  -> (FSLIT("tanh"), True)
-                        MO_F32_Pwr   -> (FSLIT("pow"), True)
-                        
-                        MO_F64_Exp   -> (FSLIT("exp"), False)
-                        MO_F64_Log   -> (FSLIT("log"), False)
-                        MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
+        outOfLineFloatOp mop =
+            do
+                mopExpr <- cmmMakeDynamicReference addImportNat True $
+                              mkForeignLabel functionName Nothing True
+                let mopLabelOrExpr = case mopExpr of
+                        CmmLit (CmmLabel lbl) -> Left lbl
+                        _ -> Right mopExpr
+                return (mopLabelOrExpr, reduce)
+            where
+                (functionName, reduce) = case mop of
+                    MO_F32_Exp   -> (FSLIT("exp"), True)
+                    MO_F32_Log   -> (FSLIT("log"), True)
+                    MO_F32_Sqrt  -> (FSLIT("sqrt"), True)
                          
-                        MO_F64_Sin   -> (FSLIT("sin"), False)
-                        MO_F64_Cos   -> (FSLIT("cos"), False)
-                        MO_F64_Tan   -> (FSLIT("tan"), False)
+                    MO_F32_Sin   -> (FSLIT("sin"), True)
+                    MO_F32_Cos   -> (FSLIT("cos"), True)
+                    MO_F32_Tan   -> (FSLIT("tan"), True)
+                    
+                    MO_F32_Asin  -> (FSLIT("asin"), True)
+                    MO_F32_Acos  -> (FSLIT("acos"), True)
+                    MO_F32_Atan  -> (FSLIT("atan"), True)
+                    
+                    MO_F32_Sinh  -> (FSLIT("sinh"), True)
+                    MO_F32_Cosh  -> (FSLIT("cosh"), True)
+                    MO_F32_Tanh  -> (FSLIT("tanh"), True)
+                    MO_F32_Pwr   -> (FSLIT("pow"), True)
                          
-                        MO_F64_Asin  -> (FSLIT("asin"), False)
-                        MO_F64_Acos  -> (FSLIT("acos"), False)
-                        MO_F64_Atan  -> (FSLIT("atan"), False)
+                    MO_F64_Exp   -> (FSLIT("exp"), False)
+                    MO_F64_Log   -> (FSLIT("log"), False)
+                    MO_F64_Sqrt  -> (FSLIT("sqrt"), False)
                          
-                        MO_F64_Sinh  -> (FSLIT("sinh"), False)
-                        MO_F64_Cosh  -> (FSLIT("cosh"), False)
-                        MO_F64_Tanh  -> (FSLIT("tanh"), False)
-                        MO_F64_Pwr   -> (FSLIT("pow"), False)
-                        other -> pprPanic "genCCall(ppc): unknown callish op"
-                                        (pprCallishMachOp other)
+                    MO_F64_Sin   -> (FSLIT("sin"), False)
+                    MO_F64_Cos   -> (FSLIT("cos"), False)
+                    MO_F64_Tan   -> (FSLIT("tan"), False)
+                     
+                    MO_F64_Asin  -> (FSLIT("asin"), False)
+                    MO_F64_Acos  -> (FSLIT("acos"), False)
+                    MO_F64_Atan  -> (FSLIT("atan"), False)
+                    
+                    MO_F64_Sinh  -> (FSLIT("sinh"), False)
+                    MO_F64_Cosh  -> (FSLIT("cosh"), False)
+                    MO_F64_Tanh  -> (FSLIT("tanh"), False)
+                    MO_F64_Pwr   -> (FSLIT("pow"), False)
+                    other -> pprPanic "genCCall(ppc): unknown callish op"
+                                    (pprCallishMachOp other)
  
  #endif /* darwin_TARGET_OS || linux_TARGET_OS */
                  
@@ -3334,37 +3733,89 @@ genCCall target dest_regs argsAndHints vols
  
  genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
  
-#if i386_TARGET_ARCH
-genSwitch expr ids = do
-  (reg,e_code) <- getSomeReg expr
-  lbl <- getNewLabelNat
-  let
-       jumpTable = map jumpTableEntry ids
-       op = OpAddr (AddrBaseIndex Nothing (Just (reg,4)) (ImmCLbl lbl))
-       code = e_code `appOL` toOL [
-               LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-               JMP_TBL op [ id | Just id <- ids ]
-            ]
-  -- in
-  return code
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+genSwitch expr ids
+  | opt_PIC
+  = do
+        (reg,e_code) <- getSomeReg expr
+        lbl <- getNewLabelNat
+        dynRef <- cmmMakeDynamicReference addImportNat False lbl
+        (tableReg,t_code) <- getSomeReg $ dynRef
+        let
+            jumpTable = map jumpTableEntryRel ids
+            
+            jumpTableEntryRel Nothing
+                = CmmStaticLit (CmmInt 0 wordRep)
+            jumpTableEntryRel (Just (BlockId id))
+                = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+                where blockLabel = mkAsmTempLabel id
+
+            op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
+                                       (EAIndex reg wORD_SIZE) (ImmInt 0))
+
+            code = e_code `appOL` t_code `appOL` toOL [
+                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                            ADD wordRep op (OpReg tableReg),
+                            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+                    ]
+        return code
+  | otherwise
+  = do
+        (reg,e_code) <- getSomeReg expr
+        lbl <- getNewLabelNat
+        let
+            jumpTable = map jumpTableEntry ids
+            op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
+            code = e_code `appOL` toOL [
+                    LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                    JMP_TBL op [ id | Just id <- ids ]
+                 ]
+        -- in
+        return code
  #elif powerpc_TARGET_ARCH
-genSwitch expr ids = do
-  (reg,e_code) <- getSomeReg expr
-  tmp <- getNewRegNat I32
-  lbl <- getNewLabelNat
-  let
-       jumpTable = map jumpTableEntry ids
-
-        code = e_code `appOL` toOL [
-                        LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-                        SLW tmp reg (RIImm (ImmInt 2)),
-                        ADDIS tmp tmp (HA (ImmCLbl lbl)),
-                        LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
-                        MTCTR tmp,
-                        BCTR [ id | Just id <- ids ]
-                ]
-  -- in
-  return code
+genSwitch expr ids 
+  | opt_PIC
+  = do
+        (reg,e_code) <- getSomeReg expr
+        tmp <- getNewRegNat I32
+        lbl <- getNewLabelNat
+        dynRef <- cmmMakeDynamicReference addImportNat False lbl
+        (tableReg,t_code) <- getSomeReg $ dynRef
+        let
+            jumpTable = map jumpTableEntryRel ids
+            
+            jumpTableEntryRel Nothing
+                = CmmStaticLit (CmmInt 0 wordRep)
+            jumpTableEntryRel (Just (BlockId id))
+                = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+                where blockLabel = mkAsmTempLabel id
+
+            code = e_code `appOL` t_code `appOL` toOL [
+                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                            SLW tmp reg (RIImm (ImmInt 2)),
+                            LD I32 tmp (AddrRegReg tableReg tmp),
+                            ADD tmp tmp (RIReg tableReg),
+                            MTCTR tmp,
+                            BCTR [ id | Just id <- ids ]
+                    ]
+        return code
+  | otherwise
+  = do
+        (reg,e_code) <- getSomeReg expr
+        tmp <- getNewRegNat I32
+        lbl <- getNewLabelNat
+        let
+            jumpTable = map jumpTableEntry ids
+        
+            code = e_code `appOL` toOL [
+                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                            SLW tmp reg (RIImm (ImmInt 2)),
+                            ADDIS tmp tmp (HA (ImmCLbl lbl)),
+                            LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
+                            MTCTR tmp,
+                            BCTR [ id | Just id <- ids ]
+                    ]
+        return code
  #else
  genSwitch expr ids = panic "ToDo: genSwitch"
  #endif
@@ -3398,7 +3849,7 @@ condFltReg = panic "MachCode.condFltReg (not on Alpha)"
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  condIntReg cond x y = do
    CondCode _ cond cond_code <- condIntCode cond x y
@@ -3406,135 +3857,152 @@ condIntReg cond x y = do
    let 
         code dst = cond_code `appOL` toOL [
                     SETCC cond (OpReg tmp),
-                   MOV I32 (OpReg tmp) (OpReg dst),
-                   AND I32 (OpImm (ImmInt 1)) (OpReg dst)
+                   MOVZxL I8 (OpReg tmp) (OpReg dst)
                   ]
-                  -- NB. (1) Tha AND is needed here because the x86 only
-                  -- sets the low byte in the SETCC instruction.
-                  -- NB. (2) The extra temporary register is a hack to
-                  -- work around the fact that the setcc instructions only
-                  -- accept byte registers.  dst might not be a byte-able reg,
-                  -- but currently all free registers are byte-able, so we're
-                  -- guaranteed that a new temporary is byte-able.
    -- in
    return (Any I32 code)
  
+#endif
+
+#if i386_TARGET_ARCH
  
  condFltReg cond x y = do
-  lbl1 <- getBlockIdNat
-  lbl2 <- getBlockIdNat
    CondCode _ cond cond_code <- condFltCode cond x y
-  let
-       code dst = cond_code `appOL` toOL [
-           JXX cond lbl1,
-           MOV I32 (OpImm (ImmInt 0)) (OpReg dst),
-           JXX ALWAYS lbl2,
-           NEWBLOCK lbl1,
-           MOV I32 (OpImm (ImmInt 1)) (OpReg dst),
-           JXX ALWAYS lbl2,
-           NEWBLOCK lbl2]
-               -- SIGH, have to split up this block somehow...
+  tmp <- getNewRegNat I8
+  let 
+       code dst = cond_code `appOL` toOL [
+                   SETCC cond (OpReg tmp),
+                   MOVZxL I8 (OpReg tmp) (OpReg dst)
+                 ]
    -- in
    return (Any I32 code)
  
-#endif /* i386_TARGET_ARCH */
+#endif
+
+#if x86_64_TARGET_ARCH
+
+condFltReg cond x y = do
+  CondCode _ cond cond_code <- condFltCode cond x y
+  tmp1 <- getNewRegNat wordRep
+  tmp2 <- getNewRegNat wordRep
+  let 
+       -- We have to worry about unordered operands (eg. comparisons
+       -- against NaN).  If the operands are unordered, the comparison
+       -- sets the parity flag, carry flag and zero flag.
+       -- All comparisons are supposed to return false for unordered
+       -- operands except for !=, which returns true.
+       --
+       -- Optimisation: we don't have to test the parity flag if we
+       -- know the test has already excluded the unordered case: eg >
+       -- and >= test for a zero carry flag, which can only occur for
+       -- ordered operands.
+       --
+       -- ToDo: by reversing comparisons we could avoid testing the
+       -- parity flag in more cases.
+
+       code dst = 
+          cond_code `appOL` 
+            (case cond of
+               NE  -> or_unordered dst
+               GU  -> plain_test   dst
+               GEU -> plain_test   dst
+               _   -> and_ordered  dst)
+
+       plain_test dst = toOL [
+                   SETCC cond (OpReg tmp1),
+                   MOVZxL I8 (OpReg tmp1) (OpReg dst)
+                ]
+       or_unordered dst = toOL [
+                   SETCC cond (OpReg tmp1),
+                   SETCC PARITY (OpReg tmp2),
+                   OR I8 (OpReg tmp1) (OpReg tmp2),
+                   MOVZxL I8 (OpReg tmp2) (OpReg dst)
+                 ]
+       and_ordered dst = toOL [
+                   SETCC cond (OpReg tmp1),
+                   SETCC NOTPARITY (OpReg tmp2),
+                   AND I8 (OpReg tmp1) (OpReg tmp2),
+                   MOVZxL I8 (OpReg tmp2) (OpReg dst)
+                 ]
+  -- in
+  return (Any I32 code)
+
+#endif
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
  #if sparc_TARGET_ARCH
  
-condIntReg EQQ x (StInt 0)
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep                `thenNat` \ tmp ->
+condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat I32
      let
-       code = registerCode register tmp
-       src  = registerName register tmp
         code__2 dst = code `appOL` toOL [
             SUB False True g0 (RIReg src) g0,
             SUB True False g0 (RIImm (ImmInt (-1))) dst]
-    in
-    return (Any IntRep code__2)
+    return (Any I32 code__2)
  
-condIntReg EQQ x y
-  = getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
-    getNewRegNat IntRep                `thenNat` \ tmp1 ->
-    getNewRegNat IntRep                `thenNat` \ tmp2 ->
+condIntReg EQQ x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat I32
+    tmp2 <- getNewRegNat I32
      let
-       code1 = registerCode register1 tmp1
-       src1  = registerName register1 tmp1
-       code2 = registerCode register2 tmp2
-       src2  = registerName register2 tmp2
         code__2 dst = code1 `appOL` code2 `appOL` toOL [
             XOR False src1 (RIReg src2) dst,
             SUB False True g0 (RIReg dst) g0,
             SUB True False g0 (RIImm (ImmInt (-1))) dst]
-    in
-    return (Any IntRep code__2)
+    return (Any I32 code__2)
  
-condIntReg NE x (StInt 0)
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep        `thenNat` \ tmp ->
+condIntReg NE x (CmmLit (CmmInt 0 d)) = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat I32
      let
-       code = registerCode register tmp
-       src  = registerName register tmp
         code__2 dst = code `appOL` toOL [
             SUB False True g0 (RIReg src) g0,
             ADD True False g0 (RIImm (ImmInt 0)) dst]
-    in
-    return (Any IntRep code__2)
+    return (Any I32 code__2)
  
-condIntReg NE x y
-  = getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
-    getNewRegNat IntRep                `thenNat` \ tmp1 ->
-    getNewRegNat IntRep                `thenNat` \ tmp2 ->
+condIntReg NE x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat I32
+    tmp2 <- getNewRegNat I32
      let
-       code1 = registerCode register1 tmp1
-       src1  = registerName register1 tmp1
-       code2 = registerCode register2 tmp2
-       src2  = registerName register2 tmp2
         code__2 dst = code1 `appOL` code2 `appOL` toOL [
             XOR False src1 (RIReg src2) dst,
             SUB False True g0 (RIReg dst) g0,
             ADD True False g0 (RIImm (ImmInt 0)) dst]
-    in
-    return (Any IntRep code__2)
+    return (Any I32 code__2)
  
-condIntReg cond x y
-  = getBlockIdNat              `thenNat` \ lbl1 ->
-    getBlockIdNat              `thenNat` \ lbl2 ->
-    condIntCode cond x y       `thenNat` \ condition ->
+condIntReg cond x y = do
+    BlockId lbl1 <- getBlockIdNat
+    BlockId lbl2 <- getBlockIdNat
+    CondCode _ cond cond_code <- condIntCode cond x y
      let
-       code = condCode condition
-       cond = condName condition
-       code__2 dst = code `appOL` toOL [
-           BI cond False (ImmCLbl lbl1), NOP,
+       code__2 dst = cond_code `appOL` toOL [
+           BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
             OR False g0 (RIImm (ImmInt 0)) dst,
-           BI ALWAYS False (ImmCLbl lbl2), NOP,
-           NEWBLOCK lbl1,
+           BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
+           NEWBLOCK (BlockId lbl1),
             OR False g0 (RIImm (ImmInt 1)) dst,
-           NEWBLOCK lbl2]
-    in
-    return (Any IntRep code__2)
+           NEWBLOCK (BlockId lbl2)]
+    return (Any I32 code__2)
  
-condFltReg cond x y
-  = getBlockIdNat              `thenNat` \ lbl1 ->
-    getBlockIdNat              `thenNat` \ lbl2 ->
-    condFltCode cond x y       `thenNat` \ condition ->
+condFltReg cond x y = do
+    BlockId lbl1 <- getBlockIdNat
+    BlockId lbl2 <- getBlockIdNat
+    CondCode _ cond cond_code <- condFltCode cond x y
      let
-       code = condCode condition
-       cond = condName condition
-       code__2 dst = code `appOL` toOL [
+       code__2 dst = cond_code `appOL` toOL [ 
             NOP,
-           BF cond False (ImmCLbl lbl1), NOP,
+           BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
             OR False g0 (RIImm (ImmInt 0)) dst,
-           BI ALWAYS False (ImmCLbl lbl2), NOP,
-           NEWBLOCK lbl1,
+           BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
+           NEWBLOCK (BlockId lbl1),
             OR False g0 (RIImm (ImmInt 1)) dst,
-           NEWBLOCK lbl2]
-    in
-    return (Any IntRep code__2)
+           NEWBLOCK (BlockId lbl2)]
+    return (Any I32 code__2)
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -3601,9 +4069,11 @@ trivialCode
      -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
        ,IF_ARCH_i386 ((Operand -> Operand -> Instr) 
                       -> Maybe (Operand -> Operand -> Instr)
+      ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr) 
+                     -> Maybe (Operand -> Operand -> Instr)
        ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
        ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
-      ,))))
+      ,)))))
      -> CmmExpr -> CmmExpr -- the two arguments
      -> NatM Register
  
@@ -3611,9 +4081,10 @@ trivialCode
  trivialFCode
      :: MachRep
      -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
-      ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
+      ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr)
        ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
-      ,)))
+      ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
+      ,))))
      -> CmmExpr -> CmmExpr -- the two arguments
      -> NatM Register
  #endif
@@ -3622,9 +4093,10 @@ trivialUCode
      :: MachRep 
      -> IF_ARCH_alpha((RI -> Reg -> Instr)
        ,IF_ARCH_i386 ((Operand -> Instr)
+      ,IF_ARCH_x86_64 ((Operand -> Instr)
        ,IF_ARCH_sparc((RI -> Reg -> Instr)
        ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
-      ,))))
+      ,)))))
      -> CmmExpr -- the one argument
      -> NatM Register
  
@@ -3633,8 +4105,9 @@ trivialUFCode
      :: MachRep
      -> IF_ARCH_alpha((Reg -> Reg -> Instr)
        ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
+      ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
        ,IF_ARCH_sparc((Reg -> Reg -> Instr)
-      ,)))
+      ,))))
      -> CmmExpr -- the one argument
      -> NatM Register
  #endif
@@ -3713,7 +4186,7 @@ trivialUFCode _ instr x
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  {-
  The Rules of the Game are:
@@ -3760,16 +4233,8 @@ SDM's version of The Rules:
    register happens to be the destination register.
  -}
  
-trivialCode rep instr maybe_revinstr a (CmmLit lit_b) = do
-  a_code <- getAnyReg a
-  let
-       code dst
-          = a_code dst `snocOL` 
-           instr (OpImm (litToImm lit_b)) (OpReg dst)
-  -- in
-  return (Any rep code)
-              
-trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
+trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
+  | not (is64BitLit lit_a) = do
    b_code <- getAnyReg b
    let
         code dst 
@@ -3778,8 +4243,11 @@ trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
    -- in
    return (Any rep code)
  
-trivialCode rep instr maybe_revinstr a b = do
-  (b_op, b_code) <- getOperand b
+trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+  (b_op, b_code) <- getNonClobberedOperand b
    a_code <- getAnyReg a
    tmp <- getNewRegNat rep
    let
@@ -3790,7 +4258,7 @@ trivialCode rep instr maybe_revinstr a b = do
       -- as the destination reg.  In this case, we have to save b in a
       -- new temporary across the computation of a.
       code dst
-       | dst `clashesWith` b_op =
+       | dst `regClashesWithOp` b_op =
                 b_code `appOL`
                 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
                 a_code dst `snocOL`
@@ -3801,9 +4269,10 @@ trivialCode rep instr maybe_revinstr a b = do
                 instr b_op (OpReg dst)
    -- in
    return (Any rep code)
- where
-  reg `clashesWith` OpReg reg2   = reg == reg2
-  reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
+
+reg `regClashesWithOp` OpReg reg2   = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _            = False
  
  -----------
  
@@ -3818,6 +4287,8 @@ trivialUCode rep instr x = do
  
  -----------
  
+#if i386_TARGET_ARCH
+
  trivialFCode pk instr x y = do
    (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
    (y_reg, y_code) <- getSomeReg y
@@ -3829,6 +4300,14 @@ trivialFCode pk instr x y = do
    -- in
    return (Any pk code)
  
+#endif
+
+#if x86_64_TARGET_ARCH
+
+trivialFCode pk instr x y = genTrivialCode  pk (instr pk) x y
+
+#endif
+
  -------------
  
  trivialUFCode rep instr x = do
@@ -3846,86 +4325,65 @@ trivialUFCode rep instr x = do
  
  #if sparc_TARGET_ARCH
  
-trivialCode instr x (StInt y)
+trivialCode pk instr x (CmmLit (CmmInt y d))
    | fits13Bits y
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep                `thenNat` \ tmp ->
-    let
-       code = registerCode register tmp
-       src1 = registerName register tmp
+  = do
+      (src1, code) <- getSomeReg x
+      tmp <- getNewRegNat I32
+      let
         src2 = ImmInt (fromInteger y)
         code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
-    in
-    return (Any IntRep code__2)
+      return (Any I32 code__2)
  
-trivialCode instr x y
-  = getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
-    getNewRegNat IntRep                `thenNat` \ tmp1 ->
-    getNewRegNat IntRep                `thenNat` \ tmp2 ->
+trivialCode pk instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat I32
+    tmp2 <- getNewRegNat I32
      let
-       code1 = registerCode register1 tmp1
-       src1  = registerName register1 tmp1
-       code2 = registerCode register2 tmp2
-       src2  = registerName register2 tmp2
         code__2 dst = code1 `appOL` code2 `snocOL`
                       instr src1 (RIReg src2) dst
-    in
-    return (Any IntRep code__2)
+    return (Any I32 code__2)
  
  ------------
-trivialFCode pk instr x y
-  = getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
-    getNewRegNat (registerRep register1)
-                               `thenNat` \ tmp1 ->
-    getNewRegNat (registerRep register2)
-                               `thenNat` \ tmp2 ->
-    getNewRegNat F64           `thenNat` \ tmp ->
+trivialFCode pk instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat (cmmExprRep x)
+    tmp2 <- getNewRegNat (cmmExprRep y)
+    tmp <- getNewRegNat F64
      let
-       promote x = FxTOy F DF x tmp
+       promote x = FxTOy F32 F64 x tmp
  
-       pk1   = registerRep register1
-       code1 = registerCode register1 tmp1
-       src1  = registerName register1 tmp1
-
-       pk2   = registerRep register2
-       code2 = registerCode register2 tmp2
-       src2  = registerName register2 tmp2
+       pk1   = cmmExprRep x
+       pk2   = cmmExprRep y
  
         code__2 dst =
                 if pk1 == pk2 then
                     code1 `appOL` code2 `snocOL`
-                   instr (primRepToSize pk) src1 src2 dst
+                   instr pk src1 src2 dst
                 else if pk1 == F32 then
                     code1 `snocOL` promote src1 `appOL` code2 `snocOL`
-                   instr DF tmp src2 dst
+                   instr F64 tmp src2 dst
                 else
                     code1 `appOL` code2 `snocOL` promote src2 `snocOL`
-                   instr DF src1 tmp dst
-    in
+                   instr F64 src1 tmp dst
      return (Any (if pk1 == pk2 then pk1 else F64) code__2)
  
  ------------
-trivialUCode instr x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep                `thenNat` \ tmp ->
+trivialUCode pk instr x = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat pk
      let
-       code = registerCode register tmp
-       src  = registerName register tmp
         code__2 dst = code `snocOL` instr (RIReg src) dst
-    in
-    return (Any IntRep code__2)
+    return (Any pk code__2)
  
  -------------
-trivialUFCode pk instr x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat pk            `thenNat` \ tmp ->
+trivialUFCode pk instr x = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat pk
      let
-       code = registerCode register tmp
-       src  = registerName register tmp
         code__2 dst = code `snocOL` instr src dst
-    in
      return (Any pk code__2)
  
  #endif /* sparc_TARGET_ARCH */
@@ -4019,7 +4477,7 @@ remainderCode rep div x y = do
  coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
  coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
  
-#ifdef sparc_TARGET_ARCH
+#if sparc_TARGET_ARCH
  coerceDbl2Flt :: CmmExpr -> NatM Register
  coerceFlt2Dbl :: CmmExpr -> NatM Register
  #endif
@@ -4087,57 +4545,70 @@ coerceFP2Int from to x = do
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
+#if x86_64_TARGET_ARCH
+
+coerceFP2Int from to x = do
+  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+  let
+        opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+        code dst = x_code `snocOL` opc x_op dst
+  -- in
+  return (Any to code) -- works even if the destination rep is <I32
+
+coerceInt2FP from to x = do
+  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+  let
+        opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
+        code dst = x_code `snocOL` opc x_op dst
+  -- in
+  return (Any to code) -- works even if the destination rep is <I32
+
+coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
+coerceFP2FP to x = do
+  (x_reg, x_code) <- getSomeReg x
+  let
+        opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
+        code dst = x_code `snocOL` opc x_reg dst
+  -- in
+  return (Any to code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
  #if sparc_TARGET_ARCH
  
-coerceInt2FP pk x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep                `thenNat` \ reg ->
+coerceInt2FP pk1 pk2 x = do
+    (src, code) <- getSomeReg x
      let
-       code = registerCode register reg
-       src  = registerName register reg
-
         code__2 dst = code `appOL` toOL [
-           ST W src (spRel (-2)),
-           LD W (spRel (-2)) dst,
-           FxTOy W (primRepToSize pk) dst dst]
-    in
-    return (Any pk code__2)
+           ST pk1 src (spRel (-2)),
+           LD pk1 (spRel (-2)) dst,
+           FxTOy pk1 pk2 dst dst]
+    return (Any pk2 code__2)
  
  ------------
-coerceFP2Int fprep x
-  = ASSERT(fprep == F64 || fprep == F32)
-    getRegister x              `thenNat` \ register ->
-    getNewRegNat fprep         `thenNat` \ reg ->
-    getNewRegNat F32   `thenNat` \ tmp ->
+coerceFP2Int pk fprep x = do
+    (src, code) <- getSomeReg x
+    reg <- getNewRegNat fprep
+    tmp <- getNewRegNat pk
      let
-       code = registerCode register reg
-       src  = registerName register reg
-       code__2 dst = code `appOL` toOL [
-           FxTOy (primRepToSize fprep) W src tmp,
-           ST W tmp (spRel (-2)),
-           LD W (spRel (-2)) dst]
-    in
-    return (Any IntRep code__2)
+       code__2 dst = ASSERT(fprep == F64 || fprep == F32)
+           code `appOL` toOL [
+           FxTOy fprep pk src tmp,
+           ST pk tmp (spRel (-2)),
+           LD pk (spRel (-2)) dst]
+    return (Any pk code__2)
  
  ------------
-coerceDbl2Flt x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat F64   `thenNat` \ tmp ->
-    let code = registerCode register tmp
-        src  = registerName register tmp
-    in
-        return (Any F32 
-                       (\dst -> code `snocOL` FxTOy DF F src dst)) 
+coerceDbl2Flt x = do
+    (src, code) <- getSomeReg x
+    return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst)) 
  
  ------------
-coerceFlt2Dbl x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat F32   `thenNat` \ tmp ->
-    let code = registerCode register tmp
-        src  = registerName register tmp
-    in
-        return (Any F64
-                       (\dst -> code `snocOL` FxTOy F DF src dst)) 
+coerceFlt2Dbl x = do
+    (src, code) <- getSomeReg x
+    return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst))
  
  #endif /* sparc_TARGET_ARCH */
  
@@ -4147,6 +4618,8 @@ coerceInt2FP fromRep toRep x = do
      lbl <- getNewLabelNat
      itmp <- getNewRegNat I32
      ftmp <- getNewRegNat F64
+    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    Amode addr addr_code <- getAmode dynRef
      let
         code' dst = code `appOL` maybe_exts `appOL` toOL [
                 LDATA ReadOnlyData
@@ -4157,9 +4630,9 @@ coerceInt2FP fromRep toRep x = do
                 ST I32 itmp (spRel 3),
                 LIS itmp (ImmInt 0x4330),
                 ST I32 itmp (spRel 2),
-               LD F64 ftmp (spRel 2),
-               LIS itmp (HA (ImmCLbl lbl)),
-               LD F64 dst (AddrRegImm itmp (LO (ImmCLbl lbl))),
+               LD F64 ftmp (spRel 2)
+            ] `appOL` addr_code `appOL` toOL [
+               LD F64 dst addr,
                 FSUB F64 dst ftmp dst
             ] `appOL` maybe_frsp dst
              
@@ -4201,3 +4674,4 @@ eXTRA_STK_ARGS_HERE :: Int
  eXTRA_STK_ARGS_HERE
    = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
  #endif
+