[project @ 2005-04-01 12:14:29 by simonmar]

author simonmar <unknown>

Fri, 1 Apr 2005 12:14:30 +0000 (12:14 +0000)

committer simonmar <unknown>

Fri, 1 Apr 2005 12:14:30 +0000 (12:14 +0000)
author simonmar <unknown>
Fri, 1 Apr 2005 12:14:30 +0000 (12:14 +0000)
committer simonmar <unknown>
Fri, 1 Apr 2005 12:14:30 +0000 (12:14 +0000)
diff --git a/ghc/compiler/cmm/Cmm.hs b/ghc/compiler/cmm/Cmm.hs

index aa92e01..a8576ec 100644 (file)
--- a/ghc/compiler/cmm/Cmm.hs
+++ b/ghc/compiler/cmm/Cmm.hs
@@ -241,6 +241,7 @@ data Section
    | ReadOnlyData
    | RelocatableReadOnlyData
    | UninitialisedData
+  | ReadOnlyData16     -- .rodata.cst16 on x86_64, 16-byte aligned
    | OtherSection String
  
  data CmmStatic
diff --git a/ghc/compiler/nativeGen/AsmCodeGen.lhs b/ghc/compiler/nativeGen/AsmCodeGen.lhs

index e790991..2675a26 100644 (file)
--- a/ghc/compiler/nativeGen/AsmCodeGen.lhs
+++ b/ghc/compiler/nativeGen/AsmCodeGen.lhs
@@ -552,12 +552,37 @@ cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
  cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
  cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
  
--- ToDo: eliminate multiple conversions.  Be careful though: can't remove
--- a narrowing, and can't remove conversions to/from floating point types.
-
--- ToDo: eliminate nested comparisons:
---    CmmMachOp MO_Lt [CmmMachOp MO_Eq [x,y], CmmLit (CmmInt 0 _)]
--- turns into a simple equality test.
+-- Eliminate nested conversions where possible
+cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
+  | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
+    Just (_,   rep3,signed2) <- isIntConversion conv_outer
+  = case () of
+       -- widen then narrow to the same size is a nop
+      _ | rep1 < rep2 && rep1 == rep3 -> x
+       -- Widen then narrow to different size: collapse to single conversion
+       -- but remember to use the signedness from the widening, just in case
+       -- the final conversion is a widen.
+       | rep1 < rep2 && rep2 > rep3 ->
+           cmmMachOpFold (intconv signed1 rep1 rep3) [x]
+       -- Nested widenings: collapse if the signedness is the same
+       | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
+           cmmMachOpFold (intconv signed1 rep1 rep3) [x]
+       -- Nested narrowings: collapse
+       | rep1 > rep2 && rep2 > rep3 ->
+           cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
+       | otherwise ->
+           CmmMachOp conv_outer args
+  where
+       isIntConversion (MO_U_Conv rep1 rep2) = Just (rep1,rep2,False)
+       isIntConversion (MO_S_Conv rep1 rep2) = Just (rep1,rep2,True)
+       isIntConversion _ = Nothing
+ 
+       intconv True  = MO_S_Conv
+       intconv False = MO_U_Conv
+
+-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
+-- but what if the architecture only supports word-sized loads, should
+-- we do the transformation anyway?
  
  cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
    = case mop of
diff --git a/ghc/compiler/nativeGen/MachCodeGen.hs b/ghc/compiler/nativeGen/MachCodeGen.hs

index 35e0105..c93b678 100644 (file)
--- a/ghc/compiler/nativeGen/MachCodeGen.hs
+++ b/ghc/compiler/nativeGen/MachCodeGen.hs
@@ -37,6 +37,7 @@ import Outputable
  import qualified Outputable
  import FastString
  import FastTypes       ( isFastTrue )
+import Constants       ( wORD_SIZE )
  
  #ifdef DEBUG
  import Outputable      ( assertPanic )
@@ -102,15 +103,17 @@ stmtToInstrs stmt = case stmt of
  
      CmmAssign reg src
        | isFloatingRep kind -> assignReg_FltCode kind reg src
-      | wordRep == I32 && kind == I64
-                          -> assignReg_I64Code      reg src
+#if WORD_SIZE_IN_BITS==32
+      | kind == I64       -> assignReg_I64Code      reg src
+#endif
        | otherwise         -> assignReg_IntCode kind reg src
         where kind = cmmRegRep reg
  
      CmmStore addr src
        | isFloatingRep kind -> assignMem_FltCode kind addr src
-      | wordRep == I32 && kind == I64
-                        -> assignMem_I64Code      addr src
+#if WORD_SIZE_IN_BITS==32
+      | kind == I64     -> assignMem_I64Code      addr src
+#endif
        | otherwise       -> assignMem_IntCode kind addr src
         where kind = cmmExprRep src
  
@@ -157,9 +160,14 @@ data ChildCode64   -- a.k.a "Register64"
                         -- selection game are therefore that the returned
                         -- Reg may be modified
  
+#if WORD_SIZE_IN_BITS==32
  assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
  assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
+#endif
+
+#ifndef x86_64_TARGET_ARCH
  iselExpr64        :: CmmExpr -> NatM ChildCode64
+#endif
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
@@ -463,6 +471,21 @@ swizzleRegisterRep (Any _ codefn)     rep = Any rep codefn
  
  
  -- -----------------------------------------------------------------------------
+-- Utils based on getRegister, below
+
+-- The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+       tmp <- getNewRegNat rep
+       return (tmp, code tmp)
+    Fixed _ reg code -> 
+       return (reg, code)
+
+-- -----------------------------------------------------------------------------
  -- Grab the Reg for a CmmReg
  
  getRegisterReg :: CmmReg -> Reg
@@ -769,6 +792,31 @@ getRegister (CmmLit (CmmFloat d F64))
      -- in
      return (Any F64 code)
  
+#endif /* i386_TARGET_ARCH */
+
+#if x86_64_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat 0.0 rep)) = do
+   let code dst = unitOL  (XOR rep (OpReg dst) (OpReg dst))
+       -- I don't know why there are xorpd, xorps, and pxor instructions.
+       -- They all appear to do the same thing --SDM
+   return (Any rep code)
+
+getRegister (CmmLit (CmmFloat f rep)) = do
+    lbl <- getNewLabelNat
+    let code dst = toOL [
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat f rep)],
+           MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+       -- ToDo: should use %rip-relative
+           ]
+    -- in
+    return (Any rep code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- catch simple cases of zero- or sign-extended load
  getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
@@ -787,11 +835,86 @@ getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
    code <- intLoadCode (MOVSxL I16) addr
    return (Any I32 code)
  
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I8) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I16) addr
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
+  return (Any I64 code)
+
+getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL I32) addr
+  return (Any I64 code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
+  lbl <- getNewLabelNat
+  let
+    code dst = toOL [
+       -- This is how gcc does it, so it can't be that bad:
+       LDATA ReadOnlyData16 [
+               CmmAlign 16,
+               CmmDataLabel lbl,
+               CmmStaticLit (CmmInt 0x80000000 I32),
+               CmmStaticLit (CmmInt 0 I32),
+               CmmStaticLit (CmmInt 0 I32),
+               CmmStaticLit (CmmInt 0 I32)
+       ],
+       XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+               -- xorps, so we need the 128-bit constant
+               -- ToDo: rip-relative
+       ]
+  --
+  return (Any F32 code)
+
+getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
+  lbl <- getNewLabelNat
+  let
+       -- This is how gcc does it, so it can't be that bad:
+    code dst = toOL [
+       LDATA ReadOnlyData16 [
+               CmmAlign 16,
+               CmmDataLabel lbl,
+               CmmStaticLit (CmmInt 0x8000000000000000 I64),
+               CmmStaticLit (CmmInt 0 I64)
+       ],
+               -- gcc puts an unpck here.  Wonder if we need it.
+       XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst)
+               -- xorpd, so we need the 128-bit constant
+               -- ToDo: rip-relative
+       ]
+  --
+  return (Any F64 code)
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  getRegister (CmmMachOp mop [x]) -- unary MachOps
    = case mop of
+#if i386_TARGET_ARCH
        MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
        MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
+#endif
  
        MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
        MO_Not rep   -> trivialUCode rep (NOT  rep) x
@@ -805,6 +928,15 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
        MO_S_Conv I16 I8  -> conversionNop I16 x
        MO_U_Conv I32 I16 -> conversionNop I32 x
        MO_S_Conv I32 I16 -> conversionNop I32 x
+#if x86_64_TARGET_ARCH
+      MO_U_Conv I64 I32 -> conversionNop I64 x
+      MO_S_Conv I64 I32 -> conversionNop I64 x
+      MO_U_Conv I64 I16 -> conversionNop I64 x
+      MO_S_Conv I64 I16 -> conversionNop I64 x
+      MO_U_Conv I64 I8  -> conversionNop I64 x
+      MO_S_Conv I64 I8  -> conversionNop I64 x
+#endif
+
        MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
        MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
  
@@ -817,12 +949,32 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps
        MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
        MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x
  
+#if x86_64_TARGET_ARCH
+      MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
+      MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
+      MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
+      MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
+      MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
+      MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
+       -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+       -- However, we don't want the register allocator to throw it
+       -- away as an unnecessary reg-to-reg move, so we keep it in
+       -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
        MO_S_Conv F32 F64 -> conversionNop F64 x
        MO_S_Conv F64 F32 -> conversionNop F32 x
+#else
+      MO_S_Conv F32 F64 -> coerceFP2FP F64 x
+      MO_S_Conv F64 F32 -> coerceFP2FP F32 x
+#endif
+
        MO_S_Conv from to
         | isFloatingRep from -> coerceFP2Int from to x
         | isFloatingRep to   -> coerceInt2FP from to x
  
+      other -> pprPanic "getRegister" (pprMachOp mop)
     where
         -- signed or unsigned extension.
         integerExtend from to instr expr = do
@@ -869,14 +1021,27 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
        MO_U_Lt rep -> condIntReg LU  x y
        MO_U_Le rep -> condIntReg LEU x y
  
-      MO_Add F32 -> trivialFCode F32  GADD x y
-      MO_Sub F32 -> trivialFCode F32  GSUB x y
+#if i386_TARGET_ARCH
+      MO_Add F32 -> trivialFCode F32 GADD x y
+      MO_Sub F32 -> trivialFCode F32 GSUB x y
  
        MO_Add F64 -> trivialFCode F64 GADD x y
        MO_Sub F64 -> trivialFCode F64 GSUB x y
  
-      MO_S_Quot F32 -> trivialFCode  F32  GDIV x y
+      MO_S_Quot F32 -> trivialFCode F32 GDIV x y
        MO_S_Quot F64 -> trivialFCode F64 GDIV x y
+#endif
+
+#if x86_64_TARGET_ARCH
+      MO_Add F32 -> trivialFCode F32 ADD x y
+      MO_Sub F32 -> trivialFCode F32 SUB x y
+
+      MO_Add F64 -> trivialFCode F64 ADD x y
+      MO_Sub F64 -> trivialFCode F64 SUB x y
+
+      MO_S_Quot F32 -> trivialFCode F32 FDIV x y
+      MO_S_Quot F64 -> trivialFCode F64 FDIV x y
+#endif
  
        MO_Add rep -> add_code rep x y
        MO_Sub rep -> sub_code rep x y
@@ -886,8 +1051,16 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
        MO_U_Quot rep -> div_code rep False True  x y
        MO_U_Rem  rep -> div_code rep False False x y
  
+#if i386_TARGET_ARCH
        MO_Mul   F32 -> trivialFCode F32 GMUL x y
        MO_Mul   F64 -> trivialFCode F64 GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+      MO_Mul   F32 -> trivialFCode F32 MUL x y
+      MO_Mul   F64 -> trivialFCode F64 MUL x y
+#endif
+
        MO_Mul   rep -> let op = IMUL rep in 
                       trivialCode rep op (Just op) x y
  
@@ -912,24 +1085,24 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
    where
      --------------------
      imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
-    imulMayOflo I32 a b = do
-         res_lo <- getNewRegNat I32
-         res_hi <- getNewRegNat I32
+    imulMayOflo rep a b = do
+         res_lo <- getNewRegNat rep
+         res_hi <- getNewRegNat rep
           (a_reg, a_code) <- getNonClobberedReg a
           (b_reg, b_code) <- getSomeReg   b
           let 
               code dst = a_code `appOL` b_code `appOL`
                          toOL [
-                           MOV I32 (OpReg a_reg) (OpReg res_hi),
-                           MOV I32 (OpReg b_reg) (OpReg res_lo),
+                           MOV rep (OpReg a_reg) (OpReg res_hi),
+                           MOV rep (OpReg b_reg) (OpReg res_lo),
                             IMUL64 res_hi res_lo,               -- result in res_hi:res_lo
-                           SAR I32 (OpImm (ImmInt 31)) (OpReg res_lo), -- sign extend lower part
-                           SUB I32 (OpReg res_hi) (OpReg res_lo),      -- compare against upper
-                           MOV I32 (OpReg res_lo) (OpReg dst)
+                           SAR rep (OpImm (ImmInt 31)) (OpReg res_lo), -- sign extend lower part
+                           SUB rep (OpReg res_hi) (OpReg res_lo),      -- compare against upper
+                           MOV rep (OpReg res_lo) (OpReg dst)
                             -- dst==0 if high part == sign extended low part
                          ]
           -- in
-        return (Any I32 code)
+        return (Any rep code)
  
      --------------------
      shift_code :: MachRep
@@ -987,7 +1160,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
            (y_op, y_code) <- getOperand y -- cannot be clobbered
            x_code <- getAnyReg x
            let
-            widen | signed    = CLTD
+            widen | signed    = CLTD rep
                    | otherwise = XOR rep (OpReg edx) (OpReg edx)
  
              instr | signed    = IDIV
@@ -1004,17 +1177,18 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
             return (Fixed rep result code)
  
  
-
  getRegister (CmmLoad mem pk)
    | isFloatingRep pk
    = do
      Amode src mem_code <- getAmode mem
      let
         code dst = mem_code `snocOL` 
-                  GLD pk src dst
+                  IF_ARCH_i386(GLD pk src dst,
+                               MOV pk (OpAddr src) (OpReg dst))
      --
      return (Any pk code)
  
+#if i386_TARGET_ARCH
  getRegister (CmmLoad mem pk)
    | pk /= I64
    = do 
@@ -1029,6 +1203,15 @@ getRegister (CmmLoad mem pk)
         -- we can't guarantee access to an 8-bit variant of every register
         -- (esi and edi don't have 8-bit variants), so to make things
         -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+  = do 
+    code <- intLoadCode (MOV pk) mem
+    return (Any pk code)
+#endif
  
  getRegister (CmmLit (CmmInt 0 rep))
    = let
@@ -1065,22 +1248,13 @@ anyReg :: Register -> NatM (Reg -> InstrBlock)
  anyReg (Any _ code)          = return code
  anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
  
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
-  r <- getRegister expr
-  case r of
-    Any rep code -> do
-       tmp <- getNewRegNat rep
-       return (tmp, code tmp)
-    Fixed _ reg code -> 
-       return (reg, code)
-
  -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
  -- Fixed registers might not be byte-addressable, so we make sure we've
  -- got a temporary, inserting an extra reg copy if necessary.
  getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
  getByteReg expr = do
    r <- getRegister expr
    case r of
@@ -1094,6 +1268,7 @@ getByteReg expr = do
             return (tmp, code `snocOL` reg2reg rep reg tmp)
         -- ToDo: could optimise slightly by checking for byte-addressable
         -- real registers, but that will happen very rarely if at all.
+#endif
  
  -- Another variant: this time we want the result in a register that cannot
  -- be modified by code to evaluate an arbitrary expression.
@@ -1114,10 +1289,12 @@ getNonClobberedReg expr = do
  
  reg2reg :: MachRep -> Reg -> Reg -> Instr
  reg2reg rep src dst 
+#if i386_TARGET_ARCH
    | isFloatingRep rep = GMOV src dst
+#endif
    | otherwise        = MOV rep (OpReg src) (OpReg dst)
  
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
@@ -1553,19 +1730,6 @@ extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x]
  extendUExpr I32 x = x
  extendUExpr rep x = CmmMachOp (MO_U_Conv rep I32) [x]
  
---  ###FIXME: exact code duplication from x86 case
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
-  r <- getRegister expr
-  case r of
-    Any rep code -> do
-       tmp <- getNewRegNat rep
-       return (tmp, code tmp)
-    Fixed _ reg code -> 
-       return (reg, code)
-
  #endif /* powerpc_TARGET_ARCH */
  
  
@@ -1639,17 +1803,19 @@ getAmode other
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- This is all just ridiculous, since it carefully undoes 
  -- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+  | not (is64BitLit lit)
    -- ASSERT(rep == I32)???
    = do (x_reg, x_code) <- getSomeReg x
         let off = ImmInt (-(fromInteger i))
         return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code)
    
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+  | not (is64BitLit lit)
    -- ASSERT(rep == I32)???
    = do (x_reg, x_code) <- getSomeReg x
         let off = ImmInt (fromInteger i)
@@ -1674,14 +1840,14 @@ getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
         return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0))
                 code)
  
-getAmode (CmmLit lit)
+getAmode (CmmLit lit) | not (is64BitLit lit)
    = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
  
  getAmode expr = do
    (reg,code) <- getSomeReg expr
    return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code)
  
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
@@ -1795,17 +1961,19 @@ getAmode other
  -- -----------------------------------------------------------------------------
  -- getOperand: sometimes any operand will do.
  
--- getOperand gets a *safe* operand; that is, the value of the operand
--- will remain valid across the computation of an arbitrary expression,
--- unless the expression is computed directly into a register which
--- the operand refers to (see trivialCode where this function is used
--- for an example).
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
  
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-getOperand (CmmLoad mem pk) 
-  | not (isFloatingRep pk) && pk /= I64 = do
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+getNonClobberedOperand (CmmLit lit)
+  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+    return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk) 
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
      Amode src mem_code <- getAmode mem
      (src',save_code) <- 
         if (amodeCouldBeClobbered src) 
@@ -1816,8 +1984,7 @@ getOperand (CmmLoad mem pk)
                 else
                    return (src, nilOL)
      return (OpAddr src', save_code `appOL` mem_code)
-
-getOperand e = do
+getNonClobberedOperand e = do
      (reg, code) <- getNonClobberedReg e
      return (OpReg reg, code)
  
@@ -1827,6 +1994,39 @@ amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
  regClobbered (RealReg rr) = isFastTrue (freeReg rr)
  regClobbered _ = False
  
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+getOperand (CmmLit lit)
+  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
+    return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getOperand e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit)  = not (is64BitLit lit) && 
+                         not (isFloatingRep (cmmLitRep lit))
+isOperand _             = False
+
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getRegOrMem e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+#if x86_64_TARGET_ARCH
+is64BitLit (CmmInt i I64) = i > 0x7fffffff || i < -0x80000000
+#endif
+is64BitLit x = False
  #endif
  
  -- -----------------------------------------------------------------------------
@@ -1846,7 +2046,7 @@ getCondCode = panic "MachCode.getCondCode: not on Alphas"
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH || sparc_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
  -- yes, they really do seem to want exactly the same!
  
  getCondCode (CmmMachOp mop [x, y])
@@ -1940,10 +2140,10 @@ condFltCode = panic "MachCode.condFltCode: not on Alphas"
  #endif /* alpha_TARGET_ARCH */
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- memory vs immediate
-condIntCode cond (CmmLoad x pk) (CmmLit lit) = do
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
      Amode x_addr x_code <- getAmode x
      let
         imm  = litToImm lit
@@ -1961,50 +2161,29 @@ condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
      --
      return (CondCode False cond code)
  
--- anything vs immediate
-condIntCode cond x (CmmLit lit) = do
-    (x_reg, x_code) <- getSomeReg x
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+    (x_reg, x_code) <- getNonClobberedReg x
+    (y_op,  y_code) <- getOperand y    
      let
-       imm  = litToImm lit
-       code = x_code `snocOL`
-                  CMP (cmmLitRep lit) (OpImm imm) (OpReg x_reg)
-    -- in
-    return (CondCode False cond code)
-
--- memory vs anything
-condIntCode cond (CmmLoad x pk) y = do
-    (y_reg, y_code) <- getNonClobberedReg y
-    Amode x_addr x_code <- getAmode x
-    let
-       code = y_code `appOL`
-               x_code `snocOL`
-                 CMP pk (OpReg y_reg) (OpAddr x_addr)
-    -- in
-    return (CondCode False cond code)
-
--- anything vs memory
-condIntCode cond y (CmmLoad x pk) = do
-    (y_reg, y_code) <- getNonClobberedReg y
-    Amode x_addr x_code <- getAmode x
-    let
-       code = y_code `appOL`
-               x_code `snocOL`
-                 CMP pk (OpAddr x_addr) (OpReg y_reg)
+       code = x_code `appOL` y_code `snocOL`
+                  CMP (cmmExprRep x) y_op (OpReg x_reg)
      -- in
      return (CondCode False cond code)
  
  -- anything vs anything
  condIntCode cond x y = do
-  (x_op, x_code) <- getOperand x
-  (y_reg, y_code) <- getSomeReg y
+  (y_reg, y_code) <- getNonClobberedReg y
+  (x_op, x_code) <- getRegOrMem x
    let
-       code = x_code `appOL`
-              y_code `snocOL`
+       code = y_code `appOL`
+              x_code `snocOL`
                   CMP (cmmExprRep x) (OpReg y_reg) x_op
    -- in
    return (CondCode False cond code)
+#endif
  
------------
+#if i386_TARGET_ARCH
  condFltCode cond x y 
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
    (x_reg, x_code) <- getNonClobberedReg x
@@ -2015,9 +2194,25 @@ condFltCode cond x y
    -- The GCMP insn does the test and sets the zero flag if comparable
    -- and true.  Hence we always supply EQQ as the condition to test.
    return (CondCode True EQQ code)
-
  #endif /* i386_TARGET_ARCH */
  
+#if x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg.  We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+  (x_reg, x_code) <- getNonClobberedReg x
+  (y_op, y_code) <- getOperand y
+  let
+       code = x_code `appOL`
+              y_code `snocOL`
+                 CMP (cmmExprRep x) y_op (OpReg x_reg)
+  -- in
+  return (CondCode False (condToUnsigned cond) code)
+       -- we need to use the unsigned comparison operators on the
+       -- result of this comparison.
+#endif
+
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
  #if sparc_TARGET_ARCH
@@ -2171,7 +2366,7 @@ assignIntCode pk dst src
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- integer assignment to memory
  assignMem_IntCode pk addr src = do
@@ -2189,7 +2384,7 @@ assignMem_IntCode pk addr src = do
      return code
    where
      get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)  -- code, operator
-    get_op_RI (CmmLit lit)
+    get_op_RI (CmmLit lit) | not (is64BitLit lit)
        = return (nilOL, OpImm (litToImm lit))
      get_op_RI op
        = do (reg,code) <- getNonClobberedReg op
@@ -2298,7 +2493,7 @@ assignFltCode pk dst src
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- Floating point assignment to memory
  assignMem_FltCode pk addr src = do
@@ -2307,7 +2502,8 @@ assignMem_FltCode pk addr src = do
    let
         code = src_code `appOL`
                addr_code `snocOL`
-                GST pk src_reg addr
+                IF_ARCH_i386(GST pk src_reg addr,
+                            MOV pk (OpReg src_reg) (OpAddr addr))
    return code
  
  -- Floating point assignment to a register/temporary
@@ -2416,7 +2612,7 @@ genJump tree
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  genJump (CmmLoad mem pk) = do
    Amode target code <- getAmode mem
@@ -2471,7 +2667,7 @@ genBranch :: BlockId -> NatM InstrBlock
  genBranch id = return (unitOL (BR id))
  #endif
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  genBranch id = return (unitOL (JXX ALWAYS id))
  #endif
  
@@ -2661,7 +2857,7 @@ genCondJump lbl (StPrim op [x, y])
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  genCondJump id bool = do
    CondCode _ cond code <- getCondCode bool
@@ -2835,7 +3031,7 @@ genCCall target dest_regs args vols = do
                 toOL (
                         -- Deallocate parameters after call for ccall;
                         -- but not for stdcall (callee does it)
-                  (if cconv == StdCallConv then [] else 
+                  (if cconv == StdCallConv || tot_arg_size==0 then [] else 
                    [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
                    ++
                    [DELTA (delta + tot_arg_size)]
@@ -2966,6 +3162,159 @@ outOfLineFloatOp mop res args vols
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
+#if x86_64_TARGET_ARCH
+
+genCCall (CmmPrim op) [(r,_)] args vols = 
+  panic "genCCall(CmmPrim)(x86_64)"
+
+genCCall target dest_regs args vols = do
+
+       -- load up the register arguments
+    (stack_args, sse_regs, load_args_code)
+        <- load_args args allArgRegs allFPArgRegs 0 nilOL
+
+    let
+       tot_arg_size = arg_size * length stack_args
+
+       -- On entry to the called function, %rsp should be aligned
+       -- on a 16-byte boundary +8 (i.e. the first stack arg after
+       -- the return address is 16-byte aligned).  In STG land
+       -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+       -- need to make sure we push a multiple of 16-bytes of args,
+       -- plus the return address, to get the correct alignment.
+       -- Urg, this is hard.  We need to feed the delta back into
+       -- the arg pushing code.
+    (real_size, adjust_rsp) <-
+       if tot_arg_size `rem` 16 == 0
+           then return (tot_arg_size, nilOL)
+           else do -- we need to adjust...
+               delta <- getDeltaNat
+               setDeltaNat (delta-8)
+               return (tot_arg_size+8, toOL [
+                               SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
+                               DELTA (delta-8)
+                       ])
+
+       -- push the stack args, right to left
+    push_code <- push_args (reverse stack_args) nilOL
+    delta <- getDeltaNat
+
+    -- deal with static vs dynamic call targets
+    (callinsns,cconv) <-
+      case target of
+       -- CmmPrim -> ...
+        CmmForeignCall (CmmLit (CmmLabel lbl)) conv
+           -> -- ToDo: stdcall arg sizes
+             return (unitOL (CALL (Left fn_imm)), conv)
+          where fn_imm = ImmCLbl lbl
+        CmmForeignCall expr conv
+           -> do (dyn_r, dyn_c) <- getSomeReg expr
+                return (dyn_c `snocOL` CALL (Right dyn_r), conv)
+
+    let
+       -- The x86_64 ABI requires us to set %al to the number of SSE
+       -- registers that contain arguments, if the called routine
+       -- is a varargs function.  We don't know whether it's a
+       -- varargs function or not, so we have to assume it is.
+       --
+       -- It's not safe to omit this assignment, even if the number
+       -- of SSE regs in use is zero.  If %al is larger than 8
+       -- on entry to a varargs function, seg faults ensue.
+       assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))
+
+    let call = callinsns `appOL`
+               toOL (
+                       -- Deallocate parameters after call for ccall;
+                       -- but not for stdcall (callee does it)
+                  (if cconv == StdCallConv || real_size==0 then [] else 
+                  [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
+                  ++
+                  [DELTA (delta + real_size)]
+               )
+    -- in
+    setDeltaNat (delta + real_size)
+
+    let
+       -- assign the results, if necessary
+       assign_code []     = nilOL
+       assign_code [(dest,_hint)] = 
+         case rep of
+               F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+               F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
+               rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
+         where 
+               rep = cmmRegRep dest
+               r_dest = getRegisterReg dest
+       assign_code many = panic "genCCall.assign_code many"
+
+    return (load_args_code     `appOL` 
+           adjust_rsp          `appOL`
+           push_code           `appOL`
+           assign_eax sse_regs `appOL`
+           call                `appOL` 
+           assign_code dest_regs)
+
+  where
+    arg_size = 8 -- always, at the mo
+
+    load_args :: [(CmmExpr,MachHint)]
+             -> [Reg]                  -- int regs avail for args
+             -> [Reg]                  -- FP regs avail for args
+             -> Int -> InstrBlock
+             -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock)
+    load_args args [] [] sse_regs code = return (args, sse_regs, code)
+       -- no more regs to use
+    load_args [] aregs fregs sse_regs code = return ([],sse_regs,code)
+       -- no more args to push
+    load_args ((arg,hint) : rest) aregs fregs sse_regs code
+       | isFloatingRep arg_rep = 
+       case fregs of
+         [] -> push_this_arg
+         (r:rs) -> do
+            arg_code <- getAnyReg arg
+            load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r)
+       | otherwise =
+       case aregs of
+         [] -> push_this_arg
+         (r:rs) -> do
+            arg_code <- getAnyReg arg
+            load_args rest rs fregs sse_regs (code `appOL` arg_code r)
+       where
+         arg_rep = cmmExprRep arg
+
+         push_this_arg = do
+           (args',sse',code') <- load_args rest aregs fregs sse_regs code
+           return ((arg,hint):args', sse', code')
+
+    push_args [] code = return code
+    push_args ((arg,hint):rest) code
+       | isFloatingRep arg_rep = do
+        (arg_reg, arg_code) <- getSomeReg arg
+         delta <- getDeltaNat
+         setDeltaNat (delta-arg_size)
+        let code' = code `appOL` toOL [
+                       MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
+                       SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+                       DELTA (delta-arg_size)]
+        push_args rest code'
+
+       | otherwise = do
+       -- we only ever generate word-sized function arguments.  Promotion
+       -- has already happened: our Int8# type is kept sign-extended
+       -- in an Int#, for example.
+        ASSERT(arg_rep == I64) return ()
+        (arg_op, arg_code) <- getOperand arg
+         delta <- getDeltaNat
+         setDeltaNat (delta-arg_size)
+        let code' = code `appOL` toOL [PUSH I64 arg_op, 
+                                       DELTA (delta-arg_size)]
+        push_args rest code'
+       where
+         arg_rep = cmmExprRep arg
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
  #if sparc_TARGET_ARCH
  {- 
     The SPARC calling convention is an absolute
@@ -3365,13 +3714,13 @@ genCCall target dest_regs argsAndHints vols
  
  genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  genSwitch expr ids = do
    (reg,e_code) <- getSomeReg expr
    lbl <- getNewLabelNat
    let
         jumpTable = map jumpTableEntry ids
-       op = OpAddr (AddrBaseIndex Nothing (Just (reg,4)) (ImmCLbl lbl))
+       op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl))
         code = e_code `appOL` toOL [
                 LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                 JMP_TBL op [ id | Just id <- ids ]
@@ -3455,7 +3804,7 @@ condFltReg = panic "MachCode.condFltReg (not on Alpha)"
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  condIntReg cond x y = do
    CondCode _ cond cond_code <- condIntCode cond x y
@@ -3658,9 +4007,11 @@ trivialCode
      -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
        ,IF_ARCH_i386 ((Operand -> Operand -> Instr) 
                       -> Maybe (Operand -> Operand -> Instr)
+      ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr) 
+                     -> Maybe (Operand -> Operand -> Instr)
        ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
        ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
-      ,))))
+      ,)))))
      -> CmmExpr -> CmmExpr -- the two arguments
      -> NatM Register
  
@@ -3670,7 +4021,8 @@ trivialFCode
      -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
        ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
        ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr)
-      ,)))
+      ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr)
+      ,))))
      -> CmmExpr -> CmmExpr -- the two arguments
      -> NatM Register
  #endif
@@ -3679,9 +4031,10 @@ trivialUCode
      :: MachRep 
      -> IF_ARCH_alpha((RI -> Reg -> Instr)
        ,IF_ARCH_i386 ((Operand -> Instr)
+      ,IF_ARCH_x86_64 ((Operand -> Instr)
        ,IF_ARCH_sparc((RI -> Reg -> Instr)
        ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
-      ,))))
+      ,)))))
      -> CmmExpr -- the one argument
      -> NatM Register
  
@@ -3690,8 +4043,9 @@ trivialUFCode
      :: MachRep
      -> IF_ARCH_alpha((Reg -> Reg -> Instr)
        ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
+      ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
        ,IF_ARCH_sparc((Reg -> Reg -> Instr)
-      ,)))
+      ,))))
      -> CmmExpr -- the one argument
      -> NatM Register
  #endif
@@ -3770,7 +4124,7 @@ trivialUFCode _ instr x
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  {-
  The Rules of the Game are:
@@ -3817,16 +4171,8 @@ SDM's version of The Rules:
    register happens to be the destination register.
  -}
  
-trivialCode rep instr maybe_revinstr a (CmmLit lit_b) = do
-  a_code <- getAnyReg a
-  let
-       code dst
-          = a_code dst `snocOL` 
-           instr (OpImm (litToImm lit_b)) (OpReg dst)
-  -- in
-  return (Any rep code)
-              
-trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
+trivialCode rep instr (Just revinstr) (CmmLit lit_a) b
+  | not (is64BitLit lit_a) = do
    b_code <- getAnyReg b
    let
         code dst 
@@ -3836,7 +4182,7 @@ trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = do
    return (Any rep code)
  
  trivialCode rep instr maybe_revinstr a b = do
-  (b_op, b_code) <- getOperand b
+  (b_op, b_code) <- getNonClobberedOperand b
    a_code <- getAnyReg a
    tmp <- getNewRegNat rep
    let
@@ -3861,6 +4207,7 @@ trivialCode rep instr maybe_revinstr a b = do
   where
    reg `clashesWith` OpReg reg2   = reg == reg2
    reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode)
+  reg `clashesWith` _            = False
  
  -----------
  
@@ -3875,6 +4222,8 @@ trivialUCode rep instr x = do
  
  -----------
  
+#if i386_TARGET_ARCH
+
  trivialFCode pk instr x y = do
    (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
    (y_reg, y_code) <- getSomeReg y
@@ -3886,6 +4235,26 @@ trivialFCode pk instr x y = do
    -- in
    return (Any pk code)
  
+#endif
+
+#if x86_64_TARGET_ARCH
+
+-- We use the 2-operand SSE2 floating pt instructions.  ToDo: improve on
+-- this by using some of the special cases in trivialCode above.
+trivialFCode pk instr x y = do
+  (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too
+  x_code <- getAnyReg x
+  let
+     code dst =
+       y_code `appOL`
+       x_code dst `snocOL`
+       instr pk (IF_ARCH_x86_64(OpReg,) y_reg)
+                (IF_ARCH_x86_64(OpReg,) dst)
+  -- in
+  return (Any pk code)
+
+#endif
+
  -------------
  
  trivialUFCode rep instr x = do
@@ -4076,7 +4445,7 @@ remainderCode rep div x y = do
  coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register
  coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register
  
-#ifdef sparc_TARGET_ARCH
+#if sparc_TARGET_ARCH
  coerceDbl2Flt :: CmmExpr -> NatM Register
  coerceFlt2Dbl :: CmmExpr -> NatM Register
  #endif
@@ -4144,6 +4513,37 @@ coerceFP2Int from to x = do
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
+#if x86_64_TARGET_ARCH
+
+coerceFP2Int from to x = do
+  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+  let
+        opc  = case to of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+        code dst = x_code `snocOL` opc x_op dst
+  -- in
+  return (Any to code) -- works even if the destination rep is <I32
+
+coerceInt2FP from to x = do
+  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+  let
+        opc  = case to of F32 -> CVTSI2SS; F64 -> CVTSI2SD
+        code dst = x_code `snocOL` opc x_op dst
+  -- in
+  return (Any to code) -- works even if the destination rep is <I32
+
+coerceFP2FP :: MachRep -> CmmExpr -> NatM Register
+coerceFP2FP to x = do
+  (x_reg, x_code) <- getSomeReg x
+  let
+        opc  = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD
+        code dst = x_code `snocOL` opc x_reg dst
+  -- in
+  return (Any to code)
+
+#endif
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
  #if sparc_TARGET_ARCH
  
  coerceInt2FP pk x
diff --git a/ghc/compiler/nativeGen/MachInstrs.hs b/ghc/compiler/nativeGen/MachInstrs.hs

index 1b662e3..0839694 100644 (file)
--- a/ghc/compiler/nativeGen/MachInstrs.hs
+++ b/ghc/compiler/nativeGen/MachInstrs.hs
@@ -14,24 +14,22 @@ module MachInstrs (
  
         -- * Machine instructions
         Instr(..),
-       Cond(..),
-#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH
+       Cond(..), condUnsigned, condToSigned, condToUnsigned,
+
+#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
         Size(..), machRepSize,
  #endif
         RI(..),
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
         Operand(..),
+#endif
+#if i386_TARGET_ARCH
         i386_insert_ffrees,
  #endif
  #if sparc_TARGET_ARCH
         riZero, fpRelEA, moveSp, fPair,
  #endif
-#if powerpc_TARGET_ARCH
-       condUnsigned, condToSigned,
-#endif
-       DestInfo(..), hasDestInfo, pprDests,
-
      ) where
  
  #include "HsVersions.h"
@@ -42,7 +40,6 @@ import MachOp         ( MachRep(..) )
  import CLabel           ( CLabel, pprCLabel )
  import Panic           ( panic )
  import Outputable
-import Config           ( cLeadingUnderscore )
  import FastString
  
  import GLAEXTS
@@ -72,7 +69,7 @@ data Cond
    | ULE                -- For CMP only
    | ULT                -- For CMP only
  #endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
    = ALWAYS     -- What's really used? ToDo
    | EQQ
    | GE
@@ -122,6 +119,23 @@ data Cond
  #endif
      deriving Eq  -- to make an assertion work
  
+condUnsigned GU  = True
+condUnsigned LU  = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _   = False
+
+condToSigned GU  = GTT
+condToSigned LU  = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x   = x
+
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE  = GEU
+condToUnsigned LE  = LEU
+condToUnsigned x   = x
  
  -- -----------------------------------------------------------------------------
  -- Sizes on this architecture
@@ -129,7 +143,7 @@ data Cond
  -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
  -- here.  I've removed them from the x86 version, we'll see what happens --SDM
  
-#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH
+#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
  data Size
  #if alpha_TARGET_ARCH
      = B            -- byte
@@ -363,7 +377,7 @@ bit or 64 bit precision.
  --SDM 1/2003
  -}
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- data Instr continues...
  
@@ -371,6 +385,9 @@ bit or 64 bit precision.
         | MOV         MachRep Operand Operand
         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
+       -- x86_64 note: plain mov into a 32-bit register always zero-extends
+       -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
+       -- don't affect the high bits of the register.
  
  -- Load effective address (also a very useful three-operand add instruction :-)
         | LEA         MachRep Operand Operand
@@ -379,9 +396,9 @@ bit or 64 bit precision.
         | ADD         MachRep Operand Operand
         | ADC         MachRep Operand Operand
         | SUB         MachRep Operand Operand
-       | IMUL        MachRep Operand Operand   -- signed int mul
-       | MUL         MachRep Operand Operand   -- unsigned int mul
  
+       | MUL         MachRep Operand Operand
+       | IMUL        MachRep Operand Operand   -- signed int mul
          | IMUL64      Reg Reg
         -- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
  
@@ -403,6 +420,7 @@ bit or 64 bit precision.
          | BT          MachRep Imm Operand
         | NOP
  
+#if i386_TARGET_ARCH
  -- Float Arithmetic.
  
  -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles 
@@ -442,6 +460,32 @@ bit or 64 bit precision.
         | GTAN        MachRep Reg Reg -- src, dst
         
          | GFREE         -- do ffree on all x86 regs; an ugly hack
+#endif
+
+#if x86_64_TARGET_ARCH
+-- SSE2 floating point: we use a restricted set of the available SSE2
+-- instructions for floating-point.
+
+       -- use MOV for moving (either movss or movsd (movlpd better?))
+
+       | CVTSS2SD      Reg Reg         -- F32 to F64
+       | CVTSD2SS      Reg Reg         -- F64 to F32
+       | CVTSS2SI      Operand Reg     -- F32 to I32/I64 (with rounding)
+       | CVTSD2SI      Operand Reg     -- F64 to I32/I64 (with rounding)
+       | CVTSI2SS      Operand Reg     -- I32/I64 to F32
+       | CVTSI2SD      Operand Reg     -- I32/I64 to F64
+
+       -- use ADD & SUB for arithmetic.  In both cases, operands
+       -- are  Operand Reg.
+
+       -- SSE2 floating-point division:
+       | FDIV          MachRep Operand Operand   -- divisor, dividend(dst)
+
+       -- use CMP for comparisons.  ucomiss and ucomisd instructions
+       -- compare single/double prec floating point respectively.
+
+       | SQRT          MachRep Operand Reg     -- src, dst
+#endif
  
  -- Comparison
         | TEST          MachRep Operand Operand
@@ -462,7 +506,7 @@ bit or 64 bit precision.
         | CALL        (Either Imm Reg)
  
  -- Other things.
-       | CLTD -- sign extend %eax into %edx:%eax
+       | CLTD MachRep   -- sign extend %eax into %edx:%eax
  
         | FETCHGOT    Reg  -- pseudo-insn for position-independent code
                             -- pretty-prints as
@@ -475,7 +519,9 @@ data Operand
    | OpImm  Imm         -- immediate value
    | OpAddr AddrMode    -- memory reference
  
+#endif /* i386 or x86_64 */
  
+#if i386_TARGET_ARCH
  i386_insert_ffrees :: [Instr] -> [Instr]
  i386_insert_ffrees insns
     | any is_G_instr insns
@@ -506,7 +552,6 @@ is_G_instr instr
          GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
          GFREE -> panic "is_G_instr: GFREE (!)"
          other -> False
-
  #endif /* i386_TARGET_ARCH */
  
  
@@ -670,33 +715,4 @@ fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
               | FETCHPC Reg            -- pseudo-instruction:
                                        -- bcl to next insn, mflr reg
               
-condUnsigned GU = True
-condUnsigned LU = True
-condUnsigned GEU = True
-condUnsigned LEU = True
-condUnsigned _ = False
-
-condToSigned GU = GTT
-condToSigned LU = LTT
-condToSigned GEU = GE
-condToSigned LEU = LE
-condToSigned x = x
  #endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- DestInfo
-
--- ToDo: might not be needed anymore --SDM
-
--- used by insnFuture in RegAllocInfo.lhs
-data DestInfo
-   = NoDestInfo             -- no supplied dests; infer from context
-   | DestInfo [CLabel]      -- precisely these dests and no others
-
-hasDestInfo NoDestInfo   = False
-hasDestInfo (DestInfo _) = True
-
-pprDests :: DestInfo -> SDoc
-pprDests NoDestInfo      = text "NoDestInfo"
-pprDests (DestInfo dsts) = brackets (hsep (map pprCLabel dsts))
diff --git a/ghc/compiler/nativeGen/MachRegs.lhs b/ghc/compiler/nativeGen/MachRegs.lhs

index a3946a7..44448f6 100644 (file)
--- a/ghc/compiler/nativeGen/MachRegs.lhs
+++ b/ghc/compiler/nativeGen/MachRegs.lhs
@@ -51,6 +51,15 @@ module MachRegs (
         fake0, fake1, fake2, fake3, fake4, fake5,
         addrModeRegs,
  #endif
+#if x86_64_TARGET_ARCH
+       rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
+       eax, ebx, ecx, edx, esi, edi, ebp, esp,
+       r8, r9, r10, r11, r12, r13, r14, r15,
+       xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
+       xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+       xmm, eax, edx,
+       addrModeRegs, allFPArgRegs,
+#endif
  #if sparc_TARGET_ARCH
         fits13Bits,
         fpRel, gReg, iReg, lReg, oReg, largeOffsetError,
@@ -141,7 +150,7 @@ data AddrMode
    | AddrRegImm Reg Imm
  #endif
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
    = AddrBaseIndex      Base Index Displacement
    | ImmAddr            Imm Int
  
@@ -160,7 +169,7 @@ type Displacement = Imm
    | AddrRegImm Reg Imm
  #endif
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  addrModeRegs :: AddrMode -> [Reg]
  addrModeRegs (AddrBaseIndex b i _) =  b_regs ++ i_regs
    where
@@ -177,7 +186,7 @@ addrOffset addr off
  #if alpha_TARGET_ARCH
        _ -> panic "MachMisc.addrOffset not defined for Alpha"
  #endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
        ImmAddr i off0     -> Just (ImmAddr i (off0 + off))
  
        AddrBaseIndex r i (ImmInt n) -> Just (AddrBaseIndex r i (ImmInt (n + off)))
@@ -280,8 +289,10 @@ spRel :: Int       -- desired stack offset in words, positive or negative
        -> AddrMode
  
  spRel n
-#if i386_TARGET_ARCH
+#if defined(i386_TARGET_ARCH)
    = AddrBaseIndex (Just esp) Nothing (ImmInt (n * wORD_SIZE))
+#elif defined(x86_64_TARGET_ARCH)
+  = AddrBaseIndex (Just rsp) Nothing (ImmInt (n * wORD_SIZE))
  #else
    = AddrRegImm sp (ImmInt (n * wORD_SIZE))
  #endif
@@ -497,6 +508,88 @@ showReg n
  #endif
  
  {-
+AMD x86_64 architecture:
+- Registers 0-16 have 32-bit counterparts (eax, ebx etc.)
+- Registers 0-7 have 16-bit counterparts (ax, bx etc.)
+- Registers 0-3 have 8 bit counterparts (ah, bh etc.)
+
+-}
+
+#if x86_64_TARGET_ARCH
+
+rax, rbx, rcx, rdx, rsp, rbp, rsi, rdi, 
+  r8, r9, r10, r11, r12, r13, r14, r15,
+  xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
+  xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 :: Reg
+
+rax   = RealReg 0
+rbx   = RealReg 1
+rcx   = RealReg 2
+rdx   = RealReg 3
+rsi   = RealReg 4
+rdi   = RealReg 5
+rbp   = RealReg 6
+rsp   = RealReg 7
+r8    = RealReg 8
+r9    = RealReg 9
+r10   = RealReg 10
+r11   = RealReg 11
+r12   = RealReg 12
+r13   = RealReg 13
+r14   = RealReg 14
+r15   = RealReg 15
+xmm0  = RealReg 16
+xmm1  = RealReg 17
+xmm2  = RealReg 18
+xmm3  = RealReg 19
+xmm4  = RealReg 20
+xmm5  = RealReg 21
+xmm6  = RealReg 22
+xmm7  = RealReg 23
+xmm8  = RealReg 24
+xmm9  = RealReg 25
+xmm10 = RealReg 26
+xmm11 = RealReg 27
+xmm12 = RealReg 28
+xmm13 = RealReg 29
+xmm14 = RealReg 30
+xmm15 = RealReg 31
+
+ -- so we can re-use some x86 code:
+eax = rax
+ebx = rbx
+ecx = rcx
+edx = rdx
+esi = rsi
+edi = rdi
+ebp = rbp
+esp = rsp
+
+xmm n = RealReg (16+n)
+
+-- On x86, we might want to have an 8-bit RegClass, which would
+-- contain just regs 1-4 (the others don't have 8-bit versions).
+-- However, we can get away without this at the moment because the
+-- only allocatable integer regs are also 8-bit compatible (1, 3, 4).
+regClass (RealReg i)     = if i < 16 then RcInteger else RcDouble
+regClass (VirtualRegI  u) = RcInteger
+regClass (VirtualRegHi u) = RcInteger
+regClass (VirtualRegD  u) = RcDouble
+regClass (VirtualRegF  u) = pprPanic "regClass(x86_64):VirtualRegF" 
+                                    (ppr (VirtualRegF u))
+
+regNames 
+ = ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp" ]
+
+showReg :: RegNo -> String
+showReg n
+  | n >= 16 = "%xmm" ++ show n  
+  | n >= 8  = "%r" ++ show n
+  | otherwise = regNames !! n
+
+#endif
+
+{-
  The SPARC has 64 registers of interest; 32 integer registers and 32
  floating point registers.  The mapping of STG registers to SPARC
  machine registers is defined in StgRegs.h.  We are, of course,
@@ -647,6 +740,42 @@ names in the header files.  Gag me with a spoon, eh?
  #define fake4 12
  #define fake5 13
  #endif
+
+#if x86_64_TARGET_ARCH
+#define rax   0
+#define rbx   1
+#define rcx   2
+#define rdx   3
+#define rsi   4
+#define rdi   5
+#define rbp   6
+#define rsp   7
+#define r8    8
+#define r9    9
+#define r10   10
+#define r11   11
+#define r12   12
+#define r13   13
+#define r14   14
+#define r15   15
+#define xmm0  16
+#define xmm1  17
+#define xmm2  18
+#define xmm3  19
+#define xmm4  20
+#define xmm5  21
+#define xmm6  22
+#define xmm7  23
+#define xmm8  24
+#define xmm9  25
+#define xmm10 26
+#define xmm11 27
+#define xmm12 28
+#define xmm13 29
+#define xmm14 30
+#define xmm15 31
+#endif
+
  #if sparc_TARGET_ARCH
  #define g0 0
  #define g1 1
@@ -824,11 +953,12 @@ allMachRegNos :: [RegNo]
  allMachRegNos
     = IF_ARCH_alpha( [0..63],
       IF_ARCH_i386(  [0..13],
+     IF_ARCH_x86_64( [0..31],
       IF_ARCH_sparc( ([0..31]
                       ++ [f0,f2 .. nCG_FirstFloatReg-1]
                       ++ [nCG_FirstFloatReg .. f31]),
       IF_ARCH_powerpc([0..63],
-                   ))))
+                   )))))
  
  -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
  -- i.e., these are the regs for which we are prepared to allow the
@@ -854,6 +984,11 @@ callClobberedRegs
      -- caller-saves registers
      map RealReg [eax,ecx,edx,fake0,fake1,fake2,fake3,fake4,fake5]
  #endif /* i386_TARGET_ARCH */
+#if x86_64_TARGET_ARCH
+    -- caller-saves registers
+    map RealReg ([rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11] ++ [16..31])
+       -- all xmm regs are caller-saves
+#endif /* x86_64_TARGET_ARCH */
  #if sparc_TARGET_ARCH
      map RealReg 
          ( oReg 7 :
@@ -880,6 +1015,10 @@ argRegs :: RegNo -> [Reg]
  argRegs _ = panic "MachRegs.argRegs(x86): should not be used!"
  #endif
  
+#if x86_64_TARGET_ARCH
+argRegs _ = panic "MachRegs.argRegs(x86_64): should not be used!"
+#endif
+
  #if alpha_TARGET_ARCH
  argRegs 0 = []
  argRegs 1 = freeMappedRegs [16, fReg 16]
@@ -932,6 +1071,13 @@ allArgRegs :: [Reg]
  allArgRegs = panic "MachRegs.allArgRegs(x86): should not be used!"
  #endif
  
+#if x86_64_TARGET_ARCH
+allArgRegs :: [Reg]
+allArgRegs = map RealReg [rdi,rsi,rdx,rcx,r8,r9]
+allFPArgRegs :: [Reg]
+allFPArgRegs = map RealReg [xmm0 .. xmm7]
+#endif
+
  #if powerpc_TARGET_ARCH
  allArgRegs :: [Reg]
  allArgRegs = map RealReg [3..10]
@@ -960,6 +1106,10 @@ freeReg 63 = fastBool False  -- always zero (f31)
  freeReg esp = fastBool False  --       %esp is the C stack pointer
  #endif
  
+#if x86_64_TARGET_ARCH
+freeReg rsp = fastBool False  --       %rsp is the C stack pointer
+#endif
+
  #if sparc_TARGET_ARCH
  freeReg g0 = fastBool False  --        %g0 is always 0.
  freeReg g5 = fastBool False  --        %g5 is reserved (ABI).
diff --git a/ghc/compiler/nativeGen/NCG.h b/ghc/compiler/nativeGen/NCG.h

index 78db0c9..b17f682 100644 (file)
--- a/ghc/compiler/nativeGen/NCG.h
+++ b/ghc/compiler/nativeGen/NCG.h
@@ -26,6 +26,12 @@
  # define IF_ARCH_i386(x,y) y
  #endif
  -- - - - - - - - - - - - - - - - - - - - - - 
+#if x86_64_TARGET_ARCH
+# define IF_ARCH_x86_64(x,y) x
+#else
+# define IF_ARCH_x86_64(x,y) y
+#endif
+-- - - - - - - - - - - - - - - - - - - - - - 
  #if freebsd_TARGET_OS
  # define IF_OS_freebsd(x,y) x
  #else
diff --git a/ghc/compiler/nativeGen/PprMach.hs b/ghc/compiler/nativeGen/PprMach.hs

index 26b192f..197a82a 100644 (file)
--- a/ghc/compiler/nativeGen/PprMach.hs
+++ b/ghc/compiler/nativeGen/PprMach.hs
@@ -2,8 +2,8 @@
  --
  -- Pretty-printing assembly language
  --
-        -- (c) The University of Glasgow 1993-2004
-        --
+-- (c) The University of Glasgow 1993-2005
+--
  -----------------------------------------------------------------------------
  
  -- We start with the @pprXXX@s with some cross-platform commonality
@@ -21,7 +21,7 @@ module PprMach (
  #include "HsVersions.h"
  
  import Cmm
-import MachOp          ( MachRep(..) )
+import MachOp          ( MachRep(..), wordRep, isFloatingRep )
  import MachRegs                -- may differ per-platform
  import MachInstrs
  
@@ -115,13 +115,13 @@ pprBasicBlock (BasicBlock (BlockId id) instrs) =
  -- on which bit of it we care about.  Yurgh.
  
  pprUserReg :: Reg -> Doc
-pprUserReg = pprReg IF_ARCH_i386(I32,)
+pprUserReg = pprReg IF_ARCH_i386(I32,) IF_ARCH_x86_64(I64,)
  
-pprReg :: IF_ARCH_i386(MachRep ->,) Reg -> Doc
+pprReg :: IF_ARCH_i386(MachRep ->, IF_ARCH_x86_64(MachRep ->,)) Reg -> Doc
  
-pprReg IF_ARCH_i386(s,) r
+pprReg IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) r
    = case r of
-      RealReg i      -> ppr_reg_no IF_ARCH_i386(s,) i
+      RealReg i      -> ppr_reg_no IF_ARCH_i386(s, IF_ARCH_x86_64(s,)) i
        VirtualRegI  u  -> text "%vI_" <> asmSDoc (pprUnique u)
        VirtualRegHi u  -> text "%vHi_" <> asmSDoc (pprUnique u)
        VirtualRegF  u  -> text "%vF_" <> asmSDoc (pprUnique u)
@@ -200,6 +200,74 @@ pprReg IF_ARCH_i386(s,) r
         _  -> SLIT("very naughty I386 register")
        })
  #endif
+
+#if x86_64_TARGET_ARCH
+    ppr_reg_no :: MachRep -> Int -> Doc
+    ppr_reg_no I8   = ppr_reg_byte
+    ppr_reg_no I16  = ppr_reg_word
+    ppr_reg_no I32  = ppr_reg_long
+    ppr_reg_no _    = ppr_reg_quad
+
+    ppr_reg_byte i = ptext
+      (case i of {
+        0 -> SLIT("%al");     1 -> SLIT("%bl");
+        2 -> SLIT("%cl");     3 -> SLIT("%dl");
+        4 -> SLIT("%sil");    5 -> SLIT("%dil"); -- new 8-bit regs!
+        6 -> SLIT("%bpl");    7 -> SLIT("%spl");
+        8 -> SLIT("%r8b");    9  -> SLIT("%r9b");
+       10 -> SLIT("%r10b");   11 -> SLIT("%r11b");
+       12 -> SLIT("%r12b");   13 -> SLIT("%r13b");
+       14 -> SLIT("%r14b");   15 -> SLIT("%r15b");
+       _  -> SLIT("very naughty x86_64 byte register")
+      })
+
+    ppr_reg_word i = ptext
+      (case i of {
+        0 -> SLIT("%ax");     1 -> SLIT("%bx");
+        2 -> SLIT("%cx");     3 -> SLIT("%dx");
+        4 -> SLIT("%si");     5 -> SLIT("%di");
+        6 -> SLIT("%bp");     7 -> SLIT("%sp");
+        8 -> SLIT("%r8w");    9  -> SLIT("%r9w");
+       10 -> SLIT("%r10w");   11 -> SLIT("%r11w");
+       12 -> SLIT("%r12w");   13 -> SLIT("%r13w");
+       14 -> SLIT("%r14w");   15 -> SLIT("%r15w");
+       _  -> SLIT("very naughty x86_64 word register")
+      })
+
+    ppr_reg_long i = ptext
+      (case i of {
+        0 -> SLIT("%eax");    1  -> SLIT("%ebx");
+        2 -> SLIT("%ecx");    3  -> SLIT("%edx");
+        4 -> SLIT("%esi");    5  -> SLIT("%edi");
+        6 -> SLIT("%ebp");    7  -> SLIT("%esp");
+        8 -> SLIT("%r8d");    9  -> SLIT("%r9d");
+       10 -> SLIT("%r10d");   11 -> SLIT("%r11d");
+       12 -> SLIT("%r12d");   13 -> SLIT("%r13d");
+       14 -> SLIT("%r14d");   15 -> SLIT("%r15d");
+       _  -> SLIT("very naughty x86_64 register")
+      })
+
+    ppr_reg_quad i = ptext
+      (case i of {
+        0 -> SLIT("%rax");     1 -> SLIT("%rbx");
+        2 -> SLIT("%rcx");     3 -> SLIT("%rdx");
+        4 -> SLIT("%rsi");     5 -> SLIT("%rdi");
+        6 -> SLIT("%rbp");     7 -> SLIT("%rsp");
+        8 -> SLIT("%r8");      9 -> SLIT("%r9");
+       10 -> SLIT("%r10");    11 -> SLIT("%r11");
+       12 -> SLIT("%r12");    13 -> SLIT("%r13");
+       14 -> SLIT("%r14");    15 -> SLIT("%r15");
+       16 -> SLIT("%xmm0");   17 -> SLIT("%xmm1");
+       18 -> SLIT("%xmm2");   19 -> SLIT("%xmm3");
+       20 -> SLIT("%xmm4");   21 -> SLIT("%xmm5");
+       22 -> SLIT("%xmm6");   23 -> SLIT("%xmm7");
+       24 -> SLIT("%xmm8");   25 -> SLIT("%xmm9");
+       26 -> SLIT("%xmm10");  27 -> SLIT("%xmm11");
+       28 -> SLIT("%xmm12");  28 -> SLIT("%xmm13");
+       30 -> SLIT("%xmm13");  31 -> SLIT("%xmm15")
+      })
+#endif
+
  #if sparc_TARGET_ARCH
      ppr_reg_no :: Int -> Doc
      ppr_reg_no i = ptext
@@ -290,7 +358,7 @@ pprReg IF_ARCH_i386(s,) r
  -- -----------------------------------------------------------------------------
  -- pprSize: print a 'Size'
  
-#if powerpc_TARGET_ARCH || i386_TARGET_ARCH
+#if powerpc_TARGET_ARCH || i386_TARGET_ARCH || x86_64_TARGET_ARCH
  pprSize :: MachRep -> Doc
  #else
  pprSize :: Size -> Doc
@@ -310,14 +378,21 @@ pprSize x = ptext (case x of
  --      SF -> SLIT("s") UNUSED
          TF -> SLIT("t")
  #endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
         I8   -> SLIT("b")
         I16  -> SLIT("w")
         I32  -> SLIT("l")
-       F32  -> SLIT("s")
-       F64  -> SLIT("l")
+       I64  -> SLIT("q")
+#endif
+#if i386_TARGET_ARCH
+       F32  -> SLIT("l")
+       F64  -> SLIT("q")
         F80  -> SLIT("t")
  #endif
+#if x86_64_TARGET_ARCH
+       F32  -> SLIT("ss")      -- "scalar single-precision float" (SSE2)
+       F64  -> SLIT("sd")      -- "scalar double-precision float" (SSE2)
+#endif
  #if sparc_TARGET_ARCH
         B   -> SLIT("sb")
         Bu  -> SLIT("ub")
@@ -362,9 +437,9 @@ pprCond c = ptext (case c of {
         GTT  -> SLIT("gt");
         GE  -> SLIT("ge")
  #endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
         GEU     -> SLIT("ae");  LU    -> SLIT("b");
-       EQQ     -> SLIT("e");   GTT    -> SLIT("g");
+       EQQ     -> SLIT("e");   GTT   -> SLIT("g");
         GE      -> SLIT("ge");  GU    -> SLIT("a");
         LTT     -> SLIT("l");   LE    -> SLIT("le");
         LEU     -> SLIT("be");  NE    -> SLIT("ne");
@@ -466,7 +541,7 @@ pprAddr (AddrRegImm r1 i)
  
  -------------------
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  pprAddr (ImmAddr imm off)
    = let        pp_imm = pprImm imm
      in
@@ -481,7 +556,7 @@ pprAddr (AddrBaseIndex base index displacement)
    = let
         pp_disp  = ppr_disp displacement
         pp_off p = pp_disp <> char '(' <> p <> char ')'
-       pp_reg r = pprReg I32 r
+       pp_reg r = pprReg wordRep r
      in
      case (base,index) of
        (Nothing, Nothing)    -> pp_disp
@@ -540,39 +615,59 @@ pprSectionHeader Text
         IF_ARCH_alpha(SLIT("\t.text\n\t.align 3") {-word boundary-}
         ,IF_ARCH_sparc(SLIT(".text\n\t.align 4") {-word boundary-}
         ,IF_ARCH_i386(SLIT(".text\n\t.align 4,0x90") {-needs per-OS variation!-}
+       ,IF_ARCH_x86_64(SLIT(".text\n\t.align 8") {-needs per-OS variation!-}
         ,IF_ARCH_powerpc(SLIT(".text\n.align 2")
-       ,))))
+       ,)))))
  pprSectionHeader Data
      = ptext
          IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
         ,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
         ,IF_ARCH_i386(SLIT(".data\n\t.align 4")
+       ,IF_ARCH_x86_64(SLIT(".data\n\t.align 8")
          ,IF_ARCH_powerpc(SLIT(".data\n.align 2")
-       ,))))
+       ,)))))
  pprSectionHeader ReadOnlyData
      = ptext
          IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
         ,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
         ,IF_ARCH_i386(SLIT(".section .rodata\n\t.align 4")
+       ,IF_ARCH_x86_64(SLIT(".section .rodata\n\t.align 8")
          ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const\n.align 2"),
                                        SLIT(".section .rodata\n\t.align 2"))
-       ,))))
+       ,)))))
  pprSectionHeader RelocatableReadOnlyData
      = ptext
          IF_ARCH_alpha(SLIT("\t.data\n\t.align 3")
         ,IF_ARCH_sparc(SLIT(".data\n\t.align 8") {-<8 will break double constants -}
-       ,IF_ARCH_i386(SLIT(".data\n\t.align 4")
+       ,IF_ARCH_i386(SLIT(".section .rodata\n\t.align 4")
+       ,IF_ARCH_x86_64(SLIT(".text\n\t.align 8")
          ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const_data\n.align 2"),
                                        SLIT(".data\n\t.align 2"))
-       ,))))
+       ,)))))
+       -- the assembler on x86_64/Linux refuses to generate code for
+       --   .quad  x - y
+       -- where x is in the text section and y in the rodata section.
+       -- It works if y is in the text section, though.  This is probably
+       -- going to cause difficulties for PIC, I imagine.
  pprSectionHeader UninitialisedData
      = ptext
          IF_ARCH_alpha(SLIT("\t.bss\n\t.align 3")
         ,IF_ARCH_sparc(SLIT(".bss\n\t.align 8") {-<8 will break double constants -}
         ,IF_ARCH_i386(SLIT(".section .bss\n\t.align 4")
+       ,IF_ARCH_x86_64(SLIT(".section .bss\n\t.align 8")
          ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const_data\n.align 2"),
                                        SLIT(".section .bss\n\t.align 2"))
-       ,))))
+       ,)))))
+pprSectionHeader ReadOnlyData16
+    = ptext
+        IF_ARCH_alpha(SLIT("\t.data\n\t.align 4")
+       ,IF_ARCH_sparc(SLIT(".data\n\t.align 16")
+       ,IF_ARCH_i386(SLIT(".section .rodata\n\t.align 16")
+       ,IF_ARCH_x86_64(SLIT(".section .rodata.cst16\n\t.align 16")
+        ,IF_ARCH_powerpc(IF_OS_darwin(SLIT(".const\n.align 4"),
+                                      SLIT(".section .rodata\n\t.align 4"))
+       ,)))))
+
  pprSectionHeader (OtherSection sec)
      = panic "PprMach.pprSectionHeader: unknown section"
  
@@ -586,11 +681,8 @@ pprData (CmmStaticLit lit)       = pprDataItem lit
  pprGloblDecl :: CLabel -> Doc
  pprGloblDecl lbl
    | not (externallyVisibleCLabel lbl) = empty
-  | otherwise = ptext IF_ARCH_alpha(SLIT(".globl ")
-                       ,IF_ARCH_i386(SLIT(".globl ")
-                       ,IF_ARCH_sparc(SLIT(".global ")
-                       ,IF_ARCH_powerpc(SLIT(".globl ")
-                       ,)))) <>
+  | otherwise = ptext IF_ARCH_sparc(SLIT(".global "), 
+                                   SLIT(".globl ")) <>
                 pprCLabel_asm lbl
  
  pprLabel :: CLabel -> Doc
@@ -612,8 +704,9 @@ pprASCII str
  pprAlign bytes =
         IF_ARCH_alpha(ptextSLIT(".align ") <> int pow2,
         IF_ARCH_i386(ptext SLIT(".align ") <> int bytes,
+       IF_ARCH_x86_64(ptext SLIT(".align ") <> int bytes,
         IF_ARCH_sparc(ptext SLIT(".align ") <> int bytes,
-       IF_ARCH_powerpc(ptext SLIT(".align ") <> int pow2,))))
+       IF_ARCH_powerpc(ptext SLIT(".align ") <> int pow2,)))))
    where
         pow2 = log2 bytes
         
@@ -646,7 +739,7 @@ pprDataItem lit
         ppr_item I16  x = [ptext SLIT("\t.short\t") <> pprImm imm]
         ppr_item I64  x = [ptext SLIT("\t.quad\t") <> pprImm imm]
  #endif
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
         ppr_item I16  x = [ptext SLIT("\t.word\t") <> pprImm imm]
         ppr_item I64  x = [ptext SLIT("\t.quad\t") <> pprImm imm]
  #endif
@@ -672,10 +765,11 @@ pprInstr (COMMENT s)
     =  IF_ARCH_alpha( ((<>) (ptext SLIT("\t# ")) (ftext s))
       ,IF_ARCH_sparc( ((<>) (ptext SLIT("! "))   (ftext s))
       ,IF_ARCH_i386( ((<>) (ptext SLIT("# "))   (ftext s))
+     ,IF_ARCH_x86_64( ((<>) (ptext SLIT("# "))   (ftext s))
       ,IF_ARCH_powerpc( IF_OS_linux(
          ((<>) (ptext SLIT("# ")) (ftext s)),
          ((<>) (ptext SLIT("; ")) (ftext s)))
-     ,))))
+     ,)))))
  
  pprInstr (DELTA d)
     = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
@@ -1071,7 +1165,7 @@ pprSizeRegRegReg name size reg1 reg2 reg3
  -- -----------------------------------------------------------------------------
  -- pprInstr for an x86
  
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack
    | src == dst
@@ -1081,10 +1175,18 @@ pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack
  #else
      empty
  #endif
+
  pprInstr (MOV size src dst)
    = pprSizeOpOp SLIT("mov") size src dst
-pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes I32 src dst
-pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce SLIT("movs") sizes I32 src dst
+
+pprInstr (MOVZxL I32 src dst) = pprSizeOpOp SLIT("mov") I32 src dst
+       -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
+       -- movl.  But we represent it as a MOVZxL instruction, because
+       -- the reg alloc would tend to throw away a plain reg-to-reg
+       -- move, and we still want it to do that.
+
+pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce SLIT("movz") sizes wordRep src dst
+pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce SLIT("movs") sizes wordRep src dst
  
  -- here we do some patching, since the physical registers are only set late
  -- in the code generation.
@@ -1117,11 +1219,13 @@ pprInstr (IMUL size op1 op2) = pprSizeOpOp SLIT("imul") size op1 op2
     however, cannot be used to determine if the upper half of the
     result is non-zero."  So there.  
  -} 
-pprInstr (MUL size op1 op2) = pprSizeOpOp SLIT("imul") size op1 op2
-
  pprInstr (AND size src dst) = pprSizeOpOp SLIT("and") size src dst
  pprInstr (OR  size src dst) = pprSizeOpOp SLIT("or")  size src dst
+
+pprInstr (XOR F32 src dst)  = pprOpOp SLIT("xorps") F32 src dst
+pprInstr (XOR F64 src dst)  = pprOpOp SLIT("xorpd") F64 src dst
  pprInstr (XOR size src dst) = pprSizeOpOp SLIT("xor")  size src dst
+
  pprInstr (NOT size op) = pprSizeOp SLIT("not") size op
  pprInstr (NEGI size op) = pprSizeOp SLIT("neg") size op
  
@@ -1131,7 +1235,10 @@ pprInstr (SHR size src dst) = pprShift SLIT("shr") size src dst
  
  pprInstr (BT  size imm src) = pprSizeImmOp SLIT("bt") size imm src
  
-pprInstr (CMP size src dst) = pprSizeOpOp SLIT("cmp")  size src dst
+pprInstr (CMP size src dst) 
+  | isFloatingRep size =  pprSizeOpOp SLIT("ucomi")  size src dst -- SSE2
+  | otherwise          =  pprSizeOpOp SLIT("cmp")  size src dst
+
  pprInstr (TEST size src dst) = pprSizeOpOp SLIT("test")  size src dst
  pprInstr (PUSH size op) = pprSizeOp SLIT("push") size op
  pprInstr (POP size op) = pprSizeOp SLIT("pop") size op
@@ -1141,7 +1248,8 @@ pprInstr (POP size op) = pprSizeOp SLIT("pop") size op
  -- pprInstr POPA = ptext SLIT("\tpopal")
  
  pprInstr NOP = ptext SLIT("\tnop")
-pprInstr CLTD = ptext SLIT("\tcltd")
+pprInstr (CLTD I32) = ptext SLIT("\tcltd")
+pprInstr (CLTD I64) = ptext SLIT("\tcqto")
  
  pprInstr (SETCC cond op) = pprCondInstr SLIT("set") cond (pprOperand I8 op)
  
@@ -1150,17 +1258,42 @@ pprInstr (JXX cond (BlockId id))
    where lab = mkAsmTempLabel id
  
  pprInstr (JMP (OpImm imm)) = (<>) (ptext SLIT("\tjmp ")) (pprImm imm)
-pprInstr (JMP op)          = (<>) (ptext SLIT("\tjmp *")) (pprOperand I32 op)
+pprInstr (JMP op)          = (<>) (ptext SLIT("\tjmp *")) (pprOperand wordRep op)
  pprInstr (JMP_TBL op ids)  = pprInstr (JMP op)
  pprInstr (CALL (Left imm))      = (<>) (ptext SLIT("\tcall ")) (pprImm imm)
-pprInstr (CALL (Right reg))     = (<>) (ptext SLIT("\tcall *")) (pprReg I32 reg)
+pprInstr (CALL (Right reg))     = (<>) (ptext SLIT("\tcall *")) (pprReg wordRep reg)
  
  pprInstr (IDIV sz op)  = pprSizeOp SLIT("idiv") sz op
  pprInstr (DIV sz op)    = pprSizeOp SLIT("div")  sz op
  
  pprInstr (IMUL64 sd_hi sd_lo) = pprInstr_imul64 sd_hi sd_lo
  
+#if x86_64_TARGET_ARCH
+pprInstr (MUL size op1 op2) = pprSizeOpOp SLIT("mul") size op1 op2
+
+pprInstr (FDIV size op1 op2) = pprSizeOpOp SLIT("div") size op1 op2
+
+pprInstr (CVTSS2SD from to) = pprRegReg SLIT("cvtss2sd") from to
+pprInstr (CVTSD2SS from to) = pprRegReg SLIT("cvtsd2ss") from to
+pprInstr (CVTSS2SI from to) = pprOpReg  SLIT("cvtss2si") from to
+pprInstr (CVTSD2SI from to) = pprOpReg  SLIT("cvtsd2si") from to
+pprInstr (CVTSI2SS from to) = pprOpReg  SLIT("cvtsi2ss") from to
+pprInstr (CVTSI2SD from to) = pprOpReg  SLIT("cvtsi2sd") from to
+#endif
+
+pprInstr (FETCHGOT reg)
+   = vcat [ ptext SLIT("\tcall 1f"),
+            hcat [ ptext SLIT("1:\tpopl\t"), pprReg I32 reg ],
+            hcat [ ptext SLIT("\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
+                   pprReg I32 reg ]
+          ]
  
+#endif
+
+-- -----------------------------------------------------------------------------
+-- i386 floating-point
+
+#if i386_TARGET_ARCH
  -- Simulating a flat register set on the x86 FP stack is tricky.
  -- you have to free %st(7) before pushing anything on the FP reg stack
  -- so as to preclude the possibility of a FP stack overflow exception.
@@ -1357,31 +1490,6 @@ pprInstr GFREE
              ptext SLIT("\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)") 
            ]
  
-pprInstr (FETCHGOT reg)
-   = vcat [ ptext SLIT("\tcall 1f"),
-            hcat [ ptext SLIT("1:\tpopl\t"), pprReg I32 reg ],
-            hcat [ ptext SLIT("\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "),
-                   pprReg I32 reg ]
-          ]
-
--- Emit code to make hi_reg:lo_reg be the 64-bit product of hi_reg and lo_reg
-pprInstr_imul64 hi_reg lo_reg
-   = let fakeInsn = text "imul64" <+> pp_hi_reg <> comma <+> pp_lo_reg
-         pp_hi_reg = pprReg I32 hi_reg
-         pp_lo_reg = pprReg I32 lo_reg
-     in     
-         vcat [
-            text "\t# BEGIN " <> fakeInsn,
-            text "\tpushl" <+> pp_hi_reg <> text" ;  pushl" <+> pp_lo_reg,
-            text "\tpushl %eax ; pushl %edx",
-            text "\tmovl 12(%esp), %eax ; imull 8(%esp)",
-            text "\tmovl %edx, 12(%esp) ; movl %eax, 8(%esp)",
-            text "\tpopl %edx ; popl %eax",
-            text "\tpopl" <+> pp_lo_reg <> text " ;  popl" <+> pp_hi_reg,
-            text "\t# END   " <> fakeInsn
-         ]
-
-
  --------------------------
  
  -- coerce %st(0) to the specified size
@@ -1431,7 +1539,26 @@ pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg SLIT("gadd") sz src1 src2 d
  pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg SLIT("gsub") sz src1 src2 dst
  pprGInstr (GMUL sz src1 src2 dst) = pprSizeRegRegReg SLIT("gmul") sz src1 src2 dst
  pprGInstr (GDIV sz src1 src2 dst) = pprSizeRegRegReg SLIT("gdiv") sz src1 src2 dst
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
+-- Emit code to make hi_reg:lo_reg be the 64-bit product of hi_reg and lo_reg
+pprInstr_imul64 hi_reg lo_reg
+   = let fakeInsn = text "imul64" <+> pp_hi_reg <> comma <+> pp_lo_reg
+         pp_hi_reg = pprReg wordRep hi_reg
+         pp_lo_reg = pprReg wordRep lo_reg
+     in     
+         vcat [
+            text "\t# BEGIN " <> fakeInsn,
+            text "\tpushl" <+> pp_hi_reg <> text" ;  pushl" <+> pp_lo_reg,
+            text "\tpushl %eax ; pushl %edx",
+            text "\tmovl 12(%esp), %eax ; imull 8(%esp)",
+            text "\tmovl %edx, 12(%esp) ; movl %eax, 8(%esp)",
+            text "\tpopl %edx ; popl %eax",
+            text "\tpopl" <+> pp_lo_reg <> text " ;  popl" <+> pp_hi_reg,
+            text "\t# END   " <> fakeInsn
+         ]
  -- Continue with I386-only printing bits and bobs:
  
  pprDollImm :: Imm -> Doc
@@ -1443,6 +1570,10 @@ pprOperand s (OpReg r)   = pprReg s r
  pprOperand s (OpImm i)   = pprDollImm i
  pprOperand s (OpAddr ea) = pprAddr ea
  
+pprMnemonic_  :: LitString -> Doc
+pprMnemonic_ name = 
+   char '\t' <> ptext name <> space
+
  pprMnemonic  :: LitString -> MachRep -> Doc
  pprMnemonic name size = 
     char '\t' <> ptext name <> pprSize size <> space
@@ -1473,6 +1604,15 @@ pprSizeOpOp name size op1 op2
         pprOperand size op2
      ]
  
+pprOpOp :: LitString -> MachRep -> Operand -> Operand -> Doc
+pprOpOp name size op1 op2
+  = hcat [
+       pprMnemonic_ name,
+       pprOperand size op1,
+       comma,
+       pprOperand size op2
+    ]
+
  pprSizeReg :: LitString -> MachRep -> Reg -> Doc
  pprSizeReg name size reg1
    = hcat [
@@ -1489,6 +1629,24 @@ pprSizeRegReg name size reg1 reg2
          pprReg size reg2
      ]
  
+pprRegReg :: LitString -> Reg -> Reg -> Doc
+pprRegReg name reg1 reg2
+  = hcat [
+       pprMnemonic_ name,
+       pprReg wordRep reg1,
+        comma,
+        pprReg wordRep reg2
+    ]
+
+pprOpReg :: LitString -> Operand -> Reg -> Doc
+pprOpReg name op1 reg2
+  = hcat [
+       pprMnemonic_ name,
+       pprOperand wordRep op1,
+        comma,
+        pprReg wordRep reg2
+    ]
+
  pprCondRegReg :: LitString -> MachRep -> Cond -> Reg -> Reg -> Doc
  pprCondRegReg name size cond reg1 reg2
    = hcat [
diff --git a/ghc/compiler/nativeGen/RegAllocInfo.hs b/ghc/compiler/nativeGen/RegAllocInfo.hs

index 7d1bf48..6b929e5 100644 (file)
--- a/ghc/compiler/nativeGen/RegAllocInfo.hs
+++ b/ghc/compiler/nativeGen/RegAllocInfo.hs
@@ -24,7 +24,7 @@ module RegAllocInfo (
  #include "HsVersions.h"
  
  import Cmm             ( BlockId )
-#if powerpc_TARGET_ARCH || i386_TARGET_ARCH
+#if powerpc_TARGET_ARCH || i386_TARGET_ARCH || x86_64_TARGET_ARCH
  import MachOp           ( MachRep(..) )
  #endif
  import MachInstrs
@@ -138,7 +138,7 @@ regUsage instr = case instr of
  
  #endif /* alpha_TARGET_ARCH */
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  regUsage instr = case instr of
      MOV    sz src dst  -> usageRW src dst
@@ -173,9 +173,10 @@ regUsage instr = case instr of
      JMP_TBL op ids      -> mkRU (use_R op) []
      CALL   (Left imm)  -> mkRU [] callClobberedRegs
      CALL   (Right reg) -> mkRU [reg] callClobberedRegs
-    CLTD               -> mkRU [eax] [edx]
+    CLTD   sz          -> mkRU [eax] [edx]
      NOP                        -> mkRU [] []
  
+#if i386_TARGET_ARCH
      GMOV   src dst     -> mkRU [src] [dst]
      GLD    sz src dst  -> mkRU (use_EA src) [dst]
      GST    sz src dst  -> mkRU (src : use_EA dst) []
@@ -201,6 +202,17 @@ regUsage instr = case instr of
      GSIN   sz src dst  -> mkRU [src] [dst]
      GCOS   sz src dst  -> mkRU [src] [dst]
      GTAN   sz src dst  -> mkRU [src] [dst]
+#endif
+
+#if x86_64_TARGET_ARCH
+    CVTSS2SD src dst   -> mkRU [src] [dst]
+    CVTSD2SS src dst   -> mkRU [src] [dst]
+    CVTSS2SI src dst   -> mkRU (use_R src) [dst]
+    CVTSD2SI src dst   -> mkRU (use_R src) [dst]
+    CVTSI2SS src dst   -> mkRU (use_R src) [dst]
+    CVTSI2SD src dst   -> mkRU (use_R src) [dst]
+    FDIV sz src dst     -> usageRM src dst
+#endif    
  
      FETCHGOT reg        -> mkRU [] [reg]
  
@@ -244,7 +256,7 @@ regUsage instr = case instr of
      mkRU src dst = RU (filter interesting src)
                       (filter interesting dst)
  
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #if sparc_TARGET_ARCH
  
@@ -370,7 +382,7 @@ regUsage instr = case instr of
  jumpDests :: Instr -> [BlockId] -> [BlockId]
  jumpDests insn acc
    = case insn of
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
         JXX _ id        -> id : acc
         JMP_TBL _ ids   -> ids ++ acc
  #elif powerpc_TARGET_ARCH
@@ -445,7 +457,7 @@ patchRegs instr env = case instr of
  
  #endif /* alpha_TARGET_ARCH */
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  patchRegs instr env = case instr of
      MOV  sz src dst    -> patch2 (MOV  sz) src dst
@@ -477,6 +489,7 @@ patchRegs instr env = case instr of
      JMP op             -> patch1 JMP op
      JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
  
+#if i386_TARGET_ARCH
      GMOV src dst       -> GMOV (env src) (env dst)
      GLD sz src dst     -> GLD sz (lookupAddr src) (env dst)
      GST sz src dst     -> GST sz (env src) (lookupAddr dst)
@@ -502,6 +515,17 @@ patchRegs instr env = case instr of
      GSIN sz src dst    -> GSIN sz (env src) (env dst)
      GCOS sz src dst    -> GCOS sz (env src) (env dst)
      GTAN sz src dst    -> GTAN sz (env src) (env dst)
+#endif
+
+#if x86_64_TARGET_ARCH
+    CVTSS2SD src dst   -> CVTSS2SD (env src) (env dst)
+    CVTSD2SS src dst   -> CVTSD2SS (env src) (env dst)
+    CVTSS2SI src dst   -> CVTSS2SI (patchOp src) (env dst)
+    CVTSD2SI src dst   -> CVTSD2SI (patchOp src) (env dst)
+    CVTSI2SS src dst   -> CVTSI2SS (patchOp src) (env dst)
+    CVTSI2SD src dst   -> CVTSI2SD (patchOp src) (env dst)
+    FDIV sz src dst    -> FDIV sz (patchOp src) (patchOp dst)
+#endif    
  
      CALL (Left imm)    -> instr
      CALL (Right reg)   -> CALL (Right (env reg))
@@ -512,7 +536,7 @@ patchRegs instr env = case instr of
      COMMENT _          -> instr
      DELTA _            -> instr
      JXX _ _            -> instr
-    CLTD               -> instr
+    CLTD _             -> instr
  
      _other             -> panic "patchRegs: unrecognised instr"
  
@@ -534,7 +558,7 @@ patchRegs instr env = case instr of
         lookupIndex Nothing      = Nothing
         lookupIndex (Just (r,i)) = Just (env r, i)
  
-#endif /* i386_TARGET_ARCH */
+#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH*/
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #if sparc_TARGET_ARCH
  
@@ -645,7 +669,7 @@ patchRegs instr env = case instr of
  -- by assigning the src and dest temporaries to the same real register.
  
  isRegRegMove :: Instr -> Maybe (Reg,Reg)
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  -- TMP:
  isRegRegMove (MOV _ (OpReg r1) (OpReg r2)) = Just (r1,r2)
  #elif powerpc_TARGET_ARCH
@@ -678,6 +702,12 @@ mkSpillInstr reg delta slot
            RcInteger -> MOV I32 (OpReg reg) (OpAddr (spRel off_w))
            _         -> GST F80 reg (spRel off_w) {- RcFloat/RcDouble -}
  #endif
+#ifdef x86_64_TARGET_ARCH
+    let off_w = (off-delta) `div` 8
+    in case regClass reg of
+          RcInteger -> MOV I64 (OpReg reg) (OpAddr (spRel off_w))
+          _         -> panic "mkSpillInstr: ToDo"
+#endif
  #ifdef sparc_TARGET_ARCH
         {-SPARC: spill below frame pointer leaving 2 words/spill-}
                          let{off_w = 1 + (off `div` 4);
@@ -705,16 +735,22 @@ mkLoadInstr reg delta slot
      let
          off     = spillSlotToOffset slot
      in
-#ifdef alpha_TARGET_ARCH
+#if alpha_TARGET_ARCH
          LD  sz dyn (spRel (- (off `div` 8)))
  #endif
-#ifdef i386_TARGET_ARCH
+#if i386_TARGET_ARCH
         let off_w = (off-delta) `div` 4
          in case regClass reg of {
                RcInteger -> MOV I32 (OpAddr (spRel off_w)) (OpReg reg);
                _         -> GLD F80 (spRel off_w) reg} {- RcFloat/RcDouble -}
  #endif
-#ifdef sparc_TARGET_ARCH
+#if x86_64_TARGET_ARCH
+       let off_w = (off-delta) `div` 8
+        in case regClass reg of
+              RcInteger -> MOV I64 (OpAddr (spRel off_w)) (OpReg reg)
+              _         -> panic "mkLoadInstr: ToDo"
+#endif
+#if sparc_TARGET_ARCH
          let{off_w = 1 + (off `div` 4);
              sz = case regClass vreg of {
                     RcInteger -> W;
@@ -722,7 +758,7 @@ mkLoadInstr reg delta slot
                     RcDouble  -> DF}}
          in LD sz (fpRel (- off_w)) dyn
  #endif
-#ifdef powerpc_TARGET_ARCH
+#if powerpc_TARGET_ARCH
      let sz = case regClass reg of
                  RcInteger -> I32
                  RcDouble -> F64
author	simonmar <unknown>
	Fri, 1 Apr 2005 12:14:30 +0000 (12:14 +0000)
committer	simonmar <unknown>
	Fri, 1 Apr 2005 12:14:30 +0000 (12:14 +0000)
ghc/compiler/cmm/Cmm.hs		patch \| blob \| history
ghc/compiler/nativeGen/AsmCodeGen.lhs		patch \| blob \| history
ghc/compiler/nativeGen/MachCodeGen.hs		patch \| blob \| history
ghc/compiler/nativeGen/MachInstrs.hs		patch \| blob \| history
ghc/compiler/nativeGen/MachRegs.lhs		patch \| blob \| history
ghc/compiler/nativeGen/NCG.h		patch \| blob \| history
ghc/compiler/nativeGen/PprMach.hs		patch \| blob \| history
ghc/compiler/nativeGen/RegAllocInfo.hs		patch \| blob \| history