Fix truncate on amd64 NCG; fixes arith005.

[ghc-hetmet.git] / compiler / nativeGen / MachCodeGen.hs
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs

index 7f09c9d..3ff958d 100644 (file)
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -21,7 +21,7 @@ module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
  import MachInstrs
  import MachRegs
  import NCGMonad
-import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase )
+import PositionIndependentCode
  import RegAllocInfo ( mkBranchInstr )
  
  -- Our intermediate code:
@@ -42,13 +42,14 @@ import Constants    ( wORD_SIZE )
  
  #ifdef DEBUG
  import Outputable      ( assertPanic )
-import TRACE           ( trace )
+import Debug.Trace     ( trace )
  #endif
  
  import Control.Monad   ( mapAndUnzipM )
-import Maybe           ( fromJust )
-import DATA_BITS
-import DATA_WORD
+import Data.Maybe      ( fromJust )
+import Data.Bits
+import Data.Word
+import Data.Int
  
  -- -----------------------------------------------------------------------------
  -- Top-level of the instruction selector
@@ -486,10 +487,14 @@ getRegisterReg (CmmGlobal mid)
  
  getRegister :: CmmExpr -> NatM Register
  
+#if !x86_64_TARGET_ARCH
+    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+    -- register, it can only be used for rip-relative addressing.
  getRegister (CmmReg (CmmGlobal PicBaseReg))
    = do
        reg <- getPicBaseNat wordRep
        return (Fixed wordRep reg nilOL)
+#endif
  
  getRegister (CmmReg reg) 
    = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
@@ -497,6 +502,31 @@ getRegister (CmmReg reg)
  getRegister tree@(CmmRegOff _ _) 
    = getRegister (mangleIndexTree tree)
  
+
+#if WORD_SIZE_IN_BITS==32
+    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+    -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_U_Conv I64 I32)
+             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed I32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_S_Conv I64 I32)
+             [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed I32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed I32 rlo code
+
+getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed I32 rlo code       
+
+#endif
+
  -- end of machine-"independent" bit; here we go on the rest...
  
  #if alpha_TARGET_ARCH
@@ -736,7 +766,7 @@ getRegister leaf
  
  getRegister (CmmLit (CmmFloat f F32)) = do
      lbl <- getNewLabelNat
-    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
      Amode addr addr_code <- getAmode dynRef
      let code dst =
             LDATA ReadOnlyData
@@ -759,7 +789,7 @@ getRegister (CmmLit (CmmFloat d F64))
  
    | otherwise = do
      lbl <- getNewLabelNat
-    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
      Amode addr addr_code <- getAmode dynRef
      let code dst =
             LDATA ReadOnlyData
@@ -844,6 +874,13 @@ getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
  #endif
  
  #if x86_64_TARGET_ARCH
+getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
+                                     CmmLit displacement])
+    = return $ Any I64 (\dst -> unitOL $
+        LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
+#endif
+
+#if x86_64_TARGET_ARCH
  getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
    x_code <- getAnyReg x
    lbl <- getNewLabelNat
@@ -1658,7 +1695,7 @@ getRegister (CmmLit (CmmInt i rep))
  
  getRegister (CmmLit (CmmFloat f frep)) = do
      lbl <- getNewLabelNat
-    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
      Amode addr addr_code <- getAmode dynRef
      let code dst = 
             LDATA ReadOnlyData  [CmmDataLabel lbl,
@@ -1757,6 +1794,14 @@ getAmode other
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  
+#if x86_64_TARGET_ARCH
+
+getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
+                                     CmmLit displacement])
+    = return $ Amode (ripRel (litToImm displacement)) nilOL
+
+#endif
+
  #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- This is all just ridiculous, since it carefully undoes 
@@ -1784,15 +1829,18 @@ getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
  getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) 
                                         [y, CmmLit (CmmInt shift _)]])
    | shift == 0 || shift == 1 || shift == 2 || shift == 3
-  = do (x_reg, x_code) <- getNonClobberedReg x
-       -- x must be in a temp, because it has to stay live over y_code
-       -- we could compre x_reg and y_reg and do something better here...
-       (y_reg, y_code) <- getSomeReg y
-       let
-          code = x_code `appOL` y_code
-           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0))
-               code)
+  = x86_complex_amode x y shift 0
+
+getAmode (CmmMachOp (MO_Add rep) 
+                [x, CmmMachOp (MO_Add _)
+                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
+                         CmmLit (CmmInt offset _)]])
+  -- the offset must fit a signed 32-bit displacement whatever the shift is
+  | shift `elem` [0, 1, 2, 3] && not (is64BitInteger offset)
+  = x86_complex_amode x y shift offset
+
+getAmode (CmmMachOp (MO_Add rep) [x,y])
+  = x86_complex_amode x y 0 0
  
  getAmode (CmmLit lit) | not (is64BitLit lit)
    = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
@@ -1801,6 +1849,19 @@ getAmode expr = do
    (reg,code) <- getSomeReg expr
    return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
  
+
+-- Build the amode  base + index * 2^shift + offset;  shift must be 0..3.
+x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
+x86_complex_amode base index shift offset
+  = do (x_reg, x_code) <- getNonClobberedReg base
+       -- x must be in a temp, because it has to stay live over y_code
+       -- we could compare x_reg and y_reg and do something better here...
+       (y_reg, y_code) <- getSomeReg index
+       let code  = x_code `appOL` y_code
+           scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg scale) (ImmInt (fromIntegral offset)))
+               code)
+
  #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
  
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -1993,7 +2054,12 @@ is64BitLit x = False
  #endif
  
  is64BitInteger :: Integer -> Bool
-is64BitInteger i = i > 0x7fffffff || i < -0x80000000
+is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000
+  where i64 = fromIntegral i :: Int64
+  -- a CmmInt is intended to be truncated to the appropriate 
+  -- number of bits, so here we truncate it to Int64.  This is
+  -- important because e.g. -1 as a CmmInt might be either
+  -- -1 or 18446744073709551615.
  
  -- -----------------------------------------------------------------------------
  --  The 'CondCode' type:  Condition codes passed up the tree.
@@ -2016,7 +2082,7 @@ getCondCode = panic "MachCode.getCondCode: not on Alphas"
  -- yes, they really do seem to want exactly the same!
  
  getCondCode (CmmMachOp mop [x, y])
-  = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons
+  = 
      case mop of
        MO_Eq F32 -> condFltCode EQQ x y
        MO_Ne F32 -> condFltCode NE  x y
@@ -2047,7 +2113,7 @@ getCondCode (CmmMachOp mop [x, y])
        MO_U_Lt rep -> condIntCode LU   x y
        MO_U_Le rep -> condIntCode LEU  x y
  
-      other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop)
+      other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
  
  getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
  
@@ -2315,6 +2381,25 @@ assignIntCode pk dst src
  #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  
  -- integer assignment to memory
+
+-- specific case of adding/subtracting an integer to a particular address.
+-- ToDo: catch other cases where we can use an operation directly on a memory 
+-- address.
+assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
+                                                 CmmLit (CmmInt i _)])
+   | addr == addr2, pk /= I64 || not (is64BitInteger i),
+     Just instr <- check op
+   = do Amode amode code_addr <- getAmode addr
+        let code = code_addr `snocOL`
+                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
+        return code
+   where
+        check (MO_Add _) = Just ADD
+        check (MO_Sub _) = Just SUB
+        check _ = Nothing
+        -- ToDo: more?
+
+-- general case
  assignMem_IntCode pk addr src = do
      Amode addr code_addr <- getAmode addr
      (code_src, op_src)   <- get_op_RI src
@@ -2909,6 +2994,10 @@ genCCall fn cconv result_regs args
  
  #if i386_TARGET_ARCH
  
+genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
+       -- write barrier compiles to no code on x86/x86-64; 
+       -- we keep it this long in order to prevent earlier optimisations.
+
  -- we only cope with a single result for foreign calls
  genCCall (CmmPrim op) [(r,_)] args vols = do
    case op of
@@ -3063,7 +3152,7 @@ outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)]
    -> Maybe [GlobalReg] -> NatM InstrBlock
  outOfLineFloatOp mop res args vols
    = do
-      targetExpr <- cmmMakeDynamicReference addImportNat True lbl
+      targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
        let target = CmmForeignCall targetExpr CCallConv
          
        if cmmRegRep res == F64
@@ -3078,7 +3167,7 @@ outOfLineFloatOp mop res args vols
            code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp))
            return (code1 `appOL` code2)
    where
-       lbl = mkForeignLabel fn Nothing True
+       lbl = mkForeignLabel fn Nothing False
  
         fn = case mop of
               MO_F32_Sqrt  -> FSLIT("sqrtf")
@@ -3119,6 +3208,10 @@ outOfLineFloatOp mop res args vols
  
  #if x86_64_TARGET_ARCH
  
+genCCall (CmmPrim MO_WriteBarrier) _ _ _ = return nilOL
+       -- write barrier compiles to no code on x86/x86-64; 
+       -- we keep it this long in order to prevent earlier optimisations.
+
  genCCall (CmmPrim op) [(r,_)] args vols = 
    outOfLineFloatOp op r args vols
  
@@ -3131,7 +3224,7 @@ genCCall target dest_regs args vols = do
      let
         fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
         int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
-       arg_regs = int_regs_used ++ fp_regs_used
+       arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
                 -- for annotating the call instruction with
  
         sse_regs = length fp_regs_used
@@ -3415,7 +3508,7 @@ genCCall target dest_regs argsAndHints vols = do
                           )
  outOfLineFloatOp mop =
      do
-      mopExpr <- cmmMakeDynamicReference addImportNat True $
+      mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                   mkForeignLabel functionName Nothing True
        let mopLabelOrExpr = case mopExpr of
                         CmmLit (CmmLabel lbl) -> Left lbl
@@ -3500,6 +3593,10 @@ outOfLineFloatOp mop =
      frame just before ccalling.
  -}
  
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ _
+ = return $ unitOL LWSYNC
+
  genCCall target dest_regs argsAndHints vols
    = ASSERT (not $ any (`elem` [I8,I16]) argReps)
          -- we rely on argument promotion in the codeGen
@@ -3666,7 +3763,7 @@ genCCall target dest_regs argsAndHints vols
                            
          outOfLineFloatOp mop =
              do
-                mopExpr <- cmmMakeDynamicReference addImportNat True $
+                mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                                mkForeignLabel functionName Nothing True
                  let mopLabelOrExpr = case mopExpr of
                          CmmLit (CmmLabel lbl) -> Left lbl
@@ -3726,7 +3823,7 @@ genSwitch expr ids
    = do
          (reg,e_code) <- getSomeReg expr
          lbl <- getNewLabelNat
-        dynRef <- cmmMakeDynamicReference addImportNat False lbl
+        dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
          (tableReg,t_code) <- getSomeReg $ dynRef
          let
              jumpTable = map jumpTableEntryRel ids
@@ -3740,11 +3837,25 @@ genSwitch expr ids
              op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
                                         (EAIndex reg wORD_SIZE) (ImmInt 0))
  
+#if x86_64_TARGET_ARCH && darwin_TARGET_OS
+    -- on Mac OS X/x86_64, put the jump table in the text section
+    -- to work around a limitation of the linker.
+    -- ld64 is unable to handle the relocations for
+    --     .quad L1 - L0
+    -- if L0 is not preceded by a non-anonymous label in its section.
+    
+            code = e_code `appOL` t_code `appOL` toOL [
+                            ADD wordRep op (OpReg tableReg),
+                            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
+                            LDATA Text (CmmDataLabel lbl : jumpTable)
+                    ]
+#else
              code = e_code `appOL` t_code `appOL` toOL [
                              LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
                              ADD wordRep op (OpReg tableReg),
                              JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
                      ]
+#endif
          return code
    | otherwise
    = do
@@ -3766,7 +3877,7 @@ genSwitch expr ids
          (reg,e_code) <- getSomeReg expr
          tmp <- getNewRegNat I32
          lbl <- getNewLabelNat
-        dynRef <- cmmMakeDynamicReference addImportNat False lbl
+        dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
          (tableReg,t_code) <- getSomeReg $ dynRef
          let
              jumpTable = map jumpTableEntryRel ids
@@ -4452,6 +4563,8 @@ remainderCode rep div x y = do
  -- -----------------------------------------------------------------------------
  --  Coercing to/from integer/floating-point...
  
+-- When going to integer, we truncate (round towards 0).
+
  -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
  -- conversions.  We have to store temporaries in memory to move
  -- between the integer and the floating point register sets.
@@ -4537,7 +4650,7 @@ coerceFP2Int from to x = do
  coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
    let
-        opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+        opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
          code dst = x_code `snocOL` opc x_op dst
    -- in
    return (Any to code) -- works even if the destination rep is <I32
@@ -4605,7 +4718,7 @@ coerceInt2FP fromRep toRep x = do
      lbl <- getNewLabelNat
      itmp <- getNewRegNat I32
      ftmp <- getNewRegNat F64
-    dynRef <- cmmMakeDynamicReference addImportNat False lbl
+    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
      Amode addr addr_code <- getAmode dynRef
      let
         code' dst = code `appOL` maybe_exts `appOL` toOL [