Misc. cleanups to CPS converter

[ghc-hetmet.git] / compiler / nativeGen / MachCodeGen.hs
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs

index 865d02d..3abf6a4 100644 (file)
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -265,6 +265,17 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
     -- in
     return (ChildCode64 code rlo)
  
+iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
+     fn <- getAnyReg expr
+     r_dst_lo <-  getNewRegNat I32
+     let r_dst_hi = getHiVRegFromLo r_dst_lo
+         code = fn r_dst_lo
+     return (
+             ChildCode64 (code `snocOL` 
+                          MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
+                          r_dst_lo
+            )
+
  iselExpr64 expr
     = pprPanic "iselExpr64(i386)" (ppr expr)
  
@@ -1148,16 +1159,30 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
           -- in
           return (Any rep code)
          
-    {- Case2: shift length is complex (non-immediate) -}
+    {- Case2: shift length is complex (non-immediate)
+      * y must go in %ecx.
+      * we cannot do y first *and* put its result in %ecx, because
+        %ecx might be clobbered by x.
+      * if we do y second, then x cannot be 
+        in a clobbered reg.  Also, we cannot clobber x's reg
+        with the instruction itself.
+      * so we can either:
+        - do y first, put its result in a fresh tmp, then copy it to %ecx later
+        - do y second and put its result into %ecx.  x gets placed in a fresh
+          tmp.  This is likely to be better, because the reg alloc can
+          eliminate this reg->reg move here (it won't eliminate the other one,
+          because the move is into the fixed %ecx).
+    -}
      shift_code rep instr x y{-amount-} = do
-        (x_reg, x_code) <- getNonClobberedReg x
+        x_code <- getAnyReg x
+       tmp <- getNewRegNat rep
          y_code <- getAnyReg y
         let 
-          code = x_code `appOL`
+          code = x_code tmp `appOL`
                   y_code ecx `snocOL`
-                 instr (OpReg ecx) (OpReg x_reg)
+                 instr (OpReg ecx) (OpReg tmp)
          -- in
-        return (Fixed rep x_reg code)
+        return (Fixed rep tmp code)
  
      --------------------
      add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
@@ -3347,10 +3372,10 @@ genCCall target dest_regs args vols = do
          (arg_reg, arg_code) <- getSomeReg arg
           delta <- getDeltaNat
           setDeltaNat (delta-arg_size)
-        let code' = code `appOL` toOL [
-                       MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0)),
+        let code' = code `appOL` arg_code `appOL` toOL [
                         SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
-                       DELTA (delta-arg_size)]
+                       DELTA (delta-arg_size),
+                       MOV arg_rep (OpReg arg_reg) (OpAddr  (spRel 0))]
          push_args rest code'
  
         | otherwise = do
@@ -4563,6 +4588,8 @@ remainderCode rep div x y = do
  -- -----------------------------------------------------------------------------
  --  Coercing to/from integer/floating-point...
  
+-- When going to integer, we truncate (round towards 0).
+
  -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
  -- conversions.  We have to store temporaries in memory to move
  -- between the integer and the floating point register sets.
@@ -4648,7 +4675,7 @@ coerceFP2Int from to x = do
  coerceFP2Int from to x = do
    (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
    let
-        opc  = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI
+        opc  = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ
          code dst = x_code `snocOL` opc x_op dst
    -- in
    return (Any to code) -- works even if the destination rep is <I32