[project @ 2000-01-26 13:40:54 by sewardj]

[ghc-hetmet.git] / ghc / compiler / nativeGen / MachMisc.lhs
diff --git a/ghc/compiler/nativeGen/MachMisc.lhs b/ghc/compiler/nativeGen/MachMisc.lhs

index d72de13..867495b 100644 (file)
--- a/ghc/compiler/nativeGen/MachMisc.lhs
+++ b/ghc/compiler/nativeGen/MachMisc.lhs
@@ -24,8 +24,9 @@ module MachMisc (
  
         Instr(..),  IF_ARCH_i386(Operand(..) COMMA,)
         Cond(..),
-       Size(..)
-       
+       Size(..),
+        IF_ARCH_i386(i386_insert_ffrees COMMA,)        
+
  #if alpha_TARGET_ARCH
         , RI(..)
  #endif
@@ -41,7 +42,7 @@ module MachMisc (
  
  import AbsCSyn         ( MagicId(..) ) 
  import AbsCUtils       ( magicIdPrimRep )
-import CLabel           ( CLabel )
+import CLabel           ( CLabel, isAsmTemp )
  import Const           ( mkMachInt, Literal(..) )
  import MachRegs                ( stgReg, callerSaves, RegLoc(..),
                           Imm(..), Reg(..), 
@@ -76,7 +77,7 @@ fmtAsmLbl s
       -}
       '$' : s
       ,{-otherwise-}
-     s
+     '.':'L':s
       )
  
  ---------------------------
@@ -435,6 +436,40 @@ data RI
  #endif {- alpha_TARGET_ARCH -}
  \end{code}
  
+Intel, in their infinite wisdom, selected a stack model for floating
+point registers on x86.  That might have made sense back in 1979 --
+nowadays we can see it for the nonsense it really is.  A stack model
+fits poorly with the existing nativeGen infrastructure, which assumes
+flat integer and FP register sets.  Prior to this commit, nativeGen
+could not generate correct x86 FP code -- to do so would have meant
+somehow working the register-stack paradigm into the register
+allocator and spiller, which sounds very difficult.
+  
+We have decided to cheat, and go for a simple fix which requires no
+infrastructure modifications, at the expense of generating ropey but
+correct FP code.  All notions of the x86 FP stack and its insns have
+been removed.  Instead, we pretend (to the instruction selector and
+register allocator) that x86 has six floating point registers, %fake0
+.. %fake5, which can be used in the usual flat manner.  We further
+claim that x86 has floating point instructions very similar to SPARC
+and Alpha, that is, a simple 3-operand register-register arrangement.
+Code generation and register allocation proceed on this basis.
+  
+When we come to print out the final assembly, our convenient fiction
+is converted to dismal reality.  Each fake instruction is
+independently converted to a series of real x86 instructions.
+%fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
+arithmetic operations, the two operands are pushed onto the top of the
+FP stack, the operation done, and the result copied back into the
+relevant register.  There are only six %fake registers because 2 are
+needed for the translation, and x86 has 8 in total.
+
+The translation is inefficient but is simple and it works.  A cleverer
+translation would handle a sequence of insns, simulating the FP stack
+contents, would not impose a fixed mapping from %fake to %st regs, and
+hopefully could avoid most of the redundant reg-reg moves of the
+current translation.
+
  \begin{code}
  #if i386_TARGET_ARCH
  
@@ -480,6 +515,7 @@ data RI
  
                -- all the 3-operand fake fp insns are src1 src2 dst
                -- and furthermore are constrained to be fp regs only.
+              -- IMPORTANT: keep is_G_instr up to date with any changes here
               | GMOV          Reg Reg -- src(fpreg), dst(fpreg)
                | GLD           Size MachRegsAddr Reg -- src, dst(fpreg)
                | GST           Size Reg MachRegsAddr -- src(fpreg), dst
@@ -504,6 +540,7 @@ data RI
               | GNEG          Size Reg Reg -- src, dst
               | GSQRT         Size Reg Reg -- src, dst
  
+              | GFREE         -- do ffree on all x86 regs; an ugly hack
  -- Comparison
  
               | TEST          Size Operand Operand
@@ -532,6 +569,38 @@ data Operand
    | OpImm  Imm         -- immediate value
    | OpAddr MachRegsAddr        -- memory reference
  
+
+-- Post-process an instruction sequence for the fake x86 FP scheme.
+-- If at least one instruction satisfies is_G_instr, every instruction
+-- is expanded through ffree_before_nonlocal_transfers (which may put a
+-- GFREE in front of it); otherwise the sequence is returned untouched.
+i386_insert_ffrees :: [Instr] -> [Instr]
+i386_insert_ffrees insns
+   = if   uses_fake_fp
+     then concatMap ffree_before_nonlocal_transfers insns
+     else insns
+   where
+     -- True as soon as one fake FP instruction is found in the input
+     uses_fake_fp = any is_G_instr insns
+
+-- Expand one instruction into the list that replaces it.  A GFREE is
+-- placed in front of every CALL and in front of every JMP, with one
+-- exception: a JMP whose operand is an immediate CLabel for which
+-- isAsmTemp holds is left alone (presumably because such a jump stays
+-- local to the current code block -- confirm against CLabel.isAsmTemp).
+-- Every other instruction is returned unchanged as a singleton list.
+ffree_before_nonlocal_transfers :: Instr -> [Instr]
+ffree_before_nonlocal_transfers insn
+   = case insn of
+        CALL _                                      -> [GFREE, insn]
+        -- must precede the general JMP case, which would otherwise match
+        JMP (OpImm (ImmCLbl clbl)) | isAsmTemp clbl -> [insn]
+        JMP _                                       -> [GFREE, insn]
+        _                                           -> [insn]
+
+
+-- Is this one of the fake x86 floating-point instructions?
+--
+-- If you ever add a new FP insn to the fake x86 FP insn set,
+-- you must update this too.
+--
+-- The alternatives are laid out one per line, indented with spaces only,
+-- so that they all start in the same column and the layout rule sees a
+-- single case block regardless of how tabs are expanded.
+is_G_instr :: Instr -> Bool
+is_G_instr instr
+   = case instr of
+        GMOV _ _     -> True
+        GLD _ _ _    -> True
+        GST _ _ _    -> True
+        GFTOD _ _    -> True
+        GFTOI _ _    -> True
+        GDTOF _ _    -> True
+        GDTOI _ _    -> True
+        GITOF _ _    -> True
+        GITOD _ _    -> True
+        GADD _ _ _ _ -> True
+        GDIV _ _ _ _ -> True
+        GSUB _ _ _ _ -> True
+        GMUL _ _ _ _ -> True
+        GCMP _ _ _   -> True
+        GABS _ _ _   -> True
+        GNEG _ _ _   -> True
+        GSQRT _ _ _  -> True
+        -- GFREE is not expected in the input: it is what
+        -- i386_insert_ffrees adds to its output, so meeting one
+        -- here is treated as an internal error.
+        GFREE        -> panic "is_G_instr: GFREE (!)"
+        _            -> False
+
  #endif {- i386_TARGET_ARCH -}
  \end{code}