From c39373f1371fd1e46ea91be262f00c277b31f8e5 Mon Sep 17 00:00:00 2001
From: sewardj <unknown>
Date: Fri, 28 Jan 2000 18:07:56 +0000
Subject: [PATCH] [project @ 2000-01-28 18:07:55 by sewardj] Modifications to
 make x86 register spilling to work reasonably.  It should
 work ok most of the time, although there is still a remote
 possibility that the allocator simply will be unable to
 complete spilling, and will just give up.

-- Incrementally try with 0, 1, 2 and 3 spill regs, so as not to
   unduly restrict the supply of regs in code which doesn't need spilling.

-- Remove the use of %ecx for shift values, so it is always available
   as the first-choice spill temporary.  For code which doesn't do
   int division, make %edx and %eax available for spilling too.
   Shifts by a non-constant amount (very rare) are now done by
   a short test-and-jump sequence, so that %ecx is not tied up.

-- x86 FP: do sin, cos, tan in-line so we get the same answers as gcc.

-- Moved a little code around to remove recursive dependencies.

-- Fix a subtle bug in x86 regUsage, which could cause underestimation
   of live ranges.
---
 ghc/compiler/nativeGen/AsmCodeGen.lhs     |    4 +-
 ghc/compiler/nativeGen/AsmRegAlloc.lhs    |  115 ++++++++++++--------
 ghc/compiler/nativeGen/MachCode.lhs       |   86 +++++++++++++--
 ghc/compiler/nativeGen/MachMisc.hi-boot   |    3 +-
 ghc/compiler/nativeGen/MachMisc.hi-boot-5 |    3 +-
 ghc/compiler/nativeGen/MachMisc.lhs       |    5 +
 ghc/compiler/nativeGen/MachRegs.lhs       |   39 +------
 ghc/compiler/nativeGen/PprMach.lhs        |   34 +++++-
 ghc/compiler/nativeGen/RegAllocInfo.lhs   |  162 +++++++++++++++++++++++------
 9 files changed, 329 insertions(+), 122 deletions(-)
diff --git a/ghc/compiler/nativeGen/AsmCodeGen.lhs b/ghc/compiler/nativeGen/AsmCodeGen.lhs
index aa5d4e4..31c3825 100644
--- a/ghc/compiler/nativeGen/AsmCodeGen.lhs
+++ b/ghc/compiler/nativeGen/AsmCodeGen.lhs
@@ -21,7 +21,7 @@ import AbsCSyn		( AbstractC, MagicId )
 import AsmRegAlloc	( runRegAllocate )
 import OrdList		( OrdList )
 import PrimOp		( commutableOp, PrimOp(..) )
-import RegAllocInfo	( mkMRegsState, MRegsState )
+import RegAllocInfo	( mkMRegsState, MRegsState, findReservedRegs )
 import Stix		( StixTree(..), StixReg(..), 
                           pprStixTrees, CodeSegment(..) )
 import PrimRep		( isFloatingRep, PrimRep(..) )
@@ -130,7 +130,7 @@ might be needed.
 scheduleMachCode :: [InstrList] -> [[Instr]]
 
 scheduleMachCode
-  = map (runRegAllocate freeRegsState reservedRegs)
+  = map (runRegAllocate freeRegsState findReservedRegs)
   where
     freeRegsState = mkMRegsState (extractMappedRegNos freeRegs)
 \end{code}
diff --git a/ghc/compiler/nativeGen/AsmRegAlloc.lhs b/ghc/compiler/nativeGen/AsmRegAlloc.lhs
index 9a6fca0..2ddb991 100644
--- a/ghc/compiler/nativeGen/AsmRegAlloc.lhs
+++ b/ghc/compiler/nativeGen/AsmRegAlloc.lhs
@@ -31,24 +31,38 @@ things the hard way.
 \begin{code}
 runRegAllocate
     :: MRegsState
-    -> [RegNo]
+    -> ([Instr] -> [[RegNo]])
     -> InstrList
     -> [Instr]
 
-runRegAllocate regs reserve_regs instrs
+runRegAllocate regs find_reserve_regs instrs
   = case simpleAlloc of
-	Just x  -> x
-	Nothing -> hairyAlloc
+	Just simple -> simple
+	Nothing     -> tryHairy reserves
   where
-    flatInstrs	= flattenOrdList instrs
-    simpleAlloc = simpleRegAlloc regs [] emptyFM   flatInstrs
-    hairyAlloc	= hairyRegAlloc  regs reserve_regs flatInstrs
+    tryHairy [] 
+       = error "nativeGen: register allocator: too difficult!  Try -fvia-C.\n"
+    tryHairy (resv:resvs)
+       = case hairyAlloc resv of
+            Just success -> success
+            Nothing      -> fooble resvs (tryHairy resvs)
+
+    fooble [] x = x
+    fooble (resvs:_) x = trace ("nativeGen: spilling with " 
+                                ++ show (length resvs - 2) ++ 
+                                " int temporaries") x
+
+    reserves         = find_reserve_regs flatInstrs
+    flatInstrs       = flattenOrdList instrs
+    simpleAlloc      = simpleRegAlloc regs [] emptyFM   flatInstrs
+    hairyAlloc resvd = hairyRegAlloc  regs resvd flatInstrs
 
-runHairyRegAllocate		-- use only hairy for i386!
+
+runHairyRegAllocate
     :: MRegsState
     -> [RegNo]
     -> InstrList
-    -> [Instr]
+    -> Maybe [Instr]
 
 runHairyRegAllocate regs reserve_regs instrs
   = hairyRegAlloc regs reserve_regs flatInstrs
@@ -83,7 +97,8 @@ simpleRegAlloc free live env (instr:instrs)
   where
     instr3 = patchRegs instr (lookup env2)
 
-    (srcs, dsts) = case regUsage instr of (RU s d) -> (regSetToList s, regSetToList d)
+    (srcs, dsts) = case regUsage instr of 
+                      (RU s d) -> (regSetToList s, regSetToList d)
 
     lookup env x = case lookupFM env x of Just y -> y; Nothing -> x
 
@@ -121,40 +136,49 @@ Here is the ``clever'' bit. First go backward (i.e. left), looking for
 the last use of dynamic registers. Then go forward (i.e. right), filling
 registers with static placements.
 
+hairyRegAlloc takes reserve_regs as the regs to use as spill
+temporaries.  First it tries to allocate using all regs except
+reserve_regs.  If that fails, it inserts spill code and tries again to
+allocate regs, but this time with the spill temporaries available.
+Even this might not work if there are insufficient spill temporaries:
+in the worst case on x86, we'd need 3 of them, for insns like
+addl (reg1,reg2,4) reg3, since this insn uses all 3 regs as input.
+
 \begin{code}
 hairyRegAlloc
     :: MRegsState
     -> [RegNo]
     -> [Instr]
-    -> [Instr]
+    -> Maybe [Instr]
 
 hairyRegAlloc regs reserve_regs instrs =
-  case mapAccumB (doRegAlloc reserve_regs) (RH regs' 1 emptyFM) noFuture instrs of 
-   (RH _ mloc1 _, _, instrs')
-     | mloc1 == 1 -> instrs'
-     | otherwise  ->
-      let
-       instrs_patched' = patchMem instrs'
-       instrs_patched  = flattenOrdList instrs_patched'
-      in
-      case mapAccumB do_RegAlloc_Nil (RH regs'' mloc1 emptyFM) noFuture instrs_patched of
-	 ((RH _ mloc2 _),_,instrs'') 
-	    | mloc2 == mloc1 -> instrs'' 
-            | otherwise      -> instrs''
-	       --pprPanic "runRegAllocate" (ppr mloc2 <+> ppr mloc1)
+  case mapAccumB (doRegAlloc reserve_regs) 
+                 (RH regs' 1 emptyFM) noFuture instrs of 
+     (RH _ mloc1 _, _, instrs')
+        -- succeeded w/out using reserves
+        | mloc1 == 1 -> Just instrs'
+        -- failed, and no reserves avail, so pointless to attempt spilling 
+        | null reserve_regs -> Nothing
+        -- failed, but we have reserves, so attempt to do spilling
+        | otherwise  
+        -> let instrs_patched' = patchMem instrs'
+               instrs_patched  = flattenOrdList instrs_patched'
+           in
+               case mapAccumB (doRegAlloc []) (RH regs'' mloc1 emptyFM) 
+                    noFuture instrs_patched of
+                  ((RH _ mloc2 _),_,instrs'') 
+                     -- successfully allocated the patched code
+        	     | mloc2 == mloc1 -> Just instrs''
+                     -- no; we have to give up
+                     | otherwise      -> Nothing 
+                       -- instrs''
+	               -- pprPanic "runRegAllocate" (ppr mloc2 <+> ppr mloc1)
   where
     regs'  = regs `useMRegs` reserve_regs
     regs'' = mkMRegsState reserve_regs
 
-do_RegAlloc_Nil = doRegAlloc [] -- out here to avoid CAF (sigh)
-do_RegAlloc_Nil
-    :: RegHistory MRegsState
-    -> RegFuture
-    -> Instr
-    -> (RegHistory MRegsState, RegFuture, Instr)
-
-noFuture :: RegFuture
-noFuture = RF emptyRegSet (FL emptyRegSet emptyFM) emptyFM
+    noFuture :: RegFuture
+    noFuture = RF emptyRegSet (FL emptyRegSet emptyFM) emptyFM
 \end{code}
 
 Here we patch instructions that reference ``registers'' which are really in
@@ -225,7 +249,8 @@ getUsage (RF next_in_use future reg_conflicts) instr
 	       (RL in_use future') = regLiveness instr (RL next_in_use future)
 	       live_through = in_use `minusRegSet` dsts
 	       last_used = [ r | r <- regSetToList srcs,
-			     not (r `elementOfRegSet` (fstFL future) || r `elementOfRegSet` in_use)]
+			     not (r `elementOfRegSet` (fstFL future) 
+                                  || r `elementOfRegSet` in_use)]
 
 	       in_use' = srcs `unionRegSets` live_through
 
@@ -245,7 +270,9 @@ getUsage (RF next_in_use future reg_conflicts) instr
 		  Nothing        -> live_dynamics
 		  Just conflicts -> conflicts `unionRegSets` live_dynamics
 
-	       live_dynamics = mkRegSet [ r | r@(UnmappedReg _ _) <- regSetToList live_through ]
+	       live_dynamics 
+                  = mkRegSet [ r | r@(UnmappedReg _ _) 
+                                      <- regSetToList live_through ]
 
 doRegAlloc'
     :: [RegNo]
@@ -254,7 +281,8 @@ doRegAlloc'
     -> Instr
     -> (RegHistory MRegsState, Instr)
 
-doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) instr =
+doRegAlloc' reserved (RH frs loc env) 
+                     (RI in_use srcs dsts lastu conflicts) instr =
 
     (RH frs'' loc' env'', patchRegs instr dynToStatic)
 
@@ -264,14 +292,17 @@ doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) inst
       free :: [RegNo]
       free = extractMappedRegNos (map dynToStatic lastu)
 
-      -- (1) free registers that are used last as source operands in this instruction
-      frs_not_in_use = frs `useMRegs` (extractMappedRegNos (regSetToList in_use))
+      -- (1) free registers that are used last as 
+      --     source operands in this instruction
+      frs_not_in_use = frs `useMRegs` 
+                       (extractMappedRegNos (regSetToList in_use))
       frs' = (frs_not_in_use `freeMRegs` free) `useMRegs` reserved
 
       -- (2) allocate new registers for the destination operands
       -- allocate registers for new dynamics
 
-      new_dynamix = [ r | r@(UnmappedReg _ _) <- regSetToList dsts, r `not_elem` keysFM env ]
+      new_dynamix = [ r | r@(UnmappedReg _ _) <- regSetToList dsts, 
+                          r `not_elem` keysFM env ]
 
       (frs'', loc', new) = foldr allocateNewRegs (frs', loc, []) new_dynamix
 
@@ -283,14 +314,16 @@ doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) inst
       dynToStatic dyn@(UnmappedReg _ _) =
 	case lookupFM env' dyn of
 	    Just r -> r
-	    Nothing -> trace "Lost register; possibly a floating point type error in a _ccall_?" dyn
+	    Nothing -> trace ("Lost register; possibly a floating point"
+                              ++" type error in a _ccall_?") dyn
       dynToStatic other = other
 
       allocateNewRegs :: Reg 
                       -> (MRegsState, Int, [(Reg, Reg)]) 
 		      -> (MRegsState, Int, [(Reg, Reg)])
 
-      allocateNewRegs d@(UnmappedReg _ pk) (fs, mem, lst) = (fs', mem', (d, f) : lst)
+      allocateNewRegs d@(UnmappedReg _ pk) (fs, mem, lst) 
+         = (fs', mem', (d, f) : lst)
 	where 
 	 (fs', f, mem') = 
 	   case acceptable fs of
diff --git a/ghc/compiler/nativeGen/MachCode.lhs b/ghc/compiler/nativeGen/MachCode.lhs
index a4bd777..b38b24b 100644
--- a/ghc/compiler/nativeGen/MachCode.lhs
+++ b/ghc/compiler/nativeGen/MachCode.lhs
@@ -34,7 +34,6 @@ import UniqSupply	( returnUs, thenUs, mapUs, mapAndUnzipUs,
 			  mapAccumLUs, UniqSM
 			)
 import Outputable
-import PprMach 		( pprSize )
 \end{code}
 
 Code extractor for an entire stix tree---stix statement level.
@@ -499,6 +498,15 @@ getRegister (StPrim primop [x]) -- unary PrimOps
       FloatSqrtOp  -> trivialUFCode FloatRep  (GSQRT F) x
       DoubleSqrtOp -> trivialUFCode DoubleRep (GSQRT DF) x
 
+      FloatSinOp  -> trivialUFCode FloatRep  (GSIN F) x
+      DoubleSinOp -> trivialUFCode DoubleRep (GSIN DF) x
+
+      FloatCosOp  -> trivialUFCode FloatRep  (GCOS F) x
+      DoubleCosOp -> trivialUFCode DoubleRep (GCOS DF) x
+
+      FloatTanOp  -> trivialUFCode FloatRep  (GTAN F) x
+      DoubleTanOp -> trivialUFCode DoubleRep (GTAN DF) x
+
       Double2FloatOp -> trivialUFCode FloatRep  GDTOF x
       Float2DoubleOp -> trivialUFCode DoubleRep GFTOD x
 
@@ -523,9 +531,9 @@ getRegister (StPrim primop [x]) -- unary PrimOps
 	      FloatExpOp    -> (True,  SLIT("exp"))
 	      FloatLogOp    -> (True,  SLIT("log"))
 
-	      FloatSinOp    -> (True,  SLIT("sin"))
-	      FloatCosOp    -> (True,  SLIT("cos"))
-	      FloatTanOp    -> (True,  SLIT("tan"))
+	      --FloatSinOp    -> (True,  SLIT("sin"))
+	      --FloatCosOp    -> (True,  SLIT("cos"))
+	      --FloatTanOp    -> (True,  SLIT("tan"))
 
 	      FloatAsinOp   -> (True,  SLIT("asin"))
 	      FloatAcosOp   -> (True,  SLIT("acos"))
@@ -538,9 +546,9 @@ getRegister (StPrim primop [x]) -- unary PrimOps
 	      DoubleExpOp   -> (False, SLIT("exp"))
 	      DoubleLogOp   -> (False, SLIT("log"))
 
-	      DoubleSinOp   -> (False, SLIT("sin"))
-	      DoubleCosOp   -> (False, SLIT("cos"))
-	      DoubleTanOp   -> (False, SLIT("tan"))
+	      --DoubleSinOp   -> (False, SLIT("sin"))
+	      --DoubleCosOp   -> (False, SLIT("cos"))
+	      --DoubleTanOp   -> (False, SLIT("tan"))
 
 	      DoubleAsinOp  -> (False, SLIT("asin"))
 	      DoubleAcosOp  -> (False, SLIT("acos"))
@@ -674,6 +682,65 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 
       {- Case2: shift length is complex (non-immediate) -}
     shift_code instr x y{-amount-}
+     = getRegister x   `thenUs` \ register1 ->
+       getRegister y   `thenUs` \ register2 ->
+       getUniqLabelNCG `thenUs` \ lbl_test3 ->
+       getUniqLabelNCG `thenUs` \ lbl_test2 ->
+       getUniqLabelNCG `thenUs` \ lbl_test1 ->
+       getUniqLabelNCG `thenUs` \ lbl_test0 ->
+       getUniqLabelNCG `thenUs` \ lbl_after ->
+       getNewRegNCG IntRep   `thenUs` \ tmp ->
+       let code__2 dst
+              = let src_val  = registerName register1 dst
+                    code_val = registerCode register1 dst
+                    src_amt  = registerName register2 tmp
+                    code_amt = registerCode register2 tmp
+                    r_dst    = OpReg dst
+                    r_tmp    = OpReg tmp
+                in
+                    code_val .
+                    code_amt .
+                    mkSeqInstrs [
+                       COMMENT (_PK_ "begin shift sequence"),
+                       MOV L (OpReg src_val) r_dst,
+                       MOV L (OpReg src_amt) r_tmp,
+
+                       BT L (ImmInt 4) r_tmp,
+                       JXX GEU lbl_test3,
+                       instr (OpImm (ImmInt 16)) r_dst,
+
+                       LABEL lbl_test3,
+                       BT L (ImmInt 3) r_tmp,
+                       JXX GEU lbl_test2,
+                       instr (OpImm (ImmInt 8)) r_dst,
+
+                       LABEL lbl_test2,
+                       BT L (ImmInt 2) r_tmp,
+                       JXX GEU lbl_test1,
+                       instr (OpImm (ImmInt 4)) r_dst,
+
+                       LABEL lbl_test1,
+                       BT L (ImmInt 1) r_tmp,
+                       JXX GEU lbl_test0,
+                       instr (OpImm (ImmInt 2)) r_dst,
+
+                       LABEL lbl_test0,
+                       BT L (ImmInt 0) r_tmp,
+                       JXX GEU lbl_after,
+                       instr (OpImm (ImmInt 1)) r_dst,
+                       LABEL lbl_after,
+                                           
+                       COMMENT (_PK_ "end shift sequence")
+                    ]
+       in
+       returnUs (Any IntRep code__2)
+
+{-
+     -- since ECX is always used as a spill temporary, we can't
+     -- use it here to do non-immediate shifts.  No big deal --
+     -- they are only very rare, and we can give an equivalent
+     -- insn sequence which doesn't use ECX.
+     -- DO NOT USE THIS CODE, SINCE IT IS INCOMPATIBLE WITH THE SPILLER
      = getRegister y		`thenUs` \ register1 ->  
        getRegister x		`thenUs` \ register2 ->
        let
@@ -699,6 +766,7 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps
 		      mkSeqInstr (instr (OpReg ecx) (OpReg eax))
        in
        returnUs (Fixed IntRep eax code__2)
+-}
 
     --------------------
     add_code :: Size -> StixTree -> StixTree -> UniqSM Register
@@ -2441,10 +2509,10 @@ condIntReg cond x y
 	code = condCode condition
 	cond = condName condition
 	-- ToDo: if dst is eax, ebx, ecx, or edx we would not need the move.
-	code__2 dst = code . mkSeqInstrs [
+	code__2 dst = code . mkSeqInstrs [COMMENT (_PK_ "aaaaa"),
 	    SETCC cond (OpReg tmp),
 	    AND L (OpImm (ImmInt 1)) (OpReg tmp),
-	    MOV L (OpReg tmp) (OpReg dst)]
+	    MOV L (OpReg tmp) (OpReg dst) ,COMMENT (_PK_ "bbbbb")]
     in
     returnUs (Any IntRep code__2)
 
diff --git a/ghc/compiler/nativeGen/MachMisc.hi-boot b/ghc/compiler/nativeGen/MachMisc.hi-boot
index 91f6330..242c93a 100644
--- a/ghc/compiler/nativeGen/MachMisc.hi-boot
+++ b/ghc/compiler/nativeGen/MachMisc.hi-boot
@@ -1,7 +1,8 @@
 _interface_ MachMisc 1
 _exports_
-MachMisc fixedHdrSize fmtAsmLbl underscorePrefix;
+MachMisc Instr fixedHdrSize fmtAsmLbl underscorePrefix;
 _declarations_
 1 fixedHdrSize _:_ PrelBase.Int ;;
 2 fmtAsmLbl _:_ PrelBase.String -> PrelBase.String ;;
 1 underscorePrefix _:_ PrelBase.Bool ;;
+1 data Instr;
\ No newline at end of file
diff --git a/ghc/compiler/nativeGen/MachMisc.hi-boot-5 b/ghc/compiler/nativeGen/MachMisc.hi-boot-5
index 6fb5f9e..8c2a6f2 100644
--- a/ghc/compiler/nativeGen/MachMisc.hi-boot-5
+++ b/ghc/compiler/nativeGen/MachMisc.hi-boot-5
@@ -1,5 +1,6 @@
 __interface MachMisc 1 0 where
-__export MachMisc fixedHdrSize fmtAsmLbl underscorePrefix;
+__export MachMisc Instr fixedHdrSize fmtAsmLbl underscorePrefix;
 1 fixedHdrSize :: PrelBase.Int ;
 2 fmtAsmLbl :: PrelBase.String -> PrelBase.String ;
 1 underscorePrefix :: PrelBase.Bool ;
+1 data Instr ;
diff --git a/ghc/compiler/nativeGen/MachMisc.lhs b/ghc/compiler/nativeGen/MachMisc.lhs
index d31af20..893bf87 100644
--- a/ghc/compiler/nativeGen/MachMisc.lhs
+++ b/ghc/compiler/nativeGen/MachMisc.lhs
@@ -507,6 +507,7 @@ current translation.
 	      | SAR	      Size Operand Operand -- 1st operand must be an Imm or CL
 	      | SHR	      Size Operand Operand -- 1st operand must be an Imm or CL
 	      | NOP
+              | BT            Size Imm Operand
 
 -- Float Arithmetic. -- ToDo for 386
 
@@ -539,6 +540,9 @@ current translation.
      	      | GABS	      Size Reg Reg -- src, dst
     	      | GNEG	      Size Reg Reg -- src, dst
     	      | GSQRT	      Size Reg Reg -- src, dst
+    	      | GSIN	      Size Reg Reg -- src, dst
+    	      | GCOS	      Size Reg Reg -- src, dst
+    	      | GTAN	      Size Reg Reg -- src, dst
 
               | GFREE         -- do ffree on all x86 regs; an ugly hack
 -- Comparison
@@ -598,6 +602,7 @@ is_G_instr instr
 	GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
     	GCMP _ _ _ -> True; GABS _ _ _ -> True
     	GNEG _ _ _ -> True; GSQRT _ _ _ -> True
+        GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;
         GFREE -> panic "is_G_instr: GFREE (!)"
         other -> False
 
diff --git a/ghc/compiler/nativeGen/MachRegs.lhs b/ghc/compiler/nativeGen/MachRegs.lhs
index f32024f..446e7dd 100644
--- a/ghc/compiler/nativeGen/MachRegs.lhs
+++ b/ghc/compiler/nativeGen/MachRegs.lhs
@@ -26,13 +26,13 @@ module MachRegs (
 	callClobberedRegs,
 	callerSaves,
 	extractMappedRegNos,
+        mappedRegNo,
 	freeMappedRegs,
 	freeReg, freeRegs,
 	getNewRegNCG,
 	magicIdRegMaybe,
 	mkReg,
 	realReg,
-	reservedRegs,
 	saveLoc,
 	spRel,
 	stgReg,
@@ -336,6 +336,10 @@ extractMappedRegNos regs
   where
     ex (MappedReg i) acc = IBOX(i) : acc  -- we'll take it
     ex _	     acc = acc		  -- leave it out
+
+mappedRegNo :: Reg -> RegNo
+mappedRegNo (MappedReg i) = IBOX(i)
+mappedRegNo _             = pprPanic "mappedRegNo" empty
 \end{code}
 
 ** Machine-specific Reg stuff: **
@@ -733,40 +737,7 @@ magicIdRegMaybe HpLim		   	= Just (FixedReg ILIT(REG_HpLim))
 magicIdRegMaybe _		   	= Nothing
 \end{code}
 
-%************************************************************************
-%*									*
-\subsection{Free, reserved, call-clobbered, and argument registers}
-%*									*
-%************************************************************************
-
-@freeRegs@ is the list of registers we can use in register allocation.
-@freeReg@ (below) says if a particular register is free.
-
-With a per-instruction clobber list, we might be able to get some of
-these back, but it's probably not worth the hassle.
-
-@callClobberedRegs@ ... the obvious.
-
-@argRegs@: assuming a call with N arguments, what registers will be
-used to hold arguments?  (NB: it doesn't know whether the arguments
-are integer or floating-point...)
-
 \begin{code}
-reservedRegs :: [RegNo]
-reservedRegs
-#if alpha_TARGET_ARCH
-  = [NCG_Reserved_I1, NCG_Reserved_I2,
-     NCG_Reserved_F1, NCG_Reserved_F2]
-#endif
-#if i386_TARGET_ARCH
-  = [{-certainly cannot afford any!-}]
-#endif
-#if sparc_TARGET_ARCH
-  = [NCG_Reserved_I1, NCG_Reserved_I2,
-     NCG_Reserved_F1, NCG_Reserved_F2,
-     NCG_Reserved_D1, NCG_Reserved_D2]
-#endif
-
 -------------------------------
 freeRegs :: [Reg]
 freeRegs
diff --git a/ghc/compiler/nativeGen/PprMach.lhs b/ghc/compiler/nativeGen/PprMach.lhs
index e35e22c..6232f37 100644
--- a/ghc/compiler/nativeGen/PprMach.lhs
+++ b/ghc/compiler/nativeGen/PprMach.lhs
@@ -941,7 +941,7 @@ pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack
 #ifdef DEBUG
     (<>) (ptext SLIT("# warning: ")) (pprSizeOpOp SLIT("mov") size s d)
 #else
-    (ptext SLIT(""))
+    empty
 #endif
 pprInstr (MOV size src dst)
   = pprSizeOpOp SLIT("mov") size src dst
@@ -977,9 +977,9 @@ pprInstr (XOR size src dst) = pprSizeOpOp SLIT("xor")  size src dst
 pprInstr (NOT size op) = pprSizeOp SLIT("not") size op
 pprInstr (NEGI size op) = pprSizeOp SLIT("neg") size op
 
-pprInstr (SHL size imm dst) = pprSizeByteOpOp SLIT("shl")  size imm dst
-pprInstr (SAR size imm dst) = pprSizeByteOpOp SLIT("sar")  size imm dst
-pprInstr (SHR size imm dst) = pprSizeByteOpOp SLIT("shr")  size imm dst
+pprInstr (SHL size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("shl")  size imm dst
+pprInstr (SAR size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("sar")  size imm dst
+pprInstr (SHR size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("shr")  size imm dst
 
 pprInstr (CMP size src dst) = pprSizeOpOp SLIT("cmp")  size src dst
 pprInstr (TEST size src dst) = pprSizeOpOp SLIT("test")  size src dst
@@ -989,6 +989,7 @@ pprInstr PUSHA = ptext SLIT("\tpushal")
 pprInstr POPA = ptext SLIT("\tpopal")
 
 pprInstr (NOP) = ptext SLIT("\tnop")
+pprInstr (BT size imm src) = pprSizeImmOp SLIT("bt") size imm src
 pprInstr (CLTD) = ptext SLIT("\tcltd")
 
 pprInstr (SETCC cond op) = pprCondInstr SLIT("set") cond (pprOperand B op)
@@ -1047,6 +1048,15 @@ pprInstr g@(GNEG sz src dst)
    = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
 pprInstr g@(GSQRT sz src dst)
    = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt ; ", gpop dst 1])
+pprInstr g@(GSIN sz src dst)
+   = pprG g (hcat [gtab, gpush src 0, text " ; fsin ; ", gpop dst 1])
+pprInstr g@(GCOS sz src dst)
+   = pprG g (hcat [gtab, gpush src 0, text " ; fcos ; ", gpop dst 1])
+
+pprInstr g@(GTAN sz src dst)
+   = pprG g (hcat [gtab, text "ffree %st(6) ; ",
+                   gpush src 0, text " ; fptan ; ", 
+                   text " fstp %st(0) ; ", gpop dst 1])
 
 pprInstr g@(GADD sz src1 src2 dst)
    = pprG g (hcat [gtab, gpush src1 0, 
@@ -1106,6 +1116,9 @@ pprGInstr (GCMP sz src dst) = pprSizeRegReg SLIT("gcmp") sz src dst
 pprGInstr (GABS sz src dst) = pprSizeRegReg SLIT("gabs") sz src dst
 pprGInstr (GNEG sz src dst) = pprSizeRegReg SLIT("gneg") sz src dst
 pprGInstr (GSQRT sz src dst) = pprSizeRegReg SLIT("gsqrt") sz src dst
+pprGInstr (GSIN sz src dst) = pprSizeRegReg SLIT("gsin") sz src dst
+pprGInstr (GCOS sz src dst) = pprSizeRegReg SLIT("gcos") sz src dst
+pprGInstr (GTAN sz src dst) = pprSizeRegReg SLIT("gtan") sz src dst
 
 pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg SLIT("gadd") sz src1 src2 dst
 pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg SLIT("gsub") sz src1 src2 dst
@@ -1124,6 +1137,19 @@ pprOperand s (OpReg r) = pprReg s r
 pprOperand s (OpImm i) = pprDollImm i
 pprOperand s (OpAddr ea) = pprAddr ea
 
+pprSizeImmOp :: FAST_STRING -> Size -> Imm -> Operand -> SDoc
+pprSizeImmOp name size imm op1
+  = hcat [
+        char '\t',
+	ptext name,
+	pprSize size,
+	space,
+	char '$',
+	pprImm imm,
+	comma,
+	pprOperand size op1
+    ]
+	
 pprSizeOp :: FAST_STRING -> Size -> Operand -> SDoc
 pprSizeOp name size op1
   = hcat [
diff --git a/ghc/compiler/nativeGen/RegAllocInfo.lhs b/ghc/compiler/nativeGen/RegAllocInfo.lhs
index eab566c..c1bd50c 100644
--- a/ghc/compiler/nativeGen/RegAllocInfo.lhs
+++ b/ghc/compiler/nativeGen/RegAllocInfo.lhs
@@ -35,6 +35,7 @@ module RegAllocInfo (
 	patchRegs,
 	regLiveness,
 	spillReg,
+	IF_ARCH_i386(findReservedRegs COMMA,)
 
 	RegSet,
 	elementOfRegSet,
@@ -64,7 +65,6 @@ import OrdList		( mkUnitList )
 import PrimRep		( PrimRep(..) )
 import UniqSet		-- quite a bit of it
 import Outputable
-import PprMach		( pprInstr )
 \end{code}
 
 %************************************************************************
@@ -354,22 +354,24 @@ regUsage instr = case instr of
 #if i386_TARGET_ARCH
 
 regUsage instr = case instr of
-    MOV  sz src dst	-> usage2 src dst
-    MOVZxL sz src dst	-> usage2 src dst
-    MOVSxL sz src dst	-> usage2 src dst
-    LEA  sz src dst	-> usage2 src dst
-    ADD  sz src dst	-> usage2 src dst
-    SUB  sz src dst	-> usage2 src dst
-    IMUL sz src dst	-> usage2 src dst
+    MOV  sz src dst	-> usage2  src dst
+    MOVZxL sz src dst	-> usage2  src dst
+    MOVSxL sz src dst	-> usage2  src dst
+    LEA  sz src dst	-> usage2  src dst
+    ADD  sz src dst	-> usage2s src dst
+    SUB  sz src dst	-> usage2s src dst
+    IMUL sz src dst	-> usage2s src dst
     IDIV sz src		-> usage (eax:edx:opToReg src) [eax,edx]
-    AND  sz src dst	-> usage2 src dst
-    OR   sz src dst	-> usage2 src dst
-    XOR  sz src dst	-> usage2 src dst
+    AND  sz src dst	-> usage2s src dst
+    OR   sz src dst	-> usage2s src dst
+    XOR  sz src dst	-> usage2s src dst
     NOT  sz op		-> usage1 op
     NEGI sz op		-> usage1 op
-    SHL  sz dst len	-> usage2 dst len -- len is either an Imm or ecx.
-    SAR  sz dst len	-> usage2 dst len -- len is either an Imm or ecx.
-    SHR  sz len dst	-> usage2 dst len -- len is either an Imm or ecx.
+    SHL  sz len dst	-> usage2s len dst -- len is either an Imm or ecx.
+    SAR  sz len dst	-> usage2s len dst -- len is either an Imm or ecx.
+    SHR  sz len dst	-> usage2s len dst -- len is either an Imm or ecx.
+    BT   sz imm src	-> usage (opToReg src) []
+
     PUSH sz op		-> usage (opToReg op) []
     POP  sz op		-> usage [] (opToReg op)
     TEST sz src dst	-> usage (opToReg src ++ opToReg dst) []
@@ -403,21 +405,35 @@ regUsage instr = case instr of
     GABS sz src dst	-> usage [src] [dst]
     GNEG sz src dst	-> usage [src] [dst]
     GSQRT sz src dst	-> usage [src] [dst]
+    GSIN sz src dst	-> usage [src] [dst]
+    GCOS sz src dst	-> usage [src] [dst]
+    GTAN sz src dst	-> usage [src] [dst]
 
     COMMENT _		-> noUsage
     SEGMENT _ 		-> noUsage
     LABEL _		-> noUsage
     ASCII _ _		-> noUsage
     DATA _ _		-> noUsage
-    _			-> error ("regUsage(x86): " ++ showSDoc (pprInstr instr))
+    _			-> pprPanic "regUsage(x86) " empty
+
  where
+    -- 2 operand form in which the second operand is purely a destination
     usage2 :: Operand -> Operand -> RegUsage
     usage2 op (OpReg reg) = usage (opToReg op) [reg]
     usage2 op (OpAddr ea) = usage (opToReg op ++ addrToRegs ea) []
     usage2 op (OpImm imm) = usage (opToReg op) []
+
+    -- 2 operand form in which the second operand is also an input
+    usage2s :: Operand -> Operand -> RegUsage
+    usage2s op (OpReg reg) = usage (opToReg op ++ [reg]) [reg]
+    usage2s op (OpAddr ea) = usage (opToReg op ++ addrToRegs ea) []
+    usage2s op (OpImm imm) = usage (opToReg op) []
+
+    -- 1 operand form in which the operand is both used and written
     usage1 :: Operand -> RegUsage
     usage1 (OpReg reg)    = usage [reg] [reg]
     usage1 (OpAddr ea)    = usage (addrToRegs ea) []
+
     allFPRegs = [fake0,fake1,fake2,fake3,fake4,fake5]
 
     --callClobberedRegs = [ eax, ecx, edx ] -- according to gcc, anyway.
@@ -442,6 +458,14 @@ regUsage instr = case instr of
     interesting (FixedReg _) = False
     interesting _ = True
 
+
+-- Allow the spiller to decide whether or not it can use 
+-- %eax and %edx as spill temporaries.
+hasFixedEAXorEDX instr = case instr of
+    IDIV _ _ -> True
+    CLTD     -> True
+    other    -> False
+
 #endif {- i386_TARGET_ARCH -}
 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 #if sparc_TARGET_ARCH
@@ -495,6 +519,71 @@ regUsage instr = case instr of
 #endif {- sparc_TARGET_ARCH -}
 \end{code}
 
+
+%************************************************************************
+%*									*
+\subsection{Free, reserved, call-clobbered, and argument registers}
+%*									*
+%************************************************************************
+
+@freeRegs@ is the list of registers we can use in register allocation.
+@freeReg@ (below) says if a particular register is free.
+
+With a per-instruction clobber list, we might be able to get some of
+these back, but it's probably not worth the hassle.
+
+@callClobberedRegs@ ... the obvious.
+
+@argRegs@: assuming a call with N arguments, what registers will be
+used to hold arguments?  (NB: it doesn't know whether the arguments
+are integer or floating-point...)
+
+findReservedRegs tells us which regs can be used as spill temporaries.
+The list of instructions for which we are attempting allocation is
+supplied.  This is so that we can (at least for x86) examine it to
+discover which registers are being used in a fixed way -- for example,
+%eax and %edx are used by integer division, so they can't be used as
+spill temporaries.  However, most instruction lists don't do integer
+division, so we don't want to rule them out altogether.
+
+findReservedRegs returns not a list of spill temporaries, but a list
+of list of them.  This is so that the allocator can attempt allocating
+with at first no spill temps, then if that fails, increasing numbers.
+For x86 it is important that we minimise the number of regs reserved
+as spill temporaries, since there are so few.  For Alpha and Sparc
+this isn't a concern; we just ignore the supplied code list and return
+a singleton list which we know will satisfy all spill demands.
+
+\begin{code}
+findReservedRegs :: [Instr] -> [[RegNo]]
+findReservedRegs instrs
+#if alpha_TARGET_ARCH
+  = [[NCG_Reserved_I1, NCG_Reserved_I2,
+      NCG_Reserved_F1, NCG_Reserved_F2]]
+#endif
+#if sparc_TARGET_ARCH
+  = [[NCG_Reserved_I1, NCG_Reserved_I2,
+      NCG_Reserved_F1, NCG_Reserved_F2,
+      NCG_Reserved_D1, NCG_Reserved_D2]]
+#endif
+#if i386_TARGET_ARCH
+    -- Sigh.  This is where it gets complicated.
+  = -- first of all, try without any at all.
+    map (map mappedRegNo) (
+    [ [],
+    -- if that doesn't work, try one integer reg (which might fail)
+    -- and two float regs (which will always fix any float insns)
+      [ecx, fake4,fake5]
+    ]
+    -- dire straits (but still correct): see if we can bag %eax and %edx
+    ++ if   any hasFixedEAXorEDX instrs
+       then []  -- bummer
+       else [ [ecx,edx,fake4,fake5],
+              [ecx,edx,eax,fake4,fake5] ]
+    )
+#endif
+\end{code}
+
 %************************************************************************
 %*									*
 \subsection{@RegLiveness@ type; @regLiveness@ function}
@@ -655,6 +744,7 @@ patchRegs instr env = case instr of
     SHL  sz imm dst 	-> patch2 (SHL  sz) imm dst
     SAR  sz imm dst 	-> patch2 (SAR  sz) imm dst
     SHR  sz imm dst 	-> patch2 (SHR  sz) imm dst
+    BT   sz imm src     -> patch1 (BT sz imm) src
     TEST sz src dst	-> patch2 (TEST sz) src dst
     CMP  sz src dst	-> patch2 (CMP  sz) src dst
     PUSH sz op		-> patch1 (PUSH sz) op
@@ -684,6 +774,9 @@ patchRegs instr env = case instr of
     GABS sz src dst	-> GABS sz (env src) (env dst)
     GNEG sz src dst	-> GNEG sz (env src) (env dst)
     GSQRT sz src dst	-> GSQRT sz (env src) (env dst)
+    GSIN sz src dst	-> GSIN sz (env src) (env dst)
+    GCOS sz src dst	-> GCOS sz (env src) (env dst)
+    GTAN sz src dst	-> GTAN sz (env src) (env dst)
 
     COMMENT _		-> instr
     SEGMENT _ 		-> instr
@@ -693,7 +786,8 @@ patchRegs instr env = case instr of
     JXX _ _		-> instr
     CALL _		-> instr
     CLTD		-> instr
-    _			-> error ("patchInstr: " ++ showSDoc (pprInstr instr)) --instr
+    _			-> pprPanic "patchInstr(x86)" empty
+
   where
     patch1 insn op      = insn (patchOp op)
     patch2 insn src dst = insn (patchOp src) (patchOp dst)
@@ -761,7 +855,7 @@ patchRegs instr env = case instr of
 
 Spill to memory, and load it back...
 
-JRS, 000122: on x86, don't spill directly below the stack pointer, since 
+JRS, 000122: on x86, don't spill directly above the stack pointer, since 
 some insn sequences (int <-> conversions) use this as a temp location.
 Leave 16 bytes of slop.
 
@@ -769,36 +863,44 @@ Leave 16 bytes of slop.
 spillReg, loadReg :: Reg -> Reg -> InstrList
 
 spillReg dyn (MemoryReg i pk)
-  | i >= 0  -- JRS paranoia
-  = let
-	sz = primRepToSize pk
+  | i >= 0 -- JRS paranoia
+  = let	sz = primRepToSize pk
     in
     mkUnitList (
 	{-Alpha: spill below the stack pointer (?)-}
 	 IF_ARCH_alpha( ST sz dyn (spRel i)
 
 	{-I386: spill above stack pointer leaving 2 words/spill-}
-	,IF_ARCH_i386 ( if pk == FloatRep || pk == DoubleRep
-                        then GST DF dyn (spRel (16 + 2 * i))
-                        else MOV sz (OpReg dyn) (OpAddr (spRel (16 + 2 * i)))
+	,IF_ARCH_i386 ( let loc | i < 60    = 4 + 2 * i
+                                | otherwise = -2000 - 2 * i
+                        in
+                        if pk == FloatRep || pk == DoubleRep
+                        then GST DF dyn (spRel loc)
+                        else MOV sz (OpReg dyn) (OpAddr (spRel loc))
 
 	{-SPARC: spill below frame pointer leaving 2 words/spill-}
 	,IF_ARCH_sparc( ST sz dyn (fpRel (-2 * i))
         ,)))
     )
-
+  | otherwise
+  = pprPanic "spillReg:" (text "invalid spill location: " <> int i)
+   
 ----------------------------
 loadReg (MemoryReg i pk) dyn
-  | i >= 0  -- JRS paranoia
-  = let
-	sz = primRepToSize pk
+  | i >= 0 -- JRS paranoia
+  = let	sz = primRepToSize pk
     in
     mkUnitList (
 	 IF_ARCH_alpha( LD  sz dyn (spRel i)
-	,IF_ARCH_i386 ( if   pk == FloatRep || pk == DoubleRep
-                        then GLD DF (spRel (16 + 2 * i)) dyn
-                        else MOV sz (OpAddr (spRel (16 + 2 * i))) (OpReg dyn)
+	,IF_ARCH_i386 ( let loc | i < 60    = 4 + 2 * i
+                                | otherwise = -2000 - 2 * i
+                        in
+                        if   pk == FloatRep || pk == DoubleRep
+                        then GLD DF (spRel loc) dyn
+                        else MOV sz (OpAddr (spRel loc)) (OpReg dyn)
 	,IF_ARCH_sparc( LD  sz (fpRel (-2 * i)) dyn
 	,)))
     )
+  | otherwise
+  = pprPanic "loadReg:" (text "invalid spill location: " <> int i)
 \end{code}
-- 
1.7.10.4