From: Ben.Lippmeier@anu.edu.au <unknown>
Date: Wed, 14 Jan 2009 05:44:16 +0000 (+0000)
Subject: Start fixing the SPARC native code generator
X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=8db404018f101a182c92cc724a8de08cf5ab10ba

Start fixing the SPARC native code generator

  * Use BlockIds in branch instructions instead of Imms.
  * Assign FP values returned from C calls to the right regs
  * Fix loading of F32s
  * Add a SPARC version of the FreeRegs map to the linear allcator.
---

diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
index e62a477..90285bf 100644
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -29,7 +29,8 @@ import MachInstrs
 import MachRegs
 import NCGMonad
 import PositionIndependentCode
-import RegAllocInfo ( mkBranchInstr )
+import RegAllocInfo 	( mkBranchInstr, mkRegRegMoveInstr )
+import MachRegs
 
 -- Our intermediate code:
 import BlockId
@@ -2921,14 +2922,14 @@ genCondJump id bool = do
 
 #if sparc_TARGET_ARCH
 
-genCondJump (BlockId id) bool = do
+genCondJump bid bool = do
   CondCode is_float cond code <- getCondCode bool
   return (
        code `appOL` 
        toOL (
          if   is_float
-         then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
-         else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
+         then [NOP, BF cond False bid, NOP]
+         else [BI cond False bid, NOP]
        )
     )
 
@@ -3481,14 +3482,40 @@ genCCall target dest_regs argsAndHints = do
              in  if   nn <= 0
                  then (nilOL, nilOL)
                  else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+
         transfer_code
            = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+
+	-- assign the results, if necessary
+	assign_code []	= nilOL
+	
+	assign_code [CmmHinted dest _hint]	
+	 = let	rep	= localRegType dest
+		width	= typeWidth rep
+		r_dest 	= getRegisterReg (CmmLocal dest)
+
+	 	result
+		 	| isFloatType rep 
+			, W32	<- width
+			= unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+			
+			| isFloatType rep
+			, W64	<- width
+			= unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+			
+			| not $ isFloatType rep
+			, W32	<- width
+			= unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+			
+	   in	result
+				
     return (argcode       `appOL`
             move_sp_down  `appOL`
             transfer_code `appOL`
             callinsns     `appOL`
             unitOL NOP    `appOL`
-            move_sp_up)
+            move_sp_up	  `appOL`
+	    assign_code dest_regs)
   where
      -- move args from the integer vregs into which they have been 
      -- marshalled, into %o0 .. %o5, and the rest onto the stack.
@@ -3520,7 +3547,8 @@ genCCall target dest_regs argsAndHints = do
 	  (src, code) <- getSomeReg arg
           tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
           let
-              pk   = cmmExprType arg
+              pk   	= cmmExprType arg
+              Just f0_high = fPair f0
           case cmmTypeSize pk of
              FF64 -> do
                       v1 <- getNewRegNat II32
@@ -3530,7 +3558,7 @@ genCCall target dest_regs argsAndHints = do
                         FMOV FF64 src f0                `snocOL`
                         ST   FF32  f0 (spRel 16)         `snocOL`
                         LD   II32  (spRel 16) v1         `snocOL`
-                        ST   FF32  (fPair f0) (spRel 16) `snocOL`
+                        ST   FF32  f0_high (spRel 16) `snocOL`
                         LD   II32  (spRel 16) v2
                         ,
                         [v1,v2]
@@ -4149,32 +4177,32 @@ condIntReg NE x y = do
     return (Any II32 code__2)
 
 condIntReg cond x y = do
-    BlockId lbl1 <- getBlockIdNat
-    BlockId lbl2 <- getBlockIdNat
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
     CondCode _ cond cond_code <- condIntCode cond x y
     let
 	code__2 dst = cond_code `appOL` toOL [
-	    BI cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+	    BI cond False bid1, NOP,
 	    OR False g0 (RIImm (ImmInt 0)) dst,
-	    BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
-	    NEWBLOCK (BlockId lbl1),
+	    BI ALWAYS False bid2, NOP,
+	    NEWBLOCK bid1,
 	    OR False g0 (RIImm (ImmInt 1)) dst,
-	    NEWBLOCK (BlockId lbl2)]
+	    NEWBLOCK bid2]
     return (Any II32 code__2)
 
 condFltReg cond x y = do
-    BlockId lbl1 <- getBlockIdNat
-    BlockId lbl2 <- getBlockIdNat
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
     CondCode _ cond cond_code <- condFltCode cond x y
     let
     	code__2 dst = cond_code `appOL` toOL [ 
     	    NOP,
-	    BF cond False (ImmCLbl (mkAsmTempLabel lbl1)), NOP,
+	    BF cond False bid1, NOP,
 	    OR False g0 (RIImm (ImmInt 0)) dst,
-	    BI ALWAYS False (ImmCLbl (mkAsmTempLabel lbl2)), NOP,
-	    NEWBLOCK (BlockId lbl1),
+	    BI ALWAYS False bid2, NOP,
+	    NEWBLOCK bid1,
 	    OR False g0 (RIImm (ImmInt 1)) dst,
-	    NEWBLOCK (BlockId lbl2)]
+	    NEWBLOCK bid2]
     return (Any II32 code__2)
 
 #endif /* sparc_TARGET_ARCH */
@@ -4762,7 +4790,7 @@ coerceInt2FP width1 width2 x = do
     	code__2 dst = code `appOL` toOL [
     	    ST (intSize width1) src (spRel (-2)),
     	    LD (intSize width1) (spRel (-2)) dst,
-    	    FxTOy (intSize width1) (floatSize width1) dst dst]
+    	    FxTOy (intSize width1) (floatSize width2) dst dst]
     return (Any (floatSize $ width2) code__2)
 
 ------------
diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs
index 2ae4474..7b319af 100644
--- a/compiler/nativeGen/MachInstrs.hs
+++ b/compiler/nativeGen/MachInstrs.hs
@@ -591,8 +591,8 @@ is_G_instr instr
     	      | FxTOy	      Size Size Reg Reg -- src, dst
 
 -- Jumping around.
-	      | BI	      Cond Bool Imm -- cond, annul?, target
-    	      | BF  	      Cond Bool Imm -- cond, annul?, target
+	      | BI	      Cond Bool BlockId -- cond, annul?, target
+    	      | BF  	      Cond Bool BlockId -- cond, annul?, target
 
 	      | JMP	      AddrMode     -- target
 	      | CALL	      (Either Imm Reg) Int Bool -- target, args, terminal
@@ -617,9 +617,17 @@ moveSp n
    = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
 
 -- Produce the second-half-of-a-double register given the first half.
-fPair :: Reg -> Reg
-fPair (RealReg n) | n >= 32 && n `mod` 2 == 0  = RealReg (n+1)
-fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
+fPair :: Reg -> Maybe Reg
+fPair (RealReg n) 
+	| n >= 32 && n `mod` 2 == 0  = Just (RealReg (n+1))
+
+fPair (VirtualRegD u)
+	= Just (VirtualRegHi u)
+
+fPair other 
+	= trace ("MachInstrs.fPair: can't get high half of supposed double reg " ++ show other) 
+		Nothing
+		
 #endif /* sparc_TARGET_ARCH */
 
 
diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs
index 9c80423..4b3dff4 100644
--- a/compiler/nativeGen/MachRegs.lhs
+++ b/compiler/nativeGen/MachRegs.lhs
@@ -75,7 +75,7 @@ module MachRegs (
 #endif
 #if sparc_TARGET_ARCH
 	fits13Bits, 
-	fpRel, gReg, iReg, lReg, oReg, largeOffsetError,
+	fpRel, gReg, iReg, lReg, oReg, fReg, largeOffsetError,
 	fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
 #endif
 #if powerpc_TARGET_ARCH
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs
index 24ba78f..eb373fe 100644
--- a/compiler/nativeGen/PprMach.hs
+++ b/compiler/nativeGen/PprMach.hs
@@ -42,7 +42,7 @@ import Unique		( pprUnique )
 import Pretty
 import FastString
 import qualified Outputable
-import Outputable	( Outputable )
+import Outputable	( Outputable, pprPanic, ppr, docToSDoc)
 
 import Data.Array.ST
 import Data.Word	( Word8 )
@@ -1886,25 +1886,25 @@ pprInstr (RELOAD slot reg)
 --    sub g1,g2,g1           -- to restore g1
 
 pprInstr (LD FF64 (AddrRegReg g1 g2) reg)
-  = vcat [
+ = let Just regH	= fPair reg
+   in vcat [
        hcat [ptext (sLit "\tadd\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1],
        hcat [pp_ld_lbracket, pprReg g1, pp_rbracket_comma, pprReg reg],
-       hcat [pp_ld_lbracket, pprReg g1, ptext (sLit "+4]"), comma, pprReg (fPair reg)],
+       hcat [pp_ld_lbracket, pprReg g1, ptext (sLit "+4]"), comma, pprReg regH],
        hcat [ptext (sLit "\tsub\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1]
     ]
 
 -- Translate to
 --    ld  [addr],%fn
 --    ld  [addr+4],%f(n+1)
-pprInstr (LD FF64 addr reg) | isJust off_addr
-  = vcat [
-       hcat [pp_ld_lbracket, pprAddr addr, pp_rbracket_comma, pprReg reg],
-       hcat [pp_ld_lbracket, pprAddr addr2, pp_rbracket_comma,pprReg (fPair reg)]
-    ]
-  where
-    off_addr = addrOffset addr 4
-    addr2 = case off_addr of Just x -> x
-
+pprInstr (LD FF64 addr reg)
+ = let	Just addr2	= addrOffset addr 4
+ 	Just regH	= fPair reg
+   in	vcat [
+	       hcat [pp_ld_lbracket, pprAddr addr, pp_rbracket_comma, pprReg reg],
+	       hcat [pp_ld_lbracket, pprAddr addr2, pp_rbracket_comma,pprReg regH]
+	    ]
+       
 
 pprInstr (LD size addr reg)
   = hcat [
@@ -1925,11 +1925,12 @@ pprInstr (LD size addr reg)
 --    st  %f(n+1),[g1+4]
 --    sub g1,g2,g1           -- to restore g1
 pprInstr (ST FF64 reg (AddrRegReg g1 g2))
- = vcat [
+ = let	Just regH	= fPair reg
+   in vcat [
        hcat [ptext (sLit "\tadd\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1],
        hcat [ptext (sLit "\tst\t"), pprReg reg, pp_comma_lbracket, 
              pprReg g1,	rbrack],
-       hcat [ptext (sLit "\tst\t"), pprReg (fPair reg), pp_comma_lbracket,
+       hcat [ptext (sLit "\tst\t"), pprReg regH, pp_comma_lbracket,
              pprReg g1, ptext (sLit "+4]")],
        hcat [ptext (sLit "\tsub\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1]
     ]
@@ -1937,16 +1938,17 @@ pprInstr (ST FF64 reg (AddrRegReg g1 g2))
 -- Translate to
 --    st  %fn,[addr]
 --    st  %f(n+1),[addr+4]
-pprInstr (ST FF64 reg addr) | isJust off_addr 
- = vcat [
-      hcat [ptext (sLit "\tst\t"), pprReg reg, pp_comma_lbracket, 
-            pprAddr addr, rbrack],
-      hcat [ptext (sLit "\tst\t"), pprReg (fPair reg), pp_comma_lbracket,
-            pprAddr addr2, rbrack]
-    ]
-  where
-    off_addr = addrOffset addr 4
-    addr2 = case off_addr of Just x -> x
+pprInstr instr@(ST FF64 reg addr)
+ = let	Just addr2	= addrOffset addr 4
+	Just regH	= fPair reg
+   in	vcat [
+	      hcat [ptext (sLit "\tst\t"), pprReg reg, pp_comma_lbracket, 
+	            pprAddr addr, rbrack],
+	      hcat [ptext (sLit "\tst\t"), pprReg regH, pp_comma_lbracket,
+	            pprAddr addr2, rbrack]
+	    ]
+    
+    
 
 -- no distinction is made between signed and unsigned bytes on stores for the
 -- Sparc opcodes (at least I cannot see any, and gas is nagging me --SOF),
@@ -1964,8 +1966,8 @@ pprInstr (ST size reg addr)
     ]
 
 pprInstr (ADD x cc reg1 ri reg2)
---  | not x && not cc && riZero ri
---  = hcat [ ptext (sLit "\tmov\t"), pprReg reg1, comma, pprReg reg2 ]
+  | not x && not cc && riZero ri
+  = hcat [ ptext (sLit "\tmov\t"), pprReg reg1, comma, pprReg reg2 ]
 
   | otherwise
   = pprRegRIReg (if x then sLit "addx" else sLit "add") cc reg1 ri reg2
@@ -1982,12 +1984,12 @@ pprInstr (AND  b reg1 ri reg2) = pprRegRIReg (sLit "and")  b reg1 ri reg2
 pprInstr (ANDN b reg1 ri reg2) = pprRegRIReg (sLit "andn") b reg1 ri reg2
 
 pprInstr (OR b reg1 ri reg2)
-{-  | not b && reg1 == g0
+  | not b && reg1 == g0
   = let doit = hcat [ ptext (sLit "\tmov\t"), pprRI ri, comma, pprReg reg2 ]
     in  case ri of
            RIReg rrr | rrr == reg2 -> empty
            other                   -> doit
--}
+
   | otherwise
   = pprRegRIReg (sLit "or") b reg1 ri reg2
 
@@ -2016,10 +2018,13 @@ pprInstr NOP = ptext (sLit "\tnop")
 
 pprInstr (FABS FF32 reg1 reg2) = pprSizeRegReg (sLit "fabs") FF32 reg1 reg2
 pprInstr (FABS FF64 reg1 reg2)
-  = (<>) (pprSizeRegReg (sLit "fabs") FF32 reg1 reg2)
+ = let	Just reg1H	= fPair reg1
+ 	Just reg2H	= fPair reg2
+   in
+    (<>) (pprSizeRegReg (sLit "fabs") FF32 reg1 reg2)
     (if (reg1 == reg2) then empty
      else (<>) (char '\n')
-    	  (pprSizeRegReg (sLit "fmov") FF32 (fPair reg1) (fPair reg2)))
+    	  (pprSizeRegReg (sLit "fmov") FF32 reg1H reg2H))
 
 pprInstr (FADD size reg1 reg2 reg3)
   = pprSizeRegRegReg (sLit "fadd") size reg1 reg2 reg3
@@ -2030,20 +2035,26 @@ pprInstr (FDIV size reg1 reg2 reg3)
 
 pprInstr (FMOV FF32 reg1 reg2) = pprSizeRegReg (sLit "fmov") FF32 reg1 reg2
 pprInstr (FMOV FF64 reg1 reg2)
-  = (<>) (pprSizeRegReg (sLit "fmov") FF32 reg1 reg2)
+ = let	Just reg1H	= fPair reg1
+ 	Just reg2H	= fPair reg2
+   in
+    (<>) (pprSizeRegReg (sLit "fmov") FF32 reg1 reg2)
     (if (reg1 == reg2) then empty
      else (<>) (char '\n')
-    	  (pprSizeRegReg (sLit "fmov") FF32 (fPair reg1) (fPair reg2)))
+    	  (pprSizeRegReg (sLit "fmov") FF32 reg1H reg2H))
 
 pprInstr (FMUL size reg1 reg2 reg3)
   = pprSizeRegRegReg (sLit "fmul") size reg1 reg2 reg3
 
 pprInstr (FNEG FF32 reg1 reg2) = pprSizeRegReg (sLit "fneg") FF32 reg1 reg2
 pprInstr (FNEG FF64 reg1 reg2)
-  = (<>) (pprSizeRegReg (sLit "fneg") FF32 reg1 reg2)
+ = let	Just reg1H	= fPair reg1
+ 	Just reg2H	= fPair reg2
+   in
+    (<>) (pprSizeRegReg (sLit "fneg") FF32 reg1 reg2)
     (if (reg1 == reg2) then empty
      else (<>) (char '\n')
-    	  (pprSizeRegReg (sLit "fmov") FF32 (fPair reg1) (fPair reg2)))
+    	  (pprSizeRegReg (sLit "fmov") FF32 reg1H reg2H))
 
 pprInstr (FSQRT size reg1 reg2)     = pprSizeRegReg (sLit "fsqrt") size reg1 reg2
 pprInstr (FSUB size reg1 reg2 reg3) = pprSizeRegRegReg (sLit "fsub") size reg1 reg2 reg3
@@ -2064,20 +2075,20 @@ pprInstr (FxTOy size1 size2 reg1 reg2)
     ]
 
 
-pprInstr (BI cond b lab)
+pprInstr (BI cond b (BlockId id))
   = hcat [
 	ptext (sLit "\tb"), pprCond cond,
 	if b then pp_comma_a else empty,
 	char '\t',
-	pprImm lab
+	pprCLabel_asm (mkAsmTempLabel id)
     ]
 
-pprInstr (BF cond b lab)
+pprInstr (BF cond b (BlockId id))
   = hcat [
 	ptext (sLit "\tfb"), pprCond cond,
 	if b then pp_comma_a else empty,
 	char '\t',
-	pprImm lab
+	pprCLabel_asm (mkAsmTempLabel id)
     ]
 
 pprInstr (JMP addr) = (<>) (ptext (sLit "\tjmp\t")) (pprAddr addr)
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs
index 0992f6e..1b8bdb6 100644
--- a/compiler/nativeGen/RegAllocInfo.hs
+++ b/compiler/nativeGen/RegAllocInfo.hs
@@ -421,6 +421,11 @@ jumpDests insn acc
         BCC _ id        -> id : acc
         BCCFAR _ id     -> id : acc
         BCTR targets    -> targets ++ acc
+#elif sparc_TARGET_ARCH
+	BI   _ _ id	-> id : acc
+	BF   _ _ id	-> id : acc
+#else
+#error "RegAllocInfo.jumpDests not finished"
 #endif
 	_other		-> acc
 
@@ -908,7 +913,7 @@ mkBranchInstr id = [JXX ALWAYS id]
 #endif
 
 #if sparc_TARGET_ARCH
-mkBranchInstr (BlockId id) = [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP]
+mkBranchInstr id = [BI ALWAYS False id, NOP]
 #endif
 
 #if powerpc_TARGET_ARCH
diff --git a/compiler/nativeGen/RegAllocLinear.hs b/compiler/nativeGen/RegAllocLinear.hs
index c4a5a4a..66ac1bf 100644
--- a/compiler/nativeGen/RegAllocLinear.hs
+++ b/compiler/nativeGen/RegAllocLinear.hs
@@ -94,6 +94,7 @@ import MachInstrs
 import RegAllocInfo
 import RegLiveness
 import Cmm hiding (RegSet)
+import PprMach
 
 import Digraph
 import Unique		( Uniquable(getUnique), Unique )
@@ -103,6 +104,7 @@ import UniqSupply
 import Outputable
 import State
 import FastString
+import MonadUtils
 
 import Data.Maybe
 import Data.List
@@ -110,6 +112,9 @@ import Control.Monad
 import Data.Word
 import Data.Bits
 
+import Debug.Trace
+
+#include "../includes/MachRegs.h"
 
 -- -----------------------------------------------------------------------------
 -- The free register set
@@ -126,7 +131,7 @@ getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f
 allocateReg f r = filter (/= r) f
 -}
 
-#if defined(powerpc_TARGET_ARCH)
+#if defined(powerpc_TARGET_ARCH) 
 
 -- The PowerPC has 32 integer and 32 floating point registers.
 -- This is 32bit PowerPC, so Word64 is inefficient - two Word32s are much
@@ -157,7 +162,7 @@ getFreeRegs :: RegClass -> FreeRegs -> [RegNo]	-- lazilly
 getFreeRegs cls (FreeRegs g f)
     | RcDouble <- cls = go f (0x80000000) 63
     | RcInteger <- cls = go g (0x80000000) 31
-    | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad cls" (ppr cls)
+    | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class" (ppr cls)
     where
         go _ 0 _ = []
         go x m i | x .&. m /= 0 = i : (go x (m `shiftR` 1) $! i-1)
@@ -168,16 +173,176 @@ allocateReg r (FreeRegs g f)
     | r > 31    = FreeRegs g (f .&. complement (1 `shiftL` (fromIntegral r - 32)))
     | otherwise = FreeRegs (g .&. complement (1 `shiftL` fromIntegral r)) f
 
-#else
+
+#elif defined(sparc_TARGET_ARCH)
+--------------------------------------------------------------------------------
+-- SPARC is like PPC, except for twinning of floating point regs.
+--	When we allocate a double reg we must take an even numbered
+--	float reg, as well as the one after it.
+
+
+-- Holds bitmaps showing what registers are currently allocated.
+--	The float and double reg bitmaps overlap, but we only alloc
+--	float regs into the float map, and double regs into the double map.
+--
+--	Free regs have a bit set in the corresponding bitmap.
+--
+data FreeRegs 
+	= FreeRegs 
+		!Word32 	-- int    reg bitmap	regs  0..31
+		!Word32 	-- float  reg bitmap	regs 32..63
+		!Word32		-- double reg bitmap	regs 32..63
+	deriving( Show )
+
+
+-- | A reg map where no regs are free to be allocated.
+noFreeRegs :: FreeRegs
+noFreeRegs = FreeRegs 0 0 0
+
+
+-- | The initial set of free regs.
+--	Don't treat the top half of reg pairs we're using as doubles as being free.
+initFreeRegs :: FreeRegs
+initFreeRegs 
+-- = 	trace (show allocable ++ "\n" ++ show freeDouble) 
+-- $ 	regs
+ = 	regs
+ where	
+	freeDouble	= getFreeRegs RcDouble regs
+	regs		= foldr releaseReg noFreeRegs allocable
+	allocable	= allocatableRegs \\ doublePairs
+	doublePairs	= [43, 45, 47, 49, 51, 53]
+
+			
+-- | Get all the free registers of this class.
+getFreeRegs :: RegClass -> FreeRegs -> [RegNo]	-- lazilly
+getFreeRegs cls (FreeRegs g f d)
+	| RcInteger <- cls = go g 1 0
+	| RcFloat   <- cls = go f 1 32
+	| RcDouble  <- cls = go d 1 32
+	| otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class " (ppr cls)
+	where
+		go _ 0 _ = []
+	        go x m i | x .&. m /= 0 = i : (go x (m `shiftL` 1) $! i+1)
+        	         | otherwise    = go x (m `shiftL` 1) $! i+1
+
+showFreeRegs :: FreeRegs -> String
+showFreeRegs regs
+ 	=  "FreeRegs\n"
+	++ "    integer: " ++ (show $ getFreeRegs RcInteger regs)	++ "\n"
+	++ "      float: " ++ (show $ getFreeRegs RcFloat   regs)	++ "\n"
+	++ "     double: " ++ (show $ getFreeRegs RcDouble  regs)	++ "\n"
+
+
+-- | Check whether a reg is free
+regIsFree :: RegNo -> FreeRegs -> Bool
+regIsFree r (FreeRegs g f d)
+
+	-- a general purpose reg
+	| r <= 31	
+	, mask	<- 1 `shiftL` fromIntegral r
+	= g .&. mask /= 0
+
+	-- use the first 22 float regs as double precision
+	| r >= 32
+	, r <= 53
+	, mask	<- 1 `shiftL` (fromIntegral r - 32)
+	= d .&. mask /= 0
+
+	-- use the last 10 float regs as single precision
+	| otherwise 
+	, mask	<- 1 `shiftL` (fromIntegral r - 32)
+	= f .&. mask /= 0
+	
+
+-- | Grab a register.
+grabReg :: RegNo -> FreeRegs -> FreeRegs
+grabReg r (FreeRegs g f d)
+
+	-- a general purpose reg
+	| r <= 31
+	, mask	<- complement (1 `shiftL` fromIntegral r)
+	= FreeRegs (g .&. mask) f d
+    
+	-- use the first 22 float regs as double precision
+	| r >= 32
+	, r <= 53
+	, mask	<- complement (1 `shiftL` (fromIntegral r - 32))
+	= FreeRegs g f (d .&. mask)
+
+	-- use the last 10 float regs as single precision
+	| otherwise
+	, mask	<- complement (1 `shiftL` (fromIntegral r - 32))
+	= FreeRegs g (f .&. mask) d
+
+
+
+-- | Release a register from allocation.
+--	The register liveness information says that most regs die after a C call, 
+--	but we still don't want to allocate to some of them.
+--
+releaseReg :: RegNo -> FreeRegs -> FreeRegs
+releaseReg r regs@(FreeRegs g f d)
+
+	-- used by STG machine, or otherwise unavailable
+	| r >= 0  && r <= 15	= regs
+	| r >= 17 && r <= 21	= regs
+	| r >= 24 && r <= 31	= regs
+	| r >= 32 && r <= 41	= regs
+	| r >= 54 && r <= 59	= regs
+
+	-- never release the high part of double regs.
+	| r == 43		= regs
+	| r == 45		= regs
+	| r == 47		= regs
+	| r == 49		= regs
+	| r == 51		= regs
+	| r == 53		= regs
+	
+	-- a general purpose reg
+	| r <= 31	
+	, mask	<- 1 `shiftL` fromIntegral r
+	= FreeRegs (g .|. mask) f d
+
+	-- use the first 22 float regs as double precision
+	| r >= 32
+	, r <= 53
+	, mask	<- 1 `shiftL` (fromIntegral r - 32)
+	= FreeRegs g f (d .|. mask)
+
+	-- use the last 10 float regs as single precision
+	| otherwise 
+	, mask	<- 1 `shiftL` (fromIntegral r - 32)
+	= FreeRegs g (f .|. mask) d
+
+
+-- | Allocate a register in the map.
+allocateReg :: RegNo -> FreeRegs -> FreeRegs
+allocateReg r regs@(FreeRegs g f d) 
+
+	-- if the reg isn't actually free then we're in trouble
+{-	| not $ regIsFree r regs
+	= pprPanic 
+		"RegAllocLinear.allocateReg"
+		(text "reg " <> ppr r <> text " is not free")
+-}  
+	| otherwise
+	= grabReg r regs
+
+
+     
+--------------------------------------------------------------------------------
 
 -- If we have less than 32 registers, or if we have efficient 64-bit words,
 -- we will just use a single bitfield.
 
-#if defined(alpha_TARGET_ARCH)
-type FreeRegs = Word64
 #else
+
+#  if defined(alpha_TARGET_ARCH)
+type FreeRegs = Word64
+#  else
 type FreeRegs = Word32
-#endif
+#  endif
 
 noFreeRegs :: FreeRegs
 noFreeRegs = 0
@@ -465,11 +630,14 @@ genRaInsn block_live new_instrs instr r_dying w_dying =
     -- (a) save any temporaries which will be clobbered by this instruction
     clobber_saves <- saveClobberedTemps real_written r_dying
 
-    {-
-    freeregs <- getFreeRegsR
+
+{-  freeregs <- getFreeRegsR
     assig <- getAssigR
-    pprTrace "raInsn" (docToSDoc (pprInstr instr) $$ ppr r_dying <+> ppr w_dying $$ ppr virt_read <+> ppr virt_written $$ text (show freeregs) $$ ppr assig) $ do
-    -}
+    pprTrace "raInsn" 
+    	(docToSDoc (pprInstr instr) $$ ppr r_dying <+> ppr w_dying $$ ppr virt_read <+> ppr virt_written 
+		$$ text (show freeregs) $$ ppr assig) 
+		$ do
+-}
 
     -- (b), (c) allocate real regs for all regs read by this instruction.
     (r_spills, r_allocd) <- 
@@ -592,7 +760,9 @@ clobberRegs :: [RegNo] -> RegM ()
 clobberRegs [] = return () -- common case
 clobberRegs clobbered = do
   freeregs <- getFreeRegsR
+--  setFreeRegsR $! foldr grabReg freeregs clobbered
   setFreeRegsR $! foldr allocateReg freeregs clobbered
+
   assig <- getAssigR
   setAssigR $! clobber assig (ufmToList assig)
  where
@@ -652,13 +822,14 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
         case getFreeRegs (regClass r) freeregs of
 
       	-- case (2): we have a free register
-      	  my_reg:_ -> do
+      	  freeClass@(my_reg:_) -> {- pprTrace "alloc" (ppr r <+> ppr my_reg <+> ppr freeClass) $ -}
+	    do
     	    spills'   <- loadTemp reading r loc my_reg spills
 	    let new_loc 
 		 | Just (InMem slot) <- loc, reading = InBoth my_reg slot
 		 | otherwise		             = InReg my_reg
 	    setAssigR (addToUFM assig r $! new_loc)
-	    setFreeRegsR (allocateReg my_reg freeregs)
+	    setFreeRegsR $ allocateReg my_reg freeregs
     	    allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
 
         -- case (3): we need to push something out to free up a register
@@ -701,7 +872,8 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
 
 	        let (temp_to_push_out, my_reg) 
 			= case candidates2 of
-				[]	-> panic "RegAllocLinear.allocRegsAndSpill: no spill candidates"
+				[]	-> panic $ "RegAllocLinear.allocRegsAndSpill: no spill candidates"
+					++ "assignment: " ++ show (ufmToList assig) ++ "\n"
 				(x:_)	-> x
 				
     	        (spill_insn, slot) <- spillR (RealReg my_reg) temp_to_push_out