From 3fd25ca4ae2b41c283210112c6e710e8a44804ba Mon Sep 17 00:00:00 2001
From: Simon Marlow <marlowsd@gmail.com>
Date: Mon, 15 Feb 2010 13:01:02 +0000
Subject: [PATCH] Represent the free register set using Word64 on x86-64
 (fixes ffi009) Following recent changes to the numbering of
 registers, we overflowed Word32 on x86-64, with the result
 that xmm8 and later were not being allocated.

---
 compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs |    4 ++++
 compiler/nativeGen/X86/Ppr.hs                      |    2 +-
 compiler/nativeGen/X86/Regs.hs                     |   19 +++++++++++++------
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
index 4ba637f..0a15e56 100644
--- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
@@ -12,7 +12,11 @@ import Data.Word
 import Data.Bits
 
 type FreeRegs 
+#ifdef i386_TARGET_ARCH
 	= Word32
+#else
+	= Word64
+#endif
 
 noFreeRegs :: FreeRegs
 noFreeRegs = 0
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index fe94f21..0c8d106 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -942,7 +942,7 @@ gpop reg offset
    = hcat [text "fstp ", greg reg offset]
 
 greg :: Reg -> RegNo -> Doc
-greg reg offset = text "%st(" <> int (gregno reg - 16+offset) <> char ')'
+greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
 
 gsemi :: Doc
 gsemi = text " ; "
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 697528d..a04e854 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -25,7 +25,7 @@ module X86.Regs (
 	EABase(..), EAIndex(..), addrModeRegs,
 
 	eax, ebx, ecx, edx, esi, edi, ebp, esp,
-	fake0, fake1, fake2, fake3, fake4, fake5,
+	fake0, fake1, fake2, fake3, fake4, fake5, firstfake,
 
 	rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
 	r8,  r9,  r10, r11, r12, r13, r14, r15,
@@ -105,7 +105,7 @@ realRegSqueeze cls rr
  	RcInteger
 	 -> case rr of
 	 	RealRegSingle regNo
-			| regNo	< firstfake -> _ILIT(1)	-- first fake reg is 16
+			| regNo	< firstfake -> _ILIT(1)
 			| otherwise	-> _ILIT(0)
 			
 		RealRegPair{}		-> _ILIT(0)
@@ -218,13 +218,21 @@ spRel _	= panic "X86.Regs.spRel: not defined for this architecture"
 
 #endif
 
+-- On i386 every register number must be below 32, so that the set of
+-- free registers fits in the Word32 used by the register allocator
+-- (x86-64 uses a Word64, so its register numbers may go up to 39).
+
 firstfake, lastfake :: RegNo
 firstfake = 16
 lastfake  = 21
 
 firstxmm, lastxmm :: RegNo
 firstxmm  = 24
+#if i386_TARGET_ARCH
+lastxmm   = 31
+#else
 lastxmm   = 39
+#endif
 
 lastint :: RegNo
 #if i386_TARGET_ARCH
@@ -296,7 +304,7 @@ Intel x86 architecture:
 - Only ebx, esi, edi and esp are available across a C call (they are callee-saves).
 - Registers 0-7 have 16-bit counterparts (ax, bx etc.)
 - Registers 0-3 have 8 bit counterparts (ah, bh etc.)
-- Registers 8-13 are fakes; we pretend x86 has 6 conventionally-addressable
+- Registers fake0..fake5 are fakes; we pretend x86 has 6 conventionally-addressable
   fp registers, and 3-operand insns for them, and we translate this into
   real stack-based x86 fp code after register allocation.
 
@@ -372,7 +380,7 @@ xmm14 = regSingle 38
 xmm15 = regSingle 39
 
 allFPArgRegs :: [Reg]
-allFPArgRegs	= map regSingle [24 .. 31]
+allFPArgRegs	= map regSingle [firstxmm .. firstxmm+7]
 
 ripRel :: Displacement -> AddrMode
 ripRel imm	= AddrBaseIndex EABaseRip EAIndexNone imm
@@ -391,7 +399,7 @@ esp = rsp
 -}
 
 xmm :: RegNo -> Reg
-xmm n = regSingle (24+n)
+xmm n = regSingle (firstxmm+n)
 
 
 
@@ -458,7 +466,6 @@ callClobberedRegs 	:: [Reg]
 #define xmm14 38
 #define xmm15 39
 
-
 #if i386_TARGET_ARCH
 freeReg esp = fastBool False  --	%esp is the C stack pointer
 #endif
-- 
1.7.10.4