patchRegs,
regLiveness,
spillReg,
+ findReservedRegs,
RegSet,
elementOfRegSet,
#include "HsVersions.h"
import List ( partition )
+import OrdList ( unitOL )
import MachMisc
import MachRegs
-import MachCode ( InstrList )
+import MachCode ( InstrBlock )
import BitSet ( unitBS, mkBS, minusBS, unionBS, listBS, BitSet )
import CLabel ( pprCLabel_asm, CLabel{-instance Ord-} )
import FiniteMap ( addToFM, lookupFM, FiniteMap )
-import OrdList ( mkUnitList )
import PrimRep ( PrimRep(..) )
import UniqSet -- quite a bit of it
import Outputable
-import PprMach ( pprInstr )
+import Constants ( rESERVED_C_STACK_BYTES )
\end{code}
%************************************************************************
#if i386_TARGET_ARCH
regUsage instr = case instr of
- MOV sz src dst -> usage2 src dst
- MOVZX sz src dst -> usage2 src dst
- MOVSX sz src dst -> usage2 src dst
- LEA sz src dst -> usage2 src dst
- ADD sz src dst -> usage2 src dst
- SUB sz src dst -> usage2 src dst
- IMUL sz src dst -> usage2 src dst
- IDIV sz src -> usage (eax:edx:opToReg src) [eax,edx]
- AND sz src dst -> usage2 src dst
- OR sz src dst -> usage2 src dst
- XOR sz src dst -> usage2 src dst
- NOT sz op -> usage1 op
- NEGI sz op -> usage1 op
- SHL sz dst len -> usage2 dst len -- len is either an Imm or ecx.
- SAR sz dst len -> usage2 dst len -- len is either an Imm or ecx.
- SHR sz len dst -> usage2 dst len -- len is either an Imm or ecx.
- PUSH sz op -> usage (opToReg op) []
- POP sz op -> usage [] (opToReg op)
- TEST sz src dst -> usage (opToReg src ++ opToReg dst) []
- CMP sz src dst -> usage (opToReg src ++ opToReg dst) []
- SETCC cond op -> usage [] (opToReg op)
- JXX cond lbl -> usage [] []
- JMP op -> usage (opToReg op) freeRegs
- CALL imm -> usage [] callClobberedRegs
- CLTD -> usage [eax] [edx]
- NOP -> usage [] []
-
- GMOV src dst -> usage [src] [dst]
- GLD sz src dst -> usage (addrToRegs src) [dst]
- GST sz src dst -> usage [src] (addrToRegs dst)
-
- GFTOD src dst -> usage [src] [dst]
- GFTOI src dst -> usage [src] [dst]
-
- GDTOF src dst -> usage [src] [dst]
- GDTOI src dst -> usage [src] [dst]
-
- GITOF src dst -> usage [src] [dst]
- GITOD src dst -> usage [src] [dst]
-
- GADD sz s1 s2 dst -> usage [s1,s2] [dst]
- GSUB sz s1 s2 dst -> usage [s1,s2] [dst]
- GMUL sz s1 s2 dst -> usage [s1,s2] [dst]
- GDIV sz s1 s2 dst -> usage [s1,s2] [dst]
-
- GCMP sz src1 src2 -> usage [src1,src2] []
- GABS sz src dst -> usage [src] [dst]
- GNEG sz src dst -> usage [src] [dst]
- GSQRT sz src dst -> usage [src] [dst]
+ MOV sz src dst -> usageRW src dst
+ MOVZxL sz src dst -> usageRW src dst
+ MOVSxL sz src dst -> usageRW src dst
+ LEA sz src dst -> usageRW src dst
+ ADD sz src dst -> usageRM src dst
+ SUB sz src dst -> usageRM src dst
+ IMUL sz src dst -> usageRM src dst
+ IDIV sz src -> mkRU (eax:edx:use_R src) [eax,edx]
+ AND sz src dst -> usageRM src dst
+ OR sz src dst -> usageRM src dst
+ XOR sz src dst -> usageRM src dst
+ NOT sz op -> usageM op
+ NEGI sz op -> usageM op
+ SHL sz imm dst -> usageM dst
+ SAR sz imm dst -> usageM dst
+ SHR sz imm dst -> usageM dst
+ BT sz imm src -> mkRU (use_R src) []
+
+ PUSH sz op -> mkRU (use_R op) []
+ POP sz op -> mkRU [] (def_W op)
+ TEST sz src dst -> mkRU (use_R src ++ use_R dst) []
+ CMP sz src dst -> mkRU (use_R src ++ use_R dst) []
+ SETCC cond op -> mkRU [] (def_W op)
+ JXX cond lbl -> mkRU [] []
+ JMP op -> mkRU (use_R op) freeRegs
+ CALL imm -> mkRU [] callClobberedRegs
+ CLTD -> mkRU [eax] [edx]
+ NOP -> mkRU [] []
+
+ GMOV src dst -> mkRU [src] [dst]
+ GLD sz src dst -> mkRU (use_EA src) [dst]
+ GST sz src dst -> mkRU (src : use_EA dst) []
+
+ GLDZ dst -> mkRU [] [dst]
+ GLD1 dst -> mkRU [] [dst]
+
+ GFTOD src dst -> mkRU [src] [dst]
+ GFTOI src dst -> mkRU [src] [dst]
+
+ GDTOF src dst -> mkRU [src] [dst]
+ GDTOI src dst -> mkRU [src] [dst]
+
+ GITOF src dst -> mkRU [src] [dst]
+ GITOD src dst -> mkRU [src] [dst]
+
+ GADD sz s1 s2 dst -> mkRU [s1,s2] [dst]
+ GSUB sz s1 s2 dst -> mkRU [s1,s2] [dst]
+ GMUL sz s1 s2 dst -> mkRU [s1,s2] [dst]
+ GDIV sz s1 s2 dst -> mkRU [s1,s2] [dst]
+
+ GCMP sz src1 src2 -> mkRU [src1,src2] []
+ GABS sz src dst -> mkRU [src] [dst]
+ GNEG sz src dst -> mkRU [src] [dst]
+ GSQRT sz src dst -> mkRU [src] [dst]
+ GSIN sz src dst -> mkRU [src] [dst]
+ GCOS sz src dst -> mkRU [src] [dst]
+ GTAN sz src dst -> mkRU [src] [dst]
COMMENT _ -> noUsage
SEGMENT _ -> noUsage
- LABEL _ -> noUsage
- ASCII _ _ -> noUsage
- DATA _ _ -> noUsage
- _ -> error ("regUsage: " ++ showSDoc (pprInstr instr)) --noUsage
+ LABEL _ -> noUsage
+ ASCII _ _ -> noUsage
+ DATA _ _ -> noUsage
+ DELTA _ -> noUsage
+ _ -> pprPanic "regUsage(x86)" empty
+
where
- usage2 :: Operand -> Operand -> RegUsage
- usage2 op (OpReg reg) = usage (opToReg op) [reg]
- usage2 op (OpAddr ea) = usage (opToReg op ++ addrToRegs ea) []
- usage2 op (OpImm imm) = usage (opToReg op) []
- usage1 :: Operand -> RegUsage
- usage1 (OpReg reg) = usage [reg] [reg]
- usage1 (OpAddr ea) = usage (addrToRegs ea) []
- allFPRegs = [fake0,fake1,fake2,fake3,fake4,fake5]
-
- --callClobberedRegs = [ eax, ecx, edx ] -- according to gcc, anyway.
- callClobberedRegs = [eax,fake0,fake1,fake2,fake3,fake4,fake5]
-
--- General purpose register collecting functions.
-
- opToReg (OpReg reg) = [reg]
- opToReg (OpImm imm) = []
- opToReg (OpAddr ea) = addrToRegs ea
-
- addrToRegs (AddrBaseIndex base index _) = baseToReg base ++ indexToReg index
- where baseToReg Nothing = []
- baseToReg (Just r) = [r]
- indexToReg Nothing = []
- indexToReg (Just (r,_)) = [r]
- addrToRegs (ImmAddr _ _) = []
-
- usage src dst = RU (mkRegSet (filter interesting src))
- (mkRegSet (filter interesting dst))
+ -- 2 operand form; first operand Read; second Written
+ usageRW :: Operand -> Operand -> RegUsage
+ usageRW op (OpReg reg) = mkRU (use_R op) [reg]
+ usageRW op (OpAddr ea) = mkRU (use_R op ++ use_EA ea) []
+
+ -- 2 operand form; first operand Read; second Modified
+ usageRM :: Operand -> Operand -> RegUsage
+ usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg]
+ usageRM op (OpAddr ea) = mkRU (use_R op ++ use_EA ea) []
+
+ -- 1 operand form; operand Modified
+ usageM :: Operand -> RegUsage
+ usageM (OpReg reg) = mkRU [reg] [reg]
+ usageM (OpAddr ea) = mkRU (use_EA ea) []
+
+ -- caller-saves registers
+ callClobberedRegs = [eax,ecx,edx,fake0,fake1,fake2,fake3,fake4,fake5]
+
+ -- Registers defd when an operand is written.
+ def_W (OpReg reg) = [reg]
+ def_W (OpAddr ea) = []
+
+ -- Registers used when an operand is read.
+ use_R (OpReg reg) = [reg]
+ use_R (OpImm imm) = []
+ use_R (OpAddr ea) = use_EA ea
+
+ -- Registers used to compute an effective address.
+ use_EA (ImmAddr _ _) = []
+ use_EA (AddrBaseIndex Nothing Nothing _) = []
+ use_EA (AddrBaseIndex (Just b) Nothing _) = [b]
+ use_EA (AddrBaseIndex Nothing (Just (i,_)) _) = [i]
+ use_EA (AddrBaseIndex (Just b) (Just (i,_)) _) = [b,i]
+
+ mkRU src dst = RU (mkRegSet (filter interesting src))
+ (mkRegSet (filter interesting dst))
interesting (FixedReg _) = False
- interesting _ = True
+ interesting _ = True
+
+
+-- Allow the spiller to decide whether or not it can use
+-- %edx as spill temporaries.
+hasFixedEDX instr
+ = case instr of
+ IDIV _ _ -> True
+ CLTD -> True
+ other -> False
#endif {- i386_TARGET_ARCH -}
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#endif {- sparc_TARGET_ARCH -}
\end{code}
+
+%************************************************************************
+%* *
+\subsection{Free, reserved, call-clobbered, and argument registers}
+%* *
+%************************************************************************
+
+@freeRegs@ is the list of registers we can use in register allocation.
+@freeReg@ (below) says if a particular register is free.
+
+With a per-instruction clobber list, we might be able to get some of
+these back, but it's probably not worth the hassle.
+
+@callClobberedRegs@ ... the obvious.
+
+@argRegs@: assuming a call with N arguments, what registers will be
+used to hold arguments? (NB: it doesn't know whether the arguments
+are integer or floating-point...)
+
+findReservedRegs tells us which regs can be used as spill temporaries.
+The list of instructions for which we are attempting allocation is
+supplied. This is so that we can (at least for x86) examine it to
+discover which registers are being used in a fixed way -- for example,
+%eax and %edx are used by integer division, so they can't be used as
+spill temporaries. However, most instruction lists don't do integer
+division, so we don't want to rule them out altogether.
+
+findReservedRegs returns not a list of spill temporaries, but a list
+of list of them. This is so that the allocator can attempt allocating
+with at first no spill temps, then if that fails, increasing numbers.
+For x86 it is important that we minimise the number of regs reserved
+as spill temporaries, since there are so few. For Alpha and Sparc
+this isn't a concern; we just ignore the supplied code list and return
+a singleton list which we know will satisfy all spill demands.
+
+\begin{code}
+findReservedRegs :: [Instr] -> [[RegNo]]
+findReservedRegs instrs
+#if alpha_TARGET_ARCH
+ = --[[NCG_Reserved_I1, NCG_Reserved_I2,
+ -- NCG_Reserved_F1, NCG_Reserved_F2]]
+ error "findReservedRegs: alpha"
+#endif
+#if sparc_TARGET_ARCH
+ = --[[NCG_Reserved_I1, NCG_Reserved_I2,
+ -- NCG_Reserved_F1, NCG_Reserved_F2,
+ -- NCG_Reserved_D1, NCG_Reserved_D2]]
+ error "findReservedRegs: sparc"
+#endif
+#if i386_TARGET_ARCH
+ -- We can use %fake4 and %fake5 safely for float temps.
+ -- Int regs are more troublesome. Only %ecx is definitely
+ -- available. If there are no division insns, we can use %edx
+ -- too. At a pinch, we also could bag %eax if there are no
+ -- divisions and no ccalls, but so far we've never encountered
+ -- a situation where three integer temporaries are necessary.
+ --
+ -- Because registers are in short supply on x86, we give the
+ -- allocator a whole bunch of possibilities, starting with zero
+ -- temporaries and working up to all that are available. This
+ -- is inefficient, but spills are pretty rare, so we don't care
+ -- if the register allocator has to try half a dozen or so possibilities
+ -- before getting to one that works.
+ = let f1 = fake5
+ f2 = fake4
+ intregs_avail
+ = ecx : if any hasFixedEDX instrs then [] else [edx]
+ possibilities
+ = case intregs_avail of
+ [i1] -> [ [], [i1], [f1], [i1,f1], [f1,f2], [i1,f1,f2] ]
+
+ [i1,i2] -> [ [], [i1], [f1], [i1,i2], [i1,f1], [f1,f2],
+ [i1,i2,f1], [i1,f1,f2], [i1,i2,f1,f2] ]
+ in
+ map (map mappedRegNo) possibilities
+#endif
+\end{code}
+
%************************************************************************
%* *
\subsection{@RegLiveness@ type; @regLiveness@ function}
patchRegs instr env = case instr of
MOV sz src dst -> patch2 (MOV sz) src dst
- MOVZX sz src dst -> patch2 (MOVZX sz) src dst
- MOVSX sz src dst -> patch2 (MOVSX sz) src dst
+ MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst
+ MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst
LEA sz src dst -> patch2 (LEA sz) src dst
ADD sz src dst -> patch2 (ADD sz) src dst
SUB sz src dst -> patch2 (SUB sz) src dst
XOR sz src dst -> patch2 (XOR sz) src dst
NOT sz op -> patch1 (NOT sz) op
NEGI sz op -> patch1 (NEGI sz) op
- SHL sz imm dst -> patch2 (SHL sz) imm dst
- SAR sz imm dst -> patch2 (SAR sz) imm dst
- SHR sz imm dst -> patch2 (SHR sz) imm dst
+ SHL sz imm dst -> patch1 (SHL sz imm) dst
+ SAR sz imm dst -> patch1 (SAR sz imm) dst
+ SHR sz imm dst -> patch1 (SHR sz imm) dst
+ BT sz imm src -> patch1 (BT sz imm) src
TEST sz src dst -> patch2 (TEST sz) src dst
CMP sz src dst -> patch2 (CMP sz) src dst
PUSH sz op -> patch1 (PUSH sz) op
GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
GST sz src dst -> GST sz (env src) (lookupAddr dst)
+ GLDZ dst -> GLDZ (env dst)
+ GLD1 dst -> GLD1 (env dst)
+
GFTOD src dst -> GFTOD (env src) (env dst)
GFTOI src dst -> GFTOI (env src) (env dst)
GABS sz src dst -> GABS sz (env src) (env dst)
GNEG sz src dst -> GNEG sz (env src) (env dst)
GSQRT sz src dst -> GSQRT sz (env src) (env dst)
+ GSIN sz src dst -> GSIN sz (env src) (env dst)
+ GCOS sz src dst -> GCOS sz (env src) (env dst)
+ GTAN sz src dst -> GTAN sz (env src) (env dst)
COMMENT _ -> instr
SEGMENT _ -> instr
LABEL _ -> instr
ASCII _ _ -> instr
DATA _ _ -> instr
+ DELTA _ -> instr
JXX _ _ -> instr
CALL _ -> instr
CLTD -> instr
- _ -> error ("patchInstr: " ++ showSDoc (pprInstr instr)) --instr
+ _ -> pprPanic "patchInstr(x86)" empty
+
where
patch1 insn op = insn (patchOp op)
patch2 insn src dst = insn (patchOp src) (patchOp dst)
Spill to memory, and load it back...
-JRS, 000122: on x86, don't spill directly below the stack pointer, since
-some insn sequences (int <-> conversions) use this as a temp location.
-Leave 16 bytes of slop.
+JRS, 000122: on x86, don't spill directly above the stack pointer,
+since some insn sequences (int <-> conversions, and eventually
+StixInteger) use this as a temp location. Leave 8 words (ie, 64 bytes
+for a 64-bit arch) of slop.
\begin{code}
-spillReg, loadReg :: Reg -> Reg -> InstrList
-
-spillReg dyn (MemoryReg i pk)
- | i >= 0 -- JRS paranoia
- = let
- sz = primRepToSize pk
+maxSpillSlots :: Int
+maxSpillSlots = (rESERVED_C_STACK_BYTES - 64) `div` 12
+
+-- convert a spill slot number to a *byte* offset, with no sign:
+-- decide on a per arch basis whether you are spilling above or below
+-- the C stack pointer.
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+ | slot >= 0 && slot < maxSpillSlots
+ = 64 + 12 * slot
+ | otherwise
+ = pprPanic "spillSlotToOffset:"
+ (text "invalid spill location: " <> int slot)
+
+spillReg, loadReg :: Int -> Reg -> Reg -> Instr
+
+spillReg delta dyn (MemoryReg i pk)
+ = let sz = primRepToSize pk
+ off = spillSlotToOffset i
in
- mkUnitList (
{-Alpha: spill below the stack pointer (?)-}
- IF_ARCH_alpha( ST sz dyn (spRel i)
+ IF_ARCH_alpha( ST sz dyn (spRel (- (off `div` 8)))
- {-I386: spill above stack pointer leaving 2 words/spill-}
- ,IF_ARCH_i386 ( if pk == FloatRep || pk == DoubleRep
- then GST sz dyn (spRel (16 + 2 * i))
- else MOV sz (OpReg dyn) (OpAddr (spRel (16 + 2 * i)))
+ {-I386: spill above stack pointer leaving 3 words/spill-}
+ ,IF_ARCH_i386 ( let off_w = (off-delta) `div` 4
+ in
+ if pk == FloatRep || pk == DoubleRep
+ then GST F80 dyn (spRel off_w)
+ else MOV sz (OpReg dyn) (OpAddr (spRel off_w))
{-SPARC: spill below frame pointer leaving 2 words/spill-}
- ,IF_ARCH_sparc( ST sz dyn (fpRel (-2 * i))
+ ,IF_ARCH_sparc( ST sz dyn (fpRel (- (off `div` 4)))
,)))
- )
-----------------------------
-loadReg (MemoryReg i pk) dyn
- | i >= 0 -- JRS paranoia
- = let
- sz = primRepToSize pk
+
+loadReg delta (MemoryReg i pk) dyn
+ = let sz = primRepToSize pk
+ off = spillSlotToOffset i
in
- mkUnitList (
- IF_ARCH_alpha( LD sz dyn (spRel i)
- ,IF_ARCH_i386 ( if pk == FloatRep || pk == DoubleRep
- then GLD sz (spRel (16 + 2 * i)) dyn
- else MOV sz (OpAddr (spRel (16 + 2 * i))) (OpReg dyn)
- ,IF_ARCH_sparc( LD sz (fpRel (-2 * i)) dyn
+ IF_ARCH_alpha( LD sz dyn (spRel (- (off `div` 8)))
+ ,IF_ARCH_i386 ( let off_w = (off-delta) `div` 4
+ in
+ if pk == FloatRep || pk == DoubleRep
+ then GLD F80 (spRel off_w) dyn
+ else MOV sz (OpAddr (spRel off_w)) (OpReg dyn)
+ ,IF_ARCH_sparc( LD sz (fpRel (- (off `div` 4))) dyn
,)))
- )
\end{code}