NCG: Split up the native code generator into arch specific modules

author Ben.Lippmeier@anu.edu.au <unknown>

Sun, 15 Feb 2009 05:51:58 +0000 (05:51 +0000)

committer Ben.Lippmeier@anu.edu.au <unknown>

Sun, 15 Feb 2009 05:51:58 +0000 (05:51 +0000)
author Ben.Lippmeier@anu.edu.au <unknown>
Sun, 15 Feb 2009 05:51:58 +0000 (05:51 +0000)
committer Ben.Lippmeier@anu.edu.au <unknown>
Sun, 15 Feb 2009 05:51:58 +0000 (05:51 +0000)
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in

index 2328eca..b276943 100644 (file)
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -454,31 +454,38 @@ Library
  
          Exposed-Modules:
              AsmCodeGen
  
          Exposed-Modules:
              AsmCodeGen
-            MachCodeGen
-            Regs
-            RegsBase
-            Instrs
-            RegAllocInfo
-            PprMach
+            TargetReg
+            NCGMonad
+            Instruction
+            Size
+            Reg
+            RegClass
              PprBase
              PprBase
+            PIC
+            Platform
              Alpha.Regs
              Alpha.RegInfo
              Alpha.Instr
              Alpha.Ppr
              Alpha.Regs
              Alpha.RegInfo
              Alpha.Instr
              Alpha.Ppr
+            Alpha.CodeGen
              X86.Regs
              X86.RegInfo
              X86.Instr
              X86.Regs
              X86.RegInfo
              X86.Instr
+            X86.Cond
              X86.Ppr
              X86.Ppr
+            X86.CodeGen
              PPC.Regs
              PPC.RegInfo
              PPC.Instr
              PPC.Regs
              PPC.RegInfo
              PPC.Instr
+            PPC.Cond
              PPC.Ppr
              PPC.Ppr
+            PPC.CodeGen
              SPARC.Regs
              SPARC.RegInfo
              SPARC.Instr
              SPARC.Regs
              SPARC.RegInfo
              SPARC.Instr
+            SPARC.Cond
              SPARC.Ppr
              SPARC.Ppr
-            NCGMonad
-            PositionIndependentCode
+            SPARC.CodeGen
              RegAlloc.Liveness
              RegAlloc.Graph.Main
              RegAlloc.Graph.Stats
              RegAlloc.Liveness
              RegAlloc.Graph.Main
              RegAlloc.Graph.Stats
@@ -488,6 +495,7 @@ Library
              RegAlloc.Graph.Spill
              RegAlloc.Graph.SpillClean
              RegAlloc.Graph.SpillCost
              RegAlloc.Graph.Spill
              RegAlloc.Graph.SpillClean
              RegAlloc.Graph.SpillCost
+            RegAlloc.Graph.TrivColorable
              RegAlloc.Linear.Main
              RegAlloc.Linear.JoinToTargets
              RegAlloc.Linear.State
              RegAlloc.Linear.Main
              RegAlloc.Linear.JoinToTargets
              RegAlloc.Linear.State
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs

index 44bd124..eb9a182 100644 (file)
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -64,6 +64,7 @@ module DynFlags (
  
  #include "HsVersions.h"
  
  
  #include "HsVersions.h"
  
+import Platform
  import Module
  import PackageConfig
  import PrelNames        ( mAIN, main_RDR_Unqual )
  import Module
  import PackageConfig
  import PrelNames        ( mAIN, main_RDR_Unqual )
@@ -339,6 +340,7 @@ data DynFlags = DynFlags {
    specConstrCount       :: Maybe Int,   -- ^ Max number of specialisations for any one function
    liberateCaseThreshold :: Maybe Int,   -- ^ Threshold for LiberateCase
  
    specConstrCount       :: Maybe Int,   -- ^ Max number of specialisations for any one function
    liberateCaseThreshold :: Maybe Int,   -- ^ Threshold for LiberateCase
  
+  targetPlatform       :: Platform,    -- ^ The platform we're compiling for. Used by the NCG.
    stolen_x86_regs       :: Int,
    cmdlineHcIncludes     :: [String],    -- ^ @\-\#includes@
    importPaths           :: [FilePath],
    stolen_x86_regs       :: Int,
    cmdlineHcIncludes     :: [String],    -- ^ @\-\#includes@
    importPaths           :: [FilePath],
@@ -584,6 +586,7 @@ defaultDynFlags =
          specConstrThreshold     = Just 200,
          specConstrCount         = Just 3,
          liberateCaseThreshold   = Just 200,
          specConstrThreshold     = Just 200,
          specConstrCount         = Just 3,
          liberateCaseThreshold   = Just 200,
+        targetPlatform          = defaultTargetPlatform,
          stolen_x86_regs         = 4,
          cmdlineHcIncludes       = [],
          importPaths             = ["."],
          stolen_x86_regs         = 4,
          cmdlineHcIncludes       = [],
          importPaths             = ["."],
diff --git a/compiler/nativeGen/Alpha/CodeGen.hs b/compiler/nativeGen/Alpha/CodeGen.hs

new file mode 100644 (file)

index 0000000..4ce774f
--- /dev/null
+++ b/compiler/nativeGen/Alpha/CodeGen.hs
@@ -0,0 +1,789 @@
+module Alpha.CodeGen ()
+
+where
+
+{-
+
+getRegister :: CmmExpr -> NatM Register
+
+#if !x86_64_TARGET_ARCH
+    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+    -- register, it can only be used for rip-relative addressing.
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+  = do
+      reg <- getPicBaseNat wordSize
+      return (Fixed wordSize reg nilOL)
+#endif
+
+getRegister (CmmReg reg) 
+  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
+                 (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _) 
+  = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
+    -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code       
+
+#endif
+
+-- end of machine-"independent" bit; here we go on the rest...
+
+
+getRegister (StDouble d)
+  = getBlockIdNat                  `thenNat` \ lbl ->
+    getNewRegNat PtrRep            `thenNat` \ tmp ->
+    let code dst = mkSeqInstrs [
+           LDATA RoDataSegment lbl [
+                   DATA TF [ImmLab (rational d)]
+               ],
+           LDA tmp (AddrImm (ImmCLbl lbl)),
+           LD TF dst (AddrReg tmp)]
+    in
+       return (Any FF64 code)
+
+getRegister (StPrim primop [x]) -- unary PrimOps
+  = case primop of
+      IntNegOp -> trivialUCode (NEG Q False) x
+
+      NotOp    -> trivialUCode NOT x
+
+      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
+      DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
+
+      OrdOp -> coerceIntCode IntRep x
+      ChrOp -> chrCode x
+
+      Float2IntOp  -> coerceFP2Int    x
+      Int2FloatOp  -> coerceInt2FP pr x
+      Double2IntOp -> coerceFP2Int    x
+      Int2DoubleOp -> coerceInt2FP pr x
+
+      Double2FloatOp -> coerceFltCode x
+      Float2DoubleOp -> coerceFltCode x
+
+      other_op -> getRegister (StCall fn CCallConv FF64 [x])
+       where
+         fn = case other_op of
+                FloatExpOp    -> fsLit "exp"
+                FloatLogOp    -> fsLit "log"
+                FloatSqrtOp   -> fsLit "sqrt"
+                FloatSinOp    -> fsLit "sin"
+                FloatCosOp    -> fsLit "cos"
+                FloatTanOp    -> fsLit "tan"
+                FloatAsinOp   -> fsLit "asin"
+                FloatAcosOp   -> fsLit "acos"
+                FloatAtanOp   -> fsLit "atan"
+                FloatSinhOp   -> fsLit "sinh"
+                FloatCoshOp   -> fsLit "cosh"
+                FloatTanhOp   -> fsLit "tanh"
+                DoubleExpOp   -> fsLit "exp"
+                DoubleLogOp   -> fsLit "log"
+                DoubleSqrtOp  -> fsLit "sqrt"
+                DoubleSinOp   -> fsLit "sin"
+                DoubleCosOp   -> fsLit "cos"
+                DoubleTanOp   -> fsLit "tan"
+                DoubleAsinOp  -> fsLit "asin"
+                DoubleAcosOp  -> fsLit "acos"
+                DoubleAtanOp  -> fsLit "atan"
+                DoubleSinhOp  -> fsLit "sinh"
+                DoubleCoshOp  -> fsLit "cosh"
+                DoubleTanhOp  -> fsLit "tanh"
+  where
+    pr = panic "MachCode.getRegister: no primrep needed for Alpha"
+
+getRegister (StPrim primop [x, y]) -- dyadic PrimOps
+  = case primop of
+      CharGtOp -> trivialCode (CMP LTT) y x
+      CharGeOp -> trivialCode (CMP LE) y x
+      CharEqOp -> trivialCode (CMP EQQ) x y
+      CharNeOp -> int_NE_code x y
+      CharLtOp -> trivialCode (CMP LTT) x y
+      CharLeOp -> trivialCode (CMP LE) x y
+
+      IntGtOp  -> trivialCode (CMP LTT) y x
+      IntGeOp  -> trivialCode (CMP LE) y x
+      IntEqOp  -> trivialCode (CMP EQQ) x y
+      IntNeOp  -> int_NE_code x y
+      IntLtOp  -> trivialCode (CMP LTT) x y
+      IntLeOp  -> trivialCode (CMP LE) x y
+
+      WordGtOp -> trivialCode (CMP ULT) y x
+      WordGeOp -> trivialCode (CMP ULE) x y
+      WordEqOp -> trivialCode (CMP EQQ)  x y
+      WordNeOp -> int_NE_code x y
+      WordLtOp -> trivialCode (CMP ULT) x y
+      WordLeOp -> trivialCode (CMP ULE) x y
+
+      AddrGtOp -> trivialCode (CMP ULT) y x
+      AddrGeOp -> trivialCode (CMP ULE) y x
+      AddrEqOp -> trivialCode (CMP EQQ)  x y
+      AddrNeOp -> int_NE_code x y
+      AddrLtOp -> trivialCode (CMP ULT) x y
+      AddrLeOp -> trivialCode (CMP ULE) x y
+       
+      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
+      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
+      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+      IntAddOp  -> trivialCode (ADD Q False) x y
+      IntSubOp  -> trivialCode (SUB Q False) x y
+      IntMulOp  -> trivialCode (MUL Q False) x y
+      IntQuotOp -> trivialCode (DIV Q False) x y
+      IntRemOp  -> trivialCode (REM Q False) x y
+
+      WordAddOp  -> trivialCode (ADD Q False) x y
+      WordSubOp  -> trivialCode (SUB Q False) x y
+      WordMulOp  -> trivialCode (MUL Q False) x y
+      WordQuotOp -> trivialCode (DIV Q True) x y
+      WordRemOp  -> trivialCode (REM Q True) x y
+
+      FloatAddOp -> trivialFCode  W32 (FADD TF) x y
+      FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
+      FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
+      FloatDivOp -> trivialFCode  W32 (FDIV TF) x y
+
+      DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
+      DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
+      DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
+      DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y
+
+      AddrAddOp  -> trivialCode (ADD Q False) x y
+      AddrSubOp  -> trivialCode (SUB Q False) x y
+      AddrRemOp  -> trivialCode (REM Q True) x y
+
+      AndOp  -> trivialCode AND x y
+      OrOp   -> trivialCode OR  x y
+      XorOp  -> trivialCode XOR x y
+      SllOp  -> trivialCode SLL x y
+      SrlOp  -> trivialCode SRL x y
+
+      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
+      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
+      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
+
+      FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
+      DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
+  where
+    {- ------------------------------------------------------------
+       Some bizarre special code for getting condition codes into
+       registers.  Integer non-equality is a test for equality
+       followed by an XOR with 1.  (Integer comparisons always set
+       the result register to 0 or 1.)  Floating point comparisons of
+       any kind leave the result in a floating point register, so we
+       need to wrangle an integer register out of things.
+    -}
+    int_NE_code :: StixTree -> StixTree -> NatM Register
+
+    int_NE_code x y
+      = trivialCode (CMP EQQ) x y      `thenNat` \ register ->
+       getNewRegNat IntRep             `thenNat` \ tmp ->
+       let
+           code = registerCode register tmp
+           src  = registerName register tmp
+           code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
+       in
+       return (Any IntRep code__2)
+
+    {- ------------------------------------------------------------
+       Comments for int_NE_code also apply to cmpF_code
+    -}
+    cmpF_code
+       :: (Reg -> Reg -> Reg -> Instr)
+       -> Cond
+       -> StixTree -> StixTree
+       -> NatM Register
+
+    cmpF_code instr cond x y
+      = trivialFCode pr instr x y      `thenNat` \ register ->
+       getNewRegNat FF64               `thenNat` \ tmp ->
+       getBlockIdNat                   `thenNat` \ lbl ->
+       let
+           code = registerCode register tmp
+           result  = registerName register tmp
+
+           code__2 dst = code . mkSeqInstrs [
+               OR zeroh (RIImm (ImmInt 1)) dst,
+               BF cond  result (ImmCLbl lbl),
+               OR zeroh (RIReg zeroh) dst,
+               NEWBLOCK lbl]
+       in
+       return (Any IntRep code__2)
+      where
+       pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+      ------------------------------------------------------------
+
+getRegister (CmmLoad pk mem)
+  = getAmode mem                   `thenNat` \ amode ->
+    let
+       code = amodeCode amode
+       src   = amodeAddr amode
+       size = primRepToSize pk
+       code__2 dst = code . mkSeqInstr (LD size dst src)
+    in
+    return (Any pk code__2)
+
+getRegister (StInt i)
+  | fits8Bits i
+  = let
+       code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
+    in
+    return (Any IntRep code)
+  | otherwise
+  = let
+       code dst = mkSeqInstr (LDI Q dst src)
+    in
+    return (Any IntRep code)
+  where
+    src = ImmInt (fromInteger i)
+
+getRegister leaf
+  | isJust imm
+  = let
+       code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
+    in
+    return (Any PtrRep code)
+  where
+    imm = maybeImm leaf
+    imm__2 = case imm of Just x -> x
+
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+getAmode (StPrim IntSubOp [x, StInt i])
+  = getNewRegNat PtrRep                `thenNat` \ tmp ->
+    getRegister x              `thenNat` \ register ->
+    let
+       code = registerCode register tmp
+       reg  = registerName register tmp
+       off  = ImmInt (-(fromInteger i))
+    in
+    return (Amode (AddrRegImm reg off) code)
+
+getAmode (StPrim IntAddOp [x, StInt i])
+  = getNewRegNat PtrRep                `thenNat` \ tmp ->
+    getRegister x              `thenNat` \ register ->
+    let
+       code = registerCode register tmp
+       reg  = registerName register tmp
+       off  = ImmInt (fromInteger i)
+    in
+    return (Amode (AddrRegImm reg off) code)
+
+getAmode leaf
+  | isJust imm
+  = return (Amode (AddrImm imm__2) id)
+  where
+    imm = maybeImm leaf
+    imm__2 = case imm of Just x -> x
+
+getAmode other
+  = getNewRegNat PtrRep                `thenNat` \ tmp ->
+    getRegister other          `thenNat` \ register ->
+    let
+       code = registerCode register tmp
+       reg  = registerName register tmp
+    in
+    return (Amode (AddrReg reg) code)
+
+#endif /* alpha_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business.  Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers.  If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side.  This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+
+assignIntCode pk (CmmLoad dst _) src
+  = getNewRegNat IntRep            `thenNat` \ tmp ->
+    getAmode dst                   `thenNat` \ amode ->
+    getRegister src                `thenNat` \ register ->
+    let
+       code1   = amodeCode amode []
+       dst__2  = amodeAddr amode
+       code2   = registerCode register tmp []
+       src__2  = registerName register tmp
+       sz      = primRepToSize pk
+       code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+    in
+    return code__2
+
+assignIntCode pk dst src
+  = getRegister dst                        `thenNat` \ register1 ->
+    getRegister src                        `thenNat` \ register2 ->
+    let
+       dst__2  = registerName register1 zeroh
+       code    = registerCode register2 dst__2
+       src__2  = registerName register2 dst__2
+       code__2 = if isFixed register2
+                 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
+                 else code
+    in
+    return code__2
+
+assignFltCode pk (CmmLoad dst _) src
+  = getNewRegNat pk                `thenNat` \ tmp ->
+    getAmode dst                   `thenNat` \ amode ->
+    getRegister src                        `thenNat` \ register ->
+    let
+       code1   = amodeCode amode []
+       dst__2  = amodeAddr amode
+       code2   = registerCode register tmp []
+       src__2  = registerName register tmp
+       sz      = primRepToSize pk
+       code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+    in
+    return code__2
+
+assignFltCode pk dst src
+  = getRegister dst                        `thenNat` \ register1 ->
+    getRegister src                        `thenNat` \ register2 ->
+    let
+       dst__2  = registerName register1 zeroh
+       code    = registerCode register2 dst__2
+       src__2  = registerName register2 dst__2
+       code__2 = if isFixed register2
+                 then code . mkSeqInstr (FMOV src__2 dst__2)
+                 else code
+    in
+    return code__2
+
+
+-- -----------------------------------------------------------------------------
+-- Generating an non-local jump
+
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+genJump (CmmLabel lbl)
+  | isAsmTemp lbl = returnInstr (BR target)
+  | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
+  where
+    target = ImmCLbl lbl
+
+genJump tree
+  = getRegister tree               `thenNat` \ register ->
+    getNewRegNat PtrRep            `thenNat` \ tmp ->
+    let
+       dst    = registerName register pv
+       code   = registerCode register pv
+       target = registerName register pv
+    in
+    if isFixed register then
+       returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
+    else
+    return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
+
+
+-- -----------------------------------------------------------------------------
+--  Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+
+genBranch = return . toOL . mkBranchInstr
+
+
+-- -----------------------------------------------------------------------------
+--  Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions.  We peek at the arguments to decide what kind of
+comparison to do.
+
+ALPHA: For comparisons with 0, we're laughing, because we can just do
+the desired conditional branch.
+
+-}
+
+
+genCondJump
+    :: BlockId     -- the branch target
+    -> CmmExpr      -- the condition on which to branch
+    -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+genCondJump id (StPrim op [x, StInt 0])
+  = getRegister x                          `thenNat` \ register ->
+    getNewRegNat (registerRep register)
+                                   `thenNat` \ tmp ->
+    let
+       code   = registerCode register tmp
+       value  = registerName register tmp
+       pk     = registerRep register
+       target = ImmCLbl lbl
+    in
+    returnSeq code [BI (cmpOp op) value target]
+  where
+    cmpOp CharGtOp = GTT
+    cmpOp CharGeOp = GE
+    cmpOp CharEqOp = EQQ
+    cmpOp CharNeOp = NE
+    cmpOp CharLtOp = LTT
+    cmpOp CharLeOp = LE
+    cmpOp IntGtOp = GTT
+    cmpOp IntGeOp = GE
+    cmpOp IntEqOp = EQQ
+    cmpOp IntNeOp = NE
+    cmpOp IntLtOp = LTT
+    cmpOp IntLeOp = LE
+    cmpOp WordGtOp = NE
+    cmpOp WordGeOp = ALWAYS
+    cmpOp WordEqOp = EQQ
+    cmpOp WordNeOp = NE
+    cmpOp WordLtOp = NEVER
+    cmpOp WordLeOp = EQQ
+    cmpOp AddrGtOp = NE
+    cmpOp AddrGeOp = ALWAYS
+    cmpOp AddrEqOp = EQQ
+    cmpOp AddrNeOp = NE
+    cmpOp AddrLtOp = NEVER
+    cmpOp AddrLeOp = EQQ
+
+genCondJump lbl (StPrim op [x, StDouble 0.0])
+  = getRegister x                          `thenNat` \ register ->
+    getNewRegNat (registerRep register)
+                                   `thenNat` \ tmp ->
+    let
+       code   = registerCode register tmp
+       value  = registerName register tmp
+       pk     = registerRep register
+       target = ImmCLbl lbl
+    in
+    return (code . mkSeqInstr (BF (cmpOp op) value target))
+  where
+    cmpOp FloatGtOp = GTT
+    cmpOp FloatGeOp = GE
+    cmpOp FloatEqOp = EQQ
+    cmpOp FloatNeOp = NE
+    cmpOp FloatLtOp = LTT
+    cmpOp FloatLeOp = LE
+    cmpOp DoubleGtOp = GTT
+    cmpOp DoubleGeOp = GE
+    cmpOp DoubleEqOp = EQQ
+    cmpOp DoubleNeOp = NE
+    cmpOp DoubleLtOp = LTT
+    cmpOp DoubleLeOp = LE
+
+genCondJump lbl (StPrim op [x, y])
+  | fltCmpOp op
+  = trivialFCode pr instr x y      `thenNat` \ register ->
+    getNewRegNat FF64              `thenNat` \ tmp ->
+    let
+       code   = registerCode register tmp
+       result = registerName register tmp
+       target = ImmCLbl lbl
+    in
+    return (code . mkSeqInstr (BF cond result target))
+  where
+    pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+
+    fltCmpOp op = case op of
+       FloatGtOp -> True
+       FloatGeOp -> True
+       FloatEqOp -> True
+       FloatNeOp -> True
+       FloatLtOp -> True
+       FloatLeOp -> True
+       DoubleGtOp -> True
+       DoubleGeOp -> True
+       DoubleEqOp -> True
+       DoubleNeOp -> True
+       DoubleLtOp -> True
+       DoubleLeOp -> True
+       _ -> False
+    (instr, cond) = case op of
+       FloatGtOp -> (FCMP TF LE, EQQ)
+       FloatGeOp -> (FCMP TF LTT, EQQ)
+       FloatEqOp -> (FCMP TF EQQ, NE)
+       FloatNeOp -> (FCMP TF EQQ, EQQ)
+       FloatLtOp -> (FCMP TF LTT, NE)
+       FloatLeOp -> (FCMP TF LE, NE)
+       DoubleGtOp -> (FCMP TF LE, EQQ)
+       DoubleGeOp -> (FCMP TF LTT, EQQ)
+       DoubleEqOp -> (FCMP TF EQQ, NE)
+       DoubleNeOp -> (FCMP TF EQQ, EQQ)
+       DoubleLtOp -> (FCMP TF LTT, NE)
+       DoubleLeOp -> (FCMP TF LE, NE)
+
+genCondJump lbl (StPrim op [x, y])
+  = trivialCode instr x y          `thenNat` \ register ->
+    getNewRegNat IntRep            `thenNat` \ tmp ->
+    let
+       code   = registerCode register tmp
+       result = registerName register tmp
+       target = ImmCLbl lbl
+    in
+    return (code . mkSeqInstr (BI cond result target))
+  where
+    (instr, cond) = case op of
+       CharGtOp -> (CMP LE, EQQ)
+       CharGeOp -> (CMP LTT, EQQ)
+       CharEqOp -> (CMP EQQ, NE)
+       CharNeOp -> (CMP EQQ, EQQ)
+       CharLtOp -> (CMP LTT, NE)
+       CharLeOp -> (CMP LE, NE)
+       IntGtOp -> (CMP LE, EQQ)
+       IntGeOp -> (CMP LTT, EQQ)
+       IntEqOp -> (CMP EQQ, NE)
+       IntNeOp -> (CMP EQQ, EQQ)
+       IntLtOp -> (CMP LTT, NE)
+       IntLeOp -> (CMP LE, NE)
+       WordGtOp -> (CMP ULE, EQQ)
+       WordGeOp -> (CMP ULT, EQQ)
+       WordEqOp -> (CMP EQQ, NE)
+       WordNeOp -> (CMP EQQ, EQQ)
+       WordLtOp -> (CMP ULT, NE)
+       WordLeOp -> (CMP ULE, NE)
+       AddrGtOp -> (CMP ULE, EQQ)
+       AddrGeOp -> (CMP ULT, EQQ)
+       AddrEqOp -> (CMP EQQ, NE)
+       AddrNeOp -> (CMP EQQ, EQQ)
+       AddrLtOp -> (CMP ULT, NE)
+       AddrLeOp -> (CMP ULE, NE)
+
+-- -----------------------------------------------------------------------------
+--  Generating C calls
+
+-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations.  Apart from that, the code is easy.
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+    :: CmmCallTarget           -- function to call
+    -> HintedCmmFormals                -- where to put the result
+    -> HintedCmmActuals                -- arguments (of mixed type)
+    -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+ccallResultRegs = 
+
+genCCall fn cconv result_regs args
+  = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
+                         `thenNat` \ ((unused,_), argCode) ->
+    let
+       nRegs = length allArgRegs - length unused
+       code = asmSeqThen (map ($ []) argCode)
+    in
+       returnSeq code [
+           LDA pv (AddrImm (ImmLab (ptext fn))),
+           JSR ra (AddrReg pv) nRegs,
+           LDGP gp (AddrReg ra)]
+  where
+    ------------------------
+    {- Try to get a value into a specific register (or registers) for
+       a call.  The first 6 arguments go into the appropriate
+       argument register (separate registers for integer and floating
+       point arguments, but used in lock-step), and the remaining
+       arguments are dumped to the stack, beginning at 0(sp).  Our
+       first argument is a pair of the list of remaining argument
+       registers to be assigned for this call and the next stack
+       offset to use for overflowing arguments.  This way,
+       @get_Arg@ can be applied to all of a call's arguments using
+       @mapAccumLNat@.
+    -}
+    get_arg
+       :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
+       -> StixTree             -- Current argument
+       -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
+
+    -- We have to use up all of our argument registers first...
+
+    get_arg ((iDst,fDst):dsts, offset) arg
+      = getRegister arg                            `thenNat` \ register ->
+       let
+           reg  = if isFloatType pk then fDst else iDst
+           code = registerCode register reg
+           src  = registerName register reg
+           pk   = registerRep register
+       in
+       return (
+           if isFloatType pk then
+               ((dsts, offset), if isFixed register then
+                   code . mkSeqInstr (FMOV src fDst)
+                   else code)
+           else
+               ((dsts, offset), if isFixed register then
+                   code . mkSeqInstr (OR src (RIReg src) iDst)
+                   else code))
+
+    -- Once we have run out of argument registers, we move to the
+    -- stack...
+
+    get_arg ([], offset) arg
+      = getRegister arg                        `thenNat` \ register ->
+       getNewRegNat (registerRep register)
+                                       `thenNat` \ tmp ->
+       let
+           code = registerCode register tmp
+           src  = registerName register tmp
+           pk   = registerRep register
+           sz   = primRepToSize pk
+       in
+       return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
+
+trivialCode instr x (StInt y)
+  | fits8Bits y
+  = getRegister x              `thenNat` \ register ->
+    getNewRegNat IntRep                `thenNat` \ tmp ->
+    let
+       code = registerCode register tmp
+       src1 = registerName register tmp
+       src2 = ImmInt (fromInteger y)
+       code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
+    in
+    return (Any IntRep code__2)
+
+trivialCode instr x y
+  = getRegister x              `thenNat` \ register1 ->
+    getRegister y              `thenNat` \ register2 ->
+    getNewRegNat IntRep                `thenNat` \ tmp1 ->
+    getNewRegNat IntRep                `thenNat` \ tmp2 ->
+    let
+       code1 = registerCode register1 tmp1 []
+       src1  = registerName register1 tmp1
+       code2 = registerCode register2 tmp2 []
+       src2  = registerName register2 tmp2
+       code__2 dst = asmSeqThen [code1, code2] .
+                    mkSeqInstr (instr src1 (RIReg src2) dst)
+    in
+    return (Any IntRep code__2)
+
+------------
+trivialUCode instr x
+  = getRegister x              `thenNat` \ register ->
+    getNewRegNat IntRep                `thenNat` \ tmp ->
+    let
+       code = registerCode register tmp
+       src  = registerName register tmp
+       code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
+    in
+    return (Any IntRep code__2)
+
+------------
+trivialFCode _ instr x y
+  = getRegister x              `thenNat` \ register1 ->
+    getRegister y              `thenNat` \ register2 ->
+    getNewRegNat FF64  `thenNat` \ tmp1 ->
+    getNewRegNat FF64  `thenNat` \ tmp2 ->
+    let
+       code1 = registerCode register1 tmp1
+       src1  = registerName register1 tmp1
+
+       code2 = registerCode register2 tmp2
+       src2  = registerName register2 tmp2
+
+       code__2 dst = asmSeqThen [code1 [], code2 []] .
+                     mkSeqInstr (instr src1 src2 dst)
+    in
+    return (Any FF64 code__2)
+
+trivialUFCode _ instr x
+  = getRegister x              `thenNat` \ register ->
+    getNewRegNat FF64  `thenNat` \ tmp ->
+    let
+       code = registerCode register tmp
+       src  = registerName register tmp
+       code__2 dst = code . mkSeqInstr (instr src dst)
+    in
+    return (Any FF64 code__2)
+
+#if alpha_TARGET_ARCH
+
+coerceInt2FP _ x
+  = getRegister x              `thenNat` \ register ->
+    getNewRegNat IntRep                `thenNat` \ reg ->
+    let
+       code = registerCode register reg
+       src  = registerName register reg
+
+       code__2 dst = code . mkSeqInstrs [
+           ST Q src (spRel 0),
+           LD TF dst (spRel 0),
+           CVTxy Q TF dst dst]
+    in
+    return (Any FF64 code__2)
+
+-------------
+coerceFP2Int x
+  = getRegister x              `thenNat` \ register ->
+    getNewRegNat FF64  `thenNat` \ tmp ->
+    let
+       code = registerCode register tmp
+       src  = registerName register tmp
+
+       code__2 dst = code . mkSeqInstrs [
+           CVTxy TF Q src tmp,
+           ST TF tmp (spRel 0),
+           LD Q dst (spRel 0)]
+    in
+    return (Any IntRep code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+
+-}
+
+
+
+
+
diff --git a/compiler/nativeGen/Alpha/Instr.hs b/compiler/nativeGen/Alpha/Instr.hs

index e2d66d3..990ea8b 100644 (file)
--- a/compiler/nativeGen/Alpha/Instr.hs
+++ b/compiler/nativeGen/Alpha/Instr.hs
@@ -10,13 +10,14 @@
  #include "nativeGen/NCG.h"
  
  module Alpha.Instr (
  #include "nativeGen/NCG.h"
  
  module Alpha.Instr (
-       Cond(..),
-       Instr(..),
-       RI(..)
+--     Cond(..),
+--     Instr(..),
+--     RI(..)
  )
  
  where
  
  )
  
  where
  
+{-
  import BlockId
  import Regs
  import Cmm
  import BlockId
  import Regs
  import Cmm
@@ -138,3 +139,4 @@ data Instr
         | FUNEND CLabel
  
  
         | FUNEND CLabel
  
  
+-}
diff --git a/compiler/nativeGen/ArchReg.hs b/compiler/nativeGen/ArchReg.hs

new file mode 100644 (file)

index 0000000..7170228
--- /dev/null
+++ b/compiler/nativeGen/ArchReg.hs
@@ -0,0 +1,14 @@
+
+
+module ArchReg (
+
+)
+
+where
+
+
+class ArchReg reg format where
+       classOfReg      :: reg    -> RegClass
+       mkVReg          :: format -> VirtReg reg
+       
+       
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs

index ce411ed..8613a8e 100644 (file)
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -19,21 +19,56 @@ module AsmCodeGen ( nativeCodeGen ) where
  #include "HsVersions.h"
  #include "nativeGen/NCG.h"
  
  #include "HsVersions.h"
  #include "nativeGen/NCG.h"
  
-import Instrs
-import Regs
-import MachCodeGen
-import PprMach
-import RegAllocInfo
-import NCGMonad
-import PositionIndependentCode
-import RegAlloc.Liveness
  
  
-import qualified RegAlloc.Linear.Main  as Linear
+#if   alpha_TARGET_ARCH
+import Alpha.CodeGen
+import Alpha.Regs
+import Alpha.RegInfo
+import Alpha.Instr
+import Alpha.Ppr
+
+#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import X86.CodeGen
+import X86.Regs
+import X86.RegInfo
+import X86.Instr
+import X86.Ppr
+
+#elif sparc_TARGET_ARCH
+import SPARC.CodeGen
+import SPARC.Regs
+import SPARC.RegInfo
+import SPARC.Instr
+import SPARC.Ppr
+
+#elif powerpc_TARGET_ARCH
+import PPC.CodeGen
+import PPC.Regs
+import PPC.RegInfo
+import PPC.Instr
+import PPC.Ppr
+
+#else
+#error "AsmCodeGen: unknown architecture"
+
+#endif
+
+import RegAlloc.Liveness
+import qualified RegAlloc.Linear.Main          as Linear
  
  import qualified GraphColor                    as Color
  import qualified RegAlloc.Graph.Main           as Color
  import qualified RegAlloc.Graph.Stats          as Color
  import qualified RegAlloc.Graph.Coalesce       as Color
  
  import qualified GraphColor                    as Color
  import qualified RegAlloc.Graph.Main           as Color
  import qualified RegAlloc.Graph.Stats          as Color
  import qualified RegAlloc.Graph.Coalesce       as Color
+import qualified RegAlloc.Graph.TrivColorable  as Color
+
+import qualified TargetReg                     as Target
+
+import Platform
+import Instruction
+import PIC
+import Reg
+import NCGMonad
  
  import Cmm
  import CmmOpt          ( cmmMiniInline, cmmMachOpFold )
  
  import Cmm
  import CmmOpt          ( cmmMiniInline, cmmMachOpFold )
@@ -160,7 +195,7 @@ nativeCodeGen dflags h us cmms
  
                 dumpIfSet_dyn dflags
                         Opt_D_dump_asm_conflicts "Register conflict graph"
  
                 dumpIfSet_dyn dflags
                         Opt_D_dump_asm_conflicts "Register conflict graph"
-                       $ Color.dotGraph Color.regDotColor trivColorable
+                       $ Color.dotGraph Target.targetRegDotColor (Color.trivColorable Target.targetRegClass)
                         $ graphGlobal)
  
  
                         $ graphGlobal)
  
  
@@ -172,7 +207,7 @@ nativeCodeGen dflags h us cmms
  
         -- write out the imports
         Pretty.printDoc Pretty.LeftMode h
  
         -- write out the imports
         Pretty.printDoc Pretty.LeftMode h
-               $ makeImportsDoc (concat imports)
+               $ makeImportsDoc dflags (concat imports)
  
         return  ()
  
  
         return  ()
  
@@ -225,13 +260,13 @@ cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count
  cmmNativeGen 
         :: DynFlags
         -> UniqSupply
  cmmNativeGen 
         :: DynFlags
         -> UniqSupply
-       -> RawCmmTop                            -- ^ the cmm to generate code for
-       -> Int                                  -- ^ sequence number of this top thing
+       -> RawCmmTop                                    -- ^ the cmm to generate code for
+       -> Int                                          -- ^ sequence number of this top thing
         -> IO   ( UniqSupply
         -> IO   ( UniqSupply
-               , [NatCmmTop]                   -- native code
-               , [CLabel]                      -- things imported by this cmm
-               , Maybe [Color.RegAllocStats]   -- stats for the coloring register allocator
-               , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
+               , [NatCmmTop Instr]                     -- native code
+               , [CLabel]                              -- things imported by this cmm
+               , Maybe [Color.RegAllocStats Instr]     -- stats for the coloring register allocator
+               , Maybe [Linear.RegAllocStats])         -- stats for the linear register allocators
  
  cmmNativeGen dflags us cmm count
   = do
  
  cmmNativeGen dflags us cmm count
   = do
@@ -375,8 +410,8 @@ x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) =
  
  -- | Build a doc for all the imports.
  --
  
  -- | Build a doc for all the imports.
  --
-makeImportsDoc :: [CLabel] -> Pretty.Doc
-makeImportsDoc imports
+makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
+makeImportsDoc dflags imports
   = dyld_stubs imports
  
  #if HAVE_SUBSECTIONS_VIA_SYMBOLS
   = dyld_stubs imports
  
  #if HAVE_SUBSECTIONS_VIA_SYMBOLS
@@ -410,13 +445,16 @@ makeImportsDoc imports
  {-      dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
                                     map head $ group $ sort imps-}
  
  {-      dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
                                     map head $ group $ sort imps-}
  
+       arch    = platformArch  $ targetPlatform dflags
+       os      = platformOS    $ targetPlatform dflags
+       
         -- (Hack) sometimes two Labels pretty-print the same, but have
         -- different uniques; so we compare their text versions...
         dyld_stubs imps
         -- (Hack) sometimes two Labels pretty-print the same, but have
         -- different uniques; so we compare their text versions...
         dyld_stubs imps
-               | needImportedSymbols
+               | needImportedSymbols arch os
                 = Pretty.vcat $
                 = Pretty.vcat $
-                       (pprGotDeclaration :) $
-                       map (pprImportedSymbol . fst . head) $
+                       (pprGotDeclaration arch os :) $
+                       map ( pprImportedSymbol arch os . fst . head) $
                         groupBy (\(_,a) (_,b) -> a == b) $
                         sortBy (\(_,a) (_,b) -> compare a b) $
                         map doPpr $
                         groupBy (\(_,a) (_,b) -> a == b) $
                         sortBy (\(_,a) (_,b) -> compare a b) $
                         map doPpr $
@@ -437,7 +475,11 @@ makeImportsDoc imports
  -- such that as many of the local jumps as possible turn into
  -- fallthroughs.
  
  -- such that as many of the local jumps as possible turn into
  -- fallthroughs.
  
-sequenceTop :: NatCmmTop -> NatCmmTop
+sequenceTop 
+       :: Instruction instr
+       => NatCmmTop instr
+       -> NatCmmTop instr
+
  sequenceTop top@(CmmData _ _) = top
  sequenceTop (CmmProc info lbl params (ListGraph blocks)) = 
    CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks)
  sequenceTop top@(CmmData _ _) = top
  sequenceTop (CmmProc info lbl params (ListGraph blocks)) = 
    CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks)
@@ -452,21 +494,36 @@ sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
  -- FYI, the classic layout for basic blocks uses postorder DFS; this
  -- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
  
  -- FYI, the classic layout for basic blocks uses postorder DFS; this
  -- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
  
-sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
+sequenceBlocks 
+       :: Instruction instr
+       => [NatBasicBlock instr] 
+       -> [NatBasicBlock instr]
+
  sequenceBlocks [] = []
  sequenceBlocks (entry:blocks) = 
    seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
    -- the first block is the entry point ==> it must remain at the start.
  
  sequenceBlocks [] = []
  sequenceBlocks (entry:blocks) = 
    seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
    -- the first block is the entry point ==> it must remain at the start.
  
-sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
+
+sccBlocks 
+       :: Instruction instr
+       => [NatBasicBlock instr] 
+       -> [SCC ( NatBasicBlock instr
+               , Unique
+               , [Unique])]
+
  sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
  
  sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
  
-getOutEdges :: [Instr] -> [Unique]
-getOutEdges instrs = case jumpDests (last instrs) [] of
-                       [one] -> [getUnique one]
-                       _many -> []
-               -- we're only interested in the last instruction of
-               -- the block, and only if it has a single destination.
+-- we're only interested in the last instruction of
+-- the block, and only if it has a single destination.
+getOutEdges 
+       :: Instruction instr
+       => [instr] -> [Unique]
+
+getOutEdges instrs 
+       = case jumpDestsOfInstr (last instrs) of
+               [one] -> [getUnique one]
+               _many -> []
  
  mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
  
  
  mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
  
@@ -494,7 +551,10 @@ reorder id accum (b@(block,id',out) : rest)
  -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
  -- big, we have to work around this limitation.
  
  -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
  -- big, we have to work around this limitation.
  
-makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock]
+makeFarBranches 
+       :: Instruction instr
+       => [NatBasicBlock instr] 
+       -> [NatBasicBlock instr]
  
  #if powerpc_TARGET_ARCH
  makeFarBranches blocks
  
  #if powerpc_TARGET_ARCH
  makeFarBranches blocks
@@ -530,7 +590,11 @@ makeFarBranches = id
  -- -----------------------------------------------------------------------------
  -- Shortcut branches
  
  -- -----------------------------------------------------------------------------
  -- Shortcut branches
  
-shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
+shortcutBranches 
+       :: DynFlags 
+       -> [NatCmmTop Instr] 
+       -> [NatCmmTop Instr]
+
  shortcutBranches dflags tops
    | optLevel dflags < 1 = tops    -- only with -O or higher
    | otherwise           = map (apply_mapping mapping) tops'
  shortcutBranches dflags tops
    | optLevel dflags < 1 = tops    -- only with -O or higher
    | otherwise           = map (apply_mapping mapping) tops'
@@ -589,12 +653,17 @@ apply_mapping ufm (CmmProc info lbl params (ListGraph blocks))
  -- Switching between the two monads whilst carrying along the same
  -- Unique supply breaks abstraction.  Is that bad?
  
  -- Switching between the two monads whilst carrying along the same
  -- Unique supply breaks abstraction.  Is that bad?
  
-genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel])
+genMachCode 
+       :: DynFlags 
+       -> RawCmmTop 
+       -> UniqSM 
+               ( [NatCmmTop Instr]
+               , [CLabel])
  
  genMachCode dflags cmm_top
    = do { initial_us <- getUs
         ; let initial_st           = mkNatM_State initial_us 0 dflags
  
  genMachCode dflags cmm_top
    = do { initial_us <- getUs
         ; let initial_st           = mkNatM_State initial_us 0 dflags
-             (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
+             (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top)
               final_delta          = natm_delta final_st
               final_imports        = natm_imports final_st
         ; if   final_delta == 0
               final_delta          = natm_delta final_st
               final_imports        = natm_imports final_st
         ; if   final_delta == 0
diff --git a/compiler/nativeGen/Instrs.hs b/compiler/nativeGen/Instrs.hs

deleted file mode 100644 (file)

index 3f38a36..0000000
--- a/compiler/nativeGen/Instrs.hs
+++ /dev/null
@@ -1,97 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Machine-dependent assembly language
---
--- (c) The University of Glasgow 1993-2004
---
------------------------------------------------------------------------------
-
-#include "nativeGen/NCG.h"
-
-
-module Instrs (
-       NatCmm,
-       NatCmmTop,
-       NatBasicBlock,
-       condUnsigned,
-       condToSigned,
-       condToUnsigned,
-
-#if   alpha_TARGET_ARCH
-       module Alpha.Instr
-#elif powerpc_TARGET_ARCH
-       module PPC.Instr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-       module X86.Instr
-#elif sparc_TARGET_ARCH
-       module SPARC.Instr
-#else
-#error "Instrs: not defined for this architecture"
-#endif
-)
-
-where
-
-#include "HsVersions.h"
-
-import BlockId
-import Regs
-import Cmm
-import CLabel           ( CLabel, pprCLabel )
-import Panic           ( panic )
-import Outputable
-import FastString
-import Constants       ( wORD_SIZE )
-
-import GHC.Exts
-
-#if   alpha_TARGET_ARCH
-import Alpha.Instr
-#elif powerpc_TARGET_ARCH
-import PPC.Instr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Instr
-#elif sparc_TARGET_ARCH
-import SPARC.Instr
-#else
-#error "Instrs: not defined for this architecture"
-#endif
-
-
--- Our flavours of the Cmm types
--- Type synonyms for Cmm populated with native code
-
-type NatCmm        = GenCmm CmmStatic [CmmStatic] (ListGraph Instr)
-type NatCmmTop     = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr)
-type NatBasicBlock = GenBasicBlock Instr
-
-
--- Condition utils
-condUnsigned GU  = True
-condUnsigned LU  = True
-condUnsigned GEU = True
-condUnsigned LEU = True
-condUnsigned _   = False
-
-condToSigned GU  = GTT
-condToSigned LU  = LTT
-condToSigned GEU = GE
-condToSigned LEU = LE
-condToSigned x   = x
-
-condToUnsigned GTT = GU
-condToUnsigned LTT = LU
-condToUnsigned GE  = GEU
-condToUnsigned LE  = LEU
-condToUnsigned x   = x
-
-
-
-
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs

new file mode 100644 (file)

index 0000000..22c37a5
--- /dev/null
+++ b/compiler/nativeGen/Instruction.hs
@@ -0,0 +1,159 @@
+
+module Instruction (
+       RegUsage(..),
+       noUsage,
+       NatCmm,
+       NatCmmTop,
+       NatBasicBlock,
+       Instruction(..)
+)
+
+where
+
+import Reg
+
+import BlockId
+import Cmm
+
+-- | Holds a list of source and destination registers used by a
+--     particular instruction. 
+--
+--   Machine registers that are pre-allocated to stgRegs are filtered
+--     out, because they are uninteresting from a register allocation
+--     standpoint.  (We wouldn't want them to end up on the free list!) 
+--
+--   As far as we are concerned, the fixed registers simply don't exist
+--     (for allocation purposes, anyway).
+--
+data RegUsage 
+       = RU [Reg] [Reg]
+
+-- | No regs read or written to.
+noUsage :: RegUsage
+noUsage  = RU [] []
+
+
+-- Our flavours of the Cmm types
+-- Type synonyms for Cmm populated with native code
+type NatCmm instr
+       = GenCmm
+               CmmStatic
+               [CmmStatic]
+               (ListGraph instr)
+
+type NatCmmTop instr
+       = GenCmmTop
+               CmmStatic
+               [CmmStatic]
+               (ListGraph instr)
+
+
+type NatBasicBlock instr
+       = GenBasicBlock instr
+
+
+
+
+-- | Common things that we can do with instructions, on all architectures.
+--     These are used by the shared parts of the native code generator,
+--     specifically the register allocators.
+--
+class  Instruction instr where
+       
+       -- | Get the registers that are being used by this instruction.
+       --      regUsage doesn't need to do any trickery for jumps and such.  
+       --      Just state precisely the regs read and written by that insn.  
+       --      The consequences of control flow transfers, as far as register
+       --      allocation goes, are taken care of by the register allocator.
+       --
+       regUsageOfInstr
+               :: instr        
+               -> RegUsage
+
+
+       -- | Apply a given mapping to all the register references in this
+       --      instruction.
+       patchRegsOfInstr
+               :: instr 
+               -> (Reg -> Reg) 
+               -> instr
+
+       
+       -- | Checks whether this instruction is a jump/branch instruction. 
+       --      One that can change the flow of control in a way that the 
+       --      register allocator needs to worry about. 
+       isJumpishInstr
+               :: instr -> Bool
+
+
+       -- | Give the possible destinations of this jump instruction.
+       --      Must be defined for all jumpish instructions.
+       jumpDestsOfInstr
+               :: instr -> [BlockId]
+       
+
+       -- | Change the destination of this jump instruction.
+       --      Used in the linear allocator when adding fixup blocks for join
+       --      points.
+       patchJumpInstr
+               :: instr
+               -> (BlockId -> BlockId)
+               -> instr
+
+               
+       -- | An instruction to spill a register into a spill slot.
+       mkSpillInstr    
+               :: Reg          -- ^ the reg to spill
+               -> Int          -- ^ the current stack delta
+               -> Int          -- ^ spill slot to use
+               -> instr
+
+               
+       -- | An instruction to reload a register from a spill slot.
+       mkLoadInstr     
+               :: Reg          -- ^ the reg to reload.
+               -> Int          -- ^ the current stack delta
+               -> Int          -- ^ the spill slot to use
+               -> instr
+
+       -- | See if this instruction is telling us the current C stack delta
+       takeDeltaInstr 
+               :: instr
+               -> Maybe Int
+
+       -- | Check whether this instruction is some meta thing inserted into 
+       --      the instruction stream for other purposes.
+       --
+       --      Not something that has to be treated as a real machine instruction
+       --      and have its registers allocated.
+       --
+       --      eg, comments, delta, ldata, etc. 
+       isMetaInstr     
+               :: instr
+               -> Bool
+               
+
+               
+       -- | Copy the value in a register to another one.
+       --      Must work for all register classes.
+       mkRegRegMoveInstr
+               :: Reg          -- ^ source register
+               -> Reg          -- ^ destination register
+               -> instr
+
+       -- | Take the source and destination from this reg -> reg move instruction
+       --      or Nothing if it's not one
+       takeRegRegMoveInstr
+               :: instr
+               -> Maybe (Reg, Reg)
+               
+       -- | Make an unconditional jump instruction.
+       --      For architectures with branch delay slots, its ok to put
+       --      a NOP after the jump. Don't fill the delay slot with an
+       --      instruction that references regs or you'll confuse the 
+       --      linear allocator.
+       mkJumpInstr     
+               :: BlockId
+               -> [instr]
+               
+               
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs

deleted file mode 100644 (file)

index d94a906..0000000
--- a/compiler/nativeGen/MachCodeGen.hs
+++ /dev/null
@@ -1,5199 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Generating machine code (instruction selection)
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
--- This is a big module, but, if you pay attention to
--- (a) the sectioning, (b) the type signatures, and
--- (c) the #if blah_TARGET_ARCH} things, the
--- structure should not be too overwhelming.
-
-module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
-
-#include "HsVersions.h"
-#include "nativeGen/NCG.h"
-#include "MachDeps.h"
-
--- NCG stuff:
-import Instrs
-import Regs
-import NCGMonad
-import PositionIndependentCode
-import RegAllocInfo    ( mkBranchInstr, mkRegRegMoveInstr )
-import PprMach
-
--- Our intermediate code:
-import BlockId
-import PprCmm          ( pprExpr )
-import Cmm
-import CLabel
-import ClosureInfo     ( C_SRT(..) )
-
--- The rest:
-import BasicTypes
-import StaticFlags     ( opt_PIC )
-import ForeignCall     ( CCallConv(..) )
-import OrdList
-import Pretty
-import qualified Outputable as O
-import Outputable
-import FastString
-import FastBool                ( isFastTrue )
-import Constants       ( wORD_SIZE )
-
-import Debug.Trace     ( trace )
-
-import Control.Monad   ( mapAndUnzipM )
-import Data.Maybe      ( fromJust )
-import Data.Bits
-import Data.Word
-import Data.Int
-
-
--- -----------------------------------------------------------------------------
--- Top-level of the instruction selector
-
--- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--- They are really trees of insns to facilitate fast appending, where a
--- left-to-right traversal (pre-order?) yields the insns in the correct
--- order.
-
-type InstrBlock = OrdList Instr
-
-cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
-cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
-  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
-  picBaseMb <- getPicBaseMaybeNat
-  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
-      tops = proc : concat statics
-  case picBaseMb of
-      Just picBase -> initializePicBase picBase tops
-      Nothing -> return tops
-  
-cmmTopCodeGen (CmmData sec dat) = do
-  return [CmmData sec dat]  -- no translation, we just use CmmStatic
-
-basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
-basicBlockCodeGen (BasicBlock id stmts) = do
-  instrs <- stmtsToInstrs stmts
-  -- code generation may introduce new basic block boundaries, which
-  -- are indicated by the NEWBLOCK instruction.  We must split up the
-  -- instruction stream into basic blocks again.  Also, we extract
-  -- LDATAs here too.
-  let
-       (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-       
-       mkBlocks (NEWBLOCK id) (instrs,blocks,statics) 
-         = ([], BasicBlock id instrs : blocks, statics)
-       mkBlocks (LDATA sec dat) (instrs,blocks,statics) 
-         = (instrs, blocks, CmmData sec dat:statics)
-       mkBlocks instr (instrs,blocks,statics)
-         = (instr:instrs, blocks, statics)
-  -- in
-  return (BasicBlock id top : other_blocks, statics)
-
-stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-stmtsToInstrs stmts
-   = do instrss <- mapM stmtToInstrs stmts
-        return (concatOL instrss)
-
-stmtToInstrs :: CmmStmt -> NatM InstrBlock
-stmtToInstrs stmt = case stmt of
-    CmmNop        -> return nilOL
-    CmmComment s   -> return (unitOL (COMMENT s))
-
-    CmmAssign reg src
-      | isFloatType ty -> assignReg_FltCode size reg src
-#if WORD_SIZE_IN_BITS==32
-      | isWord64 ty    -> assignReg_I64Code      reg src
-#endif
-      | otherwise       -> assignReg_IntCode size reg src
-       where ty = cmmRegType reg
-             size = cmmTypeSize ty
-
-    CmmStore addr src
-      | isFloatType ty -> assignMem_FltCode size addr src
-#if WORD_SIZE_IN_BITS==32
-      | isWord64 ty     -> assignMem_I64Code      addr src
-#endif
-      | otherwise       -> assignMem_IntCode size addr src
-       where ty = cmmExprType src
-             size = cmmTypeSize ty
-
-    CmmCall target result_regs args _ _
-       -> genCCall target result_regs args
-
-    CmmBranch id         -> genBranch id
-    CmmCondBranch arg id  -> genCondJump id arg
-    CmmSwitch arg ids     -> genSwitch arg ids
-    CmmJump arg params   -> genJump arg
-    CmmReturn params     ->
-      panic "stmtToInstrs: return statement should have been cps'd away"
-
--- -----------------------------------------------------------------------------
--- General things for putting together code sequences
-
--- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
--- CmmExprs into CmmRegOff?
-mangleIndexTree :: CmmExpr -> CmmExpr
-mangleIndexTree (CmmRegOff reg off)
-  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
-  where width = typeWidth (cmmRegType reg)
-
--- -----------------------------------------------------------------------------
---  Code gen for 64-bit arithmetic on 32-bit platforms
-
-{-
-Simple support for generating 64-bit code (ie, 64 bit values and 64
-bit assignments) on 32-bit platforms.  Unlike the main code generator
-we merely shoot for generating working code as simply as possible, and
-pay little attention to code quality.  Specifically, there is no
-attempt to deal cleverly with the fixed-vs-floating register
-distinction; all values are generated into (pairs of) floating
-registers, even if this would mean some redundant reg-reg moves as a
-result.  Only one of the VRegUniques is returned, since it will be
-of the VRegUniqueLo form, and the upper-half VReg can be determined
-by applying getHiVRegFromLo to it.
--}
-
-data ChildCode64       -- a.k.a "Register64"
-   = ChildCode64 
-        InstrBlock     -- code
-        Reg            -- the lower 32-bit temporary which contains the
-                       -- result; use getHiVRegFromLo to find the other
-                       -- VRegUnique.  Rules of this simplified insn
-                       -- selection game are therefore that the returned
-                       -- Reg may be modified
-
-#if WORD_SIZE_IN_BITS==32
-assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
-#endif
-
-#ifndef x86_64_TARGET_ARCH
-iselExpr64        :: CmmExpr -> NatM ChildCode64
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-assignMem_I64Code addrTree valueTree = do
-  Amode addr addr_code <- getAmode addrTree
-  ChildCode64 vcode rlo <- iselExpr64 valueTree
-  let 
-        rhi = getHiVRegFromLo rlo
-
-        -- Little-endian store
-        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
-        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
-  -- in
-  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
-   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
-   let 
-         r_dst_lo = mkVReg u_dst II32
-         r_dst_hi = getHiVRegFromLo r_dst_lo
-         r_src_hi = getHiVRegFromLo r_src_lo
-         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
-         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
-   -- in
-   return (
-        vcode `snocOL` mov_lo `snocOL` mov_hi
-     )
-
-assignReg_I64Code lvalue valueTree
-   = panic "assignReg_I64Code(i386): invalid lvalue"
-
-------------
-
-iselExpr64 (CmmLit (CmmInt i _)) = do
-  (rlo,rhi) <- getNewRegPairNat II32
-  let
-       r = fromIntegral (fromIntegral i :: Word32)
-       q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
-       code = toOL [
-               MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
-               MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
-               ]
-  -- in
-  return (ChildCode64 code rlo)
-
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
-   Amode addr addr_code <- getAmode addrTree
-   (rlo,rhi) <- getNewRegPairNat II32
-   let 
-        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
-        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
-   -- in
-   return (
-            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
-                        rlo
-     )
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
-   = return (ChildCode64 nilOL (mkVReg vu II32))
-         
--- we handle addition, but rather badly
-iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
-   ChildCode64 code1 r1lo <- iselExpr64 e1
-   (rlo,rhi) <- getNewRegPairNat II32
-   let
-       r = fromIntegral (fromIntegral i :: Word32)
-       q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
-       r1hi = getHiVRegFromLo r1lo
-       code =  code1 `appOL`
-               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
-                      ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
-                      MOV II32 (OpReg r1hi) (OpReg rhi),
-                      ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
-   -- in
-   return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
-   ChildCode64 code1 r1lo <- iselExpr64 e1
-   ChildCode64 code2 r2lo <- iselExpr64 e2
-   (rlo,rhi) <- getNewRegPairNat II32
-   let
-       r1hi = getHiVRegFromLo r1lo
-       r2hi = getHiVRegFromLo r2lo
-       code =  code1 `appOL`
-               code2 `appOL`
-               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
-                      ADD II32 (OpReg r2lo) (OpReg rlo),
-                      MOV II32 (OpReg r1hi) (OpReg rhi),
-                      ADC II32 (OpReg r2hi) (OpReg rhi) ]
-   -- in
-   return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
-     fn <- getAnyReg expr
-     r_dst_lo <-  getNewRegNat II32
-     let r_dst_hi = getHiVRegFromLo r_dst_lo
-         code = fn r_dst_lo
-     return (
-             ChildCode64 (code `snocOL` 
-                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
-                          r_dst_lo
-            )
-
-iselExpr64 expr
-   = pprPanic "iselExpr64(i386)" (ppr expr)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-assignMem_I64Code addrTree valueTree = do
-     Amode addr addr_code <- getAmode addrTree
-     ChildCode64 vcode rlo <- iselExpr64 valueTree  
-     (src, code) <- getSomeReg addrTree
-     let 
-         rhi = getHiVRegFromLo rlo
-         -- Big-endian store
-         mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
-         mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
-     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
-     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree    
-     let 
-         r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
-         r_dst_hi = getHiVRegFromLo r_dst_lo
-         r_src_hi = getHiVRegFromLo r_src_lo
-         mov_lo = mkMOV r_src_lo r_dst_lo
-         mov_hi = mkMOV r_src_hi r_dst_hi
-         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
-     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
-assignReg_I64Code lvalue valueTree
-   = panic "assignReg_I64Code(sparc): invalid lvalue"
-
-
--- Load a 64 bit word
-iselExpr64 (CmmLoad addrTree ty) 
- | isWord64 ty
- = do  Amode amode addr_code   <- getAmode addrTree
-       let result
-
-               | AddrRegReg r1 r2      <- amode
-               = do    rlo     <- getNewRegNat II32
-                       tmp     <- getNewRegNat II32
-                       let rhi = getHiVRegFromLo rlo
-
-                       return  $ ChildCode64 
-                               (        addr_code 
-                               `appOL`  toOL
-                                        [ ADD False False r1 (RIReg r2) tmp
-                                        , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
-                                        , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
-                               rlo
-
-               | AddrRegImm r1 (ImmInt i) <- amode
-               = do    rlo     <- getNewRegNat II32
-                       let rhi = getHiVRegFromLo rlo
-                       
-                       return  $ ChildCode64 
-                               (        addr_code 
-                               `appOL`  toOL
-                                        [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
-                                        , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
-                               rlo
-               
-       result
-
-
--- Add a literal to a 64 bit integer
-iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) 
- = do  ChildCode64 code1 r1_lo <- iselExpr64 e1
-       let r1_hi       = getHiVRegFromLo r1_lo
-       
-       r_dst_lo        <- getNewRegNat II32
-       let r_dst_hi    =  getHiVRegFromLo r_dst_lo 
-       
-       return  $ ChildCode64
-                       ( toOL
-                       [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
-                       , ADD True  False r1_hi (RIReg g0)         r_dst_hi ])
-                       r_dst_lo
-
-
--- Addition of II64
-iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
- = do  ChildCode64 code1 r1_lo <- iselExpr64 e1
-       let r1_hi       = getHiVRegFromLo r1_lo
-
-       ChildCode64 code2 r2_lo <- iselExpr64 e2
-       let r2_hi       = getHiVRegFromLo r2_lo
-       
-       r_dst_lo        <- getNewRegNat II32
-       let r_dst_hi    = getHiVRegFromLo r_dst_lo
-       
-       let code =      code1
-               `appOL` code2
-               `appOL` toOL
-                       [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
-                       , ADD True  False r1_hi (RIReg r2_hi) r_dst_hi ]
-       
-       return  $ ChildCode64 code r_dst_lo
-
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
-     r_dst_lo <-  getNewRegNat II32
-     let r_dst_hi = getHiVRegFromLo r_dst_lo
-         r_src_lo = mkVReg uq II32
-         r_src_hi = getHiVRegFromLo r_src_lo
-         mov_lo = mkMOV r_src_lo r_dst_lo
-         mov_hi = mkMOV r_src_hi r_dst_hi
-         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
-     return (
-            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
-         )
-
--- Convert something into II64
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) 
- = do
-       r_dst_lo        <- getNewRegNat II32
-       let r_dst_hi    = getHiVRegFromLo r_dst_lo
-
-       -- compute expr and load it into r_dst_lo
-       (a_reg, a_code) <- getSomeReg expr
-
-       let code        = a_code
-               `appOL` toOL
-                       [ mkRegRegMoveInstr g0    r_dst_hi      -- clear high 32 bits
-                       , mkRegRegMoveInstr a_reg r_dst_lo ]
-                       
-       return  $ ChildCode64 code r_dst_lo
-
-
-iselExpr64 expr
-   = pprPanic "iselExpr64(sparc)" (ppr expr)
-
-#endif /* sparc_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if powerpc_TARGET_ARCH
-
-getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
-getI64Amodes addrTree = do
-    Amode hi_addr addr_code <- getAmode addrTree
-    case addrOffset hi_addr 4 of
-        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
-        Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
-                           return (AddrRegImm hi_ptr (ImmInt 0),
-                                   AddrRegImm hi_ptr (ImmInt 4),
-                                   code)
-
-assignMem_I64Code addrTree valueTree = do
-        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
-       ChildCode64 vcode rlo <- iselExpr64 valueTree
-       let 
-               rhi = getHiVRegFromLo rlo
-
-               -- Big-endian store
-               mov_hi = ST II32 rhi hi_addr
-               mov_lo = ST II32 rlo lo_addr
-       -- in
-       return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
-   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
-   let 
-         r_dst_lo = mkVReg u_dst II32
-         r_dst_hi = getHiVRegFromLo r_dst_lo
-         r_src_hi = getHiVRegFromLo r_src_lo
-         mov_lo = MR r_dst_lo r_src_lo
-         mov_hi = MR r_dst_hi r_src_hi
-   -- in
-   return (
-        vcode `snocOL` mov_lo `snocOL` mov_hi
-     )
-
-assignReg_I64Code lvalue valueTree
-   = panic "assignReg_I64Code(powerpc): invalid lvalue"
-
-
--- Don't delete this -- it's very handy for debugging.
---iselExpr64 expr 
---   | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
---   = panic "iselExpr64(???)"
-
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
-    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
-    (rlo, rhi) <- getNewRegPairNat II32
-    let mov_hi = LD II32 rhi hi_addr
-        mov_lo = LD II32 rlo lo_addr
-    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
-                         rlo
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
-   = return (ChildCode64 nilOL (mkVReg vu II32))
-
-iselExpr64 (CmmLit (CmmInt i _)) = do
-  (rlo,rhi) <- getNewRegPairNat II32
-  let
-       half0 = fromIntegral (fromIntegral i :: Word16)
-       half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
-       half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
-       half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
-       
-       code = toOL [
-               LIS rlo (ImmInt half1),
-               OR rlo rlo (RIImm $ ImmInt half0),
-               LIS rhi (ImmInt half3),
-               OR rlo rlo (RIImm $ ImmInt half2)
-               ]
-  -- in
-  return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
-   ChildCode64 code1 r1lo <- iselExpr64 e1
-   ChildCode64 code2 r2lo <- iselExpr64 e2
-   (rlo,rhi) <- getNewRegPairNat II32
-   let
-       r1hi = getHiVRegFromLo r1lo
-       r2hi = getHiVRegFromLo r2lo
-       code =  code1 `appOL`
-               code2 `appOL`
-               toOL [ ADDC rlo r1lo r2lo,
-                      ADDE rhi r1hi r2hi ]
-   -- in
-   return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
-    (expr_reg,expr_code) <- getSomeReg expr
-    (rlo, rhi) <- getNewRegPairNat II32
-    let mov_hi = LI rhi (ImmInt 0)
-        mov_lo = MR rlo expr_reg
-    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
-                         rlo
-iselExpr64 expr
-   = pprPanic "iselExpr64(powerpc)" (ppr expr)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- The 'Register' type
-
--- 'Register's passed up the tree.  If the stix code forces the register
--- to live in a pre-decided machine register, it comes out as @Fixed@;
--- otherwise, it comes out as @Any@, and the parent can decide which
--- register to put it in.
-
-data Register
-  = Fixed   Size Reg InstrBlock
-  | Any            Size (Reg -> InstrBlock)
-
-swizzleRegisterRep :: Register -> Size -> Register
--- Change the width; it's a no-op
-swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
-swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
-
-
--- -----------------------------------------------------------------------------
--- Utils based on getRegister, below
-
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
-  r <- getRegister expr
-  case r of
-    Any rep code -> do
-       tmp <- getNewRegNat rep
-       return (tmp, code tmp)
-    Fixed _ reg code -> 
-       return (reg, code)
-
--- -----------------------------------------------------------------------------
--- Grab the Reg for a CmmReg
-
-getRegisterReg :: CmmReg -> Reg
-
-getRegisterReg (CmmLocal (LocalReg u pk))
-  = mkVReg u (cmmTypeSize pk)
-
-getRegisterReg (CmmGlobal mid)
-  = case get_GlobalReg_reg_or_addr mid of
-       Left (RealReg rrno) -> RealReg rrno
-       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
-          -- By this stage, the only MagicIds remaining should be the
-          -- ones which map to a real machine register on this
-          -- platform.  Hence ...
-
-
--- -----------------------------------------------------------------------------
--- Generate code to get a subtree into a Register
-
--- Don't delete this -- it's very handy for debugging.
---getRegister expr 
---   | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
---   = panic "getRegister(???)"
-
-getRegister :: CmmExpr -> NatM Register
-
-#if !x86_64_TARGET_ARCH
-    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
-    -- register, it can only be used for rip-relative addressing.
-getRegister (CmmReg (CmmGlobal PicBaseReg))
-  = do
-      reg <- getPicBaseNat wordSize
-      return (Fixed wordSize reg nilOL)
-#endif
-
-getRegister (CmmReg reg) 
-  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
-                 (getRegisterReg reg) nilOL)
-
-getRegister tree@(CmmRegOff _ _) 
-  = getRegister (mangleIndexTree tree)
-
-
-#if WORD_SIZE_IN_BITS==32
-    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
-    -- TO_W_(x), TO_W_(x >> 32)
-
-getRegister (CmmMachOp (MO_UU_Conv W64 W32)
-             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
-  ChildCode64 code rlo <- iselExpr64 x
-  return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister (CmmMachOp (MO_SS_Conv W64 W32)
-             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
-  ChildCode64 code rlo <- iselExpr64 x
-  return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
-  ChildCode64 code rlo <- iselExpr64 x
-  return $ Fixed II32 rlo code
-
-getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
-  ChildCode64 code rlo <- iselExpr64 x
-  return $ Fixed II32 rlo code       
-
-#endif
-
--- end of machine-"independent" bit; here we go on the rest...
-
-#if alpha_TARGET_ARCH
-
-getRegister (StDouble d)
-  = getBlockIdNat                  `thenNat` \ lbl ->
-    getNewRegNat PtrRep            `thenNat` \ tmp ->
-    let code dst = mkSeqInstrs [
-           LDATA RoDataSegment lbl [
-                   DATA TF [ImmLab (rational d)]
-               ],
-           LDA tmp (AddrImm (ImmCLbl lbl)),
-           LD TF dst (AddrReg tmp)]
-    in
-       return (Any FF64 code)
-
-getRegister (StPrim primop [x]) -- unary PrimOps
-  = case primop of
-      IntNegOp -> trivialUCode (NEG Q False) x
-
-      NotOp    -> trivialUCode NOT x
-
-      FloatNegOp  -> trivialUFCode FloatRep (FNEG TF) x
-      DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
-
-      OrdOp -> coerceIntCode IntRep x
-      ChrOp -> chrCode x
-
-      Float2IntOp  -> coerceFP2Int    x
-      Int2FloatOp  -> coerceInt2FP pr x
-      Double2IntOp -> coerceFP2Int    x
-      Int2DoubleOp -> coerceInt2FP pr x
-
-      Double2FloatOp -> coerceFltCode x
-      Float2DoubleOp -> coerceFltCode x
-
-      other_op -> getRegister (StCall fn CCallConv FF64 [x])
-       where
-         fn = case other_op of
-                FloatExpOp    -> fsLit "exp"
-                FloatLogOp    -> fsLit "log"
-                FloatSqrtOp   -> fsLit "sqrt"
-                FloatSinOp    -> fsLit "sin"
-                FloatCosOp    -> fsLit "cos"
-                FloatTanOp    -> fsLit "tan"
-                FloatAsinOp   -> fsLit "asin"
-                FloatAcosOp   -> fsLit "acos"
-                FloatAtanOp   -> fsLit "atan"
-                FloatSinhOp   -> fsLit "sinh"
-                FloatCoshOp   -> fsLit "cosh"
-                FloatTanhOp   -> fsLit "tanh"
-                DoubleExpOp   -> fsLit "exp"
-                DoubleLogOp   -> fsLit "log"
-                DoubleSqrtOp  -> fsLit "sqrt"
-                DoubleSinOp   -> fsLit "sin"
-                DoubleCosOp   -> fsLit "cos"
-                DoubleTanOp   -> fsLit "tan"
-                DoubleAsinOp  -> fsLit "asin"
-                DoubleAcosOp  -> fsLit "acos"
-                DoubleAtanOp  -> fsLit "atan"
-                DoubleSinhOp  -> fsLit "sinh"
-                DoubleCoshOp  -> fsLit "cosh"
-                DoubleTanhOp  -> fsLit "tanh"
-  where
-    pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-
-getRegister (StPrim primop [x, y]) -- dyadic PrimOps
-  = case primop of
-      CharGtOp -> trivialCode (CMP LTT) y x
-      CharGeOp -> trivialCode (CMP LE) y x
-      CharEqOp -> trivialCode (CMP EQQ) x y
-      CharNeOp -> int_NE_code x y
-      CharLtOp -> trivialCode (CMP LTT) x y
-      CharLeOp -> trivialCode (CMP LE) x y
-
-      IntGtOp  -> trivialCode (CMP LTT) y x
-      IntGeOp  -> trivialCode (CMP LE) y x
-      IntEqOp  -> trivialCode (CMP EQQ) x y
-      IntNeOp  -> int_NE_code x y
-      IntLtOp  -> trivialCode (CMP LTT) x y
-      IntLeOp  -> trivialCode (CMP LE) x y
-
-      WordGtOp -> trivialCode (CMP ULT) y x
-      WordGeOp -> trivialCode (CMP ULE) x y
-      WordEqOp -> trivialCode (CMP EQQ)  x y
-      WordNeOp -> int_NE_code x y
-      WordLtOp -> trivialCode (CMP ULT) x y
-      WordLeOp -> trivialCode (CMP ULE) x y
-
-      AddrGtOp -> trivialCode (CMP ULT) y x
-      AddrGeOp -> trivialCode (CMP ULE) y x
-      AddrEqOp -> trivialCode (CMP EQQ)  x y
-      AddrNeOp -> int_NE_code x y
-      AddrLtOp -> trivialCode (CMP ULT) x y
-      AddrLeOp -> trivialCode (CMP ULE) x y
-       
-      FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
-      FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
-      FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
-      FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
-      FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
-      FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
-
-      DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
-      DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
-      DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
-      DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
-      DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
-      DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
-
-      IntAddOp  -> trivialCode (ADD Q False) x y
-      IntSubOp  -> trivialCode (SUB Q False) x y
-      IntMulOp  -> trivialCode (MUL Q False) x y
-      IntQuotOp -> trivialCode (DIV Q False) x y
-      IntRemOp  -> trivialCode (REM Q False) x y
-
-      WordAddOp  -> trivialCode (ADD Q False) x y
-      WordSubOp  -> trivialCode (SUB Q False) x y
-      WordMulOp  -> trivialCode (MUL Q False) x y
-      WordQuotOp -> trivialCode (DIV Q True) x y
-      WordRemOp  -> trivialCode (REM Q True) x y
-
-      FloatAddOp -> trivialFCode  W32 (FADD TF) x y
-      FloatSubOp -> trivialFCode  W32 (FSUB TF) x y
-      FloatMulOp -> trivialFCode  W32 (FMUL TF) x y
-      FloatDivOp -> trivialFCode  W32 (FDIV TF) x y
-
-      DoubleAddOp -> trivialFCode  W64 (FADD TF) x y
-      DoubleSubOp -> trivialFCode  W64 (FSUB TF) x y
-      DoubleMulOp -> trivialFCode  W64 (FMUL TF) x y
-      DoubleDivOp -> trivialFCode  W64 (FDIV TF) x y
-
-      AddrAddOp  -> trivialCode (ADD Q False) x y
-      AddrSubOp  -> trivialCode (SUB Q False) x y
-      AddrRemOp  -> trivialCode (REM Q True) x y
-
-      AndOp  -> trivialCode AND x y
-      OrOp   -> trivialCode OR  x y
-      XorOp  -> trivialCode XOR x y
-      SllOp  -> trivialCode SLL x y
-      SrlOp  -> trivialCode SRL x y
-
-      ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
-      ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
-      ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
-
-      FloatPowerOp  -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
-      DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
-  where
-    {- ------------------------------------------------------------
-       Some bizarre special code for getting condition codes into
-       registers.  Integer non-equality is a test for equality
-       followed by an XOR with 1.  (Integer comparisons always set
-       the result register to 0 or 1.)  Floating point comparisons of
-       any kind leave the result in a floating point register, so we
-       need to wrangle an integer register out of things.
-    -}
-    int_NE_code :: StixTree -> StixTree -> NatM Register
-
-    int_NE_code x y
-      = trivialCode (CMP EQQ) x y      `thenNat` \ register ->
-       getNewRegNat IntRep             `thenNat` \ tmp ->
-       let
-           code = registerCode register tmp
-           src  = registerName register tmp
-           code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
-       in
-       return (Any IntRep code__2)
-
-    {- ------------------------------------------------------------
-       Comments for int_NE_code also apply to cmpF_code
-    -}
-    cmpF_code
-       :: (Reg -> Reg -> Reg -> Instr)
-       -> Cond
-       -> StixTree -> StixTree
-       -> NatM Register
-
-    cmpF_code instr cond x y
-      = trivialFCode pr instr x y      `thenNat` \ register ->
-       getNewRegNat FF64               `thenNat` \ tmp ->
-       getBlockIdNat                   `thenNat` \ lbl ->
-       let
-           code = registerCode register tmp
-           result  = registerName register tmp
-
-           code__2 dst = code . mkSeqInstrs [
-               OR zeroh (RIImm (ImmInt 1)) dst,
-               BF cond  result (ImmCLbl lbl),
-               OR zeroh (RIReg zeroh) dst,
-               NEWBLOCK lbl]
-       in
-       return (Any IntRep code__2)
-      where
-       pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
-      ------------------------------------------------------------
-
-getRegister (CmmLoad pk mem)
-  = getAmode mem                   `thenNat` \ amode ->
-    let
-       code = amodeCode amode
-       src   = amodeAddr amode
-       size = primRepToSize pk
-       code__2 dst = code . mkSeqInstr (LD size dst src)
-    in
-    return (Any pk code__2)
-
-getRegister (StInt i)
-  | fits8Bits i
-  = let
-       code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
-    in
-    return (Any IntRep code)
-  | otherwise
-  = let
-       code dst = mkSeqInstr (LDI Q dst src)
-    in
-    return (Any IntRep code)
-  where
-    src = ImmInt (fromInteger i)
-
-getRegister leaf
-  | isJust imm
-  = let
-       code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
-    in
-    return (Any PtrRep code)
-  where
-    imm = maybeImm leaf
-    imm__2 = case imm of Just x -> x
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-getRegister (CmmLit (CmmFloat f W32)) = do
-    lbl <- getNewLabelNat
-    dflags <- getDynFlagsNat
-    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
-    Amode addr addr_code <- getAmode dynRef
-    let code dst =
-           LDATA ReadOnlyData
-                       [CmmDataLabel lbl,
-                        CmmStaticLit (CmmFloat f W32)]
-           `consOL` (addr_code `snocOL`
-           GLD FF32 addr dst)
-    -- in
-    return (Any FF32 code)
-
-
-getRegister (CmmLit (CmmFloat d W64))
-  | d == 0.0
-  = let code dst = unitOL (GLDZ dst)
-    in  return (Any FF64 code)
-
-  | d == 1.0
-  = let code dst = unitOL (GLD1 dst)
-    in  return (Any FF64 code)
-
-  | otherwise = do
-    lbl <- getNewLabelNat
-    dflags <- getDynFlagsNat
-    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
-    Amode addr addr_code <- getAmode dynRef
-    let code dst =
-           LDATA ReadOnlyData
-                       [CmmDataLabel lbl,
-                        CmmStaticLit (CmmFloat d W64)]
-           `consOL` (addr_code `snocOL`
-           GLD FF64 addr dst)
-    -- in
-    return (Any FF64 code)
-
-#endif /* i386_TARGET_ARCH */
-
-#if x86_64_TARGET_ARCH
-
-getRegister (CmmLit (CmmFloat 0.0 w)) = do
-   let size = floatSize w
-       code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
-       -- I don't know why there are xorpd, xorps, and pxor instructions.
-       -- They all appear to do the same thing --SDM
-   return (Any size code)
-
-getRegister (CmmLit (CmmFloat f w)) = do
-    lbl <- getNewLabelNat
-    let code dst = toOL [
-           LDATA ReadOnlyData
-                       [CmmDataLabel lbl,
-                        CmmStaticLit (CmmFloat f w)],
-           MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
-           ]
-    -- in
-    return (Any size code)
-  where size = floatSize w
-
-#endif /* x86_64_TARGET_ARCH */
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVZxL II8) addr
-  return (Any II32 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVSxL II8) addr
-  return (Any II32 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVZxL II16) addr
-  return (Any II32 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVSxL II16) addr
-  return (Any II32 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVZxL II8) addr
-  return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVSxL II8) addr
-  return (Any II64 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVZxL II16) addr
-  return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVSxL II16) addr
-  return (Any II64 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
-  return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
-  code <- intLoadCode (MOVSxL II32) addr
-  return (Any II64 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
-                                     CmmLit displacement])
-    = return $ Any II64 (\dst -> unitOL $
-        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-#endif
-
-#if x86_64_TARGET_ARCH
-getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
-  x_code <- getAnyReg x
-  lbl <- getNewLabelNat
-  let
-    code dst = x_code dst `appOL` toOL [
-       -- This is how gcc does it, so it can't be that bad:
-       LDATA ReadOnlyData16 [
-               CmmAlign 16,
-               CmmDataLabel lbl,
-               CmmStaticLit (CmmInt 0x80000000 W32),
-               CmmStaticLit (CmmInt 0 W32),
-               CmmStaticLit (CmmInt 0 W32),
-               CmmStaticLit (CmmInt 0 W32)
-       ],
-       XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
-               -- xorps, so we need the 128-bit constant
-               -- ToDo: rip-relative
-       ]
-  --
-  return (Any FF32 code)
-
-getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
-  x_code <- getAnyReg x
-  lbl <- getNewLabelNat
-  let
-       -- This is how gcc does it, so it can't be that bad:
-    code dst = x_code dst `appOL` toOL [
-       LDATA ReadOnlyData16 [
-               CmmAlign 16,
-               CmmDataLabel lbl,
-               CmmStaticLit (CmmInt 0x8000000000000000 W64),
-               CmmStaticLit (CmmInt 0 W64)
-       ],
-               -- gcc puts an unpck here.  Wonder if we need it.
-       XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
-               -- xorpd, so we need the 128-bit constant
-       ]
-  --
-  return (Any FF64 code)
-#endif
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
-  = case mop of
-#if i386_TARGET_ARCH
-      MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
-      MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
-#endif
-
-      MO_S_Neg w -> triv_ucode NEGI (intSize w)
-      MO_F_Neg w -> triv_ucode NEGI (floatSize w)
-      MO_Not w   -> triv_ucode NOT  (intSize w)
-
-      -- Nop conversions
-      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
-      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
-      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
-      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
-      MO_UU_Conv W32 W16 -> toI16Reg W32 x
-      MO_SS_Conv W32 W16 -> toI16Reg W32 x
-
-#if x86_64_TARGET_ARCH
-      MO_UU_Conv W64 W32 -> conversionNop II64 x
-      MO_SS_Conv W64 W32 -> conversionNop II64 x
-      MO_UU_Conv W64 W16 -> toI16Reg W64 x
-      MO_SS_Conv W64 W16 -> toI16Reg W64 x
-      MO_UU_Conv W64 W8  -> toI8Reg  W64 x
-      MO_SS_Conv W64 W8  -> toI8Reg  W64 x
-#endif
-
-      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
-      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
-
-      -- widenings
-      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
-      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
-      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
-
-      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
-      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
-      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
-
-#if x86_64_TARGET_ARCH
-      MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
-      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
-      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
-      MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
-      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
-      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
-       -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
-       -- However, we don't want the register allocator to throw it
-       -- away as an unnecessary reg-to-reg move, so we keep it in
-       -- the form of a movzl and print it as a movl later.
-#endif
-
-#if i386_TARGET_ARCH
-      MO_FF_Conv W32 W64 -> conversionNop FF64 x
-      MO_FF_Conv W64 W32 -> conversionNop FF32 x
-#else
-      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
-      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
-#endif
-
-      MO_FS_Conv from to -> coerceFP2Int from to x
-      MO_SF_Conv from to -> coerceInt2FP from to x
-
-      other -> pprPanic "getRegister" (pprMachOp mop)
-   where
-       triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
-       triv_ucode instr size = trivialUCode size (instr size) x
-
-       -- signed or unsigned extension.
-       integerExtend :: Width -> Width
-                     -> (Size -> Operand -> Operand -> Instr)
-                     -> CmmExpr -> NatM Register
-       integerExtend from to instr expr = do
-           (reg,e_code) <- if from == W8 then getByteReg expr
-                                         else getSomeReg expr
-           let 
-               code dst = 
-                 e_code `snocOL`
-                 instr (intSize from) (OpReg reg) (OpReg dst)
-           return (Any (intSize to) code)
-
-       toI8Reg :: Width -> CmmExpr -> NatM Register
-       toI8Reg new_rep expr
-            = do codefn <- getAnyReg expr
-                return (Any (intSize new_rep) codefn)
-               -- HACK: use getAnyReg to get a byte-addressable register.
-               -- If the source was a Fixed register, this will add the
-               -- mov instruction to put it into the desired destination.
-               -- We're assuming that the destination won't be a fixed
-               -- non-byte-addressable register; it won't be, because all
-               -- fixed registers are word-sized.
-
-       toI16Reg = toI8Reg -- for now
-
-       conversionNop :: Size -> CmmExpr -> NatM Register
-        conversionNop new_size expr
-            = do e_code <- getRegister expr
-                 return (swizzleRegisterRep e_code new_size)
-
-
-getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
-  = case mop of
-      MO_F_Eq w -> condFltReg EQQ x y
-      MO_F_Ne w -> condFltReg NE x y
-      MO_F_Gt w -> condFltReg GTT x y
-      MO_F_Ge w -> condFltReg GE x y
-      MO_F_Lt w -> condFltReg LTT x y
-      MO_F_Le w -> condFltReg LE x y
-
-      MO_Eq rep   -> condIntReg EQQ x y
-      MO_Ne rep   -> condIntReg NE x y
-
-      MO_S_Gt rep -> condIntReg GTT x y
-      MO_S_Ge rep -> condIntReg GE x y
-      MO_S_Lt rep -> condIntReg LTT x y
-      MO_S_Le rep -> condIntReg LE x y
-
-      MO_U_Gt rep -> condIntReg GU  x y
-      MO_U_Ge rep -> condIntReg GEU x y
-      MO_U_Lt rep -> condIntReg LU  x y
-      MO_U_Le rep -> condIntReg LEU x y
-
-#if i386_TARGET_ARCH
-      MO_F_Add w -> trivialFCode w GADD x y
-      MO_F_Sub w -> trivialFCode w GSUB x y
-      MO_F_Quot w -> trivialFCode w GDIV x y
-      MO_F_Mul w -> trivialFCode w GMUL x y
-#endif
-
-#if x86_64_TARGET_ARCH
-      MO_F_Add w -> trivialFCode w ADD x y
-      MO_F_Sub w -> trivialFCode w SUB x y
-      MO_F_Quot w -> trivialFCode w FDIV x y
-      MO_F_Mul w -> trivialFCode w MUL x y
-#endif
-
-      MO_Add rep -> add_code rep x y
-      MO_Sub rep -> sub_code rep x y
-
-      MO_S_Quot rep -> div_code rep True  True  x y
-      MO_S_Rem  rep -> div_code rep True  False x y
-      MO_U_Quot rep -> div_code rep False True  x y
-      MO_U_Rem  rep -> div_code rep False False x y
-
-      MO_S_MulMayOflo rep -> imulMayOflo rep x y
-
-      MO_Mul rep -> triv_op rep IMUL
-      MO_And rep -> triv_op rep AND
-      MO_Or  rep -> triv_op rep OR
-      MO_Xor rep -> triv_op rep XOR
-
-       {- Shift ops on x86s have constraints on their source, it
-          either has to be Imm, CL or 1
-           => trivialCode is not restrictive enough (sigh.)
-       -}         
-      MO_Shl rep   -> shift_code rep SHL x y {-False-}
-      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
-      MO_S_Shr rep -> shift_code rep SAR x y {-False-}
-
-      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
-  where
-    --------------------
-    triv_op width instr = trivialCode width op (Just op) x y
-                       where op   = instr (intSize width)
-
-    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
-    imulMayOflo rep a b = do
-         (a_reg, a_code) <- getNonClobberedReg a
-         b_code <- getAnyReg b
-         let 
-            shift_amt  = case rep of
-                          W32 -> 31
-                          W64 -> 63
-                          _ -> panic "shift_amt"
-
-            size = intSize rep
-             code = a_code `appOL` b_code eax `appOL`
-                        toOL [
-                          IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
-                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
-                               -- sign extend lower part
-                           SUB size (OpReg edx) (OpReg eax)
-                               -- compare against upper
-                           -- eax==0 if high part == sign extended low part
-                        ]
-         -- in
-        return (Fixed size eax code)
-
-    --------------------
-    shift_code :: Width
-              -> (Size -> Operand -> Operand -> Instr)
-              -> CmmExpr
-              -> CmmExpr
-              -> NatM Register
-
-    {- Case1: shift length as immediate -}
-    shift_code width instr x y@(CmmLit lit) = do
-         x_code <- getAnyReg x
-         let
-              size = intSize width
-              code dst
-                 = x_code dst `snocOL` 
-                   instr size (OpImm (litToImm lit)) (OpReg dst)
-         -- in
-         return (Any size code)
-        
-    {- Case2: shift length is complex (non-immediate)
-      * y must go in %ecx.
-      * we cannot do y first *and* put its result in %ecx, because
-        %ecx might be clobbered by x.
-      * if we do y second, then x cannot be 
-        in a clobbered reg.  Also, we cannot clobber x's reg
-        with the instruction itself.
-      * so we can either:
-        - do y first, put its result in a fresh tmp, then copy it to %ecx later
-        - do y second and put its result into %ecx.  x gets placed in a fresh
-          tmp.  This is likely to be better, becuase the reg alloc can
-          eliminate this reg->reg move here (it won't eliminate the other one,
-          because the move is into the fixed %ecx).
-    -}
-    shift_code width instr x y{-amount-} = do
-        x_code <- getAnyReg x
-       let size = intSize width
-       tmp <- getNewRegNat size
-        y_code <- getAnyReg y
-       let 
-          code = x_code tmp `appOL`
-                 y_code ecx `snocOL`
-                 instr size (OpReg ecx) (OpReg tmp)
-        -- in
-        return (Fixed size tmp code)
-
-    --------------------
-    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
-    add_code rep x (CmmLit (CmmInt y _))
-       | is32BitInteger y = add_int rep x y
-    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
-      where size = intSize rep
-
-    --------------------
-    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
-    sub_code rep x (CmmLit (CmmInt y _))
-       | is32BitInteger (-y) = add_int rep x (-y)
-    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
-
-    -- our three-operand add instruction:
-    add_int width x y = do
-       (x_reg, x_code) <- getSomeReg x
-       let
-           size = intSize width
-           imm = ImmInt (fromInteger y)
-           code dst
-               = x_code `snocOL`
-                LEA size
-                       (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
-                        (OpReg dst)
-       -- 
-       return (Any size code)
-
-    ----------------------
-    div_code width signed quotient x y = do
-          (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
-          x_code <- getAnyReg x
-          let
-            size = intSize width
-            widen | signed    = CLTD size
-                  | otherwise = XOR size (OpReg edx) (OpReg edx)
-
-            instr | signed    = IDIV
-                  | otherwise = DIV
-
-            code = y_code `appOL`
-                   x_code eax `appOL`
-                   toOL [widen, instr size y_op]
-
-            result | quotient  = eax
-                   | otherwise = edx
-
-          -- in
-           return (Fixed size result code)
-
-
-getRegister (CmmLoad mem pk)
-  | isFloatType pk
-  = do
-    Amode src mem_code <- getAmode mem
-    let
-       size = cmmTypeSize pk
-       code dst = mem_code `snocOL` 
-                  IF_ARCH_i386(GLD size src dst,
-                               MOV size (OpAddr src) (OpReg dst))
-    return (Any size code)
-
-#if i386_TARGET_ARCH
-getRegister (CmmLoad mem pk)
-  | not (isWord64 pk)
-  = do 
-    code <- intLoadCode instr mem
-    return (Any size code)
-  where
-    width = typeWidth pk
-    size = intSize width
-    instr = case width of
-               W8     -> MOVZxL II8
-               _other -> MOV size
-       -- We always zero-extend 8-bit loads, if we
-       -- can't think of anything better.  This is because
-       -- we can't guarantee access to an 8-bit variant of every register
-       -- (esi and edi don't have 8-bit variants), so to make things
-       -- simpler we do our 8-bit arithmetic with full 32-bit registers.
-#endif
-
-#if x86_64_TARGET_ARCH
--- Simpler memory load code on x86_64
-getRegister (CmmLoad mem pk)
-  = do 
-    code <- intLoadCode (MOV size) mem
-    return (Any size code)
-  where size = intSize $ typeWidth pk
-#endif
-
-getRegister (CmmLit (CmmInt 0 width))
-  = let
-       size = intSize width
-
-       -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
-       adj_size = case size of II64 -> II32; _ -> size
-       size1 = IF_ARCH_i386( size, adj_size ) 
-       code dst 
-           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
-    in
-       return (Any size code)
-
-#if x86_64_TARGET_ARCH
-  -- optimisation for loading small literals on x86_64: take advantage
-  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-  -- instruction forms are shorter.
-getRegister (CmmLit lit) 
-  | isWord64 (cmmLitType lit), not (isBigLit lit)
-  = let 
-       imm = litToImm lit
-       code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
-    in
-       return (Any II64 code)
-  where
-   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
-   isBigLit _ = False
-       -- note1: not the same as (not.is32BitLit), because that checks for
-       -- signed literals that fit in 32 bits, but we want unsigned
-       -- literals here.
-       -- note2: all labels are small, because we're assuming the
-       -- small memory model (see gcc docs, -mcmodel=small).
-#endif
-
-getRegister (CmmLit lit)
-  = let 
-       size = cmmTypeSize (cmmLitType lit)
-       imm = litToImm lit
-       code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
-    in
-       return (Any size code)
-
-getRegister other = pprPanic "getRegister(x86)" (ppr other)
-
-
-intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
-   -> NatM (Reg -> InstrBlock)
-intLoadCode instr mem = do
-  Amode src mem_code <- getAmode mem
-  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
-
--- Compute an expression into *any* register, adding the appropriate
--- move instruction if necessary.
-getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-getAnyReg expr = do
-  r <- getRegister expr
-  anyReg r
-
-anyReg :: Register -> NatM (Reg -> InstrBlock)
-anyReg (Any _ code)          = return code
-anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-
--- A bit like getSomeReg, but we want a reg that can be byte-addressed.
--- Fixed registers might not be byte-addressable, so we make sure we've
--- got a temporary, inserting an extra reg copy if necessary.
-getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
-#if x86_64_TARGET_ARCH
-getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-#else
-getByteReg expr = do
-  r <- getRegister expr
-  case r of
-    Any rep code -> do
-       tmp <- getNewRegNat rep
-       return (tmp, code tmp)
-    Fixed rep reg code 
-       | isVirtualReg reg -> return (reg,code)
-       | otherwise -> do
-           tmp <- getNewRegNat rep
-           return (tmp, code `snocOL` reg2reg rep reg tmp)
-       -- ToDo: could optimise slightly by checking for byte-addressable
-       -- real registers, but that will happen very rarely if at all.
-#endif
-
--- Another variant: this time we want the result in a register that cannot
--- be modified by code to evaluate an arbitrary expression.
-getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getNonClobberedReg expr = do
-  r <- getRegister expr
-  case r of
-    Any rep code -> do
-       tmp <- getNewRegNat rep
-       return (tmp, code tmp)
-    Fixed rep reg code
-       -- only free regs can be clobbered
-       | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
-               tmp <- getNewRegNat rep
-               return (tmp, code `snocOL` reg2reg rep reg tmp)
-       | otherwise -> 
-               return (reg, code)
-
-reg2reg :: Size -> Reg -> Reg -> Instr
-reg2reg size src dst 
-#if i386_TARGET_ARCH
-  | isFloatSize size = GMOV src dst
-#endif
-  | otherwise       = MOV size (OpReg src) (OpReg dst)
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
--- getRegister :: CmmExpr -> NatM Register
-
--- Load a literal float into a float register.
---     The actual literal is stored in a new data area, and we load it 
---     at runtime.
-getRegister (CmmLit (CmmFloat f W32)) = do
-
-    -- a label for the new data area
-    lbl <- getNewLabelNat
-    tmp <- getNewRegNat II32
-
-    let code dst = toOL [
-            -- the data area         
-           LDATA ReadOnlyData
-                       [CmmDataLabel lbl,
-                        CmmStaticLit (CmmFloat f W32)],
-
-            -- load the literal
-           SETHI (HI (ImmCLbl lbl)) tmp,
-           LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
-
-    return (Any FF32 code)
-
-getRegister (CmmLit (CmmFloat d W64)) = do
-    lbl <- getNewLabelNat
-    tmp <- getNewRegNat II32
-    let code dst = toOL [
-           LDATA ReadOnlyData
-                       [CmmDataLabel lbl,
-                        CmmStaticLit (CmmFloat d W64)],
-           SETHI (HI (ImmCLbl lbl)) tmp,
-           LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
-    return (Any FF64 code)
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
-  = case mop of
-      MO_F_Neg W32     -> trivialUFCode FF32 (FNEG FF32) x
-      MO_F_Neg W64     -> trivialUFCode FF64 (FNEG FF64) x
-
-      MO_S_Neg rep     -> trivialUCode (intSize rep) (SUB False False g0) x
-      MO_Not rep       -> trivialUCode (intSize rep) (XNOR False g0) x
-
-      MO_FF_Conv W64 W32-> coerceDbl2Flt x
-      MO_FF_Conv W32 W64-> coerceFlt2Dbl x
-
-      MO_FS_Conv from to -> coerceFP2Int from to x
-      MO_SF_Conv from to -> coerceInt2FP from to x
-
-      -- Conversions which are a nop on sparc
-      MO_UU_Conv from to
-       | from == to    -> conversionNop (intSize to)  x
-      MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
-      MO_UU_Conv W32 to -> conversionNop (intSize to) x
-      MO_SS_Conv W32 to -> conversionNop (intSize to) x
-
-      MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
-      MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
-      MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
-
-      -- sign extension
-      MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
-      MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
-      MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
-
-      other_op -> panic ("Unknown unary mach op: " ++ show mop)
-    where
-
-       -- | sign extend and widen
-       integerExtend 
-               :: Width                -- ^ width of source expression
-               -> Width                -- ^ width of result
-               -> CmmExpr              -- ^ source expression
-               -> NatM Register        
-
-       integerExtend from to expr
-        = do   -- load the expr into some register
-               (reg, e_code)   <- getSomeReg expr
-               tmp             <- getNewRegNat II32
-               let bitCount
-                       = case (from, to) of
-                               (W8,  W32)      -> 24
-                               (W16, W32)      -> 16
-                               (W8,  W16)      -> 24
-               let code dst
-                       = e_code        
-
-                       -- local shift word left to load the sign bit
-                       `snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
-                       
-                       -- arithmetic shift right to sign extend
-                       `snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
-                       
-               return (Any (intSize to) code)
-                               
-
-        conversionNop new_rep expr
-            = do e_code <- getRegister expr
-                 return (swizzleRegisterRep e_code new_rep)
-
-getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
-  = case mop of
-      MO_Eq rep -> condIntReg EQQ x y
-      MO_Ne rep -> condIntReg NE x y
-
-      MO_S_Gt rep -> condIntReg GTT x y
-      MO_S_Ge rep -> condIntReg GE x y
-      MO_S_Lt rep -> condIntReg LTT x y
-      MO_S_Le rep -> condIntReg LE x y
-             
-      MO_U_Gt W32  -> condIntReg GTT x y
-      MO_U_Ge W32  -> condIntReg GE x y
-      MO_U_Lt W32  -> condIntReg LTT x y
-      MO_U_Le W32  -> condIntReg LE x y
-
-      MO_U_Gt W16 -> condIntReg GU  x y
-      MO_U_Ge W16 -> condIntReg GEU x y
-      MO_U_Lt W16 -> condIntReg LU  x y
-      MO_U_Le W16 -> condIntReg LEU x y
-
-      MO_Add W32 -> trivialCode W32 (ADD False False) x y
-      MO_Sub W32 -> trivialCode W32 (SUB False False) x y
-
-      MO_S_MulMayOflo rep -> imulMayOflo rep x y
-
-      MO_S_Quot W32    -> idiv True  False x y
-      MO_U_Quot W32    -> idiv False False x y
-       
-      MO_S_Rem  W32    -> irem True  x y
-      MO_U_Rem W32     -> irem False x y
-       
-      MO_F_Eq w -> condFltReg EQQ x y
-      MO_F_Ne w -> condFltReg NE x y
-
-      MO_F_Gt w -> condFltReg GTT x y
-      MO_F_Ge w -> condFltReg GE x y 
-      MO_F_Lt w -> condFltReg LTT x y
-      MO_F_Le w -> condFltReg LE x y
-
-      MO_F_Add  w  -> trivialFCode w FADD x y
-      MO_F_Sub  w  -> trivialFCode w FSUB x y
-      MO_F_Mul  w  -> trivialFCode w FMUL x y
-      MO_F_Quot w  -> trivialFCode w FDIV x y
-
-      MO_And rep   -> trivialCode rep (AND False) x y
-      MO_Or rep    -> trivialCode rep (OR  False) x y
-      MO_Xor rep   -> trivialCode rep (XOR False) x y
-
-      MO_Mul rep -> trivialCode rep (SMUL False) x y
-
-      MO_Shl rep   -> trivialCode rep SLL  x y
-      MO_U_Shr rep   -> trivialCode rep SRL x y
-      MO_S_Shr rep   -> trivialCode rep SRA x y
-
-{-
-      MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 
-                                         [promote x, promote y])
-                      where promote x = CmmMachOp MO_F32_to_Dbl [x]
-      MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 
-                                        [x, y])
--}
-      other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
-  where
-    -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
-
-
-    -- | Generate an integer division instruction.
-    idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
-       
-    -- For unsigned division with a 32 bit numerator, 
-    --         we can just clear the Y register.
-    idiv False cc x y = do
-       (a_reg, a_code)         <- getSomeReg x
-               (b_reg, b_code)         <- getSomeReg y
-       
-       let code dst
-               =       a_code 
-               `appOL` b_code  
-               `appOL` toOL
-                       [ WRY  g0 g0
-                       , UDIV cc a_reg (RIReg b_reg) dst]
-                       
-       return (Any II32 code)
-       
-
-    -- For _signed_ division with a 32 bit numerator,
-    --         we have to sign extend the numerator into the Y register.
-    idiv True cc x y = do
-       (a_reg, a_code)         <- getSomeReg x
-               (b_reg, b_code)         <- getSomeReg y
-       
-       tmp                     <- getNewRegNat II32
-       
-       let code dst
-               =       a_code 
-               `appOL` b_code  
-               `appOL` toOL
-                       [ SRA  a_reg (RIImm (ImmInt 16)) tmp            -- sign extend
-                       , SRA  tmp   (RIImm (ImmInt 16)) tmp
-
-                       , WRY  tmp g0                           
-                       , SDIV cc a_reg (RIReg b_reg) dst]
-                       
-       return (Any II32 code)
-
-
-    -- | Do an integer remainder.
-    --
-    --  NOTE:  The SPARC v8 architecture manual says that integer division
-    --         instructions _may_ generate a remainder, depending on the implementation.
-    --         If so it is _recommended_ that the remainder is placed in the Y register.
-    --
-    --          The UltraSparc 2007 manual says Y is _undefined_ after division.
-    --
-    --         The SPARC T2 doesn't store the remainder, not sure about the others. 
-    --         It's probably best not to worry about it, and just generate our own
-    --         remainders. 
-    --
-    irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
-
-    -- For unsigned operands: 
-    --         Division is between a 64 bit numerator and a 32 bit denominator, 
-    --         so we still have to clear the Y register.
-    irem False x y = do
-       (a_reg, a_code) <- getSomeReg x
-       (b_reg, b_code) <- getSomeReg y
-
-       tmp_reg         <- getNewRegNat II32
-
-       let code dst
-               =       a_code
-               `appOL` b_code
-               `appOL` toOL
-                       [ WRY   g0 g0
-                       , UDIV  False         a_reg (RIReg b_reg) tmp_reg
-                       , UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
-                       , SUB   False False   a_reg (RIReg tmp_reg) dst]
-    
-       return  (Any II32 code)
-
-    
-    -- For signed operands:
-    --         Make sure to sign extend into the Y register, or the remainder
-    --         will have the wrong sign when the numerator is negative.
-    --
-    -- TODO:   When sign extending, GCC only shifts the a_reg right by 17 bits,
-    --         not the full 32. Not sure why this is, something to do with overflow?
-    --         If anyone cares enough about the speed of signed remainder they
-    --         can work it out themselves (then tell me). -- BL 2009/01/20
-    
-    irem True x y = do
-       (a_reg, a_code) <- getSomeReg x
-       (b_reg, b_code) <- getSomeReg y
-       
-       tmp1_reg        <- getNewRegNat II32
-       tmp2_reg        <- getNewRegNat II32
-               
-       let code dst
-               =       a_code
-               `appOL` b_code
-               `appOL` toOL
-                       [ SRA   a_reg      (RIImm (ImmInt 16)) tmp1_reg -- sign extend
-                       , SRA   tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg -- sign extend
-                       , WRY   tmp1_reg g0
-
-                       , SDIV  False          a_reg (RIReg b_reg)    tmp2_reg  
-                       , SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
-                       , SUB   False False    a_reg (RIReg tmp2_reg) dst]
-                       
-       return (Any II32 code)
-   
-
-    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
-    imulMayOflo rep a b = do
-         (a_reg, a_code) <- getSomeReg a
-        (b_reg, b_code) <- getSomeReg b
-        res_lo <- getNewRegNat II32
-        res_hi <- getNewRegNat II32
-        let
-           shift_amt  = case rep of
-                         W32 -> 31
-                         W64 -> 63
-                         _ -> panic "shift_amt"
-           code dst = a_code `appOL` b_code `appOL`
-                       toOL [
-                           SMUL False a_reg (RIReg b_reg) res_lo,
-                           RDY res_hi,
-                           SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
-                           SUB False False res_lo (RIReg res_hi) dst
-                        ]
-         return (Any II32 code)
-
-getRegister (CmmLoad mem pk) = do
-    Amode src code <- getAmode mem
-    let
-       code__2 dst     = code `snocOL` LD (cmmTypeSize pk) src dst
-    return (Any (cmmTypeSize pk) code__2)
-
-getRegister (CmmLit (CmmInt i _))
-  | fits13Bits i
-  = let
-       src = ImmInt (fromInteger i)
-       code dst = unitOL (OR False g0 (RIImm src) dst)
-    in
-       return (Any II32 code)
-
-getRegister (CmmLit lit)
-  = let rep = cmmLitType lit
-       imm = litToImm lit
-       code dst = toOL [
-           SETHI (HI imm) dst,
-           OR False dst (RIImm (LO imm)) dst]
-    in return (Any II32 code)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-getRegister (CmmLoad mem pk)
-  | not (isWord64 pk)
-  = do
-        Amode addr addr_code <- getAmode mem
-        let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
-                       addr_code `snocOL` LD size dst addr
-        return (Any size code)
-          where size = cmmTypeSize pk
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
-    Amode addr addr_code <- getAmode mem
-    return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
-
--- Note: there is no Load Byte Arithmetic instruction, so no signed case here
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
-    Amode addr addr_code <- getAmode mem
-    return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
-    Amode addr addr_code <- getAmode mem
-    return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
-  = case mop of
-      MO_Not rep   -> triv_ucode_int rep NOT
-
-      MO_F_Neg w   -> triv_ucode_float w FNEG
-      MO_S_Neg w   -> triv_ucode_int   w NEG
-
-      MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
-      MO_FF_Conv W32 W64 -> conversionNop FF64 x
-
-      MO_FS_Conv from to -> coerceFP2Int from to x
-      MO_SF_Conv from to -> coerceInt2FP from to x
-
-      MO_SS_Conv from to
-        | from == to    -> conversionNop (intSize to) x
-
-        -- narrowing is a nop: we treat the high bits as undefined
-      MO_SS_Conv W32 to -> conversionNop (intSize to) x
-      MO_SS_Conv W16 W8 -> conversionNop II8 x
-      MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
-      MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
-
-      MO_UU_Conv from to
-        | from == to -> conversionNop (intSize to) x
-        -- narrowing is a nop: we treat the high bits as undefined
-      MO_UU_Conv W32 to -> conversionNop (intSize to) x
-      MO_UU_Conv W16 W8 -> conversionNop II8 x
-      MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
-      MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32)) 
-
-    where
-       triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
-       triv_ucode_float width instr = trivialUCode (floatSize width) instr x
-
-        conversionNop new_size expr
-            = do e_code <- getRegister expr
-                 return (swizzleRegisterRep e_code new_size)
-
-getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
-  = case mop of
-      MO_F_Eq w -> condFltReg EQQ x y
-      MO_F_Ne w -> condFltReg NE  x y
-      MO_F_Gt w -> condFltReg GTT x y
-      MO_F_Ge w -> condFltReg GE  x y
-      MO_F_Lt w -> condFltReg LTT x y
-      MO_F_Le w -> condFltReg LE  x y
-
-      MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
-      MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
-
-      MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
-      MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
-      MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
-      MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
-
-      MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
-      MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
-      MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
-      MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
-
-      MO_F_Add w  -> triv_float w FADD
-      MO_F_Sub w  -> triv_float w FSUB
-      MO_F_Mul w  -> triv_float w FMUL
-      MO_F_Quot w -> triv_float w FDIV
-      
-         -- optimize addition with 32-bit immediate
-         -- (needed for PIC)
-      MO_Add W32 ->
-        case y of
-          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
-            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
-          CmmLit lit
-            -> do
-                (src, srcCode) <- getSomeReg x
-                let imm = litToImm lit
-                    code dst = srcCode `appOL` toOL [
-                                    ADDIS dst src (HA imm),
-                                    ADD dst dst (RIImm (LO imm))
-                                ]
-                return (Any II32 code)
-          _ -> trivialCode W32 True ADD x y
-
-      MO_Add rep -> trivialCode rep True ADD x y
-      MO_Sub rep ->
-        case y of    -- subfi ('substract from' with immediate) doesn't exist
-          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
-            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
-          _ -> trivialCodeNoImm' (intSize rep) SUBF y x
-
-      MO_Mul rep -> trivialCode rep True MULLW x y
-
-      MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
-      
-      MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
-      MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
-
-      MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
-      MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
-      
-      MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
-      MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
-      
-      MO_And rep   -> trivialCode rep False AND x y
-      MO_Or rep    -> trivialCode rep False OR x y
-      MO_Xor rep   -> trivialCode rep False XOR x y
-
-      MO_Shl rep   -> trivialCode rep False SLW x y
-      MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
-      MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-  where
-    triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
-    triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
-
-getRegister (CmmLit (CmmInt i rep))
-  | Just imm <- makeImmediate rep True i
-  = let
-       code dst = unitOL (LI dst imm)
-    in
-       return (Any (intSize rep) code)
-
-getRegister (CmmLit (CmmFloat f frep)) = do
-    lbl <- getNewLabelNat
-    dflags <- getDynFlagsNat
-    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
-    Amode addr addr_code <- getAmode dynRef
-    let size = floatSize frep
-        code dst = 
-           LDATA ReadOnlyData  [CmmDataLabel lbl,
-                                CmmStaticLit (CmmFloat f frep)]
-            `consOL` (addr_code `snocOL` LD size dst addr)
-    return (Any size code)
-
-getRegister (CmmLit lit)
-  = let rep = cmmLitType lit
-        imm = litToImm lit
-        code dst = toOL [
-              LIS dst (HA imm),
-              ADD dst dst (RIImm (LO imm))
-          ]
-    in return (Any (cmmTypeSize rep) code)
-
-getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-    
-    -- extend?Rep: wrap integer expression of type rep
-    -- in a conversion to II32
-extendSExpr W32 x = x
-extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
-extendUExpr W32 x = x
-extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
---  The 'Amode' type: Memory addressing modes passed up the tree.
-
-data Amode = Amode AddrMode InstrBlock
-
-{-
-Now, given a tree (the argument to an CmmLoad) that references memory,
-produce a suitable addressing mode.
-
-A Rule of the Game (tm) for Amodes: use of the addr bit must
-immediately follow use of the code part, since the code part puts
-values in registers which the addr then refers to.  So you can't put
-anything in between, lest it overwrite some of those registers.  If
-you need to do some other computation between the code part and use of
-the addr bit, first store the effective address from the amode in a
-temporary, then do the other computation, and then use the temporary:
-
-    code
-    LEA amode, tmp
-    ... other computation ...
-    ... (tmp) ...
--}
-
-getAmode :: CmmExpr -> NatM Amode
-getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-getAmode (StPrim IntSubOp [x, StInt i])
-  = getNewRegNat PtrRep                `thenNat` \ tmp ->
-    getRegister x              `thenNat` \ register ->
-    let
-       code = registerCode register tmp
-       reg  = registerName register tmp
-       off  = ImmInt (-(fromInteger i))
-    in
-    return (Amode (AddrRegImm reg off) code)
-
-getAmode (StPrim IntAddOp [x, StInt i])
-  = getNewRegNat PtrRep                `thenNat` \ tmp ->
-    getRegister x              `thenNat` \ register ->
-    let
-       code = registerCode register tmp
-       reg  = registerName register tmp
-       off  = ImmInt (fromInteger i)
-    in
-    return (Amode (AddrRegImm reg off) code)
-
-getAmode leaf
-  | isJust imm
-  = return (Amode (AddrImm imm__2) id)
-  where
-    imm = maybeImm leaf
-    imm__2 = case imm of Just x -> x
-
-getAmode other
-  = getNewRegNat PtrRep                `thenNat` \ tmp ->
-    getRegister other          `thenNat` \ register ->
-    let
-       code = registerCode register tmp
-       reg  = registerName register tmp
-    in
-    return (Amode (AddrReg reg) code)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
-                                     CmmLit displacement])
-    = return $ Amode (ripRel (litToImm displacement)) nilOL
-
-#endif
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- This is all just ridiculous, since it carefully undoes 
--- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
-  | is32BitLit lit
-  -- ASSERT(rep == II32)???
-  = do (x_reg, x_code) <- getSomeReg x
-       let off = ImmInt (-(fromInteger i))
-       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-  
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
-  | is32BitLit lit
-  -- ASSERT(rep == II32)???
-  = do (x_reg, x_code) <- getSomeReg x
-       let off = ImmInt (fromInteger i)
-       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-
--- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be 
--- recognised by the next rule.
-getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
-                                 b@(CmmLit _)])
-  = getAmode (CmmMachOp (MO_Add rep) [b,a])
-
-getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) 
-                                       [y, CmmLit (CmmInt shift _)]])
-  | shift == 0 || shift == 1 || shift == 2 || shift == 3
-  = x86_complex_amode x y shift 0
-
-getAmode (CmmMachOp (MO_Add rep) 
-                [x, CmmMachOp (MO_Add _)
-                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
-                         CmmLit (CmmInt offset _)]])
-  | shift == 0 || shift == 1 || shift == 2 || shift == 3
-  && is32BitInteger offset
-  = x86_complex_amode x y shift offset
-
-getAmode (CmmMachOp (MO_Add rep) [x,y])
-  = x86_complex_amode x y 0 0
-
-getAmode (CmmLit lit) | is32BitLit lit
-  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-
-getAmode expr = do
-  (reg,code) <- getSomeReg expr
-  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-
-
-x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
-x86_complex_amode base index shift offset
-  = do (x_reg, x_code) <- getNonClobberedReg base
-       -- x must be in a temp, because it has to stay live over y_code
-       -- we could compre x_reg and y_reg and do something better here...
-       (y_reg, y_code) <- getSomeReg index
-       let
-          code = x_code `appOL` y_code
-           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
-               code)
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
-  | fits13Bits (-i)
-  = do
-       (reg, code) <- getSomeReg x
-       let
-         off  = ImmInt (-(fromInteger i))
-       return (Amode (AddrRegImm reg off) code)
-
-
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
-  | fits13Bits i
-  = do
-       (reg, code) <- getSomeReg x
-       let
-        off  = ImmInt (fromInteger i)
-       return (Amode (AddrRegImm reg off) code)
-
-getAmode (CmmMachOp (MO_Add rep) [x, y])
-  = do
-    (regX, codeX) <- getSomeReg x
-    (regY, codeY) <- getSomeReg y
-    let
-       code = codeX `appOL` codeY
-    return (Amode (AddrRegReg regX regY) code)
-
-getAmode (CmmLit lit)
-  = do
-       let imm__2      = litToImm lit
-       tmp1    <- getNewRegNat II32
-       tmp2    <- getNewRegNat II32
-
-       let code = toOL [ SETHI (HI imm__2) tmp1
-                       , OR    False tmp1 (RIImm (LO imm__2)) tmp2]
-               
-       return (Amode (AddrRegReg tmp2 g0) code)
-
-getAmode other
-  = do
-       (reg, code) <- getSomeReg other
-       let
-           off  = ImmInt 0
-       return (Amode (AddrRegImm reg off) code)
-
-#endif /* sparc_TARGET_ARCH */
-
-#ifdef powerpc_TARGET_ARCH
-getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
-  | Just off <- makeImmediate W32 True (-i)
-  = do
-        (reg, code) <- getSomeReg x
-        return (Amode (AddrRegImm reg off) code)
-
-
-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
-  | Just off <- makeImmediate W32 True i
-  = do
-        (reg, code) <- getSomeReg x
-        return (Amode (AddrRegImm reg off) code)
-
-   -- optimize addition with 32-bit immediate
-   -- (needed for PIC)
-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
-  = do
-        tmp <- getNewRegNat II32
-        (src, srcCode) <- getSomeReg x
-        let imm = litToImm lit
-            code = srcCode `snocOL` ADDIS tmp src (HA imm)
-        return (Amode (AddrRegImm tmp (LO imm)) code)
-
-getAmode (CmmLit lit)
-  = do
-        tmp <- getNewRegNat II32
-        let imm = litToImm lit
-            code = unitOL (LIS tmp (HA imm))
-        return (Amode (AddrRegImm tmp (LO imm)) code)
-    
-getAmode (CmmMachOp (MO_Add W32) [x, y])
-  = do
-        (regX, codeX) <- getSomeReg x
-        (regY, codeY) <- getSomeReg y
-        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-    
-getAmode other
-  = do
-        (reg, code) <- getSomeReg other
-        let
-            off  = ImmInt 0
-        return (Amode (AddrRegImm reg off) code)
-#endif /* powerpc_TARGET_ARCH */
-
--- -----------------------------------------------------------------------------
--- getOperand: sometimes any operand will do.
-
--- getNonClobberedOperand: the value of the operand will remain valid across
--- the computation of an arbitrary expression, unless the expression
--- is computed directly into a register which the operand refers to
--- (see trivialCode where this function is used for an example).
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getNonClobberedOperand (CmmLit lit)
-  | isSuitableFloatingPointLit lit = do
-    lbl <- getNewLabelNat
-    let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
-                                          CmmStaticLit lit])
-    return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getNonClobberedOperand (CmmLit lit)
-  | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
-    return (OpImm (litToImm lit), nilOL)
-getNonClobberedOperand (CmmLoad mem pk) 
-  | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
-    Amode src mem_code <- getAmode mem
-    (src',save_code) <- 
-       if (amodeCouldBeClobbered src) 
-               then do
-                  tmp <- getNewRegNat wordSize
-                  return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
-                          unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
-               else
-                  return (src, nilOL)
-    return (OpAddr src', save_code `appOL` mem_code)
-getNonClobberedOperand e = do
-    (reg, code) <- getNonClobberedReg e
-    return (OpReg reg, code)
-
-amodeCouldBeClobbered :: AddrMode -> Bool
-amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
-
-regClobbered (RealReg rr) = isFastTrue (freeReg rr)
-regClobbered _ = False
-
--- getOperand: the operand is not required to remain valid across the
--- computation of an arbitrary expression.
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getOperand (CmmLit lit)
-  | isSuitableFloatingPointLit lit = do
-    lbl <- getNewLabelNat
-    let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
-                                          CmmStaticLit lit])
-    return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getOperand (CmmLit lit)
-  | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
-    return (OpImm (litToImm lit), nilOL)
-getOperand (CmmLoad mem pk)
-  | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
-    Amode src mem_code <- getAmode mem
-    return (OpAddr src, mem_code)
-getOperand e = do
-    (reg, code) <- getSomeReg e
-    return (OpReg reg, code)
-
-isOperand :: CmmExpr -> Bool
-isOperand (CmmLoad _ _) = True
-isOperand (CmmLit lit)  = is32BitLit lit
-                         || isSuitableFloatingPointLit lit
-isOperand _             = False
-
--- if we want a floating-point literal as an operand, we can
--- use it directly from memory.  However, if the literal is
--- zero, we're better off generating it into a register using
--- xor.
-isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
-isSuitableFloatingPointLit _ = False
-
-getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
-getRegOrMem (CmmLoad mem pk)
-  | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
-    Amode src mem_code <- getAmode mem
-    return (OpAddr src, mem_code)
-getRegOrMem e = do
-    (reg, code) <- getNonClobberedReg e
-    return (OpReg reg, code)
-
-#if x86_64_TARGET_ARCH
-is32BitLit (CmmInt i W64) = is32BitInteger i
-   -- assume that labels are in the range 0-2^31-1: this assumes the
-   -- small memory model (see gcc docs, -mcmodel=small).
-#endif
-is32BitLit x = True
-#endif
-
-is32BitInteger :: Integer -> Bool
-is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
-  where i64 = fromIntegral i :: Int64
-  -- a CmmInt is intended to be truncated to the appropriate 
-  -- number of bits, so here we truncate it to Int64.  This is
-  -- important because e.g. -1 as a CmmInt might be either
-  -- -1 or 18446744073709551615.
-
--- -----------------------------------------------------------------------------
---  The 'CondCode' type:  Condition codes passed up the tree.
-
-data CondCode = CondCode Bool Cond InstrBlock
-
--- Set up a condition code for a conditional branch.
-
-getCondCode :: CmmExpr -> NatM CondCode
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-getCondCode = panic "MachCode.getCondCode: not on Alphas"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
--- yes, they really do seem to want exactly the same!
-
-getCondCode (CmmMachOp mop [x, y])
-  = 
-    case mop of
-      MO_F_Eq W32 -> condFltCode EQQ x y
-      MO_F_Ne W32 -> condFltCode NE  x y
-      MO_F_Gt W32 -> condFltCode GTT x y
-      MO_F_Ge W32 -> condFltCode GE  x y
-      MO_F_Lt W32 -> condFltCode LTT x y
-      MO_F_Le W32 -> condFltCode LE  x y
-
-      MO_F_Eq W64 -> condFltCode EQQ x y
-      MO_F_Ne W64 -> condFltCode NE  x y
-      MO_F_Gt W64 -> condFltCode GTT x y
-      MO_F_Ge W64 -> condFltCode GE  x y
-      MO_F_Lt W64 -> condFltCode LTT x y
-      MO_F_Le W64 -> condFltCode LE  x y
-
-      MO_Eq rep -> condIntCode EQQ  x y
-      MO_Ne rep -> condIntCode NE   x y
-
-      MO_S_Gt rep -> condIntCode GTT  x y
-      MO_S_Ge rep -> condIntCode GE   x y
-      MO_S_Lt rep -> condIntCode LTT  x y
-      MO_S_Le rep -> condIntCode LE   x y
-
-      MO_U_Gt rep -> condIntCode GU   x y
-      MO_U_Ge rep -> condIntCode GEU  x y
-      MO_U_Lt rep -> condIntCode LU   x y
-      MO_U_Le rep -> condIntCode LEU  x y
-
-      other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
-
-getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
-
-#elif powerpc_TARGET_ARCH
-
--- almost the same as everywhere else - but we need to
--- extend small integers to 32 bit first
-
-getCondCode (CmmMachOp mop [x, y])
-  = case mop of
-      MO_F_Eq W32 -> condFltCode EQQ x y
-      MO_F_Ne W32 -> condFltCode NE  x y
-      MO_F_Gt W32 -> condFltCode GTT x y
-      MO_F_Ge W32 -> condFltCode GE  x y
-      MO_F_Lt W32 -> condFltCode LTT x y
-      MO_F_Le W32 -> condFltCode LE  x y
-
-      MO_F_Eq W64 -> condFltCode EQQ x y
-      MO_F_Ne W64 -> condFltCode NE  x y
-      MO_F_Gt W64 -> condFltCode GTT x y
-      MO_F_Ge W64 -> condFltCode GE  x y
-      MO_F_Lt W64 -> condFltCode LTT x y
-      MO_F_Le W64 -> condFltCode LE  x y
-
-      MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
-      MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
-
-      MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
-      MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
-      MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
-      MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
-
-      MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
-      MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
-      MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
-      MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
-
-      other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
-
-getCondCode other =  panic "getCondCode(2)(powerpc)"
-
-
-#endif
-
-
--- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
--- passed back up the tree.
-
-condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-
-#if alpha_TARGET_ARCH
-condIntCode = panic "MachCode.condIntCode: not on Alphas"
-condFltCode = panic "MachCode.condFltCode: not on Alphas"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- memory vs immediate
-condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
-    Amode x_addr x_code <- getAmode x
-    let
-       imm  = litToImm lit
-       code = x_code `snocOL`
-                 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
-    --
-    return (CondCode False cond code)
-
--- anything vs zero, using a mask
--- TODO: Add some sanity checking!!!!
-condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
-    | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
-    = do
-      (x_reg, x_code) <- getSomeReg x
-      let
-         code = x_code `snocOL`
-                TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
-      --
-      return (CondCode False cond code)
-
--- anything vs zero
-condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
-    (x_reg, x_code) <- getSomeReg x
-    let
-       code = x_code `snocOL`
-                 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
-    --
-    return (CondCode False cond code)
-
--- anything vs operand
-condIntCode cond x y | isOperand y = do
-    (x_reg, x_code) <- getNonClobberedReg x
-    (y_op,  y_code) <- getOperand y    
-    let
-       code = x_code `appOL` y_code `snocOL`
-                  CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
-    -- in
-    return (CondCode False cond code)
-
--- anything vs anything
-condIntCode cond x y = do
-  (y_reg, y_code) <- getNonClobberedReg y
-  (x_op, x_code) <- getRegOrMem x
-  let
-       code = y_code `appOL`
-              x_code `snocOL`
-                 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
-  -- in
-  return (CondCode False cond code)
-#endif
-
-#if i386_TARGET_ARCH
-condFltCode cond x y 
-  = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
-  (x_reg, x_code) <- getNonClobberedReg x
-  (y_reg, y_code) <- getSomeReg y
-  let
-       code = x_code `appOL` y_code `snocOL`
-               GCMP cond x_reg y_reg
-  -- The GCMP insn does the test and sets the zero flag if comparable
-  -- and true.  Hence we always supply EQQ as the condition to test.
-  return (CondCode True EQQ code)
-#endif /* i386_TARGET_ARCH */
-
-#if x86_64_TARGET_ARCH
--- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
--- an operand, but the right must be a reg.  We can probably do better
--- than this general case...
-condFltCode cond x y = do
-  (x_reg, x_code) <- getNonClobberedReg x
-  (y_op, y_code) <- getOperand y
-  let
-       code = x_code `appOL`
-              y_code `snocOL`
-                 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
-       -- NB(1): we need to use the unsigned comparison operators on the
-       -- result of this comparison.
-  -- in
-  return (CondCode True (condToUnsigned cond) code)
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-condIntCode cond x (CmmLit (CmmInt y rep))
-  | fits13Bits y
-  = do
-       (src1, code) <- getSomeReg x
-       let
-           src2 = ImmInt (fromInteger y)
-           code' = code `snocOL` SUB False True src1 (RIImm src2) g0
-       return (CondCode False cond code')
-
-condIntCode cond x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    let
-       code__2 = code1 `appOL` code2 `snocOL`
-                 SUB False True src1 (RIReg src2) g0
-    return (CondCode False cond code__2)
-
------------
-condFltCode cond x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    tmp <- getNewRegNat FF64
-    let
-       promote x = FxTOy FF32 FF64 x tmp
-
-       pk1   = cmmExprType x
-       pk2   = cmmExprType y
-
-       code__2 =
-               if pk1 `cmmEqType` pk2 then
-                   code1 `appOL` code2 `snocOL`
-                   FCMP True (cmmTypeSize pk1) src1 src2
-               else if typeWidth pk1 == W32 then
-                   code1 `snocOL` promote src1 `appOL` code2 `snocOL`
-                   FCMP True FF64 tmp src2
-               else
-                   code1 `appOL` code2 `snocOL` promote src2 `snocOL`
-                   FCMP True FF64 src1 tmp
-    return (CondCode True cond code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
---  ###FIXME: I16 and I8!
-condIntCode cond x (CmmLit (CmmInt y rep))
-  | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
-  = do
-        (src1, code) <- getSomeReg x
-        let
-            code' = code `snocOL` 
-                (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
-        return (CondCode False cond code')
-
-condIntCode cond x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    let
-       code' = code1 `appOL` code2 `snocOL`
-                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
-    return (CondCode False cond code')
-
-condFltCode cond x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    let
-       code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
-       code'' = case cond of -- twiddle CR to handle unordered case
-                    GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
-                   LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
-                   _ -> code'
-                 where
-                    ltbit = 0 ; eqbit = 2 ; gtbit = 1
-    return (CondCode True cond code'')
-
-#endif /* powerpc_TARGET_ARCH */
-
--- -----------------------------------------------------------------------------
--- Generating assignments
-
--- Assignments are really at the heart of the whole code generation
--- business.  Almost all top-level nodes of any real importance are
--- assignments, which correspond to loads, stores, or register
--- transfers.  If we're really lucky, some of the register transfers
--- will go away, because we can use the destination register to
--- complete the code generation for the right hand side.  This only
--- fails when the right hand side is forced into a fixed register
--- (e.g. the result of a call).
-
-assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
-
-assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-assignIntCode pk (CmmLoad dst _) src
-  = getNewRegNat IntRep            `thenNat` \ tmp ->
-    getAmode dst                   `thenNat` \ amode ->
-    getRegister src                `thenNat` \ register ->
-    let
-       code1   = amodeCode amode []
-       dst__2  = amodeAddr amode
-       code2   = registerCode register tmp []
-       src__2  = registerName register tmp
-       sz      = primRepToSize pk
-       code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-    in
-    return code__2
-
-assignIntCode pk dst src
-  = getRegister dst                        `thenNat` \ register1 ->
-    getRegister src                        `thenNat` \ register2 ->
-    let
-       dst__2  = registerName register1 zeroh
-       code    = registerCode register2 dst__2
-       src__2  = registerName register2 dst__2
-       code__2 = if isFixed register2
-                 then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
-                 else code
-    in
-    return code__2
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- integer assignment to memory
-
--- specific case of adding/subtracting an integer to a particular address.
--- ToDo: catch other cases where we can use an operation directly on a memory 
--- address.
-assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
-                                                 CmmLit (CmmInt i _)])
-   | addr == addr2, pk /= II64 || is32BitInteger i,
-     Just instr <- check op
-   = do Amode amode code_addr <- getAmode addr
-        let code = code_addr `snocOL`
-                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
-        return code
-   where
-        check (MO_Add _) = Just ADD
-        check (MO_Sub _) = Just SUB
-        check _ = Nothing
-        -- ToDo: more?
-
--- general case
-assignMem_IntCode pk addr src = do
-    Amode addr code_addr <- getAmode addr
-    (code_src, op_src)   <- get_op_RI src
-    let
-       code = code_src `appOL`
-              code_addr `snocOL`
-                  MOV pk op_src (OpAddr addr)
-       -- NOTE: op_src is stable, so it will still be valid
-       -- after code_addr.  This may involve the introduction 
-       -- of an extra MOV to a temporary register, but we hope
-       -- the register allocator will get rid of it.
-    --
-    return code
-  where
-    get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)  -- code, operator
-    get_op_RI (CmmLit lit) | is32BitLit lit
-      = return (nilOL, OpImm (litToImm lit))
-    get_op_RI op
-      = do (reg,code) <- getNonClobberedReg op
-          return (code, OpReg reg)
-
-
--- Assign; dst is a reg, rhs is mem
-assignReg_IntCode pk reg (CmmLoad src _) = do
-  load_code <- intLoadCode (MOV pk) src
-  return (load_code (getRegisterReg reg))
-
--- dst is a reg, but src could be anything
-assignReg_IntCode pk reg src = do
-  code <- getAnyReg src
-  return (code (getRegisterReg reg))
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-assignMem_IntCode pk addr src = do
-    (srcReg, code) <- getSomeReg src
-    Amode dstAddr addr_code <- getAmode addr
-    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-
-assignReg_IntCode pk reg src = do
-    r <- getRegister src
-    return $ case r of
-       Any _ code         -> code dst
-       Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
-    where
-      dst = getRegisterReg reg
-
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-assignMem_IntCode pk addr src = do
-    (srcReg, code) <- getSomeReg src
-    Amode dstAddr addr_code <- getAmode addr
-    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-
--- dst is a reg, but src could be anything
-assignReg_IntCode pk reg src
-    = do
-        r <- getRegister src
-        return $ case r of
-            Any _ code         -> code dst
-            Fixed _ freg fcode -> fcode `snocOL` MR dst freg
-    where
-        dst = getRegisterReg reg
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Floating-point assignments
-
-#if alpha_TARGET_ARCH
-
-assignFltCode pk (CmmLoad dst _) src
-  = getNewRegNat pk                `thenNat` \ tmp ->
-    getAmode dst                   `thenNat` \ amode ->
-    getRegister src                        `thenNat` \ register ->
-    let
-       code1   = amodeCode amode []
-       dst__2  = amodeAddr amode
-       code2   = registerCode register tmp []
-       src__2  = registerName register tmp
-       sz      = primRepToSize pk
-       code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
-    in
-    return code__2
-
-assignFltCode pk dst src
-  = getRegister dst                        `thenNat` \ register1 ->
-    getRegister src                        `thenNat` \ register2 ->
-    let
-       dst__2  = registerName register1 zeroh
-       code    = registerCode register2 dst__2
-       src__2  = registerName register2 dst__2
-       code__2 = if isFixed register2
-                 then code . mkSeqInstr (FMOV src__2 dst__2)
-                 else code
-    in
-    return code__2
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- Floating point assignment to memory
-assignMem_FltCode pk addr src = do
-  (src_reg, src_code) <- getNonClobberedReg src
-  Amode addr addr_code <- getAmode addr
-  let
-       code = src_code `appOL`
-              addr_code `snocOL`
-                IF_ARCH_i386(GST pk src_reg addr,
-                            MOV pk (OpReg src_reg) (OpAddr addr))
-  return code
-
--- Floating point assignment to a register/temporary
-assignReg_FltCode pk reg src = do
-  src_code <- getAnyReg src
-  return (src_code (getRegisterReg reg))
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
--- Floating point assignment to memory
-assignMem_FltCode pk addr src = do
-    Amode dst__2 code1 <- getAmode addr
-    (src__2, code2) <- getSomeReg src
-    tmp1 <- getNewRegNat pk
-    let
-       pk__2   = cmmExprType src
-       code__2 = code1 `appOL` code2 `appOL`
-           if   sizeToWidth pk == typeWidth pk__2 
-            then unitOL (ST pk src__2 dst__2)
-           else toOL   [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
-                       , ST    pk tmp1 dst__2]
-    return code__2
-
--- Floating point assignment to a register/temporary
-assignReg_FltCode pk dstCmmReg srcCmmExpr = do
-    srcRegister <- getRegister srcCmmExpr
-    let dstReg = getRegisterReg dstCmmReg
-
-    return $ case srcRegister of
-        Any _ code                 -> code dstReg
-       Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
--- Easy, isn't it?
-assignMem_FltCode = assignMem_IntCode
-assignReg_FltCode = assignReg_IntCode
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Generating an non-local jump
-
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-genJump (CmmLabel lbl)
-  | isAsmTemp lbl = returnInstr (BR target)
-  | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
-  where
-    target = ImmCLbl lbl
-
-genJump tree
-  = getRegister tree               `thenNat` \ register ->
-    getNewRegNat PtrRep            `thenNat` \ tmp ->
-    let
-       dst    = registerName register pv
-       code   = registerCode register pv
-       target = registerName register pv
-    in
-    if isFixed register then
-       returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
-    else
-    return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-genJump (CmmLoad mem pk) = do
-  Amode target code <- getAmode mem
-  return (code `snocOL` JMP (OpAddr target))
-
-genJump (CmmLit lit) = do
-  return (unitOL (JMP (OpImm (litToImm lit))))
-
-genJump expr = do
-  (reg,code) <- getSomeReg expr
-  return (code `snocOL` JMP (OpReg reg))
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-genJump (CmmLit (CmmLabel lbl))
-  = return (toOL [CALL (Left target) 0 True, NOP])
-  where
-    target = ImmCLbl lbl
-
-genJump tree
-  = do
-        (target, code) <- getSomeReg tree
-       return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-genJump (CmmLit (CmmLabel lbl))
-  = return (unitOL $ JMP lbl)
-
-genJump tree
-  = do
-        (target,code) <- getSomeReg tree
-        return (code `snocOL` MTCTR target `snocOL` BCTR [])
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
---  Unconditional branches
-
-genBranch :: BlockId -> NatM InstrBlock
-
-genBranch = return . toOL . mkBranchInstr
-
--- -----------------------------------------------------------------------------
---  Conditional jumps
-
-{-
-Conditional jumps are always to local labels, so we can use branch
-instructions.  We peek at the arguments to decide what kind of
-comparison to do.
-
-ALPHA: For comparisons with 0, we're laughing, because we can just do
-the desired conditional branch.
-
-I386: First, we have to ensure that the condition
-codes are set according to the supplied comparison operation.
-
-SPARC: First, we have to ensure that the condition codes are set
-according to the supplied comparison operation.  We generate slightly
-different code for floating point comparisons, because a floating
-point operation cannot directly precede a @BF@.  We assume the worst
-and fill that slot with a @NOP@.
-
-SPARC: Do not fill the delay slots here; you will confuse the register
-allocator.
--}
-
-
-genCondJump
-    :: BlockId     -- the branch target
-    -> CmmExpr      -- the condition on which to branch
-    -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-genCondJump id (StPrim op [x, StInt 0])
-  = getRegister x                          `thenNat` \ register ->
-    getNewRegNat (registerRep register)
-                                   `thenNat` \ tmp ->
-    let
-       code   = registerCode register tmp
-       value  = registerName register tmp
-       pk     = registerRep register
-       target = ImmCLbl lbl
-    in
-    returnSeq code [BI (cmpOp op) value target]
-  where
-    cmpOp CharGtOp = GTT
-    cmpOp CharGeOp = GE
-    cmpOp CharEqOp = EQQ
-    cmpOp CharNeOp = NE
-    cmpOp CharLtOp = LTT
-    cmpOp CharLeOp = LE
-    cmpOp IntGtOp = GTT
-    cmpOp IntGeOp = GE
-    cmpOp IntEqOp = EQQ
-    cmpOp IntNeOp = NE
-    cmpOp IntLtOp = LTT
-    cmpOp IntLeOp = LE
-    cmpOp WordGtOp = NE
-    cmpOp WordGeOp = ALWAYS
-    cmpOp WordEqOp = EQQ
-    cmpOp WordNeOp = NE
-    cmpOp WordLtOp = NEVER
-    cmpOp WordLeOp = EQQ
-    cmpOp AddrGtOp = NE
-    cmpOp AddrGeOp = ALWAYS
-    cmpOp AddrEqOp = EQQ
-    cmpOp AddrNeOp = NE
-    cmpOp AddrLtOp = NEVER
-    cmpOp AddrLeOp = EQQ
-
-genCondJump lbl (StPrim op [x, StDouble 0.0])
-  = getRegister x                          `thenNat` \ register ->
-    getNewRegNat (registerRep register)
-                                   `thenNat` \ tmp ->
-    let
-       code   = registerCode register tmp
-       value  = registerName register tmp
-       pk     = registerRep register
-       target = ImmCLbl lbl
-    in
-    return (code . mkSeqInstr (BF (cmpOp op) value target))
-  where
-    cmpOp FloatGtOp = GTT
-    cmpOp FloatGeOp = GE
-    cmpOp FloatEqOp = EQQ
-    cmpOp FloatNeOp = NE
-    cmpOp FloatLtOp = LTT
-    cmpOp FloatLeOp = LE
-    cmpOp DoubleGtOp = GTT
-    cmpOp DoubleGeOp = GE
-    cmpOp DoubleEqOp = EQQ
-    cmpOp DoubleNeOp = NE
-    cmpOp DoubleLtOp = LTT
-    cmpOp DoubleLeOp = LE
-
-genCondJump lbl (StPrim op [x, y])
-  | fltCmpOp op
-  = trivialFCode pr instr x y      `thenNat` \ register ->
-    getNewRegNat FF64              `thenNat` \ tmp ->
-    let
-       code   = registerCode register tmp
-       result = registerName register tmp
-       target = ImmCLbl lbl
-    in
-    return (code . mkSeqInstr (BF cond result target))
-  where
-    pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
-
-    fltCmpOp op = case op of
-       FloatGtOp -> True
-       FloatGeOp -> True
-       FloatEqOp -> True
-       FloatNeOp -> True
-       FloatLtOp -> True
-       FloatLeOp -> True
-       DoubleGtOp -> True
-       DoubleGeOp -> True
-       DoubleEqOp -> True
-       DoubleNeOp -> True
-       DoubleLtOp -> True
-       DoubleLeOp -> True
-       _ -> False
-    (instr, cond) = case op of
-       FloatGtOp -> (FCMP TF LE, EQQ)
-       FloatGeOp -> (FCMP TF LTT, EQQ)
-       FloatEqOp -> (FCMP TF EQQ, NE)
-       FloatNeOp -> (FCMP TF EQQ, EQQ)
-       FloatLtOp -> (FCMP TF LTT, NE)
-       FloatLeOp -> (FCMP TF LE, NE)
-       DoubleGtOp -> (FCMP TF LE, EQQ)
-       DoubleGeOp -> (FCMP TF LTT, EQQ)
-       DoubleEqOp -> (FCMP TF EQQ, NE)
-       DoubleNeOp -> (FCMP TF EQQ, EQQ)
-       DoubleLtOp -> (FCMP TF LTT, NE)
-       DoubleLeOp -> (FCMP TF LE, NE)
-
-genCondJump lbl (StPrim op [x, y])
-  = trivialCode instr x y          `thenNat` \ register ->
-    getNewRegNat IntRep            `thenNat` \ tmp ->
-    let
-       code   = registerCode register tmp
-       result = registerName register tmp
-       target = ImmCLbl lbl
-    in
-    return (code . mkSeqInstr (BI cond result target))
-  where
-    (instr, cond) = case op of
-       CharGtOp -> (CMP LE, EQQ)
-       CharGeOp -> (CMP LTT, EQQ)
-       CharEqOp -> (CMP EQQ, NE)
-       CharNeOp -> (CMP EQQ, EQQ)
-       CharLtOp -> (CMP LTT, NE)
-       CharLeOp -> (CMP LE, NE)
-       IntGtOp -> (CMP LE, EQQ)
-       IntGeOp -> (CMP LTT, EQQ)
-       IntEqOp -> (CMP EQQ, NE)
-       IntNeOp -> (CMP EQQ, EQQ)
-       IntLtOp -> (CMP LTT, NE)
-       IntLeOp -> (CMP LE, NE)
-       WordGtOp -> (CMP ULE, EQQ)
-       WordGeOp -> (CMP ULT, EQQ)
-       WordEqOp -> (CMP EQQ, NE)
-       WordNeOp -> (CMP EQQ, EQQ)
-       WordLtOp -> (CMP ULT, NE)
-       WordLeOp -> (CMP ULE, NE)
-       AddrGtOp -> (CMP ULE, EQQ)
-       AddrGeOp -> (CMP ULT, EQQ)
-       AddrEqOp -> (CMP EQQ, NE)
-       AddrNeOp -> (CMP EQQ, EQQ)
-       AddrLtOp -> (CMP ULT, NE)
-       AddrLeOp -> (CMP ULE, NE)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-genCondJump id bool = do
-  CondCode _ cond code <- getCondCode bool
-  return (code `snocOL` JXX cond id)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-genCondJump id bool = do
-  CondCode is_float cond cond_code <- getCondCode bool
-  if not is_float
-    then
-       return (cond_code `snocOL` JXX cond id)
-    else do
-       lbl <- getBlockIdNat
-
-       -- see comment with condFltReg
-       let code = case cond of
-                       NE  -> or_unordered
-                       GU  -> plain_test
-                       GEU -> plain_test
-                       _   -> and_ordered
-
-           plain_test = unitOL (
-                 JXX cond id
-               )
-           or_unordered = toOL [
-                 JXX cond id,
-                 JXX PARITY id
-               ]
-           and_ordered = toOL [
-                 JXX PARITY lbl,
-                 JXX cond id,
-                 JXX ALWAYS lbl,
-                 NEWBLOCK lbl
-               ]
-       return (cond_code `appOL` code)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-genCondJump bid bool = do
-  CondCode is_float cond code <- getCondCode bool
-  return (
-       code `appOL` 
-       toOL (
-         if   is_float
-         then [NOP, BF cond False bid, NOP]
-         else [BI cond False bid, NOP]
-       )
-    )
-
-#endif /* sparc_TARGET_ARCH */
-
-
-#if powerpc_TARGET_ARCH
-
-genCondJump id bool = do
-  CondCode is_float cond code <- getCondCode bool
-  return (code `snocOL` BCC cond id)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
---  Generating C calls
-
--- Now the biggest nightmare---calls.  Most of the nastiness is buried in
--- @get_arg@, which moves the arguments to the correct registers/stack
--- locations.  Apart from that, the code is easy.
--- 
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-genCCall
-    :: CmmCallTarget           -- function to call
-    -> HintedCmmFormals                -- where to put the result
-    -> HintedCmmActuals                -- arguments (of mixed type)
-    -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-ccallResultRegs = 
-
-genCCall fn cconv result_regs args
-  = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
-                         `thenNat` \ ((unused,_), argCode) ->
-    let
-       nRegs = length allArgRegs - length unused
-       code = asmSeqThen (map ($ []) argCode)
-    in
-       returnSeq code [
-           LDA pv (AddrImm (ImmLab (ptext fn))),
-           JSR ra (AddrReg pv) nRegs,
-           LDGP gp (AddrReg ra)]
-  where
-    ------------------------
-    {- Try to get a value into a specific register (or registers) for
-       a call.  The first 6 arguments go into the appropriate
-       argument register (separate registers for integer and floating
-       point arguments, but used in lock-step), and the remaining
-       arguments are dumped to the stack, beginning at 0(sp).  Our
-       first argument is a pair of the list of remaining argument
-       registers to be assigned for this call and the next stack
-       offset to use for overflowing arguments.  This way,
-       @get_Arg@ can be applied to all of a call's arguments using
-       @mapAccumLNat@.
-    -}
-    get_arg
-       :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
-       -> StixTree             -- Current argument
-       -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
-
-    -- We have to use up all of our argument registers first...
-
-    get_arg ((iDst,fDst):dsts, offset) arg
-      = getRegister arg                            `thenNat` \ register ->
-       let
-           reg  = if isFloatType pk then fDst else iDst
-           code = registerCode register reg
-           src  = registerName register reg
-           pk   = registerRep register
-       in
-       return (
-           if isFloatType pk then
-               ((dsts, offset), if isFixed register then
-                   code . mkSeqInstr (FMOV src fDst)
-                   else code)
-           else
-               ((dsts, offset), if isFixed register then
-                   code . mkSeqInstr (OR src (RIReg src) iDst)
-                   else code))
-
-    -- Once we have run out of argument registers, we move to the
-    -- stack...
-
-    get_arg ([], offset) arg
-      = getRegister arg                        `thenNat` \ register ->
-       getNewRegNat (registerRep register)
-                                       `thenNat` \ tmp ->
-       let
-           code = registerCode register tmp
-           src  = registerName register tmp
-           pk   = registerRep register
-           sz   = primRepToSize pk
-       in
-       return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-       -- write barrier compiles to no code on x86/x86-64; 
-       -- we keep it this long in order to prevent earlier optimisations.
-
--- we only cope with a single result for foreign calls
-genCCall (CmmPrim op) [CmmHinted r _] args = do
-  l1 <- getNewLabelNat
-  l2 <- getNewLabelNat
-  case op of
-       MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
-       MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
-       
-       MO_F32_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
-       MO_F64_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
-
-       MO_F32_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
-       MO_F64_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
-
-       MO_F32_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
-       MO_F64_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
-       
-       other_op    -> outOfLineFloatOp op r args
- where
-  actuallyInlineFloatOp instr size [CmmHinted x _]
-       = do res <- trivialUFCode size (instr size) x
-            any <- anyReg res
-            return (any (getRegisterReg (CmmLocal r)))
-
-genCCall target dest_regs args = do
-    let
-        sizes               = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
-#if !darwin_TARGET_OS        
-        tot_arg_size        = sum sizes
-#else
-        raw_arg_size        = sum sizes
-        tot_arg_size        = roundTo 16 raw_arg_size
-        arg_pad_size        = tot_arg_size - raw_arg_size
-    delta0 <- getDeltaNat
-    setDeltaNat (delta0 - arg_pad_size)
-#endif
-
-    push_codes <- mapM push_arg (reverse args)
-    delta <- getDeltaNat
-
-    -- in
-    -- deal with static vs dynamic call targets
-    (callinsns,cconv) <-
-      case target of
-       -- CmmPrim -> ...
-        CmmCallee (CmmLit (CmmLabel lbl)) conv
-           -> -- ToDo: stdcall arg sizes
-             return (unitOL (CALL (Left fn_imm) []), conv)
-          where fn_imm = ImmCLbl lbl
-        CmmCallee expr conv
-           -> do { (dyn_c, dyn_r) <- get_op expr
-                 ; ASSERT( isWord32 (cmmExprType expr) )
-                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
-
-    let        push_code
-#if darwin_TARGET_OS
-            | arg_pad_size /= 0
-            = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
-                    DELTA (delta0 - arg_pad_size)]
-              `appOL` concatOL push_codes
-            | otherwise
-#endif
-            = concatOL push_codes
-       call = callinsns `appOL`
-               toOL (
-                       -- Deallocate parameters after call for ccall;
-                       -- but not for stdcall (callee does it)
-                  (if cconv == StdCallConv || tot_arg_size==0 then [] else 
-                  [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
-                  ++
-                  [DELTA (delta + tot_arg_size)]
-               )
-    -- in
-    setDeltaNat (delta + tot_arg_size)
-
-    let
-       -- assign the results, if necessary
-       assign_code []     = nilOL
-       assign_code [CmmHinted dest _hint]
-         | isFloatType ty = unitOL (GMOV fake0 r_dest)
-         | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
-                                   MOV II32 (OpReg edx) (OpReg r_dest_hi)]
-         | otherwise      = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
-         where 
-               ty = localRegType dest
-               w  = typeWidth ty
-               r_dest_hi = getHiVRegFromLo r_dest
-               r_dest    = getRegisterReg (CmmLocal dest)
-       assign_code many = panic "genCCall.assign_code many"
-
-    return (push_code `appOL` 
-           call `appOL` 
-           assign_code dest_regs)
-
-  where
-    arg_size :: CmmType -> Int -- Width in bytes
-    arg_size ty = widthInBytes (typeWidth ty)
-
-    roundTo a x | x `mod` a == 0 = x
-                | otherwise = x + a - (x `mod` a)
-
-
-    push_arg :: HintedCmmActual {-current argument-}
-                    -> NatM InstrBlock  -- code
-
-    push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
-      | isWord64 arg_ty = do
-        ChildCode64 code r_lo <- iselExpr64 arg
-        delta <- getDeltaNat
-        setDeltaNat (delta - 8)
-        let 
-            r_hi = getHiVRegFromLo r_lo
-        -- in
-       return (       code `appOL`
-                       toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
-                             PUSH II32 (OpReg r_lo), DELTA (delta - 8),
-                            DELTA (delta-8)]
-            )
-
-      | otherwise = do
-        (code, reg) <- get_op arg
-        delta <- getDeltaNat
-        let size = arg_size arg_ty     -- Byte size
-        setDeltaNat (delta-size)
-        if (isFloatType arg_ty)
-           then return (code `appOL`
-                        toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
-                              DELTA (delta-size),
-                              GST (floatSize (typeWidth arg_ty))
-                                 reg (AddrBaseIndex (EABaseReg esp) 
-                                                        EAIndexNone
-                                                        (ImmInt 0))]
-                       )
-           else return (code `snocOL`
-                        PUSH II32 (OpReg reg) `snocOL`
-                        DELTA (delta-size)
-                       )
-      where
-         arg_ty = cmmExprType arg
-
-    ------------
-    get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
-    get_op op = do
-        (reg,code) <- getSomeReg op
-       return (code, reg)
-
-#endif /* i386_TARGET_ARCH */
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
-  -> NatM InstrBlock
-outOfLineFloatOp mop res args
-  = do
-      dflags <- getDynFlagsNat
-      targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
-      let target = CmmCallee targetExpr CCallConv
-        
-      if isFloat64 (localRegType res)
-        then
-          stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
-        else do
-          uq <- getUniqueNat
-          let 
-            tmp = LocalReg uq f64
-          -- in
-          code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
-          code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
-          return (code1 `appOL` code2)
-  where
-       lbl = mkForeignLabel fn Nothing False IsFunction
-
-       fn = case mop of
-             MO_F32_Sqrt  -> fsLit "sqrtf"
-             MO_F32_Sin   -> fsLit "sinf"
-             MO_F32_Cos   -> fsLit "cosf"
-             MO_F32_Tan   -> fsLit "tanf"
-             MO_F32_Exp   -> fsLit "expf"
-             MO_F32_Log   -> fsLit "logf"
-
-             MO_F32_Asin  -> fsLit "asinf"
-             MO_F32_Acos  -> fsLit "acosf"
-             MO_F32_Atan  -> fsLit "atanf"
-
-             MO_F32_Sinh  -> fsLit "sinhf"
-             MO_F32_Cosh  -> fsLit "coshf"
-             MO_F32_Tanh  -> fsLit "tanhf"
-             MO_F32_Pwr   -> fsLit "powf"
-
-             MO_F64_Sqrt  -> fsLit "sqrt"
-             MO_F64_Sin   -> fsLit "sin"
-             MO_F64_Cos   -> fsLit "cos"
-             MO_F64_Tan   -> fsLit "tan"
-             MO_F64_Exp   -> fsLit "exp"
-             MO_F64_Log   -> fsLit "log"
-
-             MO_F64_Asin  -> fsLit "asin"
-             MO_F64_Acos  -> fsLit "acos"
-             MO_F64_Atan  -> fsLit "atan"
-
-             MO_F64_Sinh  -> fsLit "sinh"
-             MO_F64_Cosh  -> fsLit "cosh"
-             MO_F64_Tanh  -> fsLit "tanh"
-             MO_F64_Pwr   -> fsLit "pow"
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-       -- write barrier compiles to no code on x86/x86-64; 
-       -- we keep it this long in order to prevent earlier optimisations.
-
-
-genCCall (CmmPrim op) [CmmHinted r _] args = 
-  outOfLineFloatOp op r args
-
-genCCall target dest_regs args = do
-
-       -- load up the register arguments
-    (stack_args, aregs, fregs, load_args_code)
-        <- load_args args allArgRegs allFPArgRegs nilOL
-
-    let
-       fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
-       int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
-       arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
-               -- for annotating the call instruction with
-
-       sse_regs = length fp_regs_used
-
-       tot_arg_size = arg_size * length stack_args
-
-       -- On entry to the called function, %rsp should be aligned
-       -- on a 16-byte boundary +8 (i.e. the first stack arg after
-       -- the return address is 16-byte aligned).  In STG land
-       -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
-       -- need to make sure we push a multiple of 16-bytes of args,
-       -- plus the return address, to get the correct alignment.
-       -- Urg, this is hard.  We need to feed the delta back into
-       -- the arg pushing code.
-    (real_size, adjust_rsp) <-
-       if tot_arg_size `rem` 16 == 0
-           then return (tot_arg_size, nilOL)
-           else do -- we need to adjust...
-               delta <- getDeltaNat
-               setDeltaNat (delta-8)
-               return (tot_arg_size+8, toOL [
-                               SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
-                               DELTA (delta-8)
-                       ])
-
-       -- push the stack args, right to left
-    push_code <- push_args (reverse stack_args) nilOL
-    delta <- getDeltaNat
-
-    -- deal with static vs dynamic call targets
-    (callinsns,cconv) <-
-      case target of
-       -- CmmPrim -> ...
-        CmmCallee (CmmLit (CmmLabel lbl)) conv
-           -> -- ToDo: stdcall arg sizes
-             return (unitOL (CALL (Left fn_imm) arg_regs), conv)
-          where fn_imm = ImmCLbl lbl
-        CmmCallee expr conv
-           -> do (dyn_r, dyn_c) <- getSomeReg expr
-                return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
-
-    let
-       -- The x86_64 ABI requires us to set %al to the number of SSE
-       -- registers that contain arguments, if the called routine
-       -- is a varargs function.  We don't know whether it's a
-       -- varargs function or not, so we have to assume it is.
-       --
-       -- It's not safe to omit this assignment, even if the number
-       -- of SSE regs in use is zero.  If %al is larger than 8
-       -- on entry to a varargs function, seg faults ensue.
-       assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
-
-    let call = callinsns `appOL`
-               toOL (
-                       -- Deallocate parameters after call for ccall;
-                       -- but not for stdcall (callee does it)
-                  (if cconv == StdCallConv || real_size==0 then [] else 
-                  [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
-                  ++
-                  [DELTA (delta + real_size)]
-               )
-    -- in
-    setDeltaNat (delta + real_size)
-
-    let
-       -- assign the results, if necessary
-       assign_code []     = nilOL
-       assign_code [CmmHinted dest _hint] = 
-         case typeWidth rep of
-               W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
-               W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
-               _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
-         where 
-               rep = localRegType dest
-               r_dest = getRegisterReg (CmmLocal dest)
-       assign_code many = panic "genCCall.assign_code many"
-
-    return (load_args_code     `appOL` 
-           adjust_rsp          `appOL`
-           push_code           `appOL`
-           assign_eax sse_regs `appOL`
-           call                `appOL` 
-           assign_code dest_regs)
-
-  where
-    arg_size = 8 -- always, at the mo
-
-    load_args :: [CmmHinted CmmExpr]
-             -> [Reg]                  -- int regs avail for args
-             -> [Reg]                  -- FP regs avail for args
-             -> InstrBlock
-             -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
-    load_args args [] [] code     =  return (args, [], [], code)
-       -- no more regs to use
-    load_args [] aregs fregs code =  return ([], aregs, fregs, code)
-       -- no more args to push
-    load_args ((CmmHinted arg hint) : rest) aregs fregs code
-       | isFloatType arg_rep = 
-       case fregs of
-         [] -> push_this_arg
-         (r:rs) -> do
-            arg_code <- getAnyReg arg
-            load_args rest aregs rs (code `appOL` arg_code r)
-       | otherwise =
-       case aregs of
-         [] -> push_this_arg
-         (r:rs) -> do
-            arg_code <- getAnyReg arg
-            load_args rest rs fregs (code `appOL` arg_code r)
-       where
-         arg_rep = cmmExprType arg
-
-         push_this_arg = do
-           (args',ars,frs,code') <- load_args rest aregs fregs code
-           return ((CmmHinted arg hint):args', ars, frs, code')
-
-    push_args [] code = return code
-    push_args ((CmmHinted arg hint):rest) code
-       | isFloatType arg_rep = do
-        (arg_reg, arg_code) <- getSomeReg arg
-         delta <- getDeltaNat
-         setDeltaNat (delta-arg_size)
-        let code' = code `appOL` arg_code `appOL` toOL [
-                       SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
-                       DELTA (delta-arg_size),
-                       MOV (floatSize width) (OpReg arg_reg) (OpAddr  (spRel 0))]
-        push_args rest code'
-
-       | otherwise = do
-       -- we only ever generate word-sized function arguments.  Promotion
-       -- has already happened: our Int8# type is kept sign-extended
-       -- in an Int#, for example.
-        ASSERT(width == W64) return ()
-        (arg_op, arg_code) <- getOperand arg
-         delta <- getDeltaNat
-         setDeltaNat (delta-arg_size)
-        let code' = code `appOL` arg_code `appOL` toOL [
-                        PUSH II64 arg_op, 
-                       DELTA (delta-arg_size)]
-        push_args rest code'
-       where
-         arg_rep = cmmExprType arg
-         width = typeWidth arg_rep
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-{- 
-   The SPARC calling convention is an absolute
-   nightmare.  The first 6x32 bits of arguments are mapped into
-   %o0 through %o5, and the remaining arguments are dumped to the
-   stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
-
-   If we have to put args on the stack, move %o6==%sp down by
-   the number of words to go on the stack, to ensure there's enough space.
-
-   According to Fraser and Hanson's lcc book, page 478, fig 17.2,
-   16 words above the stack pointer is a word for the address of
-   a structure return value.  I use this as a temporary location
-   for moving values from float to int regs.  Certainly it isn't
-   safe to put anything in the 16 words starting at %sp, since
-   this area can get trashed at any time due to window overflows
-   caused by signal handlers.
-
-   A final complication (if the above isn't enough) is that 
-   we can't blithely calculate the arguments one by one into
-   %o0 .. %o5.  Consider the following nested calls:
-
-       fff a (fff b c)
-
-   Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
-   the inner call will itself use %o0, which trashes the value put there
-   in preparation for the outer call.  Upshot: we need to calculate the
-   args into temporary regs, and move those to arg regs or onto the
-   stack only immediately prior to the call proper.  Sigh.
-
-genCCall
-    :: CmmCallTarget           -- function to call
-    -> HintedCmmFormals                -- where to put the result
-    -> HintedCmmActuals                -- arguments (of mixed type)
-    -> NatM InstrBlock
-
--}
-
-
--- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
--- are guaranteed to take place before writes afterwards (unlike on PowerPC). 
--- Ref: Section 8.4 of the SPARC V9 Architecture manual.
---
--- In the SPARC case we don't need a barrier.
---
-genCCall (CmmPrim (MO_WriteBarrier)) _ _
- = do  return nilOL
-
-genCCall target dest_regs argsAndHints 
- = do          
-       -- strip hints from the arg regs
-       let args :: [CmmExpr]
-           args  = map hintlessCmm argsAndHints
-
-
-       -- work out the arguments, and assign them to integer regs
-       argcode_and_vregs       <- mapM arg_to_int_vregs args
-       let (argcodes, vregss)  = unzip argcode_and_vregs
-       let vregs               = concat vregss
-
-       let n_argRegs           = length allArgRegs
-       let n_argRegs_used      = min (length vregs) n_argRegs
-
-
-       -- deal with static vs dynamic call targets
-       callinsns <- case target of
-               CmmCallee (CmmLit (CmmLabel lbl)) conv -> 
-                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-
-               CmmCallee expr conv 
-                -> do  (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
-                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-
-               CmmPrim mop 
-                -> do  res     <- outOfLineFloatOp mop
-                       lblOrMopExpr <- case res of
-                               Left lbl -> do
-                                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-
-                               Right mopExpr -> do
-                                       (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
-                                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-
-                       return lblOrMopExpr
-
-       let argcode = concatOL argcodes
-
-       let (move_sp_down, move_sp_up)
-                  = let diff = length vregs - n_argRegs
-                        nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
-                    in  if   nn <= 0
-                        then (nilOL, nilOL)
-                        else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
-
-        let transfer_code
-               = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
-                               
-       return 
-        $      argcode                 `appOL`
-               move_sp_down            `appOL`
-               transfer_code           `appOL`
-               callinsns               `appOL`
-               unitOL NOP              `appOL`
-               move_sp_up              `appOL`
-               assign_code dest_regs
-
-
--- | Generate code to calculate an argument, and move it into one
---     or two integer vregs.
-arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-arg_to_int_vregs arg
-
-       -- If the expr produces a 64 bit int, then we can just use iselExpr64
-       | isWord64 (cmmExprType arg)
-       = do    (ChildCode64 code r_lo) <- iselExpr64 arg
-               let r_hi                = getHiVRegFromLo r_lo
-               return (code, [r_hi, r_lo])
-
-       | otherwise
-       = do    (src, code)     <- getSomeReg arg
-               tmp             <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
-               let pk          = cmmExprType arg
-
-               case cmmTypeSize pk of
-
-                -- Load a 64 bit float return value into two integer regs.
-                FF64 -> do
-                       v1 <- getNewRegNat II32
-                       v2 <- getNewRegNat II32
-
-                       let Just f0_high = fPair f0
-                       
-                       let code2 = 
-                               code                            `snocOL`
-                               FMOV FF64 src f0                `snocOL`
-                               ST   FF32  f0 (spRel 16)        `snocOL`
-                               LD   II32  (spRel 16) v1        `snocOL`
-                               ST   FF32  f0_high (spRel 16)   `snocOL`
-                               LD   II32  (spRel 16) v2
-
-                       return  (code2, [v1,v2])
-
-                -- Load a 32 bit float return value into an integer reg
-                FF32 -> do
-                       v1 <- getNewRegNat II32
-                       
-                       let code2 =
-                               code                            `snocOL`
-                               ST   FF32  src (spRel 16)       `snocOL`
-                               LD   II32  (spRel 16) v1
-                               
-                       return (code2, [v1])
-
-                -- Move an integer return value into its destination reg.
-                other -> do
-                       v1 <- getNewRegNat II32
-                       
-                       let code2 = 
-                               code                            `snocOL`
-                               OR False g0 (RIReg src) v1
-                       
-                       return (code2, [v1])
-
-
--- | Move args from the integer vregs into which they have been 
---     marshalled, into %o0 .. %o5, and the rest onto the stack.
---
-move_final :: [Reg] -> [Reg] -> Int -> [Instr]
-
--- all args done
-move_final [] _ offset          
-       = []
-
--- out of aregs; move to stack
-move_final (v:vs) [] offset     
-       = ST II32 v (spRel offset)
-       : move_final vs [] (offset+1)
-
--- move into an arg (%o[0..5]) reg
-move_final (v:vs) (a:az) offset 
-       = OR False g0 (RIReg v) a
-       : move_final vs az offset
-
-
--- | Assign results returned from the call into their 
---     desination regs.
---
-assign_code :: [CmmHinted LocalReg] -> OrdList Instr
-assign_code [] = nilOL
-
-assign_code [CmmHinted dest _hint]     
- = let rep     = localRegType dest
-       width   = typeWidth rep
-       r_dest  = getRegisterReg (CmmLocal dest)
-
-       result
-               | isFloatType rep 
-               , W32   <- width
-               = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
-
-               | isFloatType rep
-               , W64   <- width
-               = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
-
-               | not $ isFloatType rep
-               , W32   <- width
-               = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
-
-               | not $ isFloatType rep
-               , W64           <- width
-               , r_dest_hi     <- getHiVRegFromLo r_dest
-               = toOL  [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
-                       , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
-   in  result
-
-
--- | Generate a call to implement an out-of-line floating point operation
-outOfLineFloatOp 
-       :: CallishMachOp 
-       -> NatM (Either CLabel CmmExpr)
-
-outOfLineFloatOp mop 
- = do  let functionName
-               = outOfLineFloatOp_table mop
-       
-       dflags  <- getDynFlagsNat
-       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference 
-               $  mkForeignLabel functionName Nothing True IsFunction
-
-       let mopLabelOrExpr 
-               = case mopExpr of
-                       CmmLit (CmmLabel lbl)   -> Left lbl
-                        _                      -> Right mopExpr
-
-       return mopLabelOrExpr
-
-
--- | Decide what C function to use to implement a CallishMachOp
---
-outOfLineFloatOp_table 
-       :: CallishMachOp
-       -> FastString
-       
-outOfLineFloatOp_table mop
- = case mop of
-       MO_F32_Exp    -> fsLit "expf"
-       MO_F32_Log    -> fsLit "logf"
-       MO_F32_Sqrt   -> fsLit "sqrtf"
-       MO_F32_Pwr    -> fsLit "powf"
-
-       MO_F32_Sin    -> fsLit "sinf"
-       MO_F32_Cos    -> fsLit "cosf"
-       MO_F32_Tan    -> fsLit "tanf"
-
-       MO_F32_Asin   -> fsLit "asinf"
-       MO_F32_Acos   -> fsLit "acosf"
-       MO_F32_Atan   -> fsLit "atanf"
-
-       MO_F32_Sinh   -> fsLit "sinhf"
-       MO_F32_Cosh   -> fsLit "coshf"
-       MO_F32_Tanh   -> fsLit "tanhf"
-
-       MO_F64_Exp    -> fsLit "exp"
-       MO_F64_Log    -> fsLit "log"
-       MO_F64_Sqrt   -> fsLit "sqrt"
-       MO_F64_Pwr    -> fsLit "pow"
-
-       MO_F64_Sin    -> fsLit "sin"
-       MO_F64_Cos    -> fsLit "cos"
-       MO_F64_Tan    -> fsLit "tan"
-
-       MO_F64_Asin   -> fsLit "asin"
-       MO_F64_Acos   -> fsLit "acos"
-       MO_F64_Atan   -> fsLit "atan"
-
-       MO_F64_Sinh   -> fsLit "sinh"
-       MO_F64_Cosh   -> fsLit "cosh"
-       MO_F64_Tanh   -> fsLit "tanh"
-
-       other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
-                       (pprCallishMachOp mop)
-
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-#if darwin_TARGET_OS || linux_TARGET_OS
-{-
-    The PowerPC calling convention for Darwin/Mac OS X
-    is described in Apple's document
-    "Inside Mac OS X - Mach-O Runtime Architecture".
-    
-    PowerPC Linux uses the System V Release 4 Calling Convention
-    for PowerPC. It is described in the
-    "System V Application Binary Interface PowerPC Processor Supplement".
-
-    Both conventions are similar:
-    Parameters may be passed in general-purpose registers starting at r3, in
-    floating point registers starting at f1, or on the stack. 
-    
-    But there are substantial differences:
-    * The number of registers used for parameter passing and the exact set of
-      nonvolatile registers differs (see MachRegs.lhs).
-    * On Darwin, stack space is always reserved for parameters, even if they are
-      passed in registers. The called routine may choose to save parameters from
-      registers to the corresponding space on the stack.
-    * On Darwin, a corresponding amount of GPRs is skipped when a floating point
-      parameter is passed in an FPR.
-    * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
-      starting with an odd-numbered GPR. It may skip a GPR to achieve this.
-      Darwin just treats an I64 like two separate II32s (high word first).
-    * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
-      4-byte aligned like everything else on Darwin.
-    * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
-      PowerPC Linux does not agree, so neither do we.
-      
-    According to both conventions, The parameter area should be part of the
-    caller's stack frame, allocated in the caller's prologue code (large enough
-    to hold the parameter lists for all called routines). The NCG already
-    uses the stack for register spilling, leaving 64 bytes free at the top.
-    If we need a larger parameter area than that, we just allocate a new stack
-    frame just before ccalling.
--}
-
-
-genCCall (CmmPrim MO_WriteBarrier) _ _ 
- = return $ unitOL LWSYNC
-
-genCCall target dest_regs argsAndHints
-  = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
-        -- we rely on argument promotion in the codeGen
-    do
-        (finalStack,passArgumentsCode,usedRegs) <- passArguments
-                                                        (zip args argReps)
-                                                        allArgRegs allFPArgRegs
-                                                        initialStackOffset
-                                                        (toOL []) []
-                                                
-        (labelOrExpr, reduceToFF32) <- case target of
-            CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
-            CmmCallee expr conv -> return  (Right expr, False)
-            CmmPrim mop -> outOfLineFloatOp mop
-                                                        
-        let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
-            codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
-
-        case labelOrExpr of
-            Left lbl -> do
-               return (         codeBefore
-                        `snocOL` BL lbl usedRegs
-                        `appOL`         codeAfter)
-            Right dyn -> do
-               (dynReg, dynCode) <- getSomeReg dyn
-               return (         dynCode
-                       `snocOL` MTCTR dynReg
-                        `appOL`         codeBefore
-                        `snocOL` BCTRL usedRegs
-                        `appOL`         codeAfter)
-    where
-#if darwin_TARGET_OS
-        initialStackOffset = 24
-           -- size of linkage area + size of arguments, in bytes       
-       stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
-                                map (widthInBytes . typeWidth) argReps
-#elif linux_TARGET_OS
-        initialStackOffset = 8
-        stackDelta finalStack = roundTo 16 finalStack
-#endif
-       args = map hintlessCmm argsAndHints
-       argReps = map cmmExprType args
-
-       roundTo a x | x `mod` a == 0 = x
-                   | otherwise = x + a - (x `mod` a)
-
-        move_sp_down finalStack
-               | delta > 64 =
-                        toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
-                             DELTA (-delta)]
-              | otherwise = nilOL
-              where delta = stackDelta finalStack
-       move_sp_up finalStack
-              | delta > 64 =
-                        toOL [ADD sp sp (RIImm (ImmInt delta)),
-                              DELTA 0]
-              | otherwise = nilOL
-              where delta = stackDelta finalStack
-              
-
-        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
-        passArguments ((arg,arg_ty):args) gprs fprs stackOffset
-               accumCode accumUsed | isWord64 arg_ty =
-            do
-                ChildCode64 code vr_lo <- iselExpr64 arg
-                let vr_hi = getHiVRegFromLo vr_lo
-
-#if darwin_TARGET_OS                
-                passArguments args
-                              (drop 2 gprs)
-                              fprs
-                              (stackOffset+8)
-                              (accumCode `appOL` code
-                                    `snocOL` storeWord vr_hi gprs stackOffset
-                                    `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
-                              ((take 2 gprs) ++ accumUsed)
-            where
-                storeWord vr (gpr:_) offset = MR gpr vr
-                storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
-                
-#elif linux_TARGET_OS
-                let stackOffset' = roundTo 8 stackOffset
-                    stackCode = accumCode `appOL` code
-                        `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
-                        `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
-                    regCode hireg loreg =
-                        accumCode `appOL` code
-                            `snocOL` MR hireg vr_hi
-                            `snocOL` MR loreg vr_lo
-                                        
-                case gprs of
-                    hireg : loreg : regs | even (length gprs) ->
-                        passArguments args regs fprs stackOffset
-                                      (regCode hireg loreg) (hireg : loreg : accumUsed)
-                    _skipped : hireg : loreg : regs ->
-                        passArguments args regs fprs stackOffset
-                                      (regCode hireg loreg) (hireg : loreg : accumUsed)
-                    _ -> -- only one or no regs left
-                        passArguments args [] fprs (stackOffset'+8)
-                                      stackCode accumUsed
-#endif
-        
-        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
-            | reg : _ <- regs = do
-                register <- getRegister arg
-                let code = case register of
-                            Fixed _ freg fcode -> fcode `snocOL` MR reg freg
-                            Any _ acode -> acode reg
-                passArguments args
-                              (drop nGprs gprs)
-                              (drop nFprs fprs)
-#if darwin_TARGET_OS
-        -- The Darwin ABI requires that we reserve stack slots for register parameters
-                              (stackOffset + stackBytes)
-#elif linux_TARGET_OS
-        -- ... the SysV ABI doesn't.
-                              stackOffset
-#endif
-                              (accumCode `appOL` code)
-                              (reg : accumUsed)
-            | otherwise = do
-                (vr, code) <- getSomeReg arg
-                passArguments args
-                              (drop nGprs gprs)
-                              (drop nFprs fprs)
-                              (stackOffset' + stackBytes)
-                              (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
-                              accumUsed
-            where
-#if darwin_TARGET_OS
-        -- stackOffset is at least 4-byte aligned
-        -- The Darwin ABI is happy with that.
-                stackOffset' = stackOffset
-#else
-        -- ... the SysV ABI requires 8-byte alignment for doubles.
-                stackOffset' | isFloatType rep && typeWidth rep == W64 =
-                                roundTo 8 stackOffset
-                             | otherwise  =           stackOffset
-#endif
-                stackSlot = AddrRegImm sp (ImmInt stackOffset')
-                (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
-                    II32 -> (1, 0, 4, gprs)
-#if darwin_TARGET_OS
-        -- The Darwin ABI requires that we skip a corresponding number of GPRs when
-        -- we use the FPRs.
-                    FF32 -> (1, 1, 4, fprs)
-                    FF64 -> (2, 1, 8, fprs)
-#elif linux_TARGET_OS
-        -- ... the SysV ABI doesn't.
-                    FF32 -> (0, 1, 4, fprs)
-                    FF64 -> (0, 1, 8, fprs)
-#endif
-        
-        moveResult reduceToFF32 =
-            case dest_regs of
-                [] -> nilOL
-                [CmmHinted dest _hint]
-                    | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
-                    | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
-                    | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
-                                          MR r_dest r4]
-                    | otherwise -> unitOL (MR r_dest r3)
-                    where rep = cmmRegType (CmmLocal dest)
-                          r_dest = getRegisterReg (CmmLocal dest)
-                          
-        outOfLineFloatOp mop =
-            do
-                dflags <- getDynFlagsNat
-                mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
-                              mkForeignLabel functionName Nothing True IsFunction
-                let mopLabelOrExpr = case mopExpr of
-                        CmmLit (CmmLabel lbl) -> Left lbl
-                        _ -> Right mopExpr
-                return (mopLabelOrExpr, reduce)
-            where
-                (functionName, reduce) = case mop of
-                    MO_F32_Exp   -> (fsLit "exp", True)
-                    MO_F32_Log   -> (fsLit "log", True)
-                    MO_F32_Sqrt  -> (fsLit "sqrt", True)
-                        
-                    MO_F32_Sin   -> (fsLit "sin", True)
-                    MO_F32_Cos   -> (fsLit "cos", True)
-                    MO_F32_Tan   -> (fsLit "tan", True)
-                    
-                    MO_F32_Asin  -> (fsLit "asin", True)
-                    MO_F32_Acos  -> (fsLit "acos", True)
-                    MO_F32_Atan  -> (fsLit "atan", True)
-                    
-                    MO_F32_Sinh  -> (fsLit "sinh", True)
-                    MO_F32_Cosh  -> (fsLit "cosh", True)
-                    MO_F32_Tanh  -> (fsLit "tanh", True)
-                    MO_F32_Pwr   -> (fsLit "pow", True)
-                        
-                    MO_F64_Exp   -> (fsLit "exp", False)
-                    MO_F64_Log   -> (fsLit "log", False)
-                    MO_F64_Sqrt  -> (fsLit "sqrt", False)
-                        
-                    MO_F64_Sin   -> (fsLit "sin", False)
-                    MO_F64_Cos   -> (fsLit "cos", False)
-                    MO_F64_Tan   -> (fsLit "tan", False)
-                     
-                    MO_F64_Asin  -> (fsLit "asin", False)
-                    MO_F64_Acos  -> (fsLit "acos", False)
-                    MO_F64_Atan  -> (fsLit "atan", False)
-                    
-                    MO_F64_Sinh  -> (fsLit "sinh", False)
-                    MO_F64_Cosh  -> (fsLit "cosh", False)
-                    MO_F64_Tanh  -> (fsLit "tanh", False)
-                    MO_F64_Pwr   -> (fsLit "pow", False)
-                    other -> pprPanic "genCCall(ppc): unknown callish op"
-                                    (pprCallishMachOp other)
-
-#endif /* darwin_TARGET_OS || linux_TARGET_OS */
-                
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Generating a table-branch
-
-genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-genSwitch expr ids
-  | opt_PIC
-  = do
-        (reg,e_code) <- getSomeReg expr
-        lbl <- getNewLabelNat
-        dflags <- getDynFlagsNat
-        dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
-        (tableReg,t_code) <- getSomeReg $ dynRef
-        let
-            jumpTable = map jumpTableEntryRel ids
-            
-            jumpTableEntryRel Nothing
-                = CmmStaticLit (CmmInt 0 wordWidth)
-            jumpTableEntryRel (Just (BlockId id))
-                = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
-                where blockLabel = mkAsmTempLabel id
-
-            op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
-                                       (EAIndex reg wORD_SIZE) (ImmInt 0))
-
-#if x86_64_TARGET_ARCH
-#if darwin_TARGET_OS
-    -- on Mac OS X/x86_64, put the jump table in the text section
-    -- to work around a limitation of the linker.
-    -- ld64 is unable to handle the relocations for
-    --     .quad L1 - L0
-    -- if L0 is not preceded by a non-anonymous label in its section.
-    
-            code = e_code `appOL` t_code `appOL` toOL [
-                            ADD (intSize wordWidth) op (OpReg tableReg),
-                            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
-                            LDATA Text (CmmDataLabel lbl : jumpTable)
-                    ]
-#else
-    -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
-    -- relocations, hence we only get 32-bit offsets in the jump
-    -- table. As these offsets are always negative we need to properly
-    -- sign extend them to 64-bit. This hack should be removed in
-    -- conjunction with the hack in PprMach.hs/pprDataItem once
-    -- binutils 2.17 is standard.
-            code = e_code `appOL` t_code `appOL` toOL [
-                           LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-                           MOVSxL II32
-                                  (OpAddr (AddrBaseIndex (EABaseReg tableReg)
-                                                         (EAIndex reg wORD_SIZE) (ImmInt 0)))
-                                  (OpReg reg),
-                           ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
-                           JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-                  ]
-#endif
-#else
-            code = e_code `appOL` t_code `appOL` toOL [
-                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-                            ADD (intSize wordWidth) op (OpReg tableReg),
-                            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
-                    ]
-#endif
-        return code
-  | otherwise
-  = do
-        (reg,e_code) <- getSomeReg expr
-        lbl <- getNewLabelNat
-        let
-            jumpTable = map jumpTableEntry ids
-            op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
-            code = e_code `appOL` toOL [
-                    LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-                    JMP_TBL op [ id | Just id <- ids ]
-                 ]
-        -- in
-        return code
-#elif powerpc_TARGET_ARCH
-genSwitch expr ids 
-  | opt_PIC
-  = do
-        (reg,e_code) <- getSomeReg expr
-        tmp <- getNewRegNat II32
-        lbl <- getNewLabelNat
-        dflags <- getDynFlagsNat
-        dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
-        (tableReg,t_code) <- getSomeReg $ dynRef
-        let
-            jumpTable = map jumpTableEntryRel ids
-            
-            jumpTableEntryRel Nothing
-                = CmmStaticLit (CmmInt 0 wordWidth)
-            jumpTableEntryRel (Just (BlockId id))
-                = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
-                where blockLabel = mkAsmTempLabel id
-
-            code = e_code `appOL` t_code `appOL` toOL [
-                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-                            SLW tmp reg (RIImm (ImmInt 2)),
-                            LD II32 tmp (AddrRegReg tableReg tmp),
-                            ADD tmp tmp (RIReg tableReg),
-                            MTCTR tmp,
-                            BCTR [ id | Just id <- ids ]
-                    ]
-        return code
-  | otherwise
-  = do
-        (reg,e_code) <- getSomeReg expr
-        tmp <- getNewRegNat II32
-        lbl <- getNewLabelNat
-        let
-            jumpTable = map jumpTableEntry ids
-        
-            code = e_code `appOL` toOL [
-                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
-                            SLW tmp reg (RIImm (ImmInt 2)),
-                            ADDIS tmp tmp (HA (ImmCLbl lbl)),
-                            LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
-                            MTCTR tmp,
-                            BCTR [ id | Just id <- ids ]
-                    ]
-        return code
-#elif sparc_TARGET_ARCH
-genSwitch expr ids
-       | opt_PIC
-       = error "MachCodeGen: sparc genSwitch PIC not finished\n"
-  
-       | otherwise
-       = do    (e_reg, e_code) <- getSomeReg expr
-
-               base_reg        <- getNewRegNat II32
-               offset_reg      <- getNewRegNat II32
-               dst             <- getNewRegNat II32
-
-               label           <- getNewLabelNat
-               let jumpTable   = map jumpTableEntry ids
-
-               return $ e_code `appOL`
-                toOL   
-                       -- the jump table
-                       [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
-
-                       -- load base of jump table
-                       , SETHI (HI (ImmCLbl label)) base_reg
-                       , OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
-                       
-                       -- the addrs in the table are 32 bits wide..
-                       , SLL   e_reg (RIImm $ ImmInt 2) offset_reg
-
-                       -- load and jump to the destination
-                       , LD      II32 (AddrRegReg base_reg offset_reg) dst
-                       , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
-                       , NOP ]
-
-#else
-#error "ToDo: genSwitch"
-#endif
-
-
--- | Convert a BlockId to some CmmStatic data
-jumpTableEntry :: Maybe BlockId -> CmmStatic
-jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
-jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
-    where blockLabel = mkAsmTempLabel id
-
--- -----------------------------------------------------------------------------
--- Support bits
--- -----------------------------------------------------------------------------
-
-
--- -----------------------------------------------------------------------------
--- 'condIntReg' and 'condFltReg': condition codes into registers
-
--- Turn those condition codes into integers now (when they appear on
--- the right hand side of an assignment).
--- 
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-condIntReg = panic "MachCode.condIntReg (not on Alpha)"
-condFltReg = panic "MachCode.condFltReg (not on Alpha)"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-condIntReg cond x y = do
-  CondCode _ cond cond_code <- condIntCode cond x y
-  tmp <- getNewRegNat II8
-  let 
-       code dst = cond_code `appOL` toOL [
-                   SETCC cond (OpReg tmp),
-                   MOVZxL II8 (OpReg tmp) (OpReg dst)
-                 ]
-  -- in
-  return (Any II32 code)
-
-#endif
-
-#if i386_TARGET_ARCH
-
-condFltReg cond x y = do
-  CondCode _ cond cond_code <- condFltCode cond x y
-  tmp <- getNewRegNat II8
-  let 
-       code dst = cond_code `appOL` toOL [
-                   SETCC cond (OpReg tmp),
-                   MOVZxL II8 (OpReg tmp) (OpReg dst)
-                 ]
-  -- in
-  return (Any II32 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-
-condFltReg cond x y = do
-  CondCode _ cond cond_code <- condFltCode cond x y
-  tmp1 <- getNewRegNat wordSize
-  tmp2 <- getNewRegNat wordSize
-  let 
-       -- We have to worry about unordered operands (eg. comparisons
-       -- against NaN).  If the operands are unordered, the comparison
-       -- sets the parity flag, carry flag and zero flag.
-       -- All comparisons are supposed to return false for unordered
-       -- operands except for !=, which returns true.
-       --
-       -- Optimisation: we don't have to test the parity flag if we
-       -- know the test has already excluded the unordered case: eg >
-       -- and >= test for a zero carry flag, which can only occur for
-       -- ordered operands.
-       --
-       -- ToDo: by reversing comparisons we could avoid testing the
-       -- parity flag in more cases.
-
-       code dst = 
-          cond_code `appOL` 
-            (case cond of
-               NE  -> or_unordered dst
-               GU  -> plain_test   dst
-               GEU -> plain_test   dst
-               _   -> and_ordered  dst)
-
-       plain_test dst = toOL [
-                   SETCC cond (OpReg tmp1),
-                   MOVZxL II8 (OpReg tmp1) (OpReg dst)
-                ]
-       or_unordered dst = toOL [
-                   SETCC cond (OpReg tmp1),
-                   SETCC PARITY (OpReg tmp2),
-                   OR II8 (OpReg tmp1) (OpReg tmp2),
-                   MOVZxL II8 (OpReg tmp2) (OpReg dst)
-                 ]
-       and_ordered dst = toOL [
-                   SETCC cond (OpReg tmp1),
-                   SETCC NOTPARITY (OpReg tmp2),
-                   AND II8 (OpReg tmp1) (OpReg tmp2),
-                   MOVZxL II8 (OpReg tmp2) (OpReg dst)
-                 ]
-  -- in
-  return (Any II32 code)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
-    (src, code) <- getSomeReg x
-    tmp <- getNewRegNat II32
-    let
-       code__2 dst = code `appOL` toOL [
-           SUB False True g0 (RIReg src) g0,
-           SUB True False g0 (RIImm (ImmInt (-1))) dst]
-    return (Any II32 code__2)
-
-condIntReg EQQ x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    tmp1 <- getNewRegNat II32
-    tmp2 <- getNewRegNat II32
-    let
-       code__2 dst = code1 `appOL` code2 `appOL` toOL [
-           XOR False src1 (RIReg src2) dst,
-           SUB False True g0 (RIReg dst) g0,
-           SUB True False g0 (RIImm (ImmInt (-1))) dst]
-    return (Any II32 code__2)
-
-condIntReg NE x (CmmLit (CmmInt 0 d)) = do
-    (src, code) <- getSomeReg x
-    tmp <- getNewRegNat II32
-    let
-       code__2 dst = code `appOL` toOL [
-           SUB False True g0 (RIReg src) g0,
-           ADD True False g0 (RIImm (ImmInt 0)) dst]
-    return (Any II32 code__2)
-
-condIntReg NE x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    tmp1 <- getNewRegNat II32
-    tmp2 <- getNewRegNat II32
-    let
-       code__2 dst = code1 `appOL` code2 `appOL` toOL [
-           XOR False src1 (RIReg src2) dst,
-           SUB False True g0 (RIReg dst) g0,
-           ADD True False g0 (RIImm (ImmInt 0)) dst]
-    return (Any II32 code__2)
-
-condIntReg cond x y = do
-    bid1@(BlockId lbl1) <- getBlockIdNat
-    bid2@(BlockId lbl2) <- getBlockIdNat
-    CondCode _ cond cond_code <- condIntCode cond x y
-    let
-       code__2 dst = cond_code `appOL` toOL [
-           BI cond False bid1, NOP,
-           OR False g0 (RIImm (ImmInt 0)) dst,
-           BI ALWAYS False bid2, NOP,
-           NEWBLOCK bid1,
-           OR False g0 (RIImm (ImmInt 1)) dst,
-           NEWBLOCK bid2]
-    return (Any II32 code__2)
-
-condFltReg cond x y = do
-    bid1@(BlockId lbl1) <- getBlockIdNat
-    bid2@(BlockId lbl2) <- getBlockIdNat
-    CondCode _ cond cond_code <- condFltCode cond x y
-    let
-       code__2 dst = cond_code `appOL` toOL [ 
-           NOP,
-           BF cond False bid1, NOP,
-           OR False g0 (RIImm (ImmInt 0)) dst,
-           BI ALWAYS False bid2, NOP,
-           NEWBLOCK bid1,
-           OR False g0 (RIImm (ImmInt 1)) dst,
-           NEWBLOCK bid2]
-    return (Any II32 code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-condReg getCond = do
-    lbl1 <- getBlockIdNat
-    lbl2 <- getBlockIdNat
-    CondCode _ cond cond_code <- getCond
-    let
-{-        code dst = cond_code `appOL` toOL [
-                BCC cond lbl1,
-                LI dst (ImmInt 0),
-                BCC ALWAYS lbl2,
-                NEWBLOCK lbl1,
-                LI dst (ImmInt 1),
-                BCC ALWAYS lbl2,
-                NEWBLOCK lbl2
-            ]-}
-        code dst = cond_code
-            `appOL` negate_code
-            `appOL` toOL [
-                MFCR dst,
-                RLWINM dst dst (bit + 1) 31 31
-            ]
-        
-        negate_code | do_negate = unitOL (CRNOR bit bit bit)
-                    | otherwise = nilOL
-                    
-        (bit, do_negate) = case cond of
-            LTT -> (0, False)
-            LE  -> (1, True)
-            EQQ -> (2, False)
-            GE  -> (0, True)
-            GTT -> (1, False)
-            
-            NE  -> (2, True)
-            
-            LU  -> (0, False)
-            LEU -> (1, True)
-            GEU -> (0, True)
-            GU  -> (1, False)
-                
-    return (Any II32 code)
-    
-condIntReg cond x y = condReg (condIntCode cond x y)
-condFltReg cond x y = condReg (condFltCode cond x y)
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- 'trivial*Code': deal with trivial instructions
-
--- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
--- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
--- Only look for constants on the right hand side, because that's
--- where the generic optimizer will have put them.
-
--- Similarly, for unary instructions, we don't have to worry about
--- matching an StInt as the argument, because genericOpt will already
--- have handled the constant-folding.
-
-trivialCode
-    :: Width   -- Int only 
-    -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
-      ,IF_ARCH_i386 ((Operand -> Operand -> Instr) 
-                     -> Maybe (Operand -> Operand -> Instr)
-      ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr) 
-                     -> Maybe (Operand -> Operand -> Instr)
-      ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
-      ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
-      ,)))))
-    -> CmmExpr -> CmmExpr -- the two arguments
-    -> NatM Register
-
-#ifndef powerpc_TARGET_ARCH
-trivialFCode
-    :: Width   -- Floating point only
-    -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
-      ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
-      ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
-      ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
-      ,))))
-    -> CmmExpr -> CmmExpr -- the two arguments
-    -> NatM Register
-#endif
-
-trivialUCode
-    :: Size
-    -> IF_ARCH_alpha((RI -> Reg -> Instr)
-      ,IF_ARCH_i386 ((Operand -> Instr)
-      ,IF_ARCH_x86_64 ((Operand -> Instr)
-      ,IF_ARCH_sparc((RI -> Reg -> Instr)
-      ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
-      ,)))))
-    -> CmmExpr -- the one argument
-    -> NatM Register
-
-#ifndef powerpc_TARGET_ARCH
-trivialUFCode
-    :: Size
-    -> IF_ARCH_alpha((Reg -> Reg -> Instr)
-      ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
-      ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
-      ,IF_ARCH_sparc((Reg -> Reg -> Instr)
-      ,))))
-    -> CmmExpr -- the one argument
-    -> NatM Register
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-trivialCode instr x (StInt y)
-  | fits8Bits y
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep                `thenNat` \ tmp ->
-    let
-       code = registerCode register tmp
-       src1 = registerName register tmp
-       src2 = ImmInt (fromInteger y)
-       code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
-    in
-    return (Any IntRep code__2)
-
-trivialCode instr x y
-  = getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
-    getNewRegNat IntRep                `thenNat` \ tmp1 ->
-    getNewRegNat IntRep                `thenNat` \ tmp2 ->
-    let
-       code1 = registerCode register1 tmp1 []
-       src1  = registerName register1 tmp1
-       code2 = registerCode register2 tmp2 []
-       src2  = registerName register2 tmp2
-       code__2 dst = asmSeqThen [code1, code2] .
-                    mkSeqInstr (instr src1 (RIReg src2) dst)
-    in
-    return (Any IntRep code__2)
-
-------------
-trivialUCode instr x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep                `thenNat` \ tmp ->
-    let
-       code = registerCode register tmp
-       src  = registerName register tmp
-       code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
-    in
-    return (Any IntRep code__2)
-
-------------
-trivialFCode _ instr x y
-  = getRegister x              `thenNat` \ register1 ->
-    getRegister y              `thenNat` \ register2 ->
-    getNewRegNat FF64  `thenNat` \ tmp1 ->
-    getNewRegNat FF64  `thenNat` \ tmp2 ->
-    let
-       code1 = registerCode register1 tmp1
-       src1  = registerName register1 tmp1
-
-       code2 = registerCode register2 tmp2
-       src2  = registerName register2 tmp2
-
-       code__2 dst = asmSeqThen [code1 [], code2 []] .
-                     mkSeqInstr (instr src1 src2 dst)
-    in
-    return (Any FF64 code__2)
-
-trivialUFCode _ instr x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat FF64  `thenNat` \ tmp ->
-    let
-       code = registerCode register tmp
-       src  = registerName register tmp
-       code__2 dst = code . mkSeqInstr (instr src dst)
-    in
-    return (Any FF64 code__2)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-{-
-The Rules of the Game are:
-
-* You cannot assume anything about the destination register dst;
-  it may be anything, including a fixed reg.
-
-* You may compute an operand into a fixed reg, but you may not 
-  subsequently change the contents of that fixed reg.  If you
-  want to do so, first copy the value either to a temporary
-  or into dst.  You are free to modify dst even if it happens
-  to be a fixed reg -- that's not your problem.
-
-* You cannot assume that a fixed reg will stay live over an
-  arbitrary computation.  The same applies to the dst reg.
-
-* Temporary regs obtained from getNewRegNat are distinct from 
-  each other and from all other regs, and stay live over 
-  arbitrary computations.
-
---------------------
-
-SDM's version of The Rules:
-
-* If getRegister returns Any, that means it can generate correct
-  code which places the result in any register, period.  Even if that
-  register happens to be read during the computation.
-
-  Corollary #1: this means that if you are generating code for an
-  operation with two arbitrary operands, you cannot assign the result
-  of the first operand into the destination register before computing
-  the second operand.  The second operand might require the old value
-  of the destination register.
-
-  Corollary #2: A function might be able to generate more efficient
-  code if it knows the destination register is a new temporary (and
-  therefore not read by any of the sub-computations).
-
-* If getRegister returns Any, then the code it generates may modify only:
-       (a) fresh temporaries
-       (b) the destination register
-       (c) known registers (eg. %ecx is used by shifts)
-  In particular, it may *not* modify global registers, unless the global
-  register happens to be the destination register.
--}
-
-trivialCode width instr (Just revinstr) (CmmLit lit_a) b
-  | is32BitLit lit_a = do
-  b_code <- getAnyReg b
-  let
-       code dst 
-        = b_code dst `snocOL`
-           revinstr (OpImm (litToImm lit_a)) (OpReg dst)
-  -- in
-  return (Any (intSize width) code)
-
-trivialCode width instr maybe_revinstr a b
-  = genTrivialCode (intSize width) instr a b
-
--- This is re-used for floating pt instructions too.
-genTrivialCode rep instr a b = do
-  (b_op, b_code) <- getNonClobberedOperand b
-  a_code <- getAnyReg a
-  tmp <- getNewRegNat rep
-  let
-     -- We want the value of b to stay alive across the computation of a.
-     -- But, we want to calculate a straight into the destination register,
-     -- because the instruction only has two operands (dst := dst `op` src).
-     -- The troublesome case is when the result of b is in the same register
-     -- as the destination reg.  In this case, we have to save b in a
-     -- new temporary across the computation of a.
-     code dst
-       | dst `regClashesWithOp` b_op =
-               b_code `appOL`
-               unitOL (MOV rep b_op (OpReg tmp)) `appOL`
-               a_code dst `snocOL`
-               instr (OpReg tmp) (OpReg dst)
-       | otherwise =
-               b_code `appOL`
-               a_code dst `snocOL`
-               instr b_op (OpReg dst)
-  -- in
-  return (Any rep code)
-
-reg `regClashesWithOp` OpReg reg2   = reg == reg2
-reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
-reg `regClashesWithOp` _            = False
-
------------
-
-trivialUCode rep instr x = do
-  x_code <- getAnyReg x
-  let
-     code dst =
-       x_code dst `snocOL`
-       instr (OpReg dst)
-  return (Any rep code)
-
------------
-
-#if i386_TARGET_ARCH
-
-trivialFCode width instr x y = do
-  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
-  (y_reg, y_code) <- getSomeReg y
-  let
-     size = floatSize width
-     code dst =
-       x_code `appOL`
-       y_code `snocOL`
-       instr size x_reg y_reg dst
-  return (Any size code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-trivialFCode pk instr x y 
-  = genTrivialCode size (instr size) x y
-  where size = floatSize pk
-#endif
-
--------------
-
-trivialUFCode size instr x = do
-  (x_reg, x_code) <- getSomeReg x
-  let
-     code dst =
-       x_code `snocOL`
-       instr x_reg dst
-  -- in
-  return (Any size code)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-trivialCode pk instr x (CmmLit (CmmInt y d))
-  | fits13Bits y
-  = do
-      (src1, code) <- getSomeReg x
-      tmp <- getNewRegNat II32
-      let
-       src2 = ImmInt (fromInteger y)
-       code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
-      return (Any II32 code__2)
-
-trivialCode pk instr x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    tmp1 <- getNewRegNat II32
-    tmp2 <- getNewRegNat II32
-    let
-       code__2 dst = code1 `appOL` code2 `snocOL`
-                     instr src1 (RIReg src2) dst
-    return (Any II32 code__2)
-
-------------
-trivialFCode pk instr x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
-    tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
-    tmp <- getNewRegNat FF64
-    let
-       promote x = FxTOy FF32 FF64 x tmp
-
-       pk1   = cmmExprType x
-       pk2   = cmmExprType y
-
-       code__2 dst =
-               if pk1 `cmmEqType` pk2 then
-                   code1 `appOL` code2 `snocOL`
-                   instr (floatSize pk) src1 src2 dst
-               else if typeWidth pk1 == W32 then
-                   code1 `snocOL` promote src1 `appOL` code2 `snocOL`
-                   instr FF64 tmp src2 dst
-               else
-                   code1 `appOL` code2 `snocOL` promote src2 `snocOL`
-                   instr FF64 src1 tmp dst
-    return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) 
-               code__2)
-
-------------
-trivialUCode size instr x = do
-    (src, code) <- getSomeReg x
-    tmp <- getNewRegNat size
-    let
-       code__2 dst = code `snocOL` instr (RIReg src) dst
-    return (Any size code__2)
-
--------------
-trivialUFCode pk instr x = do
-    (src, code) <- getSomeReg x
-    tmp <- getNewRegNat pk
-    let
-       code__2 dst = code `snocOL` instr src dst
-    return (Any pk code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-{-
-Wolfgang's PowerPC version of The Rules:
-
-A slightly modified version of The Rules to take advantage of the fact
-that PowerPC instructions work on all registers and don't implicitly
-clobber any fixed registers.
-
-* The only expression for which getRegister returns Fixed is (CmmReg reg).
-
-* If getRegister returns Any, then the code it generates may modify only:
-       (a) fresh temporaries
-       (b) the destination register
-  It may *not* modify global registers, unless the global
-  register happens to be the destination register.
-  It may not clobber any other registers. In fact, only ccalls clobber any
-  fixed registers.
-  Also, it may not modify the counter register (used by genCCall).
-  
-  Corollary: If a getRegister for a subexpression returns Fixed, you need
-  not move it to a fresh temporary before evaluating the next subexpression.
-  The Fixed register won't be modified.
-  Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
-  
-* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
-  the value of the destination register.
--}
-
-trivialCode rep signed instr x (CmmLit (CmmInt y _))
-    | Just imm <- makeImmediate rep signed y 
-    = do
-        (src1, code1) <- getSomeReg x
-        let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
-        return (Any (intSize rep) code)
-  
-trivialCode rep signed instr x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
-    return (Any (intSize rep) code)
-
-trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
-                -> CmmExpr -> CmmExpr -> NatM Register
-trivialCodeNoImm' size instr x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
-    return (Any size code)
-    
-trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
-                -> CmmExpr -> CmmExpr -> NatM Register
-trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
-    
-trivialUCode rep instr x = do
-    (src, code) <- getSomeReg x
-    let code' dst = code `snocOL` instr dst src
-    return (Any rep code')
-    
--- There is no "remainder" instruction on the PPC, so we have to do
--- it the hard way.
--- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-
-remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
-    -> CmmExpr -> CmmExpr -> NatM Register
-remainderCode rep div x y = do
-    (src1, code1) <- getSomeReg x
-    (src2, code2) <- getSomeReg y
-    let code dst = code1 `appOL` code2 `appOL` toOL [
-                div dst src1 src2,
-                MULLW dst dst (RIReg src2),
-                SUBF dst dst src1
-            ]
-    return (Any (intSize rep) code)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
---  Coercing to/from integer/floating-point...
-
--- When going to integer, we truncate (round towards 0).
-
--- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
--- conversions.  We have to store temporaries in memory to move
--- between the integer and the floating point register sets.
-
--- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
--- pretend, on sparc at least, that double and float regs are seperate
--- kinds, so the value has to be computed into one kind before being
--- explicitly "converted" to live in the other kind.
-
-coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-
-#if sparc_TARGET_ARCH
-coerceDbl2Flt :: CmmExpr -> NatM Register
-coerceFlt2Dbl :: CmmExpr -> NatM Register
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-coerceInt2FP _ x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat IntRep                `thenNat` \ reg ->
-    let
-       code = registerCode register reg
-       src  = registerName register reg
-
-       code__2 dst = code . mkSeqInstrs [
-           ST Q src (spRel 0),
-           LD TF dst (spRel 0),
-           CVTxy Q TF dst dst]
-    in
-    return (Any FF64 code__2)
-
--------------
-coerceFP2Int x
-  = getRegister x              `thenNat` \ register ->
-    getNewRegNat FF64  `thenNat` \ tmp ->
-    let
-       code = registerCode register tmp
-       src  = registerName register tmp
-
-       code__2 dst = code . mkSeqInstrs [
-           CVTxy TF Q src tmp,
-           ST TF tmp (spRel 0),
-           LD Q dst (spRel 0)]
-    in
-    return (Any IntRep code__2)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-coerceInt2FP from to x = do
-  (x_reg, x_code) <- getSomeReg x
-  let
-        opc  = case to of W32 -> GITOF; W64 -> GITOD
-        code dst = x_code `snocOL` opc x_reg dst
-       -- ToDo: works for non-II32 reps?
-  return (Any (floatSize to) code)
-
-------------
-
-coerceFP2Int from to x = do
-  (x_reg, x_code) <- getSomeReg x
-  let
-        opc  = case from of W32 -> GFTOI; W64 -> GDTOI
-        code dst = x_code `snocOL` opc x_reg dst
-       -- ToDo: works for non-II32 reps?
-  -- in
-  return (Any (intSize to) code)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-coerceFP2Int from to x = do
-  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
-  let
-        opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
-        code dst = x_code `snocOL` opc x_op dst
-  -- in
-  return (Any (intSize to) code) -- works even if the destination rep is <II32
-
-coerceInt2FP from to x = do
-  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
-  let
-        opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
-        code dst = x_code `snocOL` opc x_op dst
-  -- in
-  return (Any (floatSize to) code) -- works even if the destination rep is <II32
-
-coerceFP2FP :: Width -> CmmExpr -> NatM Register
-coerceFP2FP to x = do
-  (x_reg, x_code) <- getSomeReg x
-  let
-        opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
-        code dst = x_code `snocOL` opc x_reg dst
-  -- in
-  return (Any (floatSize to) code)
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-coerceInt2FP width1 width2 x = do
-    (src, code) <- getSomeReg x
-    let
-       code__2 dst = code `appOL` toOL [
-           ST (intSize width1) src (spRel (-2)),
-           LD (intSize width1) (spRel (-2)) dst,
-           FxTOy (intSize width1) (floatSize width2) dst dst]
-    return (Any (floatSize $ width2) code__2)
-
-
--- | Coerce a floating point value to integer
---
---   NOTE: On sparc v9 there are no instructions to move a value from an
---        FP register directly to an int register, so we have to use a load/store.
---
-coerceFP2Int width1 width2 x 
- = do  let fsize1      = floatSize width1
-           fsize2      = floatSize width2
-       
-            isize2     = intSize   width2
-
-       (fsrc, code)    <- getSomeReg x
-       fdst            <- getNewRegNat fsize2
-    
-       let code2 dst   
-               =       code
-               `appOL` toOL
-                       -- convert float to int format, leaving it in a float reg.
-                       [ FxTOy fsize1 isize2 fsrc fdst
-
-                       -- store the int into mem, then load it back to move
-                       --      it into an actual int reg.
-                       , ST    fsize2 fdst (spRel (-2))
-                       , LD    isize2 (spRel (-2)) dst]
-
-       return (Any isize2 code2)
-
-------------
-coerceDbl2Flt x = do
-    (src, code) <- getSomeReg x
-    return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) 
-
-------------
-coerceFlt2Dbl x = do
-    (src, code) <- getSomeReg x
-    return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-coerceInt2FP fromRep toRep x = do
-    (src, code) <- getSomeReg x
-    lbl <- getNewLabelNat
-    itmp <- getNewRegNat II32
-    ftmp <- getNewRegNat FF64
-    dflags <- getDynFlagsNat
-    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
-    Amode addr addr_code <- getAmode dynRef
-    let
-       code' dst = code `appOL` maybe_exts `appOL` toOL [
-               LDATA ReadOnlyData
-                               [CmmDataLabel lbl,
-                                CmmStaticLit (CmmInt 0x43300000 W32),
-                                CmmStaticLit (CmmInt 0x80000000 W32)],
-               XORIS itmp src (ImmInt 0x8000),
-               ST II32 itmp (spRel 3),
-               LIS itmp (ImmInt 0x4330),
-               ST II32 itmp (spRel 2),
-               LD FF64 ftmp (spRel 2)
-            ] `appOL` addr_code `appOL` toOL [
-               LD FF64 dst addr,
-               FSUB FF64 dst ftmp dst
-           ] `appOL` maybe_frsp dst
-            
-        maybe_exts = case fromRep of
-                        W8 ->  unitOL $ EXTS II8 src src
-                        W16 -> unitOL $ EXTS II16 src src
-                        W32 -> nilOL
-        maybe_frsp dst = case toRep of
-                        W32 -> unitOL $ FRSP dst dst
-                        W64 -> nilOL
-    return (Any (floatSize toRep) code')
-
-coerceFP2Int fromRep toRep x = do
-    -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
-    (src, code) <- getSomeReg x
-    tmp <- getNewRegNat FF64
-    let
-       code' dst = code `appOL` toOL [
-               -- convert to int in FP reg
-           FCTIWZ tmp src,
-               -- store value (64bit) from FP to stack
-           ST FF64 tmp (spRel 2),
-               -- read low word of value (high word is undefined)
-           LD II32 dst (spRel 3)]      
-    return (Any (intSize toRep) code')
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- eXTRA_STK_ARGS_HERE
-
--- We (allegedly) put the first six C-call arguments in registers;
--- where do we start putting the rest of them?
-
--- Moved from Instrs (SDM):
-
-#if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-eXTRA_STK_ARGS_HERE :: Int
-eXTRA_STK_ARGS_HERE
-  = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
-#endif
diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs

index d19cda4..ed59d2b 100644 (file)
--- a/compiler/nativeGen/NCGMonad.hs
+++ b/compiler/nativeGen/NCGMonad.hs
@@ -10,28 +10,43 @@ module NCGMonad (
         NatM_State(..), mkNatM_State,
  
         NatM, -- instance Monad
         NatM_State(..), mkNatM_State,
  
         NatM, -- instance Monad
-       initNat, addImportNat, getUniqueNat,
-       mapAccumLNat, setDeltaNat, getDeltaNat,
-       getBlockIdNat, getNewLabelNat, getNewRegNat, getNewRegPairNat,
-       getPicBaseMaybeNat, getPicBaseNat, getDynFlagsNat
- ) where
+       initNat, 
+       addImportNat, 
+       getUniqueNat,
+       mapAccumLNat, 
+       setDeltaNat, 
+       getDeltaNat,
+       getBlockIdNat, 
+       getNewLabelNat, 
+       getNewRegNat, 
+       getNewRegPairNat,
+       getPicBaseMaybeNat, 
+       getPicBaseNat, 
+       getDynFlagsNat
+) 
+ 
+where
    
  #include "HsVersions.h"
  
    
  #include "HsVersions.h"
  
+import Reg
+import Size
+import TargetReg
+
  import BlockId
  import CLabel          ( CLabel, mkAsmTempLabel )
  import BlockId
  import CLabel          ( CLabel, mkAsmTempLabel )
-import Regs
  import UniqSupply
  import Unique          ( Unique )
  import DynFlags
  
  import UniqSupply
  import Unique          ( Unique )
  import DynFlags
  
-data NatM_State = NatM_State {
-                       natm_us      :: UniqSupply,
-                       natm_delta   :: Int,
-                       natm_imports :: [(CLabel)],
-                       natm_pic     :: Maybe Reg,
-                       natm_dflags  :: DynFlags
-               }
+data NatM_State 
+       = NatM_State {
+               natm_us      :: UniqSupply,
+               natm_delta   :: Int,
+               natm_imports :: [(CLabel)],
+               natm_pic     :: Maybe Reg,
+               natm_dflags  :: DynFlags
+       }
  
  newtype NatM result = NatM (NatM_State -> (result, NatM_State))
  
  
  newtype NatM result = NatM (NatM_State -> (result, NatM_State))
  
@@ -39,22 +54,27 @@ unNat :: NatM a -> NatM_State -> (a, NatM_State)
  unNat (NatM a) = a
  
  mkNatM_State :: UniqSupply -> Int -> DynFlags -> NatM_State
  unNat (NatM a) = a
  
  mkNatM_State :: UniqSupply -> Int -> DynFlags -> NatM_State
-mkNatM_State us delta dflags = NatM_State us delta [] Nothing dflags
+mkNatM_State us delta dflags 
+       = NatM_State us delta [] Nothing dflags
  
  initNat :: NatM_State -> NatM a -> (a, NatM_State)
  
  initNat :: NatM_State -> NatM a -> (a, NatM_State)
-initNat init_st m = case unNat m init_st of { (r,st) -> (r,st) }
+initNat init_st m 
+       = case unNat m init_st of { (r,st) -> (r,st) }
+
  
  instance Monad NatM where
    (>>=) = thenNat
    return = returnNat
  
  
  instance Monad NatM where
    (>>=) = thenNat
    return = returnNat
  
+
  thenNat :: NatM a -> (a -> NatM b) -> NatM b
  thenNat expr cont
  thenNat :: NatM a -> (a -> NatM b) -> NatM b
  thenNat expr cont
-  = NatM $ \st -> case unNat expr st of
+       = NatM $ \st -> case unNat expr st of
                         (result, st') -> unNat (cont result) st'
  
  returnNat :: a -> NatM a
                         (result, st') -> unNat (cont result) st'
  
  returnNat :: a -> NatM a
-returnNat result = NatM $ \st ->  (result, st)
+returnNat result 
+       = NatM $ \st ->  (result, st)
  
  mapAccumLNat :: (acc -> x -> NatM (acc, y))
                  -> acc
  
  mapAccumLNat :: (acc -> x -> NatM (acc, y))
                  -> acc
@@ -75,43 +95,64 @@ getUniqueNat = NatM $ \ (NatM_State us delta imports pic dflags) ->
  
  
  getDynFlagsNat :: NatM DynFlags
  
  
  getDynFlagsNat :: NatM DynFlags
-getDynFlagsNat = NatM $ \ (NatM_State us delta imports pic dflags) ->
-                         (dflags, (NatM_State us delta imports pic dflags))
+getDynFlagsNat 
+       = NatM $ \ (NatM_State us delta imports pic dflags) ->
+                 (dflags, (NatM_State us delta imports pic dflags))
+
  
  getDeltaNat :: NatM Int
  
  getDeltaNat :: NatM Int
-getDeltaNat = NatM $ \ st -> (natm_delta st, st)
+getDeltaNat 
+       = NatM $ \ st -> (natm_delta st, st)
+
  
  setDeltaNat :: Int -> NatM ()
  
  setDeltaNat :: Int -> NatM ()
-setDeltaNat delta = NatM $ \ (NatM_State us _ imports pic dflags) ->
-   ((), NatM_State us delta imports pic dflags)
+setDeltaNat delta 
+       = NatM $ \ (NatM_State us _ imports pic dflags) ->
+                  ((), NatM_State us delta imports pic dflags)
+
  
  addImportNat :: CLabel -> NatM ()
  
  addImportNat :: CLabel -> NatM ()
-addImportNat imp = NatM $ \ (NatM_State us delta imports pic dflags) ->
-   ((), NatM_State us delta (imp:imports) pic dflags)
+addImportNat imp 
+       = NatM $ \ (NatM_State us delta imports pic dflags) ->
+                  ((), NatM_State us delta (imp:imports) pic dflags)
+
  
  getBlockIdNat :: NatM BlockId
  
  getBlockIdNat :: NatM BlockId
-getBlockIdNat = do u <- getUniqueNat; return (BlockId u)
+getBlockIdNat 
+ = do  u <- getUniqueNat
+       return (BlockId u)
+
  
  getNewLabelNat :: NatM CLabel
  
  getNewLabelNat :: NatM CLabel
-getNewLabelNat = do u <- getUniqueNat; return (mkAsmTempLabel u)
+getNewLabelNat 
+ = do  u <- getUniqueNat
+       return (mkAsmTempLabel u)
+
  
  getNewRegNat :: Size -> NatM Reg
  
  getNewRegNat :: Size -> NatM Reg
-getNewRegNat rep = do u <- getUniqueNat; return (mkVReg u rep)
+getNewRegNat rep 
+ = do  u <- getUniqueNat
+       return (targetMkVReg u rep)
+
  
  getNewRegPairNat :: Size -> NatM (Reg,Reg)
  
  getNewRegPairNat :: Size -> NatM (Reg,Reg)
-getNewRegPairNat rep = do 
-  u <- getUniqueNat
-  let lo = mkVReg u rep; hi = getHiVRegFromLo lo
-  return (lo,hi)
+getNewRegPairNat rep 
+ = do  u       <- getUniqueNat
+       let lo  = targetMkVReg u rep; hi = getHiVRegFromLo lo
+       return (lo,hi)
+
  
  getPicBaseMaybeNat :: NatM (Maybe Reg)
  
  getPicBaseMaybeNat :: NatM (Maybe Reg)
-getPicBaseMaybeNat = NatM (\state -> (natm_pic state, state))
+getPicBaseMaybeNat 
+       = NatM (\state -> (natm_pic state, state))
+
  
  getPicBaseNat :: Size -> NatM Reg
  
  getPicBaseNat :: Size -> NatM Reg
-getPicBaseNat rep = do
-  mbPicBase <- getPicBaseMaybeNat
-  case mbPicBase of
-        Just picBase -> return picBase
-        Nothing -> do
-            reg <- getNewRegNat rep
-            NatM (\state -> (reg, state { natm_pic = Just reg }))
+getPicBaseNat rep 
+ = do  mbPicBase <- getPicBaseMaybeNat
+       case mbPicBase of
+               Just picBase -> return picBase
+               Nothing 
+                -> do
+                       reg <- getNewRegNat rep
+                       NatM (\state -> (reg, state { natm_pic = Just reg }))
diff --git a/compiler/nativeGen/PositionIndependentCode.hs b/compiler/nativeGen/PIC.hs

similarity index 57%

rename from compiler/nativeGen/PositionIndependentCode.hs

rename to compiler/nativeGen/PIC.hs

index a1e11d8..98e4f9f 100644 (file)
--- a/compiler/nativeGen/PositionIndependentCode.hs
+++ b/compiler/nativeGen/PIC.hs
@@ -1,24 +1,9 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module PositionIndependentCode (
-        cmmMakeDynamicReference,
-        ReferenceKind(..),
-        needImportedSymbols,
-        pprImportedSymbol,
-        pprGotDeclaration,
-        initializePicBase
-     ) where
-
-#include "HsVersions.h"
-
  {-
    This module handles generation of position independent code and
    dynamic-linking related issues for the native code generator.
  {-
    This module handles generation of position independent code and
    dynamic-linking related issues for the native code generator.
+
+  This depends both the architecture and OS, so we define it here
+  instead of in one of the architecture specific modules.
    
    Things outside this module which are related to this:
    
    
    Things outside this module which are related to this:
    
@@ -53,7 +38,30 @@ module PositionIndependentCode (
        and ppc-linux).
  -}
  
        and ppc-linux).
  -}
  
-#include "nativeGen/NCG.h"
+module PIC (
+        cmmMakeDynamicReference,
+        ReferenceKind(..),
+        needImportedSymbols,
+        pprImportedSymbol,
+        pprGotDeclaration,
+
+        initializePicBase_ppc,
+        initializePicBase_x86
+)
+
+where
+
+import qualified PPC.Instr     as PPC
+import qualified PPC.Regs      as PPC
+
+import qualified X86.Instr     as X86
+
+import Platform
+import Instruction
+import Size
+import Reg
+import NCGMonad
+
  
  import Cmm
  import CLabel           ( CLabel, pprCLabel,
  
  import Cmm
  import CLabel           ( CLabel, pprCLabel,
@@ -61,13 +69,8 @@ import CLabel           ( CLabel, pprCLabel,
                            dynamicLinkerLabelInfo, mkPicBaseLabel,
                            labelDynamic, externallyVisibleCLabel )
  
                            dynamicLinkerLabelInfo, mkPicBaseLabel,
                            labelDynamic, externallyVisibleCLabel )
  
-#if linux_TARGET_OS
  import CLabel           ( mkForeignLabel )
  import CLabel           ( mkForeignLabel )
-#endif
  
  
-import Regs
-import Instrs
-import NCGMonad         ( NatM, getNewRegNat, getNewLabelNat )
  
  import StaticFlags     ( opt_PIC, opt_Static )
  import BasicTypes
  
  import StaticFlags     ( opt_PIC, opt_Static )
  import BasicTypes
@@ -80,8 +83,7 @@ import DynFlags
  import FastString
  
  
  import FastString
  
  
--- The most important function here is cmmMakeDynamicReference.
-
+--------------------------------------------------------------------------------
  -- It gets called by the cmmToCmm pass for every CmmLabel in the Cmm
  -- code. It does The Right Thing(tm) to convert the CmmLabel into a
  -- position-independent, dynamic-linking-aware reference to the thing
  -- It gets called by the cmmToCmm pass for every CmmLabel in the Cmm
  -- code. It does The Right Thing(tm) to convert the CmmLabel into a
  -- position-independent, dynamic-linking-aware reference to the thing
@@ -94,10 +96,12 @@ import FastString
  -- - addImportCmmOpt for the CmmOptM monad
  -- - addImportNat for the NatM monad.
  
  -- - addImportCmmOpt for the CmmOptM monad
  -- - addImportNat for the NatM monad.
  
-data ReferenceKind = DataReference
-                   | CallReference
-                   | JumpReference
-                   deriving(Eq)
+data ReferenceKind 
+       = DataReference
+        | CallReference
+        | JumpReference
+        deriving(Eq)
+
  
  cmmMakeDynamicReference
    :: Monad m => DynFlags
  
  cmmMakeDynamicReference
    :: Monad m => DynFlags
@@ -110,80 +114,98 @@ cmmMakeDynamicReference
  cmmMakeDynamicReference dflags addImport referenceKind lbl
    | Just _ <- dynamicLinkerLabelInfo lbl
    = return $ CmmLit $ CmmLabel lbl   -- already processed it, pass through
  cmmMakeDynamicReference dflags addImport referenceKind lbl
    | Just _ <- dynamicLinkerLabelInfo lbl
    = return $ CmmLit $ CmmLabel lbl   -- already processed it, pass through
-  | otherwise = case howToAccessLabel dflags referenceKind lbl of
+
+  | otherwise 
+  = case howToAccessLabel 
+               dflags 
+               (platformArch $ targetPlatform dflags)
+               (platformOS   $ targetPlatform dflags)
+               referenceKind lbl of
+
          AccessViaStub -> do
                let stub = mkDynamicLinkerLabel CodeStub lbl
                addImport stub
                return $ CmmLit $ CmmLabel stub
          AccessViaStub -> do
                let stub = mkDynamicLinkerLabel CodeStub lbl
                addImport stub
                return $ CmmLit $ CmmLabel stub
+
          AccessViaSymbolPtr -> do
                let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
                addImport symbolPtr
          AccessViaSymbolPtr -> do
                let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
                addImport symbolPtr
-              return $ CmmLoad (cmmMakePicReference symbolPtr) bWord
+              return $ CmmLoad (cmmMakePicReference dflags symbolPtr) bWord
+
          AccessDirectly -> case referenceKind of
                  -- for data, we might have to make some calculations:
          AccessDirectly -> case referenceKind of
                  -- for data, we might have to make some calculations:
-              DataReference -> return $ cmmMakePicReference lbl  
+              DataReference -> return $ cmmMakePicReference dflags lbl  
                  -- all currently supported processors support
                  -- PC-relative branch and call instructions,
                  -- so just jump there if it's a call or a jump
                _ -> return $ CmmLit $ CmmLabel lbl
                  -- all currently supported processors support
                  -- PC-relative branch and call instructions,
                  -- so just jump there if it's a call or a jump
                _ -> return $ CmmLit $ CmmLabel lbl
-  
--- -------------------------------------------------------------------
-  
+
+
+-- -----------------------------------------------------------------------------
  -- Create a position independent reference to a label.
  -- (but do not bother with dynamic linking).
  -- We calculate the label's address by adding some (platform-dependent)
  -- offset to our base register; this offset is calculated by
  -- the function picRelative in the platform-dependent part below.
  
  -- Create a position independent reference to a label.
  -- (but do not bother with dynamic linking).
  -- We calculate the label's address by adding some (platform-dependent)
  -- offset to our base register; this offset is calculated by
  -- the function picRelative in the platform-dependent part below.
  
-cmmMakePicReference :: CLabel -> CmmExpr
-  
-#if !mingw32_TARGET_OS
+cmmMakePicReference :: DynFlags -> CLabel -> CmmExpr
+cmmMakePicReference dflags lbl
+
          -- Windows doesn't need PIC,
          -- everything gets relocated at runtime
          -- Windows doesn't need PIC,
          -- everything gets relocated at runtime
+       | OSMinGW32     <- platformOS $ targetPlatform dflags
+       = CmmLit $ CmmLabel lbl
  
  
-cmmMakePicReference lbl
-    | (opt_PIC || not opt_Static) && absoluteLabel lbl = CmmMachOp (MO_Add wordWidth) [
-            CmmReg (CmmGlobal PicBaseReg),
-            CmmLit $ picRelative lbl
-        ]
-    where
-        absoluteLabel lbl = case dynamicLinkerLabelInfo lbl of
-                                Just (GotSymbolPtr, _) -> False
-                                Just (GotSymbolOffset, _) -> False
-                                _ -> True
  
  
-#endif
-cmmMakePicReference lbl = CmmLit $ CmmLabel lbl
+       | (opt_PIC || not opt_Static) && absoluteLabel lbl 
+       = CmmMachOp (MO_Add wordWidth) 
+               [ CmmReg (CmmGlobal PicBaseReg)
+               , CmmLit $ picRelative 
+                               (platformArch   $ targetPlatform dflags)
+                               (platformOS     $ targetPlatform dflags)
+                               lbl ]
  
  
--- ===================================================================
--- Platform dependent stuff
--- ===================================================================
+       | otherwise
+       = CmmLit $ CmmLabel lbl
+  
+  
+absoluteLabel :: CLabel -> Bool
+absoluteLabel lbl 
+ = case dynamicLinkerLabelInfo lbl of
+       Just (GotSymbolPtr, _)          -> False
+       Just (GotSymbolOffset, _)       -> False
+       _                               -> True
  
  
+
+--------------------------------------------------------------------------------
  -- Knowledge about how special dynamic linker labels like symbol
  -- pointers, code stubs and GOT offsets look like is located in the
  -- module CLabel.
  
  -- Knowledge about how special dynamic linker labels like symbol
  -- pointers, code stubs and GOT offsets look like is located in the
  -- module CLabel.
  
--- -------------------------------------------------------------------
-
  -- We have to decide which labels need to be accessed
  -- indirectly or via a piece of stub code.
  -- We have to decide which labels need to be accessed
  -- indirectly or via a piece of stub code.
+data LabelAccessStyle 
+       = AccessViaStub
+       | AccessViaSymbolPtr
+       | AccessDirectly
  
  
-data LabelAccessStyle = AccessViaStub
-                      | AccessViaSymbolPtr
-                      | AccessDirectly
+howToAccessLabel 
+       :: DynFlags -> Arch -> OS -> ReferenceKind -> CLabel -> LabelAccessStyle
  
  
-howToAccessLabel :: DynFlags -> ReferenceKind -> CLabel -> LabelAccessStyle
-
-#if mingw32_TARGET_OS
  -- Windows
  -- 
  -- We need to use access *exactly* those things that
  -- are imported from a DLL via an __imp_* label.
  -- There are no stubs for imported code.
  -- Windows
  -- 
  -- We need to use access *exactly* those things that
  -- are imported from a DLL via an __imp_* label.
  -- There are no stubs for imported code.
+--
+howToAccessLabel dflags _ OSMinGW32 _ lbl
+       | labelDynamic (thisPackage dflags) lbl 
+       = AccessViaSymbolPtr
+
+       | otherwise
+       = AccessDirectly
+
  
  
-howToAccessLabel dflags _ lbl | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
-                             | otherwise        = AccessDirectly
-#elif darwin_TARGET_OS
  -- Mach-O (Darwin, Mac OS X)
  --
  -- Indirect access is required in the following cases:
  -- Mach-O (Darwin, Mac OS X)
  --
  -- Indirect access is required in the following cases:
@@ -191,54 +213,48 @@ howToAccessLabel dflags _ lbl | labelDynamic (thisPackage dflags) lbl = AccessVi
  --  * (not on x86_64) data from a different module, if we're generating PIC code
  -- It is always possible to access something indirectly,
  -- even when it's not necessary.
  --  * (not on x86_64) data from a different module, if we're generating PIC code
  -- It is always possible to access something indirectly,
  -- even when it's not necessary.
+--
+howToAccessLabel dflags arch OSDarwin DataReference lbl
+       -- data access to a dynamic library goes via a symbol pointer
+       | labelDynamic (thisPackage dflags) lbl 
+       = AccessViaSymbolPtr
+
+       -- when generating PIC code, all cross-module data references must
+       -- must go via a symbol pointer, too, because the assembler
+       -- cannot generate code for a label difference where one
+       -- label is undefined. Doesn't apply t x86_64.
+       -- Unfortunately, we don't know whether it's cross-module,
+       -- so we do it for all externally visible labels.
+       -- This is a slight waste of time and space, but otherwise
+       -- we'd need to pass the current Module all the way in to
+       -- this function.
+       | arch /= ArchX86_64
+       , opt_PIC && externallyVisibleCLabel lbl 
+       = AccessViaSymbolPtr
+        
+       | otherwise 
+       = AccessDirectly
+
+howToAccessLabel dflags arch OSDarwin JumpReference lbl
+       -- dyld code stubs don't work for tailcalls because the
+       -- stack alignment is only right for regular calls.
+       -- Therefore, we have to go via a symbol pointer:
+       | arch == ArchX86 || arch == ArchX86_64
+       , labelDynamic (thisPackage dflags) lbl
+       = AccessViaSymbolPtr
+           
+
+howToAccessLabel dflags arch OSDarwin _ lbl
+       -- Code stubs are the usual method of choice for imported code;
+       -- not needed on x86_64 because Apple's new linker, ld64, generates
+       -- them automatically.
+       | arch /= ArchX86_64
+       , labelDynamic (thisPackage dflags) lbl
+       = AccessViaStub
+
+       | otherwise
+       = AccessDirectly
  
  
-howToAccessLabel dflags DataReference lbl
-      -- data access to a dynamic library goes via a symbol pointer
-    | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
-    
-#if !x86_64_TARGET_ARCH
-      -- when generating PIC code, all cross-module data references must
-      -- must go via a symbol pointer, too, because the assembler
-      -- cannot generate code for a label difference where one
-      -- label is undefined. Doesn't apply t x86_64.
-      -- Unfortunately, we don't know whether it's cross-module,
-      -- so we do it for all externally visible labels.
-      -- This is a slight waste of time and space, but otherwise
-      -- we'd need to pass the current Module all the way in to
-      -- this function.
-    | opt_PIC && externallyVisibleCLabel lbl = AccessViaSymbolPtr
-#endif
-    | otherwise = AccessDirectly
-
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-    -- dyld code stubs don't work for tailcalls because the
-    -- stack alignment is only right for regular calls.
-    -- Therefore, we have to go via a symbol pointer:
-howToAccessLabel dflags JumpReference lbl
-    | labelDynamic (thisPackage dflags) lbl
-    = AccessViaSymbolPtr
-#endif
-
-howToAccessLabel dflags _ lbl
-#if !x86_64_TARGET_ARCH
-    -- Code stubs are the usual method of choice for imported code;
-    -- not needed on x86_64 because Apple's new linker, ld64, generates
-    -- them automatically.
-    | labelDynamic (thisPackage dflags) lbl
-    = AccessViaStub
-#endif
-    | otherwise
-    = AccessDirectly
-
-
-#elif linux_TARGET_OS && powerpc64_TARGET_ARCH
--- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
-
-howToAccessLabel _ DataReference lbl = AccessViaSymbolPtr
-howToAccessLabel _ _ lbl = AccessDirectly -- actually, .label instead of label
-
-#elif linux_TARGET_OS
  -- ELF (Linux)
  --
  -- ELF tries to pretend to the main application code that dynamic linking does 
  -- ELF (Linux)
  --
  -- ELF tries to pretend to the main application code that dynamic linking does 
@@ -250,63 +266,82 @@ howToAccessLabel _ _ lbl = AccessDirectly -- actually, .label instead of label
  -- from position independent code. It is also required from the main program
  -- when dynamic libraries containing Haskell code are used.
  
  -- from position independent code. It is also required from the main program
  -- when dynamic libraries containing Haskell code are used.
  
-howToAccessLabel _ _ lbl
+howToAccessLabel _ ArchPPC_64 OSLinux kind _
+
+       -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
+       | DataReference <- kind
+       = AccessViaSymbolPtr
+       
+       -- actually, .label instead of label
+       | otherwise
+       = AccessDirectly 
+
+howToAccessLabel _ _ OSLinux _ _
         -- no PIC -> the dynamic linker does everything for us;
         --           if we don't dynamically link to Haskell code,
         --           it actually manages to do so without messing thins up.
         -- no PIC -> the dynamic linker does everything for us;
         --           if we don't dynamically link to Haskell code,
         --           it actually manages to do so without messing thins up.
-    | not opt_PIC && opt_Static = AccessDirectly
-   
-howToAccessLabel dflags DataReference lbl
-       -- A dynamic label needs to be accessed via a symbol pointer.
-    | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
-#if powerpc_TARGET_ARCH
+       | not opt_PIC && opt_Static 
+       = AccessDirectly
+
+howToAccessLabel dflags arch OSLinux DataReference lbl
+       -- A dynamic label needs to be accessed via a symbol pointer.
+       | labelDynamic (thisPackage dflags) lbl 
+       = AccessViaSymbolPtr
+
         -- For PowerPC32 -fPIC, we have to access even static data
         -- via a symbol pointer (see below for an explanation why
         -- PowerPC32 Linux is especially broken).
         -- For PowerPC32 -fPIC, we have to access even static data
         -- via a symbol pointer (see below for an explanation why
         -- PowerPC32 Linux is especially broken).
-    | opt_PIC = AccessViaSymbolPtr
-#endif
-    | otherwise = AccessDirectly
-
-
--- In most cases, we have to avoid symbol stubs on ELF, for the following reasons:
--- * on i386, the position-independent symbol stubs in the Procedure Linkage Table
---   require the address of the GOT to be loaded into register %ebx on entry.
--- * The linker will take any reference to the symbol stub as a hint that
---   the label in question is a code label. When linking executables, this
---   will cause the linker to replace even data references to the label with
---   references to the symbol stub.
-
--- This leaves calling a (foreign) function from non-PIC code
--- (AccessDirectly, because we get an implicit symbol stub)
--- and calling functions from PIC code on non-i386 platforms (via a symbol stub) 
-
-howToAccessLabel dflags CallReference lbl
-    | labelDynamic (thisPackage dflags) lbl && not opt_PIC
-    = AccessDirectly
-#if !i386_TARGET_ARCH
-    | labelDynamic (thisPackage dflags) lbl && opt_PIC
-    = AccessViaStub
-#endif
-
-howToAccessLabel dflags _ lbl
-    | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
-    | otherwise = AccessDirectly
-#else
---
+       | arch == ArchPPC
+       , opt_PIC
+       = AccessViaSymbolPtr
+       
+       | otherwise 
+       = AccessDirectly
+
+
+       -- In most cases, we have to avoid symbol stubs on ELF, for the following reasons:
+       -- * on i386, the position-independent symbol stubs in the Procedure Linkage Table
+       --   require the address of the GOT to be loaded into register %ebx on entry.
+       -- * The linker will take any reference to the symbol stub as a hint that
+       --   the label in question is a code label. When linking executables, this
+       --   will cause the linker to replace even data references to the label with
+       --   references to the symbol stub.
+
+       -- This leaves calling a (foreign) function from non-PIC code
+       -- (AccessDirectly, because we get an implicit symbol stub)
+       -- and calling functions from PIC code on non-i386 platforms (via a symbol stub) 
+
+howToAccessLabel dflags arch OSLinux CallReference lbl
+       | labelDynamic (thisPackage dflags) lbl && not opt_PIC
+       = AccessDirectly
+
+       | arch /= ArchX86
+       , labelDynamic (thisPackage dflags) lbl && opt_PIC
+       = AccessViaStub
+
+howToAccessLabel dflags _ OSLinux _ lbl
+       | labelDynamic (thisPackage dflags) lbl 
+       = AccessViaSymbolPtr
+
+       | otherwise 
+       = AccessDirectly
+
  -- all other platforms
  -- all other platforms
---
-howToAccessLabel _ _ _
-        | not opt_PIC = AccessDirectly
-        | otherwise   = panic "howToAccessLabel: PIC not defined for this platform"
-#endif
+howToAccessLabel _ _ _ _ _
+       | not opt_PIC 
+       = AccessDirectly
+       
+       | otherwise
+       = panic "howToAccessLabel: PIC not defined for this platform"
+
+
  
  -- -------------------------------------------------------------------
  
  -- -------------------------------------------------------------------
+-- | Says what we we have to add to our 'PIC base register' in order to
+--     get the address of a label.
  
  
--- What do we have to add to our 'PIC base register' in order to
--- get the address of a label?
+picRelative :: Arch -> OS -> CLabel -> CmmLit
  
  
-picRelative :: CLabel -> CmmLit
-#if darwin_TARGET_OS && !x86_64_TARGET_ARCH
  -- Darwin, but not x86_64:
  -- The PIC base register points to the PIC base label at the beginning
  -- of the current CmmTop. We just have to use a label difference to
  -- Darwin, but not x86_64:
  -- The PIC base register points to the PIC base label at the beginning
  -- of the current CmmTop. We just have to use a label difference to
@@ -314,21 +349,21 @@ picRelative :: CLabel -> CmmLit
  -- We have already made sure that all labels that are not from the current
  -- module are accessed indirectly ('as' can't calculate differences between
  -- undefined labels).
  -- We have already made sure that all labels that are not from the current
  -- module are accessed indirectly ('as' can't calculate differences between
  -- undefined labels).
+picRelative arch OSDarwin lbl
+       | arch /= ArchX86_64
+       = CmmLabelDiffOff lbl mkPicBaseLabel 0
+       
  
  
-picRelative lbl
-  = CmmLabelDiffOff lbl mkPicBaseLabel 0
-
-#elif powerpc_TARGET_ARCH && linux_TARGET_OS
  -- PowerPC Linux:
  -- The PIC base register points to our fake GOT. Use a label difference
  -- to get the offset.
  -- We have made sure that *everything* is accessed indirectly, so this
  -- is only used for offsets from the GOT to symbol pointers inside the
  -- GOT.
  -- PowerPC Linux:
  -- The PIC base register points to our fake GOT. Use a label difference
  -- to get the offset.
  -- We have made sure that *everything* is accessed indirectly, so this
  -- is only used for offsets from the GOT to symbol pointers inside the
  -- GOT.
-picRelative lbl
-  = CmmLabelDiffOff lbl gotLabel 0
+picRelative ArchPPC OSLinux lbl
+       = CmmLabelDiffOff lbl gotLabel 0
+
  
  
-#elif linux_TARGET_OS || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
  -- Most Linux versions:
  -- The PIC base register points to the GOT. Use foo@got for symbol
  -- pointers, and foo@gotoff for everything else.
  -- Most Linux versions:
  -- The PIC base register points to the GOT. Use foo@got for symbol
  -- pointers, and foo@gotoff for everything else.
@@ -336,61 +371,116 @@ picRelative lbl
  -- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers,
  -- and a GotSymbolOffset label for other things.
  -- For reasons of tradition, the symbol offset label is written as a plain label.
  -- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers,
  -- and a GotSymbolOffset label for other things.
  -- For reasons of tradition, the symbol offset label is written as a plain label.
+picRelative arch os lbl
+       | os == OSLinux || (os == OSDarwin && arch == ArchX86_64)
+       = let   result
+                       | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
+                       = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl'
  
  
-picRelative lbl
-  | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
-  = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl'
-  | otherwise
-  = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl
+                       | otherwise
+                       = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl
+                       
+         in    result
+
+picRelative _ _ _
+       = panic "PositionIndependentCode.picRelative undefined for this platform"
  
  
-#else
-picRelative lbl = panic "PositionIndependentCode.picRelative"
-#endif
  
  
--- -------------------------------------------------------------------
  
  
--- What do we have to add to every assembly file we generate?
+--------------------------------------------------------------------------------
  
  -- utility function for pretty-printing asm-labels,
  -- copied from PprMach
  
  -- utility function for pretty-printing asm-labels,
  -- copied from PprMach
-asmSDoc d = Outputable.withPprStyleDoc (
-             Outputable.mkCodeStyle Outputable.AsmStyle) d
-pprCLabel_asm l = asmSDoc (pprCLabel l)
+--
+asmSDoc :: Outputable.SDoc -> Doc
+asmSDoc d 
+       = Outputable.withPprStyleDoc 
+               (Outputable.mkCodeStyle Outputable.AsmStyle) d
+
+pprCLabel_asm :: CLabel -> Doc
+pprCLabel_asm l 
+       = asmSDoc (pprCLabel l)
+
+
+needImportedSymbols :: Arch -> OS -> Bool
+needImportedSymbols arch os
+       | os    == OSDarwin
+       , arch  /= ArchX86_64
+       = True
+       
+       -- PowerPC Linux: -fPIC or -dynamic
+       | os    == OSLinux
+       , arch  == ArchPPC
+       = opt_PIC || not opt_Static
+
+       -- i386 (and others?): -dynamic but not -fPIC
+       | os    == OSLinux
+       , arch  /= ArchPPC_64
+       = not opt_Static && not opt_PIC
+
+       | otherwise
+       = False
  
  
+-- gotLabel
+-- The label used to refer to our "fake GOT" from
+-- position-independent code.
+gotLabel :: CLabel
+gotLabel 
+       = mkForeignLabel -- HACK: it's not really foreign
+               (fsLit ".LCTOC1") Nothing False IsData
  
  
-#if darwin_TARGET_OS && !x86_64_TARGET_ARCH
  
  
-needImportedSymbols = True
  
  
+--------------------------------------------------------------------------------
  -- We don't need to declare any offset tables.
  -- However, for PIC on x86, we need a small helper function.
  -- We don't need to declare any offset tables.
  -- However, for PIC on x86, we need a small helper function.
-#if i386_TARGET_ARCH
-pprGotDeclaration
-    | opt_PIC
-    = vcat [
-        ptext (sLit ".section __TEXT,__textcoal_nt,coalesced,no_toc"),
-        ptext (sLit ".weak_definition ___i686.get_pc_thunk.ax"),
-        ptext (sLit ".private_extern ___i686.get_pc_thunk.ax"),
-        ptext (sLit "___i686.get_pc_thunk.ax:"),
-            ptext (sLit "\tmovl (%esp), %eax"),
-            ptext (sLit "\tret")
-    ]
-    | otherwise = Pretty.empty
-#else
-pprGotDeclaration = Pretty.empty
-#endif
+pprGotDeclaration :: Arch -> OS -> Doc
+pprGotDeclaration ArchX86 OSDarwin
+       | opt_PIC
+       = vcat [
+               ptext (sLit ".section __TEXT,__textcoal_nt,coalesced,no_toc"),
+               ptext (sLit ".weak_definition ___i686.get_pc_thunk.ax"),
+               ptext (sLit ".private_extern ___i686.get_pc_thunk.ax"),
+               ptext (sLit "___i686.get_pc_thunk.ax:"),
+               ptext (sLit "\tmovl (%esp), %eax"),
+               ptext (sLit "\tret") ]
+
+       | otherwise
+       = empty
+       
+               
+-- pprGotDeclaration
+-- Output whatever needs to be output once per .s file.
+-- The .LCTOC1 label is defined to point 32768 bytes into the table,
+-- to make the most of the PPC's 16-bit displacements.
+-- Only needed for PIC.
+pprGotDeclaration arch OSLinux
+       | arch  /= ArchPPC_64
+       , not opt_PIC 
+       = Pretty.empty
+
+       | arch  /= ArchPPC_64
+       = vcat [
+               ptext (sLit ".section \".got2\",\"aw\""),
+               ptext (sLit ".LCTOC1 = .+32768") ]
+
+pprGotDeclaration _ _
+       = panic "pprGotDeclaration: no match"   
  
  
+
+--------------------------------------------------------------------------------
  -- On Darwin, we have to generate our own stub code for lazy binding..
  -- For each processor architecture, there are two versions, one for PIC
  -- and one for non-PIC.
  --
  -- Whenever you change something in this assembler output, make sure
  -- the splitter in driver/split/ghc-split.lprl recognizes the new output
  -- On Darwin, we have to generate our own stub code for lazy binding..
  -- For each processor architecture, there are two versions, one for PIC
  -- and one for non-PIC.
  --
  -- Whenever you change something in this assembler output, make sure
  -- the splitter in driver/split/ghc-split.lprl recognizes the new output
-pprImportedSymbol importedLbl
-#if powerpc_TARGET_ARCH
-    | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
-    = case opt_PIC of
-        False ->
+
+pprImportedSymbol :: Arch -> OS -> CLabel -> Doc
+pprImportedSymbol ArchPPC OSDarwin importedLbl
+       | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
+       = case opt_PIC of
+           False ->
              vcat [
                  ptext (sLit ".symbol_stub"),
                  ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"),
              vcat [
                  ptext (sLit ".symbol_stub"),
                  ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"),
@@ -404,7 +494,7 @@ pprImportedSymbol importedLbl
                          <> ptext (sLit "$lazy_ptr)"),
                      ptext (sLit "\tbctr")
              ]
                          <> ptext (sLit "$lazy_ptr)"),
                      ptext (sLit "\tbctr")
              ]
-        True ->
+           True ->
              vcat [
                  ptext (sLit ".section __TEXT,__picsymbolstub1,")
                    <> ptext (sLit "symbol_stubs,pure_instructions,32"),
              vcat [
                  ptext (sLit ".section __TEXT,__picsymbolstub1,")
                    <> ptext (sLit "symbol_stubs,pure_instructions,32"),
@@ -424,16 +514,27 @@ pprImportedSymbol importedLbl
                      ptext (sLit "\tmtctr r12"),
                      ptext (sLit "\tbctr")
              ]
                      ptext (sLit "\tmtctr r12"),
                      ptext (sLit "\tbctr")
              ]
-    $+$ vcat [
-        ptext (sLit ".lazy_symbol_pointer"),
-        ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
-            ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
-            ptext (sLit "\t.long dyld_stub_binding_helper")
-    ]
-#elif i386_TARGET_ARCH
-    | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
-    = case opt_PIC of
-        False ->
+         $+$ vcat [
+               ptext (sLit ".lazy_symbol_pointer"),
+               ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
+               ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+               ptext (sLit "\t.long dyld_stub_binding_helper")]
+
+       | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+       = vcat [
+               ptext (sLit ".non_lazy_symbol_pointer"),
+               char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
+               ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+               ptext (sLit "\t.long\t0")]
+
+       | otherwise 
+       = empty
+
+               
+pprImportedSymbol ArchX86 OSDarwin importedLbl
+       | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
+       = case opt_PIC of
+           False ->
              vcat [
                  ptext (sLit ".symbol_stub"),
                  ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"),
              vcat [
                  ptext (sLit ".symbol_stub"),
                  ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"),
@@ -446,7 +547,7 @@ pprImportedSymbol importedLbl
                          <> ptext (sLit "$lazy_ptr"),
                      ptext (sLit "\tjmp dyld_stub_binding_helper")
              ]
                          <> ptext (sLit "$lazy_ptr"),
                      ptext (sLit "\tjmp dyld_stub_binding_helper")
              ]
-        True ->
+           True ->
              vcat [
                  ptext (sLit ".section __TEXT,__picsymbolstub2,")
                      <> ptext (sLit "symbol_stubs,pure_instructions,25"),
              vcat [
                  ptext (sLit ".section __TEXT,__picsymbolstub2,")
                      <> ptext (sLit "symbol_stubs,pure_instructions,25"),
@@ -464,27 +565,28 @@ pprImportedSymbol importedLbl
                      ptext (sLit "\tpushl %eax"),
                      ptext (sLit "\tjmp dyld_stub_binding_helper")
              ]
                      ptext (sLit "\tpushl %eax"),
                      ptext (sLit "\tjmp dyld_stub_binding_helper")
              ]
-    $+$ vcat [        ptext (sLit ".section __DATA, __la_sym_ptr")
+         $+$ vcat [        ptext (sLit ".section __DATA, __la_sym_ptr")
                      <> (if opt_PIC then int 2 else int 3)
                      <> ptext (sLit ",lazy_symbol_pointers"),
                      <> (if opt_PIC then int 2 else int 3)
                      <> ptext (sLit ",lazy_symbol_pointers"),
-        ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
-            ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
-            ptext (sLit "\t.long L") <> pprCLabel_asm lbl
-                    <> ptext (sLit "$stub_binder")
-    ]
-#endif
--- We also have to declare our symbol pointers ourselves:
-    | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
-    = vcat [
-        ptext (sLit ".non_lazy_symbol_pointer"),
-        char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
-           ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
-            ptext (sLit "\t.long\t0")
-    ]
-
-    | otherwise = empty
-
-#elif linux_TARGET_OS && !powerpc64_TARGET_ARCH
+               ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
+                   ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+                   ptext (sLit "\t.long L") <> pprCLabel_asm lbl
+                    <> ptext (sLit "$stub_binder")]
+
+       | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+       = vcat [
+               ptext (sLit ".non_lazy_symbol_pointer"),
+               char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
+               ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+               ptext (sLit "\t.long\t0")]
+
+       | otherwise 
+       = empty
+
+
+pprImportedSymbol _ OSDarwin _
+       = empty
+       
  
  -- ELF / Linux
  --
  
  -- ELF / Linux
  --
@@ -514,64 +616,29 @@ pprImportedSymbol importedLbl
  -- When needImportedSymbols is defined,
  -- the NCG will keep track of all DynamicLinkerLabels it uses
  -- and output each of them using pprImportedSymbol.
  -- When needImportedSymbols is defined,
  -- the NCG will keep track of all DynamicLinkerLabels it uses
  -- and output each of them using pprImportedSymbol.
-#if powerpc_TARGET_ARCH
-    -- PowerPC Linux: -fPIC or -dynamic
-needImportedSymbols = opt_PIC || not opt_Static
-#else
-    -- i386 (and others?): -dynamic but not -fPIC
-needImportedSymbols = not opt_Static && not opt_PIC
-#endif
  
  
--- gotLabel
--- The label used to refer to our "fake GOT" from
--- position-independent code.
-gotLabel = mkForeignLabel -- HACK: it's not really foreign
-                           (fsLit ".LCTOC1") Nothing False IsData
+pprImportedSymbol ArchPPC_64 OSLinux _
+       = empty
  
  
--- pprGotDeclaration
--- Output whatever needs to be output once per .s file.
--- The .LCTOC1 label is defined to point 32768 bytes into the table,
--- to make the most of the PPC's 16-bit displacements.
--- Only needed for PIC.
-
-pprGotDeclaration
-    | not opt_PIC = Pretty.empty
-    | otherwise = vcat [
-        ptext (sLit ".section \".got2\",\"aw\""),
-        ptext (sLit ".LCTOC1 = .+32768")
-    ]
-
--- We generate one .long/.quad literal for every symbol we import;
--- the dynamic linker will relocate those addresses.
-
-pprImportedSymbol importedLbl
-    | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
-    = vcat [
-        ptext (sLit ".section \".got2\", \"aw\""),
-        ptext (sLit ".LC_") <> pprCLabel_asm lbl <> char ':',
-        ptext symbolSize <+> pprCLabel_asm lbl
-    ]
-
--- PLT code stubs are generated automatically by the dynamic linker.
-    | otherwise = empty
-    where
-      symbolSize = case wordWidth of
+pprImportedSymbol _ OSLinux importedLbl
+       | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+       = let   symbolSize = case wordWidth of
                      W32 -> sLit "\t.long"
                      W64 -> sLit "\t.quad"
                      _ -> panic "Unknown wordRep in pprImportedSymbol"
  
                      W32 -> sLit "\t.long"
                      W64 -> sLit "\t.quad"
                      _ -> panic "Unknown wordRep in pprImportedSymbol"
  
-#else
-
--- For all other currently supported platforms, we don't need to do
--- anything at all.
+         in vcat [
+               ptext (sLit ".section \".got2\", \"aw\""),
+               ptext (sLit ".LC_") <> pprCLabel_asm lbl <> char ':',
+               ptext symbolSize <+> pprCLabel_asm lbl ]
  
  
-needImportedSymbols = False
-pprGotDeclaration = Pretty.empty
-pprImportedSymbol _ = empty
-#endif
+       -- PLT code stubs are generated automatically by the dynamic linker.
+       | otherwise = empty
  
  
--- -------------------------------------------------------------------
+pprImportedSymbol _ _ _
+       = panic "PIC.pprImportedSymbol: no match"
  
  
+--------------------------------------------------------------------------------
  -- Generate code to calculate the address that should be put in the
  -- PIC base register.
  -- This is called by MachCodeGen for every CmmProc that accessed the
  -- Generate code to calculate the address that should be put in the
  -- PIC base register.
  -- This is called by MachCodeGen for every CmmProc that accessed the
@@ -581,10 +648,6 @@ pprImportedSymbol _ = empty
  -- It is assumed that the first NatCmmTop in the input list is a Proc
  -- and the rest are CmmDatas.
  
  -- It is assumed that the first NatCmmTop in the input list is a Proc
  -- and the rest are CmmDatas.
  
-initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop]
-
-#if darwin_TARGET_OS
-
  -- Darwin is simple: just fetch the address of a local label.
  -- The FETCHPC pseudo-instruction is expanded to multiple instructions
  -- during pretty-printing so that we don't have to deal with the
  -- Darwin is simple: just fetch the address of a local label.
  -- The FETCHPC pseudo-instruction is expanded to multiple instructions
  -- during pretty-printing so that we don't have to deal with the
@@ -598,12 +661,7 @@ initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop]
  --          call 1f
  --      1:  popl %picReg
  
  --          call 1f
  --      1:  popl %picReg
  
-initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
-    = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
-    where BasicBlock bID insns = head blocks
-          b' = BasicBlock bID (FETCHPC picReg : insns)
  
  
-#elif powerpc_TARGET_ARCH && linux_TARGET_OS
  
  -- Get a pointer to our own fake GOT, which is defined on a per-module basis.
  -- This is exactly how GCC does it, and it's quite horrible:
  
  -- Get a pointer to our own fake GOT, which is defined on a per-module basis.
  -- This is exactly how GCC does it, and it's quite horrible:
@@ -612,7 +670,13 @@ initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
  -- define in .text space right next to the proc. This .long literal contains
  -- the (32-bit) offset from our local label to our global offset table
  -- (.LCTOC1 aka gotOffLabel).
  -- define in .text space right next to the proc. This .long literal contains
  -- the (32-bit) offset from our local label to our global offset table
  -- (.LCTOC1 aka gotOffLabel).
-initializePicBase picReg
+
+initializePicBase_ppc 
+       :: Arch -> OS -> Reg 
+       -> [NatCmmTop PPC.Instr] 
+       -> NatM [NatCmmTop PPC.Instr]
+
+initializePicBase_ppc ArchPPC OSLinux picReg
      (CmmProc info lab params (ListGraph blocks) : statics)
      = do
          gotOffLabel <- getNewLabelNat
      (CmmProc info lab params (ListGraph blocks) : statics)
      = do
          gotOffLabel <- getNewLabelNat
@@ -624,16 +688,33 @@ initializePicBase picReg
                                                           mkPicBaseLabel
                                                           0)
                          ]
                                                           mkPicBaseLabel
                                                           0)
                          ]
-            offsetToOffset = ImmConstantDiff (ImmCLbl gotOffLabel)
-                                             (ImmCLbl mkPicBaseLabel)
-            BasicBlock bID insns = head blocks
-            b' = BasicBlock bID (FETCHPC picReg
-                               : LD wordSize tmp
-                                    (AddrRegImm picReg offsetToOffset)
-                               : ADD picReg picReg (RIReg tmp)
+            offsetToOffset
+                       = PPC.ImmConstantDiff 
+                               (PPC.ImmCLbl gotOffLabel)
+                               (PPC.ImmCLbl mkPicBaseLabel)
+
+            BasicBlock bID insns 
+                       = head blocks
+
+            b' = BasicBlock bID (PPC.FETCHPC picReg
+                               : PPC.LD PPC.archWordSize tmp
+                                    (PPC.AddrRegImm picReg offsetToOffset)
+                               : PPC.ADD picReg picReg (PPC.RIReg tmp)
                                 : insns)
                                 : insns)
+
          return (CmmProc info lab params (ListGraph (b' : tail blocks)) : gotOffset : statics)
          return (CmmProc info lab params (ListGraph (b' : tail blocks)) : gotOffset : statics)
-#elif i386_TARGET_ARCH && linux_TARGET_OS
+
+initializePicBase_ppc ArchPPC OSDarwin picReg
+       (CmmProc info lab params (ListGraph blocks) : statics)
+       = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
+
+       where   BasicBlock bID insns = head blocks
+               b' = BasicBlock bID (PPC.FETCHPC picReg : insns)
+
+
+initializePicBase_ppc _ _ _ _
+       = panic "initializePicBase_ppc: not needed"
+
  
  -- We cheat a bit here by defining a pseudo-instruction named FETCHGOT
  -- which pretty-prints as:
  
  -- We cheat a bit here by defining a pseudo-instruction named FETCHGOT
  -- which pretty-prints as:
@@ -642,13 +723,24 @@ initializePicBase picReg
  --              addl __GLOBAL_OFFSET_TABLE__+.-1b, %picReg
  -- (See PprMach.lhs)
  
  --              addl __GLOBAL_OFFSET_TABLE__+.-1b, %picReg
  -- (See PprMach.lhs)
  
-initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
+initializePicBase_x86
+       :: Arch -> OS -> Reg 
+       -> [NatCmmTop X86.Instr] 
+       -> NatM [NatCmmTop X86.Instr]
+
+initializePicBase_x86 ArchX86 OSLinux picReg 
+       (CmmProc info lab params (ListGraph blocks) : statics)
      = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
      where BasicBlock bID insns = head blocks
      = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
      where BasicBlock bID insns = head blocks
-          b' = BasicBlock bID (FETCHGOT picReg : insns)
+          b' = BasicBlock bID (X86.FETCHGOT picReg : insns)
+
+initializePicBase_x86 ArchX86 OSDarwin picReg
+       (CmmProc info lab params (ListGraph blocks) : statics)
+       = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
+
+       where   BasicBlock bID insns = head blocks
+               b' = BasicBlock bID (X86.FETCHPC picReg : insns)
  
  
-#else
-initializePicBase picReg proc = panic "initializePicBase"
+initializePicBase_x86 _ _ _ _
+       = panic "initializePicBase_x86: not needed"
  
  
--- mingw32_TARGET_OS: not needed, won't be called
-#endif
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs

new file mode 100644 (file)

index 0000000..6661a3e
--- /dev/null
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -0,0 +1,1364 @@
+{-# OPTIONS -w #-}
+
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH} things, the
+-- structure should not be too overwhelming.
+
+module PPC.CodeGen ( 
+       cmmTopCodeGen, 
+       InstrBlock 
+) 
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import PPC.Instr
+import PPC.Cond
+import PPC.Regs
+import PPC.RegInfo
+import NCGMonad
+import Instruction
+import PIC
+import Size
+import RegClass
+import Reg
+import Platform
+
+-- Our intermediate code:
+import BlockId
+import PprCmm          ( pprExpr )
+import Cmm
+import CLabel
+
+-- The rest:
+import StaticFlags     ( opt_PIC )
+import OrdList
+import qualified Outputable as O
+import Outputable
+import DynFlags
+
+import Control.Monad   ( mapAndUnzipM )
+import Data.Bits
+import Data.Int
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal (pre-order?) yields the insns in the correct
+-- order.
+
+cmmTopCodeGen 
+       :: DynFlags 
+       -> RawCmmTop 
+       -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen dflags (CmmProc info lab params (ListGraph blocks)) = do
+  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+  picBaseMb <- getPicBaseMaybeNat
+  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+      tops = proc : concat statics
+      os   = platformOS $ targetPlatform dflags
+  case picBaseMb of
+      Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
+      Nothing -> return tops
+  
+cmmTopCodeGen dflags (CmmData sec dat) = do
+  return [CmmData sec dat]  -- no translation, we just use CmmStatic
+
+basicBlockCodeGen 
+       :: CmmBasicBlock 
+       -> NatM ( [NatBasicBlock Instr]
+               , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+  instrs <- stmtsToInstrs stmts
+  -- code generation may introduce new basic block boundaries, which
+  -- are indicated by the NEWBLOCK instruction.  We must split up the
+  -- instruction stream into basic blocks again.  Also, we extract
+  -- LDATAs here too.
+  let
+       (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+       
+       mkBlocks (NEWBLOCK id) (instrs,blocks,statics) 
+         = ([], BasicBlock id instrs : blocks, statics)
+       mkBlocks (LDATA sec dat) (instrs,blocks,statics) 
+         = (instrs, blocks, CmmData sec dat:statics)
+       mkBlocks instr (instrs,blocks,statics)
+         = (instr:instrs, blocks, statics)
+  -- in
+  return (BasicBlock id top : other_blocks, statics)
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+   = do instrss <- mapM stmtToInstrs stmts
+        return (concatOL instrss)
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+    CmmNop        -> return nilOL
+    CmmComment s   -> return (unitOL (COMMENT s))
+
+    CmmAssign reg src
+      | isFloatType ty -> assignReg_FltCode size reg src
+#if WORD_SIZE_IN_BITS==32
+      | isWord64 ty    -> assignReg_I64Code      reg src
+#endif
+      | otherwise       -> assignReg_IntCode size reg src
+       where ty = cmmRegType reg
+             size = cmmTypeSize ty
+
+    CmmStore addr src
+      | isFloatType ty -> assignMem_FltCode size addr src
+#if WORD_SIZE_IN_BITS==32
+      | isWord64 ty     -> assignMem_I64Code      addr src
+#endif
+      | otherwise       -> assignMem_IntCode size addr src
+       where ty = cmmExprType src
+             size = cmmTypeSize ty
+
+    CmmCall target result_regs args _ _
+       -> genCCall target result_regs args
+
+    CmmBranch id         -> genBranch id
+    CmmCondBranch arg id  -> genCondJump id arg
+    CmmSwitch arg ids     -> genSwitch arg ids
+    CmmJump arg params   -> genJump arg
+    CmmReturn params     ->
+      panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+--     They are really trees of insns to facilitate fast appending, where a
+--     left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock 
+       = OrdList Instr
+
+
+-- | Register's passed up the tree.  If the stix code forces the register
+--     to live in a pre-decided machine register, it comes out as @Fixed@;
+--     otherwise, it comes out as @Any@, and the parent can decide which
+--     register to put it in.
+--
+data Register
+       = Fixed Size Reg InstrBlock
+       | Any   Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+  = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+  = case get_GlobalReg_reg_or_addr mid of
+       Left (RealReg rrno) -> RealReg rrno
+       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+          -- By this stage, the only MagicIds remaining should be the
+          -- ones which map to a real machine register on this
+          -- platform.  Hence ...
+
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to.  So you can't put
+anything in between, lest it overwrite some of those registers.  If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+    code
+    LEA amode, tmp
+    ... other computation ...
+    ... (tmp) ...
+-}
+
+
+-- | Check whether an integer will fit in 32 bits.
+--     A CmmInt is intended to be truncated to the appropriate 
+--     number of bits, so here we truncate it to Int64.  This is
+--     important because e.g. -1 as a CmmInt might be either
+--     -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+  where i64 = fromIntegral i :: Int64
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+    where blockLabel = mkAsmTempLabel id
+
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+  where width = typeWidth (cmmRegType reg)
+
+mangleIndexTree _
+       = panic "PPC.CodeGen.mangleIndexTree: no match"
+
+-- -----------------------------------------------------------------------------
+--  Code gen for 64-bit arithmetic on 32-bit platforms
+
+{-
+Simple support for generating 64-bit code (ie, 64 bit values and 64
+bit assignments) on 32-bit platforms.  Unlike the main code generator
+we merely shoot for generating working code as simply as possible, and
+pay little attention to code quality.  Specifically, there is no
+attempt to deal cleverly with the fixed-vs-floating register
+distinction; all values are generated into (pairs of) floating
+registers, even if this would mean some redundant reg-reg moves as a
+result.  Only one of the VRegUniques is returned, since it will be
+of the VRegUniqueLo form, and the upper-half VReg can be determined
+by applying getHiVRegFromLo to it.
+-}
+
+data ChildCode64       -- a.k.a "Register64"
+      = ChildCode64 
+          InstrBlock   -- code
+          Reg          -- the lower 32-bit temporary which contains the
+                       -- result; use getHiVRegFromLo to find the other
+                       -- VRegUnique.  Rules of this simplified insn
+                       -- selection game are therefore that the returned
+                       -- Reg may be modified
+
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+--     we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+       tmp <- getNewRegNat rep
+       return (tmp, code tmp)
+    Fixed _ reg code -> 
+       return (reg, code)
+
+getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
+getI64Amodes addrTree = do
+    Amode hi_addr addr_code <- getAmode addrTree
+    case addrOffset hi_addr 4 of
+        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
+        Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
+                           return (AddrRegImm hi_ptr (ImmInt 0),
+                                   AddrRegImm hi_ptr (ImmInt 4),
+                                   code)
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+       ChildCode64 vcode rlo <- iselExpr64 valueTree
+       let 
+               rhi = getHiVRegFromLo rlo
+
+               -- Big-endian store
+               mov_hi = ST II32 rhi hi_addr
+               mov_lo = ST II32 rlo lo_addr
+       -- in
+       return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+   let 
+         r_dst_lo = mkVReg u_dst II32
+         r_dst_hi = getHiVRegFromLo r_dst_lo
+         r_src_hi = getHiVRegFromLo r_src_lo
+         mov_lo = MR r_dst_lo r_src_lo
+         mov_hi = MR r_dst_hi r_src_hi
+   -- in
+   return (
+        vcode `snocOL` mov_lo `snocOL` mov_hi
+     )
+
+assignReg_I64Code lvalue valueTree
+   = panic "assignReg_I64Code(powerpc): invalid lvalue"
+
+
+iselExpr64        :: CmmExpr -> NatM ChildCode64
+iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+    (rlo, rhi) <- getNewRegPairNat II32
+    let mov_hi = LD II32 rhi hi_addr
+        mov_lo = LD II32 rlo lo_addr
+    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
+                         rlo
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
+   = return (ChildCode64 nilOL (mkVReg vu II32))
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+  (rlo,rhi) <- getNewRegPairNat II32
+  let
+       half0 = fromIntegral (fromIntegral i :: Word16)
+       half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
+       half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
+       half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
+       
+       code = toOL [
+               LIS rlo (ImmInt half1),
+               OR rlo rlo (RIImm $ ImmInt half0),
+               LIS rhi (ImmInt half3),
+               OR rlo rlo (RIImm $ ImmInt half2)
+               ]
+  -- in
+  return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+   ChildCode64 code1 r1lo <- iselExpr64 e1
+   ChildCode64 code2 r2lo <- iselExpr64 e2
+   (rlo,rhi) <- getNewRegPairNat II32
+   let
+       r1hi = getHiVRegFromLo r1lo
+       r2hi = getHiVRegFromLo r2lo
+       code =  code1 `appOL`
+               code2 `appOL`
+               toOL [ ADDC rlo r1lo r2lo,
+                      ADDE rhi r1hi r2hi ]
+   -- in
+   return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
+    (expr_reg,expr_code) <- getSomeReg expr
+    (rlo, rhi) <- getNewRegPairNat II32
+    let mov_hi = LI rhi (ImmInt 0)
+        mov_lo = MR rlo expr_reg
+    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
+                         rlo
+iselExpr64 expr
+   = pprPanic "iselExpr64(powerpc)" (ppr expr)
+
+
+
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg) 
+  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
+                 (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _) 
+  = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
+    -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code       
+
+#endif
+
+
+getRegister (CmmLoad mem pk)
+  | not (isWord64 pk)
+  = do
+        Amode addr addr_code <- getAmode mem
+        let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
+                       addr_code `snocOL` LD size dst addr
+        return (Any size code)
+          where size = cmmTypeSize pk
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
+    Amode addr addr_code <- getAmode mem
+    return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
+
+-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
+    Amode addr addr_code <- getAmode mem
+    return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
+    Amode addr addr_code <- getAmode mem
+    return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+  = case mop of
+      MO_Not rep   -> triv_ucode_int rep NOT
+
+      MO_F_Neg w   -> triv_ucode_float w FNEG
+      MO_S_Neg w   -> triv_ucode_int   w NEG
+
+      MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
+      MO_FF_Conv W32 W64 -> conversionNop FF64 x
+
+      MO_FS_Conv from to -> coerceFP2Int from to x
+      MO_SF_Conv from to -> coerceInt2FP from to x
+
+      MO_SS_Conv from to
+        | from == to    -> conversionNop (intSize to) x
+
+        -- narrowing is a nop: we treat the high bits as undefined
+      MO_SS_Conv W32 to -> conversionNop (intSize to) x
+      MO_SS_Conv W16 W8 -> conversionNop II8 x
+      MO_SS_Conv W8  to -> triv_ucode_int to (EXTS II8)
+      MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
+
+      MO_UU_Conv from to
+        | from == to -> conversionNop (intSize to) x
+        -- narrowing is a nop: we treat the high bits as undefined
+      MO_UU_Conv W32 to -> conversionNop (intSize to) x
+      MO_UU_Conv W16 W8 -> conversionNop II8 x
+      MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
+      MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32)) 
+      _        -> panic "PPC.CodeGen.getRegister: no match"
+
+    where
+       triv_ucode_int   width instr = trivialUCode (intSize   width) instr x
+       triv_ucode_float width instr = trivialUCode (floatSize width) instr x
+
+        conversionNop new_size expr
+            = do e_code <- getRegister expr
+                 return (swizzleRegisterRep e_code new_size)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+  = case mop of
+      MO_F_Eq w -> condFltReg EQQ x y
+      MO_F_Ne w -> condFltReg NE  x y
+      MO_F_Gt w -> condFltReg GTT x y
+      MO_F_Ge w -> condFltReg GE  x y
+      MO_F_Lt w -> condFltReg LTT x y
+      MO_F_Le w -> condFltReg LE  x y
+
+      MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
+      MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)
+
+      MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_F_Add w  -> triv_float w FADD
+      MO_F_Sub w  -> triv_float w FSUB
+      MO_F_Mul w  -> triv_float w FMUL
+      MO_F_Quot w -> triv_float w FDIV
+      
+         -- optimize addition with 32-bit immediate
+         -- (needed for PIC)
+      MO_Add W32 ->
+        case y of
+          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
+            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
+          CmmLit lit
+            -> do
+                (src, srcCode) <- getSomeReg x
+                let imm = litToImm lit
+                    code dst = srcCode `appOL` toOL [
+                                    ADDIS dst src (HA imm),
+                                    ADD dst dst (RIImm (LO imm))
+                                ]
+                return (Any II32 code)
+          _ -> trivialCode W32 True ADD x y
+
+      MO_Add rep -> trivialCode rep True ADD x y
+      MO_Sub rep ->
+        case y of    -- subfi ('substract from' with immediate) doesn't exist
+          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
+            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
+          _ -> trivialCodeNoImm' (intSize rep) SUBF y x
+
+      MO_Mul rep -> trivialCode rep True MULLW x y
+
+      MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
+      
+      MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
+      MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
+
+      MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
+      MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
+      
+      MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+      MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+      
+      MO_And rep   -> trivialCode rep False AND x y
+      MO_Or rep    -> trivialCode rep False OR x y
+      MO_Xor rep   -> trivialCode rep False XOR x y
+
+      MO_Shl rep   -> trivialCode rep False SLW x y
+      MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
+      MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
+      _                -> panic "PPC.CodeGen.getRegister: no match"
+
+  where
+    triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
+    triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
+
+getRegister (CmmLit (CmmInt i rep))
+  | Just imm <- makeImmediate rep True i
+  = let
+       code dst = unitOL (LI dst imm)
+    in
+       return (Any (intSize rep) code)
+
+getRegister (CmmLit (CmmFloat f frep)) = do
+    lbl <- getNewLabelNat
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let size = floatSize frep
+        code dst = 
+           LDATA ReadOnlyData  [CmmDataLabel lbl,
+                                CmmStaticLit (CmmFloat f frep)]
+            `consOL` (addr_code `snocOL` LD size dst addr)
+    return (Any size code)
+
+getRegister (CmmLit lit)
+  = let rep = cmmLitType lit
+        imm = litToImm lit
+        code dst = toOL [
+              LIS dst (HA imm),
+              ADD dst dst (RIImm (LO imm))
+          ]
+    in return (Any (cmmTypeSize rep) code)
+
+getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
+    
+    -- extend?Rep: wrap integer expression of type rep
+    -- in a conversion to II32
+extendSExpr W32 x = x
+extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
+extendUExpr W32 x = x
+extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
+
+-- -----------------------------------------------------------------------------
+--  The 'Amode' type: Memory addressing modes passed up the tree.
+
+data Amode 
+       = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to.  So you can't put
+anything in between, lest it overwrite some of those registers.  If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+    code
+    LEA amode, tmp
+    ... other computation ...
+    ... (tmp) ...
+-}
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
+  | Just off <- makeImmediate W32 True (-i)
+  = do
+        (reg, code) <- getSomeReg x
+        return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
+  | Just off <- makeImmediate W32 True i
+  = do
+        (reg, code) <- getSomeReg x
+        return (Amode (AddrRegImm reg off) code)
+
+   -- optimize addition with 32-bit immediate
+   -- (needed for PIC)
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
+  = do
+        tmp <- getNewRegNat II32
+        (src, srcCode) <- getSomeReg x
+        let imm = litToImm lit
+            code = srcCode `snocOL` ADDIS tmp src (HA imm)
+        return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmLit lit)
+  = do
+        tmp <- getNewRegNat II32
+        let imm = litToImm lit
+            code = unitOL (LIS tmp (HA imm))
+        return (Amode (AddrRegImm tmp (LO imm)) code)
+    
+getAmode (CmmMachOp (MO_Add W32) [x, y])
+  = do
+        (regX, codeX) <- getSomeReg x
+        (regY, codeY) <- getSomeReg y
+        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+    
+getAmode other
+  = do
+        (reg, code) <- getSomeReg other
+        let
+            off  = ImmInt 0
+        return (Amode (AddrRegImm reg off) code)
+
+
+
+--  The 'CondCode' type:  Condition codes passed up the tree.
+data CondCode  
+       = CondCode Bool Cond InstrBlock
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- almost the same as everywhere else - but we need to
+-- extend small integers to 32 bit first
+
+getCondCode (CmmMachOp mop [x, y])
+  = case mop of
+      MO_F_Eq W32 -> condFltCode EQQ x y
+      MO_F_Ne W32 -> condFltCode NE  x y
+      MO_F_Gt W32 -> condFltCode GTT x y
+      MO_F_Ge W32 -> condFltCode GE  x y
+      MO_F_Lt W32 -> condFltCode LTT x y
+      MO_F_Le W32 -> condFltCode LE  x y
+
+      MO_F_Eq W64 -> condFltCode EQQ x y
+      MO_F_Ne W64 -> condFltCode NE  x y
+      MO_F_Gt W64 -> condFltCode GTT x y
+      MO_F_Ge W64 -> condFltCode GE  x y
+      MO_F_Lt W64 -> condFltCode LTT x y
+      MO_F_Le W64 -> condFltCode LE  x y
+
+      MO_Eq rep -> condIntCode EQQ  (extendUExpr rep x) (extendUExpr rep y)
+      MO_Ne rep -> condIntCode NE   (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_S_Gt rep -> condIntCode GTT  (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Ge rep -> condIntCode GE   (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Lt rep -> condIntCode LTT  (extendSExpr rep x) (extendSExpr rep y)
+      MO_S_Le rep -> condIntCode LE   (extendSExpr rep x) (extendSExpr rep y)
+
+      MO_U_Gt rep -> condIntCode GU   (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Ge rep -> condIntCode GEU  (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Lt rep -> condIntCode LU   (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Le rep -> condIntCode LEU  (extendUExpr rep x) (extendUExpr rep y)
+
+      other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
+
+getCondCode other =  panic "getCondCode(2)(powerpc)"
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+--  ###FIXME: I16 and I8!
+condIntCode cond x (CmmLit (CmmInt y rep))
+  | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
+  = do
+        (src1, code) <- getSomeReg x
+        let
+            code' = code `snocOL` 
+                (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
+        return (CondCode False cond code')
+
+condIntCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+       code' = code1 `appOL` code2 `snocOL`
+                 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
+    return (CondCode False cond code')
+
+condFltCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+       code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
+       code'' = case cond of -- twiddle CR to handle unordered case
+                    GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
+                   LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
+                   _ -> code'
+                 where
+                    ltbit = 0 ; eqbit = 2 ; gtbit = 1
+    return (CondCode True cond code'')
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business.  Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers.  If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side.  This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+assignMem_IntCode pk addr src = do
+    (srcReg, code) <- getSomeReg src
+    Amode dstAddr addr_code <- getAmode addr
+    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode _ reg src
+    = do
+        r <- getRegister src
+        return $ case r of
+            Any _ code         -> code dst
+            Fixed _ freg fcode -> fcode `snocOL` MR dst freg
+    where
+        dst = getRegisterReg reg
+
+
+
+-- Easy, isn't it?
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLit (CmmLabel lbl))
+  = return (unitOL $ JMP lbl)
+
+genJump tree
+  = do
+        (target,code) <- getSomeReg tree
+        return (code `snocOL` MTCTR target `snocOL` BCTR [])
+
+
+-- -----------------------------------------------------------------------------
+--  Unconditional branches
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+-- -----------------------------------------------------------------------------
+--  Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions.  We peek at the arguments to decide what kind of
+comparison to do.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation.  We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@.  We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+    :: BlockId     -- the branch target
+    -> CmmExpr      -- the condition on which to branch
+    -> NatM InstrBlock
+
+genCondJump id bool = do
+  CondCode _ cond code <- getCondCode bool
+  return (code `snocOL` BCC cond id)
+
+
+
+-- -----------------------------------------------------------------------------
+--  Generating C calls
+
+-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations.  Apart from that, the code is easy.
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+    :: CmmCallTarget           -- function to call
+    -> HintedCmmFormals                -- where to put the result
+    -> HintedCmmActuals                -- arguments (of mixed type)
+    -> NatM InstrBlock
+
+
+#if darwin_TARGET_OS || linux_TARGET_OS
+{-
+    The PowerPC calling convention for Darwin/Mac OS X
+    is described in Apple's document
+    "Inside Mac OS X - Mach-O Runtime Architecture".
+    
+    PowerPC Linux uses the System V Release 4 Calling Convention
+    for PowerPC. It is described in the
+    "System V Application Binary Interface PowerPC Processor Supplement".
+
+    Both conventions are similar:
+    Parameters may be passed in general-purpose registers starting at r3, in
+    floating point registers starting at f1, or on the stack. 
+    
+    But there are substantial differences:
+    * The number of registers used for parameter passing and the exact set of
+      nonvolatile registers differs (see MachRegs.lhs).
+    * On Darwin, stack space is always reserved for parameters, even if they are
+      passed in registers. The called routine may choose to save parameters from
+      registers to the corresponding space on the stack.
+    * On Darwin, a corresponding amount of GPRs is skipped when a floating point
+      parameter is passed in an FPR.
+    * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
+      starting with an odd-numbered GPR. It may skip a GPR to achieve this.
+      Darwin just treats an I64 like two separate II32s (high word first).
+    * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
+      4-byte aligned like everything else on Darwin.
+    * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
+      PowerPC Linux does not agree, so neither do we.
+      
+    According to both conventions, The parameter area should be part of the
+    caller's stack frame, allocated in the caller's prologue code (large enough
+    to hold the parameter lists for all called routines). The NCG already
+    uses the stack for register spilling, leaving 64 bytes free at the top.
+    If we need a larger parameter area than that, we just allocate a new stack
+    frame just before ccalling.
+-}
+
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ 
+ = return $ unitOL LWSYNC
+
+genCCall target dest_regs argsAndHints
+  = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
+        -- we rely on argument promotion in the codeGen
+    do
+        (finalStack,passArgumentsCode,usedRegs) <- passArguments
+                                                        (zip args argReps)
+                                                        allArgRegs allFPArgRegs
+                                                        initialStackOffset
+                                                        (toOL []) []
+                                                
+        (labelOrExpr, reduceToFF32) <- case target of
+            CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+            CmmCallee expr conv -> return  (Right expr, False)
+            CmmPrim mop -> outOfLineFloatOp mop
+                                                        
+        let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
+            codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
+
+        case labelOrExpr of
+            Left lbl -> do
+               return (         codeBefore
+                        `snocOL` BL lbl usedRegs
+                        `appOL`         codeAfter)
+            Right dyn -> do
+               (dynReg, dynCode) <- getSomeReg dyn
+               return (         dynCode
+                       `snocOL` MTCTR dynReg
+                        `appOL`         codeBefore
+                        `snocOL` BCTRL usedRegs
+                        `appOL`         codeAfter)
+    where
+#if darwin_TARGET_OS
+        initialStackOffset = 24
+           -- size of linkage area + size of arguments, in bytes       
+       stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
+                                map (widthInBytes . typeWidth) argReps
+#elif linux_TARGET_OS
+        initialStackOffset = 8
+        stackDelta finalStack = roundTo 16 finalStack
+#endif
+       args = map hintlessCmm argsAndHints
+       argReps = map cmmExprType args
+
+       roundTo a x | x `mod` a == 0 = x
+                   | otherwise = x + a - (x `mod` a)
+
+        move_sp_down finalStack
+               | delta > 64 =
+                        toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
+                             DELTA (-delta)]
+              | otherwise = nilOL
+              where delta = stackDelta finalStack
+       move_sp_up finalStack
+              | delta > 64 =
+                        toOL [ADD sp sp (RIImm (ImmInt delta)),
+                              DELTA 0]
+              | otherwise = nilOL
+              where delta = stackDelta finalStack
+              
+
+        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
+        passArguments ((arg,arg_ty):args) gprs fprs stackOffset
+               accumCode accumUsed | isWord64 arg_ty =
+            do
+                ChildCode64 code vr_lo <- iselExpr64 arg
+                let vr_hi = getHiVRegFromLo vr_lo
+
+#if darwin_TARGET_OS                
+                passArguments args
+                              (drop 2 gprs)
+                              fprs
+                              (stackOffset+8)
+                              (accumCode `appOL` code
+                                    `snocOL` storeWord vr_hi gprs stackOffset
+                                    `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
+                              ((take 2 gprs) ++ accumUsed)
+            where
+                storeWord vr (gpr:_) offset = MR gpr vr
+                storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
+                
+#elif linux_TARGET_OS
+                let stackOffset' = roundTo 8 stackOffset
+                    stackCode = accumCode `appOL` code
+                        `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+                        `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
+                    regCode hireg loreg =
+                        accumCode `appOL` code
+                            `snocOL` MR hireg vr_hi
+                            `snocOL` MR loreg vr_lo
+                                        
+                case gprs of
+                    hireg : loreg : regs | even (length gprs) ->
+                        passArguments args regs fprs stackOffset
+                                      (regCode hireg loreg) (hireg : loreg : accumUsed)
+                    _skipped : hireg : loreg : regs ->
+                        passArguments args regs fprs stackOffset
+                                      (regCode hireg loreg) (hireg : loreg : accumUsed)
+                    _ -> -- only one or no regs left
+                        passArguments args [] fprs (stackOffset'+8)
+                                      stackCode accumUsed
+#endif
+        
+        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
+            | reg : _ <- regs = do
+                register <- getRegister arg
+                let code = case register of
+                            Fixed _ freg fcode -> fcode `snocOL` MR reg freg
+                            Any _ acode -> acode reg
+                passArguments args
+                              (drop nGprs gprs)
+                              (drop nFprs fprs)
+#if darwin_TARGET_OS
+        -- The Darwin ABI requires that we reserve stack slots for register parameters
+                              (stackOffset + stackBytes)
+#elif linux_TARGET_OS
+        -- ... the SysV ABI doesn't.
+                              stackOffset
+#endif
+                              (accumCode `appOL` code)
+                              (reg : accumUsed)
+            | otherwise = do
+                (vr, code) <- getSomeReg arg
+                passArguments args
+                              (drop nGprs gprs)
+                              (drop nFprs fprs)
+                              (stackOffset' + stackBytes)
+                              (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
+                              accumUsed
+            where
+#if darwin_TARGET_OS
+        -- stackOffset is at least 4-byte aligned
+        -- The Darwin ABI is happy with that.
+                stackOffset' = stackOffset
+#else
+        -- ... the SysV ABI requires 8-byte alignment for doubles.
+                stackOffset' | isFloatType rep && typeWidth rep == W64 =
+                                roundTo 8 stackOffset
+                             | otherwise  =           stackOffset
+#endif
+                stackSlot = AddrRegImm sp (ImmInt stackOffset')
+                (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
+                    II32 -> (1, 0, 4, gprs)
+#if darwin_TARGET_OS
+        -- The Darwin ABI requires that we skip a corresponding number of GPRs when
+        -- we use the FPRs.
+                    FF32 -> (1, 1, 4, fprs)
+                    FF64 -> (2, 1, 8, fprs)
+#elif linux_TARGET_OS
+        -- ... the SysV ABI doesn't.
+                    FF32 -> (0, 1, 4, fprs)
+                    FF64 -> (0, 1, 8, fprs)
+#endif
+        
+        moveResult reduceToFF32 =
+            case dest_regs of
+                [] -> nilOL
+                [CmmHinted dest _hint]
+                    | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
+                    | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
+                    | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
+                                          MR r_dest r4]
+                    | otherwise -> unitOL (MR r_dest r3)
+                    where rep = cmmRegType (CmmLocal dest)
+                          r_dest = getRegisterReg (CmmLocal dest)
+                          
+        outOfLineFloatOp mop =
+            do
+                dflags <- getDynFlagsNat
+                mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
+                              mkForeignLabel functionName Nothing True
+                let mopLabelOrExpr = case mopExpr of
+                        CmmLit (CmmLabel lbl) -> Left lbl
+                        _ -> Right mopExpr
+                return (mopLabelOrExpr, reduce)
+            where
+                (functionName, reduce) = case mop of
+                    MO_F32_Exp   -> (fsLit "exp", True)
+                    MO_F32_Log   -> (fsLit "log", True)
+                    MO_F32_Sqrt  -> (fsLit "sqrt", True)
+                        
+                    MO_F32_Sin   -> (fsLit "sin", True)
+                    MO_F32_Cos   -> (fsLit "cos", True)
+                    MO_F32_Tan   -> (fsLit "tan", True)
+                    
+                    MO_F32_Asin  -> (fsLit "asin", True)
+                    MO_F32_Acos  -> (fsLit "acos", True)
+                    MO_F32_Atan  -> (fsLit "atan", True)
+                    
+                    MO_F32_Sinh  -> (fsLit "sinh", True)
+                    MO_F32_Cosh  -> (fsLit "cosh", True)
+                    MO_F32_Tanh  -> (fsLit "tanh", True)
+                    MO_F32_Pwr   -> (fsLit "pow", True)
+                        
+                    MO_F64_Exp   -> (fsLit "exp", False)
+                    MO_F64_Log   -> (fsLit "log", False)
+                    MO_F64_Sqrt  -> (fsLit "sqrt", False)
+                        
+                    MO_F64_Sin   -> (fsLit "sin", False)
+                    MO_F64_Cos   -> (fsLit "cos", False)
+                    MO_F64_Tan   -> (fsLit "tan", False)
+                     
+                    MO_F64_Asin  -> (fsLit "asin", False)
+                    MO_F64_Acos  -> (fsLit "acos", False)
+                    MO_F64_Atan  -> (fsLit "atan", False)
+                    
+                    MO_F64_Sinh  -> (fsLit "sinh", False)
+                    MO_F64_Cosh  -> (fsLit "cosh", False)
+                    MO_F64_Tanh  -> (fsLit "tanh", False)
+                    MO_F64_Pwr   -> (fsLit "pow", False)
+                    other -> pprPanic "genCCall(ppc): unknown callish op"
+                                    (pprCallishMachOp other)
+
+#else /* darwin_TARGET_OS || linux_TARGET_OS */
+genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
+#endif           
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids 
+  | opt_PIC
+  = do
+        (reg,e_code) <- getSomeReg expr
+        tmp <- getNewRegNat II32
+        lbl <- getNewLabelNat
+        dflags <- getDynFlagsNat
+        dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+        (tableReg,t_code) <- getSomeReg $ dynRef
+        let
+            jumpTable = map jumpTableEntryRel ids
+            
+            jumpTableEntryRel Nothing
+                = CmmStaticLit (CmmInt 0 wordWidth)
+            jumpTableEntryRel (Just (BlockId id))
+                = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+                where blockLabel = mkAsmTempLabel id
+
+            code = e_code `appOL` t_code `appOL` toOL [
+                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                            SLW tmp reg (RIImm (ImmInt 2)),
+                            LD II32 tmp (AddrRegReg tableReg tmp),
+                            ADD tmp tmp (RIReg tableReg),
+                            MTCTR tmp,
+                            BCTR [ id | Just id <- ids ]
+                    ]
+        return code
+  | otherwise
+  = do
+        (reg,e_code) <- getSomeReg expr
+        tmp <- getNewRegNat II32
+        lbl <- getNewLabelNat
+        let
+            jumpTable = map jumpTableEntry ids
+        
+            code = e_code `appOL` toOL [
+                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                            SLW tmp reg (RIImm (ImmInt 2)),
+                            ADDIS tmp tmp (HA (ImmCLbl lbl)),
+                            LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
+                            MTCTR tmp,
+                            BCTR [ id | Just id <- ids ]
+                    ]
+        return code
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condReg :: NatM CondCode -> NatM Register
+condReg getCond = do
+    CondCode _ cond cond_code <- getCond
+    let
+{-        code dst = cond_code `appOL` toOL [
+                BCC cond lbl1,
+                LI dst (ImmInt 0),
+                BCC ALWAYS lbl2,
+                NEWBLOCK lbl1,
+                LI dst (ImmInt 1),
+                BCC ALWAYS lbl2,
+                NEWBLOCK lbl2
+            ]-}
+        code dst = cond_code
+            `appOL` negate_code
+            `appOL` toOL [
+                MFCR dst,
+                RLWINM dst dst (bit + 1) 31 31
+            ]
+        
+        negate_code | do_negate = unitOL (CRNOR bit bit bit)
+                    | otherwise = nilOL
+                    
+        (bit, do_negate) = case cond of
+            LTT -> (0, False)
+            LE  -> (1, True)
+            EQQ -> (2, False)
+            GE  -> (0, True)
+            GTT -> (1, False)
+            
+            NE  -> (2, True)
+            
+            LU  -> (0, False)
+            LEU -> (1, True)
+            GEU -> (0, True)
+            GU  -> (1, False)
+           _   -> panic "PPC.CodeGen.codeReg: no match"
+                
+    return (Any II32 code)
+    
+condIntReg cond x y = condReg (condIntCode cond x y)
+condFltReg cond x y = condReg (condFltCode cond x y)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+
+
+{-
+Wolfgang's PowerPC version of The Rules:
+
+A slightly modified version of The Rules to take advantage of the fact
+that PowerPC instructions work on all registers and don't implicitly
+clobber any fixed registers.
+
+* The only expression for which getRegister returns Fixed is (CmmReg reg).
+
+* If getRegister returns Any, then the code it generates may modify only:
+       (a) fresh temporaries
+       (b) the destination register
+  It may *not* modify global registers, unless the global
+  register happens to be the destination register.
+  It may not clobber any other registers. In fact, only ccalls clobber any
+  fixed registers.
+  Also, it may not modify the counter register (used by genCCall).
+  
+  Corollary: If a getRegister for a subexpression returns Fixed, you need
+  not move it to a fresh temporary before evaluating the next subexpression.
+  The Fixed register won't be modified.
+  Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
+  
+* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
+  the value of the destination register.
+-}
+
+trivialCode 
+       :: Width
+       -> Bool
+       -> (Reg -> Reg -> RI -> Instr)
+       -> CmmExpr
+       -> CmmExpr
+       -> NatM Register
+
+trivialCode rep signed instr x (CmmLit (CmmInt y _))
+    | Just imm <- makeImmediate rep signed y 
+    = do
+        (src1, code1) <- getSomeReg x
+        let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
+        return (Any (intSize rep) code)
+  
+trivialCode rep _ instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
+    return (Any (intSize rep) code)
+
+trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
+                -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm' size instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
+    return (Any size code)
+    
+trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
+                -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
+    
+    
+trivialUCode 
+       :: Size
+       -> (Reg -> Reg -> Instr)
+       -> CmmExpr
+       -> NatM Register
+trivialUCode rep instr x = do
+    (src, code) <- getSomeReg x
+    let code' dst = code `snocOL` instr dst src
+    return (Any rep code')
+    
+-- There is no "remainder" instruction on the PPC, so we have to do
+-- it the hard way.
+-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
+
+remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
+    -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep div x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let code dst = code1 `appOL` code2 `appOL` toOL [
+                div dst src1 src2,
+                MULLW dst dst (RIReg src2),
+                SUBF dst dst src1
+            ]
+    return (Any (intSize rep) code)
+
+
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+coerceInt2FP fromRep toRep x = do
+    (src, code) <- getSomeReg x
+    lbl <- getNewLabelNat
+    itmp <- getNewRegNat II32
+    ftmp <- getNewRegNat FF64
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let
+       code' dst = code `appOL` maybe_exts `appOL` toOL [
+               LDATA ReadOnlyData
+                               [CmmDataLabel lbl,
+                                CmmStaticLit (CmmInt 0x43300000 W32),
+                                CmmStaticLit (CmmInt 0x80000000 W32)],
+               XORIS itmp src (ImmInt 0x8000),
+               ST II32 itmp (spRel 3),
+               LIS itmp (ImmInt 0x4330),
+               ST II32 itmp (spRel 2),
+               LD FF64 ftmp (spRel 2)
+            ] `appOL` addr_code `appOL` toOL [
+               LD FF64 dst addr,
+               FSUB FF64 dst ftmp dst
+           ] `appOL` maybe_frsp dst
+            
+        maybe_exts = case fromRep of
+                        W8 ->  unitOL $ EXTS II8 src src
+                        W16 -> unitOL $ EXTS II16 src src
+                        W32 -> nilOL
+                       _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
+
+        maybe_frsp dst 
+               = case toRep of
+                        W32 -> unitOL $ FRSP dst dst
+                        W64 -> nilOL
+                       _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
+
+    return (Any (floatSize toRep) code')
+
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+coerceFP2Int _ toRep x = do
+    -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat FF64
+    let
+       code' dst = code `appOL` toOL [
+               -- convert to int in FP reg
+           FCTIWZ tmp src,
+               -- store value (64bit) from FP to stack
+           ST FF64 tmp (spRel 2),
+               -- read low word of value (high word is undefined)
+           LD II32 dst (spRel 3)]      
+    return (Any (intSize toRep) code')
diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs

new file mode 100644 (file)

index 0000000..7345ee5
--- /dev/null
+++ b/compiler/nativeGen/PPC/Cond.hs
@@ -0,0 +1,62 @@
+
+module PPC.Cond (
+       Cond(..),
+       condNegate,
+       condUnsigned,
+       condToSigned,
+       condToUnsigned,
+)
+
+where
+
+import Panic
+
+data Cond
+       = ALWAYS
+       | EQQ
+       | GE
+       | GEU
+       | GTT
+       | GU
+       | LE
+       | LEU
+       | LTT
+       | LU
+       | NE
+       deriving Eq
+
+
+condNegate :: Cond -> Cond
+condNegate ALWAYS  = panic "condNegate: ALWAYS"
+condNegate EQQ     = NE
+condNegate GE      = LTT
+condNegate GEU     = LU
+condNegate GTT     = LE
+condNegate GU      = LEU
+condNegate LE      = GTT
+condNegate LEU     = GU
+condNegate LTT     = GE
+condNegate LU      = GEU
+condNegate NE      = EQQ
+
+-- Condition utils
+condUnsigned :: Cond -> Bool
+condUnsigned GU  = True
+condUnsigned LU  = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _   = False
+
+condToSigned :: Cond -> Cond
+condToSigned GU  = GTT
+condToSigned LU  = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x   = x
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE  = GEU
+condToUnsigned LE  = LEU
+condToUnsigned x   = x
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs

index 85aa494..55affc6 100644 (file)
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -10,49 +10,50 @@
  #include "nativeGen/NCG.h"
  
  module PPC.Instr (
  #include "nativeGen/NCG.h"
  
  module PPC.Instr (
-       Cond(..),
-       condNegate,
+       archWordSize,
         RI(..),
         RI(..),
-       Instr(..)
+       Instr(..),
+       maxSpillSlots
  )
  
  where
  
  )
  
  where
  
-import BlockId
  import PPC.Regs
  import PPC.Regs
-import RegsBase
+import PPC.Cond
+import Instruction
+import Size
+import RegClass
+import Reg
+
+import Constants       (rESERVED_C_STACK_BYTES)
+import BlockId
  import Cmm
  import Cmm
-import Outputable
  import FastString
  import CLabel
  import FastString
  import CLabel
+import Outputable
+import FastBool
+
+--------------------------------------------------------------------------------
+-- Size of a PPC memory address, in bytes.
+--
+archWordSize   :: Size
+archWordSize   = II32
  
  
-data Cond
-       = ALWAYS
-       | EQQ
-       | GE
-       | GEU
-       | GTT
-       | GU
-       | LE
-       | LEU
-       | LTT
-       | LU
-       | NE
-       deriving Eq
-
-
-condNegate :: Cond -> Cond
-condNegate ALWAYS  = panic "condNegate: ALWAYS"
-condNegate EQQ     = NE
-condNegate GE      = LTT
-condNegate GEU     = LU
-condNegate GTT     = LE
-condNegate GU      = LEU
-condNegate LE      = GTT
-condNegate LEU     = GU
-condNegate LTT     = GE
-condNegate LU      = GEU
-condNegate NE      = EQQ
+
+-- | Instruction instance for powerpc
+instance Instruction Instr where
+       regUsageOfInstr         = ppc_regUsageOfInstr
+       patchRegsOfInstr        = ppc_patchRegsOfInstr
+       isJumpishInstr          = ppc_isJumpishInstr
+       jumpDestsOfInstr        = ppc_jumpDestsOfInstr
+       patchJumpInstr          = ppc_patchJumpInstr
+       mkSpillInstr            = ppc_mkSpillInstr
+       mkLoadInstr             = ppc_mkLoadInstr
+       takeDeltaInstr          = ppc_takeDeltaInstr
+       isMetaInstr             = ppc_isMetaInstr
+       mkRegRegMoveInstr       = ppc_mkRegRegMoveInstr
+       takeRegRegMoveInstr     = ppc_takeRegRegMoveInstr
+       mkJumpInstr             = ppc_mkJumpInstr
  
  
  -- -----------------------------------------------------------------------------
  
  
  -- -----------------------------------------------------------------------------
@@ -85,12 +86,6 @@ data Instr
          -- benefit of subsequent passes
         | DELTA   Int
  
          -- benefit of subsequent passes
         | DELTA   Int
  
-       -- | spill this reg to a stack slot
-       | SPILL   Reg Int
-
-       -- | reload this reg from a stack slot
-       | RELOAD  Int Reg
-
         -- Loads and stores.
         | LD    Size Reg AddrMode       -- Load size, dst, src
         | LA      Size Reg AddrMode     -- Load arithmetic size, dst, src
         -- Loads and stores.
         | LD    Size Reg AddrMode       -- Load size, dst, src
         | LA      Size Reg AddrMode     -- Load arithmetic size, dst, src
@@ -165,3 +160,293 @@ data Instr
                                         -- bcl to next insn, mflr reg
               
         | LWSYNC -- memory barrier
                                         -- bcl to next insn, mflr reg
               
         | LWSYNC -- memory barrier
+
+
+-- | Get the registers that are being used by this instruction.
+--     regUsage doesn't need to do any trickery for jumps and such.  
+--     Just state precisely the regs read and written by that insn.  
+--     The consequences of control flow transfers, as far as register
+--     allocation goes, are taken care of by the register allocator.
+--
+ppc_regUsageOfInstr :: Instr -> RegUsage
+ppc_regUsageOfInstr instr 
+ = case instr of
+    LD    _ reg addr   -> usage (regAddr addr, [reg])
+    LA    _ reg addr   -> usage (regAddr addr, [reg])
+    ST    _ reg addr   -> usage (reg : regAddr addr, [])
+    STU    _ reg addr          -> usage (reg : regAddr addr, [])
+    LIS   reg _                -> usage ([], [reg])
+    LI    reg _                -> usage ([], [reg])
+    MR   reg1 reg2     -> usage ([reg2], [reg1])
+    CMP   _ reg ri     -> usage (reg : regRI ri,[])
+    CMPL  _ reg ri     -> usage (reg : regRI ri,[])
+    BCC           _ _          -> noUsage
+    BCCFAR _ _         -> noUsage
+    MTCTR reg          -> usage ([reg],[])
+    BCTR  _            -> noUsage
+    BL    _ params     -> usage (params, callClobberedRegs)
+    BCTRL params       -> usage (params, callClobberedRegs)
+    ADD          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
+    ADDC  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+    ADDE  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+    ADDIS reg1 reg2 _   -> usage ([reg2], [reg1])
+    SUBF  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+    MULLW reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
+    DIVW  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+    DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+    MULLW_MayOflo reg1 reg2 reg3        
+                        -> usage ([reg2,reg3], [reg1])
+    AND          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
+    OR   reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
+    XOR          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
+    XORIS reg1 reg2 _   -> usage ([reg2], [reg1])
+    EXTS  _  reg1 reg2 -> usage ([reg2], [reg1])
+    NEG          reg1 reg2     -> usage ([reg2], [reg1])
+    NOT          reg1 reg2     -> usage ([reg2], [reg1])
+    SLW          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
+    SRW          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
+    SRAW  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
+    RLWINM reg1 reg2 _ _ _
+                        -> usage ([reg2], [reg1])
+    FADD  _ r1 r2 r3   -> usage ([r2,r3], [r1])
+    FSUB  _ r1 r2 r3   -> usage ([r2,r3], [r1])
+    FMUL  _ r1 r2 r3   -> usage ([r2,r3], [r1])
+    FDIV  _ r1 r2 r3   -> usage ([r2,r3], [r1])
+    FNEG  r1 r2                -> usage ([r2], [r1])
+    FCMP  r1 r2                -> usage ([r1,r2], [])
+    FCTIWZ r1 r2       -> usage ([r2], [r1])
+    FRSP r1 r2         -> usage ([r2], [r1])
+    MFCR reg            -> usage ([], [reg])
+    MFLR reg            -> usage ([], [reg])
+    FETCHPC reg         -> usage ([], [reg])
+    _                  -> noUsage
+  where
+    usage (src, dst) = RU (filter interesting src)
+                         (filter interesting dst)
+    regAddr (AddrRegReg r1 r2) = [r1, r2]
+    regAddr (AddrRegImm r1 _)  = [r1]
+
+    regRI (RIReg r) = [r]
+    regRI  _   = []
+
+interesting :: Reg -> Bool
+interesting (VirtualRegI  _)  = True
+interesting (VirtualRegHi _)  = True
+interesting (VirtualRegF  _)  = True
+interesting (VirtualRegD  _)  = True
+interesting (RealReg i)       = isFastTrue (freeReg i)
+
+
+
+
+-- | Apply a given mapping to all the register references in this
+--     instruction.
+ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+ppc_patchRegsOfInstr instr env 
+ = case instr of
+    LD    sz reg addr   -> LD sz (env reg) (fixAddr addr)
+    LA    sz reg addr   -> LA sz (env reg) (fixAddr addr)
+    ST    sz reg addr   -> ST sz (env reg) (fixAddr addr)
+    STU    sz reg addr  -> STU sz (env reg) (fixAddr addr)
+    LIS   reg imm      -> LIS (env reg) imm
+    LI    reg imm      -> LI (env reg) imm
+    MR   reg1 reg2     -> MR (env reg1) (env reg2)
+    CMP          sz reg ri     -> CMP sz (env reg) (fixRI ri)
+    CMPL  sz reg ri    -> CMPL sz (env reg) (fixRI ri)
+    BCC          cond lbl      -> BCC cond lbl
+    BCCFAR cond lbl    -> BCCFAR cond lbl
+    MTCTR reg          -> MTCTR (env reg)
+    BCTR  targets      -> BCTR targets
+    BL    imm argRegs  -> BL imm argRegs       -- argument regs
+    BCTRL argRegs      -> BCTRL argRegs        -- cannot be remapped
+    ADD          reg1 reg2 ri  -> ADD (env reg1) (env reg2) (fixRI ri)
+    ADDC  reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
+    ADDE  reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
+    ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
+    SUBF  reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
+    MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
+    DIVW  reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
+    DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
+    MULLW_MayOflo reg1 reg2 reg3
+                        -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
+    AND          reg1 reg2 ri  -> AND (env reg1) (env reg2) (fixRI ri)
+    OR           reg1 reg2 ri  -> OR  (env reg1) (env reg2) (fixRI ri)
+    XOR          reg1 reg2 ri  -> XOR (env reg1) (env reg2) (fixRI ri)
+    XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
+    EXTS  sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
+    NEG          reg1 reg2     -> NEG (env reg1) (env reg2)
+    NOT          reg1 reg2     -> NOT (env reg1) (env reg2)
+    SLW          reg1 reg2 ri  -> SLW (env reg1) (env reg2) (fixRI ri)
+    SRW          reg1 reg2 ri  -> SRW (env reg1) (env reg2) (fixRI ri)
+    SRAW  reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
+    RLWINM reg1 reg2 sh mb me
+                        -> RLWINM (env reg1) (env reg2) sh mb me
+    FADD  sz r1 r2 r3   -> FADD sz (env r1) (env r2) (env r3)
+    FSUB  sz r1 r2 r3   -> FSUB sz (env r1) (env r2) (env r3)
+    FMUL  sz r1 r2 r3   -> FMUL sz (env r1) (env r2) (env r3)
+    FDIV  sz r1 r2 r3   -> FDIV sz (env r1) (env r2) (env r3)
+    FNEG  r1 r2                -> FNEG (env r1) (env r2)
+    FCMP  r1 r2                -> FCMP (env r1) (env r2)
+    FCTIWZ r1 r2       -> FCTIWZ (env r1) (env r2)
+    FRSP r1 r2         -> FRSP (env r1) (env r2)
+    MFCR reg            -> MFCR (env reg)
+    MFLR reg            -> MFLR (env reg)
+    FETCHPC reg         -> FETCHPC (env reg)
+    _ -> instr
+  where
+    fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+    fixAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i
+
+    fixRI (RIReg r) = RIReg (env r)
+    fixRI other        = other
+
+
+--------------------------------------------------------------------------------
+-- | Checks whether this instruction is a jump/branch instruction. 
+--     One that can change the flow of control in a way that the 
+--     register allocator needs to worry about. 
+ppc_isJumpishInstr :: Instr -> Bool
+ppc_isJumpishInstr instr
+ = case instr of
+       BCC{}           -> True
+       BCCFAR{}        -> True
+       BCTR{}          -> True
+       BCTRL{}         -> True
+       BL{}            -> True
+       JMP{}           -> True
+       _               -> False
+
+
+-- | Checks whether this instruction is a jump/branch instruction. 
+--     One that can change the flow of control in a way that the 
+--     register allocator needs to worry about. 
+ppc_jumpDestsOfInstr :: Instr -> [BlockId] 
+ppc_jumpDestsOfInstr insn 
+  = case insn of
+        BCC _ id        -> [id]
+        BCCFAR _ id     -> [id]
+        BCTR targets    -> targets
+       _               -> []
+       
+       
+-- | Change the destination of this jump instruction.
+--     Used in the linear allocator when adding fixup blocks for join
+--     points.
+ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+ppc_patchJumpInstr insn patchF
+  = case insn of
+        BCC cc id      -> BCC cc (patchF id)
+        BCCFAR cc id   -> BCCFAR cc (patchF id)
+        BCTR _         -> error "Cannot patch BCTR"
+       _               -> insn
+
+
+-- -----------------------------------------------------------------------------
+
+-- | An instruction to spill a register into a spill slot.
+ppc_mkSpillInstr
+   :: Reg              -- register to spill
+   -> Int              -- current stack delta
+   -> Int              -- spill slot to use
+   -> Instr
+
+ppc_mkSpillInstr reg delta slot
+  = let        off     = spillSlotToOffset slot
+    in
+    let sz = case regClass reg of
+                RcInteger -> II32
+                RcDouble  -> FF64
+               _         -> panic "PPC.Instr.mkSpillInstr: no match"
+    in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
+
+
+ppc_mkLoadInstr
+   :: Reg              -- register to load
+   -> Int              -- current stack delta
+   -> Int              -- spill slot to use
+   -> Instr
+
+ppc_mkLoadInstr reg delta slot
+  = let off     = spillSlotToOffset slot
+    in
+    let sz = case regClass reg of
+                RcInteger -> II32
+                RcDouble  -> FF64
+               _         -> panic "PPC.Instr.mkLoadInstr: no match"
+    in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+
+
+spillSlotSize :: Int
+spillSlotSize = 8
+
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
+
+-- convert a spill slot number to a *byte* offset, with no sign:
+-- decide on a per arch basis whether you are spilling above or below
+-- the C stack pointer.
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+   | slot >= 0 && slot < maxSpillSlots
+   = 64 + spillSlotSize * slot
+   | otherwise
+   = pprPanic "spillSlotToOffset:" 
+              (   text "invalid spill location: " <> int slot
+             $$  text "maxSpillSlots:          " <> int maxSpillSlots)
+
+
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+ppc_takeDeltaInstr
+       :: Instr
+       -> Maybe Int
+       
+ppc_takeDeltaInstr instr
+ = case instr of
+       DELTA i         -> Just i
+       _               -> Nothing
+
+
+ppc_isMetaInstr
+       :: Instr
+       -> Bool
+       
+ppc_isMetaInstr instr
+ = case instr of
+       COMMENT{}       -> True
+       LDATA{}         -> True
+       NEWBLOCK{}      -> True
+       DELTA{}         -> True
+       _               -> False
+
+
+-- | Copy the value in a register to another one.
+--     Must work for all register classes.
+ppc_mkRegRegMoveInstr
+       :: Reg
+       -> Reg
+       -> Instr
+
+ppc_mkRegRegMoveInstr src dst
+       = MR dst src
+
+
+-- | Make an unconditional jump instruction.
+--     For architectures with branch delay slots, its ok to put
+--     a NOP after the jump. Don't fill the delay slot with an
+--     instruction that references regs or you'll confuse the 
+--     linear allocator.
+ppc_mkJumpInstr
+       :: BlockId
+       -> [Instr]
+
+ppc_mkJumpInstr id 
+       = [BCC ALWAYS id]
+
+
+-- | Take the source and destination from this reg -> reg move instruction
+--     or Nothing if it's not one
+ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+ppc_takeRegRegMoveInstr (MR dst src) = Just (src,dst)
+ppc_takeRegRegMoveInstr _  = Nothing
+
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs

index ac83600..f12d32a 100644 (file)
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -7,12 +7,15 @@
  -----------------------------------------------------------------------------
  
  module PPC.Ppr (
  -----------------------------------------------------------------------------
  
  module PPC.Ppr (
+       pprNatCmmTop,
+       pprBasicBlock,
+       pprSectionHeader,
+       pprData,
+       pprInstr,
         pprUserReg,
         pprSize,
         pprImm,
         pprUserReg,
         pprSize,
         pprImm,
-       pprSectionHeader,
         pprDataItem,
         pprDataItem,
-       pprInstr
  )
  
  where
  )
  
  where
@@ -20,26 +23,134 @@ where
  #include "nativeGen/NCG.h"
  #include "HsVersions.h"
  
  #include "nativeGen/NCG.h"
  #include "HsVersions.h"
  
-import RegsBase
-import PprBase
  import PPC.Regs
  import PPC.Instr
  import PPC.Regs
  import PPC.Instr
+import PPC.Cond
+import PprBase
+import Instruction
+import Size
+import Reg
+import RegClass
  
  import BlockId
  import Cmm
  
  
  import BlockId
  import Cmm
  
-import CLabel          ( mkAsmTempLabel )
+import CLabel
  
  import Unique          ( pprUnique )
  import Pretty
  import FastString
  import qualified Outputable
  
  import Unique          ( pprUnique )
  import Pretty
  import FastString
  import qualified Outputable
-import Outputable      ( panic )
+import Outputable      ( Outputable, panic )
  
  
-import Data.Word(Word32)
+import Data.Word
  import Data.Bits
  
  
  import Data.Bits
  
  
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) = 
+  pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) = 
+  pprSectionHeader Text $$
+  (if null info then -- blocks guaranteed not null, so label needed
+       pprLabel lbl
+   else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+            pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                <> char ':' $$
+#endif
+       vcat (map pprData info) $$
+       pprLabel (entryLblToInfoLbl lbl)
+  ) $$
+  vcat (map pprBasicBlock blocks)
+     -- above: Even the first block gets a label, because with branch-chain
+     -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+        -- If we are using the .subsections_via_symbols directive
+        -- (available on recent versions of Darwin),
+        -- we have to make sure that there is some kind of reference
+        -- from the entry code to a label on the _top_ of of the info table,
+        -- so that the linker will not think it is unreferenced and dead-strip
+        -- it. That's why the label is called a DeadStripPreventer (_dsp).
+  $$ if not (null info)
+                   then text "\t.long "
+                     <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+                     <+> char '-'
+                     <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                   else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+  pprLabel (mkAsmTempLabel id) $$
+  vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes)         = pprAlign bytes
+pprData (CmmDataLabel lbl)       = pprLabel lbl
+pprData (CmmString str)          = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit)       = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+  | not (externallyVisibleCLabel lbl) = empty
+  | otherwise = ptext IF_ARCH_sparc((sLit ".global "), 
+                                   (sLit ".globl ")) <>
+               pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+  | not (externallyVisibleCLabel lbl) = empty
+  | otherwise = ptext (sLit ".type ") <>
+               pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+  = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+  = vcat (map do1 str) $$ do1 0
+    where
+       do1 :: Word8 -> Doc
+       do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+pprAlign bytes =
+       ptext (sLit ".align ") <> int pow2
+  where
+       pow2 = log2 bytes
+       
+       log2 :: Int -> Int  -- cache the common ones
+       log2 1 = 0 
+       log2 2 = 1
+       log2 4 = 2
+       log2 8 = 3
+       log2 n = 1 + log2 (n `quot` 2)
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+    ppr         instr  = Outputable.docToSDoc $ pprInstr instr
+
+
  pprUserReg :: Reg -> Doc
  pprUserReg = pprReg
  
  pprUserReg :: Reg -> Doc
  pprUserReg = pprReg
  
@@ -255,7 +366,7 @@ pprInstr (NEWBLOCK _)
  pprInstr (LDATA _ _)
     = panic "PprMach.pprInstr: LDATA"
  
  pprInstr (LDATA _ _)
     = panic "PprMach.pprInstr: LDATA"
  
-
+{-
  pprInstr (SPILL reg slot)
     = hcat [
         ptext (sLit "\tSPILL"),
  pprInstr (SPILL reg slot)
     = hcat [
         ptext (sLit "\tSPILL"),
@@ -271,6 +382,7 @@ pprInstr (RELOAD slot reg)
         ptext (sLit "SLOT") <> parens (int slot),
         comma,
         pprReg reg]
         ptext (sLit "SLOT") <> parens (int slot),
         comma,
         pprReg reg]
+-}
  
  pprInstr (LD sz reg addr) = hcat [
         char '\t',
  
  pprInstr (LD sz reg addr) = hcat [
         char '\t',
diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs

index ea882a0..b2806c7 100644 (file)
--- a/compiler/nativeGen/PPC/RegInfo.hs
+++ b/compiler/nativeGen/PPC/RegInfo.hs
@@ -7,27 +7,14 @@
  -----------------------------------------------------------------------------
  
  module PPC.RegInfo (
  -----------------------------------------------------------------------------
  
  module PPC.RegInfo (
-       RegUsage(..),
-       noUsage,
-       regUsage,
-       patchRegs,
-       jumpDests,
-       isJumpish,
-       patchJump,
-       isRegRegMove,
+       mkVReg,
  
  
-        JumpDest(..), 
+        JumpDest, 
         canShortcut, 
         shortcutJump, 
  
         canShortcut, 
         shortcutJump, 
  
-       mkSpillInstr,
-       mkLoadInstr,
-       mkRegRegMoveInstr,
-       mkBranchInstr,
-
-       spillSlotSize,
-       maxSpillSlots,
-       spillSlotToOffset               
+       shortcutStatic,
+       regDotColor
  )
  
  where
  )
  
  where
@@ -35,203 +22,29 @@ where
  #include "nativeGen/NCG.h"
  #include "HsVersions.h"
  
  #include "nativeGen/NCG.h"
  #include "HsVersions.h"
  
-import BlockId
-import RegsBase
  import PPC.Regs
  import PPC.Instr
  import PPC.Regs
  import PPC.Instr
-import Outputable
-import Constants       ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-data RegUsage = RU [Reg] [Reg]
-
-noUsage :: RegUsage
-noUsage  = RU [] []
-
-regUsage :: Instr -> RegUsage
-regUsage instr = case instr of
-    SPILL  reg _       -> usage ([reg], [])
-    RELOAD _ reg       -> usage ([], [reg])
-
-    LD    _ reg addr   -> usage (regAddr addr, [reg])
-    LA    _ reg addr   -> usage (regAddr addr, [reg])
-    ST    _ reg addr   -> usage (reg : regAddr addr, [])
-    STU    _ reg addr  -> usage (reg : regAddr addr, [])
-    LIS   reg _                -> usage ([], [reg])
-    LI    reg _                -> usage ([], [reg])
-    MR   reg1 reg2     -> usage ([reg2], [reg1])
-    CMP   _ reg ri     -> usage (reg : regRI ri,[])
-    CMPL  _ reg ri     -> usage (reg : regRI ri,[])
-    BCC           _ _          -> noUsage
-    BCCFAR _ _         -> noUsage
-    MTCTR reg          -> usage ([reg],[])
-    BCTR  _            -> noUsage
-    BL    _  params    -> usage (params, callClobberedRegs)
-    BCTRL params       -> usage (params, callClobberedRegs)
-    ADD          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
-    ADDC  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
-    ADDE  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
-    ADDIS reg1 reg2 _   -> usage ([reg2], [reg1])
-    SUBF  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
-    MULLW reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
-    DIVW  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
-    DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
-    MULLW_MayOflo reg1 reg2 reg3        
-                        -> usage ([reg2,reg3], [reg1])
-    AND          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
-    OR   reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
-    XOR          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
-    XORIS reg1 reg2 _   -> usage ([reg2], [reg1])
-    EXTS  _   reg1 reg2 -> usage ([reg2], [reg1])
-    NEG          reg1 reg2     -> usage ([reg2], [reg1])
-    NOT          reg1 reg2     -> usage ([reg2], [reg1])
-    SLW          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
-    SRW          reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
-    SRAW  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
-    RLWINM reg1 reg2 _ _ _
-                        -> usage ([reg2], [reg1])
-    FADD  _ r1 r2 r3   -> usage ([r2,r3], [r1])
-    FSUB  _ r1 r2 r3   -> usage ([r2,r3], [r1])
-    FMUL  _ r1 r2 r3   -> usage ([r2,r3], [r1])
-    FDIV  _ r1 r2 r3   -> usage ([r2,r3], [r1])
-    FNEG  r1 r2                -> usage ([r2], [r1])
-    FCMP  r1 r2                -> usage ([r1,r2], [])
-    FCTIWZ r1 r2       -> usage ([r2], [r1])
-    FRSP r1 r2         -> usage ([r2], [r1])
-    MFCR reg            -> usage ([], [reg])
-    MFLR reg            -> usage ([], [reg])
-    FETCHPC reg         -> usage ([], [reg])
-    _                  -> noUsage
-  where
-    usage (src, dst) = RU (filter interesting src)
-                         (filter interesting dst)
-    regAddr (AddrRegReg r1 r2) = [r1, r2]
-    regAddr (AddrRegImm r1 _)  = [r1]
-
-    regRI (RIReg r) = [r]
-    regRI  _   = []
-
-interesting :: Reg -> Bool
-interesting (VirtualRegI  _)  = True
-interesting (VirtualRegHi _)  = True
-interesting (VirtualRegF  _)  = True
-interesting (VirtualRegD  _)  = True
-interesting (RealReg i)       = isFastTrue (freeReg i)
-
-
--- -----------------------------------------------------------------------------
--- 'patchRegs' function
-
--- 'patchRegs' takes an instruction and applies the given mapping to
--- all the register references.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
-    SPILL reg slot     -> SPILL (env reg) slot
-    RELOAD slot reg    -> RELOAD slot (env reg)
-
-    LD    sz reg addr   -> LD sz (env reg) (fixAddr addr)
-    LA    sz reg addr   -> LA sz (env reg) (fixAddr addr)
-    ST    sz reg addr   -> ST sz (env reg) (fixAddr addr)
-    STU    sz reg addr  -> STU sz (env reg) (fixAddr addr)
-    LIS   reg imm      -> LIS (env reg) imm
-    LI    reg imm      -> LI (env reg) imm
-    MR   reg1 reg2     -> MR (env reg1) (env reg2)
-    CMP          sz reg ri     -> CMP sz (env reg) (fixRI ri)
-    CMPL  sz reg ri    -> CMPL sz (env reg) (fixRI ri)
-    BCC          cond lbl      -> BCC cond lbl
-    BCCFAR cond lbl    -> BCCFAR cond lbl
-    MTCTR reg          -> MTCTR (env reg)
-    BCTR  targets      -> BCTR targets
-    BL    imm argRegs  -> BL imm argRegs       -- argument regs
-    BCTRL argRegs      -> BCTRL argRegs        -- cannot be remapped
-    ADD          reg1 reg2 ri  -> ADD (env reg1) (env reg2) (fixRI ri)
-    ADDC  reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
-    ADDE  reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
-    ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
-    SUBF  reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
-    MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
-    DIVW  reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
-    DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
-    MULLW_MayOflo reg1 reg2 reg3
-                        -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
-    AND          reg1 reg2 ri  -> AND (env reg1) (env reg2) (fixRI ri)
-    OR           reg1 reg2 ri  -> OR  (env reg1) (env reg2) (fixRI ri)
-    XOR          reg1 reg2 ri  -> XOR (env reg1) (env reg2) (fixRI ri)
-    XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
-    EXTS  sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
-    NEG          reg1 reg2     -> NEG (env reg1) (env reg2)
-    NOT          reg1 reg2     -> NOT (env reg1) (env reg2)
-    SLW          reg1 reg2 ri  -> SLW (env reg1) (env reg2) (fixRI ri)
-    SRW          reg1 reg2 ri  -> SRW (env reg1) (env reg2) (fixRI ri)
-    SRAW  reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
-    RLWINM reg1 reg2 sh mb me
-                        -> RLWINM (env reg1) (env reg2) sh mb me
-    FADD  sz r1 r2 r3   -> FADD sz (env r1) (env r2) (env r3)
-    FSUB  sz r1 r2 r3   -> FSUB sz (env r1) (env r2) (env r3)
-    FMUL  sz r1 r2 r3   -> FMUL sz (env r1) (env r2) (env r3)
-    FDIV  sz r1 r2 r3   -> FDIV sz (env r1) (env r2) (env r3)
-    FNEG  r1 r2                -> FNEG (env r1) (env r2)
-    FCMP  r1 r2                -> FCMP (env r1) (env r2)
-    FCTIWZ r1 r2       -> FCTIWZ (env r1) (env r2)
-    FRSP r1 r2         -> FRSP (env r1) (env r2)
-    MFCR reg            -> MFCR (env reg)
-    MFLR reg            -> MFLR (env reg)
-    FETCHPC reg         -> FETCHPC (env reg)
-    _ -> instr
-  where
-    fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
-    fixAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i
-
-    fixRI (RIReg r) = RIReg (env r)
-    fixRI other        = other
-
+import RegClass
+import Reg
+import Size
  
  
+import BlockId
+import Cmm
+import CLabel
  
  
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
-  = case insn of
-        BCC _ id        -> id : acc
-        BCCFAR _ id     -> id : acc
-        BCTR targets    -> targets ++ acc
-       _               -> acc
-       
-       
--- | Check whether a particular instruction is a jump, branch or call instruction (jumpish)
---     We can't just use jumpDests above because the jump might take its arg,
---     so the instr won't contain a blockid.
---
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
-       BCC{}           -> True
-       BCCFAR{}        -> True
-        BCTR{}         -> True
-        BCTRL{}                -> True
-       BL{}            -> True
-       JMP{}           -> True
-       _               -> False
-
--- | Change the destination of this jump instruction
---     Used in joinToTargets in the linear allocator, when emitting fixup code
---     for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
-  = case insn of
-        BCC cc id 
-        | id == old    -> BCC cc new
-
-        BCCFAR cc id 
-        | id == old    -> BCCFAR cc new
-
-        BCTR _         -> error "Cannot patch BCTR"
+import Outputable
+import Unique
  
  
-       _               -> insn
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+   | not (isFloatSize size) = VirtualRegI u
+   | otherwise
+   = case size of
+        FF32   -> VirtualRegD u
+        FF64   -> VirtualRegD u
+       _       -> panic "mkVReg"
  
  
  
  
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove (MR dst src) = Just (src,dst)
-isRegRegMove _  = Nothing
  
  
  data JumpDest = DestBlockId BlockId | DestImm Imm
  
  
  data JumpDest = DestBlockId BlockId | DestImm Imm
@@ -243,71 +56,39 @@ shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
  shortcutJump _ other = other
  
  
  shortcutJump _ other = other
  
  
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
  
  
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+  | Just uq <- maybeAsmTemp lab 
+  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
  
  
--- -----------------------------------------------------------------------------
--- Generating spill instructions
-
-mkSpillInstr
-   :: Reg              -- register to spill
-   -> Int              -- current stack delta
-   -> Int              -- spill slot to use
-   -> Instr
-mkSpillInstr reg delta slot
-  = let        off     = spillSlotToOffset slot
-    in
-    let sz = case regClass reg of
-                RcInteger -> II32
-                RcDouble  -> FF64
-               RcFloat   -> panic "PPC.RegInfo.mkSpillInstr: no match"
-    in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+  | Just uq <- maybeAsmTemp lbl1
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+        -- slightly dodgy, we're ignoring the second label, but this
+        -- works with the way we use CmmLabelDiffOff for jump tables now.
  
  
+shortcutStatic _ other_static
+        = other_static
  
  
-mkLoadInstr
-   :: Reg              -- register to load
-   -> Int              -- current stack delta
-   -> Int              -- spill slot to use
-   -> Instr
-mkLoadInstr reg delta slot
-  = let off     = spillSlotToOffset slot
-    in
-    let sz = case regClass reg of
-                RcInteger -> II32
-                RcDouble  -> FF64
-               RcFloat   -> panic "PPC.RegInfo.mkSpillInstr: no match"
-    in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+shortBlockId 
+       :: (BlockId -> Maybe JumpDest)
+       -> BlockId
+       -> CLabel
  
  
+shortBlockId fn blockid@(BlockId uq) =
+   case fn blockid of
+      Nothing -> mkAsmTempLabel uq
+      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
+      Just (DestImm (ImmCLbl lbl)) -> lbl
+      _other -> panic "shortBlockId"
  
  
-mkRegRegMoveInstr
-    :: Reg
-    -> Reg
-    -> Instr
-mkRegRegMoveInstr src dst
-    = MR dst src
  
  
  
  
-mkBranchInstr
-    :: BlockId
-    -> [Instr]
-
-mkBranchInstr id = [BCC ALWAYS id]
-
-
-
-spillSlotSize :: Int
-spillSlotSize = 8
-
-maxSpillSlots :: Int
-maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
-
--- convert a spill slot number to a *byte* offset, with no sign:
--- decide on a per arch basis whether you are spilling above or below
--- the C stack pointer.
-spillSlotToOffset :: Int -> Int
-spillSlotToOffset slot
-   | slot >= 0 && slot < maxSpillSlots
-   = 64 + spillSlotSize * slot
-   | otherwise
-   = pprPanic "spillSlotToOffset:" 
-              (   text "invalid spill location: " <> int slot
-             $$  text "maxSpillSlots:          " <> int maxSpillSlots)
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = case regClass reg of
+       RcInteger       -> text "blue"
+       RcFloat         -> text "red"
+       RcDouble        -> text "green"
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs

index d6993b2..80c68dd 100644 (file)
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -5,16 +5,6 @@
  -- -----------------------------------------------------------------------------
  
  module PPC.Regs (
  -- -----------------------------------------------------------------------------
  
  module PPC.Regs (
-       -- sizes
-       Size(..),
-       intSize, 
-       floatSize, 
-       isFloatSize, 
-       wordSize,
-       cmmTypeSize,
-       sizeToWidth,
-       mkVReg,
-
         -- immediates
         Imm(..),
         strImmLit,
         -- immediates
         Imm(..),
         strImmLit,
@@ -42,7 +32,10 @@ module PPC.Regs (
  
         -- horrow show
         freeReg,
  
         -- horrow show
         freeReg,
-       globalRegMaybe
+       globalRegMaybe,
+       get_GlobalReg_reg_or_addr,
+       allocatableRegs
+
  )
  
  where
  )
  
  where
@@ -51,78 +44,22 @@ where
  #include "HsVersions.h"
  #include "../includes/MachRegs.h"
  
  #include "HsVersions.h"
  #include "../includes/MachRegs.h"
  
-import RegsBase
+import Reg
+import RegClass
  
  
+import CgUtils          ( get_GlobalReg_addr )
  import BlockId
  import Cmm
  import CLabel           ( CLabel )
  import Pretty
  import Outputable      ( Outputable(..), pprPanic, panic )
  import qualified Outputable
  import BlockId
  import Cmm
  import CLabel           ( CLabel )
  import Pretty
  import Outputable      ( Outputable(..), pprPanic, panic )
  import qualified Outputable
-import Unique
  import Constants
  import FastBool
  
  import Data.Word       ( Word8, Word16, Word32 )
  import Data.Int        ( Int8, Int16, Int32 )
  
  import Constants
  import FastBool
  
  import Data.Word       ( Word8, Word16, Word32 )
  import Data.Int        ( Int8, Int16, Int32 )
  
--- sizes -----------------------------------------------------------------------
--- For these three, the "size" also gives the int/float
--- distinction, because the instructions for int/float
--- differ only in their suffices
-data Size      
-       = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80
-       deriving Eq
-
-intSize, floatSize :: Width -> Size
-intSize W8 = II8
-intSize W16 = II16
-intSize W32 = II32
-intSize W64 = II64
-intSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-floatSize W32 = FF32
-floatSize W64 = FF64
-floatSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-
-isFloatSize :: Size -> Bool
-isFloatSize FF32 = True
-isFloatSize FF64 = True
-isFloatSize FF80 = True
-isFloatSize _    = False
-
-
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty 
-       | isFloatType ty        = floatSize (typeWidth ty)
-       | otherwise             = intSize (typeWidth ty)
-
-
-sizeToWidth :: Size -> Width
-sizeToWidth II8  = W8
-sizeToWidth II16 = W16
-sizeToWidth II32 = W32
-sizeToWidth II64 = W64
-sizeToWidth FF32 = W32
-sizeToWidth FF64 = W64
-sizeToWidth _ = panic "MachInstrs.sizeToWidth"
-
-
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
-   | not (isFloatSize size) = VirtualRegI u
-   | otherwise
-   = case size of
-        FF32   -> VirtualRegD u
-        FF64   -> VirtualRegD u
-       _       -> panic "mkVReg"
-
-
  
  -- immediates ------------------------------------------------------------------
  data Imm
  
  -- immediates ------------------------------------------------------------------
  data Imm
@@ -490,7 +427,7 @@ freeReg REG_Hp   = fastBool False
  #ifdef REG_HpLim
  freeReg REG_HpLim = fastBool False
  #endif
  #ifdef REG_HpLim
  freeReg REG_HpLim = fastBool False
  #endif
-freeReg n               = fastBool True
+freeReg _               = fastBool True
  
  
  --  | Returns 'Nothing' if this global register is not stored
  
  
  --  | Returns 'Nothing' if this global register is not stored
@@ -582,3 +519,26 @@ freeReg _          = 0#
  globalRegMaybe _       = panic "PPC.Regs.globalRegMaybe: not defined"
  
  #endif /* powerpc_TARGET_ARCH */
  globalRegMaybe _       = panic "PPC.Regs.globalRegMaybe: not defined"
  
  #endif /* powerpc_TARGET_ARCH */
+
+
+-- We map STG registers onto appropriate CmmExprs.  Either they map
+-- to real machine registers or stored as offsets from BaseReg.  Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+   = case globalRegMaybe mid of
+        Just rr -> Left rr
+        Nothing -> Right (get_GlobalReg_addr mid)
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs for which we are prepared to allow the
+-- register allocator to attempt to map VRegs to.
+allocatableRegs :: [RegNo]
+allocatableRegs
+   = let isFree i = isFastTrue (freeReg i)
+     in  filter isFree allMachRegNos
diff --git a/compiler/nativeGen/Platform.hs b/compiler/nativeGen/Platform.hs

new file mode 100644 (file)

index 0000000..8b01f5c
--- /dev/null
+++ b/compiler/nativeGen/Platform.hs
@@ -0,0 +1,92 @@
+
+-- | A description of the platform we're compiling for.
+--     Used by the native code generator.
+--     In the future, this module should be the only one that references
+--     the evil #defines for each TARGET_ARCH and TARGET_OS
+--
+module Platform (
+       Platform(..),
+       Arch(..),
+       OS(..),
+
+       defaultTargetPlatform
+)
+
+where
+
+#include "HsVersions.h"
+
+
+-- | Contains enough information for the native code generator to emit
+--     code for this platform.
+data Platform
+       = Platform 
+       { platformArch  :: Arch
+       , platformOS    :: OS }
+
+
+-- | Architectures that the native code generator knows about.
+--     TODO: It might be nice to extend these constructors with information
+--     about what instruction set extensions an architecture might support.
+--
+data Arch
+       = ArchAlpha
+       | ArchX86
+       | ArchX86_64
+       | ArchPPC
+       | ArchPPC_64
+       | ArchSPARC
+       deriving (Show, Eq)
+       
+
+-- | Operating systems that the native code generator knows about.
+--     Having OSUnknown should produce a sensible default, but no promises.
+data OS
+       = OSUnknown
+       | OSLinux
+       | OSDarwin
+       | OSSolaris
+       | OSMinGW32
+       deriving (Show, Eq)
+
+
+-- | This is the target platform as far as the #ifdefs are concerned.
+--     These are set in includes/ghcplatform.h by the autoconf scripts
+defaultTargetPlatform :: Platform
+defaultTargetPlatform
+       = Platform defaultTargetArch defaultTargetOS
+
+
+-- | Move the evil TARGET_ARCH #ifdefs into Haskell land.
+defaultTargetArch :: Arch
+#if   alpha_TARGET_ARCH
+defaultTargetArch      = ArchAlpha
+#elif i386_TARGET_ARCH
+defaultTargetArch      = ArchX86
+#elif x86_64_TARGET_ARCH
+defaultTargetArch      = ArchX86_64
+#elif powerpc_TARGET_ARCH
+defaultTargetArch      = ArchPPC
+#elif powerpc64_TARGET_ARCH
+defaultTargetArch      = ArchPPC_64
+#elif sparc_TARGET_ARCH
+defaultTargetArch      = ArchSPARC
+#else
+#error "Platform.buildArch: undefined"
+#endif
+
+
+-- | Move the evil TARGET_OS #ifdefs into Haskell land.
+defaultTargetOS :: OS
+#if   linux_TARGET_OS
+defaultTargetOS        = OSLinux
+#elif darwin_TARGET_OS
+defaultTargetOS        = OSDarwin
+#elif solaris_TARGET_OS
+defaultTargetOS        = OSSolaris
+#elif mingw32_TARGET_OS
+defaultTargetOS        = OSMinGW32
+#else
+defaultTargetOS        = OSUnknown
+#endif
+
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs

deleted file mode 100644 (file)

index 532d852..0000000
--- a/compiler/nativeGen/PprMach.hs
+++ /dev/null
@@ -1,183 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Pretty-printing assembly language
---
--- (c) The University of Glasgow 1993-2005
---
------------------------------------------------------------------------------
-
--- We start with the @pprXXX@s with some cross-platform commonality
--- (e.g., 'pprReg'); we conclude with the no-commonality monster,
--- 'pprInstr'.
-
-#include "nativeGen/NCG.h"
-
-module PprMach ( 
-       pprNatCmmTop, pprBasicBlock, pprSectionHeader, pprData,
-       pprInstr, pprSize, pprUserReg, pprImm
-  ) where
-
-#include "HsVersions.h"
-
-import PprBase
-
-import BlockId
-import Cmm
-import Regs            -- may differ per-platform
-import Instrs
-import Regs
-
-import CLabel          ( CLabel, pprCLabel, externallyVisibleCLabel,
-                         labelDynamic, mkAsmTempLabel, entryLblToInfoLbl )
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-import CLabel       ( mkDeadStripPreventer )
-#endif
-
-import Panic           ( panic )
-import Unique          ( pprUnique )
-import Pretty
-import FastString
-import qualified Outputable
-import Outputable      ( Outputable, pprPanic, ppr, docToSDoc)
-
-import Data.Array.ST
-import Data.Word       ( Word8 )
-import Control.Monad.ST
-import Data.Char       ( chr, ord )
-import Data.Maybe       ( isJust )
-
-
-#if   alpha_TARGET_ARCH
-import Alpha.Ppr
-#elif powerpc_TARGET_ARCH
-import PPC.Ppr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Ppr
-#elif sparc_TARGET_ARCH
-import SPARC.Ppr
-#else
-#error "Regs: not defined for this architecture"
-#endif
-
-
-
--- -----------------------------------------------------------------------------
--- Printing this stuff out
-
-pprNatCmmTop :: NatCmmTop -> Doc
-pprNatCmmTop (CmmData section dats) = 
-  pprSectionHeader section $$ vcat (map pprData dats)
-
- -- special case for split markers:
-pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
-
-pprNatCmmTop (CmmProc info lbl params (ListGraph blocks)) = 
-  pprSectionHeader Text $$
-  (if null info then -- blocks guaranteed not null, so label needed
-       pprLabel lbl
-   else
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-            pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
-                <> char ':' $$
-#endif
-       vcat (map pprData info) $$
-       pprLabel (entryLblToInfoLbl lbl)
-  ) $$
-  vcat (map pprBasicBlock blocks)
-     -- above: Even the first block gets a label, because with branch-chain
-     -- elimination, it might be the target of a goto.
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-        -- If we are using the .subsections_via_symbols directive
-        -- (available on recent versions of Darwin),
-        -- we have to make sure that there is some kind of reference
-        -- from the entry code to a label on the _top_ of of the info table,
-        -- so that the linker will not think it is unreferenced and dead-strip
-        -- it. That's why the label is called a DeadStripPreventer (_dsp).
-  $$ if not (null info)
-                   then text "\t.long "
-                     <+> pprCLabel_asm (entryLblToInfoLbl lbl)
-                     <+> char '-'
-                     <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
-                   else empty
-#endif
-
-
-pprBasicBlock :: NatBasicBlock -> Doc
-pprBasicBlock (BasicBlock (BlockId id) instrs) =
-  pprLabel (mkAsmTempLabel id) $$
-  vcat (map pprInstr instrs)
-
-
-pprData :: CmmStatic -> Doc
-pprData (CmmAlign bytes)         = pprAlign bytes
-pprData (CmmDataLabel lbl)       = pprLabel lbl
-pprData (CmmString str)          = pprASCII str
-pprData (CmmUninitialised bytes) = ptext (sLit s) <> int bytes
-    where s =
-#if defined(solaris2_TARGET_OS)
-              ".skip "
-#else
-              ".space "
-#endif
-pprData (CmmStaticLit lit)       = pprDataItem lit
-
-pprGloblDecl :: CLabel -> Doc
-pprGloblDecl lbl
-  | not (externallyVisibleCLabel lbl) = empty
-  | otherwise = ptext IF_ARCH_sparc((sLit ".global "), 
-                                   (sLit ".globl ")) <>
-               pprCLabel_asm lbl
-
-pprTypeAndSizeDecl :: CLabel -> Doc
-pprTypeAndSizeDecl lbl
-#if linux_TARGET_OS
-  | not (externallyVisibleCLabel lbl) = empty
-  | otherwise = ptext (sLit ".type ") <>
-               pprCLabel_asm lbl <> ptext (sLit ", @object")
-#else
-  = empty
-#endif
-
-pprLabel :: CLabel -> Doc
-pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
-
-
-pprASCII str
-  = vcat (map do1 str) $$ do1 0
-    where
-       do1 :: Word8 -> Doc
-       do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
-
-pprAlign bytes =
-       IF_ARCH_alpha(ptext (sLit ".align ") <> int pow2,
-       IF_ARCH_i386(ptext (sLit ".align ") <> int IF_OS_darwin(pow2,bytes),
-       IF_ARCH_x86_64(ptext (sLit ".align ") <> int IF_OS_darwin(pow2,bytes),
-       IF_ARCH_sparc(ptext (sLit ".align ") <> int bytes,
-       IF_ARCH_powerpc(ptext (sLit ".align ") <> int pow2,)))))
-  where
-       pow2 = log2 bytes
-       
-       log2 :: Int -> Int  -- cache the common ones
-       log2 1 = 0 
-       log2 2 = 1
-       log2 4 = 2
-       log2 8 = 3
-       log2 n = 1 + log2 (n `quot` 2)
-
-
--- -----------------------------------------------------------------------------
--- pprInstr: print an 'Instr'
-
-instance Outputable Instr where
-    ppr         instr  = Outputable.docToSDoc $ pprInstr instr
-
-
-
-
diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs

new file mode 100644 (file)

index 0000000..1a341bb
--- /dev/null
+++ b/compiler/nativeGen/Reg.hs
@@ -0,0 +1,113 @@
+
+-- | An architecture independent description of a register.
+--     This needs to stay architecture independent because it is used
+--     by NCGMonad and the register allocators, which are shared
+--     by all architectures.
+--
+module Reg (
+       RegNo,
+       Reg(..),
+       isRealReg,
+       unRealReg,
+       isVirtualReg,
+       renameVirtualReg,
+       getHiVRegFromLo
+)
+
+where
+
+import Outputable
+import Unique
+import Panic
+
+-- | An identifier for a real machine register.
+type RegNo 
+       = Int
+
+-- RealRegs are machine regs which are available for allocation, in
+--     the usual way.  We know what class they are, because that's part of
+--     the processor's architecture.
+
+-- VirtualRegs are virtual registers.  The register allocator will
+--     eventually have to map them into RealRegs, or into spill slots.
+--
+--     VirtualRegs are allocated on the fly, usually to represent a single
+--     value in the abstract assembly code (i.e. dynamic registers are
+--     usually single assignment).  
+--
+--     With the new register allocator, the
+--     single assignment restriction isn't necessary to get correct code,
+--     although a better register allocation will result if single
+--     assignment is used -- because the allocator maps a VirtualReg into
+--     a single RealReg, even if the VirtualReg has multiple live ranges.
+
+--     Virtual regs can be of either class, so that info is attached.
+data Reg
+       = RealReg      {-# UNPACK #-} !RegNo
+       | VirtualRegI  {-# UNPACK #-} !Unique
+       | VirtualRegHi {-# UNPACK #-} !Unique  -- High part of 2-word register
+       | VirtualRegF  {-# UNPACK #-} !Unique
+       | VirtualRegD  {-# UNPACK #-} !Unique
+       deriving (Eq, Ord)
+
+
+-- We like to have Uniques for Reg so that we can make UniqFM and UniqSets 
+-- in the register allocator.
+instance Uniquable Reg where
+       getUnique (RealReg i)      = mkUnique 'C' i
+       getUnique (VirtualRegI u)  = u
+       getUnique (VirtualRegHi u) = u
+       getUnique (VirtualRegF u)  = u
+       getUnique (VirtualRegD u)  = u
+
+
+-- | Print a reg in a generic manner
+--     If you want the architecture specific names, then use the pprReg 
+--     function from the appropriate Ppr module.
+instance Outputable Reg where
+       ppr reg
+        = case reg of
+               RealReg i       -> text "%r"    <> int i
+               VirtualRegI  u  -> text "%vI_"  <> pprUnique u
+               VirtualRegHi u  -> text "%vHi_" <> pprUnique u
+               VirtualRegF  u  -> text "%vF_"  <> pprUnique u
+               VirtualRegD  u  -> text "%vD_"  <> pprUnique u
+
+
+
+isRealReg :: Reg -> Bool
+isRealReg = not . isVirtualReg
+
+-- | Take the RegNo from a real reg
+unRealReg :: Reg -> RegNo
+unRealReg (RealReg i)  = i
+unRealReg _            = panic "unRealReg on VirtualReg"
+
+isVirtualReg :: Reg -> Bool
+isVirtualReg (RealReg _)      = False
+isVirtualReg (VirtualRegI _)  = True
+isVirtualReg (VirtualRegHi _) = True
+isVirtualReg (VirtualRegF _)  = True
+isVirtualReg (VirtualRegD _)  = True
+
+
+renameVirtualReg :: Unique -> Reg -> Reg
+renameVirtualReg u r
+ = case r of
+       RealReg _       -> error "renameVirtualReg: can't change unique on a real reg"
+       VirtualRegI _   -> VirtualRegI  u
+       VirtualRegHi _  -> VirtualRegHi u
+       VirtualRegF _   -> VirtualRegF  u
+       VirtualRegD _   -> VirtualRegD  u
+
+-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
+-- when supplied with the vreg for the lower-half of the quantity.
+-- (NB. Not reversible).
+getHiVRegFromLo :: Reg -> Reg
+getHiVRegFromLo (VirtualRegI u) 
+   = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H'
+
+getHiVRegFromLo _ 
+   = panic "RegsBase.getHiVRegFromLo"
+
+
diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs

index 18e4b0e..8521e92 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
@@ -8,11 +8,11 @@ module RegAlloc.Graph.Coalesce (
  
  where
  
  
  where
  
-import Cmm
-import Regs
  import RegAlloc.Liveness
  import RegAlloc.Liveness
-import RegAllocInfo
+import Instruction
+import Reg
  
  
+import Cmm
  import Bag
  import UniqFM
  import UniqSet
  import Bag
  import UniqFM
  import UniqSet
@@ -26,7 +26,11 @@ import Data.List
  --     then the mov only serves to join live ranges. The two regs can be renamed to be 
  --     the same and the move instruction safely erased.
  
  --     then the mov only serves to join live ranges. The two regs can be renamed to be 
  --     the same and the move instruction safely erased.
  
-regCoalesce :: [LiveCmmTop] -> UniqSM [LiveCmmTop]
+regCoalesce 
+       :: Instruction instr
+       => [LiveCmmTop instr] 
+       -> UniqSM [LiveCmmTop instr]
+
  regCoalesce code
   = do  
         let joins       = foldl' unionBags emptyBag
  regCoalesce code
   = do  
         let joins       = foldl' unionBags emptyBag
@@ -57,7 +61,11 @@ sinkReg fm r
  --     During a mov, if the source reg dies and the destiation reg is born
  --     then we can rename the two regs to the same thing and eliminate the move.
  --
  --     During a mov, if the source reg dies and the destiation reg is born
  --     then we can rename the two regs to the same thing and eliminate the move.
  --
-slurpJoinMovs :: LiveCmmTop -> Bag (Reg, Reg)
+slurpJoinMovs 
+       :: Instruction instr
+       => LiveCmmTop instr 
+       -> Bag (Reg, Reg)
+
  slurpJoinMovs live
         = slurpCmm emptyBag live
   where 
  slurpJoinMovs live
         = slurpCmm emptyBag live
   where 
@@ -68,7 +76,7 @@ slurpJoinMovs live
                  
          slurpLI    rs (Instr _ Nothing)                 = rs
         slurpLI    rs (Instr instr (Just live))
                  
          slurpLI    rs (Instr _ Nothing)                 = rs
         slurpLI    rs (Instr instr (Just live))
-               | Just (r1, r2) <- isRegRegMove instr
+               | Just (r1, r2) <- takeRegRegMoveInstr instr
                 , elementOfUniqSet r1 $ liveDieRead live
                 , elementOfUniqSet r2 $ liveBorn live
  
                 , elementOfUniqSet r1 $ liveDieRead live
                 , elementOfUniqSet r2 $ liveBorn live
  
@@ -80,4 +88,7 @@ slurpJoinMovs live
                 | otherwise
                 = rs
         
                 | otherwise
                 = rs
         
+       slurpLI    rs SPILL{}   = rs
+       slurpLI    rs RELOAD{}  = rs
+               
         
         
diff --git a/compiler/nativeGen/RegAlloc/Graph/Main.hs b/compiler/nativeGen/RegAlloc/Graph/Main.hs

index fe99aba..2e58461 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Graph/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Main.hs
@@ -5,8 +5,7 @@
  --
  
  module RegAlloc.Graph.Main ( 
  --
  
  module RegAlloc.Graph.Main ( 
-       regAlloc,
-       regDotColor
+       regAlloc
  ) 
  
  where
  ) 
  
  where
@@ -17,9 +16,12 @@ import RegAlloc.Graph.Spill
  import RegAlloc.Graph.SpillClean
  import RegAlloc.Graph.SpillCost
  import RegAlloc.Graph.Stats
  import RegAlloc.Graph.SpillClean
  import RegAlloc.Graph.SpillCost
  import RegAlloc.Graph.Stats
-import Regs
-import Instrs
-import PprMach
+import RegAlloc.Graph.TrivColorable
+import Instruction
+import TargetReg
+import RegClass
+import Reg
+
  
  import UniqSupply
  import UniqSet
  
  import UniqSupply
  import UniqSet
@@ -43,18 +45,26 @@ maxSpinCount        = 10
  -- | The top level of the graph coloring register allocator.
  --     
  regAlloc
  -- | The top level of the graph coloring register allocator.
  --     
  regAlloc
-       :: DynFlags
+       :: (Outputable instr, Instruction instr)
+       => DynFlags
         -> UniqFM (UniqSet Reg)         -- ^ the registers we can use for allocation
         -> UniqSet Int                  -- ^ the set of available spill slots.
         -> UniqFM (UniqSet Reg)         -- ^ the registers we can use for allocation
         -> UniqSet Int                  -- ^ the set of available spill slots.
-       -> [LiveCmmTop]                 -- ^ code annotated with liveness information.
-       -> UniqSM ( [NatCmmTop], [RegAllocStats] )
+       -> [LiveCmmTop instr]           -- ^ code annotated with liveness information.
+       -> UniqSM ( [NatCmmTop instr], [RegAllocStats instr] )
             -- ^ code with registers allocated and stats for each stage of
             -- allocation
                 
  regAlloc dflags regsFree slotsFree code
   = do
             -- ^ code with registers allocated and stats for each stage of
             -- allocation
                 
  regAlloc dflags regsFree slotsFree code
   = do
+       -- TODO: the regClass function is currently hard coded to the default target
+       --       architecture. Would prefer to determine this from dflags.
+       --       There are other uses of targetRegClass later in this module.
+       let triv = trivColorable targetRegClass
+
         (code_final, debug_codeGraphs, _)
         (code_final, debug_codeGraphs, _)
-               <- regAlloc_spin dflags 0 trivColorable regsFree slotsFree [] code
+               <- regAlloc_spin dflags 0 
+                       triv
+                       regsFree slotsFree [] code
         
         return  ( code_final
                 , reverse debug_codeGraphs )
         
         return  ( code_final
                 , reverse debug_codeGraphs )
@@ -74,7 +84,7 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
          $ pprPanic "regAlloc_spin: max build/spill cycle count exceeded."
                 (  text "It looks like the register allocator is stuck in an infinite loop."
                 $$ text "max cycles  = " <> int maxSpinCount
          $ pprPanic "regAlloc_spin: max build/spill cycle count exceeded."
                 (  text "It looks like the register allocator is stuck in an infinite loop."
                 $$ text "max cycles  = " <> int maxSpinCount
-               $$ text "regsFree    = " <> (hcat       $ punctuate space $ map (docToSDoc . pprUserReg)
+               $$ text "regsFree    = " <> (hcat       $ punctuate space $ map ppr
                                                 $ uniqSetToList $ unionManyUniqSets $ eltsUFM regsFree)
                 $$ text "slotsFree   = " <> ppr (sizeUniqSet slotsFree))
  
                                                 $ uniqSetToList $ unionManyUniqSets $ eltsUFM regsFree)
                 $$ text "slotsFree   = " <> ppr (sizeUniqSet slotsFree))
  
@@ -139,12 +149,12 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
                 -- clean out unneeded SPILL/RELOADs
                 let code_spillclean     = map cleanSpills code_patched
  
                 -- clean out unneeded SPILL/RELOADs
                 let code_spillclean     = map cleanSpills code_patched
  
-               -- strip off liveness information
-               let code_nat            = map stripLive code_spillclean
+               -- strip off liveness information, 
+               --      and rewrite SPILL/RELOAD pseudos into real instructions along the way
+               let code_final          = map stripLive code_spillclean
  
  
-               -- rewrite SPILL/RELOAD pseudos into real instructions
-               let spillNatTop         = mapGenBlockTop spillNatBlock
-               let code_final          = map spillNatTop code_nat
+--             let spillNatTop         = mapGenBlockTop spillNatBlock
+--             let code_final          = map spillNatTop code_nat
                 
                 -- record what happened in this stage for debugging
                 let stat                =
                 
                 -- record what happened in this stage for debugging
                 let stat                =
@@ -213,7 +223,8 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
  -- | Build a graph from the liveness and coalesce information in this code.
  
  buildGraph 
  -- | Build a graph from the liveness and coalesce information in this code.
  
  buildGraph 
-       :: [LiveCmmTop]
+       :: Instruction instr
+       => [LiveCmmTop instr]
         -> UniqSM (Color.Graph Reg RegClass Reg)
         
  buildGraph code
         -> UniqSM (Color.Graph Reg RegClass Reg)
         
  buildGraph code
@@ -248,8 +259,8 @@ graphAddConflictSet set graph
   = let reals           = filterUFM isRealReg set
         virtuals        = filterUFM (not . isRealReg) set
   
   = let reals           = filterUFM isRealReg set
         virtuals        = filterUFM (not . isRealReg) set
   
-       graph1  = Color.addConflicts virtuals regClass graph
-       graph2  = foldr (\(r1, r2) -> Color.addExclusion r1 regClass r2)
+       graph1  = Color.addConflicts virtuals targetRegClass graph
+       graph2  = foldr (\(r1, r2) -> Color.addExclusion r1 targetRegClass r2)
                         graph1
                         [ (a, b) 
                                 | a <- uniqSetToList virtuals
                         graph1
                         [ (a, b) 
                                 | a <- uniqSetToList virtuals
@@ -276,13 +287,14 @@ graphAddCoalesce (r1, r2) graph
         | otherwise
         = Color.addCoalesce (regWithClass r1) (regWithClass r2) graph
  
         | otherwise
         = Color.addCoalesce (regWithClass r1) (regWithClass r2) graph
  
-       where   regWithClass r  = (r, regClass r)
+       where   regWithClass r  = (r, targetRegClass r)
  
  
  -- | Patch registers in code using the reg -> reg mapping in this graph.
  patchRegsFromGraph 
  
  
  -- | Patch registers in code using the reg -> reg mapping in this graph.
  patchRegsFromGraph 
-       :: Color.Graph Reg RegClass Reg
-       -> LiveCmmTop -> LiveCmmTop
+       :: (Outputable instr, Instruction instr)
+       => Color.Graph Reg RegClass Reg
+       -> LiveCmmTop instr -> LiveCmmTop instr
  
  patchRegsFromGraph graph code
   = let
  
  patchRegsFromGraph graph code
   = let
@@ -303,7 +315,7 @@ patchRegsFromGraph graph code
                 = pprPanic "patchRegsFromGraph: register mapping failed." 
                         (  text "There is no node in the graph for register " <> ppr reg
                         $$ ppr code
                 = pprPanic "patchRegsFromGraph: register mapping failed." 
                         (  text "There is no node in the graph for register " <> ppr reg
                         $$ ppr code
-                       $$ Color.dotGraph (\_ -> text "white") trivColorable graph)
+                       $$ Color.dotGraph (\_ -> text "white") (trivColorable targetRegClass) graph)
  
     in  patchEraseLive patchF code
     
  
     in  patchEraseLive patchF code
     
diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs

index b5a6451..e6e5622 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Spill.hs
@@ -10,9 +10,8 @@ module RegAlloc.Graph.Spill (
  where
  
  import RegAlloc.Liveness
  where
  
  import RegAlloc.Liveness
-import RegAllocInfo
-import Regs
-import Instrs
+import Instruction
+import Reg
  import Cmm
  
  import State
  import Cmm
  
  import State
@@ -35,11 +34,12 @@ import Data.Maybe
  --             address the spill slot directly.
  --
  regSpill
  --             address the spill slot directly.
  --
  regSpill
-       :: [LiveCmmTop]                 -- ^ the code
+       :: Instruction instr
+       => [LiveCmmTop instr]           -- ^ the code
         -> UniqSet Int                  -- ^ available stack slots
         -> UniqSet Reg                  -- ^ the regs to spill
         -> UniqSM
         -> UniqSet Int                  -- ^ available stack slots
         -> UniqSet Reg                  -- ^ the regs to spill
         -> UniqSM
-               ([LiveCmmTop]           -- code will spill instructions
+               ([LiveCmmTop instr]     -- code will spill instructions
                 , UniqSet Int           -- left over slots
                 , SpillStats )          -- stats about what happened during spilling
  
                 , UniqSet Int           -- left over slots
                 , SpillStats )          -- stats about what happened during spilling
  
@@ -75,6 +75,20 @@ regSpill_block regSlotMap (BasicBlock i instrs)
   = do  instrss'        <- mapM (regSpill_instr regSlotMap) instrs
         return  $ BasicBlock i (concat instrss')
  
   = do  instrss'        <- mapM (regSpill_instr regSlotMap) instrs
         return  $ BasicBlock i (concat instrss')
  
+
+regSpill_instr
+       :: Instruction instr
+       => UniqFM Int 
+       -> LiveInstr instr -> SpillM [LiveInstr instr]
+
+-- | The thing we're spilling shouldn't already have spill or reloads in it
+regSpill_instr _ SPILL{}
+       = panic "regSpill_instr: unexpected SPILL"
+
+regSpill_instr _ RELOAD{}
+       = panic "regSpill_instr: unexpected RELOAD"
+
+
  regSpill_instr _       li@(Instr _ Nothing)
   = do  return [li]
  
  regSpill_instr _       li@(Instr _ Nothing)
   = do  return [li]
  
@@ -82,7 +96,7 @@ regSpill_instr regSlotMap
         (Instr instr (Just _))
   = do
         -- work out which regs are read and written in this instr
         (Instr instr (Just _))
   = do
         -- work out which regs are read and written in this instr
-       let RU rlRead rlWritten = regUsage instr
+       let RU rlRead rlWritten = regUsageOfInstr instr
  
         -- sometimes a register is listed as being read more than once,
         --      nub this so we don't end up inserting two lots of spill code.
  
         -- sometimes a register is listed as being read more than once,
         --      nub this so we don't end up inserting two lots of spill code.
@@ -109,9 +123,9 @@ regSpill_instr regSlotMap
         let postfixes                   = concat mPostfixes
  
         -- final code
         let postfixes                   = concat mPostfixes
  
         -- final code
-       let instrs'     =  map (\i -> Instr i Nothing) prefixes
-                       ++ [ Instr instr3 Nothing ]
-                       ++ map (\i -> Instr i Nothing) postfixes
+       let instrs'     =  prefixes
+                       ++ [Instr instr3 Nothing]
+                       ++ postfixes
  
         return
  {-             $ pprTrace "* regSpill_instr spill"
  
         return
  {-             $ pprTrace "* regSpill_instr spill"
@@ -139,6 +153,7 @@ spillRead regSlotMap instr reg
  
         | otherwise     = panic "RegSpill.spillRead: no slot defined for spilled reg"
  
  
         | otherwise     = panic "RegSpill.spillRead: no slot defined for spilled reg"
  
+
  spillWrite regSlotMap instr reg
         | Just slot     <- lookupUFM regSlotMap reg
         = do    (instr', nReg)  <- patchInstr reg instr
  spillWrite regSlotMap instr reg
         | Just slot     <- lookupUFM regSlotMap reg
         = do    (instr', nReg)  <- patchInstr reg instr
@@ -152,6 +167,7 @@ spillWrite regSlotMap instr reg
  
         | otherwise     = panic "RegSpill.spillWrite: no slot defined for spilled reg"
  
  
         | otherwise     = panic "RegSpill.spillWrite: no slot defined for spilled reg"
  
+
  spillModify regSlotMap instr reg
         | Just slot     <- lookupUFM regSlotMap reg
         = do    (instr', nReg)  <- patchInstr reg instr
  spillModify regSlotMap instr reg
         | Just slot     <- lookupUFM regSlotMap reg
         = do    (instr', nReg)  <- patchInstr reg instr
@@ -168,19 +184,25 @@ spillModify regSlotMap instr reg
  
  
  -- | rewrite uses of this virtual reg in an instr to use a different virtual reg
  
  
  -- | rewrite uses of this virtual reg in an instr to use a different virtual reg
-patchInstr :: Reg -> Instr -> SpillM (Instr, Reg)
+patchInstr 
+       :: Instruction instr
+       => Reg -> instr -> SpillM (instr, Reg)
+
  patchInstr reg instr
   = do  nUnique         <- newUnique
         let nReg        = renameVirtualReg nUnique reg
         let instr'      = patchReg1 reg nReg instr
         return          (instr', nReg)
  
  patchInstr reg instr
   = do  nUnique         <- newUnique
         let nReg        = renameVirtualReg nUnique reg
         let instr'      = patchReg1 reg nReg instr
         return          (instr', nReg)
  
-patchReg1 :: Reg -> Reg -> Instr -> Instr
+patchReg1 
+       :: Instruction instr
+       => Reg -> Reg -> instr -> instr
+
  patchReg1 old new instr
   = let patchF r
                 | r == old      = new
                 | otherwise     = r
  patchReg1 old new instr
   = let patchF r
                 | r == old      = new
                 | otherwise     = r
-   in  patchRegs instr patchF
+   in  patchRegsOfInstr instr patchF
  
  
  ------------------------------------------------------
  
  
  ------------------------------------------------------
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs

index b68648b..4f129c4 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
@@ -29,13 +29,12 @@ module RegAlloc.Graph.SpillClean (
  )
  where
  
  )
  where
  
-import BlockId
  import RegAlloc.Liveness
  import RegAlloc.Liveness
-import RegAllocInfo
-import Regs
-import Instrs
-import Cmm
+import Instruction
+import Reg
  
  
+import BlockId
+import Cmm
  import UniqSet
  import UniqFM
  import Unique
  import UniqSet
  import UniqFM
  import Unique
@@ -51,12 +50,19 @@ type Slot = Int
  
  
  -- | Clean out unneeded spill\/reloads from this top level thing.
  
  
  -- | Clean out unneeded spill\/reloads from this top level thing.
-cleanSpills :: LiveCmmTop -> LiveCmmTop
+cleanSpills 
+       :: Instruction instr
+       => LiveCmmTop instr -> LiveCmmTop instr
+
  cleanSpills cmm
         = evalState (cleanSpin 0 cmm) initCleanS
  
  -- | do one pass of cleaning
  cleanSpills cmm
         = evalState (cleanSpin 0 cmm) initCleanS
  
  -- | do one pass of cleaning
-cleanSpin :: Int -> LiveCmmTop -> CleanM LiveCmmTop
+cleanSpin 
+       :: Instruction instr
+       => Int 
+       -> LiveCmmTop instr 
+       -> CleanM (LiveCmmTop instr)
  
  {-
  cleanSpin spinCount code
  
  {-
  cleanSpin spinCount code
@@ -103,7 +109,11 @@ cleanSpin spinCount code
  
  
  -- | Clean one basic block
  
  
  -- | Clean one basic block
-cleanBlockForward :: LiveBasicBlock -> CleanM LiveBasicBlock
+cleanBlockForward 
+       :: Instruction instr
+       => LiveBasicBlock instr 
+       -> CleanM (LiveBasicBlock instr)
+
  cleanBlockForward (BasicBlock blockId instrs)
   = do
         -- see if we have a valid association for the entry to this block
  cleanBlockForward (BasicBlock blockId instrs)
   = do
         -- see if we have a valid association for the entry to this block
@@ -116,7 +126,11 @@ cleanBlockForward (BasicBlock blockId instrs)
         return  $ BasicBlock blockId instrs_reload
  
  
         return  $ BasicBlock blockId instrs_reload
  
  
-cleanBlockBackward :: LiveBasicBlock -> CleanM LiveBasicBlock
+cleanBlockBackward 
+       :: Instruction instr
+       => LiveBasicBlock instr 
+       -> CleanM (LiveBasicBlock instr)
+
  cleanBlockBackward (BasicBlock blockId instrs)
   = do  instrs_spill    <- cleanBackward  emptyUniqSet  [] instrs
         return  $ BasicBlock blockId instrs_spill
  cleanBlockBackward (BasicBlock blockId instrs)
   = do  instrs_spill    <- cleanBackward  emptyUniqSet  [] instrs
         return  $ BasicBlock blockId instrs_spill
@@ -130,11 +144,12 @@ cleanBlockBackward (BasicBlock blockId instrs)
  --       then we don't need to do the reload.
  --
  cleanForward
  --       then we don't need to do the reload.
  --
  cleanForward
-       :: BlockId              -- ^ the block that we're currently in
-       -> Assoc Store          -- ^ two store locations are associated if they have the same value
-       -> [LiveInstr]          -- ^ acc
-       -> [LiveInstr]          -- ^ instrs to clean (in backwards order)
-       -> CleanM [LiveInstr]   -- ^ cleaned instrs  (in forward   order)
+       :: Instruction instr
+       => BlockId                      -- ^ the block that we're currently in
+       -> Assoc Store                  -- ^ two store locations are associated if they have the same value
+       -> [LiveInstr instr]            -- ^ acc
+       -> [LiveInstr instr]            -- ^ instrs to clean (in backwards order)
+       -> CleanM [LiveInstr instr]     -- ^ cleaned instrs  (in forward   order)
  
  cleanForward _ _ acc []
         = return acc
  
  cleanForward _ _ acc []
         = return acc
@@ -142,19 +157,19 @@ cleanForward _ _ acc []
  -- write out live range joins via spill slots to just a spill and a reg-reg move
  --     hopefully the spill will be also be cleaned in the next pass
  --
  -- write out live range joins via spill slots to just a spill and a reg-reg move
  --     hopefully the spill will be also be cleaned in the next pass
  --
-cleanForward blockId assoc acc (Instr i1 live1 : Instr i2 _ : instrs)
+cleanForward blockId assoc acc (li1 : li2 : instrs)
  
  
-       | SPILL  reg1  slot1    <- i1
-       , RELOAD slot2 reg2     <- i2
+       | SPILL  reg1  slot1    <- li1
+       , RELOAD slot2 reg2     <- li2
         , slot1 == slot2
         = do
                 modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
                 cleanForward blockId assoc acc
         , slot1 == slot2
         = do
                 modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
                 cleanForward blockId assoc acc
-                       (Instr i1 live1 : Instr (mkRegRegMoveInstr reg1 reg2) Nothing : instrs)
+                       (li1 : Instr (mkRegRegMoveInstr reg1 reg2) Nothing : instrs)
  
  
  cleanForward blockId assoc acc (li@(Instr i1 _) : instrs)
  
  
  cleanForward blockId assoc acc (li@(Instr i1 _) : instrs)
-       | Just (r1, r2) <- isRegRegMove i1
+       | Just (r1, r2) <- takeRegRegMoveInstr i1
         = if r1 == r2
                 -- erase any left over nop reg reg moves while we're here
                 --      this will also catch any nop moves that the "write out live range joins" case above
         = if r1 == r2
                 -- erase any left over nop reg reg moves while we're here
                 --      this will also catch any nop moves that the "write out live range joins" case above
@@ -170,38 +185,50 @@ cleanForward blockId assoc acc (li@(Instr i1 _) : instrs)
                         cleanForward blockId assoc' (li : acc) instrs
  
  
                         cleanForward blockId assoc' (li : acc) instrs
  
  
-cleanForward blockId assoc acc (li@(Instr instr _) : instrs)
+cleanForward blockId assoc acc (li : instrs)
  
         -- update association due to the spill
  
         -- update association due to the spill
-       | SPILL reg slot        <- instr
+       | SPILL reg slot        <- li
         = let   assoc'  = addAssoc (SReg reg)  (SSlot slot)
                         $ delAssoc (SSlot slot)
                         $ assoc
           in    cleanForward blockId assoc' (li : acc) instrs
  
         -- clean a reload instr
         = let   assoc'  = addAssoc (SReg reg)  (SSlot slot)
                         $ delAssoc (SSlot slot)
                         $ assoc
           in    cleanForward blockId assoc' (li : acc) instrs
  
         -- clean a reload instr
-       | RELOAD{}              <- instr
+       | RELOAD{}              <- li
         = do    (assoc', mli)   <- cleanReload blockId assoc li
                 case mli of
                  Nothing        -> cleanForward blockId assoc' acc              instrs
                  Just li'       -> cleanForward blockId assoc' (li' : acc)      instrs
  
         -- remember the association over a jump
         = do    (assoc', mli)   <- cleanReload blockId assoc li
                 case mli of
                  Nothing        -> cleanForward blockId assoc' acc              instrs
                  Just li'       -> cleanForward blockId assoc' (li' : acc)      instrs
  
         -- remember the association over a jump
-       | targets       <- jumpDests instr []
+       | Instr instr _         <- li
+       , targets               <- jumpDestsOfInstr instr
         , not $ null targets
         = do    mapM_ (accJumpValid assoc) targets
                 cleanForward blockId assoc (li : acc) instrs
  
         -- writing to a reg changes its value.
         , not $ null targets
         = do    mapM_ (accJumpValid assoc) targets
                 cleanForward blockId assoc (li : acc) instrs
  
         -- writing to a reg changes its value.
-       | RU _ written  <- regUsage instr
+       | Instr instr _         <- li
+       , RU _ written          <- regUsageOfInstr instr
         = let assoc'    = foldr delAssoc assoc (map SReg $ nub written)
           in  cleanForward blockId assoc' (li : acc) instrs
  
         = let assoc'    = foldr delAssoc assoc (map SReg $ nub written)
           in  cleanForward blockId assoc' (li : acc) instrs
  
+-- bogus, to stop pattern match warning
+cleanForward _ _ _ _ 
+       = panic "RegAlloc.Graph.SpillClean.cleanForward: no match"
+
  
  -- | Try and rewrite a reload instruction to something more pleasing
  --
  
  -- | Try and rewrite a reload instruction to something more pleasing
  --
-cleanReload :: BlockId -> Assoc Store -> LiveInstr -> CleanM (Assoc Store, Maybe LiveInstr)
-cleanReload blockId assoc li@(Instr (RELOAD slot reg) _)
+cleanReload 
+       :: Instruction instr
+       => BlockId 
+       -> Assoc Store 
+       -> LiveInstr instr
+       -> CleanM (Assoc Store, Maybe (LiveInstr instr))
+
+cleanReload blockId assoc li@(RELOAD slot reg)
  
         -- if the reg we're reloading already has the same value as the slot
         --      then we can erase the instruction outright
  
         -- if the reg we're reloading already has the same value as the slot
         --      then we can erase the instruction outright
@@ -264,10 +291,10 @@ cleanReload _ _ _
  --      we should really be updating the noReloads set as we cross jumps also.
  --
  cleanBackward
  --      we should really be updating the noReloads set as we cross jumps also.
  --
  cleanBackward
-       :: UniqSet Int          -- ^ slots that have been spilled, but not reloaded from
-       -> [LiveInstr]          -- ^ acc
-       -> [LiveInstr]          -- ^ instrs to clean (in forwards order)
-       -> CleanM [LiveInstr]   -- ^ cleaned instrs  (in backwards order)
+       :: UniqSet Int                  -- ^ slots that have been spilled, but not reloaded from
+       -> [LiveInstr instr]            -- ^ acc
+       -> [LiveInstr instr]            -- ^ instrs to clean (in forwards order)
+       -> CleanM [LiveInstr instr]     -- ^ cleaned instrs  (in backwards order)
  
  
  cleanBackward noReloads acc lis
  
  
  cleanBackward noReloads acc lis
@@ -277,15 +304,15 @@ cleanBackward noReloads acc lis
  cleanBackward' _ _      acc []
         = return  acc
  
  cleanBackward' _ _      acc []
         = return  acc
  
-cleanBackward' reloadedBy noReloads acc (li@(Instr instr _) : instrs)
+cleanBackward' reloadedBy noReloads acc (li : instrs)
  
         -- if nothing ever reloads from this slot then we don't need the spill
  
         -- if nothing ever reloads from this slot then we don't need the spill
-       | SPILL _ slot  <- instr
+       | SPILL _ slot  <- li
         , Nothing       <- lookupUFM reloadedBy (SSlot slot)
         = do    modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
                 cleanBackward noReloads acc instrs
  
         , Nothing       <- lookupUFM reloadedBy (SSlot slot)
         = do    modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
                 cleanBackward noReloads acc instrs
  
-       | SPILL _ slot  <- instr
+       | SPILL _ slot  <- li
         = if elementOfUniqSet slot noReloads
  
            -- we can erase this spill because the slot won't be read until after the next one
         = if elementOfUniqSet slot noReloads
  
            -- we can erase this spill because the slot won't be read until after the next one
@@ -299,7 +326,7 @@ cleanBackward' reloadedBy noReloads acc (li@(Instr instr _) : instrs)
                 cleanBackward noReloads' (li : acc) instrs
  
         -- if we reload from a slot then it's no longer unused
                 cleanBackward noReloads' (li : acc) instrs
  
         -- if we reload from a slot then it's no longer unused
-       | RELOAD slot _         <- instr
+       | RELOAD slot _         <- li
         , noReloads'            <- delOneFromUniqSet noReloads slot
         = cleanBackward noReloads' (li : acc) instrs
  
         , noReloads'            <- delOneFromUniqSet noReloads slot
         = cleanBackward noReloads' (li : acc) instrs
  
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs

index 1d37cf7..d4dd75a 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
@@ -16,14 +16,16 @@ module RegAlloc.Graph.SpillCost (
  
  where
  
  
  where
  
-import GraphBase
  import RegAlloc.Liveness
  import RegAlloc.Liveness
-import RegAllocInfo
-import Instrs
-import Regs
+import Instruction
+import RegClass
+import Reg
+
+import GraphBase
+
+
  import BlockId
  import Cmm
  import BlockId
  import Cmm
-
  import UniqFM
  import UniqSet
  import Outputable
  import UniqFM
  import UniqSet
  import Outputable
@@ -62,7 +64,8 @@ plusSpillCostRecord (r1, a1, b1, c1) (r2, a2, b2, c2)
  --     and the number of instructions it was live on entry to (lifetime)
  --
  slurpSpillCostInfo
  --     and the number of instructions it was live on entry to (lifetime)
  --
  slurpSpillCostInfo
-       :: LiveCmmTop
+       :: (Outputable instr, Instruction instr)
+       => LiveCmmTop instr
         -> SpillCostInfo
  
  slurpSpillCostInfo cmm
         -> SpillCostInfo
  
  slurpSpillCostInfo cmm
@@ -89,11 +92,14 @@ slurpSpillCostInfo cmm
                 = return ()
  
         -- skip over comment and delta pseudo instrs
                 = return ()
  
         -- skip over comment and delta pseudo instrs
-       countLIs rsLive (Instr instr Nothing : lis)
-               | COMMENT{}     <- instr
+       countLIs rsLive (SPILL{} : lis)
+               = countLIs rsLive lis
+               
+       countLIs rsLive (RELOAD{} : lis)
                 = countLIs rsLive lis
  
                 = countLIs rsLive lis
  
-               | DELTA{}       <- instr
+       countLIs rsLive (Instr instr Nothing : lis)
+               | isMetaInstr instr
                 = countLIs rsLive lis
  
                 | otherwise
                 = countLIs rsLive lis
  
                 | otherwise
@@ -106,7 +112,7 @@ slurpSpillCostInfo cmm
                 mapM_ incLifetime $ uniqSetToList rsLiveEntry
  
                 -- increment counts for what regs were read/written from
                 mapM_ incLifetime $ uniqSetToList rsLiveEntry
  
                 -- increment counts for what regs were read/written from
-               let (RU read written)   = regUsage instr
+               let (RU read written)   = regUsageOfInstr instr
                 mapM_ incUses   $ filter (not . isRealReg) $ nub read
                 mapM_ incDefs   $ filter (not . isRealReg) $ nub written
  
                 mapM_ incUses   $ filter (not . isRealReg) $ nub read
                 mapM_ incDefs   $ filter (not . isRealReg) $ nub written
  
@@ -226,8 +232,11 @@ lifeMapFromSpillCostInfo info
  
  
  -- | Work out the degree (number of neighbors) of this node which have the same class.
  
  
  -- | Work out the degree (number of neighbors) of this node which have the same class.
-nodeDegree :: Graph Reg RegClass Reg -> Reg -> Int
-nodeDegree graph reg
+nodeDegree 
+       :: (Reg -> RegClass)
+       -> Graph Reg RegClass Reg -> Reg -> Int
+
+nodeDegree regClass graph reg
         | Just node     <- lookupUFM (graphMap graph) reg
         , virtConflicts <- length       $ filter (\r -> regClass r == regClass reg)
                                         $ uniqSetToList $ nodeConflicts node
         | Just node     <- lookupUFM (graphMap graph) reg
         , virtConflicts <- length       $ filter (\r -> regClass r == regClass reg)
                                         $ uniqSetToList $ nodeConflicts node
@@ -238,12 +247,17 @@ nodeDegree graph reg
  
  
  -- | Show a spill cost record, including the degree from the graph and final calulated spill cos
  
  
  -- | Show a spill cost record, including the degree from the graph and final calulated spill cos
-pprSpillCostRecord :: Graph Reg RegClass Reg -> SpillCostRecord -> SDoc
-pprSpillCostRecord graph (reg, uses, defs, life)
+pprSpillCostRecord 
+       :: (Reg -> RegClass)
+       -> (Reg -> SDoc)
+       -> Graph Reg RegClass Reg -> SpillCostRecord -> SDoc
+
+pprSpillCostRecord regClass pprReg graph (reg, uses, defs, life)
         =  hsep
         =  hsep
-       [ ppr reg
+       [ pprReg reg
         , ppr uses
         , ppr defs
         , ppr life
         , ppr uses
         , ppr defs
         , ppr life
-       , ppr $ nodeDegree graph reg
-       , text $ show $ (fromIntegral (uses + defs) / fromIntegral (nodeDegree graph reg) :: Float) ]
+       , ppr $ nodeDegree regClass graph reg
+       , text $ show $ (fromIntegral (uses + defs) 
+                       / fromIntegral (nodeDegree regClass graph reg) :: Float) ]
diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs

index 8082f9e..5e3dd32 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
@@ -5,7 +5,6 @@
  
  module RegAlloc.Graph.Stats (
         RegAllocStats (..),
  
  module RegAlloc.Graph.Stats (
         RegAllocStats (..),
-       regDotColor,
  
         pprStats,
         pprStatsSpills,
  
         pprStats,
         pprStatsSpills,
@@ -22,13 +21,13 @@ where
  
  import qualified GraphColor as Color
  import RegAlloc.Liveness
  
  import qualified GraphColor as Color
  import RegAlloc.Liveness
-import RegAllocInfo
  import RegAlloc.Graph.Spill
  import RegAlloc.Graph.SpillCost
  import RegAlloc.Graph.Spill
  import RegAlloc.Graph.SpillCost
-import Regs
-import Instrs
-import Cmm
+import Instruction
+import RegClass
+import Reg
  
  
+import Cmm
  import Outputable
  import UniqFM
  import UniqSet
  import Outputable
  import UniqFM
  import UniqSet
@@ -36,11 +35,11 @@ import State
  
  import Data.List
  
  
  import Data.List
  
-data RegAllocStats
+data RegAllocStats instr
  
         -- initial graph
         = RegAllocStatsStart
  
         -- initial graph
         = RegAllocStatsStart
-       { raLiveCmm     :: [LiveCmmTop]                   -- ^ initial code, with liveness
+       { raLiveCmm     :: [LiveCmmTop instr]             -- ^ initial code, with liveness
         , raGraph       :: Color.Graph Reg RegClass Reg   -- ^ the initial, uncolored graph
         , raSpillCosts  :: SpillCostInfo }                -- ^ information to help choose which regs to spill
  
         , raGraph       :: Color.Graph Reg RegClass Reg   -- ^ the initial, uncolored graph
         , raSpillCosts  :: SpillCostInfo }                -- ^ information to help choose which regs to spill
  
@@ -50,35 +49,35 @@ data RegAllocStats
         , raCoalesced   :: UniqFM Reg                   -- ^ the regs that were coaleced
         , raSpillStats  :: SpillStats                   -- ^ spiller stats
         , raSpillCosts  :: SpillCostInfo                -- ^ number of instrs each reg lives for
         , raCoalesced   :: UniqFM Reg                   -- ^ the regs that were coaleced
         , raSpillStats  :: SpillStats                   -- ^ spiller stats
         , raSpillCosts  :: SpillCostInfo                -- ^ number of instrs each reg lives for
-       , raSpilled     :: [LiveCmmTop] }               -- ^ code with spill instructions added
+       , raSpilled     :: [LiveCmmTop instr] }         -- ^ code with spill instructions added
  
         -- a successful coloring
         | RegAllocStatsColored
         { raGraph        :: Color.Graph Reg RegClass Reg -- ^ the uncolored graph
         , raGraphColored :: Color.Graph Reg RegClass Reg -- ^ the coalesced and colored graph
         , raCoalesced   :: UniqFM Reg                   -- ^ the regs that were coaleced
  
         -- a successful coloring
         | RegAllocStatsColored
         { raGraph        :: Color.Graph Reg RegClass Reg -- ^ the uncolored graph
         , raGraphColored :: Color.Graph Reg RegClass Reg -- ^ the coalesced and colored graph
         , raCoalesced   :: UniqFM Reg                   -- ^ the regs that were coaleced
-       , raPatched     :: [LiveCmmTop]                 -- ^ code with vregs replaced by hregs
-       , raSpillClean  :: [LiveCmmTop]                 -- ^ code with unneeded spill\/reloads cleaned out
-       , raFinal       :: [NatCmmTop]                  -- ^ final code
+       , raPatched     :: [LiveCmmTop instr]           -- ^ code with vregs replaced by hregs
+       , raSpillClean  :: [LiveCmmTop instr]           -- ^ code with unneeded spill\/reloads cleaned out
+       , raFinal       :: [NatCmmTop instr]            -- ^ final code
         , raSRMs        :: (Int, Int, Int) }            -- ^ spill\/reload\/reg-reg moves present in this code
  
         , raSRMs        :: (Int, Int, Int) }            -- ^ spill\/reload\/reg-reg moves present in this code
  
-instance Outputable RegAllocStats where
+instance Outputable instr => Outputable (RegAllocStats instr) where
  
   ppr (s@RegAllocStatsStart{})
         =  text "#  Start"
         $$ text "#  Native code with liveness information."
         $$ ppr (raLiveCmm s)
         $$ text ""
  
   ppr (s@RegAllocStatsStart{})
         =  text "#  Start"
         $$ text "#  Native code with liveness information."
         $$ ppr (raLiveCmm s)
         $$ text ""
-       $$ text "#  Initial register conflict graph."
-       $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+--     $$ text "#  Initial register conflict graph."
+--     $$ Color.dotGraph regDotColor trivColorable (raGraph s)
  
  
   ppr (s@RegAllocStatsSpill{})
         =  text "#  Spill"
  
  
  
   ppr (s@RegAllocStatsSpill{})
         =  text "#  Spill"
  
-       $$ text "#  Register conflict graph."
-       $$ Color.dotGraph regDotColor trivColorable (raGraph s)
-       $$ text ""
+--     $$ text "#  Register conflict graph."
+--     $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+--     $$ text ""
  
         $$ (if (not $ isNullUFM $ raCoalesced s)
                 then    text "#  Registers coalesced."
  
         $$ (if (not $ isNullUFM $ raCoalesced s)
                 then    text "#  Registers coalesced."
@@ -86,9 +85,9 @@ instance Outputable RegAllocStats where
                         $$ text ""
                 else empty)
  
                         $$ text ""
                 else empty)
  
-       $$ text "#  Spill costs.  reg uses defs lifetime degree cost"
-       $$ vcat (map (pprSpillCostRecord (raGraph s)) $ eltsUFM $ raSpillCosts s)
-       $$ text ""
+--     $$ text "#  Spill costs.  reg uses defs lifetime degree cost"
+--     $$ vcat (map (pprSpillCostRecord (raGraph s)) $ eltsUFM $ raSpillCosts s)
+--     $$ text ""
  
         $$ text "#  Spills inserted."
         $$ ppr (raSpillStats s)
  
         $$ text "#  Spills inserted."
         $$ ppr (raSpillStats s)
@@ -101,13 +100,13 @@ instance Outputable RegAllocStats where
   ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) })
         =  text "#  Colored"
  
   ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) })
         =  text "#  Colored"
  
-       $$ text "#  Register conflict graph (initial)."
-       $$ Color.dotGraph regDotColor trivColorable (raGraph s)
-       $$ text ""
+--     $$ text "#  Register conflict graph (initial)."
+--     $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+--     $$ text ""
  
  
-       $$ text "#  Register conflict graph (colored)."
-       $$ Color.dotGraph regDotColor trivColorable (raGraphColored s)
-       $$ text ""
+--     $$ text "#  Register conflict graph (colored)."
+--     $$ Color.dotGraph regDotColor trivColorable (raGraphColored s)
+--     $$ text ""
  
         $$ (if (not $ isNullUFM $ raCoalesced s)
                 then    text "#  Registers coalesced."
  
         $$ (if (not $ isNullUFM $ raCoalesced s)
                 then    text "#  Registers coalesced."
@@ -133,7 +132,7 @@ instance Outputable RegAllocStats where
         $$ text ""
  
  -- | Do all the different analysis on this list of RegAllocStats
         $$ text ""
  
  -- | Do all the different analysis on this list of RegAllocStats
-pprStats :: [RegAllocStats] -> Color.Graph Reg RegClass Reg -> SDoc
+pprStats :: [RegAllocStats instr] -> Color.Graph Reg RegClass Reg -> SDoc
  pprStats stats graph
   = let         outSpills       = pprStatsSpills    stats
         outLife         = pprStatsLifetimes stats
  pprStats stats graph
   = let         outSpills       = pprStatsSpills    stats
         outLife         = pprStatsLifetimes stats
@@ -145,7 +144,7 @@ pprStats stats graph
  
  -- | Dump a table of how many spill loads \/ stores were inserted for each vreg.
  pprStatsSpills
  
  -- | Dump a table of how many spill loads \/ stores were inserted for each vreg.
  pprStatsSpills
-       :: [RegAllocStats] -> SDoc
+       :: [RegAllocStats instr] -> SDoc
  
  pprStatsSpills stats
   = let
  
  pprStatsSpills stats
   = let
@@ -163,7 +162,7 @@ pprStatsSpills stats
  
  -- | Dump a table of how long vregs tend to live for in the initial code.
  pprStatsLifetimes
  
  -- | Dump a table of how long vregs tend to live for in the initial code.
  pprStatsLifetimes
-       :: [RegAllocStats] -> SDoc
+       :: [RegAllocStats instr] -> SDoc
  
  pprStatsLifetimes stats
   = let info            = foldl' plusSpillCostInfo zeroSpillCostInfo
  
  pprStatsLifetimes stats
   = let info            = foldl' plusSpillCostInfo zeroSpillCostInfo
@@ -191,7 +190,7 @@ binLifetimeCount fm
  
  -- | Dump a table of how many conflicts vregs tend to have in the initial code.
  pprStatsConflict
  
  -- | Dump a table of how many conflicts vregs tend to have in the initial code.
  pprStatsConflict
-       :: [RegAllocStats] -> SDoc
+       :: [RegAllocStats instr] -> SDoc
  
  pprStatsConflict stats
   = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2)))
  
  pprStatsConflict stats
   = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2)))
@@ -208,7 +207,7 @@ pprStatsConflict stats
  -- | For every vreg, dump it's how many conflicts it has and its lifetime
  --     good for making a scatter plot.
  pprStatsLifeConflict
  -- | For every vreg, dump it's how many conflicts it has and its lifetime
  --     good for making a scatter plot.
  pprStatsLifeConflict
-       :: [RegAllocStats]
+       :: [RegAllocStats instr]
         -> Color.Graph Reg RegClass Reg         -- ^ global register conflict graph
         -> SDoc
  
         -> Color.Graph Reg RegClass Reg         -- ^ global register conflict graph
         -> SDoc
  
@@ -238,7 +237,10 @@ pprStatsLifeConflict stats graph
  -- | Count spill/reload/reg-reg moves.
  --     Lets us see how well the register allocator has done.
  --
  -- | Count spill/reload/reg-reg moves.
  --     Lets us see how well the register allocator has done.
  --
-countSRMs :: LiveCmmTop -> (Int, Int, Int)
+countSRMs 
+       :: Instruction instr
+       => LiveCmmTop instr -> (Int, Int, Int)
+
  countSRMs cmm
         = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0)
  
  countSRMs cmm
         = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0)
  
@@ -246,16 +248,17 @@ countSRM_block (BasicBlock i instrs)
   = do  instrs' <- mapM countSRM_instr instrs
         return  $ BasicBlock i instrs'
  
   = do  instrs' <- mapM countSRM_instr instrs
         return  $ BasicBlock i instrs'
  
-countSRM_instr li@(Instr instr _)
-       | SPILL _ _     <- instr
+countSRM_instr li
+       | SPILL _ _     <- li
         = do    modify  $ \(s, r, m)    -> (s + 1, r, m)
                 return li
  
         = do    modify  $ \(s, r, m)    -> (s + 1, r, m)
                 return li
  
-       | RELOAD _ _    <- instr
+       | RELOAD _ _    <- li
         = do    modify  $ \(s, r, m)    -> (s, r + 1, m)
                 return li
  
         = do    modify  $ \(s, r, m)    -> (s, r + 1, m)
                 return li
  
-       | Just _                <- isRegRegMove instr
+       | Instr instr _ <- li
+       , Just _        <- takeRegRegMoveInstr instr
         = do    modify  $ \(s, r, m)    -> (s, r, m + 1)
                 return li
  
         = do    modify  $ \(s, r, m)    -> (s, r, m + 1)
                 return li
  
@@ -266,77 +269,9 @@ countSRM_instr li@(Instr instr _)
  addSRM (s1, r1, m1) (s2, r2, m2)
         = (s1+s2, r1+r2, m1+m2)
  
  addSRM (s1, r1, m1) (s2, r2, m2)
         = (s1+s2, r1+r2, m1+m2)
  
------
--- Register colors for drawing conflict graphs
---     Keep this out of MachRegs.hs because it's specific to the graph coloring allocator.
-
-
--- reg colors for x86
-#if i386_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = let Just    str     = lookupUFM regColors reg
-   in  text str
-
-regColors
- = listToUFM
- $     [ (eax, "#00ff00")
-       , (ebx, "#0000ff")
-       , (ecx, "#00ffff")
-       , (edx, "#0080ff")
-
-       , (fake0, "#ff00ff")
-       , (fake1, "#ff00aa")
-       , (fake2, "#aa00ff")
-       , (fake3, "#aa00aa")
-       , (fake4, "#ff0055")
-       , (fake5, "#5500ff") ]
-
-
--- reg colors for x86_64
-#elif x86_64_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = let Just    str     = lookupUFM regColors reg
-   in  text str
-
-regColors
- = listToUFM
- $     [ (rax, "#00ff00"), (eax, "#00ff00")
-       , (rbx, "#0000ff"), (ebx, "#0000ff")
-       , (rcx, "#00ffff"), (ecx, "#00ffff")
-       , (rdx, "#0080ff"), (edx, "#00ffff")
-       , (r8,  "#00ff80")
-       , (r9,  "#008080")
-       , (r10, "#0040ff")
-       , (r11, "#00ff40")
-       , (r12, "#008040")
-       , (r13, "#004080")
-       , (r14, "#004040")
-       , (r15, "#002080") ]
-
-       ++ zip (map RealReg [16..31]) (repeat "red")
-
-
--- reg colors for ppc
-#elif powerpc_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = case regClass reg of
-       RcInteger       -> text "blue"
-       RcFloat         -> text "red"
-       RcDouble        -> text "green"
-
-#elif sparc_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = case regClass reg of
-       RcInteger       -> text "blue"
-       RcFloat         -> text "red"
-       RcDouble        -> text "green"
-#else
-#error ToDo: regDotColor
-#endif
+
+
+
  
  
  {-
  
  
  {-
diff --git a/compiler/nativeGen/Regs.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs

similarity index 55%

rename from compiler/nativeGen/Regs.hs

rename to compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs

index 5239520..6a7211d 100644 (file)
--- a/compiler/nativeGen/Regs.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
@@ -1,164 +1,21 @@
--- -----------------------------------------------------------------------------
---
--- (c) The University of Glasgow 1994-2004
--- 
--- Machine-specific info about registers.
--- 
--- Also includes stuff about immediate operands, which are
--- often/usually quite entangled with registers.
--- 
--- -----------------------------------------------------------------------------
-
-#include "nativeGen/NCG.h"
-
-module Regs (
-       --------------------------------
-       -- Generic things, shared by all architectures.
-       module RegsBase,        
-       getHiVRegFromLo,
-       get_GlobalReg_reg_or_addr,
-       allocatableRegs,
-       allocatableRegsInClass,
-       trivColorable,
-
-       --------------------------------
-       -- Things that are defined by the arch specific module.
-       --
-
-       -- sizes
-       Size(..),
-       intSize, 
-       floatSize, 
-       isFloatSize, 
-       wordSize, 
-       cmmTypeSize, 
-       sizeToWidth,
-       mkVReg,
-
-       -- immediates
-       Imm(..), 
-       strImmLit, 
-       litToImm,
-
-       -- addressing modes
-       AddrMode(..),
-       addrOffset,
-
-       -- registers
-       spRel,
-       argRegs, 
-       allArgRegs, 
-       callClobberedRegs,
-       allMachRegNos,
-       regClass,
-       showReg,
-
-       -- machine specific things
-#if   powerpc_TARGET_ARCH
-       allFPArgRegs,
-       fits16Bits,
-       makeImmediate,
-       fReg,
-       sp, r3, r4, r27, r28, f1, f20, f21,
-
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-       EABase(..), EAIndex(..), addrModeRegs,
-       
-       eax, ebx, ecx, edx, esi, edi, ebp, esp,
-       fake0, fake1, fake2, fake3, fake4, fake5,
-       rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
-       r8, r9, r10, r11, r12, r13, r14, r15,
-       xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
-       xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
-       xmm,
-
-       ripRel,
-       allFPArgRegs,
  
  
-#elif sparc_TARGET_ARCH
-       fpRel,
-       fits13Bits, 
-       largeOffsetError,
-       gReg, iReg, lReg, oReg, fReg,
-       fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
-       nCG_FirstFloatReg,
-#endif
-       -- horror show
-       freeReg,
-       globalRegMaybe  
-) 
+module RegAlloc.Graph.TrivColorable (
+       trivColorable,
+)
  
  where
  
  #include "HsVersions.h"
  
  where
  
  #include "HsVersions.h"
-#include "../includes/MachRegs.h"
-
-import Cmm
-import CgUtils          ( get_GlobalReg_addr )
-import Outputable      ( Outputable(..), pprPanic )
-import qualified Outputable
-import Panic
-import Unique
-import UniqSet
-import FastTypes
-import FastBool
-import UniqFM
-
-
-import RegsBase
-
-#if   alpha_TARGET_ARCH
-import Alpha.Regs
-#elif powerpc_TARGET_ARCH
-import PPC.Regs
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Regs
-#elif sparc_TARGET_ARCH
-import SPARC.Regs
-#else
-#error "Regs: not defined for this architecture"
-#endif
  
  
+import RegClass
+import Reg
  
  
+import GraphBase
  
  
-instance Show Reg where
-       show (RealReg i)      = showReg i
-       show (VirtualRegI u)  = "%vI_" ++ show u
-       show (VirtualRegHi u) = "%vHi_" ++ show u
-       show (VirtualRegF u)  = "%vF_" ++ show u
-       show (VirtualRegD u)  = "%vD_" ++ show u
-
-instance Outputable Reg where
-       ppr r = Outputable.text (show r)
-
-
--- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
--- when supplied with the vreg for the lower-half of the quantity.
--- (NB. Not reversible).
-getHiVRegFromLo :: Reg -> Reg
-getHiVRegFromLo (VirtualRegI u) 
-   = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H'
-
-getHiVRegFromLo other 
-   = pprPanic "getHiVRegFromLo" (ppr other)
-
--- -----------------------------------------------------------------------------
--- Global registers
-
--- We map STG registers onto appropriate CmmExprs.  Either they map
--- to real machine registers or stored as offsets from BaseReg.  Given
--- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
--- register it is in, on this platform, or a CmmExpr denoting the
--- address in the register table holding it.
--- (See also get_GlobalReg_addr in CgUtils.)
-
-get_GlobalReg_reg_or_addr       :: GlobalReg -> Either Reg CmmExpr
-get_GlobalReg_reg_or_addr mid
-   = case globalRegMaybe mid of
-        Just rr -> Left rr
-        Nothing -> Right (get_GlobalReg_addr mid)
-
+import UniqFM
+import FastTypes
  
  
+{-
  -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
  -- i.e., these are the regs for which we are prepared to allow the
  -- register allocator to attempt to map VRegs to.
  -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
  -- i.e., these are the regs for which we are prepared to allow the
  -- register allocator to attempt to map VRegs to.
@@ -187,7 +44,7 @@ allocatableRegsDouble :: Int
  allocatableRegsDouble
         = length $ filter (\r -> regClass r == RcDouble) 
                  $ map RealReg allocatableRegs
  allocatableRegsDouble
         = length $ filter (\r -> regClass r == RcDouble) 
                  $ map RealReg allocatableRegs
-
+-}
  
  
  -- trivColorable ---------------------------------------------------------------
  
  
  -- trivColorable ---------------------------------------------------------------
@@ -277,8 +134,11 @@ worst n classN classC
  #error ToDo: define ALLOCATABLE_REGS_INTEGER and ALLOCATABLE_REGS_DOUBLE
  #endif
  
  #error ToDo: define ALLOCATABLE_REGS_INTEGER and ALLOCATABLE_REGS_DOUBLE
  #endif
  
-trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
-trivColorable _ conflicts exclusions
+trivColorable 
+       :: (Reg -> RegClass) 
+       -> Triv Reg RegClass Reg
+       
+trivColorable regClass _ conflicts exclusions
   = {-# SCC "trivColorable" #-}
     let
         isSqueesed cI cF ufm
   = {-# SCC "trivColorable" #-}
     let
         isSqueesed cI cF ufm
@@ -314,5 +174,3 @@ trivColorable _ conflicts exclusions
  
         (# True, _, _ #)
          -> False
  
         (# True, _, _ #)
          -> False
-
-
diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs

index 60d0175..45fd640 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/Base.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Base.hs
@@ -21,7 +21,7 @@ where
  import RegAlloc.Linear.FreeRegs
  import RegAlloc.Linear.StackMap
  import RegAlloc.Liveness
  import RegAlloc.Linear.FreeRegs
  import RegAlloc.Linear.StackMap
  import RegAlloc.Liveness
-import Regs
+import Reg
  
  import Outputable
  import Unique
  
  import Outputable
  import Unique
diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs

index bee8c98..b357160 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
@@ -5,7 +5,8 @@ module RegAlloc.Linear.FreeRegs (
         releaseReg,
         initFreeRegs,
         getFreeRegs,
         releaseReg,
         initFreeRegs,
         getFreeRegs,
-       allocateReg
+       allocateReg,
+       maxSpillSlots
  )
  
  #include "HsVersions.h"
  )
  
  #include "HsVersions.h"
@@ -27,12 +28,15 @@ where
  
  #if   defined(powerpc_TARGET_ARCH) 
  import RegAlloc.Linear.PPC.FreeRegs
  
  #if   defined(powerpc_TARGET_ARCH) 
  import RegAlloc.Linear.PPC.FreeRegs
+import PPC.Instr       (maxSpillSlots)
  
  #elif defined(sparc_TARGET_ARCH)
  import RegAlloc.Linear.SPARC.FreeRegs
  
  #elif defined(sparc_TARGET_ARCH)
  import RegAlloc.Linear.SPARC.FreeRegs
+import SPARC.Instr     (maxSpillSlots)
  
  #elif defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
  import RegAlloc.Linear.X86.FreeRegs
  
  #elif defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
  import RegAlloc.Linear.X86.FreeRegs
+import X86.Instr       (maxSpillSlots)
  
  #else
  #error "RegAlloc.Linear.FreeRegs not defined for this architecture."
  
  #else
  #error "RegAlloc.Linear.FreeRegs not defined for this architecture."
diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs

index d3f821b..7d2cbcd 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
@@ -19,13 +19,11 @@ import RegAlloc.Linear.State
  import RegAlloc.Linear.Base
  import RegAlloc.Linear.FreeRegs
  import RegAlloc.Liveness
  import RegAlloc.Linear.Base
  import RegAlloc.Linear.FreeRegs
  import RegAlloc.Liveness
+import Instruction
+import Reg
  
  import BlockId
  
  import BlockId
-import Instrs
-import Regs
-import RegAllocInfo
  import Cmm     hiding (RegSet)
  import Cmm     hiding (RegSet)
-
  import Digraph
  import Outputable
  import Unique
  import Digraph
  import Outputable
  import Unique
@@ -37,39 +35,41 @@ import UniqSet
  --     vregs are in the correct regs for its destination.
  --
  joinToTargets
  --     vregs are in the correct regs for its destination.
  --
  joinToTargets
-       :: BlockMap RegSet              -- ^ maps the unique of the blockid to the set of vregs 
+       :: Instruction instr
+       => BlockMap RegSet              -- ^ maps the unique of the blockid to the set of vregs 
                                         --      that are known to be live on the entry to each block.
  
         -> BlockId                      -- ^ id of the current block
                                         --      that are known to be live on the entry to each block.
  
         -> BlockId                      -- ^ id of the current block
-       -> Instr                        -- ^ branch instr on the end of the source block.
+       -> instr                        -- ^ branch instr on the end of the source block.
  
  
-       -> RegM ([NatBasicBlock]        --   fresh blocks of fixup code.
-               , Instr)                --   the original branch instruction, but maybe patched to jump
+       -> RegM ([NatBasicBlock instr]  --   fresh blocks of fixup code.
+               , instr)                --   the original branch instruction, but maybe patched to jump
                                         --      to a fixup block first.
  
  joinToTargets block_live id instr
  
         -- we only need to worry about jump instructions.
                                         --      to a fixup block first.
  
  joinToTargets block_live id instr
  
         -- we only need to worry about jump instructions.
-       | not $ isJumpish instr
+       | not $ isJumpishInstr instr
         = return ([], instr)
  
         | otherwise
         = return ([], instr)
  
         | otherwise
-       = joinToTargets' block_live [] id instr (jumpDests instr [])
+       = joinToTargets' block_live [] id instr (jumpDestsOfInstr instr)
  
  -----
  joinToTargets'
  
  -----
  joinToTargets'
-       :: BlockMap RegSet              -- ^ maps the unique of the blockid to the set of vregs 
+       :: Instruction instr
+       => BlockMap RegSet              -- ^ maps the unique of the blockid to the set of vregs 
                                         --      that are known to be live on the entry to each block.
  
                                         --      that are known to be live on the entry to each block.
  
-       -> [NatBasicBlock]              -- ^ acc blocks of fixup code.
+       -> [NatBasicBlock instr]        -- ^ acc blocks of fixup code.
  
         -> BlockId                      -- ^ id of the current block
  
         -> BlockId                      -- ^ id of the current block
-       -> Instr                        -- ^ branch instr on the end of the source block.
+       -> instr                        -- ^ branch instr on the end of the source block.
  
         -> [BlockId]                    -- ^ branch destinations still to consider.
  
  
         -> [BlockId]                    -- ^ branch destinations still to consider.
  
-       -> RegM ( [NatBasicBlock]
-               , Instr)
+       -> RegM ( [NatBasicBlock instr]
+               , instr)
  
  -- no more targets to consider. all done.
  joinToTargets' _          new_blocks _ instr []
  
  -- no more targets to consider. all done.
  joinToTargets' _          new_blocks _ instr []
@@ -173,7 +173,7 @@ joinToTargets_again
                 --      then that will jump to our original destination.
                 fixup_block_id <- getUniqueR
                 let block = BasicBlock (BlockId fixup_block_id) 
                 --      then that will jump to our original destination.
                 fixup_block_id <- getUniqueR
                 let block = BasicBlock (BlockId fixup_block_id) 
-                               $ fixUpInstrs ++ mkBranchInstr dest
+                               $ fixUpInstrs ++ mkJumpInstr dest
                 
  {-             pprTrace
                         ("joinToTargets: fixup code is:")
                 
  {-             pprTrace
                         ("joinToTargets: fixup code is:")
@@ -187,7 +187,11 @@ joinToTargets_again
  
                  -- patch the original branch instruction so it goes to our
                  --     fixup block instead.
  
                  -- patch the original branch instruction so it goes to our
                  --     fixup block instead.
-                _      -> let  instr'  =  patchJump instr dest (BlockId fixup_block_id)
+                _      -> let  instr'  =  patchJumpInstr instr 
+                                               (\bid -> if bid == dest 
+                                                               then BlockId fixup_block_id 
+                                                               else dest)
+                                               
                            in   joinToTargets' block_live (block : new_blocks) block_id instr' dests
  
  
                            in   joinToTargets' block_live (block : new_blocks) block_id instr' dests
  
  
@@ -256,7 +260,9 @@ expandNode vreg src dst
  --     destinations. We have eliminated any possibility of single-node
  --     cycles in expandNode above.
  --
  --     destinations. We have eliminated any possibility of single-node
  --     cycles in expandNode above.
  --
-handleComponent :: Int -> Instr -> SCC (Unique, Loc, [Loc]) -> RegM [Instr]
+handleComponent 
+       :: Instruction instr
+       => Int -> instr -> SCC (Unique, Loc, [Loc]) -> RegM [instr]
  
  -- If the graph is acyclic then we won't get the swapping problem below.
  --     In this case we can just do the moves directly, and avoid having to
  
  -- If the graph is acyclic then we won't get the swapping problem below.
  --     In this case we can just do the moves directly, and avoid having to
@@ -305,11 +311,12 @@ handleComponent _ _ (CyclicSCC _)
  -- | Move a vreg between these two locations.
  --
  makeMove 
  -- | Move a vreg between these two locations.
  --
  makeMove 
-       :: Int          -- ^ current C stack delta.
+       :: Instruction instr
+       => Int          -- ^ current C stack delta.
         -> Unique       -- ^ unique of the vreg that we're moving.
         -> Loc          -- ^ source location.
         -> Loc          -- ^ destination location.
         -> Unique       -- ^ unique of the vreg that we're moving.
         -> Loc          -- ^ source location.
         -> Loc          -- ^ destination location.
-       -> RegM Instr   -- ^ move instruction.
+       -> RegM instr   -- ^ move instruction.
  
  makeMove _     vreg (InReg src) (InReg dst)
   = do  recordSpill (SpillJoinRR vreg)
  
  makeMove _     vreg (InReg src) (InReg dst)
   = do  recordSpill (SpillJoinRR vreg)
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs

index bfd9ca5..47529d2 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -96,14 +96,14 @@ import RegAlloc.Linear.StackMap
  import RegAlloc.Linear.FreeRegs
  import RegAlloc.Linear.Stats
  import RegAlloc.Linear.JoinToTargets
  import RegAlloc.Linear.FreeRegs
  import RegAlloc.Linear.Stats
  import RegAlloc.Linear.JoinToTargets
+import TargetReg
  import RegAlloc.Liveness
  import RegAlloc.Liveness
+import Instruction
+import Reg
  
  -- import PprMach
  
  import BlockId
  
  -- import PprMach
  
  import BlockId
-import Regs
-import Instrs
-import RegAllocInfo
  import Cmm hiding (RegSet)
  
  import Digraph
  import Cmm hiding (RegSet)
  
  import Digraph
@@ -112,7 +112,6 @@ import UniqSet
  import UniqFM
  import UniqSupply
  import Outputable
  import UniqFM
  import UniqSupply
  import Outputable
-import FastString
  
  import Data.Maybe
  import Data.List
  
  import Data.Maybe
  import Data.List
@@ -126,8 +125,9 @@ import Control.Monad
  
  -- Allocate registers
  regAlloc 
  
  -- Allocate registers
  regAlloc 
-       :: LiveCmmTop
-       -> UniqSM (NatCmmTop, Maybe RegAllocStats)
+       :: (Outputable instr, Instruction instr)
+       => LiveCmmTop instr
+       -> UniqSM (NatCmmTop instr, Maybe RegAllocStats)
  
  regAlloc (CmmData sec d) 
         = return
  
  regAlloc (CmmData sec d) 
         = return
@@ -171,10 +171,11 @@ regAlloc (CmmProc _ _ _ _)
  --   an entry in the block map or it is the first block.
  --
  linearRegAlloc
  --   an entry in the block map or it is the first block.
  --
  linearRegAlloc
-       :: BlockId                      -- ^ the first block
+       :: (Outputable instr, Instruction instr)
+       => BlockId                      -- ^ the first block
          -> BlockMap RegSet             -- ^ live regs on entry to each basic block
          -> BlockMap RegSet             -- ^ live regs on entry to each basic block
-       -> [SCC LiveBasicBlock]         -- ^ instructions annotated with "deaths"
-       -> UniqSM ([NatBasicBlock], RegAllocStats)
+       -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
+       -> UniqSM ([NatBasicBlock instr], RegAllocStats)
  
  linearRegAlloc first_id block_live sccs
   = do  us      <- getUs
  
  linearRegAlloc first_id block_live sccs
   = do  us      <- getUs
@@ -234,9 +235,10 @@ process first_id block_live (b@(BasicBlock id _) : blocks) next_round accum
  -- | Do register allocation on this basic block
  --
  processBlock
  -- | Do register allocation on this basic block
  --
  processBlock
-       :: BlockMap RegSet              -- ^ live regs on entry to each basic block
-       -> LiveBasicBlock               -- ^ block to do register allocation on
-       -> RegM [NatBasicBlock]         -- ^ block with registers allocated
+       :: (Outputable instr, Instruction instr)
+       => BlockMap RegSet              -- ^ live regs on entry to each basic block
+       -> LiveBasicBlock instr         -- ^ block to do register allocation on
+       -> RegM [NatBasicBlock instr]   -- ^ block with registers allocated
  
  processBlock block_live (BasicBlock id instrs)
   = do  initBlock id
  
  processBlock block_live (BasicBlock id instrs)
   = do  initBlock id
@@ -265,20 +267,21 @@ initBlock id
  
  -- | Do allocation for a sequence of instructions.
  linearRA
  
  -- | Do allocation for a sequence of instructions.
  linearRA
-       :: BlockMap RegSet              -- ^ map of what vregs are live on entry to each block.
-       -> [Instr]                      -- ^ accumulator for instructions already processed.
-       -> [NatBasicBlock]              -- ^ accumulator for blocks of fixup code.
-       -> BlockId                      -- ^ id of the current block, for debugging.
-       -> [LiveInstr]                  -- ^ liveness annotated instructions in this block.
+       :: (Outputable instr, Instruction instr)
+       => BlockMap RegSet                      -- ^ map of what vregs are live on entry to each block.
+       -> [instr]                              -- ^ accumulator for instructions already processed.
+       -> [NatBasicBlock instr]                -- ^ accumulator for blocks of fixup code.
+       -> BlockId                              -- ^ id of the current block, for debugging.
+       -> [LiveInstr instr]                    -- ^ liveness annotated instructions in this block.
  
  
-       -> RegM ( [Instr]               --   instructions after register allocation
-               , [NatBasicBlock])      --   fresh blocks of fixup code.
+       -> RegM ( [instr]                       --   instructions after register allocation
+               , [NatBasicBlock instr])        --   fresh blocks of fixup code.
  
  
  linearRA _          accInstr accFixup _ []
         = return 
  
  
  linearRA _          accInstr accFixup _ []
         = return 
-               ( reverse accInstr      -- instrs need to be returned in the correct order.
-               , accFixup)             -- it doesn't matter what order the fixup blocks are returned in.
+               ( reverse accInstr              -- instrs need to be returned in the correct order.
+               , accFixup)                     -- it doesn't matter what order the fixup blocks are returned in.
  
  
  linearRA block_live accInstr accFixups id (instr:instrs)
  
  
  linearRA block_live accInstr accFixups id (instr:instrs)
@@ -291,21 +294,24 @@ linearRA block_live accInstr accFixups id (instr:instrs)
  
  -- | Do allocation for a single instruction.
  raInsn  
  
  -- | Do allocation for a single instruction.
  raInsn  
-       :: BlockMap RegSet              -- ^ map of what vregs are love on entry to each block.
-       -> [Instr]                      -- ^ accumulator for instructions already processed.
-       -> BlockId                      -- ^ the id of the current block, for debugging
-       -> LiveInstr                    -- ^ the instr to have its regs allocated, with liveness info.
+       :: (Outputable instr, Instruction instr)
+       => BlockMap RegSet                      -- ^ map of what vregs are love on entry to each block.
+       -> [instr]                              -- ^ accumulator for instructions already processed.
+       -> BlockId                              -- ^ the id of the current block, for debugging
+       -> LiveInstr instr                      -- ^ the instr to have its regs allocated, with liveness info.
         -> RegM 
         -> RegM 
-               ( [Instr]               -- new instructions
-               , [NatBasicBlock])      -- extra fixup blocks
+               ( [instr]                       -- new instructions
+               , [NatBasicBlock instr])        -- extra fixup blocks
  
  
-raInsn _     new_instrs _ (Instr (COMMENT _) Nothing)
- = return (new_instrs, [])
+raInsn _     new_instrs _ (Instr ii Nothing)  
+       | Just n        <- takeDeltaInstr ii
+       = do    setDeltaR n
+               return (new_instrs, [])
+
+raInsn _     new_instrs _ (Instr ii Nothing)
+       | isMetaInstr ii
+       = return (new_instrs, [])
  
  
-raInsn _     new_instrs _ (Instr (DELTA n) Nothing)  
- = do
-    setDeltaR n
-    return (new_instrs, [])
  
  raInsn block_live new_instrs id (Instr instr (Just live))
   = do
  
  raInsn block_live new_instrs id (Instr instr (Just live))
   = do
@@ -318,7 +324,7 @@ raInsn block_live new_instrs id (Instr instr (Just live))
      -- then we can eliminate the instruction.
      -- (we can't eliminitate it if the source register is on the stack, because
      --  we do not want to use one spill slot for different virtual registers)
      -- then we can eliminate the instruction.
      -- (we can't eliminitate it if the source register is on the stack, because
      --  we do not want to use one spill slot for different virtual registers)
-    case isRegRegMove instr of
+    case takeRegRegMoveInstr instr of
         Just (src,dst)  | src `elementOfUniqSet` (liveDieRead live), 
                           isVirtualReg dst,
                           not (dst `elemUFM` assig),
         Just (src,dst)  | src `elementOfUniqSet` (liveDieRead live), 
                           isVirtualReg dst,
                           not (dst `elemUFM` assig),
@@ -354,7 +360,7 @@ raInsn _ _ _ instr
  
  
  genRaInsn block_live new_instrs block_id instr r_dying w_dying =
  
  
  genRaInsn block_live new_instrs block_id instr r_dying w_dying =
-    case regUsage instr              of { RU read written ->
+    case regUsageOfInstr instr              of { RU read written ->
      case partition isRealReg written of { (real_written1,virt_written) ->
      do
      let 
      case partition isRealReg written of { (real_written1,virt_written) ->
      do
      let 
@@ -410,7 +416,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying =
                                   (t,r) <- zip virt_read r_allocd
                                           ++ zip virt_written w_allocd ]
  
                                   (t,r) <- zip virt_read r_allocd
                                           ++ zip virt_written w_allocd ]
  
-       patched_instr = patchRegs adjusted_instr patchLookup
+       patched_instr = patchRegsOfInstr adjusted_instr patchLookup
         patchLookup x = case lookupUFM patch_map x of
                                 Nothing -> x
                                 Just y  -> y
         patchLookup x = case lookupUFM patch_map x of
                                 Nothing -> x
                                 Just y  -> y
@@ -424,7 +430,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying =
      -- erase reg->reg moves where the source and destination are the same.
      -- If the src temp didn't die in this instr but happened to be allocated
      -- to the same real reg as the destination, then we can erase the move anyway.
      -- erase reg->reg moves where the source and destination are the same.
      -- If the src temp didn't die in this instr but happened to be allocated
      -- to the same real reg as the destination, then we can erase the move anyway.
-    let        squashed_instr  = case isRegRegMove patched_instr of
+    let        squashed_instr  = case takeRegRegMoveInstr patched_instr of
                                 Just (src, dst)
                                  | src == dst   -> []
                                 _               -> [patched_instr]
                                 Just (src, dst)
                                  | src == dst   -> []
                                 _               -> [patched_instr]
@@ -473,10 +479,11 @@ for allocateRegs on the temps *written*,
  -}
  
  saveClobberedTemps
  -}
  
  saveClobberedTemps
-   :: [RegNo]             -- real registers clobbered by this instruction
-   -> [Reg]               -- registers which are no longer live after this insn
-   -> RegM [Instr]        -- return: instructions to spill any temps that will
-                          -- be clobbered.
+       :: Instruction instr
+       => [RegNo]              -- real registers clobbered by this instruction
+       -> [Reg]                -- registers which are no longer live after this insn
+       -> RegM [instr]         -- return: instructions to spill any temps that will
+                               -- be clobbered.
  
  saveClobberedTemps [] _ = return [] -- common case
  saveClobberedTemps clobbered dying =  do
  
  saveClobberedTemps [] _ = return [] -- common case
  saveClobberedTemps clobbered dying =  do
@@ -498,7 +505,7 @@ saveClobberedTemps clobbered dying =  do
         recordSpill (SpillClobber temp)
  
         let new_assign  = addToUFM assig temp (InBoth reg slot)
         recordSpill (SpillClobber temp)
  
         let new_assign  = addToUFM assig temp (InBoth reg slot)
-       clobber new_assign (spill : COMMENT (fsLit "spill clobber") : instrs) rest
+       clobber new_assign (spill : {- COMMENT (fsLit "spill clobber") : -} instrs) rest
  
  clobberRegs :: [RegNo] -> RegM ()
  clobberRegs [] = return () -- common case
  
  clobberRegs :: [RegNo] -> RegM ()
  clobberRegs [] = return () -- common case
@@ -533,12 +540,13 @@ clobberRegs clobbered = do
  --   the list of free registers and free stack slots.
  
  allocateRegsAndSpill
  --   the list of free registers and free stack slots.
  
  allocateRegsAndSpill
-       :: Bool                 -- True <=> reading (load up spilled regs)
+       :: Instruction instr
+       => Bool                 -- True <=> reading (load up spilled regs)
         -> [Reg]                -- don't push these out
         -> [Reg]                -- don't push these out
-       -> [Instr]              -- spill insns
+       -> [instr]              -- spill insns
         -> [RegNo]              -- real registers allocated (accum.)
         -> [Reg]                -- temps to allocate
         -> [RegNo]              -- real registers allocated (accum.)
         -> [Reg]                -- temps to allocate
-       -> RegM ([Instr], [RegNo])
+       -> RegM ([instr], [RegNo])
  
  allocateRegsAndSpill _       _    spills alloc []
    = return (spills,reverse alloc)
  
  allocateRegsAndSpill _       _    spills alloc []
    = return (spills,reverse alloc)
@@ -563,7 +571,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
       loc -> do
         freeregs <- getFreeRegsR
  
       loc -> do
         freeregs <- getFreeRegsR
  
-        case getFreeRegs (regClass r) freeregs of
+        case getFreeRegs (targetRegClass r) freeregs of
  
         -- case (2): we have a free register
           my_reg:_ -> {- pprTrace "alloc" (ppr r <+> ppr my_reg <+> ppr freeClass) $ -}
  
         -- case (2): we have a free register
           my_reg:_ -> {- pprTrace "alloc" (ppr r <+> ppr my_reg <+> ppr freeClass) $ -}
@@ -582,10 +590,10 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
               keep' = map getUnique keep
               candidates1 = [ (temp,reg,mem)
                             | (temp, InBoth reg mem) <- ufmToList assig,
               keep' = map getUnique keep
               candidates1 = [ (temp,reg,mem)
                             | (temp, InBoth reg mem) <- ufmToList assig,
-                             temp `notElem` keep', regClass (RealReg reg) == regClass r ]
+                             temp `notElem` keep', targetRegClass (RealReg reg) == targetRegClass r ]
               candidates2 = [ (temp,reg)
                             | (temp, InReg reg) <- ufmToList assig,
               candidates2 = [ (temp,reg)
                             | (temp, InReg reg) <- ufmToList assig,
-                             temp `notElem` keep', regClass (RealReg reg) == regClass r  ]
+                             temp `notElem` keep', targetRegClass (RealReg reg) == targetRegClass r  ]
             -- in
             ASSERT2(not (null candidates1 && null candidates2), 
                     text (show freeregs) <+> ppr r <+> ppr assig) do
             -- in
             ASSERT2(not (null candidates1 && null candidates2), 
                     text (show freeregs) <+> ppr r <+> ppr assig) do
@@ -622,8 +630,8 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
                                 
                 (spill_insn, slot) <- spillR (RealReg my_reg) temp_to_push_out
                 let spill_store  = (if reading then id else reverse)
                                 
                 (spill_insn, slot) <- spillR (RealReg my_reg) temp_to_push_out
                 let spill_store  = (if reading then id else reverse)
-                                       [ COMMENT (fsLit "spill alloc") 
-                                       , spill_insn ]
+                                       [ -- COMMENT (fsLit "spill alloc") 
+                                          spill_insn ]
  
                 -- record that this temp was spilled
                 recordSpill (SpillAlloc temp_to_push_out)
  
                 -- record that this temp was spilled
                 recordSpill (SpillAlloc temp_to_push_out)
@@ -643,18 +651,19 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
  
  -- | Load up a spilled temporary if we need to.
  loadTemp
  
  -- | Load up a spilled temporary if we need to.
  loadTemp
-       :: Bool
+       :: Instruction instr
+       => Bool
         -> Reg          -- the temp being loaded
         -> Maybe Loc    -- the current location of this temp
         -> RegNo        -- the hreg to load the temp into
         -> Reg          -- the temp being loaded
         -> Maybe Loc    -- the current location of this temp
         -> RegNo        -- the hreg to load the temp into
-       -> [Instr]
-       -> RegM [Instr]
+       -> [instr]
+       -> RegM [instr]
  
  loadTemp True vreg (Just (InMem slot)) hreg spills
   = do
         insn <- loadR (RealReg hreg) slot
         recordSpill (SpillLoad $ getUnique vreg)
  
  loadTemp True vreg (Just (InMem slot)) hreg spills
   = do
         insn <- loadR (RealReg hreg) slot
         recordSpill (SpillLoad $ getUnique vreg)
-       return  $  COMMENT (fsLit "spill load") : insn : spills
+       return  $  {- COMMENT (fsLit "spill load") : -} insn : spills
  
  loadTemp _ _ _ _ spills =
     return spills
  
  loadTemp _ _ _ _ spills =
     return spills
diff --git a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs

index 6d8809d..878bfe3 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
@@ -3,7 +3,9 @@
  module RegAlloc.Linear.PPC.FreeRegs
  where
  
  module RegAlloc.Linear.PPC.FreeRegs
  where
  
-import Regs
+import PPC.Regs
+import RegClass
+import Reg
  
  import Outputable
  
  
  import Outputable
  
diff --git a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs

index aa716b5..5514056 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
@@ -3,7 +3,9 @@
  module RegAlloc.Linear.SPARC.FreeRegs
  where
  
  module RegAlloc.Linear.SPARC.FreeRegs
  where
  
-import Regs
+import SPARC.Regs
+import RegClass
+import Reg
  
  import Outputable
  import FastBool
  
  import Outputable
  import FastBool
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs

index 5656941..62bf6ad 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
@@ -19,7 +19,7 @@ module RegAlloc.Linear.StackMap (
  
  where
  
  
  where
  
-import RegAllocInfo    (maxSpillSlots)
+import RegAlloc.Linear.FreeRegs
  
  import Outputable
  import UniqFM
  
  import Outputable
  import UniqFM
diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs

index 94a8f7b..b9f7049 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/State.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/State.hs
@@ -34,11 +34,8 @@ import RegAlloc.Linear.StackMap
  import RegAlloc.Linear.Base
  import RegAlloc.Linear.FreeRegs
  import RegAlloc.Liveness
  import RegAlloc.Linear.Base
  import RegAlloc.Linear.FreeRegs
  import RegAlloc.Liveness
-
-
-import Instrs
-import Regs
-import RegAllocInfo
+import Instruction
+import Reg
  
  import Unique
  import UniqSupply
  
  import Unique
  import UniqSupply
@@ -85,14 +82,19 @@ makeRAStats state
         { ra_spillInstrs        = binSpillReasons (ra_spills state) }
  
  
         { ra_spillInstrs        = binSpillReasons (ra_spills state) }
  
  
-spillR :: Reg -> Unique -> RegM (Instr, Int)
+spillR         :: Instruction instr
+       => Reg -> Unique -> RegM (instr, Int)
+
  spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} ->
    let (stack',slot) = getStackSlotFor stack temp
        instr  = mkSpillInstr reg delta slot
    in
    (# s{ra_stack=stack'}, (instr,slot) #)
  
  spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} ->
    let (stack',slot) = getStackSlotFor stack temp
        instr  = mkSpillInstr reg delta slot
    in
    (# s{ra_stack=stack'}, (instr,slot) #)
  
-loadR :: Reg -> Int -> RegM Instr
+
+loadR  :: Instruction instr
+       => Reg -> Int -> RegM instr
+
  loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} ->
    (# s, mkLoadInstr reg delta slot #)
  
  loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} ->
    (# s, mkLoadInstr reg delta slot #)
  
diff --git a/compiler/nativeGen/RegAlloc/Linear/Stats.hs b/compiler/nativeGen/RegAlloc/Linear/Stats.hs

index 95bf8ed..137168e 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Stats.hs
@@ -8,9 +8,8 @@ where
  
  import RegAlloc.Linear.Base
  import RegAlloc.Liveness
  
  import RegAlloc.Linear.Base
  import RegAlloc.Liveness
+import Instruction
  
  
-import RegAllocInfo
-import Instrs
  import Cmm             (GenBasicBlock(..))
  
  import UniqFM
  import Cmm             (GenBasicBlock(..))
  
  import UniqFM
@@ -36,7 +35,10 @@ binSpillReasons reasons
  
  
  -- | Count reg-reg moves remaining in this code.
  
  
  -- | Count reg-reg moves remaining in this code.
-countRegRegMovesNat :: NatCmmTop -> Int
+countRegRegMovesNat 
+       :: Instruction instr
+       => NatCmmTop instr -> Int
+
  countRegRegMovesNat cmm
         = execState (mapGenBlockTopM countBlock cmm) 0
   where
  countRegRegMovesNat cmm
         = execState (mapGenBlockTopM countBlock cmm) 0
   where
@@ -45,7 +47,7 @@ countRegRegMovesNat cmm
                 return  b
  
         countInstr instr
                 return  b
  
         countInstr instr
-               | Just _        <- isRegRegMove instr
+               | Just _        <- takeRegRegMoveInstr instr
                 = do    modify (+ 1)
                         return instr
  
                 = do    modify (+ 1)
                         return instr
  
@@ -54,7 +56,10 @@ countRegRegMovesNat cmm
  
  
  -- | Pretty print some RegAllocStats
  
  
  -- | Pretty print some RegAllocStats
-pprStats :: [NatCmmTop] -> [RegAllocStats] -> SDoc
+pprStats 
+       :: Instruction instr 
+       => [NatCmmTop instr] -> [RegAllocStats] -> SDoc
+
  pprStats code statss
   = let -- sum up all the instrs inserted by the spiller
         spills          = foldl' (plusUFM_C (zipWith (+)))
  pprStats code statss
   = let -- sum up all the instrs inserted by the spiller
         spills          = foldl' (plusUFM_C (zipWith (+)))
diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs

index 1306deb..eedaca8 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
@@ -3,7 +3,9 @@
  module RegAlloc.Linear.X86.FreeRegs
  where
  
  module RegAlloc.Linear.X86.FreeRegs
  where
  
-import Regs
+import X86.Regs
+import RegClass
+import Reg
  
  import Data.Word
  import Data.Bits
  
  import Data.Word
  import Data.Bits
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs

index 8445034..8faab5a 100644 (file)
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -20,7 +20,7 @@ module RegAlloc.Liveness (
         mapBlockTop,    mapBlockTopM,
         mapGenBlockTop, mapGenBlockTopM,
         stripLive,
         mapBlockTop,    mapBlockTopM,
         mapGenBlockTop, mapGenBlockTopM,
         stripLive,
-       spillNatBlock,
+       stripLiveBlock,
         slurpConflicts,
         slurpReloadCoalesce,
         eraseDeltasLive,
         slurpConflicts,
         slurpReloadCoalesce,
         eraseDeltasLive,
@@ -30,12 +30,13 @@ module RegAlloc.Liveness (
  
    ) where
  
  
    ) where
  
+
+import Reg
+import Instruction
+
  import BlockId
  import BlockId
-import Regs
-import Instrs
-import PprMach
-import RegAllocInfo
  import Cmm hiding (RegSet)
  import Cmm hiding (RegSet)
+import PprCmm()
  
  import Digraph
  import Outputable
  
  import Digraph
  import Outputable
@@ -65,18 +66,25 @@ emptyBlockMap = emptyBlockEnv
  
  
  -- | A top level thing which carries liveness information.
  
  
  -- | A top level thing which carries liveness information.
-type LiveCmmTop
+type LiveCmmTop instr
         = GenCmmTop
                 CmmStatic
                 LiveInfo
         = GenCmmTop
                 CmmStatic
                 LiveInfo
-               (ListGraph (GenBasicBlock LiveInstr))
+               (ListGraph (GenBasicBlock (LiveInstr instr)))
                         -- the "instructions" here are actually more blocks,
                         --      single blocks are acyclic
                         --      multiple blocks are taken to be cyclic.
  
  -- | An instruction with liveness information.
                         -- the "instructions" here are actually more blocks,
                         --      single blocks are acyclic
                         --      multiple blocks are taken to be cyclic.
  
  -- | An instruction with liveness information.
-data LiveInstr
-       = Instr Instr (Maybe Liveness)
+data LiveInstr instr
+       = Instr instr (Maybe Liveness)
+
+       -- | spill this reg to a stack slot
+       | SPILL Reg Int
+
+       -- | reload this reg from a stack slot
+       | RELOAD Int Reg
+       
  
  -- | Liveness information.
  --     The regs which die are ones which are no longer live in the *next* instruction
  
  -- | Liveness information.
  --     The regs which die are ones which are no longer live in the *next* instruction
@@ -100,11 +108,28 @@ data LiveInfo
                 (BlockMap RegSet)       -- argument locals live on entry to this block
  
  -- | A basic block with liveness information.
                 (BlockMap RegSet)       -- argument locals live on entry to this block
  
  -- | A basic block with liveness information.
-type LiveBasicBlock
-       = GenBasicBlock LiveInstr
+type LiveBasicBlock instr
+       = GenBasicBlock (LiveInstr instr)
+
+
+instance Outputable instr 
+      => Outputable (LiveInstr instr) where
+       ppr (SPILL reg slot)
+          = hcat [
+               ptext (sLit "\tSPILL"),
+               char ' ',
+               ppr reg,
+               comma,
+               ptext (sLit "SLOT") <> parens (int slot)]
+
+       ppr (RELOAD slot reg)
+          = hcat [
+               ptext (sLit "\tRELOAD"),
+               char ' ',
+               ptext (sLit "SLOT") <> parens (int slot),
+               comma,
+               ppr reg]
  
  
-
-instance Outputable LiveInstr where
         ppr (Instr instr Nothing)
          = ppr instr
  
         ppr (Instr instr Nothing)
          = ppr instr
  
@@ -120,8 +145,7 @@ instance Outputable LiveInstr where
          where  pprRegs :: SDoc -> RegSet -> SDoc
                 pprRegs name regs
                  | isEmptyUniqSet regs  = empty
          where  pprRegs :: SDoc -> RegSet -> SDoc
                 pprRegs name regs
                  | isEmptyUniqSet regs  = empty
-                | otherwise            = name <> (hcat $ punctuate space $ map (docToSDoc . pprUserReg) $ uniqSetToList regs)
-
+                | otherwise            = name <> (hcat $ punctuate space $ map ppr $ uniqSetToList regs)
  
  instance Outputable LiveInfo where
         ppr (LiveInfo static firstId liveOnEntry)
  
  instance Outputable LiveInfo where
         ppr (LiveInfo static firstId liveOnEntry)
@@ -130,11 +154,12 @@ instance Outputable LiveInfo where
                 $$ text "# liveOnEntry = " <> ppr liveOnEntry
  
  
                 $$ text "# liveOnEntry = " <> ppr liveOnEntry
  
  
+
  -- | map a function across all the basic blocks in this code
  --
  mapBlockTop
  -- | map a function across all the basic blocks in this code
  --
  mapBlockTop
-       :: (LiveBasicBlock -> LiveBasicBlock)
-       -> LiveCmmTop -> LiveCmmTop
+       :: (LiveBasicBlock instr -> LiveBasicBlock instr)
+       -> LiveCmmTop instr -> LiveCmmTop instr
  
  mapBlockTop f cmm
         = evalState (mapBlockTopM (\x -> return $ f x) cmm) ()
  
  mapBlockTop f cmm
         = evalState (mapBlockTopM (\x -> return $ f x) cmm) ()
@@ -144,8 +169,8 @@ mapBlockTop f cmm
  --
  mapBlockTopM
         :: Monad m
  --
  mapBlockTopM
         :: Monad m
-       => (LiveBasicBlock -> m LiveBasicBlock)
-       -> LiveCmmTop -> m LiveCmmTop
+       => (LiveBasicBlock instr -> m (LiveBasicBlock instr))
+       -> LiveCmmTop instr -> m (LiveCmmTop instr)
  
  mapBlockTopM _ cmm@(CmmData{})
         = return cmm
  
  mapBlockTopM _ cmm@(CmmData{})
         = return cmm
@@ -187,7 +212,11 @@ mapGenBlockTopM f (CmmProc header label params (ListGraph blocks))
  --     Slurping of conflicts and moves is wrapped up together so we don't have
  --     to make two passes over the same code when we want to build the graph.
  --
  --     Slurping of conflicts and moves is wrapped up together so we don't have
  --     to make two passes over the same code when we want to build the graph.
  --
-slurpConflicts :: LiveCmmTop -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+slurpConflicts 
+       :: Instruction instr
+       => LiveCmmTop instr 
+       -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+
  slurpConflicts live
         = slurpCmm (emptyBag, emptyBag) live
  
  slurpConflicts live
         = slurpCmm (emptyBag, emptyBag) live
  
@@ -205,12 +234,20 @@ slurpConflicts live
                 = (consBag rsLiveEntry conflicts, moves)
  
                 | otherwise
                 = (consBag rsLiveEntry conflicts, moves)
  
                 | otherwise
-               = panic "RegLiveness.slurpBlock: bad block"
+               = panic "Liveness.slurpConflicts: bad block"
  
         slurpLIs rsLive (conflicts, moves) []
                 = (consBag rsLive conflicts, moves)
  
  
         slurpLIs rsLive (conflicts, moves) []
                 = (consBag rsLive conflicts, moves)
  
-       slurpLIs rsLive rs (Instr _ Nothing     : lis)  = slurpLIs rsLive rs lis
+       slurpLIs rsLive rs (Instr _ Nothing     : lis)  
+               = slurpLIs rsLive rs lis
+
+       -- we're not expecting to be slurping conflicts from spilled code
+       slurpLIs _ _ (SPILL _ _ : _)
+               = panic "Liveness.slurpConflicts: unexpected SPILL"
+
+       slurpLIs _ _ (RELOAD _ _ : _)
+               = panic "Liveness.slurpConflicts: unexpected RELOAD"
                 
         slurpLIs rsLiveEntry (conflicts, moves) (Instr instr (Just live) : lis)
          = let
                 
         slurpLIs rsLiveEntry (conflicts, moves) (Instr instr (Just live) : lis)
          = let
@@ -234,7 +271,7 @@ slurpConflicts live
                 --
                 rsConflicts     = unionUniqSets rsLiveNext rsOrphans
  
                 --
                 rsConflicts     = unionUniqSets rsLiveNext rsOrphans
  
-         in    case isRegRegMove instr of
+         in    case takeRegRegMoveInstr instr of
                  Just rr        -> slurpLIs rsLiveNext
                                         ( consBag rsConflicts conflicts
                                         , consBag rr moves) lis
                  Just rr        -> slurpLIs rsLiveNext
                                         ( consBag rsConflicts conflicts
                                         , consBag rr moves) lis
@@ -254,7 +291,11 @@ slurpConflicts live
  --     the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move.
  --
  --
  --     the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move.
  --
  --
-slurpReloadCoalesce :: LiveCmmTop -> Bag (Reg, Reg)
+slurpReloadCoalesce 
+       :: Instruction instr
+       => LiveCmmTop instr
+       -> Bag (Reg, Reg)
+
  slurpReloadCoalesce live
         = slurpCmm emptyBag live
  
  slurpReloadCoalesce live
         = slurpCmm emptyBag live
  
@@ -285,23 +326,24 @@ slurpReloadCoalesce live
                 (_, mMoves)     <- mapAccumLM slurpLI slotMap instrs
                 return $ listToBag $ catMaybes mMoves
  
                 (_, mMoves)     <- mapAccumLM slurpLI slotMap instrs
                 return $ listToBag $ catMaybes mMoves
  
-       slurpLI :: UniqFM Reg                           -- current slotMap
-               -> LiveInstr
+       slurpLI :: Instruction instr
+               => UniqFM Reg                           -- current slotMap
+               -> LiveInstr instr
                 -> State (UniqFM [UniqFM Reg])          -- blockId -> [slot -> reg]
                                                         --      for tracking slotMaps across jumps
  
                          ( UniqFM Reg                   -- new slotMap
                          , Maybe (Reg, Reg))            -- maybe a new coalesce edge
  
                 -> State (UniqFM [UniqFM Reg])          -- blockId -> [slot -> reg]
                                                         --      for tracking slotMaps across jumps
  
                          ( UniqFM Reg                   -- new slotMap
                          , Maybe (Reg, Reg))            -- maybe a new coalesce edge
  
-       slurpLI slotMap (Instr instr _)
+       slurpLI slotMap li
  
                 -- remember what reg was stored into the slot
  
                 -- remember what reg was stored into the slot
-               | SPILL reg slot        <- instr
+               | SPILL reg slot        <- li
                 , slotMap'              <- addToUFM slotMap slot reg
                 = return (slotMap', Nothing)
  
                 -- add an edge betwen the this reg and the last one stored into the slot
                 , slotMap'              <- addToUFM slotMap slot reg
                 = return (slotMap', Nothing)
  
                 -- add an edge betwen the this reg and the last one stored into the slot
-               | RELOAD slot reg       <- instr
+               | RELOAD slot reg       <- li
                 = case lookupUFM slotMap slot of
                         Just reg2
                          | reg /= reg2  -> return (slotMap, Just (reg, reg2))
                 = case lookupUFM slotMap slot of
                         Just reg2
                          | reg /= reg2  -> return (slotMap, Just (reg, reg2))
@@ -310,7 +352,8 @@ slurpReloadCoalesce live
                         Nothing         -> return (slotMap, Nothing)
  
                 -- if we hit a jump, remember the current slotMap
                         Nothing         -> return (slotMap, Nothing)
  
                 -- if we hit a jump, remember the current slotMap
-               | targets       <- jumpDests instr []
+               | Instr instr _         <- li
+               , targets               <- jumpDestsOfInstr instr
                 , not $ null targets
                 = do    mapM_   (accSlotMap slotMap) targets
                         return  (slotMap, Nothing)
                 , not $ null targets
                 = do    mapM_   (accSlotMap slotMap) targets
                         return  (slotMap, Nothing)
@@ -340,7 +383,11 @@ slurpReloadCoalesce live
  
  -- | Strip away liveness information, yielding NatCmmTop
  
  
  -- | Strip away liveness information, yielding NatCmmTop
  
-stripLive :: LiveCmmTop -> NatCmmTop
+stripLive 
+       :: Instruction instr
+       => LiveCmmTop instr 
+       -> NatCmmTop instr
+
  stripLive live
         = stripCmm live
  
  stripLive live
         = stripCmm live
  
@@ -349,26 +396,26 @@ stripLive live
                 = CmmProc info label params
                            (ListGraph $ concatMap stripComp comps)
  
                 = CmmProc info label params
                            (ListGraph $ concatMap stripComp comps)
  
-       stripComp  (BasicBlock _ blocks)        = map stripBlock blocks
-       stripBlock (BasicBlock i instrs)        = BasicBlock i (map stripLI instrs)
-       stripLI    (Instr instr _)              = instr
+       stripComp  (BasicBlock _ blocks)        = map stripLiveBlock blocks
  
  
  
  
--- | Make real spill instructions out of SPILL, RELOAD pseudos
+-- | Strip away liveness information from a basic block,
+--     and make real spill instructions out of SPILL, RELOAD pseudos along the way.
  
  
-spillNatBlock :: NatBasicBlock -> NatBasicBlock
-spillNatBlock (BasicBlock i is)
+stripLiveBlock
+       :: Instruction instr
+       => LiveBasicBlock instr
+       -> NatBasicBlock instr
+
+stripLiveBlock (BasicBlock i lis)
   =     BasicBlock i instrs'
   =     BasicBlock i instrs'
+
   where         (instrs', _)
   where         (instrs', _)
-               = runState (spillNat [] is) 0
+               = runState (spillNat [] lis) 0
  
         spillNat acc []
          =      return (reverse acc)
  
  
         spillNat acc []
          =      return (reverse acc)
  
-       spillNat acc (DELTA i : instrs)
-        = do   put i
-               spillNat acc instrs
-
         spillNat acc (SPILL reg slot : instrs)
          = do   delta   <- get
                 spillNat (mkSpillInstr reg delta slot : acc) instrs
         spillNat acc (SPILL reg slot : instrs)
          = do   delta   <- get
                 spillNat (mkSpillInstr reg delta slot : acc) instrs
@@ -377,22 +424,28 @@ spillNatBlock (BasicBlock i is)
          = do   delta   <- get
                 spillNat (mkLoadInstr reg delta slot : acc) instrs
  
          = do   delta   <- get
                 spillNat (mkLoadInstr reg delta slot : acc) instrs
  
-       spillNat acc (instr : instrs)
+       spillNat acc (Instr instr _ : instrs)
+        | Just i <- takeDeltaInstr instr
+        = do   put i
+               spillNat acc instrs
+
+       spillNat acc (Instr instr _ : instrs)
          =      spillNat (instr : acc) instrs
  
  
  -- | Erase Delta instructions.
  
          =      spillNat (instr : acc) instrs
  
  
  -- | Erase Delta instructions.
  
-eraseDeltasLive :: LiveCmmTop -> LiveCmmTop
+eraseDeltasLive 
+       :: Instruction instr
+       => LiveCmmTop instr
+       -> LiveCmmTop instr
+
  eraseDeltasLive cmm
         = mapBlockTop eraseBlock cmm
   where
  eraseDeltasLive cmm
         = mapBlockTop eraseBlock cmm
   where
-       isDelta (DELTA _)       = True
-       isDelta _               = False
-
         eraseBlock (BasicBlock id lis)
                 = BasicBlock id
         eraseBlock (BasicBlock id lis)
                 = BasicBlock id
-               $ filter (\(Instr i _) -> not $ isDelta i)
+               $ filter (\(Instr i _) -> not $ isJust $ takeDeltaInstr i)
                 $ lis
  
  
                 $ lis
  
  
@@ -401,8 +454,9 @@ eraseDeltasLive cmm
  --     also erase reg -> reg moves when the destination dies in this instr.
  
  patchEraseLive
  --     also erase reg -> reg moves when the destination dies in this instr.
  
  patchEraseLive
-       :: (Reg -> Reg)
-       -> LiveCmmTop -> LiveCmmTop
+       :: Instruction instr
+       => (Reg -> Reg)
+       -> LiveCmmTop instr -> LiveCmmTop instr
  
  patchEraseLive patchF cmm
         = patchCmm cmm
  
  patchEraseLive patchF cmm
         = patchCmm cmm
@@ -427,7 +481,7 @@ patchEraseLive patchF cmm
         patchInstrs (li : lis)
  
                 | Instr i (Just live)   <- li'
         patchInstrs (li : lis)
  
                 | Instr i (Just live)   <- li'
-               , Just (r1, r2) <- isRegRegMove i
+               , Just (r1, r2) <- takeRegRegMoveInstr i
                 , eatMe r1 r2 live
                 = patchInstrs lis
  
                 , eatMe r1 r2 live
                 = patchInstrs lis
  
@@ -451,30 +505,38 @@ patchEraseLive patchF cmm
  -- | Patch registers in this LiveInstr, including the liveness information.
  --
  patchRegsLiveInstr
  -- | Patch registers in this LiveInstr, including the liveness information.
  --
  patchRegsLiveInstr
-       :: (Reg -> Reg)
-       -> LiveInstr -> LiveInstr
+       :: Instruction instr
+       => (Reg -> Reg)
+       -> LiveInstr instr -> LiveInstr instr
  
  patchRegsLiveInstr patchF li
   = case li of
         Instr instr Nothing
  
  patchRegsLiveInstr patchF li
   = case li of
         Instr instr Nothing
-        -> Instr (patchRegs instr patchF) Nothing
+        -> Instr (patchRegsOfInstr instr patchF) Nothing
  
         Instr instr (Just live)
          -> Instr
  
         Instr instr (Just live)
          -> Instr
-               (patchRegs instr patchF)
+               (patchRegsOfInstr instr patchF)
                 (Just live
                         { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
                           liveBorn      = mkUniqSet $ map patchF $ uniqSetToList $ liveBorn live
                         , liveDieRead   = mkUniqSet $ map patchF $ uniqSetToList $ liveDieRead live
                         , liveDieWrite  = mkUniqSet $ map patchF $ uniqSetToList $ liveDieWrite live })
  
                 (Just live
                         { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
                           liveBorn      = mkUniqSet $ map patchF $ uniqSetToList $ liveBorn live
                         , liveDieRead   = mkUniqSet $ map patchF $ uniqSetToList $ liveDieRead live
                         , liveDieWrite  = mkUniqSet $ map patchF $ uniqSetToList $ liveDieWrite live })
  
+       SPILL reg slot
+        -> SPILL (patchF reg) slot
+               
+       RELOAD slot reg
+        -> RELOAD slot (patchF reg)
+
  
  ---------------------------------------------------------------------------------
  -- Annotate code with register liveness information
  --
  regLiveness
  
  ---------------------------------------------------------------------------------
  -- Annotate code with register liveness information
  --
  regLiveness
-       :: NatCmmTop
-       -> UniqSM LiveCmmTop
+       :: Instruction instr
+       => NatCmmTop instr
+       -> UniqSM (LiveCmmTop instr)
  
  regLiveness (CmmData i d)
         = returnUs $ CmmData i d
  
  regLiveness (CmmData i d)
         = returnUs $ CmmData i d
@@ -501,11 +563,15 @@ regLiveness (CmmProc info lbl params (ListGraph blocks@(first : _)))
                            lbl params (ListGraph liveBlocks)
  
  
                            lbl params (ListGraph liveBlocks)
  
  
-sccBlocks :: [NatBasicBlock] -> [SCC NatBasicBlock]
+sccBlocks 
+       :: Instruction instr
+       => [NatBasicBlock instr] 
+       -> [SCC (NatBasicBlock instr)]
+
  sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
    where
  sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
    where
-       getOutEdges :: [Instr] -> [BlockId]
-       getOutEdges instrs = foldl' (\a x -> jumpDests x a) [] instrs
+       getOutEdges :: Instruction instr => [instr] -> [BlockId]
+       getOutEdges instrs = concat $ map jumpDestsOfInstr instrs
  
         graph = [ (block, getUnique id, map getUnique (getOutEdges instrs))
                 | block@(BasicBlock id instrs) <- blocks ]
  
         graph = [ (block, getUnique id, map getUnique (getOutEdges instrs))
                 | block@(BasicBlock id instrs) <- blocks ]
@@ -515,12 +581,13 @@ sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
  -- Computing liveness
  
  computeLiveness
  -- Computing liveness
  
  computeLiveness
-   :: [SCC NatBasicBlock]
-   -> ([SCC LiveBasicBlock],           -- instructions annotated with list of registers
-                                       -- which are "dead after this instruction".
-       BlockMap RegSet)                        -- blocks annontated with set of live registers
-                                       -- on entry to the block.
-
+       :: Instruction instr
+       => [SCC (NatBasicBlock instr)]
+       -> ([SCC (LiveBasicBlock instr)],       -- instructions annotated with list of registers
+                                               -- which are "dead after this instruction".
+              BlockMap RegSet)                 -- blocks annontated with set of live registers
+                                               -- on entry to the block.
+       
    -- NOTE: on entry, the SCCs are in "reverse" order: later blocks may transfer
    -- control to earlier ones only.  The SCCs returned are in the *opposite* 
    -- order, which is exactly what we want for the next pass.
    -- NOTE: on entry, the SCCs are in "reverse" order: later blocks may transfer
    -- control to earlier ones only.  The SCCs returned are in the *opposite* 
    -- order, which is exactly what we want for the next pass.
@@ -530,10 +597,12 @@ computeLiveness sccs
  
  
  livenessSCCs
  
  
  livenessSCCs
-       :: BlockMap RegSet
-       -> [SCC LiveBasicBlock]         -- accum
-       -> [SCC NatBasicBlock]
-       -> ([SCC LiveBasicBlock], BlockMap RegSet)
+       :: Instruction instr
+       => BlockMap RegSet
+       -> [SCC (LiveBasicBlock instr)]         -- accum
+       -> [SCC (NatBasicBlock instr)]
+       -> ( [SCC (LiveBasicBlock instr)]
+                 , BlockMap RegSet)
  
  livenessSCCs blockmap done [] = (done, blockmap)
  
  
  livenessSCCs blockmap done [] = (done, blockmap)
  
@@ -561,8 +630,11 @@ livenessSCCs blockmap done
                   (a, panic "RegLiveness.livenessSCCs")
  
  
                   (a, panic "RegLiveness.livenessSCCs")
  
  
-            linearLiveness :: BlockMap RegSet -> [NatBasicBlock]
-                           -> (BlockMap RegSet, [LiveBasicBlock])
+            linearLiveness 
+               :: Instruction instr
+               => BlockMap RegSet -> [NatBasicBlock instr]
+               -> (BlockMap RegSet, [LiveBasicBlock instr])
+
              linearLiveness = mapAccumL livenessBlock
  
                  -- probably the least efficient way to compare two
              linearLiveness = mapAccumL livenessBlock
  
                  -- probably the least efficient way to compare two
@@ -578,9 +650,10 @@ livenessSCCs blockmap done
  -- | Annotate a basic block with register liveness information.
  --
  livenessBlock
  -- | Annotate a basic block with register liveness information.
  --
  livenessBlock
-       :: BlockMap RegSet
-       -> NatBasicBlock
-       -> (BlockMap RegSet, LiveBasicBlock)
+       :: Instruction instr
+       => BlockMap RegSet
+       -> NatBasicBlock instr
+       -> (BlockMap RegSet, LiveBasicBlock instr)
  
  livenessBlock blockmap (BasicBlock block_id instrs)
   = let
  
  livenessBlock blockmap (BasicBlock block_id instrs)
   = let
@@ -598,8 +671,9 @@ livenessBlock blockmap (BasicBlock block_id instrs)
  --     filling in when regs are born
  
  livenessForward
  --     filling in when regs are born
  
  livenessForward
-       :: RegSet                       -- regs live on this instr
-       -> [LiveInstr] -> [LiveInstr]
+       :: Instruction instr
+       => RegSet                       -- regs live on this instr
+       -> [LiveInstr instr] -> [LiveInstr instr]
  
  livenessForward _           [] = []
  livenessForward rsLiveEntry (li@(Instr instr mLive) : lis)
  
  livenessForward _           [] = []
  livenessForward rsLiveEntry (li@(Instr instr mLive) : lis)
@@ -607,7 +681,7 @@ livenessForward rsLiveEntry (li@(Instr instr mLive) : lis)
         = li : livenessForward rsLiveEntry lis
  
         | Just live     <- mLive
         = li : livenessForward rsLiveEntry lis
  
         | Just live     <- mLive
-       , RU _ written  <- regUsage instr
+       , RU _ written  <- regUsageOfInstr instr
         = let
                 -- Regs that are written to but weren't live on entry to this instruction
                 --      are recorded as being born here.
         = let
                 -- Regs that are written to but weren't live on entry to this instruction
                 --      are recorded as being born here.
@@ -628,11 +702,12 @@ livenessForward _ _               = panic "RegLiveness.livenessForward: no match"
  --     filling in when regs die, and what regs are live across each instruction
  
  livenessBack
  --     filling in when regs die, and what regs are live across each instruction
  
  livenessBack
-       :: RegSet                       -- regs live on this instr
+       :: Instruction instr 
+       => RegSet                       -- regs live on this instr
         -> BlockMap RegSet              -- regs live on entry to other BBs
         -> BlockMap RegSet              -- regs live on entry to other BBs
-       -> [LiveInstr]                  -- instructions (accum)
-       -> [Instr]                      -- instructions
-       -> (RegSet, [LiveInstr])
+       -> [LiveInstr instr]            -- instructions (accum)
+       -> [instr]                      -- instructions
+       -> (RegSet, [LiveInstr instr])
  
  livenessBack liveregs _        done []  = (liveregs, done)
  
  
  livenessBack liveregs _        done []  = (liveregs, done)
  
@@ -640,32 +715,37 @@ livenessBack liveregs blockmap acc (instr : instrs)
   = let (liveregs', instr')     = liveness1 liveregs blockmap instr
     in  livenessBack liveregs' blockmap (instr' : acc) instrs
  
   = let (liveregs', instr')     = liveness1 liveregs blockmap instr
     in  livenessBack liveregs' blockmap (instr' : acc) instrs
  
--- don't bother tagging comments or deltas with liveness
-liveness1 :: RegSet -> BlockMap RegSet -> Instr -> (RegSet, LiveInstr)
-liveness1 liveregs _   (instr@COMMENT{})
-       = (liveregs, Instr instr Nothing)
  
  
-liveness1 liveregs _   (instr@DELTA{})
+-- don't bother tagging comments or deltas with liveness
+liveness1 
+       :: Instruction instr
+       => RegSet 
+       -> BlockMap RegSet 
+       -> instr
+       -> (RegSet, LiveInstr instr)
+
+liveness1 liveregs _ instr
+       | isMetaInstr instr
         = (liveregs, Instr instr Nothing)
  
  liveness1 liveregs blockmap instr
  
         = (liveregs, Instr instr Nothing)
  
  liveness1 liveregs blockmap instr
  
-      | not_a_branch
-      = (liveregs1, Instr instr
+       | not_a_branch
+       = (liveregs1, Instr instr
                         (Just $ Liveness
                         { liveBorn      = emptyUniqSet
                         , liveDieRead   = mkUniqSet r_dying
                         , liveDieWrite  = mkUniqSet w_dying }))
  
                         (Just $ Liveness
                         { liveBorn      = emptyUniqSet
                         , liveDieRead   = mkUniqSet r_dying
                         , liveDieWrite  = mkUniqSet w_dying }))
  
-      | otherwise
-      = (liveregs_br, Instr instr
+       | otherwise
+       = (liveregs_br, Instr instr
                         (Just $ Liveness
                         { liveBorn      = emptyUniqSet
                         , liveDieRead   = mkUniqSet r_dying_br
                         , liveDieWrite  = mkUniqSet w_dying }))
  
                         (Just $ Liveness
                         { liveBorn      = emptyUniqSet
                         , liveDieRead   = mkUniqSet r_dying_br
                         , liveDieWrite  = mkUniqSet w_dying }))
  
-      where
-           RU read written = regUsage instr
+       where
+           RU read written = regUsageOfInstr instr
  
             -- registers that were written here are dead going backwards.
             -- registers that were read here are live going backwards.
  
             -- registers that were written here are dead going backwards.
             -- registers that were read here are live going backwards.
@@ -682,7 +762,7 @@ liveness1 liveregs blockmap instr
  
             -- union in the live regs from all the jump destinations of this
             -- instruction.
  
             -- union in the live regs from all the jump destinations of this
             -- instruction.
-           targets      = jumpDests instr [] -- where we go from here
+           targets      = jumpDestsOfInstr instr -- where we go from here
             not_a_branch = null targets
  
             targetLiveRegs target
             not_a_branch = null targets
  
             targetLiveRegs target
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs

deleted file mode 100644 (file)

index f0cb8b5..0000000
--- a/compiler/nativeGen/RegAllocInfo.hs
+++ /dev/null
@@ -1,94 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Machine-specific parts of the register allocator
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
-
-module RegAllocInfo (
-       -- shared code
-       shortcutStatic,
-       
-       -- machine specific 
-       RegUsage(..),
-       noUsage,
-       regUsage,
-       patchRegs,
-       jumpDests,
-       isJumpish,
-       patchJump,
-       isRegRegMove,
-
-        JumpDest, 
-       canShortcut, 
-       shortcutJump, 
-
-       mkSpillInstr,
-       mkLoadInstr,
-       mkRegRegMoveInstr,
-       mkBranchInstr,
-
-       maxSpillSlots,
-       spillSlotToOffset
-    ) where
-
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-
-import BlockId
-import Cmm
-import CLabel
-import Instrs
-import Regs
-import Outputable
-import Constants       ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-#if   alpha_TARGET_ARCH
-import Alpha.RegInfo
-
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.RegInfo
-
-#elif powerpc_TARGET_ARCH
-import PPC.RegInfo
-
-#elif sparc_TARGET_ARCH
-import SPARC.RegInfo
-
-#endif
-
-
--- Here because it knows about JumpDest
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
-  | Just uq <- maybeAsmTemp lab 
-  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
-  | Just uq <- maybeAsmTemp lbl1
-  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
-        -- slightly dodgy, we're ignoring the second label, but this
-        -- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic fn other_static
-        = other_static
-
-shortBlockId fn blockid@(BlockId uq) =
-   case fn blockid of
-      Nothing -> mkAsmTempLabel uq
-      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
-      Just (DestImm (ImmCLbl lbl)) -> lbl
-      _other -> panic "shortBlockId"
-
-
-
-
-
diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs

new file mode 100644 (file)

index 0000000..8b6b2d4
--- /dev/null
+++ b/compiler/nativeGen/RegClass.hs
@@ -0,0 +1,31 @@
+
+-- | An architecture independent description of a register's class.
+module RegClass 
+       ( RegClass (..) )
+
+where
+
+import Outputable
+import Unique
+
+
+-- | The class of a register. 
+--     Used in the register allocator.
+--     We treat all registers in a class as being interchangable.
+--
+data RegClass 
+       = RcInteger 
+       | RcFloat
+       | RcDouble
+       deriving Eq
+
+
+instance Uniquable RegClass where
+    getUnique RcInteger        = mkUnique 'L' 0
+    getUnique RcFloat  = mkUnique 'L' 1
+    getUnique RcDouble = mkUnique 'L' 2
+
+instance Outputable RegClass where
+    ppr RcInteger      = Outputable.text "I"
+    ppr RcFloat                = Outputable.text "F"
+    ppr RcDouble       = Outputable.text "D"
diff --git a/compiler/nativeGen/RegsBase.hs b/compiler/nativeGen/RegsBase.hs

deleted file mode 100644 (file)

index 00c87cb..0000000
--- a/compiler/nativeGen/RegsBase.hs
+++ /dev/null
@@ -1,105 +0,0 @@
-
-module RegsBase (
-       RegNo,
-       Reg(..), 
-       isRealReg, 
-       unRealReg,
-       isVirtualReg, 
-       renameVirtualReg,
-
-        RegClass(..)
-)
-
-where
-
-import Outputable      ( Outputable(..) )
-import qualified Outputable
-import Panic
-import Unique
-
--- ---------------------------------------------------------------------------
--- Registers
-
--- RealRegs are machine regs which are available for allocation, in
--- the usual way.  We know what class they are, because that's part of
--- the processor's architecture.
-
--- VirtualRegs are virtual registers.  The register allocator will
--- eventually have to map them into RealRegs, or into spill slots.
--- VirtualRegs are allocated on the fly, usually to represent a single
--- value in the abstract assembly code (i.e. dynamic registers are
--- usually single assignment).  With the new register allocator, the
--- single assignment restriction isn't necessary to get correct code,
--- although a better register allocation will result if single
--- assignment is used -- because the allocator maps a VirtualReg into
--- a single RealReg, even if the VirtualReg has multiple live ranges.
-
--- Virtual regs can be of either class, so that info is attached.
-
-type RegNo 
-       = Int
-
-data Reg
-       = RealReg      {-# UNPACK #-} !RegNo
-       | VirtualRegI  {-# UNPACK #-} !Unique
-       | VirtualRegHi {-# UNPACK #-} !Unique  -- High part of 2-word register
-       | VirtualRegF  {-# UNPACK #-} !Unique
-       | VirtualRegD  {-# UNPACK #-} !Unique
-       deriving (Eq, Ord)
-
-
--- We like to have Uniques for Reg so that we can make UniqFM and UniqSets 
--- in the register allocator.
-instance Uniquable Reg where
-       getUnique (RealReg i)      = mkUnique 'C' i
-       getUnique (VirtualRegI u)  = u
-       getUnique (VirtualRegHi u) = u
-       getUnique (VirtualRegF u)  = u
-       getUnique (VirtualRegD u)  = u
-
-
-isRealReg :: Reg -> Bool
-isRealReg = not . isVirtualReg
-
--- | Take the RegNo from a real reg
-unRealReg :: Reg -> RegNo
-unRealReg (RealReg i)  = i
-unRealReg _            = panic "unRealReg on VirtualReg"
-
-isVirtualReg :: Reg -> Bool
-isVirtualReg (RealReg _)      = False
-isVirtualReg (VirtualRegI _)  = True
-isVirtualReg (VirtualRegHi _) = True
-isVirtualReg (VirtualRegF _)  = True
-isVirtualReg (VirtualRegD _)  = True
-
-
-renameVirtualReg :: Unique -> Reg -> Reg
-renameVirtualReg u r
- = case r of
-       RealReg _       -> error "renameVirtualReg: can't change unique on a real reg"
-       VirtualRegI _   -> VirtualRegI  u
-       VirtualRegHi _  -> VirtualRegHi u
-       VirtualRegF _   -> VirtualRegF  u
-       VirtualRegD _   -> VirtualRegD  u
-
-
--- RegClass --------------------------------------------------------------------
-data RegClass 
-   = RcInteger 
-   | RcFloat
-   | RcDouble
-     deriving Eq
-
-instance Uniquable RegClass where
-    getUnique RcInteger        = mkUnique 'L' 0
-    getUnique RcFloat  = mkUnique 'L' 1
-    getUnique RcDouble = mkUnique 'L' 2
-
-instance Outputable RegClass where
-    ppr RcInteger      = Outputable.text "I"
-    ppr RcFloat                = Outputable.text "F"
-    ppr RcDouble       = Outputable.text "D"
-
-
-
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs

new file mode 100644 (file)

index 0000000..d921c12
--- /dev/null
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -0,0 +1,1545 @@
+{-# OPTIONS -w #-}
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+module SPARC.CodeGen ( 
+       cmmTopCodeGen, 
+       InstrBlock 
+) 
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import SPARC.Instr
+import SPARC.Cond
+import SPARC.Regs
+import SPARC.RegInfo
+import Instruction
+import Size
+import Reg
+import PIC
+import NCGMonad
+
+-- Our intermediate code:
+import BlockId
+import Cmm
+import CLabel
+
+-- The rest:
+import BasicTypes
+import StaticFlags     ( opt_PIC )
+import OrdList
+import qualified Outputable as O
+import Outputable
+import FastString
+
+import Control.Monad   ( mapAndUnzipM )
+import Data.Int
+import DynFlags
+
+-- | Top level code generation
+cmmTopCodeGen 
+       :: DynFlags
+       -> RawCmmTop 
+       -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen _
+       (CmmProc info lab params (ListGraph blocks)) 
+ = do  
+       (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+
+--     picBaseMb       <- getPicBaseMaybeNat
+       let proc        = CmmProc info lab params (ListGraph $ concat nat_blocks)
+       let tops        = proc : concat statics
+
+--     case picBaseMb of
+--      Just picBase -> initializePicBase picBase tops
+--      Nothing -> return tops
+  
+       return tops
+  
+  
+cmmTopCodeGen _ (CmmData sec dat) = do
+  return [CmmData sec dat]  -- no translation, we just use CmmStatic
+
+
+
+basicBlockCodeGen 
+       :: CmmBasicBlock
+       -> NatM ( [NatBasicBlock Instr]
+               , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+  instrs <- stmtsToInstrs stmts
+  -- code generation may introduce new basic block boundaries, which
+  -- are indicated by the NEWBLOCK instruction.  We must split up the
+  -- instruction stream into basic blocks again.  Also, we extract
+  -- LDATAs here too.
+  let
+       (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+       
+       mkBlocks (NEWBLOCK id) (instrs,blocks,statics) 
+         = ([], BasicBlock id instrs : blocks, statics)
+       mkBlocks (LDATA sec dat) (instrs,blocks,statics) 
+         = (instrs, blocks, CmmData sec dat:statics)
+       mkBlocks instr (instrs,blocks,statics)
+         = (instr:instrs, blocks, statics)
+  -- in
+  return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+   = do instrss <- mapM stmtToInstrs stmts
+        return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+    CmmNop        -> return nilOL
+    CmmComment s   -> return (unitOL (COMMENT s))
+
+    CmmAssign reg src
+      | isFloatType ty -> assignReg_FltCode size reg src
+      | isWord64 ty    -> assignReg_I64Code      reg src
+      | otherwise      -> assignReg_IntCode size reg src
+       where ty = cmmRegType reg
+             size = cmmTypeSize ty
+
+    CmmStore addr src
+      | isFloatType ty -> assignMem_FltCode size addr src
+      | isWord64 ty    -> assignMem_I64Code      addr src
+      | otherwise      -> assignMem_IntCode size addr src
+       where ty = cmmExprType src
+             size = cmmTypeSize ty
+
+    CmmCall target result_regs args _ _
+       -> genCCall target result_regs args
+
+    CmmBranch  id              -> genBranch id
+    CmmCondBranch arg id       -> genCondJump id arg
+    CmmSwitch  arg ids         -> genSwitch arg ids
+    CmmJump    arg _           -> genJump arg
+
+    CmmReturn  _               
+     -> panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+--     They are really trees of insns to facilitate fast appending, where a
+--     left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock 
+       = OrdList Instr
+
+
+-- | Condition codes passed up the tree.
+--
+data CondCode  
+       = CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+--     Reg is the lower 32-bit temporary which contains the result. 
+--     Use getHiVRegFromLo to find the other VRegUnique.  
+--
+--     Rules of this simplified insn selection game are therefore that
+--     the returned Reg may be modified
+--
+data ChildCode64       
+   = ChildCode64 
+        InstrBlock
+        Reg            
+
+
+-- | Register's passed up the tree.  If the stix code forces the register
+--     to live in a pre-decided machine register, it comes out as @Fixed@;
+--     otherwise, it comes out as @Any@, and the parent can decide which
+--     register to put it in.
+--
+data Register
+       = Fixed Size Reg InstrBlock
+       | Any   Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+  = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+  = case get_GlobalReg_reg_or_addr mid of
+       Left (RealReg rrno) -> RealReg rrno
+       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+          -- By this stage, the only MagicIds remaining should be the
+          -- ones which map to a real machine register on this
+          -- platform.  Hence ...
+
+
+-- | Memory addressing modes passed up the tree.
+data Amode 
+       = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to.  So you can't put
+anything in between, lest it overwrite some of those registers.  If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+    code
+    LEA amode, tmp
+    ... other computation ...
+    ... (tmp) ...
+-}
+
+
+-- | Check whether an integer will fit in 32 bits.
+--     A CmmInt is intended to be truncated to the appropriate 
+--     number of bits, so here we truncate it to Int64.  This is
+--     important because e.g. -1 as a CmmInt might be either
+--     -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+  where i64 = fromIntegral i :: Int64
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+    where blockLabel = mkAsmTempLabel id
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+  where width = typeWidth (cmmRegType reg)
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+     Amode _ addr_code                 <- getAmode addrTree
+     ChildCode64 vcode rlo     <- iselExpr64 valueTree  
+
+     (src, code) <- getSomeReg addrTree
+     let 
+         rhi = getHiVRegFromLo rlo
+         -- Big-endian store
+         mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
+         mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
+
+     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
+
+
+assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree    
+     let 
+         r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
+         r_dst_hi = getHiVRegFromLo r_dst_lo
+         r_src_hi = getHiVRegFromLo r_src_lo
+         mov_lo = mkMOV r_src_lo r_dst_lo
+         mov_hi = mkMOV r_src_hi r_dst_hi
+         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
+assignReg_I64Code lvalue valueTree
+   = panic "assignReg_I64Code(sparc): invalid lvalue"
+
+
+-- Load a 64 bit word
+iselExpr64 (CmmLoad addrTree ty) 
+ | isWord64 ty
+ = do  Amode amode addr_code   <- getAmode addrTree
+       let result
+
+               | AddrRegReg r1 r2      <- amode
+               = do    rlo     <- getNewRegNat II32
+                       tmp     <- getNewRegNat II32
+                       let rhi = getHiVRegFromLo rlo
+
+                       return  $ ChildCode64 
+                               (        addr_code 
+                               `appOL`  toOL
+                                        [ ADD False False r1 (RIReg r2) tmp
+                                        , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
+                                        , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
+                               rlo
+
+               | AddrRegImm r1 (ImmInt i) <- amode
+               = do    rlo     <- getNewRegNat II32
+                       let rhi = getHiVRegFromLo rlo
+                       
+                       return  $ ChildCode64 
+                               (        addr_code 
+                               `appOL`  toOL
+                                        [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
+                                        , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
+                               rlo
+               
+       result
+
+
+-- Add a literal to a 64 bit integer
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) 
+ = do  ChildCode64 code1 r1_lo <- iselExpr64 e1
+       let r1_hi       = getHiVRegFromLo r1_lo
+       
+       r_dst_lo        <- getNewRegNat II32
+       let r_dst_hi    =  getHiVRegFromLo r_dst_lo 
+       
+       return  $ ChildCode64
+                       ( toOL
+                       [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
+                       , ADD True  False r1_hi (RIReg g0)         r_dst_hi ])
+                       r_dst_lo
+
+
+-- Addition of II64
+iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
+ = do  ChildCode64 code1 r1_lo <- iselExpr64 e1
+       let r1_hi       = getHiVRegFromLo r1_lo
+
+       ChildCode64 code2 r2_lo <- iselExpr64 e2
+       let r2_hi       = getHiVRegFromLo r2_lo
+       
+       r_dst_lo        <- getNewRegNat II32
+       let r_dst_hi    = getHiVRegFromLo r_dst_lo
+       
+       let code =      code1
+               `appOL` code2
+               `appOL` toOL
+                       [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
+                       , ADD True  False r1_hi (RIReg r2_hi) r_dst_hi ]
+       
+       return  $ ChildCode64 code r_dst_lo
+
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
+     r_dst_lo <-  getNewRegNat II32
+     let r_dst_hi = getHiVRegFromLo r_dst_lo
+         r_src_lo = mkVReg uq II32
+         r_src_hi = getHiVRegFromLo r_src_lo
+         mov_lo = mkMOV r_src_lo r_dst_lo
+         mov_hi = mkMOV r_src_hi r_dst_hi
+         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+     return (
+            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
+         )
+
+-- Convert something into II64
+iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) 
+ = do
+       r_dst_lo        <- getNewRegNat II32
+       let r_dst_hi    = getHiVRegFromLo r_dst_lo
+
+       -- compute expr and load it into r_dst_lo
+       (a_reg, a_code) <- getSomeReg expr
+
+       let code        = a_code
+               `appOL` toOL
+                       [ mkRegRegMoveInstr g0    r_dst_hi      -- clear high 32 bits
+                       , mkRegRegMoveInstr a_reg r_dst_lo ]
+                       
+       return  $ ChildCode64 code r_dst_lo
+
+
+iselExpr64 expr
+   = pprPanic "iselExpr64(sparc)" (ppr expr)
+
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+--     we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+       tmp <- getNewRegNat rep
+       return (tmp, code tmp)
+    Fixed _ reg code -> 
+       return (reg, code)
+
+
+-- 
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg) 
+  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
+                 (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _) 
+  = getRegister (mangleIndexTree tree)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code       
+
+
+
+-- Load a literal float into a float register.
+--     The actual literal is stored in a new data area, and we load it 
+--     at runtime.
+getRegister (CmmLit (CmmFloat f W32)) = do
+
+    -- a label for the new data area
+    lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
+
+    let code dst = toOL [
+            -- the data area         
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat f W32)],
+
+            -- load the literal
+           SETHI (HI (ImmCLbl lbl)) tmp,
+           LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
+
+    return (Any FF32 code)
+
+getRegister (CmmLit (CmmFloat d W64)) = do
+    lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
+    let code dst = toOL [
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat d W64)],
+           SETHI (HI (ImmCLbl lbl)) tmp,
+           LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
+    return (Any FF64 code)
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+  = case mop of
+      MO_F_Neg W32     -> trivialUFCode FF32 (FNEG FF32) x
+      MO_F_Neg W64     -> trivialUFCode FF64 (FNEG FF64) x
+
+      MO_S_Neg rep     -> trivialUCode (intSize rep) (SUB False False g0) x
+      MO_Not rep       -> trivialUCode (intSize rep) (XNOR False g0) x
+
+      MO_FF_Conv W64 W32-> coerceDbl2Flt x
+      MO_FF_Conv W32 W64-> coerceFlt2Dbl x
+
+      MO_FS_Conv from to -> coerceFP2Int from to x
+      MO_SF_Conv from to -> coerceInt2FP from to x
+
+      -- Conversions which are a nop on sparc
+      MO_UU_Conv from to
+       | from == to    -> conversionNop (intSize to)  x
+      MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
+      MO_UU_Conv W32 to -> conversionNop (intSize to) x
+      MO_SS_Conv W32 to -> conversionNop (intSize to) x
+
+      MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
+
+      -- sign extension
+      MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
+      MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
+      MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
+
+      other_op -> panic ("Unknown unary mach op: " ++ show mop)
+    where
+
+       -- | sign extend and widen
+       integerExtend 
+               :: Width                -- ^ width of source expression
+               -> Width                -- ^ width of result
+               -> CmmExpr              -- ^ source expression
+               -> NatM Register        
+
+       integerExtend from to expr
+        = do   -- load the expr into some register
+               (reg, e_code)   <- getSomeReg expr
+               tmp             <- getNewRegNat II32
+               let bitCount
+                       = case (from, to) of
+                               (W8,  W32)      -> 24
+                               (W16, W32)      -> 16
+                               (W8,  W16)      -> 24
+               let code dst
+                       = e_code        
+
+                       -- local shift word left to load the sign bit
+                       `snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
+                       
+                       -- arithmetic shift right to sign extend
+                       `snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
+                       
+               return (Any (intSize to) code)
+                               
+
+        conversionNop new_rep expr
+            = do e_code <- getRegister expr
+                 return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+  = case mop of
+      MO_Eq rep -> condIntReg EQQ x y
+      MO_Ne rep -> condIntReg NE x y
+
+      MO_S_Gt rep -> condIntReg GTT x y
+      MO_S_Ge rep -> condIntReg GE x y
+      MO_S_Lt rep -> condIntReg LTT x y
+      MO_S_Le rep -> condIntReg LE x y
+             
+      MO_U_Gt W32  -> condIntReg GTT x y
+      MO_U_Ge W32  -> condIntReg GE x y
+      MO_U_Lt W32  -> condIntReg LTT x y
+      MO_U_Le W32  -> condIntReg LE x y
+
+      MO_U_Gt W16 -> condIntReg GU  x y
+      MO_U_Ge W16 -> condIntReg GEU x y
+      MO_U_Lt W16 -> condIntReg LU  x y
+      MO_U_Le W16 -> condIntReg LEU x y
+
+      MO_Add W32 -> trivialCode W32 (ADD False False) x y
+      MO_Sub W32 -> trivialCode W32 (SUB False False) x y
+
+      MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+      MO_S_Quot W32    -> idiv True  False x y
+      MO_U_Quot W32    -> idiv False False x y
+       
+      MO_S_Rem  W32    -> irem True  x y
+      MO_U_Rem W32     -> irem False x y
+       
+      MO_F_Eq w -> condFltReg EQQ x y
+      MO_F_Ne w -> condFltReg NE x y
+
+      MO_F_Gt w -> condFltReg GTT x y
+      MO_F_Ge w -> condFltReg GE x y 
+      MO_F_Lt w -> condFltReg LTT x y
+      MO_F_Le w -> condFltReg LE x y
+
+      MO_F_Add  w  -> trivialFCode w FADD x y
+      MO_F_Sub  w  -> trivialFCode w FSUB x y
+      MO_F_Mul  w  -> trivialFCode w FMUL x y
+      MO_F_Quot w  -> trivialFCode w FDIV x y
+
+      MO_And rep   -> trivialCode rep (AND False) x y
+      MO_Or rep    -> trivialCode rep (OR  False) x y
+      MO_Xor rep   -> trivialCode rep (XOR False) x y
+
+      MO_Mul rep -> trivialCode rep (SMUL False) x y
+
+      MO_Shl rep   -> trivialCode rep SLL  x y
+      MO_U_Shr rep   -> trivialCode rep SRL x y
+      MO_S_Shr rep   -> trivialCode rep SRA x y
+
+{-
+      MO_F32_Pwr  -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 
+                                         [promote x, promote y])
+                      where promote x = CmmMachOp MO_F32_to_Dbl [x]
+      MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 
+                                        [x, y])
+-}
+      other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
+  where
+    -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+    -- | Generate an integer division instruction.
+    idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+       
+    -- For unsigned division with a 32 bit numerator, 
+    --         we can just clear the Y register.
+    idiv False cc x y = do
+       (a_reg, a_code)         <- getSomeReg x
+               (b_reg, b_code)         <- getSomeReg y
+       
+       let code dst
+               =       a_code 
+               `appOL` b_code  
+               `appOL` toOL
+                       [ WRY  g0 g0
+                       , UDIV cc a_reg (RIReg b_reg) dst]
+                       
+       return (Any II32 code)
+       
+
+    -- For _signed_ division with a 32 bit numerator,
+    --         we have to sign extend the numerator into the Y register.
+    idiv True cc x y = do
+       (a_reg, a_code)         <- getSomeReg x
+               (b_reg, b_code)         <- getSomeReg y
+       
+       tmp                     <- getNewRegNat II32
+       
+       let code dst
+               =       a_code 
+               `appOL` b_code  
+               `appOL` toOL
+                       [ SRA  a_reg (RIImm (ImmInt 16)) tmp            -- sign extend
+                       , SRA  tmp   (RIImm (ImmInt 16)) tmp
+
+                       , WRY  tmp g0                           
+                       , SDIV cc a_reg (RIReg b_reg) dst]
+                       
+       return (Any II32 code)
+
+
+    -- | Do an integer remainder.
+    --
+    --  NOTE:  The SPARC v8 architecture manual says that integer division
+    --         instructions _may_ generate a remainder, depending on the implementation.
+    --         If so it is _recommended_ that the remainder is placed in the Y register.
+    --
+    --          The UltraSparc 2007 manual says Y is _undefined_ after division.
+    --
+    --         The SPARC T2 doesn't store the remainder, not sure about the others. 
+    --         It's probably best not to worry about it, and just generate our own
+    --         remainders. 
+    --
+    irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+    -- For unsigned operands: 
+    --         Division is between a 64 bit numerator and a 32 bit denominator, 
+    --         so we still have to clear the Y register.
+    irem False x y = do
+       (a_reg, a_code) <- getSomeReg x
+       (b_reg, b_code) <- getSomeReg y
+
+       tmp_reg         <- getNewRegNat II32
+
+       let code dst
+               =       a_code
+               `appOL` b_code
+               `appOL` toOL
+                       [ WRY   g0 g0
+                       , UDIV  False         a_reg (RIReg b_reg) tmp_reg
+                       , UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
+                       , SUB   False False   a_reg (RIReg tmp_reg) dst]
+    
+       return  (Any II32 code)
+
+    
+    -- For signed operands:
+    --         Make sure to sign extend into the Y register, or the remainder
+    --         will have the wrong sign when the numerator is negative.
+    --
+    -- TODO:   When sign extending, GCC only shifts the a_reg right by 17 bits,
+    --         not the full 32. Not sure why this is, something to do with overflow?
+    --         If anyone cares enough about the speed of signed remainder they
+    --         can work it out themselves (then tell me). -- BL 2009/01/20
+    
+    irem True x y = do
+       (a_reg, a_code) <- getSomeReg x
+       (b_reg, b_code) <- getSomeReg y
+       
+       tmp1_reg        <- getNewRegNat II32
+       tmp2_reg        <- getNewRegNat II32
+               
+       let code dst
+               =       a_code
+               `appOL` b_code
+               `appOL` toOL
+                       [ SRA   a_reg      (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+                       , SRA   tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+                       , WRY   tmp1_reg g0
+
+                       , SDIV  False          a_reg (RIReg b_reg)    tmp2_reg  
+                       , SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
+                       , SUB   False False    a_reg (RIReg tmp2_reg) dst]
+                       
+       return (Any II32 code)
+   
+
+    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    imulMayOflo rep a b = do
+         (a_reg, a_code) <- getSomeReg a
+        (b_reg, b_code) <- getSomeReg b
+        res_lo <- getNewRegNat II32
+        res_hi <- getNewRegNat II32
+        let
+           shift_amt  = case rep of
+                         W32 -> 31
+                         W64 -> 63
+                         _ -> panic "shift_amt"
+           code dst = a_code `appOL` b_code `appOL`
+                       toOL [
+                           SMUL False a_reg (RIReg b_reg) res_lo,
+                           RDY res_hi,
+                           SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
+                           SUB False False res_lo (RIReg res_hi) dst
+                        ]
+         return (Any II32 code)
+
+getRegister (CmmLoad mem pk) = do
+    Amode src code <- getAmode mem
+    let
+       code__2 dst     = code `snocOL` LD (cmmTypeSize pk) src dst
+    return (Any (cmmTypeSize pk) code__2)
+
+getRegister (CmmLit (CmmInt i _))
+  | fits13Bits i
+  = let
+       src = ImmInt (fromInteger i)
+       code dst = unitOL (OR False g0 (RIImm src) dst)
+    in
+       return (Any II32 code)
+
+getRegister (CmmLit lit)
+  = let rep = cmmLitType lit
+       imm = litToImm lit
+       code dst = toOL [
+           SETHI (HI imm) dst,
+           OR False dst (RIImm (LO imm)) dst]
+    in return (Any II32 code)
+
+
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+  | fits13Bits (-i)
+  = do
+       (reg, code) <- getSomeReg x
+       let
+         off  = ImmInt (-(fromInteger i))
+       return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+  | fits13Bits i
+  = do
+       (reg, code) <- getSomeReg x
+       let
+        off  = ImmInt (fromInteger i)
+       return (Amode (AddrRegImm reg off) code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, y])
+  = do
+    (regX, codeX) <- getSomeReg x
+    (regY, codeY) <- getSomeReg y
+    let
+       code = codeX `appOL` codeY
+    return (Amode (AddrRegReg regX regY) code)
+
+getAmode (CmmLit lit)
+  = do
+       let imm__2      = litToImm lit
+       tmp1    <- getNewRegNat II32
+       tmp2    <- getNewRegNat II32
+
+       let code = toOL [ SETHI (HI imm__2) tmp1
+                       , OR    False tmp1 (RIImm (LO imm__2)) tmp2]
+               
+       return (Amode (AddrRegReg tmp2 g0) code)
+
+getAmode other
+  = do
+       (reg, code) <- getSomeReg other
+       let
+           off  = ImmInt 0
+       return (Amode (AddrRegImm reg off) code)
+
+
+getCondCode :: CmmExpr -> NatM CondCode
+getCondCode (CmmMachOp mop [x, y])
+  = 
+    case mop of
+      MO_F_Eq W32 -> condFltCode EQQ x y
+      MO_F_Ne W32 -> condFltCode NE  x y
+      MO_F_Gt W32 -> condFltCode GTT x y
+      MO_F_Ge W32 -> condFltCode GE  x y
+      MO_F_Lt W32 -> condFltCode LTT x y
+      MO_F_Le W32 -> condFltCode LE  x y
+
+      MO_F_Eq W64 -> condFltCode EQQ x y
+      MO_F_Ne W64 -> condFltCode NE  x y
+      MO_F_Gt W64 -> condFltCode GTT x y
+      MO_F_Ge W64 -> condFltCode GE  x y
+      MO_F_Lt W64 -> condFltCode LTT x y
+      MO_F_Le W64 -> condFltCode LE  x y
+
+      MO_Eq rep -> condIntCode EQQ  x y
+      MO_Ne rep -> condIntCode NE   x y
+
+      MO_S_Gt rep -> condIntCode GTT  x y
+      MO_S_Ge rep -> condIntCode GE   x y
+      MO_S_Lt rep -> condIntCode LTT  x y
+      MO_S_Le rep -> condIntCode LE   x y
+
+      MO_U_Gt rep -> condIntCode GU   x y
+      MO_U_Ge rep -> condIntCode GEU  x y
+      MO_U_Lt rep -> condIntCode LU   x y
+      MO_U_Le rep -> condIntCode LEU  x y
+
+      other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
+
+getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
+
+
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+condIntCode cond x (CmmLit (CmmInt y rep))
+  | fits13Bits y
+  = do
+       (src1, code) <- getSomeReg x
+       let
+           src2 = ImmInt (fromInteger y)
+           code' = code `snocOL` SUB False True src1 (RIImm src2) g0
+       return (CondCode False cond code')
+
+condIntCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+       code__2 = code1 `appOL` code2 `snocOL`
+                 SUB False True src1 (RIReg src2) g0
+    return (CondCode False cond code__2)
+
+
+condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+condFltCode cond x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp <- getNewRegNat FF64
+    let
+       promote x = FxTOy FF32 FF64 x tmp
+
+       pk1   = cmmExprType x
+       pk2   = cmmExprType y
+
+       code__2 =
+               if pk1 `cmmEqType` pk2 then
+                   code1 `appOL` code2 `snocOL`
+                   FCMP True (cmmTypeSize pk1) src1 src2
+               else if typeWidth pk1 == W32 then
+                   code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+                   FCMP True FF64 tmp src2
+               else
+                   code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+                   FCMP True FF64 src1 tmp
+    return (CondCode True cond code__2)
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business.  Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers.  If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side.  This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_IntCode pk addr src = do
+    (srcReg, code) <- getSomeReg src
+    Amode dstAddr addr_code <- getAmode addr
+    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+
+assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode pk reg src = do
+    r <- getRegister src
+    return $ case r of
+       Any _ code         -> code dst
+       Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
+    where
+      dst = getRegisterReg reg
+
+
+
+-- Floating point assignment to memory
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_FltCode pk addr src = do
+    Amode dst__2 code1 <- getAmode addr
+    (src__2, code2) <- getSomeReg src
+    tmp1 <- getNewRegNat pk
+    let
+       pk__2   = cmmExprType src
+       code__2 = code1 `appOL` code2 `appOL`
+           if   sizeToWidth pk == typeWidth pk__2 
+            then unitOL (ST pk src__2 dst__2)
+           else toOL   [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
+                       , ST    pk tmp1 dst__2]
+    return code__2
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode pk dstCmmReg srcCmmExpr = do
+    srcRegister <- getRegister srcCmmExpr
+    let dstReg = getRegisterReg dstCmmReg
+
+    return $ case srcRegister of
+        Any _ code                 -> code dstReg
+       Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
+
+
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLit (CmmLabel lbl))
+  = return (toOL [CALL (Left target) 0 True, NOP])
+  where
+    target = ImmCLbl lbl
+
+genJump tree
+  = do
+        (target, code) <- getSomeReg tree
+       return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
+
+-- -----------------------------------------------------------------------------
+--  Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+-- -----------------------------------------------------------------------------
+--  Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions.  We peek at the arguments to decide what kind of
+comparison to do.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation.  We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@.  We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+    :: BlockId     -- the branch target
+    -> CmmExpr      -- the condition on which to branch
+    -> NatM InstrBlock
+
+
+
+genCondJump bid bool = do
+  CondCode is_float cond code <- getCondCode bool
+  return (
+       code `appOL` 
+       toOL (
+         if   is_float
+         then [NOP, BF cond False bid, NOP]
+         else [BI cond False bid, NOP]
+       )
+    )
+
+
+
+-- -----------------------------------------------------------------------------
+--  Generating C calls
+
+-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations.  Apart from that, the code is easy.
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+    :: CmmCallTarget           -- function to call
+    -> HintedCmmFormals                -- where to put the result
+    -> HintedCmmActuals                -- arguments (of mixed type)
+    -> NatM InstrBlock
+
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+{- 
+   The SPARC calling convention is an absolute
+   nightmare.  The first 6x32 bits of arguments are mapped into
+   %o0 through %o5, and the remaining arguments are dumped to the
+   stack, beginning at [%sp+92].  (Note that %o6 == %sp.)
+
+   If we have to put args on the stack, move %o6==%sp down by
+   the number of words to go on the stack, to ensure there's enough space.
+
+   According to Fraser and Hanson's lcc book, page 478, fig 17.2,
+   16 words above the stack pointer is a word for the address of
+   a structure return value.  I use this as a temporary location
+   for moving values from float to int regs.  Certainly it isn't
+   safe to put anything in the 16 words starting at %sp, since
+   this area can get trashed at any time due to window overflows
+   caused by signal handlers.
+
+   A final complication (if the above isn't enough) is that 
+   we can't blithely calculate the arguments one by one into
+   %o0 .. %o5.  Consider the following nested calls:
+
+       fff a (fff b c)
+
+   Naive code moves a into %o0, and (fff b c) into %o1.  Unfortunately
+   the inner call will itself use %o0, which trashes the value put there
+   in preparation for the outer call.  Upshot: we need to calculate the
+   args into temporary regs, and move those to arg regs or onto the
+   stack only immediately prior to the call proper.  Sigh.
+
+genCCall
+    :: CmmCallTarget           -- function to call
+    -> HintedCmmFormals                -- where to put the result
+    -> HintedCmmActuals                -- arguments (of mixed type)
+    -> NatM InstrBlock
+
+-}
+
+
+-- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
+-- are guaranteed to take place before writes afterwards (unlike on PowerPC). 
+-- Ref: Section 8.4 of the SPARC V9 Architecture manual.
+--
+-- In the SPARC case we don't need a barrier.
+--
+genCCall (CmmPrim (MO_WriteBarrier)) _ _
+ = do  return nilOL
+
+genCCall target dest_regs argsAndHints 
+ = do          
+       -- strip hints from the arg regs
+       let args :: [CmmExpr]
+           args  = map hintlessCmm argsAndHints
+
+
+       -- work out the arguments, and assign them to integer regs
+       argcode_and_vregs       <- mapM arg_to_int_vregs args
+       let (argcodes, vregss)  = unzip argcode_and_vregs
+       let vregs               = concat vregss
+
+       let n_argRegs           = length allArgRegs
+       let n_argRegs_used      = min (length vregs) n_argRegs
+
+
+       -- deal with static vs dynamic call targets
+       callinsns <- case target of
+               CmmCallee (CmmLit (CmmLabel lbl)) conv -> 
+                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+               CmmCallee expr conv 
+                -> do  (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+               CmmPrim mop 
+                -> do  res     <- outOfLineFloatOp mop
+                       lblOrMopExpr <- case res of
+                               Left lbl -> do
+                                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+                               Right mopExpr -> do
+                                       (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+                                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+                       return lblOrMopExpr
+
+       let argcode = concatOL argcodes
+
+       let (move_sp_down, move_sp_up)
+                  = let diff = length vregs - n_argRegs
+                        nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+                    in  if   nn <= 0
+                        then (nilOL, nilOL)
+                        else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+
+        let transfer_code
+               = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+                               
+       return 
+        $      argcode                 `appOL`
+               move_sp_down            `appOL`
+               transfer_code           `appOL`
+               callinsns               `appOL`
+               unitOL NOP              `appOL`
+               move_sp_up              `appOL`
+               assign_code dest_regs
+
+
+-- | Generate code to calculate an argument, and move it into one
+--     or two integer vregs.
+arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+arg_to_int_vregs arg
+
+       -- If the expr produces a 64 bit int, then we can just use iselExpr64
+       | isWord64 (cmmExprType arg)
+       = do    (ChildCode64 code r_lo) <- iselExpr64 arg
+               let r_hi                = getHiVRegFromLo r_lo
+               return (code, [r_hi, r_lo])
+
+       | otherwise
+       = do    (src, code)     <- getSomeReg arg
+               tmp             <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
+               let pk          = cmmExprType arg
+
+               case cmmTypeSize pk of
+
+                -- Load a 64 bit float return value into two integer regs.
+                FF64 -> do
+                       v1 <- getNewRegNat II32
+                       v2 <- getNewRegNat II32
+
+                       let Just f0_high = fPair f0
+                       
+                       let code2 = 
+                               code                            `snocOL`
+                               FMOV FF64 src f0                `snocOL`
+                               ST   FF32  f0 (spRel 16)        `snocOL`
+                               LD   II32  (spRel 16) v1        `snocOL`
+                               ST   FF32  f0_high (spRel 16)   `snocOL`
+                               LD   II32  (spRel 16) v2
+
+                       return  (code2, [v1,v2])
+
+                -- Load a 32 bit float return value into an integer reg
+                FF32 -> do
+                       v1 <- getNewRegNat II32
+                       
+                       let code2 =
+                               code                            `snocOL`
+                               ST   FF32  src (spRel 16)       `snocOL`
+                               LD   II32  (spRel 16) v1
+                               
+                       return (code2, [v1])
+
+                -- Move an integer return value into its destination reg.
+                other -> do
+                       v1 <- getNewRegNat II32
+                       
+                       let code2 = 
+                               code                            `snocOL`
+                               OR False g0 (RIReg src) v1
+                       
+                       return (code2, [v1])
+
+
+-- | Move args from the integer vregs into which they have been 
+--     marshalled, into %o0 .. %o5, and the rest onto the stack.
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ offset          
+       = []
+
+-- out of aregs; move to stack
+move_final (v:vs) [] offset     
+       = ST II32 v (spRel offset)
+       : move_final vs [] (offset+1)
+
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset 
+       = OR False g0 (RIReg v) a
+       : move_final vs az offset
+
+
+-- | Assign results returned from the call into their 
+--     desination regs.
+--
+assign_code :: [CmmHinted LocalReg] -> OrdList Instr
+assign_code [] = nilOL
+
+assign_code [CmmHinted dest _hint]     
+ = let rep     = localRegType dest
+       width   = typeWidth rep
+       r_dest  = getRegisterReg (CmmLocal dest)
+
+       result
+               | isFloatType rep 
+               , W32   <- width
+               = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+
+               | isFloatType rep
+               , W64   <- width
+               = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+
+               | not $ isFloatType rep
+               , W32   <- width
+               = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+
+               | not $ isFloatType rep
+               , W64           <- width
+               , r_dest_hi     <- getHiVRegFromLo r_dest
+               = toOL  [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
+                       , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
+   in  result
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineFloatOp 
+       :: CallishMachOp 
+       -> NatM (Either CLabel CmmExpr)
+
+outOfLineFloatOp mop 
+ = do  let functionName
+               = outOfLineFloatOp_table mop
+       
+       dflags  <- getDynFlagsNat
+       mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference 
+               $  mkForeignLabel functionName Nothing True IsFunction
+
+       let mopLabelOrExpr 
+               = case mopExpr of
+                       CmmLit (CmmLabel lbl)   -> Left lbl
+                        _                      -> Right mopExpr
+
+       return mopLabelOrExpr
+
+
+-- | Decide what C function to use to implement a CallishMachOp
+--
+outOfLineFloatOp_table 
+       :: CallishMachOp
+       -> FastString
+       
+outOfLineFloatOp_table mop
+ = case mop of
+       MO_F32_Exp    -> fsLit "expf"
+       MO_F32_Log    -> fsLit "logf"
+       MO_F32_Sqrt   -> fsLit "sqrtf"
+       MO_F32_Pwr    -> fsLit "powf"
+
+       MO_F32_Sin    -> fsLit "sinf"
+       MO_F32_Cos    -> fsLit "cosf"
+       MO_F32_Tan    -> fsLit "tanf"
+
+       MO_F32_Asin   -> fsLit "asinf"
+       MO_F32_Acos   -> fsLit "acosf"
+       MO_F32_Atan   -> fsLit "atanf"
+
+       MO_F32_Sinh   -> fsLit "sinhf"
+       MO_F32_Cosh   -> fsLit "coshf"
+       MO_F32_Tanh   -> fsLit "tanhf"
+
+       MO_F64_Exp    -> fsLit "exp"
+       MO_F64_Log    -> fsLit "log"
+       MO_F64_Sqrt   -> fsLit "sqrt"
+       MO_F64_Pwr    -> fsLit "pow"
+
+       MO_F64_Sin    -> fsLit "sin"
+       MO_F64_Cos    -> fsLit "cos"
+       MO_F64_Tan    -> fsLit "tan"
+
+       MO_F64_Asin   -> fsLit "asin"
+       MO_F64_Acos   -> fsLit "acos"
+       MO_F64_Atan   -> fsLit "atan"
+
+       MO_F64_Sinh   -> fsLit "sinh"
+       MO_F64_Cosh   -> fsLit "cosh"
+       MO_F64_Tanh   -> fsLit "tanh"
+
+       other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
+                       (pprCallishMachOp mop)
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids
+       | opt_PIC
+       = error "MachCodeGen: sparc genSwitch PIC not finished\n"
+  
+       | otherwise
+       = do    (e_reg, e_code) <- getSomeReg expr
+
+               base_reg        <- getNewRegNat II32
+               offset_reg      <- getNewRegNat II32
+               dst             <- getNewRegNat II32
+
+               label           <- getNewLabelNat
+               let jumpTable   = map jumpTableEntry ids
+
+               return $ e_code `appOL`
+                toOL   
+                       -- the jump table
+                       [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
+
+                       -- load base of jump table
+                       , SETHI (HI (ImmCLbl label)) base_reg
+                       , OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+                       
+                       -- the addrs in the table are 32 bits wide..
+                       , SLL   e_reg (RIImm $ ImmInt 2) offset_reg
+
+                       -- load and jump to the destination
+                       , LD      II32 (AddrRegReg base_reg offset_reg) dst
+                       , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
+                       , NOP ]
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat II32
+    let
+       code__2 dst = code `appOL` toOL [
+           SUB False True g0 (RIReg src) g0,
+           SUB True False g0 (RIImm (ImmInt (-1))) dst]
+    return (Any II32 code__2)
+
+condIntReg EQQ x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat II32
+    tmp2 <- getNewRegNat II32
+    let
+       code__2 dst = code1 `appOL` code2 `appOL` toOL [
+           XOR False src1 (RIReg src2) dst,
+           SUB False True g0 (RIReg dst) g0,
+           SUB True False g0 (RIImm (ImmInt (-1))) dst]
+    return (Any II32 code__2)
+
+condIntReg NE x (CmmLit (CmmInt 0 d)) = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat II32
+    let
+       code__2 dst = code `appOL` toOL [
+           SUB False True g0 (RIReg src) g0,
+           ADD True False g0 (RIImm (ImmInt 0)) dst]
+    return (Any II32 code__2)
+
+condIntReg NE x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat II32
+    tmp2 <- getNewRegNat II32
+    let
+       code__2 dst = code1 `appOL` code2 `appOL` toOL [
+           XOR False src1 (RIReg src2) dst,
+           SUB False True g0 (RIReg dst) g0,
+           ADD True False g0 (RIImm (ImmInt 0)) dst]
+    return (Any II32 code__2)
+
+condIntReg cond x y = do
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
+    CondCode _ cond cond_code <- condIntCode cond x y
+    let
+       code__2 dst = cond_code `appOL` toOL [
+           BI cond False bid1, NOP,
+           OR False g0 (RIImm (ImmInt 0)) dst,
+           BI ALWAYS False bid2, NOP,
+           NEWBLOCK bid1,
+           OR False g0 (RIImm (ImmInt 1)) dst,
+           NEWBLOCK bid2]
+    return (Any II32 code__2)
+
+condFltReg cond x y = do
+    bid1@(BlockId lbl1) <- getBlockIdNat
+    bid2@(BlockId lbl2) <- getBlockIdNat
+    CondCode _ cond cond_code <- condFltCode cond x y
+    let
+       code__2 dst = cond_code `appOL` toOL [ 
+           NOP,
+           BF cond False bid1, NOP,
+           OR False g0 (RIImm (ImmInt 0)) dst,
+           BI ALWAYS False bid2, NOP,
+           NEWBLOCK bid1,
+           OR False g0 (RIImm (ImmInt 1)) dst,
+           NEWBLOCK bid2]
+    return (Any II32 code__2)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+trivialCode pk instr x (CmmLit (CmmInt y d))
+  | fits13Bits y
+  = do
+      (src1, code) <- getSomeReg x
+      tmp <- getNewRegNat II32
+      let
+       src2 = ImmInt (fromInteger y)
+       code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
+      return (Any II32 code__2)
+
+trivialCode pk instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat II32
+    tmp2 <- getNewRegNat II32
+    let
+       code__2 dst = code1 `appOL` code2 `snocOL`
+                     instr src1 (RIReg src2) dst
+    return (Any II32 code__2)
+
+------------
+trivialFCode pk instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
+    tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
+    tmp <- getNewRegNat FF64
+    let
+       promote x = FxTOy FF32 FF64 x tmp
+
+       pk1   = cmmExprType x
+       pk2   = cmmExprType y
+
+       code__2 dst =
+               if pk1 `cmmEqType` pk2 then
+                   code1 `appOL` code2 `snocOL`
+                   instr (floatSize pk) src1 src2 dst
+               else if typeWidth pk1 == W32 then
+                   code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+                   instr FF64 tmp src2 dst
+               else
+                   code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+                   instr FF64 src1 tmp dst
+    return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) 
+               code__2)
+
+------------
+trivialUCode size instr x = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat size
+    let
+       code__2 dst = code `snocOL` instr (RIReg src) dst
+    return (Any size code__2)
+
+-------------
+trivialUFCode pk instr x = do
+    (src, code) <- getSomeReg x
+    tmp <- getNewRegNat pk
+    let
+       code__2 dst = code `snocOL` instr src dst
+    return (Any pk code__2)
+
+
+
+coerceDbl2Flt :: CmmExpr -> NatM Register
+coerceFlt2Dbl :: CmmExpr -> NatM Register
+
+
+coerceInt2FP width1 width2 x = do
+    (src, code) <- getSomeReg x
+    let
+       code__2 dst = code `appOL` toOL [
+           ST (intSize width1) src (spRel (-2)),
+           LD (intSize width1) (spRel (-2)) dst,
+           FxTOy (intSize width1) (floatSize width2) dst dst]
+    return (Any (floatSize $ width2) code__2)
+
+
+-- | Coerce a floating point value to integer
+--
+--   NOTE: On sparc v9 there are no instructions to move a value from an
+--        FP register directly to an int register, so we have to use a load/store.
+--
+coerceFP2Int width1 width2 x 
+ = do  let fsize1      = floatSize width1
+           fsize2      = floatSize width2
+       
+            isize2     = intSize   width2
+
+       (fsrc, code)    <- getSomeReg x
+       fdst            <- getNewRegNat fsize2
+    
+       let code2 dst   
+               =       code
+               `appOL` toOL
+                       -- convert float to int format, leaving it in a float reg.
+                       [ FxTOy fsize1 isize2 fsrc fdst
+
+                       -- store the int into mem, then load it back to move
+                       --      it into an actual int reg.
+                       , ST    fsize2 fdst (spRel (-2))
+                       , LD    isize2 (spRel (-2)) dst]
+
+       return (Any isize2 code2)
+
+------------
+coerceDbl2Flt x = do
+    (src, code) <- getSomeReg x
+    return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) 
+
+------------
+coerceFlt2Dbl x = do
+    (src, code) <- getSomeReg x
+    return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
+
+
+
+-- eXTRA_STK_ARGS_HERE
+
+-- We (allegedly) put the first six C-call arguments in registers;
+-- where do we start putting the rest of them?
+
+-- Moved from Instrs (SDM):
+
+eXTRA_STK_ARGS_HERE :: Int
+eXTRA_STK_ARGS_HERE
+       = 23
diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs

new file mode 100644 (file)

index 0000000..d0f12ef
--- /dev/null
+++ b/compiler/nativeGen/SPARC/Cond.hs
@@ -0,0 +1,53 @@
+
+module SPARC.Cond (
+       Cond(..),
+       condUnsigned,
+       condToSigned,
+       condToUnsigned
+)
+
+where
+
+-- | Branch condition codes.
+data Cond
+       = ALWAYS
+       | EQQ
+       | GE
+       | GEU
+       | GTT
+       | GU
+       | LE
+       | LEU
+       | LTT
+       | LU
+       | NE
+       | NEG
+       | NEVER
+       | POS
+       | VC
+       | VS
+       deriving Eq
+
+
+condUnsigned :: Cond -> Bool
+condUnsigned GU  = True
+condUnsigned LU  = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _   = False
+
+
+condToSigned :: Cond -> Cond
+condToSigned GU  = GTT
+condToSigned LU  = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x   = x
+
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE  = GEU
+condToUnsigned LE  = LEU
+condToUnsigned x   = x
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs

index 9c33231..6dc6477 100644 (file)
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -10,56 +10,55 @@
  #include "nativeGen/NCG.h"
  
  module SPARC.Instr (
  #include "nativeGen/NCG.h"
  
  module SPARC.Instr (
-       Cond(..),
         RI(..),
         Instr(..),
         RI(..),
         Instr(..),
-       riZero,
-       fpRelEA,
-       moveSp,
-       fPair,
+       maxSpillSlots
  )
  
  where
  
  )
  
  where
  
-import BlockId
-import RegsBase
  import SPARC.Regs
  import SPARC.Regs
+import SPARC.Cond
+import Instruction
+import RegClass
+import Reg
+import Size
+
+import BlockId
  import Cmm
  import Outputable
  import Cmm
  import Outputable
-import Constants       ( wORD_SIZE )
+import Constants       (rESERVED_C_STACK_BYTES )
  import FastString
  import FastString
+import FastBool
  
  import GHC.Exts
  
  
  
  import GHC.Exts
  
  
--- | Branch condition codes.
-data Cond
-       = ALWAYS
-       | EQQ
-       | GE
-       | GEU
-       | GTT
-       | GU
-       | LE
-       | LEU
-       | LTT
-       | LU
-       | NE
-       | NEG
-       | NEVER
-       | POS
-       | VC
-       | VS
-       deriving Eq
-
-
  -- | Register or immediate
  data RI 
         = RIReg Reg
         | RIImm Imm
  
  
  -- | Register or immediate
  data RI 
         = RIReg Reg
         | RIImm Imm
  
  
--- | SPARC isntruction set.
+-- | instance for sparc instruction set
+instance Instruction Instr where
+       regUsageOfInstr         = sparc_regUsageOfInstr
+       patchRegsOfInstr        = sparc_patchRegsOfInstr
+       isJumpishInstr          = sparc_isJumpishInstr
+       jumpDestsOfInstr        = sparc_jumpDestsOfInstr
+       patchJumpInstr          = sparc_patchJumpInstr
+       mkSpillInstr            = sparc_mkSpillInstr
+       mkLoadInstr             = sparc_mkLoadInstr
+       takeDeltaInstr          = sparc_takeDeltaInstr
+       isMetaInstr             = sparc_isMetaInstr
+       mkRegRegMoveInstr       = sparc_mkRegRegMoveInstr
+       takeRegRegMoveInstr     = sparc_takeRegRegMoveInstr
+       mkJumpInstr             = sparc_mkJumpInstr
+
+
+-- | SPARC instruction set.
+--     Not complete. This is only the ones we need.
+--
  data Instr
  
         -- meta ops --------------------------------------------------
  data Instr
  
         -- meta ops --------------------------------------------------
@@ -78,12 +77,6 @@ data Instr
         -- specify current stack offset for benefit of subsequent passes.
         | DELTA   Int
  
         -- specify current stack offset for benefit of subsequent passes.
         | DELTA   Int
  
-       -- | spill this reg to a stack slot
-       | SPILL   Reg Int
-
-       -- | reload this reg from a stack slot
-       | RELOAD  Int Reg
-
         -- real instrs -----------------------------------------------
         -- Loads and stores.
         | LD            Size AddrMode Reg               -- size, src, dst
         -- real instrs -----------------------------------------------
         -- Loads and stores.
         | LD            Size AddrMode Reg               -- size, src, dst
@@ -157,39 +150,290 @@ data Instr
         | CALL          (Either Imm Reg) Int Bool       -- target, args, terminal
  
  
         | CALL          (Either Imm Reg) Int Bool       -- target, args, terminal
  
  
--- | Check if a RI represents a zero value.
---     - a literal zero
---     - register %g0, which is always zero.
+-- | regUsage returns the sets of src and destination registers used
+--     by a particular instruction.  Machine registers that are
+--     pre-allocated to stgRegs are filtered out, because they are
+--     uninteresting from a register allocation standpoint.  (We wouldn't
+--     want them to end up on the free list!)  As far as we are concerned,
+--     the fixed registers simply don't exist (for allocation purposes,
+--     anyway).
+
+--     regUsage doesn't need to do any trickery for jumps and such.  Just
+--     state precisely the regs read and written by that insn.  The
+--     consequences of control flow transfers, as far as register
+--     allocation goes, are taken care of by the register allocator.
+--
+sparc_regUsageOfInstr :: Instr -> RegUsage
+sparc_regUsageOfInstr instr 
+ = case instr of
+    LD    _ addr reg           -> usage (regAddr addr,         [reg])
+    ST    _ reg addr           -> usage (reg : regAddr addr,   [])
+    ADD   _ _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    SUB   _ _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    UMUL    _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    SMUL    _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    UDIV    _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    SDIV    _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    RDY       rd               -> usage ([],                   [rd])
+    WRY       r1 r2            -> usage ([r1, r2],             [])
+    AND     _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    ANDN    _ r1 ar r2                 -> usage (r1 : regRI ar,        [r2])
+    OR      _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    ORN     _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    XOR     _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    XNOR    _ r1 ar r2                 -> usage (r1 : regRI ar,        [r2])
+    SLL       r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    SRL       r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    SRA       r1 ar r2         -> usage (r1 : regRI ar,        [r2])
+    SETHI   _ reg              -> usage ([],                   [reg])
+    FABS    _ r1 r2                    -> usage ([r1],                 [r2])
+    FADD    _ r1 r2 r3                 -> usage ([r1, r2],             [r3])
+    FCMP    _ _  r1 r2         -> usage ([r1, r2],             [])
+    FDIV    _ r1 r2 r3                 -> usage ([r1, r2],             [r3])
+    FMOV    _ r1 r2                    -> usage ([r1],                 [r2])
+    FMUL    _ r1 r2 r3                 -> usage ([r1, r2],             [r3])
+    FNEG    _ r1 r2                    -> usage ([r1],                 [r2])
+    FSQRT   _ r1 r2            -> usage ([r1],                 [r2])
+    FSUB    _ r1 r2 r3                 -> usage ([r1, r2],             [r3])
+    FxTOy   _ _  r1 r2                 -> usage ([r1],                 [r2])
+
+    JMP     addr               -> usage (regAddr addr, [])
+    JMP_TBL addr _             -> usage (regAddr addr, [])
+
+    CALL  (Left _  )  _ True   -> noUsage
+    CALL  (Left _  )  n False  -> usage (argRegs n, callClobberedRegs)
+    CALL  (Right reg) _ True   -> usage ([reg], [])
+    CALL  (Right reg) n False  -> usage (reg : (argRegs n), callClobberedRegs)
+    _                          -> noUsage
+
+  where
+    usage (src, dst) 
+     = RU (filter interesting src) (filter interesting dst)
+
+    regAddr (AddrRegReg r1 r2) = [r1, r2]
+    regAddr (AddrRegImm r1 _)  = [r1]
+
+    regRI (RIReg r)            = [r]
+    regRI  _                   = []
+
+
+-- | Interesting regs are virtuals, or ones that are allocatable 
+--     by the register allocator.
+interesting :: Reg -> Bool
+interesting reg
+ = case reg of
+       VirtualRegI  _  -> True
+       VirtualRegHi _  -> True
+       VirtualRegF  _  -> True
+       VirtualRegD  _  -> True
+       RealReg i       -> isFastTrue (freeReg i)
+
+
+
+-- | Apply a given mapping to tall the register references in this instruction.
+sparc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+sparc_patchRegsOfInstr instr env = case instr of
+    LD    sz addr reg          -> LD sz (fixAddr addr) (env reg)
+    ST    sz reg addr          -> ST sz (env reg) (fixAddr addr)
+
+    ADD   x cc r1 ar r2        -> ADD   x cc  (env r1) (fixRI ar) (env r2)
+    SUB   x cc r1 ar r2                -> SUB   x cc  (env r1) (fixRI ar) (env r2)
+    UMUL    cc r1 ar r2                -> UMUL    cc  (env r1) (fixRI ar) (env r2)
+    SMUL    cc r1 ar r2                -> SMUL    cc  (env r1) (fixRI ar) (env r2)
+    UDIV    cc r1 ar r2                -> UDIV    cc  (env r1) (fixRI ar) (env r2)
+    SDIV    cc r1 ar r2                -> SDIV    cc  (env r1) (fixRI ar) (env r2)
+    RDY   rd                   -> RDY         (env rd)
+    WRY   r1 r2                        -> WRY         (env r1) (env r2)
+    AND   b r1 ar r2                   -> AND   b     (env r1) (fixRI ar) (env r2)
+    ANDN  b r1 ar r2           -> ANDN  b     (env r1) (fixRI ar) (env r2)
+    OR    b r1 ar r2                   -> OR    b     (env r1) (fixRI ar) (env r2)
+    ORN   b r1 ar r2           -> ORN   b     (env r1) (fixRI ar) (env r2)
+    XOR   b r1 ar r2           -> XOR   b     (env r1) (fixRI ar) (env r2)
+    XNOR  b r1 ar r2           -> XNOR  b     (env r1) (fixRI ar) (env r2)
+    SLL   r1 ar r2             -> SLL         (env r1) (fixRI ar) (env r2)
+    SRL   r1 ar r2             -> SRL         (env r1) (fixRI ar) (env r2)
+    SRA   r1 ar r2             -> SRA         (env r1) (fixRI ar) (env r2)
+
+    SETHI imm reg              -> SETHI imm (env reg)
+
+    FABS  s r1 r2              -> FABS    s   (env r1) (env r2)
+    FADD  s r1 r2 r3           -> FADD    s   (env r1) (env r2) (env r3)
+    FCMP  e s r1 r2            -> FCMP e  s   (env r1) (env r2)
+    FDIV  s r1 r2 r3           -> FDIV    s   (env r1) (env r2) (env r3)
+    FMOV  s r1 r2              -> FMOV    s   (env r1) (env r2)
+    FMUL  s r1 r2 r3           -> FMUL    s   (env r1) (env r2) (env r3)
+    FNEG  s r1 r2              -> FNEG    s   (env r1) (env r2)
+    FSQRT s r1 r2              -> FSQRT   s   (env r1) (env r2)
+    FSUB  s r1 r2 r3                   -> FSUB    s   (env r1) (env r2) (env r3)
+    FxTOy s1 s2 r1 r2          -> FxTOy s1 s2 (env r1) (env r2)
+
+    JMP     addr               -> JMP     (fixAddr addr)
+    JMP_TBL addr ids           -> JMP_TBL (fixAddr addr) ids
+
+    CALL  (Left i) n t         -> CALL (Left i) n t
+    CALL  (Right r) n t        -> CALL (Right (env r)) n t
+    _                          -> instr
+
+  where
+    fixAddr (AddrRegReg r1 r2)         = AddrRegReg   (env r1) (env r2)
+    fixAddr (AddrRegImm r1 i)          = AddrRegImm   (env r1) i
+
+    fixRI (RIReg r)            = RIReg (env r)
+    fixRI other                        = other
+
+
+--------------------------------------------------------------------------------
+sparc_isJumpishInstr :: Instr -> Bool
+sparc_isJumpishInstr instr
+ = case instr of
+       BI{}            -> True
+       BF{}            -> True
+       JMP{}           -> True
+       JMP_TBL{}       -> True
+       CALL{}          -> True
+       _               -> False
+
+sparc_jumpDestsOfInstr :: Instr -> [BlockId]
+sparc_jumpDestsOfInstr insn
+  = case insn of
+       BI   _ _ id     -> [id]
+       BF   _ _ id     -> [id]
+       JMP_TBL _ ids   -> ids
+       _               -> []
+
+
+sparc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+sparc_patchJumpInstr insn patchF
+  = case insn of
+       BI cc annul id  -> BI cc annul (patchF id)
+       BF cc annul id  -> BF cc annul (patchF id)
+       _               -> insn
+
+
+--------------------------------------------------------------------------------
+-- | Make a spill instruction.
+--     On SPARC we spill below frame pointer leaving 2 words/spill
+sparc_mkSpillInstr
+       :: Reg          -- ^ register to spill
+       -> Int          -- ^ current stack delta
+       -> Int          -- ^ spill slot to use
+       -> Instr
+
+sparc_mkSpillInstr reg _ slot
+ = let off     = spillSlotToOffset slot
+        off_w  = 1 + (off `div` 4)
+        sz     = case regClass reg of
+                       RcInteger -> II32
+                       RcFloat   -> FF32
+                       RcDouble  -> FF64
+               
+    in ST sz reg (fpRel (negate off_w))
+
+
+-- | Make a spill reload instruction.
+sparc_mkLoadInstr
+       :: Reg          -- ^ register to load
+       -> Int          -- ^ current stack delta
+       -> Int          -- ^ spill slot to use
+       -> Instr
+
+sparc_mkLoadInstr reg _ slot
+  = let off     = spillSlotToOffset slot
+       off_w   = 1 + (off `div` 4)
+        sz     = case regClass reg of
+                       RcInteger -> II32
+                       RcFloat   -> FF32
+                       RcDouble  -> FF64
+
+        in LD sz (fpRel (- off_w)) reg
+
+-- | Convert a spill slot number to a *byte* offset, with no sign.
  --
  --
-riZero :: RI -> Bool
-riZero (RIImm (ImmInt 0))          = True
-riZero (RIImm (ImmInteger 0))      = True
-riZero (RIReg (RealReg 0))          = True
-riZero _                           = False
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+       | slot >= 0 && slot < maxSpillSlots
+       = 64 + spillSlotSize * slot
+
+       | otherwise
+       = pprPanic "spillSlotToOffset:" 
+                     (   text "invalid spill location: " <> int slot
+                     $$  text "maxSpillSlots:          " <> int maxSpillSlots)
+
+
+-- | We need 8 bytes because our largest registers are 64 bit.
+spillSlotSize :: Int
+spillSlotSize = 8
  
  
  
  
--- | Calculate the effective address which would be used by the
---     corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
---     alas -- can't have fpRelEA here because of module dependencies.
-fpRelEA :: Int -> Reg -> Instr
-fpRelEA n dst
-   = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+-- | The maximum number of spill slots available on the C stack.
+--     If we use up all of the slots, then we're screwed.
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
  
  
  
  
--- | Code to shift the stack pointer by n words.
-moveSp :: Int -> Instr
-moveSp n
-   = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+sparc_takeDeltaInstr
+       :: Instr
+       -> Maybe Int
+       
+sparc_takeDeltaInstr instr
+ = case instr of
+       DELTA i         -> Just i
+       _               -> Nothing
+
+
+sparc_isMetaInstr
+       :: Instr
+       -> Bool
+       
+sparc_isMetaInstr instr
+ = case instr of
+       COMMENT{}       -> True
+       LDATA{}         -> True
+       NEWBLOCK{}      -> True
+       DELTA{}         -> True
+       _               -> False
+       
+
+-- | Make a reg-reg move instruction.
+--     On SPARC v8 there are no instructions to move directly between
+--     floating point and integer regs. If we need to do that then we
+--     have to go via memory.
+--
+sparc_mkRegRegMoveInstr
+       :: Reg
+       -> Reg
+       -> Instr
+
+sparc_mkRegRegMoveInstr src dst
+ = case regClass src of
+       RcInteger -> ADD  False False src (RIReg g0) dst
+       RcDouble  -> FMOV FF64 src dst
+       RcFloat   -> FMOV FF32 src dst
+
+
+-- | Check whether an instruction represents a reg-reg move.
+--     The register allocator attempts to eliminate reg->reg moves whenever it can,
+--     by assigning the src and dest temporaries to the same real register.
+--
+sparc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+sparc_takeRegRegMoveInstr instr
+ = case instr of
+       ADD False False src (RIReg src2) dst
+        | g0 == src2           -> Just (src, dst)
+
+       FMOV FF64 src dst       -> Just (src, dst)
+       FMOV FF32  src dst      -> Just (src, dst)
+       _                       -> Nothing
  
  
  
  
--- | Produce the second-half-of-a-double register given the first half.
-fPair :: Reg -> Maybe Reg
-fPair (RealReg n) 
-       | n >= 32 && n `mod` 2 == 0  = Just (RealReg (n+1))
+-- | Make an unconditional branch instruction.
+sparc_mkJumpInstr
+       :: BlockId
+       -> [Instr]
  
  
-fPair (VirtualRegD u)
-       = Just (VirtualRegHi u)
+sparc_mkJumpInstr id 
+ =      [BI ALWAYS False id
+       , NOP]                  -- fill the branch delay slot.
  
  
-fPair _
-       = trace ("MachInstrs.fPair: can't get high half of supposed double reg ") 
-               Nothing
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs

index 7d64df1..a0d5fff 100644 (file)
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -7,12 +7,15 @@
  -----------------------------------------------------------------------------
  
  module SPARC.Ppr (
  -----------------------------------------------------------------------------
  
  module SPARC.Ppr (
+       pprNatCmmTop,
+       pprBasicBlock,
+       pprSectionHeader,
+       pprData,
+       pprInstr,
         pprUserReg,
         pprSize,
         pprImm,
         pprUserReg,
         pprSize,
         pprImm,
-       pprSectionHeader,
-       pprDataItem,
-       pprInstr
+       pprDataItem
  )
  
  where
  )
  
  where
@@ -20,20 +23,119 @@ where
  #include "HsVersions.h"
  #include "nativeGen/NCG.h"
  
  #include "HsVersions.h"
  #include "nativeGen/NCG.h"
  
-import PprBase
-import RegsBase
  import SPARC.Regs
  import SPARC.Regs
+import SPARC.RegInfo
  import SPARC.Instr
  import SPARC.Instr
+import SPARC.Cond
+import Instruction
+import Reg
+import Size
+import PprBase
  
  import BlockId
  import Cmm
  
  import BlockId
  import Cmm
-
  import CLabel
  
  import CLabel
  
-import Panic           ( panic )
  import Unique          ( pprUnique )
  import Unique          ( pprUnique )
+import qualified Outputable
+import Outputable      (Outputable, panic)
  import Pretty
  import FastString
  import Pretty
  import FastString
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) = 
+  pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) = 
+  pprSectionHeader Text $$
+  (if null info then -- blocks guaranteed not null, so label needed
+       pprLabel lbl
+   else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+            pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                <> char ':' $$
+#endif
+       vcat (map pprData info) $$
+       pprLabel (entryLblToInfoLbl lbl)
+  ) $$
+  vcat (map pprBasicBlock blocks)
+     -- above: Even the first block gets a label, because with branch-chain
+     -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+        -- If we are using the .subsections_via_symbols directive
+        -- (available on recent versions of Darwin),
+        -- we have to make sure that there is some kind of reference
+        -- from the entry code to a label on the _top_ of of the info table,
+        -- so that the linker will not think it is unreferenced and dead-strip
+        -- it. That's why the label is called a DeadStripPreventer (_dsp).
+  $$ if not (null info)
+                   then text "\t.long "
+                     <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+                     <+> char '-'
+                     <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                   else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+  pprLabel (mkAsmTempLabel id) $$
+  vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes)         = pprAlign bytes
+pprData (CmmDataLabel lbl)       = pprLabel lbl
+pprData (CmmString str)          = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit)       = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+  | not (externallyVisibleCLabel lbl) = empty
+  | otherwise = ptext IF_ARCH_sparc((sLit ".global "), 
+                                   (sLit ".globl ")) <>
+               pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+  | not (externallyVisibleCLabel lbl) = empty
+  | otherwise = ptext (sLit ".type ") <>
+               pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+  = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+  = vcat (map do1 str) $$ do1 0
+    where
+       do1 :: Word8 -> Doc
+       do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+pprAlign bytes =
+       ptext (sLit ".align ") <> int bytes
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+    ppr         instr  = Outputable.docToSDoc $ pprInstr instr
  
  
  -- | Pretty print a register.
  
  
  -- | Pretty print a register.
@@ -101,12 +203,13 @@ pprSize :: Size -> Doc
  pprSize x 
   = ptext 
      (case x of
  pprSize x 
   = ptext 
      (case x of
-       II8   -> sLit "ub"
-       II16  -> sLit "uh"
-       II32  -> sLit ""
-       II64  -> sLit "d"
-       FF32  -> sLit ""
-       FF64  -> sLit "d")
+       II8     -> sLit "ub"
+       II16    -> sLit "uh"
+       II32    -> sLit ""
+       II64    -> sLit "d"
+       FF32    -> sLit ""
+       FF64    -> sLit "d"
+       _       -> panic "SPARC.Ppr.pprSize: no match")
  
  
  -- | Pretty print a size for an instruction suffix.
  
  
  -- | Pretty print a size for an instruction suffix.
@@ -120,7 +223,8 @@ pprStSize x
         II32  -> sLit ""
         II64  -> sLit "x"
         FF32  -> sLit ""
         II32  -> sLit ""
         II64  -> sLit "x"
         FF32  -> sLit ""
-       FF64  -> sLit "d")
+       FF64  -> sLit "d"
+       _       -> panic "SPARC.Ppr.pprSize: no match")
  
                 
  -- | Pretty print a condition code.
  
                 
  -- | Pretty print a condition code.
@@ -258,6 +362,7 @@ pprInstr (NEWBLOCK _)
  pprInstr (LDATA _ _)
         = panic "PprMach.pprInstr: LDATA"
  
  pprInstr (LDATA _ _)
         = panic "PprMach.pprInstr: LDATA"
  
+{-
  pprInstr (SPILL reg slot)
   = hcat [
         ptext (sLit "\tSPILL"),
  pprInstr (SPILL reg slot)
   = hcat [
         ptext (sLit "\tSPILL"),
@@ -273,7 +378,7 @@ pprInstr (RELOAD slot reg)
         ptext (sLit "SLOT") <> parens (int slot),
         comma,
         pprReg reg]
         ptext (sLit "SLOT") <> parens (int slot),
         comma,
         pprReg reg]
-
+-}
  
  -- a clumsy hack for now, to handle possible double alignment problems
  -- even clumsier, to allow for RegReg regs that show when doing indexed
  
  -- a clumsy hack for now, to handle possible double alignment problems
  -- even clumsier, to allow for RegReg regs that show when doing indexed
diff --git a/compiler/nativeGen/SPARC/RegInfo.hs b/compiler/nativeGen/SPARC/RegInfo.hs

index 8f8a977..025e302 100644 (file)
--- a/compiler/nativeGen/SPARC/RegInfo.hs
+++ b/compiler/nativeGen/SPARC/RegInfo.hs
@@ -8,241 +8,115 @@
  -----------------------------------------------------------------------------
  
  module SPARC.RegInfo (
  -----------------------------------------------------------------------------
  
  module SPARC.RegInfo (
-       -- machine specific 
-       RegUsage(..),
-       noUsage,
-       regUsage,
-       patchRegs,
-       jumpDests,
-       isJumpish,
-       patchJump,
-       isRegRegMove,
+       mkVReg,
+
+       riZero,
+       fpRelEA,
+       moveSp,
+       fPair,
+
+       shortcutStatic,
+       regDotColor,
  
          JumpDest(..), 
         canShortcut, 
  
          JumpDest(..), 
         canShortcut, 
-       shortcutJump, 
-
-       mkSpillInstr,
-       mkLoadInstr,
-       mkRegRegMoveInstr,
-       mkBranchInstr,
-       
-       spillSlotSize,
-       maxSpillSlots,
-       spillSlotToOffset               
+       shortcutJump,
  )
  
  where
  
  )
  
  where
  
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-
  import SPARC.Instr
  import SPARC.Regs
  import SPARC.Instr
  import SPARC.Regs
-import RegsBase
+import RegClass
+import Reg
+import Size
  
  
+import Constants       (wORD_SIZE)
+import Cmm
+import CLabel
  import BlockId
  import Outputable
  import BlockId
  import Outputable
-import Constants       ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-
--- | Represents what regs are read and written to in an instruction.
---     
-data RegUsage 
-       = RU    [Reg]   -- regs read from
-               [Reg]   -- regs written to
-
-
--- | No regs read or written to.
-noUsage :: RegUsage
-noUsage  = RU [] []
-
-
--- | regUsage returns the sets of src and destination registers used
---     by a particular instruction.  Machine registers that are
---     pre-allocated to stgRegs are filtered out, because they are
---     uninteresting from a register allocation standpoint.  (We wouldn't
---     want them to end up on the free list!)  As far as we are concerned,
---     the fixed registers simply don't exist (for allocation purposes,
---     anyway).
-
---     regUsage doesn't need to do any trickery for jumps and such.  Just
---     state precisely the regs read and written by that insn.  The
---     consequences of control flow transfers, as far as register
---     allocation goes, are taken care of by the register allocator.
---
-regUsage :: Instr -> RegUsage
-regUsage instr 
- = case instr of
-    SPILL  reg _               -> usage ([reg], [])
-    RELOAD _   reg             -> usage ([], [reg])
-
-    LD    _ addr reg           -> usage (regAddr addr,         [reg])
-    ST    _ reg addr           -> usage (reg : regAddr addr,   [])
-    ADD   _ _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    SUB   _ _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    UMUL    _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    SMUL    _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    UDIV    _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    SDIV    _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    RDY       rd               -> usage ([],                   [rd])
-    WRY       r1 r2            -> usage ([r1, r2],             [])
-    AND     _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    ANDN    _ r1 ar r2                 -> usage (r1 : regRI ar,        [r2])
-    OR      _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    ORN     _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    XOR     _ r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    XNOR    _ r1 ar r2                 -> usage (r1 : regRI ar,        [r2])
-    SLL       r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    SRL       r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    SRA       r1 ar r2         -> usage (r1 : regRI ar,        [r2])
-    SETHI   _ reg              -> usage ([],                   [reg])
-    FABS    _ r1 r2                    -> usage ([r1],                 [r2])
-    FADD    _ r1 r2 r3                 -> usage ([r1, r2],             [r3])
-    FCMP    _ _  r1 r2         -> usage ([r1, r2],             [])
-    FDIV    _ r1 r2 r3                 -> usage ([r1, r2],             [r3])
-    FMOV    _ r1 r2                    -> usage ([r1],                 [r2])
-    FMUL    _ r1 r2 r3                 -> usage ([r1, r2],             [r3])
-    FNEG    _ r1 r2                    -> usage ([r1],                 [r2])
-    FSQRT   _ r1 r2            -> usage ([r1],                 [r2])
-    FSUB    _ r1 r2 r3                 -> usage ([r1, r2],             [r3])
-    FxTOy   _ _  r1 r2                 -> usage ([r1],                 [r2])
-
-    JMP     addr               -> usage (regAddr addr, [])
-    JMP_TBL addr _             -> usage (regAddr addr, [])
-
-    CALL  (Left _  )  _ True   -> noUsage
-    CALL  (Left _  )  n False  -> usage (argRegs n, callClobberedRegs)
-    CALL  (Right reg) _ True   -> usage ([reg], [])
-    CALL  (Right reg) n False  -> usage (reg : (argRegs n), callClobberedRegs)
-    _                          -> noUsage
-
-  where
-    usage (src, dst) 
-     = RU (filter interesting src) (filter interesting dst)
-
-    regAddr (AddrRegReg r1 r2) = [r1, r2]
-    regAddr (AddrRegImm r1 _)  = [r1]
+import Unique
  
  
-    regRI (RIReg r)            = [r]
-    regRI  _                   = []
  
  
+-- | Make a virtual reg with this size.
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+       | not (isFloatSize size) 
+       = VirtualRegI u
  
  
--- | Interesting regs are virtuals, or ones that are allocatable 
---     by the register allocator.
-interesting :: Reg -> Bool
-interesting reg
- = case reg of
-       VirtualRegI  _  -> True
-       VirtualRegHi _  -> True
-       VirtualRegF  _  -> True
-       VirtualRegD  _  -> True
-       RealReg i       -> isFastTrue (freeReg i)
-
-
-
--- | Apply a given mapping to tall the register references in this instruction.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
-    SPILL reg slot             -> SPILL  (env reg) slot
-    RELOAD slot reg            -> RELOAD slot (env reg)
-
-    LD    sz addr reg          -> LD sz (fixAddr addr) (env reg)
-    ST    sz reg addr          -> ST sz (env reg) (fixAddr addr)
-
-    ADD   x cc r1 ar r2        -> ADD   x cc  (env r1) (fixRI ar) (env r2)
-    SUB   x cc r1 ar r2                -> SUB   x cc  (env r1) (fixRI ar) (env r2)
-    UMUL    cc r1 ar r2                -> UMUL    cc  (env r1) (fixRI ar) (env r2)
-    SMUL    cc r1 ar r2                -> SMUL    cc  (env r1) (fixRI ar) (env r2)
-    UDIV    cc r1 ar r2                -> UDIV    cc  (env r1) (fixRI ar) (env r2)
-    SDIV    cc r1 ar r2                -> SDIV    cc  (env r1) (fixRI ar) (env r2)
-    RDY   rd                   -> RDY         (env rd)
-    WRY   r1 r2                        -> WRY         (env r1) (env r2)
-    AND   b r1 ar r2                   -> AND   b     (env r1) (fixRI ar) (env r2)
-    ANDN  b r1 ar r2           -> ANDN  b     (env r1) (fixRI ar) (env r2)
-    OR    b r1 ar r2                   -> OR    b     (env r1) (fixRI ar) (env r2)
-    ORN   b r1 ar r2           -> ORN   b     (env r1) (fixRI ar) (env r2)
-    XOR   b r1 ar r2           -> XOR   b     (env r1) (fixRI ar) (env r2)
-    XNOR  b r1 ar r2           -> XNOR  b     (env r1) (fixRI ar) (env r2)
-    SLL   r1 ar r2             -> SLL         (env r1) (fixRI ar) (env r2)
-    SRL   r1 ar r2             -> SRL         (env r1) (fixRI ar) (env r2)
-    SRA   r1 ar r2             -> SRA         (env r1) (fixRI ar) (env r2)
-
-    SETHI imm reg              -> SETHI imm (env reg)
-
-    FABS  s r1 r2              -> FABS    s   (env r1) (env r2)
-    FADD  s r1 r2 r3           -> FADD    s   (env r1) (env r2) (env r3)
-    FCMP  e s r1 r2            -> FCMP e  s   (env r1) (env r2)
-    FDIV  s r1 r2 r3           -> FDIV    s   (env r1) (env r2) (env r3)
-    FMOV  s r1 r2              -> FMOV    s   (env r1) (env r2)
-    FMUL  s r1 r2 r3           -> FMUL    s   (env r1) (env r2) (env r3)
-    FNEG  s r1 r2              -> FNEG    s   (env r1) (env r2)
-    FSQRT s r1 r2              -> FSQRT   s   (env r1) (env r2)
-    FSUB  s r1 r2 r3                   -> FSUB    s   (env r1) (env r2) (env r3)
-    FxTOy s1 s2 r1 r2          -> FxTOy s1 s2 (env r1) (env r2)
-
-    JMP     addr               -> JMP     (fixAddr addr)
-    JMP_TBL addr ids           -> JMP_TBL (fixAddr addr) ids
-
-    CALL  (Left i) n t         -> CALL (Left i) n t
-    CALL  (Right r) n t        -> CALL (Right (env r)) n t
-    _                          -> instr
-
-  where
-    fixAddr (AddrRegReg r1 r2)         = AddrRegReg   (env r1) (env r2)
-    fixAddr (AddrRegImm r1 i)          = AddrRegImm   (env r1) i
-
-    fixRI (RIReg r)            = RIReg (env r)
-    fixRI other                        = other
-
-
--- -----------------------------------------------------------------------------
--- Determine the possible destinations from the current instruction.
-
--- (we always assume that the next instruction is also a valid destination;
--- if this isn't the case then the jump should be at the end of the basic
--- block).
-
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
-  = case insn of
-       BI   _ _ id     -> id : acc
-       BF   _ _ id     -> id : acc
-       JMP_TBL _ ids   -> ids ++ acc
-       _other          -> acc
+       | otherwise
+       = case size of
+               FF32    -> VirtualRegF u
+               FF64    -> VirtualRegD u
+               _       -> panic "mkVReg"
  
  
  
  
--- | Check whether a particular instruction is a jump, branch or call instruction (jumpish)
---     We can't just use jumpDests above because the jump might take its arg,
---     so the instr won't contain a blockid.
+-- | Check if a RI represents a zero value.
+--     - a literal zero
+--     - register %g0, which is always zero.
  --
  --
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
-       BI{}            -> True
-       BF{}            -> True
-       JMP{}           -> True
-       JMP_TBL{}       -> True
-       CALL{}          -> True
-       _               -> False
-
-
--- | Change the destination of this jump instruction
---     Used in joinToTargets in the linear allocator, when emitting fixup code
---     for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
-  = case insn of
-       BI cc annul id
-        | id == old    -> BI cc annul new
-        
-       BF cc annul id
-        | id == old    -> BF cc annul new
-
-       _other          -> insn
-
+riZero :: RI -> Bool
+riZero (RIImm (ImmInt 0))          = True
+riZero (RIImm (ImmInteger 0))      = True
+riZero (RIReg (RealReg 0))          = True
+riZero _                           = False
+
+
+-- | Calculate the effective address which would be used by the
+--     corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
+--     alas -- can't have fpRelEA here because of module dependencies.
+fpRelEA :: Int -> Reg -> Instr
+fpRelEA n dst
+   = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+
+
+-- | Code to shift the stack pointer by n words.
+moveSp :: Int -> Instr
+moveSp n
+   = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+
+
+-- | Produce the second-half-of-a-double register given the first half.
+fPair :: Reg -> Maybe Reg
+fPair (RealReg n) 
+       | n >= 32 && n `mod` 2 == 0  = Just (RealReg (n+1))
+
+fPair (VirtualRegD u)
+       = Just (VirtualRegHi u)
+
+fPair _
+       = trace ("MachInstrs.fPair: can't get high half of supposed double reg ") 
+               Nothing
+
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+  | Just uq <- maybeAsmTemp lab 
+  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+  | Just uq <- maybeAsmTemp lbl1
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+        -- slightly dodgy, we're ignoring the second label, but this
+        -- works with the way we use CmmLabelDiffOff for jump tables now.
+shortcutStatic _ other_static
+        = other_static
+
+shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel
+shortBlockId fn blockid@(BlockId uq) =
+   case fn blockid of
+      Nothing -> mkAsmTempLabel uq
+      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
+      Just (DestImm (ImmCLbl lbl)) -> lbl
+      _other -> panic "shortBlockId"
+
+
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = case regClass reg of
+       RcInteger       -> text "blue"
+       RcFloat         -> text "red"
+       RcDouble        -> text "green"
  
  
  
  
  
  
@@ -253,108 +127,3 @@ canShortcut _ = Nothing
  
  shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
  shortcutJump _ other = other
  
  shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
  shortcutJump _ other = other
-
-
-
--- | Make a spill instruction.
---     On SPARC we spill below frame pointer leaving 2 words/spill
-mkSpillInstr
-       :: Reg          -- ^ register to spill
-       -> Int          -- ^ current stack delta
-       -> Int          -- ^ spill slot to use
-       -> Instr
-
-mkSpillInstr reg _ slot
- = let off     = spillSlotToOffset slot
-        off_w  = 1 + (off `div` 4)
-        sz     = case regClass reg of
-                       RcInteger -> II32
-                       RcFloat   -> FF32
-                       RcDouble  -> FF64
-               
-    in ST sz reg (fpRel (negate off_w))
-
-
--- | Make a spill reload instruction.
-mkLoadInstr
-       :: Reg          -- ^ register to load
-       -> Int          -- ^ current stack delta
-       -> Int          -- ^ spill slot to use
-       -> Instr
-
-mkLoadInstr reg _ slot
-  = let off     = spillSlotToOffset slot
-       off_w   = 1 + (off `div` 4)
-        sz     = case regClass reg of
-                       RcInteger -> II32
-                       RcFloat   -> FF32
-                       RcDouble  -> FF64
-
-        in LD sz (fpRel (- off_w)) reg
-
-
--- | Make a reg-reg move instruction.
---     On SPARC v8 there are no instructions to move directly between
---     floating point and integer regs. If we need to do that then we
---     have to go via memory.
---
-mkRegRegMoveInstr
-       :: Reg
-       -> Reg
-       -> Instr
-
-mkRegRegMoveInstr src dst
- = case regClass src of
-       RcInteger -> ADD  False False src (RIReg g0) dst
-       RcDouble  -> FMOV FF64 src dst
-       RcFloat   -> FMOV FF32 src dst
-
-
--- | Check whether an instruction represents a reg-reg move.
---     The register allocator attempts to eliminate reg->reg moves whenever it can,
---     by assigning the src and dest temporaries to the same real register.
---
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove instr
- = case instr of
-       ADD False False src (RIReg src2) dst
-        | g0 == src2           -> Just (src, dst)
-
-       FMOV FF64 src dst       -> Just (src, dst)
-       FMOV FF32  src dst      -> Just (src, dst)
-       _                       -> Nothing
-
-
--- | Make an unconditional branch instruction.
-mkBranchInstr
-       :: BlockId
-       -> [Instr]
-
-mkBranchInstr id 
- =      [BI ALWAYS False id
-       , NOP]                  -- fill the branch delay slot.
-
-
--- | TODO: Why do we need 8 bytes per slot?? -BL 2009/02
-spillSlotSize :: Int
-spillSlotSize = 8
-
-
--- | The maximum number of spill slots available on the C stack.
---     If we use up all of the slots, then we're screwed.
-maxSpillSlots :: Int
-maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
-
-
--- | Convert a spill slot number to a *byte* offset, with no sign.
---
-spillSlotToOffset :: Int -> Int
-spillSlotToOffset slot
-       | slot >= 0 && slot < maxSpillSlots
-       = 64 + spillSlotSize * slot
-
-       | otherwise
-       = pprPanic "spillSlotToOffset:" 
-                     (   text "invalid spill location: " <> int slot
-                     $$  text "maxSpillSlots:          " <> int maxSpillSlots)
-
diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs

index 987fc2d..1fb6a01 100644 (file)
--- a/compiler/nativeGen/SPARC/Regs.hs
+++ b/compiler/nativeGen/SPARC/Regs.hs
@@ -5,17 +5,6 @@
  -- -----------------------------------------------------------------------------
  
  module SPARC.Regs (
  -- -----------------------------------------------------------------------------
  
  module SPARC.Regs (
-
-       -- sizes
-       Size(..),
-       intSize, 
-       floatSize, 
-       isFloatSize, 
-       wordSize,
-       cmmTypeSize,
-       sizeToWidth,
-       mkVReg,
-
         -- immediate values
         Imm(..),
         strImmLit,
         -- immediate values
         Imm(..),
         strImmLit,
@@ -39,113 +28,33 @@ module SPARC.Regs (
         fits13Bits, 
         largeOffsetError,
         gReg, iReg, lReg, oReg, fReg,
         fits13Bits, 
         largeOffsetError,
         gReg, iReg, lReg, oReg, fReg,
-       fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
+       fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f22, f26, f27,
         nCG_FirstFloatReg,
  
         nCG_FirstFloatReg,
  
-       -- horror show
+       -- allocatable
         freeReg,
         freeReg,
-       globalRegMaybe
+       allocatableRegs,
+       globalRegMaybe,
+
+       get_GlobalReg_reg_or_addr
  )
  
  where
  
  )
  
  where
  
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-#include "../includes/MachRegs.h"
  
  
-import RegsBase
+import Reg
+import RegClass
  
  
+import CgUtils          ( get_GlobalReg_addr )
  import BlockId
  import Cmm
  import CLabel           ( CLabel )
  import Pretty
  import BlockId
  import Cmm
  import CLabel           ( CLabel )
  import Pretty
-import Outputable      ( Outputable(..), pprPanic, panic )
+import Outputable      ( panic )
  import qualified Outputable
  import qualified Outputable
-import Unique
  import Constants
  import FastBool
  
  import Constants
  import FastBool
  
--- sizes -----------------------------------------------------------------------
-
--- | A 'Size' also includes format information, such as whether
---     the word is signed or unsigned.
---
-data Size
-       = II8     -- byte (signed)
-       | II16    -- halfword (signed, 2 bytes)
-       | II32    -- word (4 bytes)
-       | II64    -- word (8 bytes)
-       | FF32    -- IEEE single-precision floating pt
-       | FF64    -- IEEE single-precision floating pt
-       deriving Eq
-
-
--- | Get the integer size of this width.
-intSize :: Width -> Size
-intSize width
- = case width of
-       W8      -> II8
-       W16     -> II16
-       W32     -> II32
-       W64     -> II64
-       other   -> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Get the float size of this width.
-floatSize :: Width -> Size
-floatSize width
- = case width of
-       W32     -> FF32
-       W64     -> FF64
-       other   -> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Check if a size represents a floating point value.
-isFloatSize :: Size -> Bool
-isFloatSize size
- = case size of
-       FF32    -> True
-       FF64    -> True
-       _       -> False
-
-
--- | Size of a machine word. 
---     This is big enough to hold a pointer.
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
--- | Convert a Cmm type to a Size.
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty 
-       | isFloatType ty        = floatSize (typeWidth ty)
-       | otherwise             = intSize (typeWidth ty)
-
-
--- | Get the Width of a Size.
-sizeToWidth :: Size -> Width
-sizeToWidth size
- = case size of
-       II8             -> W8
-       II16            -> W16
-       II32            -> W32
-       II64            -> W64
-       FF32            -> W32
-       FF64            -> W64
-
-
--- | Make a virtual reg with this size.
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
-       | not (isFloatSize size) 
-       = VirtualRegI u
-
-       | otherwise
-       = case size of
-               FF32    -> VirtualRegF u
-               FF64    -> VirtualRegD u
-               _       -> panic "mkVReg"
-
  
  -- immediates ------------------------------------------------------------------
  
  
  -- immediates ------------------------------------------------------------------
  
@@ -390,48 +299,13 @@ o1  = RealReg (oReg 1)
  f0  = RealReg (fReg 0)
  
  
  f0  = RealReg (fReg 0)
  
  
+-- | We use he first few float regs as double precision. 
+--     This is the RegNo of the first float regs we use as single precision.
+--
  nCG_FirstFloatReg :: RegNo
  nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg NCG_FirstFloatReg
-#else
-nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg f22
-#endif
-
-
--- horror show -----------------------------------------------------------------
-#if sparc_TARGET_ARCH
-#define g0 0
-#define g1 1
-#define g2 2
-#define g3 3
-#define g4 4
-#define g5 5
-#define g6 6
-#define g7 7
-#define o0 8
-#define o1 9
-#define o2 10
-#define o3 11
-#define o4 12
-#define o5 13
-#define o6 14
-#define o7 15
-#define l0 16
-#define l1 17
-#define l2 18
-#define l3 19
-#define l4 20
-#define l5 21
-#define l6 22
-#define l7 23
-#define i0 24
-#define i1 25
-#define i2 26
-#define i3 27
-#define i4 28
-#define i5 29
-#define i6 30
-#define i7 31
+nCG_FirstFloatReg = 54
+
+
  
  -- | Check whether a machine register is free for allocation.
  --     This needs to match the info in includes/MachRegs.h otherwise modules
  
  -- | Check whether a machine register is free for allocation.
  --     This needs to match the info in includes/MachRegs.h otherwise modules
@@ -445,7 +319,11 @@ freeReg regno
  
         -- %g1(r1) - %g4(r4) are allocable -----------------
  
  
         -- %g1(r1) - %g4(r4) are allocable -----------------
  
-freeReg :: RegNo -> FastBool
+       -- %g5(r5) - %g7(r7) 
+       --      are reserved for the OS
+       5       -> fastBool False
+       6       -> fastBool False
+       7       -> fastBool False
  
         -- %o0(r8) - %o5(r13) are allocable ----------------
  
  
         -- %o0(r8) - %o5(r13) are allocable ----------------
  
@@ -507,7 +385,15 @@ freeReg :: RegNo -> FastBool
  
         -- regs not matched above are allocable.
         _       -> fastBool True
  
         -- regs not matched above are allocable.
         _       -> fastBool True
-       
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs for which we are prepared to allow the
+-- register allocator to attempt to map VRegs to.
+allocatableRegs :: [RegNo]
+allocatableRegs
+   = let isFree i = isFastTrue (freeReg i)
+     in  filter isFree allMachRegNos
  
  
  -- | Returns Just the real register that a global register is stored in.
  
  
  -- | Returns Just the real register that a global register is stored in.
@@ -539,15 +425,20 @@ globalRegMaybe gg
         Hp              -> Just (RealReg 27)    -- %i3
         HpLim           -> Just (RealReg 28)    -- %i4
  
         Hp              -> Just (RealReg 27)    -- %i3
         HpLim           -> Just (RealReg 28)    -- %i4
  
-globalRegMaybe :: GlobalReg -> Maybe Reg
-
-
-
-#else
-
-freeReg        _       = 0#
-globalRegMaybe = panic "SPARC.Regs.globalRegMaybe: not defined"
+       BaseReg         -> Just (RealReg 25)    -- %i1
+               
+       _               -> Nothing      
  
  
-#endif
  
  
+-- We map STG registers onto appropriate CmmExprs.  Either they map
+-- to real machine registers or stored as offsets from BaseReg.  Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
  
  
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+   = case globalRegMaybe mid of
+        Just rr -> Left rr
+        Nothing -> Right (get_GlobalReg_addr mid)
diff --git a/compiler/nativeGen/Size.hs b/compiler/nativeGen/Size.hs

new file mode 100644 (file)

index 0000000..3be5430
--- /dev/null
+++ b/compiler/nativeGen/Size.hs
@@ -0,0 +1,103 @@
+-- | Sizes on this architecture
+--     A Size is a combination of width and class
+-- 
+--     TODO:   Rename this to "Format" instead of "Size" to reflect
+--             the fact that it represents floating point vs integer.
+--
+--     TODO:   Signed vs unsigned?
+--
+--     TODO:   This module is currenly shared by all architectures because
+--             NCGMonad need to know about it to make a VReg. It would be better
+--             to have architecture specific formats, and do the overloading
+--             properly. eg SPARC doesn't care about FF80.
+--
+module Size (
+       Size(..),
+       intSize,
+       floatSize,
+       isFloatSize,
+       cmmTypeSize,
+       sizeToWidth
+)
+
+where
+
+import Cmm
+import Outputable
+
+-- It looks very like the old MachRep, but it's now of purely local
+-- significance, here in the native code generator.  You can change it
+-- without global consequences.
+--
+-- A major use is as an opcode qualifier; thus the opcode 
+--     mov.l a b
+-- might be encoded 
+--     MOV II32 a b
+-- where the Size field encodes the ".l" part.
+
+-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
+--       here.  I've removed them from the x86 version, we'll see what happens --SDM
+
+-- ToDo: quite a few occurrences of Size could usefully be replaced by Width
+
+data Size
+       = II8 
+       | II16 
+       | II32 
+       | II64 
+       | FF32 
+       | FF64 
+       | FF80
+       deriving (Show, Eq)
+
+
+-- | Get the integer size of this width.
+intSize :: Width -> Size
+intSize width
+ = case width of
+       W8      -> II8
+       W16     -> II16
+       W32     -> II32
+       W64     -> II64
+       other   -> pprPanic "Size.intSize" (ppr other)
+
+
+-- | Get the float size of this width.
+floatSize :: Width -> Size
+floatSize width
+ = case width of
+       W32     -> FF32
+       W64     -> FF64
+       other   -> pprPanic "Size.floatSize" (ppr other)
+
+
+-- | Check if a size represents a floating point value.
+isFloatSize :: Size -> Bool
+isFloatSize size
+ = case size of
+       FF32    -> True
+       FF64    -> True
+       FF80    -> True
+       _       -> False
+
+
+-- | Convert a Cmm type to a Size.
+cmmTypeSize :: CmmType -> Size
+cmmTypeSize ty 
+       | isFloatType ty        = floatSize (typeWidth ty)
+       | otherwise             = intSize (typeWidth ty)
+
+
+-- | Get the Width of a Size.
+sizeToWidth :: Size -> Width
+sizeToWidth size
+ = case size of
+       II8             -> W8
+       II16            -> W16
+       II32            -> W32
+       II64            -> W64
+       FF32            -> W32
+       FF64            -> W64
+       FF80            -> W80
+       
+
diff --git a/compiler/nativeGen/TargetReg.hs b/compiler/nativeGen/TargetReg.hs

new file mode 100644 (file)

index 0000000..2643b00
--- /dev/null
+++ b/compiler/nativeGen/TargetReg.hs
@@ -0,0 +1,101 @@
+
+-- | Hard wired things related to registers.
+--     This is module is preventing the native code generator being able to 
+--     emit code for non-host architectures.
+--
+--     TODO: Do a better job of the overloading, and eliminate this module.
+--     We'd probably do better with a Register type class, and hook this to 
+--     Instruction somehow.
+--
+--     TODO: We should also make arch specific versions of RegAlloc.Graph.TrivColorable
+
+module TargetReg (
+       targetRegClass,
+       targetMkVReg,
+       targetWordSize,
+       targetRegDotColor
+)
+
+where
+
+#include "HsVersions.h"
+
+import Reg
+import RegClass
+import Size
+
+import CmmExpr (wordWidth)
+import Outputable
+import Unique
+
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import qualified X86.Regs      as X86
+import qualified X86.RegInfo   as X86
+
+#elif powerpc_TARGET_ARCH
+import qualified PPC.Regs      as PPC
+import qualified PPC.RegInfo   as PPC
+
+#elif sparc_TARGET_ARCH        
+import qualified SPARC.Regs    as SPARC
+import qualified SPARC.RegInfo as SPARC
+
+
+#else
+#error "RegAlloc.Graph.TargetReg: not defined"
+#endif
+
+-- x86 -------------------------------------------------------------------------
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = X86.regClass
+
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg   = X86.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = X86.regDotColor
+
+
+-- ppc -------------------------------------------------------------------------
+#elif powerpc_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = PPC.regClass
+
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg   = PPC.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = PPC.regDotColor
+
+
+-- sparc -----------------------------------------------------------------------
+#elif sparc_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = SPARC.regClass
+
+-- | Size of a machine word. 
+--     This is big enough to hold a pointer.
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg   = SPARC.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = SPARC.regDotColor
+
+--------------------------------------------------------------------------------
+#else
+#error "RegAlloc.Graph.TargetReg: not defined"
+#endif
+
+
+
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs

new file mode 100644 (file)

index 0000000..43495a4
--- /dev/null
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -0,0 +1,2313 @@
+{-# OPTIONS -w #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH} things, the
+-- structure should not be too overwhelming.
+
+module X86.CodeGen ( 
+       cmmTopCodeGen, 
+       InstrBlock 
+) 
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import X86.Instr
+import X86.Cond
+import X86.Regs
+import X86.RegInfo
+import X86.Ppr
+import Instruction
+import PIC
+import NCGMonad
+import Size
+import Reg
+import RegClass
+import Platform
+
+-- Our intermediate code:
+import BasicTypes
+import BlockId
+import PprCmm          ( pprExpr )
+import Cmm
+import CLabel
+import ClosureInfo     ( C_SRT(..) )
+
+-- The rest:
+import StaticFlags     ( opt_PIC )
+import ForeignCall     ( CCallConv(..) )
+import OrdList
+import Pretty
+import qualified Outputable as O
+import Outputable
+import FastString
+import FastBool                ( isFastTrue )
+import Constants       ( wORD_SIZE )
+import DynFlags
+
+import Debug.Trace     ( trace )
+
+import Control.Monad   ( mapAndUnzipM )
+import Data.Maybe      ( fromJust )
+import Data.Bits
+import Data.Word
+import Data.Int
+
+
+cmmTopCodeGen 
+       :: DynFlags
+       -> RawCmmTop
+       -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen dynflags 
+       (CmmProc info lab params (ListGraph blocks)) = do
+  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+  picBaseMb <- getPicBaseMaybeNat
+  let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+      tops = proc : concat statics
+      os   = platformOS $ targetPlatform dynflags
+
+  case picBaseMb of
+      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
+      Nothing -> return tops
+  
+cmmTopCodeGen _ (CmmData sec dat) = do
+  return [CmmData sec dat]  -- no translation, we just use CmmStatic
+
+
+basicBlockCodeGen 
+       :: CmmBasicBlock 
+       -> NatM ( [NatBasicBlock Instr]
+               , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+  instrs <- stmtsToInstrs stmts
+  -- code generation may introduce new basic block boundaries, which
+  -- are indicated by the NEWBLOCK instruction.  We must split up the
+  -- instruction stream into basic blocks again.  Also, we extract
+  -- LDATAs here too.
+  let
+       (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+       
+       mkBlocks (NEWBLOCK id) (instrs,blocks,statics) 
+         = ([], BasicBlock id instrs : blocks, statics)
+       mkBlocks (LDATA sec dat) (instrs,blocks,statics) 
+         = (instrs, blocks, CmmData sec dat:statics)
+       mkBlocks instr (instrs,blocks,statics)
+         = (instr:instrs, blocks, statics)
+  -- in
+  return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+   = do instrss <- mapM stmtToInstrs stmts
+        return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+    CmmNop        -> return nilOL
+    CmmComment s   -> return (unitOL (COMMENT s))
+
+    CmmAssign reg src
+      | isFloatType ty -> assignReg_FltCode size reg src
+#if WORD_SIZE_IN_BITS==32
+      | isWord64 ty    -> assignReg_I64Code      reg src
+#endif
+      | otherwise       -> assignReg_IntCode size reg src
+       where ty = cmmRegType reg
+             size = cmmTypeSize ty
+
+    CmmStore addr src
+      | isFloatType ty -> assignMem_FltCode size addr src
+#if WORD_SIZE_IN_BITS==32
+      | isWord64 ty     -> assignMem_I64Code      addr src
+#endif
+      | otherwise       -> assignMem_IntCode size addr src
+       where ty = cmmExprType src
+             size = cmmTypeSize ty
+
+    CmmCall target result_regs args _ _
+       -> genCCall target result_regs args
+
+    CmmBranch id         -> genBranch id
+    CmmCondBranch arg id  -> genCondJump id arg
+    CmmSwitch arg ids     -> genSwitch arg ids
+    CmmJump arg params   -> genJump arg
+    CmmReturn params     ->
+      panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+--     They are really trees of insns to facilitate fast appending, where a
+--     left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock 
+       = OrdList Instr
+
+
+-- | Condition codes passed up the tree.
+--
+data CondCode  
+       = CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+--     Reg is the lower 32-bit temporary which contains the result. 
+--     Use getHiVRegFromLo to find the other VRegUnique.  
+--
+--     Rules of this simplified insn selection game are therefore that
+--     the returned Reg may be modified
+--
+data ChildCode64       
+   = ChildCode64 
+        InstrBlock
+        Reg            
+
+
+-- | Register's passed up the tree.  If the stix code forces the register
+--     to live in a pre-decided machine register, it comes out as @Fixed@;
+--     otherwise, it comes out as @Any@, and the parent can decide which
+--     register to put it in.
+--
+data Register
+       = Fixed Size Reg InstrBlock
+       | Any   Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+  = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+  = case get_GlobalReg_reg_or_addr mid of
+       Left (RealReg rrno) -> RealReg rrno
+       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+          -- By this stage, the only MagicIds remaining should be the
+          -- ones which map to a real machine register on this
+          -- platform.  Hence ...
+
+
+-- | Memory addressing modes passed up the tree.
+data Amode 
+       = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to.  So you can't put
+anything in between, lest it overwrite some of those registers.  If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+    code
+    LEA amode, tmp
+    ... other computation ...
+    ... (tmp) ...
+-}
+
+
+-- | Check whether an integer will fit in 32 bits.
+--     A CmmInt is intended to be truncated to the appropriate 
+--     number of bits, so here we truncate it to Int64.  This is
+--     important because e.g. -1 as a CmmInt might be either
+--     -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+  where i64 = fromIntegral i :: Int64
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+    where blockLabel = mkAsmTempLabel id
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+  where width = typeWidth (cmmRegType reg)
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+--     we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+       tmp <- getNewRegNat rep
+       return (tmp, code tmp)
+    Fixed _ reg code -> 
+       return (reg, code)
+
+
+
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+  Amode addr addr_code <- getAmode addrTree
+  ChildCode64 vcode rlo <- iselExpr64 valueTree
+  let 
+        rhi = getHiVRegFromLo rlo
+
+        -- Little-endian store
+        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
+        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
+  -- in
+  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+   let 
+         r_dst_lo = mkVReg u_dst II32
+         r_dst_hi = getHiVRegFromLo r_dst_lo
+         r_src_hi = getHiVRegFromLo r_src_lo
+         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
+         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
+   -- in
+   return (
+        vcode `snocOL` mov_lo `snocOL` mov_hi
+     )
+
+assignReg_I64Code lvalue valueTree
+   = panic "assignReg_I64Code(i386): invalid lvalue"
+
+
+
+
+iselExpr64        :: CmmExpr -> NatM ChildCode64
+iselExpr64 (CmmLit (CmmInt i _)) = do
+  (rlo,rhi) <- getNewRegPairNat II32
+  let
+       r = fromIntegral (fromIntegral i :: Word32)
+       q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+       code = toOL [
+               MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
+               MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
+               ]
+  -- in
+  return (ChildCode64 code rlo)
+
+iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+   Amode addr addr_code <- getAmode addrTree
+   (rlo,rhi) <- getNewRegPairNat II32
+   let 
+        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
+        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
+   -- in
+   return (
+            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 
+                        rlo
+     )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
+   = return (ChildCode64 nilOL (mkVReg vu II32))
+         
+-- we handle addition, but rather badly
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
+   ChildCode64 code1 r1lo <- iselExpr64 e1
+   (rlo,rhi) <- getNewRegPairNat II32
+   let
+       r = fromIntegral (fromIntegral i :: Word32)
+       q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+       r1hi = getHiVRegFromLo r1lo
+       code =  code1 `appOL`
+               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                      ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
+                      MOV II32 (OpReg r1hi) (OpReg rhi),
+                      ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
+   -- in
+   return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+   ChildCode64 code1 r1lo <- iselExpr64 e1
+   ChildCode64 code2 r2lo <- iselExpr64 e2
+   (rlo,rhi) <- getNewRegPairNat II32
+   let
+       r1hi = getHiVRegFromLo r1lo
+       r2hi = getHiVRegFromLo r2lo
+       code =  code1 `appOL`
+               code2 `appOL`
+               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+                      ADD II32 (OpReg r2lo) (OpReg rlo),
+                      MOV II32 (OpReg r1hi) (OpReg rhi),
+                      ADC II32 (OpReg r2hi) (OpReg rhi) ]
+   -- in
+   return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
+     fn <- getAnyReg expr
+     r_dst_lo <-  getNewRegNat II32
+     let r_dst_hi = getHiVRegFromLo r_dst_lo
+         code = fn r_dst_lo
+     return (
+             ChildCode64 (code `snocOL` 
+                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
+                          r_dst_lo
+            )
+
+iselExpr64 expr
+   = pprPanic "iselExpr64(i386)" (ppr expr)
+
+
+
+--------------------------------------------------------------------------------
+getRegister :: CmmExpr -> NatM Register
+
+#if !x86_64_TARGET_ARCH
+    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+    -- register, it can only be used for rip-relative addressing.
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+  = do
+      reg <- getPicBaseNat archWordSize
+      return (Fixed archWordSize reg nilOL)
+#endif
+
+getRegister (CmmReg reg) 
+  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
+                 (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _) 
+  = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
+    -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code       
+
+#endif
+
+
+
+
+#if i386_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f W32)) = do
+    lbl <- getNewLabelNat
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let code dst =
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat f W32)]
+           `consOL` (addr_code `snocOL`
+           GLD FF32 addr dst)
+    -- in
+    return (Any FF32 code)
+
+
+getRegister (CmmLit (CmmFloat d W64))
+  | d == 0.0
+  = let code dst = unitOL (GLDZ dst)
+    in  return (Any FF64 code)
+
+  | d == 1.0
+  = let code dst = unitOL (GLD1 dst)
+    in  return (Any FF64 code)
+
+  | otherwise = do
+    lbl <- getNewLabelNat
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let code dst =
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat d W64)]
+           `consOL` (addr_code `snocOL`
+           GLD FF64 addr dst)
+    -- in
+    return (Any FF64 code)
+
+#endif /* i386_TARGET_ARCH */
+
+
+
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmLit (CmmFloat 0.0 w)) = do
+   let size = floatSize w
+       code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
+       -- I don't know why there are xorpd, xorps, and pxor instructions.
+       -- They all appear to do the same thing --SDM
+   return (Any size code)
+
+getRegister (CmmLit (CmmFloat f w)) = do
+    lbl <- getNewLabelNat
+    let code dst = toOL [
+           LDATA ReadOnlyData
+                       [CmmDataLabel lbl,
+                        CmmStaticLit (CmmFloat f w)],
+           MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+           ]
+    -- in
+    return (Any size code)
+  where size = floatSize w
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL II8) addr
+  return (Any II32 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL II8) addr
+  return (Any II32 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL II16) addr
+  return (Any II32 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL II16) addr
+  return (Any II32 code)
+
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL II8) addr
+  return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL II8) addr
+  return (Any II64 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVZxL II16) addr
+  return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL II16) addr
+  return (Any II64 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
+  return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
+  code <- intLoadCode (MOVSxL II32) addr
+  return (Any II64 code)
+
+getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
+                                     CmmLit displacement])
+    = return $ Any II64 (\dst -> unitOL $
+        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
+
+getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+    code dst = x_code dst `appOL` toOL [
+       -- This is how gcc does it, so it can't be that bad:
+       LDATA ReadOnlyData16 [
+               CmmAlign 16,
+               CmmDataLabel lbl,
+               CmmStaticLit (CmmInt 0x80000000 W32),
+               CmmStaticLit (CmmInt 0 W32),
+               CmmStaticLit (CmmInt 0 W32),
+               CmmStaticLit (CmmInt 0 W32)
+       ],
+       XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+               -- xorps, so we need the 128-bit constant
+               -- ToDo: rip-relative
+       ]
+  --
+  return (Any FF32 code)
+
+getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
+  x_code <- getAnyReg x
+  lbl <- getNewLabelNat
+  let
+       -- This is how gcc does it, so it can't be that bad:
+    code dst = x_code dst `appOL` toOL [
+       LDATA ReadOnlyData16 [
+               CmmAlign 16,
+               CmmDataLabel lbl,
+               CmmStaticLit (CmmInt 0x8000000000000000 W64),
+               CmmStaticLit (CmmInt 0 W64)
+       ],
+               -- gcc puts an unpck here.  Wonder if we need it.
+       XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+               -- xorpd, so we need the 128-bit constant
+       ]
+  --
+  return (Any FF64 code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+  = case mop of
+#if i386_TARGET_ARCH
+      MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
+      MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
+#endif
+
+      MO_S_Neg w -> triv_ucode NEGI (intSize w)
+      MO_F_Neg w -> triv_ucode NEGI (floatSize w)
+      MO_Not w   -> triv_ucode NOT  (intSize w)
+
+      -- Nop conversions
+      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
+      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
+      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
+      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
+      MO_UU_Conv W32 W16 -> toI16Reg W32 x
+      MO_SS_Conv W32 W16 -> toI16Reg W32 x
+
+#if x86_64_TARGET_ARCH
+      MO_UU_Conv W64 W32 -> conversionNop II64 x
+      MO_SS_Conv W64 W32 -> conversionNop II64 x
+      MO_UU_Conv W64 W16 -> toI16Reg W64 x
+      MO_SS_Conv W64 W16 -> toI16Reg W64 x
+      MO_UU_Conv W64 W8  -> toI8Reg  W64 x
+      MO_SS_Conv W64 W8  -> toI8Reg  W64 x
+#endif
+
+      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
+      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
+
+      -- widenings
+      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
+      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
+      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
+
+      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
+      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
+      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
+
+#if x86_64_TARGET_ARCH
+      MO_UU_Conv W8  W64 -> integerExtend W8  W64 MOVZxL x
+      MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
+      MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
+      MO_SS_Conv W8  W64 -> integerExtend W8  W64 MOVSxL x
+      MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
+      MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
+       -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+       -- However, we don't want the register allocator to throw it
+       -- away as an unnecessary reg-to-reg move, so we keep it in
+       -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
+      MO_FF_Conv W32 W64 -> conversionNop FF64 x
+      MO_FF_Conv W64 W32 -> conversionNop FF32 x
+#else
+      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
+      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
+#endif
+
+      MO_FS_Conv from to -> coerceFP2Int from to x
+      MO_SF_Conv from to -> coerceInt2FP from to x
+
+      other -> pprPanic "getRegister" (pprMachOp mop)
+   where
+       triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
+       triv_ucode instr size = trivialUCode size (instr size) x
+
+       -- signed or unsigned extension.
+       integerExtend :: Width -> Width
+                     -> (Size -> Operand -> Operand -> Instr)
+                     -> CmmExpr -> NatM Register
+       integerExtend from to instr expr = do
+           (reg,e_code) <- if from == W8 then getByteReg expr
+                                         else getSomeReg expr
+           let 
+               code dst = 
+                 e_code `snocOL`
+                 instr (intSize from) (OpReg reg) (OpReg dst)
+           return (Any (intSize to) code)
+
+       toI8Reg :: Width -> CmmExpr -> NatM Register
+       toI8Reg new_rep expr
+            = do codefn <- getAnyReg expr
+                return (Any (intSize new_rep) codefn)
+               -- HACK: use getAnyReg to get a byte-addressable register.
+               -- If the source was a Fixed register, this will add the
+               -- mov instruction to put it into the desired destination.
+               -- We're assuming that the destination won't be a fixed
+               -- non-byte-addressable register; it won't be, because all
+               -- fixed registers are word-sized.
+
+       toI16Reg = toI8Reg -- for now
+
+       conversionNop :: Size -> CmmExpr -> NatM Register
+        conversionNop new_size expr
+            = do e_code <- getRegister expr
+                 return (swizzleRegisterRep e_code new_size)
+
+
+getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
+  = case mop of
+      MO_F_Eq w -> condFltReg EQQ x y
+      MO_F_Ne w -> condFltReg NE x y
+      MO_F_Gt w -> condFltReg GTT x y
+      MO_F_Ge w -> condFltReg GE x y
+      MO_F_Lt w -> condFltReg LTT x y
+      MO_F_Le w -> condFltReg LE x y
+
+      MO_Eq rep   -> condIntReg EQQ x y
+      MO_Ne rep   -> condIntReg NE x y
+
+      MO_S_Gt rep -> condIntReg GTT x y
+      MO_S_Ge rep -> condIntReg GE x y
+      MO_S_Lt rep -> condIntReg LTT x y
+      MO_S_Le rep -> condIntReg LE x y
+
+      MO_U_Gt rep -> condIntReg GU  x y
+      MO_U_Ge rep -> condIntReg GEU x y
+      MO_U_Lt rep -> condIntReg LU  x y
+      MO_U_Le rep -> condIntReg LEU x y
+
+#if i386_TARGET_ARCH
+      MO_F_Add w -> trivialFCode w GADD x y
+      MO_F_Sub w -> trivialFCode w GSUB x y
+      MO_F_Quot w -> trivialFCode w GDIV x y
+      MO_F_Mul w -> trivialFCode w GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+      MO_F_Add w -> trivialFCode w ADD x y
+      MO_F_Sub w -> trivialFCode w SUB x y
+      MO_F_Quot w -> trivialFCode w FDIV x y
+      MO_F_Mul w -> trivialFCode w MUL x y
+#endif
+
+      MO_Add rep -> add_code rep x y
+      MO_Sub rep -> sub_code rep x y
+
+      MO_S_Quot rep -> div_code rep True  True  x y
+      MO_S_Rem  rep -> div_code rep True  False x y
+      MO_U_Quot rep -> div_code rep False True  x y
+      MO_U_Rem  rep -> div_code rep False False x y
+
+      MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+      MO_Mul rep -> triv_op rep IMUL
+      MO_And rep -> triv_op rep AND
+      MO_Or  rep -> triv_op rep OR
+      MO_Xor rep -> triv_op rep XOR
+
+       {- Shift ops on x86s have constraints on their source, it
+          either has to be Imm, CL or 1
+           => trivialCode is not restrictive enough (sigh.)
+       -}         
+      MO_Shl rep   -> shift_code rep SHL x y {-False-}
+      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
+      MO_S_Shr rep -> shift_code rep SAR x y {-False-}
+
+      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
+  where
+    --------------------
+    triv_op width instr = trivialCode width op (Just op) x y
+                       where op   = instr (intSize width)
+
+    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    imulMayOflo rep a b = do
+         (a_reg, a_code) <- getNonClobberedReg a
+         b_code <- getAnyReg b
+         let 
+            shift_amt  = case rep of
+                          W32 -> 31
+                          W64 -> 63
+                          _ -> panic "shift_amt"
+
+            size = intSize rep
+             code = a_code `appOL` b_code eax `appOL`
+                        toOL [
+                          IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
+                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
+                               -- sign extend lower part
+                           SUB size (OpReg edx) (OpReg eax)
+                               -- compare against upper
+                           -- eax==0 if high part == sign extended low part
+                        ]
+         -- in
+        return (Fixed size eax code)
+
+    --------------------
+    shift_code :: Width
+              -> (Size -> Operand -> Operand -> Instr)
+              -> CmmExpr
+              -> CmmExpr
+              -> NatM Register
+
+    {- Case1: shift length as immediate -}
+    shift_code width instr x y@(CmmLit lit) = do
+         x_code <- getAnyReg x
+         let
+              size = intSize width
+              code dst
+                 = x_code dst `snocOL` 
+                   instr size (OpImm (litToImm lit)) (OpReg dst)
+         -- in
+         return (Any size code)
+        
+    {- Case2: shift length is complex (non-immediate)
+      * y must go in %ecx.
+      * we cannot do y first *and* put its result in %ecx, because
+        %ecx might be clobbered by x.
+      * if we do y second, then x cannot be 
+        in a clobbered reg.  Also, we cannot clobber x's reg
+        with the instruction itself.
+      * so we can either:
+        - do y first, put its result in a fresh tmp, then copy it to %ecx later
+        - do y second and put its result into %ecx.  x gets placed in a fresh
+          tmp.  This is likely to be better, becuase the reg alloc can
+          eliminate this reg->reg move here (it won't eliminate the other one,
+          because the move is into the fixed %ecx).
+    -}
+    shift_code width instr x y{-amount-} = do
+        x_code <- getAnyReg x
+       let size = intSize width
+       tmp <- getNewRegNat size
+        y_code <- getAnyReg y
+       let 
+          code = x_code tmp `appOL`
+                 y_code ecx `snocOL`
+                 instr size (OpReg ecx) (OpReg tmp)
+        -- in
+        return (Fixed size tmp code)
+
+    --------------------
+    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    add_code rep x (CmmLit (CmmInt y _))
+       | is32BitInteger y = add_int rep x y
+    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
+      where size = intSize rep
+
+    --------------------
+    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    sub_code rep x (CmmLit (CmmInt y _))
+       | is32BitInteger (-y) = add_int rep x (-y)
+    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
+
+    -- our three-operand add instruction:
+    add_int width x y = do
+       (x_reg, x_code) <- getSomeReg x
+       let
+           size = intSize width
+           imm = ImmInt (fromInteger y)
+           code dst
+               = x_code `snocOL`
+                LEA size
+                       (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
+                        (OpReg dst)
+       -- 
+       return (Any size code)
+
+    ----------------------
+    div_code width signed quotient x y = do
+          (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
+          x_code <- getAnyReg x
+          let
+            size = intSize width
+            widen | signed    = CLTD size
+                  | otherwise = XOR size (OpReg edx) (OpReg edx)
+
+            instr | signed    = IDIV
+                  | otherwise = DIV
+
+            code = y_code `appOL`
+                   x_code eax `appOL`
+                   toOL [widen, instr size y_op]
+
+            result | quotient  = eax
+                   | otherwise = edx
+
+          -- in
+           return (Fixed size result code)
+
+
+getRegister (CmmLoad mem pk)
+  | isFloatType pk
+  = do
+    Amode src mem_code <- getAmode mem
+    let
+       size = cmmTypeSize pk
+       code dst = mem_code `snocOL` 
+                  IF_ARCH_i386(GLD size src dst,
+                               MOV size (OpAddr src) (OpReg dst))
+    return (Any size code)
+
+#if i386_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+  | not (isWord64 pk)
+  = do 
+    code <- intLoadCode instr mem
+    return (Any size code)
+  where
+    width = typeWidth pk
+    size = intSize width
+    instr = case width of
+               W8     -> MOVZxL II8
+               _other -> MOV size
+       -- We always zero-extend 8-bit loads, if we
+       -- can't think of anything better.  This is because
+       -- we can't guarantee access to an 8-bit variant of every register
+       -- (esi and edi don't have 8-bit variants), so to make things
+       -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+  = do 
+    code <- intLoadCode (MOV size) mem
+    return (Any size code)
+  where size = intSize $ typeWidth pk
+#endif
+
+getRegister (CmmLit (CmmInt 0 width))
+  = let
+       size = intSize width
+
+       -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+       adj_size = case size of II64 -> II32; _ -> size
+       size1 = IF_ARCH_i386( size, adj_size ) 
+       code dst 
+           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
+    in
+       return (Any size code)
+
+#if x86_64_TARGET_ARCH
+  -- optimisation for loading small literals on x86_64: take advantage
+  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+  -- instruction forms are shorter.
+getRegister (CmmLit lit) 
+  | isWord64 (cmmLitType lit), not (isBigLit lit)
+  = let 
+       imm = litToImm lit
+       code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
+    in
+       return (Any II64 code)
+  where
+   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
+   isBigLit _ = False
+       -- note1: not the same as (not.is32BitLit), because that checks for
+       -- signed literals that fit in 32 bits, but we want unsigned
+       -- literals here.
+       -- note2: all labels are small, because we're assuming the
+       -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
+getRegister (CmmLit lit)
+  = let 
+       size = cmmTypeSize (cmmLitType lit)
+       imm = litToImm lit
+       code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
+    in
+       return (Any size code)
+
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
+
+
+intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
+   -> NatM (Reg -> InstrBlock)
+intLoadCode instr mem = do
+  Amode src mem_code <- getAmode mem
+  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
+
+-- Compute an expression into *any* register, adding the appropriate
+-- move instruction if necessary.
+getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
+getAnyReg expr = do
+  r <- getRegister expr
+  anyReg r
+
+anyReg :: Register -> NatM (Reg -> InstrBlock)
+anyReg (Any _ code)          = return code
+anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
+
+-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
+-- Fixed registers might not be byte-addressable, so we make sure we've
+-- got a temporary, inserting an extra reg copy if necessary.
+getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
+getByteReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+       tmp <- getNewRegNat rep
+       return (tmp, code tmp)
+    Fixed rep reg code 
+       | isVirtualReg reg -> return (reg,code)
+       | otherwise -> do
+           tmp <- getNewRegNat rep
+           return (tmp, code `snocOL` reg2reg rep reg tmp)
+       -- ToDo: could optimise slightly by checking for byte-addressable
+       -- real registers, but that will happen very rarely if at all.
+#endif
+
+-- Another variant: this time we want the result in a register that cannot
+-- be modified by code to evaluate an arbitrary expression.
+getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getNonClobberedReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+       tmp <- getNewRegNat rep
+       return (tmp, code tmp)
+    Fixed rep reg code
+       -- only free regs can be clobbered
+       | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
+               tmp <- getNewRegNat rep
+               return (tmp, code `snocOL` reg2reg rep reg tmp)
+       | otherwise -> 
+               return (reg, code)
+
+reg2reg :: Size -> Reg -> Reg -> Instr
+reg2reg size src dst 
+#if i386_TARGET_ARCH
+  | isFloatSize size = GMOV src dst
+#endif
+  | otherwise       = MOV size (OpReg src) (OpReg dst)
+
+
+
+--------------------------------------------------------------------------------
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+#if x86_64_TARGET_ARCH
+
+getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
+                                     CmmLit displacement])
+    = return $ Amode (ripRel (litToImm displacement)) nilOL
+
+#endif
+
+
+-- This is all just ridiculous, since it carefully undoes 
+-- what mangleIndexTree has just done.
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+  | is32BitLit lit
+  -- ASSERT(rep == II32)???
+  = do (x_reg, x_code) <- getSomeReg x
+       let off = ImmInt (-(fromInteger i))
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+  
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+  | is32BitLit lit
+  -- ASSERT(rep == II32)???
+  = do (x_reg, x_code) <- getSomeReg x
+       let off = ImmInt (fromInteger i)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+-- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be 
+-- recognised by the next rule.
+getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
+                                 b@(CmmLit _)])
+  = getAmode (CmmMachOp (MO_Add rep) [b,a])
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) 
+                                       [y, CmmLit (CmmInt shift _)]])
+  | shift == 0 || shift == 1 || shift == 2 || shift == 3
+  = x86_complex_amode x y shift 0
+
+getAmode (CmmMachOp (MO_Add rep) 
+                [x, CmmMachOp (MO_Add _)
+                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
+                         CmmLit (CmmInt offset _)]])
+  | shift == 0 || shift == 1 || shift == 2 || shift == 3
+  && is32BitInteger offset
+  = x86_complex_amode x y shift offset
+
+getAmode (CmmMachOp (MO_Add rep) [x,y])
+  = x86_complex_amode x y 0 0
+
+getAmode (CmmLit lit) | is32BitLit lit
+  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
+
+getAmode expr = do
+  (reg,code) <- getSomeReg expr
+  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
+
+
+x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
+x86_complex_amode base index shift offset
+  = do (x_reg, x_code) <- getNonClobberedReg base
+       -- x must be in a temp, because it has to stay live over y_code
+       -- we could compre x_reg and y_reg and do something better here...
+       (y_reg, y_code) <- getSomeReg index
+       let
+          code = x_code `appOL` y_code
+           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
+               code)
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- getOperand: sometimes any operand will do.
+
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
+
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+  | isSuitableFloatingPointLit lit = do
+    lbl <- getNewLabelNat
+    let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
+                                          CmmStaticLit lit])
+    return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+  | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
+    return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk) 
+  | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+    Amode src mem_code <- getAmode mem
+    (src',save_code) <- 
+       if (amodeCouldBeClobbered src) 
+               then do
+                  tmp <- getNewRegNat archWordSize
+                  return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
+                          unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
+               else
+                  return (src, nilOL)
+    return (OpAddr src', save_code `appOL` mem_code)
+getNonClobberedOperand e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+amodeCouldBeClobbered :: AddrMode -> Bool
+amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
+
+regClobbered (RealReg rr) = isFastTrue (freeReg rr)
+regClobbered _ = False
+
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getOperand (CmmLit lit)
+  | isSuitableFloatingPointLit lit = do
+    lbl <- getNewLabelNat
+    let code = unitOL (LDATA ReadOnlyData  [CmmDataLabel lbl,
+                                          CmmStaticLit lit])
+    return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+  | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
+    return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getOperand e = do
+    (reg, code) <- getSomeReg e
+    return (OpReg reg, code)
+
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit)  = is32BitLit lit
+                         || isSuitableFloatingPointLit lit
+isOperand _             = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory.  However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
+
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+  | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+    Amode src mem_code <- getAmode mem
+    return (OpAddr src, mem_code)
+getRegOrMem e = do
+    (reg, code) <- getNonClobberedReg e
+    return (OpReg reg, code)
+
+#if x86_64_TARGET_ARCH
+is32BitLit (CmmInt i W64) = is32BitInteger i
+   -- assume that labels are in the range 0-2^31-1: this assumes the
+   -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+is32BitLit x = True
+
+
+
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- yes, they really do seem to want exactly the same!
+
+getCondCode (CmmMachOp mop [x, y])
+  = 
+    case mop of
+      MO_F_Eq W32 -> condFltCode EQQ x y
+      MO_F_Ne W32 -> condFltCode NE  x y
+      MO_F_Gt W32 -> condFltCode GTT x y
+      MO_F_Ge W32 -> condFltCode GE  x y
+      MO_F_Lt W32 -> condFltCode LTT x y
+      MO_F_Le W32 -> condFltCode LE  x y
+
+      MO_F_Eq W64 -> condFltCode EQQ x y
+      MO_F_Ne W64 -> condFltCode NE  x y
+      MO_F_Gt W64 -> condFltCode GTT x y
+      MO_F_Ge W64 -> condFltCode GE  x y
+      MO_F_Lt W64 -> condFltCode LTT x y
+      MO_F_Le W64 -> condFltCode LE  x y
+
+      MO_Eq rep -> condIntCode EQQ  x y
+      MO_Ne rep -> condIntCode NE   x y
+
+      MO_S_Gt rep -> condIntCode GTT  x y
+      MO_S_Ge rep -> condIntCode GE   x y
+      MO_S_Lt rep -> condIntCode LTT  x y
+      MO_S_Le rep -> condIntCode LE   x y
+
+      MO_U_Gt rep -> condIntCode GU   x y
+      MO_U_Ge rep -> condIntCode GEU  x y
+      MO_U_Lt rep -> condIntCode LU   x y
+      MO_U_Le rep -> condIntCode LEU  x y
+
+      other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
+
+getCondCode other =  pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
+
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+-- memory vs immediate
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
+    Amode x_addr x_code <- getAmode x
+    let
+       imm  = litToImm lit
+       code = x_code `snocOL`
+                 CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
+    --
+    return (CondCode False cond code)
+
+-- anything vs zero, using a mask
+-- TODO: Add some sanity checking!!!!
+condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
+    | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
+    = do
+      (x_reg, x_code) <- getSomeReg x
+      let
+         code = x_code `snocOL`
+                TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
+      --
+      return (CondCode False cond code)
+
+-- anything vs zero
+condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
+    (x_reg, x_code) <- getSomeReg x
+    let
+       code = x_code `snocOL`
+                 TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
+    --
+    return (CondCode False cond code)
+
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+    (x_reg, x_code) <- getNonClobberedReg x
+    (y_op,  y_code) <- getOperand y    
+    let
+       code = x_code `appOL` y_code `snocOL`
+                  CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
+    -- in
+    return (CondCode False cond code)
+
+-- anything vs anything
+condIntCode cond x y = do
+  (y_reg, y_code) <- getNonClobberedReg y
+  (x_op, x_code) <- getRegOrMem x
+  let
+       code = y_code `appOL`
+              x_code `snocOL`
+                 CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
+  -- in
+  return (CondCode False cond code)
+
+
+
+--------------------------------------------------------------------------------
+condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+#if i386_TARGET_ARCH
+condFltCode cond x y 
+  = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
+  (x_reg, x_code) <- getNonClobberedReg x
+  (y_reg, y_code) <- getSomeReg y
+  let
+       code = x_code `appOL` y_code `snocOL`
+               GCMP cond x_reg y_reg
+  -- The GCMP insn does the test and sets the zero flag if comparable
+  -- and true.  Hence we always supply EQQ as the condition to test.
+  return (CondCode True EQQ code)
+
+#elif x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg.  We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+  (x_reg, x_code) <- getNonClobberedReg x
+  (y_op, y_code) <- getOperand y
+  let
+       code = x_code `appOL`
+              y_code `snocOL`
+                 CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
+       -- NB(1): we need to use the unsigned comparison operators on the
+       -- result of this comparison.
+  -- in
+  return (CondCode True (condToUnsigned cond) code)
+
+#else
+condFltCode    = panic "X86.condFltCode: not defined"
+
+#endif
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business.  Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers.  If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side.  This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+
+-- integer assignment to memory
+
+-- specific case of adding/subtracting an integer to a particular address.
+-- ToDo: catch other cases where we can use an operation directly on a memory 
+-- address.
+assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
+                                                 CmmLit (CmmInt i _)])
+   | addr == addr2, pk /= II64 || is32BitInteger i,
+     Just instr <- check op
+   = do Amode amode code_addr <- getAmode addr
+        let code = code_addr `snocOL`
+                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
+        return code
+   where
+        check (MO_Add _) = Just ADD
+        check (MO_Sub _) = Just SUB
+        check _ = Nothing
+        -- ToDo: more?
+
+-- general case
+assignMem_IntCode pk addr src = do
+    Amode addr code_addr <- getAmode addr
+    (code_src, op_src)   <- get_op_RI src
+    let
+       code = code_src `appOL`
+              code_addr `snocOL`
+                  MOV pk op_src (OpAddr addr)
+       -- NOTE: op_src is stable, so it will still be valid
+       -- after code_addr.  This may involve the introduction 
+       -- of an extra MOV to a temporary register, but we hope
+       -- the register allocator will get rid of it.
+    --
+    return code
+  where
+    get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)  -- code, operator
+    get_op_RI (CmmLit lit) | is32BitLit lit
+      = return (nilOL, OpImm (litToImm lit))
+    get_op_RI op
+      = do (reg,code) <- getNonClobberedReg op
+          return (code, OpReg reg)
+
+
+-- Assign; dst is a reg, rhs is mem
+assignReg_IntCode pk reg (CmmLoad src _) = do
+  load_code <- intLoadCode (MOV pk) src
+  return (load_code (getRegisterReg reg))
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src = do
+  code <- getAnyReg src
+  return (code (getRegisterReg reg))
+
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+  (src_reg, src_code) <- getNonClobberedReg src
+  Amode addr addr_code <- getAmode addr
+  let
+       code = src_code `appOL`
+              addr_code `snocOL`
+                IF_ARCH_i386(GST pk src_reg addr,
+                            MOV pk (OpReg src_reg) (OpAddr addr))
+  return code
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode pk reg src = do
+  src_code <- getAnyReg src
+  return (src_code (getRegisterReg reg))
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLoad mem pk) = do
+  Amode target code <- getAmode mem
+  return (code `snocOL` JMP (OpAddr target))
+
+genJump (CmmLit lit) = do
+  return (unitOL (JMP (OpImm (litToImm lit))))
+
+genJump expr = do
+  (reg,code) <- getSomeReg expr
+  return (code `snocOL` JMP (OpReg reg))
+
+
+-- -----------------------------------------------------------------------------
+--  Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+
+-- -----------------------------------------------------------------------------
+--  Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions.  We peek at the arguments to decide what kind of
+comparison to do.
+
+I386: First, we have to ensure that the condition
+codes are set according to the supplied comparison operation.
+-}
+
+genCondJump
+    :: BlockId     -- the branch target
+    -> CmmExpr      -- the condition on which to branch
+    -> NatM InstrBlock
+
+#if i386_TARGET_ARCH
+genCondJump id bool = do
+  CondCode _ cond code <- getCondCode bool
+  return (code `snocOL` JXX cond id)
+
+#elif x86_64_TARGET_ARCH
+genCondJump id bool = do
+  CondCode is_float cond cond_code <- getCondCode bool
+  if not is_float
+    then
+       return (cond_code `snocOL` JXX cond id)
+    else do
+       lbl <- getBlockIdNat
+
+       -- see comment with condFltReg
+       let code = case cond of
+                       NE  -> or_unordered
+                       GU  -> plain_test
+                       GEU -> plain_test
+                       _   -> and_ordered
+
+           plain_test = unitOL (
+                 JXX cond id
+               )
+           or_unordered = toOL [
+                 JXX cond id,
+                 JXX PARITY id
+               ]
+           and_ordered = toOL [
+                 JXX PARITY lbl,
+                 JXX cond id,
+                 JXX ALWAYS lbl,
+                 NEWBLOCK lbl
+               ]
+       return (cond_code `appOL` code)
+
+#else
+genCondJump    = panic "X86.genCondJump: not defined"
+
+#endif
+
+
+
+
+-- -----------------------------------------------------------------------------
+--  Generating C calls
+
+-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations.  Apart from that, the code is easy.
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+    :: CmmCallTarget           -- function to call
+    -> HintedCmmFormals                -- where to put the result
+    -> HintedCmmActuals                -- arguments (of mixed type)
+    -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
+       -- write barrier compiles to no code on x86/x86-64; 
+       -- we keep it this long in order to prevent earlier optimisations.
+
+-- we only cope with a single result for foreign calls
+genCCall (CmmPrim op) [CmmHinted r _] args = do
+  l1 <- getNewLabelNat
+  l2 <- getNewLabelNat
+  case op of
+       MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
+       MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
+       
+       MO_F32_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
+       MO_F64_Sin  -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
+
+       MO_F32_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
+       MO_F64_Cos  -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
+
+       MO_F32_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
+       MO_F64_Tan  -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
+       
+       other_op    -> outOfLineFloatOp op r args
+ where
+  actuallyInlineFloatOp instr size [CmmHinted x _]
+       = do res <- trivialUFCode size (instr size) x
+            any <- anyReg res
+            return (any (getRegisterReg (CmmLocal r)))
+
+genCCall target dest_regs args = do
+    let
+        sizes               = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
+#if !darwin_TARGET_OS        
+        tot_arg_size        = sum sizes
+#else
+        raw_arg_size        = sum sizes
+        tot_arg_size        = roundTo 16 raw_arg_size
+        arg_pad_size        = tot_arg_size - raw_arg_size
+    delta0 <- getDeltaNat
+    setDeltaNat (delta0 - arg_pad_size)
+#endif
+
+    push_codes <- mapM push_arg (reverse args)
+    delta <- getDeltaNat
+
+    -- in
+    -- deal with static vs dynamic call targets
+    (callinsns,cconv) <-
+      case target of
+       -- CmmPrim -> ...
+        CmmCallee (CmmLit (CmmLabel lbl)) conv
+           -> -- ToDo: stdcall arg sizes
+             return (unitOL (CALL (Left fn_imm) []), conv)
+          where fn_imm = ImmCLbl lbl
+        CmmCallee expr conv
+           -> do { (dyn_c, dyn_r) <- get_op expr
+                 ; ASSERT( isWord32 (cmmExprType expr) )
+                   return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
+
+    let        push_code
+#if darwin_TARGET_OS
+            | arg_pad_size /= 0
+            = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
+                    DELTA (delta0 - arg_pad_size)]
+              `appOL` concatOL push_codes
+            | otherwise
+#endif
+            = concatOL push_codes
+       call = callinsns `appOL`
+               toOL (
+                       -- Deallocate parameters after call for ccall;
+                       -- but not for stdcall (callee does it)
+                  (if cconv == StdCallConv || tot_arg_size==0 then [] else 
+                  [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
+                  ++
+                  [DELTA (delta + tot_arg_size)]
+               )
+    -- in
+    setDeltaNat (delta + tot_arg_size)
+
+    let
+       -- assign the results, if necessary
+       assign_code []     = nilOL
+       assign_code [CmmHinted dest _hint]
+         | isFloatType ty = unitOL (GMOV fake0 r_dest)
+         | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
+                                   MOV II32 (OpReg edx) (OpReg r_dest_hi)]
+         | otherwise      = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
+         where 
+               ty = localRegType dest
+               w  = typeWidth ty
+               r_dest_hi = getHiVRegFromLo r_dest
+               r_dest    = getRegisterReg (CmmLocal dest)
+       assign_code many = panic "genCCall.assign_code many"
+
+    return (push_code `appOL` 
+           call `appOL` 
+           assign_code dest_regs)
+
+  where
+    arg_size :: CmmType -> Int -- Width in bytes
+    arg_size ty = widthInBytes (typeWidth ty)
+
+    roundTo a x | x `mod` a == 0 = x
+                | otherwise = x + a - (x `mod` a)
+
+
+    push_arg :: HintedCmmActual {-current argument-}
+                    -> NatM InstrBlock  -- code
+
+    push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
+      | isWord64 arg_ty = do
+        ChildCode64 code r_lo <- iselExpr64 arg
+        delta <- getDeltaNat
+        setDeltaNat (delta - 8)
+        let 
+            r_hi = getHiVRegFromLo r_lo
+        -- in
+       return (       code `appOL`
+                       toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
+                             PUSH II32 (OpReg r_lo), DELTA (delta - 8),
+                            DELTA (delta-8)]
+            )
+
+      | otherwise = do
+        (code, reg) <- get_op arg
+        delta <- getDeltaNat
+        let size = arg_size arg_ty     -- Byte size
+        setDeltaNat (delta-size)
+        if (isFloatType arg_ty)
+           then return (code `appOL`
+                        toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
+                              DELTA (delta-size),
+                              GST (floatSize (typeWidth arg_ty))
+                                 reg (AddrBaseIndex (EABaseReg esp) 
+                                                        EAIndexNone
+                                                        (ImmInt 0))]
+                       )
+           else return (code `snocOL`
+                        PUSH II32 (OpReg reg) `snocOL`
+                        DELTA (delta-size)
+                       )
+      where
+         arg_ty = cmmExprType arg
+
+    ------------
+    get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
+    get_op op = do
+        (reg,code) <- getSomeReg op
+       return (code, reg)
+
+#elif x86_64_TARGET_ARCH
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
+       -- write barrier compiles to no code on x86/x86-64; 
+       -- we keep it this long in order to prevent earlier optimisations.
+
+
+genCCall (CmmPrim op) [CmmHinted r _] args = 
+  outOfLineFloatOp op r args
+
+genCCall target dest_regs args = do
+
+       -- load up the register arguments
+    (stack_args, aregs, fregs, load_args_code)
+        <- load_args args allArgRegs allFPArgRegs nilOL
+
+    let
+       fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
+       int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+       arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
+               -- for annotating the call instruction with
+
+       sse_regs = length fp_regs_used
+
+       tot_arg_size = arg_size * length stack_args
+
+       -- On entry to the called function, %rsp should be aligned
+       -- on a 16-byte boundary +8 (i.e. the first stack arg after
+       -- the return address is 16-byte aligned).  In STG land
+       -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+       -- need to make sure we push a multiple of 16-bytes of args,
+       -- plus the return address, to get the correct alignment.
+       -- Urg, this is hard.  We need to feed the delta back into
+       -- the arg pushing code.
+    (real_size, adjust_rsp) <-
+       if tot_arg_size `rem` 16 == 0
+           then return (tot_arg_size, nilOL)
+           else do -- we need to adjust...
+               delta <- getDeltaNat
+               setDeltaNat (delta-8)
+               return (tot_arg_size+8, toOL [
+                               SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
+                               DELTA (delta-8)
+                       ])
+
+       -- push the stack args, right to left
+    push_code <- push_args (reverse stack_args) nilOL
+    delta <- getDeltaNat
+
+    -- deal with static vs dynamic call targets
+    (callinsns,cconv) <-
+      case target of
+       -- CmmPrim -> ...
+        CmmCallee (CmmLit (CmmLabel lbl)) conv
+           -> -- ToDo: stdcall arg sizes
+             return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+          where fn_imm = ImmCLbl lbl
+        CmmCallee expr conv
+           -> do (dyn_r, dyn_c) <- getSomeReg expr
+                return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+    let
+       -- The x86_64 ABI requires us to set %al to the number of SSE
+       -- registers that contain arguments, if the called routine
+       -- is a varargs function.  We don't know whether it's a
+       -- varargs function or not, so we have to assume it is.
+       --
+       -- It's not safe to omit this assignment, even if the number
+       -- of SSE regs in use is zero.  If %al is larger than 8
+       -- on entry to a varargs function, seg faults ensue.
+       assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
+
+    let call = callinsns `appOL`
+               toOL (
+                       -- Deallocate parameters after call for ccall;
+                       -- but not for stdcall (callee does it)
+                  (if cconv == StdCallConv || real_size==0 then [] else 
+                  [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
+                  ++
+                  [DELTA (delta + real_size)]
+               )
+    -- in
+    setDeltaNat (delta + real_size)
+
+    let
+       -- assign the results, if necessary
+       assign_code []     = nilOL
+       assign_code [CmmHinted dest _hint] = 
+         case typeWidth rep of
+               W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
+               W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
+               _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
+         where 
+               rep = localRegType dest
+               r_dest = getRegisterReg (CmmLocal dest)
+       assign_code many = panic "genCCall.assign_code many"
+
+    return (load_args_code     `appOL` 
+           adjust_rsp          `appOL`
+           push_code           `appOL`
+           assign_eax sse_regs `appOL`
+           call                `appOL` 
+           assign_code dest_regs)
+
+  where
+    arg_size = 8 -- always, at the mo
+
+    load_args :: [CmmHinted CmmExpr]
+             -> [Reg]                  -- int regs avail for args
+             -> [Reg]                  -- FP regs avail for args
+             -> InstrBlock
+             -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
+    load_args args [] [] code     =  return (args, [], [], code)
+       -- no more regs to use
+    load_args [] aregs fregs code =  return ([], aregs, fregs, code)
+       -- no more args to push
+    load_args ((CmmHinted arg hint) : rest) aregs fregs code
+       | isFloatType arg_rep = 
+       case fregs of
+         [] -> push_this_arg
+         (r:rs) -> do
+            arg_code <- getAnyReg arg
+            load_args rest aregs rs (code `appOL` arg_code r)
+       | otherwise =
+       case aregs of
+         [] -> push_this_arg
+         (r:rs) -> do
+            arg_code <- getAnyReg arg
+            load_args rest rs fregs (code `appOL` arg_code r)
+       where
+         arg_rep = cmmExprType arg
+
+         push_this_arg = do
+           (args',ars,frs,code') <- load_args rest aregs fregs code
+           return ((CmmHinted arg hint):args', ars, frs, code')
+
+    push_args [] code = return code
+    push_args ((CmmHinted arg hint):rest) code
+       | isFloatType arg_rep = do
+        (arg_reg, arg_code) <- getSomeReg arg
+         delta <- getDeltaNat
+         setDeltaNat (delta-arg_size)
+        let code' = code `appOL` arg_code `appOL` toOL [
+                       SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+                       DELTA (delta-arg_size),
+                       MOV (floatSize width) (OpReg arg_reg) (OpAddr  (spRel 0))]
+        push_args rest code'
+
+       | otherwise = do
+       -- we only ever generate word-sized function arguments.  Promotion
+       -- has already happened: our Int8# type is kept sign-extended
+       -- in an Int#, for example.
+        ASSERT(width == W64) return ()
+        (arg_op, arg_code) <- getOperand arg
+         delta <- getDeltaNat
+         setDeltaNat (delta-arg_size)
+        let code' = code `appOL` arg_code `appOL` toOL [
+                               PUSH II64 arg_op, 
+                               DELTA (delta-arg_size)]
+        push_args rest code'
+       where
+         arg_rep = cmmExprType arg
+         width = typeWidth arg_rep
+
+#else
+genCCall       = panic "X86.genCCAll: not defined"
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
+outOfLineFloatOp mop res args
+  = do
+      dflags <- getDynFlagsNat
+      targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
+      let target = CmmCallee targetExpr CCallConv
+        
+      if isFloat64 (localRegType res)
+        then
+          stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
+        else do
+          uq <- getUniqueNat
+          let 
+            tmp = LocalReg uq f64
+          -- in
+          code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
+          code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
+          return (code1 `appOL` code2)
+  where
+       lbl = mkForeignLabel fn Nothing False IsFunction
+
+       fn = case mop of
+             MO_F32_Sqrt  -> fsLit "sqrtf"
+             MO_F32_Sin   -> fsLit "sinf"
+             MO_F32_Cos   -> fsLit "cosf"
+             MO_F32_Tan   -> fsLit "tanf"
+             MO_F32_Exp   -> fsLit "expf"
+             MO_F32_Log   -> fsLit "logf"
+
+             MO_F32_Asin  -> fsLit "asinf"
+             MO_F32_Acos  -> fsLit "acosf"
+             MO_F32_Atan  -> fsLit "atanf"
+
+             MO_F32_Sinh  -> fsLit "sinhf"
+             MO_F32_Cosh  -> fsLit "coshf"
+             MO_F32_Tanh  -> fsLit "tanhf"
+             MO_F32_Pwr   -> fsLit "powf"
+
+             MO_F64_Sqrt  -> fsLit "sqrt"
+             MO_F64_Sin   -> fsLit "sin"
+             MO_F64_Cos   -> fsLit "cos"
+             MO_F64_Tan   -> fsLit "tan"
+             MO_F64_Exp   -> fsLit "exp"
+             MO_F64_Log   -> fsLit "log"
+
+             MO_F64_Asin  -> fsLit "asin"
+             MO_F64_Acos  -> fsLit "acos"
+             MO_F64_Atan  -> fsLit "atan"
+
+             MO_F64_Sinh  -> fsLit "sinh"
+             MO_F64_Cosh  -> fsLit "cosh"
+             MO_F64_Tanh  -> fsLit "tanh"
+             MO_F64_Pwr   -> fsLit "pow"
+
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+
+genSwitch expr ids
+  | opt_PIC
+  = do
+        (reg,e_code) <- getSomeReg expr
+        lbl <- getNewLabelNat
+        dflags <- getDynFlagsNat
+        dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+        (tableReg,t_code) <- getSomeReg $ dynRef
+        let
+            jumpTable = map jumpTableEntryRel ids
+            
+            jumpTableEntryRel Nothing
+                = CmmStaticLit (CmmInt 0 wordWidth)
+            jumpTableEntryRel (Just (BlockId id))
+                = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+                where blockLabel = mkAsmTempLabel id
+
+            op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
+                                       (EAIndex reg wORD_SIZE) (ImmInt 0))
+
+#if x86_64_TARGET_ARCH
+#if darwin_TARGET_OS
+    -- on Mac OS X/x86_64, put the jump table in the text section
+    -- to work around a limitation of the linker.
+    -- ld64 is unable to handle the relocations for
+    --     .quad L1 - L0
+    -- if L0 is not preceded by a non-anonymous label in its section.
+    
+            code = e_code `appOL` t_code `appOL` toOL [
+                            ADD (intSize wordWidth) op (OpReg tableReg),
+                            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
+                            LDATA Text (CmmDataLabel lbl : jumpTable)
+                    ]
+#else
+    -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
+    -- relocations, hence we only get 32-bit offsets in the jump
+    -- table. As these offsets are always negative we need to properly
+    -- sign extend them to 64-bit. This hack should be removed in
+    -- conjunction with the hack in PprMach.hs/pprDataItem once
+    -- binutils 2.17 is standard.
+            code = e_code `appOL` t_code `appOL` toOL [
+                           LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                           MOVSxL II32
+                                  (OpAddr (AddrBaseIndex (EABaseReg tableReg)
+                                                         (EAIndex reg wORD_SIZE) (ImmInt 0)))
+                                  (OpReg reg),
+                           ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
+                           JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+                  ]
+#endif
+#else
+            code = e_code `appOL` t_code `appOL` toOL [
+                            LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                            ADD (intSize wordWidth) op (OpReg tableReg),
+                            JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+                    ]
+#endif
+        return code
+  | otherwise
+  = do
+        (reg,e_code) <- getSomeReg expr
+        lbl <- getNewLabelNat
+        let
+            jumpTable = map jumpTableEntry ids
+            op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
+            code = e_code `appOL` toOL [
+                    LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+                    JMP_TBL op [ id | Just id <- ids ]
+                 ]
+        -- in
+        return code
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+-- 
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condIntReg cond x y = do
+  CondCode _ cond cond_code <- condIntCode cond x y
+  tmp <- getNewRegNat II8
+  let 
+       code dst = cond_code `appOL` toOL [
+                   SETCC cond (OpReg tmp),
+                   MOVZxL II8 (OpReg tmp) (OpReg dst)
+                 ]
+  -- in
+  return (Any II32 code)
+
+
+
+condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+condFltReg cond x y = do
+  CondCode _ cond cond_code <- condFltCode cond x y
+  tmp <- getNewRegNat II8
+  let 
+       code dst = cond_code `appOL` toOL [
+                   SETCC cond (OpReg tmp),
+                   MOVZxL II8 (OpReg tmp) (OpReg dst)
+                 ]
+  -- in
+  return (Any II32 code)
+
+#elif x86_64_TARGET_ARCH
+condFltReg cond x y = do
+  CondCode _ cond cond_code <- condFltCode cond x y
+  tmp1 <- getNewRegNat wordSize
+  tmp2 <- getNewRegNat wordSize
+  let 
+       -- We have to worry about unordered operands (eg. comparisons
+       -- against NaN).  If the operands are unordered, the comparison
+       -- sets the parity flag, carry flag and zero flag.
+       -- All comparisons are supposed to return false for unordered
+       -- operands except for !=, which returns true.
+       --
+       -- Optimisation: we don't have to test the parity flag if we
+       -- know the test has already excluded the unordered case: eg >
+       -- and >= test for a zero carry flag, which can only occur for
+       -- ordered operands.
+       --
+       -- ToDo: by reversing comparisons we could avoid testing the
+       -- parity flag in more cases.
+
+       code dst = 
+          cond_code `appOL` 
+            (case cond of
+               NE  -> or_unordered dst
+               GU  -> plain_test   dst
+               GEU -> plain_test   dst
+               _   -> and_ordered  dst)
+
+       plain_test dst = toOL [
+                   SETCC cond (OpReg tmp1),
+                   MOVZxL II8 (OpReg tmp1) (OpReg dst)
+                ]
+       or_unordered dst = toOL [
+                   SETCC cond (OpReg tmp1),
+                   SETCC PARITY (OpReg tmp2),
+                   OR II8 (OpReg tmp1) (OpReg tmp2),
+                   MOVZxL II8 (OpReg tmp2) (OpReg dst)
+                 ]
+       and_ordered dst = toOL [
+                   SETCC cond (OpReg tmp1),
+                   SETCC NOTPARITY (OpReg tmp2),
+                   AND II8 (OpReg tmp1) (OpReg tmp2),
+                   MOVZxL II8 (OpReg tmp2) (OpReg dst)
+                 ]
+  -- in
+  return (Any II32 code)
+
+#else
+condFltReg     = panic "X86.condFltReg: not defined"
+
+#endif
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+
+{-
+The Rules of the Game are:
+
+* You cannot assume anything about the destination register dst;
+  it may be anything, including a fixed reg.
+
+* You may compute an operand into a fixed reg, but you may not 
+  subsequently change the contents of that fixed reg.  If you
+  want to do so, first copy the value either to a temporary
+  or into dst.  You are free to modify dst even if it happens
+  to be a fixed reg -- that's not your problem.
+
+* You cannot assume that a fixed reg will stay live over an
+  arbitrary computation.  The same applies to the dst reg.
+
+* Temporary regs obtained from getNewRegNat are distinct from 
+  each other and from all other regs, and stay live over 
+  arbitrary computations.
+
+--------------------
+
+SDM's version of The Rules:
+
+* If getRegister returns Any, that means it can generate correct
+  code which places the result in any register, period.  Even if that
+  register happens to be read during the computation.
+
+  Corollary #1: this means that if you are generating code for an
+  operation with two arbitrary operands, you cannot assign the result
+  of the first operand into the destination register before computing
+  the second operand.  The second operand might require the old value
+  of the destination register.
+
+  Corollary #2: A function might be able to generate more efficient
+  code if it knows the destination register is a new temporary (and
+  therefore not read by any of the sub-computations).
+
+* If getRegister returns Any, then the code it generates may modify only:
+       (a) fresh temporaries
+       (b) the destination register
+       (c) known registers (eg. %ecx is used by shifts)
+  In particular, it may *not* modify global registers, unless the global
+  register happens to be the destination register.
+-}
+
+trivialCode width instr (Just revinstr) (CmmLit lit_a) b
+  | is32BitLit lit_a = do
+  b_code <- getAnyReg b
+  let
+       code dst 
+        = b_code dst `snocOL`
+           revinstr (OpImm (litToImm lit_a)) (OpReg dst)
+  -- in
+  return (Any (intSize width) code)
+
+trivialCode width instr maybe_revinstr a b
+  = genTrivialCode (intSize width) instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+  (b_op, b_code) <- getNonClobberedOperand b
+  a_code <- getAnyReg a
+  tmp <- getNewRegNat rep
+  let
+     -- We want the value of b to stay alive across the computation of a.
+     -- But, we want to calculate a straight into the destination register,
+     -- because the instruction only has two operands (dst := dst `op` src).
+     -- The troublesome case is when the result of b is in the same register
+     -- as the destination reg.  In this case, we have to save b in a
+     -- new temporary across the computation of a.
+     code dst
+       | dst `regClashesWithOp` b_op =
+               b_code `appOL`
+               unitOL (MOV rep b_op (OpReg tmp)) `appOL`
+               a_code dst `snocOL`
+               instr (OpReg tmp) (OpReg dst)
+       | otherwise =
+               b_code `appOL`
+               a_code dst `snocOL`
+               instr b_op (OpReg dst)
+  -- in
+  return (Any rep code)
+
+reg `regClashesWithOp` OpReg reg2   = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _            = False
+
+-----------
+
+trivialUCode rep instr x = do
+  x_code <- getAnyReg x
+  let
+     code dst =
+       x_code dst `snocOL`
+       instr (OpReg dst)
+  return (Any rep code)
+
+-----------
+
+#if i386_TARGET_ARCH
+
+trivialFCode width instr x y = do
+  (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
+  (y_reg, y_code) <- getSomeReg y
+  let
+     size = floatSize width
+     code dst =
+       x_code `appOL`
+       y_code `snocOL`
+       instr size x_reg y_reg dst
+  return (Any size code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+trivialFCode pk instr x y 
+  = genTrivialCode size (instr size) x y
+  where size = floatSize pk
+#endif
+
+trivialUFCode size instr x = do
+  (x_reg, x_code) <- getSomeReg x
+  let
+     code dst =
+       x_code `snocOL`
+       instr x_reg dst
+  -- in
+  return (Any size code)
+
+
+--------------------------------------------------------------------------------
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+coerceInt2FP from to x = do
+  (x_reg, x_code) <- getSomeReg x
+  let
+        opc  = case to of W32 -> GITOF; W64 -> GITOD
+        code dst = x_code `snocOL` opc x_reg dst
+       -- ToDo: works for non-II32 reps?
+  return (Any (floatSize to) code)
+
+#elif x86_64_TARGET_ARCH
+coerceInt2FP from to x = do
+  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+  let
+        opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
+        code dst = x_code `snocOL` opc x_op dst
+  -- in
+  return (Any (floatSize to) code) -- works even if the destination rep is <II32
+
+#else
+coerceInt2FP   = panic "X86.coerceInt2FP: not defined"
+
+#endif
+
+
+
+
+--------------------------------------------------------------------------------
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+coerceFP2Int from to x = do
+  (x_reg, x_code) <- getSomeReg x
+  let
+        opc  = case from of W32 -> GFTOI; W64 -> GDTOI
+        code dst = x_code `snocOL` opc x_reg dst
+       -- ToDo: works for non-II32 reps?
+  -- in
+  return (Any (intSize to) code)
+
+#elif x86_64_TARGET_ARCH
+coerceFP2Int from to x = do
+  (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
+  let
+        opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
+        code dst = x_code `snocOL` opc x_op dst
+  -- in
+  return (Any (intSize to) code) -- works even if the destination rep is <II32
+
+#else
+coerceFP2Int   = panic "X86.coerceFP2Int: not defined"
+
+#endif
+
+
+
+
+--------------------------------------------------------------------------------
+coerceFP2FP :: Width -> CmmExpr -> NatM Register
+
+#if x86_64_TARGET_ARCH
+coerceFP2FP to x = do
+  (x_reg, x_code) <- getSomeReg x
+  let
+        opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+        code dst = x_code `snocOL` opc x_reg dst
+  -- in
+  return (Any (floatSize to) code)
+
+#else
+coerceFP2FP    = panic "X86.coerceFP2FP: not defined"
+
+#endif
+
+
+
diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs

new file mode 100644 (file)

index 0000000..60e40b9
--- /dev/null
+++ b/compiler/nativeGen/X86/Cond.hs
@@ -0,0 +1,52 @@
+
+module X86.Cond (
+       Cond(..),
+       condUnsigned,
+       condToSigned,
+       condToUnsigned
+)
+
+where
+
+data Cond
+       = ALWAYS        -- What's really used? ToDo
+       | EQQ
+       | GE
+       | GEU
+       | GTT
+       | GU
+       | LE
+       | LEU
+       | LTT
+       | LU
+       | NE
+       | NEG
+       | POS
+       | CARRY
+       | OFLO
+       | PARITY
+       | NOTPARITY
+
+
+condUnsigned :: Cond -> Bool
+condUnsigned GU  = True
+condUnsigned LU  = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _   = False
+
+
+condToSigned :: Cond -> Cond
+condToSigned GU  = GTT
+condToSigned LU  = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x   = x
+
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE  = GEU
+condToUnsigned LE  = LEU
+condToUnsigned x   = x
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs

index 0dea1dd..b4b6fb5 100644 (file)
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -12,34 +12,46 @@
  module X86.Instr
  where
  
  module X86.Instr
  where
  
-import BlockId
+import X86.Cond
  import X86.Regs
  import X86.Regs
-import RegsBase
+import Instruction
+import Size
+import RegClass
+import Reg
+
+import BlockId
  import Cmm
  import FastString
  import Cmm
  import FastString
+import FastBool
  
  import CLabel
  import Panic
  
  
  import CLabel
  import Panic
  
-data Cond
-       = ALWAYS        -- What's really used? ToDo
-       | EQQ
-       | GE
-       | GEU
-       | GTT
-       | GU
-       | LE
-       | LEU
-       | LTT
-       | LU
-       | NE
-       | NEG
-       | POS
-       | CARRY
-       | OFLO
-       | PARITY
-       | NOTPARITY
-       deriving (Eq)
+-- Size of a PPC memory address, in bytes.
+--
+archWordSize :: Size
+#if i386_TARGET_ARCH
+archWordSize   = II32
+#elif x86_64_TARGET_ARCH
+archWordSize   = II64
+#else
+archWordSize   = panic "X86.Instr.archWordSize: not defined"
+#endif
+
+-- | Instruction instance for x86 instruction set.
+instance Instruction Instr where
+       regUsageOfInstr         = x86_regUsageOfInstr
+       patchRegsOfInstr        = x86_patchRegsOfInstr
+       isJumpishInstr          = x86_isJumpishInstr
+       jumpDestsOfInstr        = x86_jumpDestsOfInstr
+       patchJumpInstr          = x86_patchJumpInstr
+       mkSpillInstr            = x86_mkSpillInstr
+       mkLoadInstr             = x86_mkLoadInstr
+       takeDeltaInstr          = x86_takeDeltaInstr
+       isMetaInstr             = x86_isMetaInstr
+       mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
+       takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
+       mkJumpInstr             = x86_mkJumpInstr
  
  
  -- -----------------------------------------------------------------------------
  
  
  -- -----------------------------------------------------------------------------
@@ -154,13 +166,6 @@ data Instr
          -- benefit of subsequent passes
         | DELTA   Int
  
          -- benefit of subsequent passes
         | DELTA   Int
  
-       -- | spill this reg to a stack slot
-       | SPILL   Reg Int
-
-       -- | reload this reg from a stack slot
-       | RELOAD  Int Reg
-
-
         -- Moves.
         | MOV         Size Operand Operand
         | MOVZxL      Size Operand Operand -- size is the size of operand 1
         -- Moves.
         | MOV         Size Operand Operand
         | MOVZxL      Size Operand Operand -- size is the size of operand 1
@@ -301,7 +306,436 @@ data Operand
  
  
  
  
  
  
-i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr]
+x86_regUsageOfInstr :: Instr -> RegUsage
+x86_regUsageOfInstr instr 
+ = case instr of
+    MOV    _ src dst   -> usageRW src dst
+    MOVZxL _ src dst   -> usageRW src dst
+    MOVSxL _ src dst   -> usageRW src dst
+    LEA    _ src dst   -> usageRW src dst
+    ADD    _ src dst   -> usageRM src dst
+    ADC    _ src dst   -> usageRM src dst
+    SUB    _ src dst   -> usageRM src dst
+    IMUL   _ src dst   -> usageRM src dst
+    IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
+    MUL    _ src dst   -> usageRM src dst
+    DIV    _ op        -> mkRU (eax:edx:use_R op) [eax,edx]
+    IDIV   _ op        -> mkRU (eax:edx:use_R op) [eax,edx]
+    AND    _ src dst   -> usageRM src dst
+    OR     _ src dst   -> usageRM src dst
+
+    XOR    _ (OpReg src) (OpReg dst)
+        | src == dst    -> mkRU [] [dst]
+
+    XOR    _ src dst   -> usageRM src dst
+    NOT    _ op                -> usageM op
+    NEGI   _ op                -> usageM op
+    SHL    _ imm dst   -> usageRM imm dst
+    SAR    _ imm dst   -> usageRM imm dst
+    SHR    _ imm dst   -> usageRM imm dst
+    BT     _ _   src   -> mkRUR (use_R src)
+
+    PUSH   _ op                -> mkRUR (use_R op)
+    POP    _ op                -> mkRU [] (def_W op)
+    TEST   _ src dst   -> mkRUR (use_R src ++ use_R dst)
+    CMP    _ src dst   -> mkRUR (use_R src ++ use_R dst)
+    SETCC  _ op                -> mkRU [] (def_W op)
+    JXX    _ _         -> mkRU [] []
+    JXX_GBL _ _                -> mkRU [] []
+    JMP     op         -> mkRUR (use_R op)
+    JMP_TBL op _        -> mkRUR (use_R op)
+    CALL (Left _)  params   -> mkRU params callClobberedRegs
+    CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
+    CLTD   _           -> mkRU [eax] [edx]
+    NOP                        -> mkRU [] []
+
+#if i386_TARGET_ARCH
+    GMOV   src dst     -> mkRU [src] [dst]
+    GLD    _ src dst   -> mkRU (use_EA src) [dst]
+    GST    _ src dst   -> mkRUR (src : use_EA dst)
+
+    GLDZ   dst         -> mkRU [] [dst]
+    GLD1   dst         -> mkRU [] [dst]
+
+    GFTOI  src dst     -> mkRU [src] [dst]
+    GDTOI  src dst     -> mkRU [src] [dst]
+
+    GITOF  src dst     -> mkRU [src] [dst]
+    GITOD  src dst     -> mkRU [src] [dst]
+
+    GADD   _ s1 s2 dst -> mkRU [s1,s2] [dst]
+    GSUB   _ s1 s2 dst -> mkRU [s1,s2] [dst]
+    GMUL   _ s1 s2 dst -> mkRU [s1,s2] [dst]
+    GDIV   _ s1 s2 dst -> mkRU [s1,s2] [dst]
+
+    GCMP   _ src1 src2   -> mkRUR [src1,src2]
+    GABS   _ src dst     -> mkRU [src] [dst]
+    GNEG   _ src dst     -> mkRU [src] [dst]
+    GSQRT  _ src dst     -> mkRU [src] [dst]
+    GSIN   _ _ _ src dst -> mkRU [src] [dst]
+    GCOS   _ _ _ src dst -> mkRU [src] [dst]
+    GTAN   _ _ _ src dst -> mkRU [src] [dst]
+#endif
+
+#if x86_64_TARGET_ARCH
+    CVTSS2SD   src dst -> mkRU [src] [dst]
+    CVTSD2SS   src dst -> mkRU [src] [dst]
+    CVTTSS2SIQ src dst -> mkRU (use_R src) [dst]
+    CVTTSD2SIQ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SS   src dst -> mkRU (use_R src) [dst]
+    CVTSI2SD   src dst -> mkRU (use_R src) [dst]
+    FDIV _     src dst -> usageRM src dst
+#endif    
+
+    FETCHGOT reg        -> mkRU [] [reg]
+    FETCHPC  reg        -> mkRU [] [reg]
+
+    COMMENT _          -> noUsage
+    DELTA   _           -> noUsage
+
+    _other             -> panic "regUsage: unrecognised instr"
+
+ where
+    -- 2 operand form; first operand Read; second Written
+    usageRW :: Operand -> Operand -> RegUsage
+    usageRW op (OpReg reg)     = mkRU (use_R op) [reg]
+    usageRW op (OpAddr ea)     = mkRUR (use_R op ++ use_EA ea)
+    usageRW _ _                        = panic "X86.RegInfo.usageRW: no match"
+
+    -- 2 operand form; first operand Read; second Modified
+    usageRM :: Operand -> Operand -> RegUsage
+    usageRM op (OpReg reg)     = mkRU (use_R op ++ [reg]) [reg]
+    usageRM op (OpAddr ea)     = mkRUR (use_R op ++ use_EA ea)
+    usageRM _ _                        = panic "X86.RegInfo.usageRM: no match"
+
+    -- 1 operand form; operand Modified
+    usageM :: Operand -> RegUsage
+    usageM (OpReg reg)         = mkRU [reg] [reg]
+    usageM (OpAddr ea)         = mkRUR (use_EA ea)
+    usageM _                   = panic "X86.RegInfo.usageM: no match"
+
+    -- Registers defd when an operand is written.
+    def_W (OpReg reg)          = [reg]
+    def_W (OpAddr _ )          = []
+    def_W _                    = panic "X86.RegInfo.def_W: no match"
+
+    -- Registers used when an operand is read.
+    use_R (OpReg reg)  = [reg]
+    use_R (OpImm _)    = []
+    use_R (OpAddr ea)  = use_EA ea
+
+    -- Registers used to compute an effective address.
+    use_EA (ImmAddr _ _) = []
+    use_EA (AddrBaseIndex base index _) = 
+       use_base base $! use_index index
+       where use_base (EABaseReg r) x = r : x
+             use_base _ x             = x
+             use_index EAIndexNone   = []
+             use_index (EAIndex i _) = [i]
+
+    mkRUR src = src' `seq` RU src' []
+       where src' = filter interesting src
+
+    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
+       where src' = filter interesting src
+             dst' = filter interesting dst
+
+interesting :: Reg -> Bool
+interesting (VirtualRegI  _)  = True
+interesting (VirtualRegHi _)  = True
+interesting (VirtualRegF  _)  = True
+interesting (VirtualRegD  _)  = True
+interesting (RealReg i)       = isFastTrue (freeReg i)
+
+
+
+
+x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+x86_patchRegsOfInstr instr env 
+ = case instr of
+    MOV  sz src dst    -> patch2 (MOV  sz) src dst
+    MOVZxL sz src dst  -> patch2 (MOVZxL sz) src dst
+    MOVSxL sz src dst  -> patch2 (MOVSxL sz) src dst
+    LEA  sz src dst    -> patch2 (LEA  sz) src dst
+    ADD  sz src dst    -> patch2 (ADD  sz) src dst
+    ADC  sz src dst    -> patch2 (ADC  sz) src dst
+    SUB  sz src dst    -> patch2 (SUB  sz) src dst
+    IMUL sz src dst    -> patch2 (IMUL sz) src dst
+    IMUL2 sz src        -> patch1 (IMUL2 sz) src
+    MUL sz src dst     -> patch2 (MUL sz) src dst
+    IDIV sz op         -> patch1 (IDIV sz) op
+    DIV sz op          -> patch1 (DIV sz) op
+    AND  sz src dst    -> patch2 (AND  sz) src dst
+    OR   sz src dst    -> patch2 (OR   sz) src dst
+    XOR  sz src dst    -> patch2 (XOR  sz) src dst
+    NOT  sz op                 -> patch1 (NOT  sz) op
+    NEGI sz op         -> patch1 (NEGI sz) op
+    SHL  sz imm dst    -> patch1 (SHL sz imm) dst
+    SAR  sz imm dst    -> patch1 (SAR sz imm) dst
+    SHR  sz imm dst    -> patch1 (SHR sz imm) dst
+    BT   sz imm src     -> patch1 (BT  sz imm) src
+    TEST sz src dst    -> patch2 (TEST sz) src dst
+    CMP  sz src dst    -> patch2 (CMP  sz) src dst
+    PUSH sz op         -> patch1 (PUSH sz) op
+    POP  sz op         -> patch1 (POP  sz) op
+    SETCC cond op      -> patch1 (SETCC cond) op
+    JMP op             -> patch1 JMP op
+    JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
+
+#if i386_TARGET_ARCH
+    GMOV src dst       -> GMOV (env src) (env dst)
+    GLD  sz src dst    -> GLD sz (lookupAddr src) (env dst)
+    GST  sz src dst    -> GST sz (env src) (lookupAddr dst)
+
+    GLDZ dst           -> GLDZ (env dst)
+    GLD1 dst           -> GLD1 (env dst)
+
+    GFTOI src dst      -> GFTOI (env src) (env dst)
+    GDTOI src dst      -> GDTOI (env src) (env dst)
+
+    GITOF src dst      -> GITOF (env src) (env dst)
+    GITOD src dst      -> GITOD (env src) (env dst)
+
+    GADD sz s1 s2 dst  -> GADD sz (env s1) (env s2) (env dst)
+    GSUB sz s1 s2 dst  -> GSUB sz (env s1) (env s2) (env dst)
+    GMUL sz s1 s2 dst  -> GMUL sz (env s1) (env s2) (env dst)
+    GDIV sz s1 s2 dst  -> GDIV sz (env s1) (env s2) (env dst)
+
+    GCMP sz src1 src2  -> GCMP sz (env src1) (env src2)
+    GABS sz src dst    -> GABS sz (env src) (env dst)
+    GNEG sz src dst    -> GNEG sz (env src) (env dst)
+    GSQRT sz src dst   -> GSQRT sz (env src) (env dst)
+    GSIN sz l1 l2 src dst      -> GSIN sz l1 l2 (env src) (env dst)
+    GCOS sz l1 l2 src dst      -> GCOS sz l1 l2 (env src) (env dst)
+    GTAN sz l1 l2 src dst      -> GTAN sz l1 l2 (env src) (env dst)
+#endif
+
+#if x86_64_TARGET_ARCH
+    CVTSS2SD src dst   -> CVTSS2SD (env src) (env dst)
+    CVTSD2SS src dst   -> CVTSD2SS (env src) (env dst)
+    CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst)
+    CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst)
+    CVTSI2SS src dst   -> CVTSI2SS (patchOp src) (env dst)
+    CVTSI2SD src dst   -> CVTSI2SD (patchOp src) (env dst)
+    FDIV sz src dst    -> FDIV sz (patchOp src) (patchOp dst)
+#endif    
+
+    CALL (Left _)  _   -> instr
+    CALL (Right reg) p -> CALL (Right (env reg)) p
+    
+    FETCHGOT reg        -> FETCHGOT (env reg)
+    FETCHPC  reg        -> FETCHPC  (env reg)
+   
+    NOP                        -> instr
+    COMMENT _          -> instr
+    DELTA _            -> instr
+
+    JXX _ _            -> instr
+    JXX_GBL _ _                -> instr
+    CLTD _             -> instr
+
+    _other             -> panic "patchRegs: unrecognised instr"
+
+  where
+    patch1 insn op      = insn $! patchOp op
+    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
+
+    patchOp (OpReg  reg) = OpReg $! env reg
+    patchOp (OpImm  imm) = OpImm imm
+    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
+
+    lookupAddr (ImmAddr imm off) = ImmAddr imm off
+    lookupAddr (AddrBaseIndex base index disp)
+      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
+      where
+       lookupBase EABaseNone       = EABaseNone
+       lookupBase EABaseRip        = EABaseRip
+       lookupBase (EABaseReg r)    = EABaseReg (env r)
+                                
+       lookupIndex EAIndexNone     = EAIndexNone
+       lookupIndex (EAIndex r i)   = EAIndex (env r) i
+
+
+--------------------------------------------------------------------------------
+x86_isJumpishInstr 
+       :: Instr -> Bool
+
+x86_isJumpishInstr instr
+ = case instr of
+       JMP{}           -> True
+       JXX{}           -> True
+       JXX_GBL{}       -> True
+       JMP_TBL{}       -> True
+       CALL{}          -> True
+       _               -> False
+
+
+x86_jumpDestsOfInstr
+       :: Instr 
+       -> [BlockId]
+
+x86_jumpDestsOfInstr insn 
+  = case insn of
+       JXX _ id        -> [id]
+       JMP_TBL _ ids   -> ids
+       _               -> []
+
+
+x86_patchJumpInstr 
+       :: Instr -> (BlockId -> BlockId) -> Instr
+
+x86_patchJumpInstr insn patchF
+  = case insn of
+       JXX cc id       -> JXX cc (patchF id)
+       JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
+       _               -> insn
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- | Make a spill instruction.
+x86_mkSpillInstr
+       :: Reg          -- register to spill
+       -> Int          -- current stack delta
+       -> Int          -- spill slot to use
+       -> Instr
+
+#if   i386_TARGET_ARCH
+x86_mkSpillInstr reg delta slot
+  = let        off     = spillSlotToOffset slot
+    in
+    let off_w = (off-delta) `div` 4
+    in case regClass reg of
+          RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
+          _         -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
+
+#elif x86_64_TARGET_ARCH
+x86_mkSpillInstr reg delta slot
+  = let        off     = spillSlotToOffset slot
+    in
+    let off_w = (off-delta) `div` 8
+    in case regClass reg of
+          RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
+          RcDouble  -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
+           _         -> panic "X86.mkSpillInstr: no match"
+               -- ToDo: will it work to always spill as a double?
+               -- does that cause a stall if the data was a float?
+#else
+x86_mkSpillInstr _ _ _
+    =   panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
+#endif
+
+
+-- | Make a spill reload instruction.
+x86_mkLoadInstr
+       :: Reg          -- register to load
+       -> Int          -- current stack delta
+       -> Int          -- spill slot to use
+       -> Instr
+
+#if   i386_TARGET_ARCH
+x86_mkLoadInstr reg delta slot
+  = let off     = spillSlotToOffset slot
+    in
+       let off_w = (off-delta) `div` 4
+        in case regClass reg of {
+              RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
+              _         -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
+#elif x86_64_TARGET_ARCH
+x86_mkLoadInstr reg delta slot
+  = let off     = spillSlotToOffset slot
+    in
+       let off_w = (off-delta) `div` 8
+        in case regClass reg of
+              RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
+              _         -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
+#else
+x86_mkLoadInstr _ _ _
+       = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
+#endif
+
+
+--------------------------------------------------------------------------------
+
+-- | See if this instruction is telling us the current C stack delta
+x86_takeDeltaInstr
+       :: Instr
+       -> Maybe Int
+       
+x86_takeDeltaInstr instr
+ = case instr of
+       DELTA i         -> Just i
+       _               -> Nothing
+
+
+x86_isMetaInstr
+       :: Instr
+       -> Bool
+       
+x86_isMetaInstr instr
+ = case instr of
+       COMMENT{}       -> True
+       LDATA{}         -> True
+       NEWBLOCK{}      -> True
+       DELTA{}         -> True
+       _               -> False
+
+
+
+-- | Make a reg-reg move instruction.
+--     On SPARC v8 there are no instructions to move directly between
+--     floating point and integer regs. If we need to do that then we
+--     have to go via memory.
+--
+x86_mkRegRegMoveInstr
+       :: Reg
+       -> Reg
+       -> Instr
+
+x86_mkRegRegMoveInstr src dst
+ = case regClass src of
+#if   i386_TARGET_ARCH
+        RcInteger -> MOV II32 (OpReg src) (OpReg dst)
+        RcDouble  -> GMOV src dst
+       RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
+#else
+        RcInteger -> MOV II64 (OpReg src) (OpReg dst)
+        RcDouble  -> MOV FF64 (OpReg src) (OpReg dst)
+       RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
+#endif
+
+
+-- | Check whether an instruction represents a reg-reg move.
+--     The register allocator attempts to eliminate reg->reg moves whenever it can,
+--     by assigning the src and dest temporaries to the same real register.
+--
+x86_takeRegRegMoveInstr
+       :: Instr 
+       -> Maybe (Reg,Reg)
+
+x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2)) 
+       = Just (r1,r2)
+
+x86_takeRegRegMoveInstr _  = Nothing
+
+
+-- | Make an unconditional branch instruction.
+x86_mkJumpInstr
+       :: BlockId
+       -> [Instr]
+
+x86_mkJumpInstr id 
+       = [JXX ALWAYS id]
+
+
+
+
+
+i386_insert_ffrees 
+       :: [GenBasicBlock Instr] 
+       -> [GenBasicBlock Instr]
+
  i386_insert_ffrees blocks
     | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
     = map ffree_before_nonlocal_transfers blocks
  i386_insert_ffrees blocks
     | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
     = map ffree_before_nonlocal_transfers blocks
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs

index c0ad496..3f181fc 100644 (file)
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -7,12 +7,15 @@
  -----------------------------------------------------------------------------
  
  module X86.Ppr (
  -----------------------------------------------------------------------------
  
  module X86.Ppr (
+       pprNatCmmTop,
+       pprBasicBlock,
+       pprSectionHeader,
+       pprData,
+       pprInstr,
         pprUserReg,
         pprSize,
         pprImm,
         pprUserReg,
         pprSize,
         pprImm,
-       pprSectionHeader,
         pprDataItem,
         pprDataItem,
-       pprInstr
  )
  
  where
  )
  
  where
@@ -20,24 +23,145 @@ where
  #include "HsVersions.h"
  #include "nativeGen/NCG.h"
  
  #include "HsVersions.h"
  #include "nativeGen/NCG.h"
  
-import PprBase
-import RegsBase
  import X86.Regs
  import X86.Instr
  import X86.Regs
  import X86.Instr
+import X86.Cond
+import Instruction
+import Size
+import Reg
+import PprBase
+
  
  import BlockId
  import Cmm
  
  import BlockId
  import Cmm
-
-import CLabel          ( CLabel, mkAsmTempLabel )
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-import CLabel       ( mkDeadStripPreventer )
-#endif
-
+import CLabel
  import Unique          ( pprUnique )
  import Pretty
  import FastString
  import qualified Outputable
  import Unique          ( pprUnique )
  import Pretty
  import FastString
  import qualified Outputable
-import Outputable      (panic)
+import Outputable      (panic, Outputable)
+
+import Data.Word
+
+
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) = 
+  pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) = 
+  pprSectionHeader Text $$
+  (if null info then -- blocks guaranteed not null, so label needed
+       pprLabel lbl
+   else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+            pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                <> char ':' $$
+#endif
+       vcat (map pprData info) $$
+       pprLabel (entryLblToInfoLbl lbl)
+  ) $$
+  vcat (map pprBasicBlock blocks)
+     -- above: Even the first block gets a label, because with branch-chain
+     -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+        -- If we are using the .subsections_via_symbols directive
+        -- (available on recent versions of Darwin),
+        -- we have to make sure that there is some kind of reference
+        -- from the entry code to a label on the _top_ of of the info table,
+        -- so that the linker will not think it is unreferenced and dead-strip
+        -- it. That's why the label is called a DeadStripPreventer (_dsp).
+  $$ if not (null info)
+                   then text "\t.long "
+                     <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+                     <+> char '-'
+                     <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                   else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+  pprLabel (mkAsmTempLabel id) $$
+  vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes)         = pprAlign bytes
+pprData (CmmDataLabel lbl)       = pprLabel lbl
+pprData (CmmString str)          = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit)       = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+  | not (externallyVisibleCLabel lbl) = empty
+  | otherwise = ptext IF_ARCH_sparc((sLit ".global "), 
+                                   (sLit ".globl ")) <>
+               pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+  | not (externallyVisibleCLabel lbl) = empty
+  | otherwise = ptext (sLit ".type ") <>
+               pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+  = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+  = vcat (map do1 str) $$ do1 0
+    where
+       do1 :: Word8 -> Doc
+       do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+
+
+pprAlign bytes
+       = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
+  where
+
+#if darwin_TARGET_OS
+       pow2 = log2 bytes
+
+       log2 :: Int -> Int  -- cache the common ones
+       log2 1 = 0 
+       log2 2 = 1
+       log2 4 = 2
+       log2 8 = 3
+       log2 n = 1 + log2 (n `quot` 2)
+#endif
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+    ppr         instr  = Outputable.docToSDoc $ pprInstr instr
+
+
+
+
+
+
+
+
+
+
+
  
  #if  i386_TARGET_ARCH || x86_64_TARGET_ARCH
  pprUserReg :: Reg -> Doc
  
  #if  i386_TARGET_ARCH || x86_64_TARGET_ARCH
  pprUserReg :: Reg -> Doc
@@ -49,7 +173,6 @@ pprUserReg = panic "X86.Ppr.pprUserReg: not defined"
  
  #endif
  
  
  #endif
  
-
  pprReg :: Size -> Reg -> Doc
  
  pprReg s r
  pprReg :: Size -> Reg -> Doc
  
  pprReg s r
@@ -228,7 +351,7 @@ pprAddr (AddrBaseIndex base index displacement)
    = let
         pp_disp  = ppr_disp displacement
         pp_off p = pp_disp <> char '(' <> p <> char ')'
    = let
         pp_disp  = ppr_disp displacement
         pp_off p = pp_disp <> char '(' <> p <> char ')'
-       pp_reg r = pprReg wordSize r
+       pp_reg r = pprReg archWordSize r
      in
      case (base, index) of
        (EABaseNone,  EAIndexNone) -> pp_disp
      in
      case (base, index) of
        (EABaseNone,  EAIndexNone) -> pp_disp
@@ -384,6 +507,7 @@ pprInstr (NEWBLOCK _)
  pprInstr (LDATA _ _)
     = panic "PprMach.pprInstr: LDATA"
  
  pprInstr (LDATA _ _)
     = panic "PprMach.pprInstr: LDATA"
  
+{-
  pprInstr (SPILL reg slot)
     = hcat [
         ptext (sLit "\tSPILL"),
  pprInstr (SPILL reg slot)
     = hcat [
         ptext (sLit "\tSPILL"),
@@ -399,6 +523,7 @@ pprInstr (RELOAD slot reg)
         ptext (sLit "SLOT") <> parens (int slot),
         comma,
         pprUserReg reg]
         ptext (sLit "SLOT") <> parens (int slot),
         comma,
         pprUserReg reg]
+-}
  
  pprInstr (MOV size src dst)
    = pprSizeOpOp (sLit "mov") size src dst
  
  pprInstr (MOV size src dst)
    = pprSizeOpOp (sLit "mov") size src dst
@@ -414,7 +539,7 @@ pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src
         -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
         -- instruction is shorter.
  
         -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
         -- instruction is shorter.
  
-pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes wordSize src dst
+pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
  
  -- here we do some patching, since the physical registers are only set late
  -- in the code generation.
  
  -- here we do some patching, since the physical registers are only set late
  -- in the code generation.
@@ -497,10 +622,10 @@ pprInstr (JXX cond (BlockId id))
  pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
  
  pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
  pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
  
  pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
-pprInstr (JMP op)          = (<>) (ptext (sLit "\tjmp *")) (pprOperand wordSize op)
+pprInstr (JMP op)          = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
  pprInstr (JMP_TBL op _)  = pprInstr (JMP op)
  pprInstr (CALL (Left imm) _)    = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
  pprInstr (JMP_TBL op _)  = pprInstr (JMP op)
  pprInstr (CALL (Left imm) _)    = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
-pprInstr (CALL (Right reg) _)   = (<>) (ptext (sLit "\tcall *")) (pprReg wordSize reg)
+pprInstr (CALL (Right reg) _)   = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
  
  pprInstr (IDIV sz op)  = pprSizeOp (sLit "idiv") sz op
  pprInstr (DIV sz op)    = pprSizeOp (sLit "div")  sz op
  
  pprInstr (IDIV sz op)  = pprSizeOp (sLit "idiv") sz op
  pprInstr (DIV sz op)    = pprSizeOp (sLit "div")  sz op
@@ -941,9 +1066,9 @@ pprRegReg :: LitString -> Reg -> Reg -> Doc
  pprRegReg name reg1 reg2
    = hcat [
         pprMnemonic_ name,
  pprRegReg name reg1 reg2
    = hcat [
         pprMnemonic_ name,
-       pprReg wordSize reg1,
+       pprReg archWordSize reg1,
          comma,
          comma,
-        pprReg wordSize reg2
+        pprReg archWordSize reg2
      ]
  
  
      ]
  
  
@@ -951,9 +1076,9 @@ pprOpReg :: LitString -> Operand -> Reg -> Doc
  pprOpReg name op1 reg2
    = hcat [
         pprMnemonic_ name,
  pprOpReg name op1 reg2
    = hcat [
         pprMnemonic_ name,
-       pprOperand wordSize op1,
+       pprOperand archWordSize op1,
          comma,
          comma,
-        pprReg wordSize reg2
+        pprReg archWordSize reg2
      ]
  
  
      ]
  
  
diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs

index 39bc6de..58d063b 100644 (file)
--- a/compiler/nativeGen/X86/RegInfo.hs
+++ b/compiler/nativeGen/X86/RegInfo.hs
@@ -1,26 +1,17 @@
  
  module X86.RegInfo (
  
  module X86.RegInfo (
-       RegUsage(..),
-       noUsage,
-       regUsage,
-       patchRegs,
-       jumpDests,
-       isJumpish,
-       patchJump,
-       isRegRegMove,
-
-        JumpDest(..), 
+       mkVReg,
+
+        JumpDest, 
         canShortcut, 
         shortcutJump, 
  
         canShortcut, 
         shortcutJump, 
  
-       mkSpillInstr,
-       mkLoadInstr,
-       mkRegRegMoveInstr,
-       mkBranchInstr,
-
         spillSlotSize,
         maxSpillSlots,
         spillSlotSize,
         maxSpillSlots,
-       spillSlotToOffset               
+       spillSlotToOffset,
+       
+       shortcutStatic,
+       regDotColor
  )
  
  where
  )
  
  where
@@ -29,341 +20,26 @@ where
  #include "HsVersions.h"
  
  import X86.Instr
  #include "HsVersions.h"
  
  import X86.Instr
+import X86.Cond
  import X86.Regs
  import X86.Regs
-import RegsBase
+import Size
+import Reg
  
  
+import Cmm
+import CLabel
  import BlockId
  import Outputable
  import Constants       ( rESERVED_C_STACK_BYTES )
  import BlockId
  import Outputable
  import Constants       ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-
--- -----------------------------------------------------------------------------
--- RegUsage type
-
--- @regUsage@ returns the sets of src and destination registers used
--- by a particular instruction.  Machine registers that are
--- pre-allocated to stgRegs are filtered out, because they are
--- uninteresting from a register allocation standpoint.  (We wouldn't
--- want them to end up on the free list!)  As far as we are concerned,
--- the fixed registers simply don't exist (for allocation purposes,
--- anyway).
-
--- regUsage doesn't need to do any trickery for jumps and such.  Just
--- state precisely the regs read and written by that insn.  The
--- consequences of control flow transfers, as far as register
--- allocation goes, are taken care of by the register allocator.
-
-data RegUsage = RU [Reg] [Reg]
-
-noUsage :: RegUsage
-noUsage  = RU [] []
-
-
-regUsage :: Instr -> RegUsage
-regUsage instr = case instr of
-    MOV    _ src dst   -> usageRW src dst
-    MOVZxL _ src dst   -> usageRW src dst
-    MOVSxL _ src dst   -> usageRW src dst
-    LEA    _ src dst   -> usageRW src dst
-    ADD    _ src dst   -> usageRM src dst
-    ADC    _ src dst   -> usageRM src dst
-    SUB    _ src dst   -> usageRM src dst
-    IMUL   _ src dst   -> usageRM src dst
-    IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
-    MUL    _ src dst   -> usageRM src dst
-    DIV    _ op        -> mkRU (eax:edx:use_R op) [eax,edx]
-    IDIV   _ op        -> mkRU (eax:edx:use_R op) [eax,edx]
-    AND    _ src dst   -> usageRM src dst
-    OR     _ src dst   -> usageRM src dst
-
-    XOR    _ (OpReg src) (OpReg dst)
-        | src == dst    -> mkRU [] [dst]
-
-    XOR    _ src dst   -> usageRM src dst
-    NOT    _ op                -> usageM op
-    NEGI   _ op                -> usageM op
-    SHL    _ imm dst   -> usageRM imm dst
-    SAR    _ imm dst   -> usageRM imm dst
-    SHR    _ imm dst   -> usageRM imm dst
-    BT     _ _   src   -> mkRUR (use_R src)
-
-    PUSH   _ op                -> mkRUR (use_R op)
-    POP    _ op                -> mkRU [] (def_W op)
-    TEST   _ src dst   -> mkRUR (use_R src ++ use_R dst)
-    CMP    _ src dst   -> mkRUR (use_R src ++ use_R dst)
-    SETCC  _ op                -> mkRU [] (def_W op)
-    JXX    _ _         -> mkRU [] []
-    JXX_GBL _ _                -> mkRU [] []
-    JMP     op         -> mkRUR (use_R op)
-    JMP_TBL op _        -> mkRUR (use_R op)
-    CALL (Left _)  params   -> mkRU params callClobberedRegs
-    CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
-    CLTD   _           -> mkRU [eax] [edx]
-    NOP                        -> mkRU [] []
-
-#if i386_TARGET_ARCH
-    GMOV   src dst     -> mkRU [src] [dst]
-    GLD    _ src dst   -> mkRU (use_EA src) [dst]
-    GST    _ src dst   -> mkRUR (src : use_EA dst)
-
-    GLDZ   dst         -> mkRU [] [dst]
-    GLD1   dst         -> mkRU [] [dst]
-
-    GFTOI  src dst     -> mkRU [src] [dst]
-    GDTOI  src dst     -> mkRU [src] [dst]
-
-    GITOF  src dst     -> mkRU [src] [dst]
-    GITOD  src dst     -> mkRU [src] [dst]
-
-    GADD   _ s1 s2 dst -> mkRU [s1,s2] [dst]
-    GSUB   _ s1 s2 dst -> mkRU [s1,s2] [dst]
-    GMUL   _ s1 s2 dst -> mkRU [s1,s2] [dst]
-    GDIV   _ s1 s2 dst -> mkRU [s1,s2] [dst]
-
-    GCMP   _ src1 src2   -> mkRUR [src1,src2]
-    GABS   _ src dst     -> mkRU [src] [dst]
-    GNEG   _ src dst     -> mkRU [src] [dst]
-    GSQRT  _ src dst     -> mkRU [src] [dst]
-    GSIN   _ _ _ src dst -> mkRU [src] [dst]
-    GCOS   _ _ _ src dst -> mkRU [src] [dst]
-    GTAN   _ _ _ src dst -> mkRU [src] [dst]
-#endif
-
-#if x86_64_TARGET_ARCH
-    CVTSS2SD   src dst -> mkRU [src] [dst]
-    CVTSD2SS   src dst -> mkRU [src] [dst]
-    CVTTSS2SIQ src dst -> mkRU (use_R src) [dst]
-    CVTTSD2SIQ src dst -> mkRU (use_R src) [dst]
-    CVTSI2SS   src dst -> mkRU (use_R src) [dst]
-    CVTSI2SD   src dst -> mkRU (use_R src) [dst]
-    FDIV _     src dst -> usageRM src dst
-#endif    
-
-    FETCHGOT reg        -> mkRU [] [reg]
-    FETCHPC  reg        -> mkRU [] [reg]
-
-    COMMENT _          -> noUsage
-    DELTA   _           -> noUsage
-    SPILL   reg _      -> mkRU [reg] []
-    RELOAD  _  reg     -> mkRU []    [reg]
-
-    _other             -> panic "regUsage: unrecognised instr"
-
- where
-    -- 2 operand form; first operand Read; second Written
-    usageRW :: Operand -> Operand -> RegUsage
-    usageRW op (OpReg reg)     = mkRU (use_R op) [reg]
-    usageRW op (OpAddr ea)     = mkRUR (use_R op ++ use_EA ea)
-    usageRW _ _                        = panic "X86.RegInfo.usageRW: no match"
-
-    -- 2 operand form; first operand Read; second Modified
-    usageRM :: Operand -> Operand -> RegUsage
-    usageRM op (OpReg reg)     = mkRU (use_R op ++ [reg]) [reg]
-    usageRM op (OpAddr ea)     = mkRUR (use_R op ++ use_EA ea)
-    usageRM _ _                        = panic "X86.RegInfo.usageRM: no match"
-
-    -- 1 operand form; operand Modified
-    usageM :: Operand -> RegUsage
-    usageM (OpReg reg)         = mkRU [reg] [reg]
-    usageM (OpAddr ea)         = mkRUR (use_EA ea)
-    usageM _                   = panic "X86.RegInfo.usageM: no match"
-
-    -- Registers defd when an operand is written.
-    def_W (OpReg reg)          = [reg]
-    def_W (OpAddr _ )          = []
-    def_W _                    = panic "X86.RegInfo.def_W: no match"
-
-    -- Registers used when an operand is read.
-    use_R (OpReg reg)  = [reg]
-    use_R (OpImm _)    = []
-    use_R (OpAddr ea)  = use_EA ea
-
-    -- Registers used to compute an effective address.
-    use_EA (ImmAddr _ _) = []
-    use_EA (AddrBaseIndex base index _) = 
-       use_base base $! use_index index
-       where use_base (EABaseReg r) x = r : x
-             use_base _ x             = x
-             use_index EAIndexNone   = []
-             use_index (EAIndex i _) = [i]
-
-    mkRUR src = src' `seq` RU src' []
-       where src' = filter interesting src
-
-    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
-       where src' = filter interesting src
-             dst' = filter interesting dst
-
-interesting :: Reg -> Bool
-interesting (VirtualRegI  _)  = True
-interesting (VirtualRegHi _)  = True
-interesting (VirtualRegF  _)  = True
-interesting (VirtualRegD  _)  = True
-interesting (RealReg i)       = isFastTrue (freeReg i)
-
-
-
-
--- -----------------------------------------------------------------------------
--- 'patchRegs' function
-
--- 'patchRegs' takes an instruction and applies the given mapping to
--- all the register references.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
-    MOV  sz src dst    -> patch2 (MOV  sz) src dst
-    MOVZxL sz src dst  -> patch2 (MOVZxL sz) src dst
-    MOVSxL sz src dst  -> patch2 (MOVSxL sz) src dst
-    LEA  sz src dst    -> patch2 (LEA  sz) src dst
-    ADD  sz src dst    -> patch2 (ADD  sz) src dst
-    ADC  sz src dst    -> patch2 (ADC  sz) src dst
-    SUB  sz src dst    -> patch2 (SUB  sz) src dst
-    IMUL sz src dst    -> patch2 (IMUL sz) src dst
-    IMUL2 sz src        -> patch1 (IMUL2 sz) src
-    MUL sz src dst     -> patch2 (MUL sz) src dst
-    IDIV sz op         -> patch1 (IDIV sz) op
-    DIV sz op          -> patch1 (DIV sz) op
-    AND  sz src dst    -> patch2 (AND  sz) src dst
-    OR   sz src dst    -> patch2 (OR   sz) src dst
-    XOR  sz src dst    -> patch2 (XOR  sz) src dst
-    NOT  sz op                 -> patch1 (NOT  sz) op
-    NEGI sz op         -> patch1 (NEGI sz) op
-    SHL  sz imm dst    -> patch1 (SHL sz imm) dst
-    SAR  sz imm dst    -> patch1 (SAR sz imm) dst
-    SHR  sz imm dst    -> patch1 (SHR sz imm) dst
-    BT   sz imm src     -> patch1 (BT  sz imm) src
-    TEST sz src dst    -> patch2 (TEST sz) src dst
-    CMP  sz src dst    -> patch2 (CMP  sz) src dst
-    PUSH sz op         -> patch1 (PUSH sz) op
-    POP  sz op         -> patch1 (POP  sz) op
-    SETCC cond op      -> patch1 (SETCC cond) op
-    JMP op             -> patch1 JMP op
-    JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
-
-#if i386_TARGET_ARCH
-    GMOV src dst       -> GMOV (env src) (env dst)
-    GLD  sz src dst    -> GLD sz (lookupAddr src) (env dst)
-    GST  sz src dst    -> GST sz (env src) (lookupAddr dst)
-
-    GLDZ dst           -> GLDZ (env dst)
-    GLD1 dst           -> GLD1 (env dst)
-
-    GFTOI src dst      -> GFTOI (env src) (env dst)
-    GDTOI src dst      -> GDTOI (env src) (env dst)
-
-    GITOF src dst      -> GITOF (env src) (env dst)
-    GITOD src dst      -> GITOD (env src) (env dst)
-
-    GADD sz s1 s2 dst  -> GADD sz (env s1) (env s2) (env dst)
-    GSUB sz s1 s2 dst  -> GSUB sz (env s1) (env s2) (env dst)
-    GMUL sz s1 s2 dst  -> GMUL sz (env s1) (env s2) (env dst)
-    GDIV sz s1 s2 dst  -> GDIV sz (env s1) (env s2) (env dst)
-
-    GCMP sz src1 src2  -> GCMP sz (env src1) (env src2)
-    GABS sz src dst    -> GABS sz (env src) (env dst)
-    GNEG sz src dst    -> GNEG sz (env src) (env dst)
-    GSQRT sz src dst   -> GSQRT sz (env src) (env dst)
-    GSIN sz l1 l2 src dst      -> GSIN sz l1 l2 (env src) (env dst)
-    GCOS sz l1 l2 src dst      -> GCOS sz l1 l2 (env src) (env dst)
-    GTAN sz l1 l2 src dst      -> GTAN sz l1 l2 (env src) (env dst)
-#endif
-
-#if x86_64_TARGET_ARCH
-    CVTSS2SD src dst   -> CVTSS2SD (env src) (env dst)
-    CVTSD2SS src dst   -> CVTSD2SS (env src) (env dst)
-    CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst)
-    CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst)
-    CVTSI2SS src dst   -> CVTSI2SS (patchOp src) (env dst)
-    CVTSI2SD src dst   -> CVTSI2SD (patchOp src) (env dst)
-    FDIV sz src dst    -> FDIV sz (patchOp src) (patchOp dst)
-#endif    
-
-    CALL (Left _)  _   -> instr
-    CALL (Right reg) p -> CALL (Right (env reg)) p
-    
-    FETCHGOT reg        -> FETCHGOT (env reg)
-    FETCHPC  reg        -> FETCHPC  (env reg)
-   
-    NOP                        -> instr
-    COMMENT _          -> instr
-    DELTA _            -> instr
-    SPILL  reg slot    -> SPILL (env reg) slot
-    RELOAD slot reg    -> RELOAD slot (env reg)
-
-    JXX _ _            -> instr
-    JXX_GBL _ _                -> instr
-    CLTD _             -> instr
-
-    _other             -> panic "patchRegs: unrecognised instr"
-
-  where
-    patch1 insn op      = insn $! patchOp op
-    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
-
-    patchOp (OpReg  reg) = OpReg $! env reg
-    patchOp (OpImm  imm) = OpImm imm
-    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
-
-    lookupAddr (ImmAddr imm off) = ImmAddr imm off
-    lookupAddr (AddrBaseIndex base index disp)
-      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
-      where
-       lookupBase EABaseNone       = EABaseNone
-       lookupBase EABaseRip        = EABaseRip
-       lookupBase (EABaseReg r)    = EABaseReg (env r)
-                                
-       lookupIndex EAIndexNone     = EAIndexNone
-       lookupIndex (EAIndex r i)   = EAIndex (env r) i
-
-
--- -----------------------------------------------------------------------------
--- Determine the possible destinations from the current instruction.
-
--- (we always assume that the next instruction is also a valid destination;
--- if this isn't the case then the jump should be at the end of the basic
--- block).
-
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
-  = case insn of
-       JXX _ id        -> id : acc
-       JMP_TBL _ ids   -> ids ++ acc
-       _               -> acc
-
-
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
-       JMP{}           -> True
-       JXX{}           -> True
-       JXX_GBL{}       -> True
-       JMP_TBL{}       -> True
-       CALL{}          -> True
-       _               -> False
-
--- | Change the destination of this jump instruction
---     Used in joinToTargets in the linear allocator, when emitting fixup code
---     for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
-  = case insn of
-       JXX cc id | id == old -> JXX cc new
-       JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
-       _other          -> insn
-
-
--- -----------------------------------------------------------------------------
--- Detecting reg->reg moves
-
--- The register allocator attempts to eliminate reg->reg moves whenever it can,
--- by assigning the src and dest temporaries to the same real register.
-
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove (MOV _ (OpReg r1) (OpReg r2)) = Just (r1,r2)
-isRegRegMove _  = Nothing
+import Unique
  
  
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+   | not (isFloatSize size) = VirtualRegI u
+   | otherwise
+   = case size of
+        FF32   -> VirtualRegD u
+        FF64   -> VirtualRegD u
+       _       -> panic "mkVReg"
  
  
  data JumpDest = DestBlockId BlockId | DestImm Imm
  
  
  data JumpDest = DestBlockId BlockId | DestImm Imm
@@ -386,92 +62,6 @@ shortcutJump _ other = other
  
  
  
  
  
  
--- -----------------------------------------------------------------------------
--- Generating spill instructions
-
-mkSpillInstr
-   :: Reg              -- register to spill
-   -> Int              -- current stack delta
-   -> Int              -- spill slot to use
-   -> Instr
-
-#if   i386_TARGET_ARCH
-mkSpillInstr reg delta slot
-  = let        off     = spillSlotToOffset slot
-    in
-    let off_w = (off-delta) `div` 4
-    in case regClass reg of
-          RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
-          _         -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
-
-#elif x86_64_TARGET_ARCH
-mkSpillInstr reg delta slot
-  = let        off     = spillSlotToOffset slot
-    in
-    let off_w = (off-delta) `div` 8
-    in case regClass reg of
-          RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
-          RcDouble  -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
-          RcFloat   -> panic "mkSpillInstr/RcFloat"
-               -- ToDo: will it work to always spill as a double?
-               -- does that cause a stall if the data was a float?
-#else
-mkSpillInstr _ _ _
-    =   panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
-#endif
-
-
-mkLoadInstr
-   :: Reg              -- register to load
-   -> Int              -- current stack delta
-   -> Int              -- spill slot to use
-   -> Instr
-#if   i386_TARGET_ARCH
-mkLoadInstr reg delta slot
-  = let off     = spillSlotToOffset slot
-    in
-       let off_w = (off-delta) `div` 4
-        in case regClass reg of {
-              RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
-              _         -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
-#elif x86_64_TARGET_ARCH
-mkLoadInstr reg delta slot
-  = let off     = spillSlotToOffset slot
-    in
-       let off_w = (off-delta) `div` 8
-        in case regClass reg of
-              RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
-              _         -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
-#else
-mkLoadInstr _ _ _
-  =    panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
-#endif
-
-
-
-mkRegRegMoveInstr
-    :: Reg
-    -> Reg
-    -> Instr
-mkRegRegMoveInstr src dst
-    = case regClass src of
-        RcInteger -> MOV wordSize (OpReg src) (OpReg dst)
-#if   i386_TARGET_ARCH
-        RcDouble  -> GMOV src dst
-       RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-#else
-        RcDouble  -> MOV FF64 (OpReg src) (OpReg dst)
-       RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-#endif
-
-
-mkBranchInstr
-    :: BlockId
-    -> [Instr]
-
-mkBranchInstr id = [JXX ALWAYS id]
-
-
  spillSlotSize :: Int
  spillSlotSize = IF_ARCH_i386(12, 8)
  
  spillSlotSize :: Int
  spillSlotSize = IF_ARCH_i386(12, 8)
  
@@ -489,3 +79,82 @@ spillSlotToOffset slot
     = pprPanic "spillSlotToOffset:" 
                (   text "invalid spill location: " <> int slot
               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
     = pprPanic "spillSlotToOffset:" 
                (   text "invalid spill location: " <> int slot
               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
+
+
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+  | Just uq <- maybeAsmTemp lab 
+  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+  | Just uq <- maybeAsmTemp lbl1
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+        -- slightly dodgy, we're ignoring the second label, but this
+        -- works with the way we use CmmLabelDiffOff for jump tables now.
+
+shortcutStatic _ other_static
+        = other_static
+
+shortBlockId 
+       :: (BlockId -> Maybe JumpDest)
+       -> BlockId
+       -> CLabel
+
+shortBlockId fn blockid@(BlockId uq) =
+   case fn blockid of
+      Nothing -> mkAsmTempLabel uq
+      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
+      Just (DestImm (ImmCLbl lbl)) -> lbl
+      _other -> panic "shortBlockId"
+
+
+
+-- reg colors for x86
+#if i386_TARGET_ARCH
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = let Just    str     = lookupUFM regColors reg
+   in  text str
+
+regColors
+ = listToUFM
+ $     [ (eax, "#00ff00")
+       , (ebx, "#0000ff")
+       , (ecx, "#00ffff")
+       , (edx, "#0080ff")
+
+       , (fake0, "#ff00ff")
+       , (fake1, "#ff00aa")
+       , (fake2, "#aa00ff")
+       , (fake3, "#aa00aa")
+       , (fake4, "#ff0055")
+       , (fake5, "#5500ff") ]
+
+
+-- reg colors for x86_64
+#elif x86_64_TARGET_ARCH
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = let Just    str     = lookupUFM regColors reg
+   in  text str
+
+regColors
+ = listToUFM
+ $     [ (rax, "#00ff00"), (eax, "#00ff00")
+       , (rbx, "#0000ff"), (ebx, "#0000ff")
+       , (rcx, "#00ffff"), (ecx, "#00ffff")
+       , (rdx, "#0080ff"), (edx, "#00ffff")
+       , (r8,  "#00ff80")
+       , (r9,  "#008080")
+       , (r10, "#0040ff")
+       , (r11, "#00ff40")
+       , (r12, "#008040")
+       , (r13, "#004080")
+       , (r14, "#004040")
+       , (r15, "#002080") ]
+
+       ++ zip (map RealReg [16..31]) (repeat "red")
+#else
+regDotColor :: Reg -> SDoc
+regDotColor    = panic "not defined"
+#endif
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs

index 1f1c724..87564b8 100644 (file)
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -1,15 +1,4 @@
  module X86.Regs (
  module X86.Regs (
-
-       -- sizes
-       Size(..),
-       intSize, 
-       floatSize, 
-       isFloatSize, 
-       wordSize,
-       cmmTypeSize,
-       sizeToWidth,
-       mkVReg,
-
         -- immediates
         Imm(..),
         strImmLit,
         -- immediates
         Imm(..),
         strImmLit,
@@ -45,7 +34,10 @@ module X86.Regs (
  
         -- horror show
         freeReg,
  
         -- horror show
         freeReg,
-       globalRegMaybe
+       globalRegMaybe,
+       
+       get_GlobalReg_reg_or_addr,
+       allocatableRegs
  )
  
  where
  )
  
  where
@@ -60,93 +52,22 @@ where
  
  #include "../includes/MachRegs.h"
  
  
  #include "../includes/MachRegs.h"
  
-import RegsBase
+import Reg
+import RegClass
  
  
+import CgUtils          ( get_GlobalReg_addr )
  import BlockId
  import Cmm
  import CLabel           ( CLabel )
  import Pretty
  import BlockId
  import Cmm
  import CLabel           ( CLabel )
  import Pretty
-import Outputable      ( Outputable(..), pprPanic, panic )
+import Outputable      ( panic )
  import qualified Outputable
  import qualified Outputable
-import Unique
  import FastBool
  
  #if  defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
  import Constants
  #endif
  
  import FastBool
  
  #if  defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
  import Constants
  #endif
  
--- -----------------------------------------------------------------------------
--- Sizes on this architecture
--- 
--- A Size is usually a combination of width and class
-
--- It looks very like the old MachRep, but it's now of purely local
--- significance, here in the native code generator.  You can change it
--- without global consequences.
---
--- A major use is as an opcode qualifier; thus the opcode 
---     mov.l a b
--- might be encoded 
---     MOV II32 a b
--- where the Size field encodes the ".l" part.
-
--- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
--- here.  I've removed them from the x86 version, we'll see what happens --SDM
-
--- ToDo: quite a few occurrences of Size could usefully be replaced by Width
-
-data Size
-       = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80
-       deriving Eq
-
-intSize, floatSize :: Width -> Size
-intSize W8 = II8
-intSize W16 = II16
-intSize W32 = II32
-intSize W64 = II64
-intSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-
-floatSize W32 = FF32
-floatSize W64 = FF64
-floatSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-
-isFloatSize :: Size -> Bool
-isFloatSize FF32 = True
-isFloatSize FF64 = True
-isFloatSize FF80 = True
-isFloatSize _    = False
-
-
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty | isFloatType ty = floatSize (typeWidth ty)
-              | otherwise      = intSize (typeWidth ty)
-
-
-sizeToWidth :: Size -> Width
-sizeToWidth II8  = W8
-sizeToWidth II16 = W16
-sizeToWidth II32 = W32
-sizeToWidth II64 = W64
-sizeToWidth FF32 = W32
-sizeToWidth FF64 = W64
-sizeToWidth _ = panic "MachInstrs.sizeToWidth"
-
-
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
-   | not (isFloatSize size) = VirtualRegI u
-   | otherwise
-   = case size of
-        FF32   -> VirtualRegD u
-        FF64   -> VirtualRegD u
-       _       -> panic "mkVReg"
-
  
  -- -----------------------------------------------------------------------------
  -- Immediates
  
  -- -----------------------------------------------------------------------------
  -- Immediates
@@ -699,4 +620,26 @@ callClobberedRegs  = panic "X86.Regs.globalRegMaybe: not defined"
  
  #endif
  
  
  #endif
  
+-- We map STG registers onto appropriate CmmExprs.  Either they map
+-- to real machine registers or stored as offsets from BaseReg.  Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+   = case globalRegMaybe mid of
+        Just rr -> Left rr
+        Nothing -> Right (get_GlobalReg_addr mid)
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs for which we are prepared to allow the
+-- register allocator to attempt to map VRegs to.
+allocatableRegs :: [RegNo]
+allocatableRegs
+   = let isFree i = isFastTrue (freeReg i)
+     in  filter isFree allMachRegNos
+
author	Ben.Lippmeier@anu.edu.au <unknown>
	Sun, 15 Feb 2009 05:51:58 +0000 (05:51 +0000)
committer	Ben.Lippmeier@anu.edu.au <unknown>
	Sun, 15 Feb 2009 05:51:58 +0000 (05:51 +0000)
compiler/ghc.cabal.in		patch \| blob \| history
compiler/main/DynFlags.hs		patch \| blob \| history
compiler/nativeGen/Alpha/CodeGen.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/Alpha/Instr.hs		patch \| blob \| history
compiler/nativeGen/ArchReg.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/AsmCodeGen.lhs		patch \| blob \| history
compiler/nativeGen/Instrs.hs	[deleted file]	patch \| blob \| history
compiler/nativeGen/Instruction.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/MachCodeGen.hs	[deleted file]	patch \| blob \| history
compiler/nativeGen/NCGMonad.hs		patch \| blob \| history
compiler/nativeGen/PIC.hs	[moved from compiler/nativeGen/PositionIndependentCode.hs with 57% similarity]	patch \| blob \| history
compiler/nativeGen/PPC/CodeGen.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/PPC/Cond.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/PPC/Instr.hs		patch \| blob \| history
compiler/nativeGen/PPC/Ppr.hs		patch \| blob \| history
compiler/nativeGen/PPC/RegInfo.hs		patch \| blob \| history
compiler/nativeGen/PPC/Regs.hs		patch \| blob \| history
compiler/nativeGen/Platform.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/PprMach.hs	[deleted file]	patch \| blob \| history
compiler/nativeGen/Reg.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/RegAlloc/Graph/Coalesce.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Graph/Main.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Graph/Spill.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Graph/SpillClean.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Graph/SpillCost.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Graph/Stats.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs	[moved from compiler/nativeGen/Regs.hs with 55% similarity]	patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/Base.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/Main.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/StackMap.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/State.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/Stats.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs		patch \| blob \| history
compiler/nativeGen/RegAlloc/Liveness.hs		patch \| blob \| history
compiler/nativeGen/RegAllocInfo.hs	[deleted file]	patch \| blob \| history
compiler/nativeGen/RegClass.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/RegsBase.hs	[deleted file]	patch \| blob \| history
compiler/nativeGen/SPARC/CodeGen.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/SPARC/Cond.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/SPARC/Instr.hs		patch \| blob \| history
compiler/nativeGen/SPARC/Ppr.hs		patch \| blob \| history
compiler/nativeGen/SPARC/RegInfo.hs		patch \| blob \| history
compiler/nativeGen/SPARC/Regs.hs		patch \| blob \| history
compiler/nativeGen/Size.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/TargetReg.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/X86/CodeGen.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/X86/Cond.hs	[new file with mode: 0644]	patch \| blob
compiler/nativeGen/X86/Instr.hs		patch \| blob \| history
compiler/nativeGen/X86/Ppr.hs		patch \| blob \| history
compiler/nativeGen/X86/RegInfo.hs		patch \| blob \| history
compiler/nativeGen/X86/Regs.hs		patch \| blob \| history