From: Ben.Lippmeier@anu.edu.au Date: Sun, 15 Feb 2009 05:51:58 +0000 (+0000) Subject: NCG: Split up the native code generator into arch specific modules X-Git-Tag: 2009-03-13~77 X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=b04a210e26ca57242fd052f2aa91011a80b76299 NCG: Split up the native code generator into arch specific modules - nativeGen/Instruction defines a type class for a generic instruction set. Each of the instruction sets we have (X86, PPC and SPARC) is an instance of it; a toy sketch of such an instance appears after the diff below. - The register allocators use this type class when they need info about a certain register or instruction, such as regUsage, mkSpillInstr, mkJumpInstr, patchRegs, etc. - nativeGen/Platform defines some data types enumerating the architectures and operating systems supported by the native code generator (also sketched after the diff below). - DynFlags now keeps track of the current build platform, and the PositionIndependentCode module uses this to decide what to do instead of relying on #ifdefs. - It's not totally retargetable yet. Some info about the build target is still hardwired, but I've tried to contain most of it to a single module, TargetRegs. - Moved the SPILL and RELOAD instructions into LiveInstr. - Reg and RegClass now have their own modules, and are shared across all architectures. --- diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 2328eca6..b276943 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -454,31 +454,38 @@ Library Exposed-Modules: AsmCodeGen - MachCodeGen - Regs - RegsBase - Instrs - RegAllocInfo - PprMach + TargetReg + NCGMonad + Instruction + Size + Reg + RegClass PprBase + PIC + Platform Alpha.Regs Alpha.RegInfo Alpha.Instr Alpha.Ppr + Alpha.CodeGen X86.Regs X86.RegInfo X86.Instr + X86.Cond X86.Ppr + X86.CodeGen PPC.Regs PPC.RegInfo PPC.Instr + PPC.Cond PPC.Ppr + PPC.CodeGen SPARC.Regs SPARC.RegInfo SPARC.Instr + SPARC.Cond SPARC.Ppr - NCGMonad - PositionIndependentCode + SPARC.CodeGen RegAlloc.Liveness RegAlloc.Graph.Main RegAlloc.Graph.Stats @@ -488,6 +495,7 @@ Library RegAlloc.Graph.Spill RegAlloc.Graph.SpillClean RegAlloc.Graph.SpillCost + RegAlloc.Graph.TrivColorable RegAlloc.Linear.Main RegAlloc.Linear.JoinToTargets RegAlloc.Linear.State diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 44bd124..eb9a182 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -64,6 +64,7 @@ module DynFlags ( #include "HsVersions.h" +import Platform import Module import PackageConfig import PrelNames ( mAIN, main_RDR_Unqual ) @@ -339,6 +340,7 @@ data DynFlags = DynFlags { specConstrCount :: Maybe Int, -- ^ Max number of specialisations for any one function liberateCaseThreshold :: Maybe Int, -- ^ Threshold for LiberateCase + targetPlatform :: Platform, -- ^ The platform we're compiling for. Used by the NCG.
stolen_x86_regs :: Int, cmdlineHcIncludes :: [String], -- ^ @\-\#includes@ importPaths :: [FilePath], @@ -584,6 +586,7 @@ defaultDynFlags = specConstrThreshold = Just 200, specConstrCount = Just 3, liberateCaseThreshold = Just 200, + targetPlatform = defaultTargetPlatform, stolen_x86_regs = 4, cmdlineHcIncludes = [], importPaths = ["."], diff --git a/compiler/nativeGen/Alpha/CodeGen.hs b/compiler/nativeGen/Alpha/CodeGen.hs new file mode 100644 index 0000000..4ce774f --- /dev/null +++ b/compiler/nativeGen/Alpha/CodeGen.hs @@ -0,0 +1,789 @@ +module Alpha.CodeGen () + +where + +{- + +getRegister :: CmmExpr -> NatM Register + +#if !x86_64_TARGET_ARCH + -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured + -- register, it can only be used for rip-relative addressing. +getRegister (CmmReg (CmmGlobal PicBaseReg)) + = do + reg <- getPicBaseNat wordSize + return (Fixed wordSize reg nilOL) +#endif + +getRegister (CmmReg reg) + = return (Fixed (cmmTypeSize (cmmRegType reg)) + (getRegisterReg reg) nilOL) + +getRegister tree@(CmmRegOff _ _) + = getRegister (mangleIndexTree tree) + + +#if WORD_SIZE_IN_BITS==32 + -- for 32-bit architectures, support some 64 -> 32 bit conversions: + -- TO_W_(x), TO_W_(x >> 32) + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + +#endif + +-- end of machine-"independent" bit; here we go on the rest...
+ + +getRegister (StDouble d) + = getBlockIdNat `thenNat` \ lbl -> + getNewRegNat PtrRep `thenNat` \ tmp -> + let code dst = mkSeqInstrs [ + LDATA RoDataSegment lbl [ + DATA TF [ImmLab (rational d)] + ], + LDA tmp (AddrImm (ImmCLbl lbl)), + LD TF dst (AddrReg tmp)] + in + return (Any FF64 code) + +getRegister (StPrim primop [x]) -- unary PrimOps + = case primop of + IntNegOp -> trivialUCode (NEG Q False) x + + NotOp -> trivialUCode NOT x + + FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x + DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x + + OrdOp -> coerceIntCode IntRep x + ChrOp -> chrCode x + + Float2IntOp -> coerceFP2Int x + Int2FloatOp -> coerceInt2FP pr x + Double2IntOp -> coerceFP2Int x + Int2DoubleOp -> coerceInt2FP pr x + + Double2FloatOp -> coerceFltCode x + Float2DoubleOp -> coerceFltCode x + + other_op -> getRegister (StCall fn CCallConv FF64 [x]) + where + fn = case other_op of + FloatExpOp -> fsLit "exp" + FloatLogOp -> fsLit "log" + FloatSqrtOp -> fsLit "sqrt" + FloatSinOp -> fsLit "sin" + FloatCosOp -> fsLit "cos" + FloatTanOp -> fsLit "tan" + FloatAsinOp -> fsLit "asin" + FloatAcosOp -> fsLit "acos" + FloatAtanOp -> fsLit "atan" + FloatSinhOp -> fsLit "sinh" + FloatCoshOp -> fsLit "cosh" + FloatTanhOp -> fsLit "tanh" + DoubleExpOp -> fsLit "exp" + DoubleLogOp -> fsLit "log" + DoubleSqrtOp -> fsLit "sqrt" + DoubleSinOp -> fsLit "sin" + DoubleCosOp -> fsLit "cos" + DoubleTanOp -> fsLit "tan" + DoubleAsinOp -> fsLit "asin" + DoubleAcosOp -> fsLit "acos" + DoubleAtanOp -> fsLit "atan" + DoubleSinhOp -> fsLit "sinh" + DoubleCoshOp -> fsLit "cosh" + DoubleTanhOp -> fsLit "tanh" + where + pr = panic "MachCode.getRegister: no primrep needed for Alpha" + +getRegister (StPrim primop [x, y]) -- dyadic PrimOps + = case primop of + CharGtOp -> trivialCode (CMP LTT) y x + CharGeOp -> trivialCode (CMP LE) y x + CharEqOp -> trivialCode (CMP EQQ) x y + CharNeOp -> int_NE_code x y + CharLtOp -> trivialCode (CMP LTT) x y + CharLeOp -> trivialCode (CMP LE) x y + + IntGtOp -> trivialCode (CMP LTT) y x + IntGeOp -> trivialCode (CMP LE) y x + IntEqOp -> trivialCode (CMP EQQ) x y + IntNeOp -> int_NE_code x y + IntLtOp -> trivialCode (CMP LTT) x y + IntLeOp -> trivialCode (CMP LE) x y + + WordGtOp -> trivialCode (CMP ULT) y x + WordGeOp -> trivialCode (CMP ULE) x y + WordEqOp -> trivialCode (CMP EQQ) x y + WordNeOp -> int_NE_code x y + WordLtOp -> trivialCode (CMP ULT) x y + WordLeOp -> trivialCode (CMP ULE) x y + + AddrGtOp -> trivialCode (CMP ULT) y x + AddrGeOp -> trivialCode (CMP ULE) y x + AddrEqOp -> trivialCode (CMP EQQ) x y + AddrNeOp -> int_NE_code x y + AddrLtOp -> trivialCode (CMP ULT) x y + AddrLeOp -> trivialCode (CMP ULE) x y + + FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y + FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y + FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y + FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y + FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y + FloatLeOp -> cmpF_code (FCMP TF LE) NE x y + + DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y + DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y + DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y + DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y + DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y + DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y + + IntAddOp -> trivialCode (ADD Q False) x y + IntSubOp -> trivialCode (SUB Q False) x y + IntMulOp -> trivialCode (MUL Q False) x y + IntQuotOp -> trivialCode (DIV Q False) x y + IntRemOp -> trivialCode (REM Q False) x y + + WordAddOp -> trivialCode (ADD Q False) x y + WordSubOp -> trivialCode (SUB Q False) 
x y + WordMulOp -> trivialCode (MUL Q False) x y + WordQuotOp -> trivialCode (DIV Q True) x y + WordRemOp -> trivialCode (REM Q True) x y + + FloatAddOp -> trivialFCode W32 (FADD TF) x y + FloatSubOp -> trivialFCode W32 (FSUB TF) x y + FloatMulOp -> trivialFCode W32 (FMUL TF) x y + FloatDivOp -> trivialFCode W32 (FDIV TF) x y + + DoubleAddOp -> trivialFCode W64 (FADD TF) x y + DoubleSubOp -> trivialFCode W64 (FSUB TF) x y + DoubleMulOp -> trivialFCode W64 (FMUL TF) x y + DoubleDivOp -> trivialFCode W64 (FDIV TF) x y + + AddrAddOp -> trivialCode (ADD Q False) x y + AddrSubOp -> trivialCode (SUB Q False) x y + AddrRemOp -> trivialCode (REM Q True) x y + + AndOp -> trivialCode AND x y + OrOp -> trivialCode OR x y + XorOp -> trivialCode XOR x y + SllOp -> trivialCode SLL x y + SrlOp -> trivialCode SRL x y + + ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll" + ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra" + ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl" + + FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y]) + DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y]) + where + {- ------------------------------------------------------------ + Some bizarre special code for getting condition codes into + registers. Integer non-equality is a test for equality + followed by an XOR with 1. (Integer comparisons always set + the result register to 0 or 1.) Floating point comparisons of + any kind leave the result in a floating point register, so we + need to wrangle an integer register out of things. + -} + int_NE_code :: StixTree -> StixTree -> NatM Register + + int_NE_code x y + = trivialCode (CMP EQQ) x y `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst) + in + return (Any IntRep code__2) + + {- ------------------------------------------------------------ + Comments for int_NE_code also apply to cmpF_code + -} + cmpF_code + :: (Reg -> Reg -> Reg -> Instr) + -> Cond + -> StixTree -> StixTree + -> NatM Register + + cmpF_code instr cond x y + = trivialFCode pr instr x y `thenNat` \ register -> + getNewRegNat FF64 `thenNat` \ tmp -> + getBlockIdNat `thenNat` \ lbl -> + let + code = registerCode register tmp + result = registerName register tmp + + code__2 dst = code . mkSeqInstrs [ + OR zeroh (RIImm (ImmInt 1)) dst, + BF cond result (ImmCLbl lbl), + OR zeroh (RIReg zeroh) dst, + NEWBLOCK lbl] + in + return (Any IntRep code__2) + where + pr = panic "trivialU?FCode: does not use PrimRep on Alpha" + ------------------------------------------------------------ + +getRegister (CmmLoad pk mem) + = getAmode mem `thenNat` \ amode -> + let + code = amodeCode amode + src = amodeAddr amode + size = primRepToSize pk + code__2 dst = code . 
mkSeqInstr (LD size dst src) + in + return (Any pk code__2) + +getRegister (StInt i) + | fits8Bits i + = let + code dst = mkSeqInstr (OR zeroh (RIImm src) dst) + in + return (Any IntRep code) + | otherwise + = let + code dst = mkSeqInstr (LDI Q dst src) + in + return (Any IntRep code) + where + src = ImmInt (fromInteger i) + +getRegister leaf + | isJust imm + = let + code dst = mkSeqInstr (LDA dst (AddrImm imm__2)) + in + return (Any PtrRep code) + where + imm = maybeImm leaf + imm__2 = case imm of Just x -> x + + +getAmode :: CmmExpr -> NatM Amode +getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if alpha_TARGET_ARCH + +getAmode (StPrim IntSubOp [x, StInt i]) + = getNewRegNat PtrRep `thenNat` \ tmp -> + getRegister x `thenNat` \ register -> + let + code = registerCode register tmp + reg = registerName register tmp + off = ImmInt (-(fromInteger i)) + in + return (Amode (AddrRegImm reg off) code) + +getAmode (StPrim IntAddOp [x, StInt i]) + = getNewRegNat PtrRep `thenNat` \ tmp -> + getRegister x `thenNat` \ register -> + let + code = registerCode register tmp + reg = registerName register tmp + off = ImmInt (fromInteger i) + in + return (Amode (AddrRegImm reg off) code) + +getAmode leaf + | isJust imm + = return (Amode (AddrImm imm__2) id) + where + imm = maybeImm leaf + imm__2 = case imm of Just x -> x + +getAmode other + = getNewRegNat PtrRep `thenNat` \ tmp -> + getRegister other `thenNat` \ register -> + let + code = registerCode register tmp + reg = registerName register tmp + in + return (Amode (AddrReg reg) code) + +#endif /* alpha_TARGET_ARCH */ + + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). + +assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock + + +assignIntCode pk (CmmLoad dst _) src + = getNewRegNat IntRep `thenNat` \ tmp -> + getAmode dst `thenNat` \ amode -> + getRegister src `thenNat` \ register -> + let + code1 = amodeCode amode [] + dst__2 = amodeAddr amode + code2 = registerCode register tmp [] + src__2 = registerName register tmp + sz = primRepToSize pk + code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2) + in + return code__2 + +assignIntCode pk dst src + = getRegister dst `thenNat` \ register1 -> + getRegister src `thenNat` \ register2 -> + let + dst__2 = registerName register1 zeroh + code = registerCode register2 dst__2 + src__2 = registerName register2 dst__2 + code__2 = if isFixed register2 + then code . 
mkSeqInstr (OR src__2 (RIReg src__2) dst__2) + else code + in + return code__2 + +assignFltCode pk (CmmLoad dst _) src + = getNewRegNat pk `thenNat` \ tmp -> + getAmode dst `thenNat` \ amode -> + getRegister src `thenNat` \ register -> + let + code1 = amodeCode amode [] + dst__2 = amodeAddr amode + code2 = registerCode register tmp [] + src__2 = registerName register tmp + sz = primRepToSize pk + code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2) + in + return code__2 + +assignFltCode pk dst src + = getRegister dst `thenNat` \ register1 -> + getRegister src `thenNat` \ register2 -> + let + dst__2 = registerName register1 zeroh + code = registerCode register2 dst__2 + src__2 = registerName register2 dst__2 + code__2 = if isFixed register2 + then code . mkSeqInstr (FMOV src__2 dst__2) + else code + in + return code__2 + + +-- ----------------------------------------------------------------------------- +-- Generating a non-local jump + +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + +genJump (CmmLabel lbl) + | isAsmTemp lbl = returnInstr (BR target) + | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0] + where + target = ImmCLbl lbl + +genJump tree + = getRegister tree `thenNat` \ register -> + getNewRegNat PtrRep `thenNat` \ tmp -> + let + dst = registerName register pv + code = registerCode register pv + target = registerName register pv + in + if isFixed register then + returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0] + else + return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0)) + + +-- ----------------------------------------------------------------------------- +-- Unconditional branches + +genBranch :: BlockId -> NatM InstrBlock + +genBranch = return . toOL . mkBranchInstr + + +-- ----------------------------------------------------------------------------- +-- Conditional jumps + +{- +Conditional jumps are always to local labels, so we can use branch +instructions. We peek at the arguments to decide what kind of +comparison to do. + +ALPHA: For comparisons with 0, we're laughing, because we can just do +the desired conditional branch.
+ +-} + + +genCondJump + :: BlockId -- the branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +genCondJump id (StPrim op [x, StInt 0]) + = getRegister x `thenNat` \ register -> + getNewRegNat (registerRep register) + `thenNat` \ tmp -> + let + code = registerCode register tmp + value = registerName register tmp + pk = registerRep register + target = ImmCLbl lbl + in + returnSeq code [BI (cmpOp op) value target] + where + cmpOp CharGtOp = GTT + cmpOp CharGeOp = GE + cmpOp CharEqOp = EQQ + cmpOp CharNeOp = NE + cmpOp CharLtOp = LTT + cmpOp CharLeOp = LE + cmpOp IntGtOp = GTT + cmpOp IntGeOp = GE + cmpOp IntEqOp = EQQ + cmpOp IntNeOp = NE + cmpOp IntLtOp = LTT + cmpOp IntLeOp = LE + cmpOp WordGtOp = NE + cmpOp WordGeOp = ALWAYS + cmpOp WordEqOp = EQQ + cmpOp WordNeOp = NE + cmpOp WordLtOp = NEVER + cmpOp WordLeOp = EQQ + cmpOp AddrGtOp = NE + cmpOp AddrGeOp = ALWAYS + cmpOp AddrEqOp = EQQ + cmpOp AddrNeOp = NE + cmpOp AddrLtOp = NEVER + cmpOp AddrLeOp = EQQ + +genCondJump lbl (StPrim op [x, StDouble 0.0]) + = getRegister x `thenNat` \ register -> + getNewRegNat (registerRep register) + `thenNat` \ tmp -> + let + code = registerCode register tmp + value = registerName register tmp + pk = registerRep register + target = ImmCLbl lbl + in + return (code . mkSeqInstr (BF (cmpOp op) value target)) + where + cmpOp FloatGtOp = GTT + cmpOp FloatGeOp = GE + cmpOp FloatEqOp = EQQ + cmpOp FloatNeOp = NE + cmpOp FloatLtOp = LTT + cmpOp FloatLeOp = LE + cmpOp DoubleGtOp = GTT + cmpOp DoubleGeOp = GE + cmpOp DoubleEqOp = EQQ + cmpOp DoubleNeOp = NE + cmpOp DoubleLtOp = LTT + cmpOp DoubleLeOp = LE + +genCondJump lbl (StPrim op [x, y]) + | fltCmpOp op + = trivialFCode pr instr x y `thenNat` \ register -> + getNewRegNat FF64 `thenNat` \ tmp -> + let + code = registerCode register tmp + result = registerName register tmp + target = ImmCLbl lbl + in + return (code . mkSeqInstr (BF cond result target)) + where + pr = panic "trivialU?FCode: does not use PrimRep on Alpha" + + fltCmpOp op = case op of + FloatGtOp -> True + FloatGeOp -> True + FloatEqOp -> True + FloatNeOp -> True + FloatLtOp -> True + FloatLeOp -> True + DoubleGtOp -> True + DoubleGeOp -> True + DoubleEqOp -> True + DoubleNeOp -> True + DoubleLtOp -> True + DoubleLeOp -> True + _ -> False + (instr, cond) = case op of + FloatGtOp -> (FCMP TF LE, EQQ) + FloatGeOp -> (FCMP TF LTT, EQQ) + FloatEqOp -> (FCMP TF EQQ, NE) + FloatNeOp -> (FCMP TF EQQ, EQQ) + FloatLtOp -> (FCMP TF LTT, NE) + FloatLeOp -> (FCMP TF LE, NE) + DoubleGtOp -> (FCMP TF LE, EQQ) + DoubleGeOp -> (FCMP TF LTT, EQQ) + DoubleEqOp -> (FCMP TF EQQ, NE) + DoubleNeOp -> (FCMP TF EQQ, EQQ) + DoubleLtOp -> (FCMP TF LTT, NE) + DoubleLeOp -> (FCMP TF LE, NE) + +genCondJump lbl (StPrim op [x, y]) + = trivialCode instr x y `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ tmp -> + let + code = registerCode register tmp + result = registerName register tmp + target = ImmCLbl lbl + in + return (code . 
mkSeqInstr (BI cond result target)) + where + (instr, cond) = case op of + CharGtOp -> (CMP LE, EQQ) + CharGeOp -> (CMP LTT, EQQ) + CharEqOp -> (CMP EQQ, NE) + CharNeOp -> (CMP EQQ, EQQ) + CharLtOp -> (CMP LTT, NE) + CharLeOp -> (CMP LE, NE) + IntGtOp -> (CMP LE, EQQ) + IntGeOp -> (CMP LTT, EQQ) + IntEqOp -> (CMP EQQ, NE) + IntNeOp -> (CMP EQQ, EQQ) + IntLtOp -> (CMP LTT, NE) + IntLeOp -> (CMP LE, NE) + WordGtOp -> (CMP ULE, EQQ) + WordGeOp -> (CMP ULT, EQQ) + WordEqOp -> (CMP EQQ, NE) + WordNeOp -> (CMP EQQ, EQQ) + WordLtOp -> (CMP ULT, NE) + WordLeOp -> (CMP ULE, NE) + AddrGtOp -> (CMP ULE, EQQ) + AddrGeOp -> (CMP ULT, EQQ) + AddrEqOp -> (CMP EQQ, NE) + AddrNeOp -> (CMP EQQ, EQQ) + AddrLtOp -> (CMP ULT, NE) + AddrLeOp -> (CMP ULE, NE) + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +genCCall + :: CmmCallTarget -- function to call + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) + -> NatM InstrBlock + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +ccallResultRegs = + +genCCall fn cconv result_regs args + = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args + `thenNat` \ ((unused,_), argCode) -> + let + nRegs = length allArgRegs - length unused + code = asmSeqThen (map ($ []) argCode) + in + returnSeq code [ + LDA pv (AddrImm (ImmLab (ptext fn))), + JSR ra (AddrReg pv) nRegs, + LDGP gp (AddrReg ra)] + where + ------------------------ + {- Try to get a value into a specific register (or registers) for + a call. The first 6 arguments go into the appropriate + argument register (separate registers for integer and floating + point arguments, but used in lock-step), and the remaining + arguments are dumped to the stack, beginning at 0(sp). Our + first argument is a pair of the list of remaining argument + registers to be assigned for this call and the next stack + offset to use for overflowing arguments. This way, + @get_Arg@ can be applied to all of a call's arguments using + @mapAccumLNat@. + -} + get_arg + :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator) + -> StixTree -- Current argument + -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code + + -- We have to use up all of our argument registers first... + + get_arg ((iDst,fDst):dsts, offset) arg + = getRegister arg `thenNat` \ register -> + let + reg = if isFloatType pk then fDst else iDst + code = registerCode register reg + src = registerName register reg + pk = registerRep register + in + return ( + if isFloatType pk then + ((dsts, offset), if isFixed register then + code . mkSeqInstr (FMOV src fDst) + else code) + else + ((dsts, offset), if isFixed register then + code . mkSeqInstr (OR src (RIReg src) iDst) + else code)) + + -- Once we have run out of argument registers, we move to the + -- stack... + + get_arg ([], offset) arg + = getRegister arg `thenNat` \ register -> + getNewRegNat (registerRep register) + `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + pk = registerRep register + sz = primRepToSize pk + in + return (([], offset + 1), code . 
mkSeqInstr (ST sz src (spRel offset))) + +trivialCode instr x (StInt y) + | fits8Bits y + = getRegister x `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ tmp -> + let + code = registerCode register tmp + src1 = registerName register tmp + src2 = ImmInt (fromInteger y) + code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst) + in + return (Any IntRep code__2) + +trivialCode instr x y + = getRegister x `thenNat` \ register1 -> + getRegister y `thenNat` \ register2 -> + getNewRegNat IntRep `thenNat` \ tmp1 -> + getNewRegNat IntRep `thenNat` \ tmp2 -> + let + code1 = registerCode register1 tmp1 [] + src1 = registerName register1 tmp1 + code2 = registerCode register2 tmp2 [] + src2 = registerName register2 tmp2 + code__2 dst = asmSeqThen [code1, code2] . + mkSeqInstr (instr src1 (RIReg src2) dst) + in + return (Any IntRep code__2) + +------------ +trivialUCode instr x + = getRegister x `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + code__2 dst = code . mkSeqInstr (instr (RIReg src) dst) + in + return (Any IntRep code__2) + +------------ +trivialFCode _ instr x y + = getRegister x `thenNat` \ register1 -> + getRegister y `thenNat` \ register2 -> + getNewRegNat FF64 `thenNat` \ tmp1 -> + getNewRegNat FF64 `thenNat` \ tmp2 -> + let + code1 = registerCode register1 tmp1 + src1 = registerName register1 tmp1 + + code2 = registerCode register2 tmp2 + src2 = registerName register2 tmp2 + + code__2 dst = asmSeqThen [code1 [], code2 []] . + mkSeqInstr (instr src1 src2 dst) + in + return (Any FF64 code__2) + +trivialUFCode _ instr x + = getRegister x `thenNat` \ register -> + getNewRegNat FF64 `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + code__2 dst = code . mkSeqInstr (instr src dst) + in + return (Any FF64 code__2) + +#if alpha_TARGET_ARCH + +coerceInt2FP _ x + = getRegister x `thenNat` \ register -> + getNewRegNat IntRep `thenNat` \ reg -> + let + code = registerCode register reg + src = registerName register reg + + code__2 dst = code . mkSeqInstrs [ + ST Q src (spRel 0), + LD TF dst (spRel 0), + CVTxy Q TF dst dst] + in + return (Any FF64 code__2) + +------------- +coerceFP2Int x + = getRegister x `thenNat` \ register -> + getNewRegNat FF64 `thenNat` \ tmp -> + let + code = registerCode register tmp + src = registerName register tmp + + code__2 dst = code . mkSeqInstrs [ + CVTxy TF Q src tmp, + ST TF tmp (spRel 0), + LD Q dst (spRel 0)] + in + return (Any IntRep code__2) + +#endif /* alpha_TARGET_ARCH */ + + +-} + + + + + diff --git a/compiler/nativeGen/Alpha/Instr.hs b/compiler/nativeGen/Alpha/Instr.hs index e2d66d3..990ea8b 100644 --- a/compiler/nativeGen/Alpha/Instr.hs +++ b/compiler/nativeGen/Alpha/Instr.hs @@ -10,13 +10,14 @@ #include "nativeGen/NCG.h" module Alpha.Instr ( - Cond(..), - Instr(..), - RI(..) +-- Cond(..), +-- Instr(..), +-- RI(..) 
) where +{- import BlockId import Regs import Cmm @@ -138,3 +139,4 @@ data Instr | FUNEND CLabel +-} diff --git a/compiler/nativeGen/ArchReg.hs b/compiler/nativeGen/ArchReg.hs new file mode 100644 index 0000000..7170228 --- /dev/null +++ b/compiler/nativeGen/ArchReg.hs @@ -0,0 +1,14 @@ + + +module ArchReg ( + +) + +where + + +class ArchReg reg format where + classOfReg :: reg -> RegClass + mkVReg :: format -> VirtReg reg + + diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index ce411ed..8613a8e 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -19,21 +19,56 @@ module AsmCodeGen ( nativeCodeGen ) where #include "HsVersions.h" #include "nativeGen/NCG.h" -import Instrs -import Regs -import MachCodeGen -import PprMach -import RegAllocInfo -import NCGMonad -import PositionIndependentCode -import RegAlloc.Liveness -import qualified RegAlloc.Linear.Main as Linear +#if alpha_TARGET_ARCH +import Alpha.CodeGen +import Alpha.Regs +import Alpha.RegInfo +import Alpha.Instr +import Alpha.Ppr + +#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH +import X86.CodeGen +import X86.Regs +import X86.RegInfo +import X86.Instr +import X86.Ppr + +#elif sparc_TARGET_ARCH +import SPARC.CodeGen +import SPARC.Regs +import SPARC.RegInfo +import SPARC.Instr +import SPARC.Ppr + +#elif powerpc_TARGET_ARCH +import PPC.CodeGen +import PPC.Regs +import PPC.RegInfo +import PPC.Instr +import PPC.Ppr + +#else +#error "AsmCodeGen: unknown architecture" + +#endif + +import RegAlloc.Liveness +import qualified RegAlloc.Linear.Main as Linear import qualified GraphColor as Color import qualified RegAlloc.Graph.Main as Color import qualified RegAlloc.Graph.Stats as Color import qualified RegAlloc.Graph.Coalesce as Color +import qualified RegAlloc.Graph.TrivColorable as Color + +import qualified TargetReg as Target + +import Platform +import Instruction +import PIC +import Reg +import NCGMonad import Cmm import CmmOpt ( cmmMiniInline, cmmMachOpFold ) @@ -160,7 +195,7 @@ nativeCodeGen dflags h us cmms dumpIfSet_dyn dflags Opt_D_dump_asm_conflicts "Register conflict graph" - $ Color.dotGraph Color.regDotColor trivColorable + $ Color.dotGraph Target.targetRegDotColor (Color.trivColorable Target.targetRegClass) $ graphGlobal) @@ -172,7 +207,7 @@ nativeCodeGen dflags h us cmms -- write out the imports Pretty.printDoc Pretty.LeftMode h - $ makeImportsDoc (concat imports) + $ makeImportsDoc dflags (concat imports) return () @@ -225,13 +260,13 @@ cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count cmmNativeGen :: DynFlags -> UniqSupply - -> RawCmmTop -- ^ the cmm to generate code for - -> Int -- ^ sequence number of this top thing + -> RawCmmTop -- ^ the cmm to generate code for + -> Int -- ^ sequence number of this top thing -> IO ( UniqSupply - , [NatCmmTop] -- native code - , [CLabel] -- things imported by this cmm - , Maybe [Color.RegAllocStats] -- stats for the coloring register allocator - , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators + , [NatCmmTop Instr] -- native code + , [CLabel] -- things imported by this cmm + , Maybe [Color.RegAllocStats Instr] -- stats for the coloring register allocator + , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators cmmNativeGen dflags us cmm count = do @@ -375,8 +410,8 @@ x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) = -- | Build a doc for all the imports. 
-- -makeImportsDoc :: [CLabel] -> Pretty.Doc -makeImportsDoc imports +makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc +makeImportsDoc dflags imports = dyld_stubs imports #if HAVE_SUBSECTIONS_VIA_SYMBOLS @@ -410,13 +445,16 @@ makeImportsDoc imports {- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $ map head $ group $ sort imps-} + arch = platformArch $ targetPlatform dflags + os = platformOS $ targetPlatform dflags + -- (Hack) sometimes two Labels pretty-print the same, but have -- different uniques; so we compare their text versions... dyld_stubs imps - | needImportedSymbols + | needImportedSymbols arch os = Pretty.vcat $ - (pprGotDeclaration :) $ - map (pprImportedSymbol . fst . head) $ + (pprGotDeclaration arch os :) $ + map ( pprImportedSymbol arch os . fst . head) $ groupBy (\(_,a) (_,b) -> a == b) $ sortBy (\(_,a) (_,b) -> compare a b) $ map doPpr $ @@ -437,7 +475,11 @@ makeImportsDoc imports -- such that as many of the local jumps as possible turn into -- fallthroughs. -sequenceTop :: NatCmmTop -> NatCmmTop +sequenceTop + :: Instruction instr + => NatCmmTop instr + -> NatCmmTop instr + sequenceTop top@(CmmData _ _) = top sequenceTop (CmmProc info lbl params (ListGraph blocks)) = CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks) @@ -452,21 +494,36 @@ sequenceTop (CmmProc info lbl params (ListGraph blocks)) = -- FYI, the classic layout for basic blocks uses postorder DFS; this -- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007). -sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock] +sequenceBlocks + :: Instruction instr + => [NatBasicBlock instr] + -> [NatBasicBlock instr] + sequenceBlocks [] = [] sequenceBlocks (entry:blocks) = seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks))) -- the first block is the entry point ==> it must remain at the start. -sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])] + +sccBlocks + :: Instruction instr + => [NatBasicBlock instr] + -> [SCC ( NatBasicBlock instr + , Unique + , [Unique])] + sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks) -getOutEdges :: [Instr] -> [Unique] -getOutEdges instrs = case jumpDests (last instrs) [] of - [one] -> [getUnique one] - _many -> [] - -- we're only interested in the last instruction of - -- the block, and only if it has a single destination. +-- we're only interested in the last instruction of +-- the block, and only if it has a single destination. +getOutEdges + :: Instruction instr + => [instr] -> [Unique] + +getOutEdges instrs + = case jumpDestsOfInstr (last instrs) of + [one] -> [getUnique one] + _many -> [] mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs) @@ -494,7 +551,10 @@ reorder id accum (b@(block,id',out) : rest) -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too -- big, we have to work around this limitation. 
-makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock] +makeFarBranches + :: Instruction instr + => [NatBasicBlock instr] + -> [NatBasicBlock instr] #if powerpc_TARGET_ARCH makeFarBranches blocks @@ -530,7 +590,11 @@ makeFarBranches = id -- ----------------------------------------------------------------------------- -- Shortcut branches -shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop] +shortcutBranches + :: DynFlags + -> [NatCmmTop Instr] + -> [NatCmmTop Instr] + shortcutBranches dflags tops | optLevel dflags < 1 = tops -- only with -O or higher | otherwise = map (apply_mapping mapping) tops' @@ -589,12 +653,17 @@ apply_mapping ufm (CmmProc info lbl params (ListGraph blocks)) -- Switching between the two monads whilst carrying along the same -- Unique supply breaks abstraction. Is that bad? -genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel]) +genMachCode + :: DynFlags + -> RawCmmTop + -> UniqSM + ( [NatCmmTop Instr] + , [CLabel]) genMachCode dflags cmm_top = do { initial_us <- getUs ; let initial_st = mkNatM_State initial_us 0 dflags - (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top) + (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top) final_delta = natm_delta final_st final_imports = natm_imports final_st ; if final_delta == 0 diff --git a/compiler/nativeGen/Instrs.hs b/compiler/nativeGen/Instrs.hs deleted file mode 100644 index 3f38a36..0000000 --- a/compiler/nativeGen/Instrs.hs +++ /dev/null @@ -1,97 +0,0 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - ------------------------------------------------------------------------------ --- --- Machine-dependent assembly language --- --- (c) The University of Glasgow 1993-2004 --- ------------------------------------------------------------------------------ - -#include "nativeGen/NCG.h" - - -module Instrs ( - NatCmm, - NatCmmTop, - NatBasicBlock, - condUnsigned, - condToSigned, - condToUnsigned, - -#if alpha_TARGET_ARCH - module Alpha.Instr -#elif powerpc_TARGET_ARCH - module PPC.Instr -#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH - module X86.Instr -#elif sparc_TARGET_ARCH - module SPARC.Instr -#else -#error "Instrs: not defined for this architecture" -#endif -) - -where - -#include "HsVersions.h" - -import BlockId -import Regs -import Cmm -import CLabel ( CLabel, pprCLabel ) -import Panic ( panic ) -import Outputable -import FastString -import Constants ( wORD_SIZE ) - -import GHC.Exts - -#if alpha_TARGET_ARCH -import Alpha.Instr -#elif powerpc_TARGET_ARCH -import PPC.Instr -#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH -import X86.Instr -#elif sparc_TARGET_ARCH -import SPARC.Instr -#else -#error "Instrs: not defined for this architecture" -#endif - - --- Our flavours of the Cmm types --- Type synonyms for Cmm populated with native code - -type NatCmm = GenCmm CmmStatic [CmmStatic] (ListGraph Instr) -type NatCmmTop = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr) -type NatBasicBlock = GenBasicBlock Instr - - --- Condition utils -condUnsigned GU = True -condUnsigned LU = True -condUnsigned GEU = True -condUnsigned LEU = True -condUnsigned _ = False - -condToSigned GU = GTT -condToSigned LU = LTT -condToSigned GEU = GE -condToSigned LEU = LE -condToSigned x = x - -condToUnsigned GTT = GU -condToUnsigned LTT = LU -condToUnsigned GE = 
GEU -condToUnsigned LE = LEU -condToUnsigned x = x - - - - diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs new file mode 100644 index 0000000..22c37a5 --- /dev/null +++ b/compiler/nativeGen/Instruction.hs @@ -0,0 +1,159 @@ + +module Instruction ( + RegUsage(..), + noUsage, + NatCmm, + NatCmmTop, + NatBasicBlock, + Instruction(..) +) + +where + +import Reg + +import BlockId +import Cmm + +-- | Holds a list of source and destination registers used by a +-- particular instruction. +-- +-- Machine registers that are pre-allocated to stgRegs are filtered +-- out, because they are uninteresting from a register allocation +-- standpoint. (We wouldn't want them to end up on the free list!) +-- +-- As far as we are concerned, the fixed registers simply don't exist +-- (for allocation purposes, anyway). +-- +data RegUsage + = RU [Reg] [Reg] + +-- | No regs read or written to. +noUsage :: RegUsage +noUsage = RU [] [] + + +-- Our flavours of the Cmm types +-- Type synonyms for Cmm populated with native code +type NatCmm instr + = GenCmm + CmmStatic + [CmmStatic] + (ListGraph instr) + +type NatCmmTop instr + = GenCmmTop + CmmStatic + [CmmStatic] + (ListGraph instr) + + +type NatBasicBlock instr + = GenBasicBlock instr + + + + +-- | Common things that we can do with instructions, on all architectures. +-- These are used by the shared parts of the native code generator, +-- specifically the register allocators. +-- +class Instruction instr where + + -- | Get the registers that are being used by this instruction. + -- regUsage doesn't need to do any trickery for jumps and such. + -- Just state precisely the regs read and written by that insn. + -- The consequences of control flow transfers, as far as register + -- allocation goes, are taken care of by the register allocator. + -- + regUsageOfInstr + :: instr + -> RegUsage + + + -- | Apply a given mapping to all the register references in this + -- instruction. + patchRegsOfInstr + :: instr + -> (Reg -> Reg) + -> instr + + + -- | Checks whether this instruction is a jump/branch instruction. + -- One that can change the flow of control in a way that the + -- register allocator needs to worry about. + isJumpishInstr + :: instr -> Bool + + + -- | Give the possible destinations of this jump instruction. + -- Must be defined for all jumpish instructions. + jumpDestsOfInstr + :: instr -> [BlockId] + + + -- | Change the destination of this jump instruction. + -- Used in the linear allocator when adding fixup blocks for join + -- points. + patchJumpInstr + :: instr + -> (BlockId -> BlockId) + -> instr + + + -- | An instruction to spill a register into a spill slot. + mkSpillInstr + :: Reg -- ^ the reg to spill + -> Int -- ^ the current stack delta + -> Int -- ^ spill slot to use + -> instr + + + -- | An instruction to reload a register from a spill slot. + mkLoadInstr + :: Reg -- ^ the reg to reload. + -> Int -- ^ the current stack delta + -> Int -- ^ the spill slot to use + -> instr + + -- | See if this instruction is telling us the current C stack delta + takeDeltaInstr + :: instr + -> Maybe Int + + -- | Check whether this instruction is some meta thing inserted into + -- the instruction stream for other purposes. + -- + -- Not something that has to be treated as a real machine instruction + -- and have its registers allocated. + -- + -- eg, comments, delta, ldata, etc. + isMetaInstr + :: instr + -> Bool + + + + -- | Copy the value in a register to another one. + -- Must work for all register classes. 
+ mkRegRegMoveInstr + :: Reg -- ^ source register + -> Reg -- ^ destination register + -> instr + + -- | Take the source and destination from this reg -> reg move instruction + -- or Nothing if it's not one + takeRegRegMoveInstr + :: instr + -> Maybe (Reg, Reg) + + -- | Make an unconditional jump instruction. + -- For architectures with branch delay slots, its ok to put + -- a NOP after the jump. Don't fill the delay slot with an + -- instruction that references regs or you'll confuse the + -- linear allocator. + mkJumpInstr + :: BlockId + -> [instr] + + diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs deleted file mode 100644 index d94a906..0000000 --- a/compiler/nativeGen/MachCodeGen.hs +++ /dev/null @@ -1,5199 +0,0 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - ------------------------------------------------------------------------------ --- --- Generating machine code (instruction selection) --- --- (c) The University of Glasgow 1996-2004 --- ------------------------------------------------------------------------------ - --- This is a big module, but, if you pay attention to --- (a) the sectioning, (b) the type signatures, and --- (c) the #if blah_TARGET_ARCH} things, the --- structure should not be too overwhelming. - -module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where - -#include "HsVersions.h" -#include "nativeGen/NCG.h" -#include "MachDeps.h" - --- NCG stuff: -import Instrs -import Regs -import NCGMonad -import PositionIndependentCode -import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr ) -import PprMach - --- Our intermediate code: -import BlockId -import PprCmm ( pprExpr ) -import Cmm -import CLabel -import ClosureInfo ( C_SRT(..) ) - --- The rest: -import BasicTypes -import StaticFlags ( opt_PIC ) -import ForeignCall ( CCallConv(..) ) -import OrdList -import Pretty -import qualified Outputable as O -import Outputable -import FastString -import FastBool ( isFastTrue ) -import Constants ( wORD_SIZE ) - -import Debug.Trace ( trace ) - -import Control.Monad ( mapAndUnzipM ) -import Data.Maybe ( fromJust ) -import Data.Bits -import Data.Word -import Data.Int - - --- ----------------------------------------------------------------------------- --- Top-level of the instruction selector - --- | 'InstrBlock's are the insn sequences generated by the insn selectors. --- They are really trees of insns to facilitate fast appending, where a --- left-to-right traversal (pre-order?) yields the insns in the correct --- order. 
- -type InstrBlock = OrdList Instr - -cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop] -cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do - (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks - picBaseMb <- getPicBaseMaybeNat - let proc = CmmProc info lab params (ListGraph $ concat nat_blocks) - tops = proc : concat statics - case picBaseMb of - Just picBase -> initializePicBase picBase tops - Nothing -> return tops - -cmmTopCodeGen (CmmData sec dat) = do - return [CmmData sec dat] -- no translation, we just use CmmStatic - -basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop]) -basicBlockCodeGen (BasicBlock id stmts) = do - instrs <- stmtsToInstrs stmts - -- code generation may introduce new basic block boundaries, which - -- are indicated by the NEWBLOCK instruction. We must split up the - -- instruction stream into basic blocks again. Also, we extract - -- LDATAs here too. - let - (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs - - mkBlocks (NEWBLOCK id) (instrs,blocks,statics) - = ([], BasicBlock id instrs : blocks, statics) - mkBlocks (LDATA sec dat) (instrs,blocks,statics) - = (instrs, blocks, CmmData sec dat:statics) - mkBlocks instr (instrs,blocks,statics) - = (instr:instrs, blocks, statics) - -- in - return (BasicBlock id top : other_blocks, statics) - -stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock -stmtsToInstrs stmts - = do instrss <- mapM stmtToInstrs stmts - return (concatOL instrss) - -stmtToInstrs :: CmmStmt -> NatM InstrBlock -stmtToInstrs stmt = case stmt of - CmmNop -> return nilOL - CmmComment s -> return (unitOL (COMMENT s)) - - CmmAssign reg src - | isFloatType ty -> assignReg_FltCode size reg src -#if WORD_SIZE_IN_BITS==32 - | isWord64 ty -> assignReg_I64Code reg src -#endif - | otherwise -> assignReg_IntCode size reg src - where ty = cmmRegType reg - size = cmmTypeSize ty - - CmmStore addr src - | isFloatType ty -> assignMem_FltCode size addr src -#if WORD_SIZE_IN_BITS==32 - | isWord64 ty -> assignMem_I64Code addr src -#endif - | otherwise -> assignMem_IntCode size addr src - where ty = cmmExprType src - size = cmmTypeSize ty - - CmmCall target result_regs args _ _ - -> genCCall target result_regs args - - CmmBranch id -> genBranch id - CmmCondBranch arg id -> genCondJump id arg - CmmSwitch arg ids -> genSwitch arg ids - CmmJump arg params -> genJump arg - CmmReturn params -> - panic "stmtToInstrs: return statement should have been cps'd away" - --- ----------------------------------------------------------------------------- --- General things for putting together code sequences - --- Expand CmmRegOff. ToDo: should we do it this way around, or convert --- CmmExprs into CmmRegOff? -mangleIndexTree :: CmmExpr -> CmmExpr -mangleIndexTree (CmmRegOff reg off) - = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] - where width = typeWidth (cmmRegType reg) - --- ----------------------------------------------------------------------------- --- Code gen for 64-bit arithmetic on 32-bit platforms - -{- -Simple support for generating 64-bit code (ie, 64 bit values and 64 -bit assignments) on 32-bit platforms. Unlike the main code generator -we merely shoot for generating working code as simply as possible, and -pay little attention to code quality. Specifically, there is no -attempt to deal cleverly with the fixed-vs-floating register -distinction; all values are generated into (pairs of) floating -registers, even if this would mean some redundant reg-reg moves as a -result. 
Only one of the VRegUniques is returned, since it will be -of the VRegUniqueLo form, and the upper-half VReg can be determined -by applying getHiVRegFromLo to it. --} - -data ChildCode64 -- a.k.a "Register64" - = ChildCode64 - InstrBlock -- code - Reg -- the lower 32-bit temporary which contains the - -- result; use getHiVRegFromLo to find the other - -- VRegUnique. Rules of this simplified insn - -- selection game are therefore that the returned - -- Reg may be modified - -#if WORD_SIZE_IN_BITS==32 -assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock -#endif - -#ifndef x86_64_TARGET_ARCH -iselExpr64 :: CmmExpr -> NatM ChildCode64 -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -assignMem_I64Code addrTree valueTree = do - Amode addr addr_code <- getAmode addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - let - rhi = getHiVRegFromLo rlo - - -- Little-endian store - mov_lo = MOV II32 (OpReg rlo) (OpAddr addr) - mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4))) - -- in - return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) - - -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = mkVReg u_dst II32 - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo) - mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi) - -- in - return ( - vcode `snocOL` mov_lo `snocOL` mov_hi - ) - -assignReg_I64Code lvalue valueTree - = panic "assignReg_I64Code(i386): invalid lvalue" - ------------- - -iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat II32 - let - r = fromIntegral (fromIntegral i :: Word32) - q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) - code = toOL [ - MOV II32 (OpImm (ImmInteger r)) (OpReg rlo), - MOV II32 (OpImm (ImmInteger q)) (OpReg rhi) - ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do - Amode addr addr_code <- getAmode addrTree - (rlo,rhi) <- getNewRegPairNat II32 - let - mov_lo = MOV II32 (OpAddr addr) (OpReg rlo) - mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi) - -- in - return ( - ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - ) - -iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty - = return (ChildCode64 nilOL (mkVReg vu II32)) - --- we handle addition, but rather badly -iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - (rlo,rhi) <- getNewRegPairNat II32 - let - r = fromIntegral (fromIntegral i :: Word32) - q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) - r1hi = getHiVRegFromLo r1lo - code = code1 `appOL` - toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), - ADD II32 (OpImm (ImmInteger r)) (OpReg rlo), - MOV II32 (OpReg r1hi) (OpReg rhi), - ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), - ADD II32 (OpReg r2lo) (OpReg rlo), - MOV II32 (OpReg r1hi) (OpReg rhi), - ADC II32 (OpReg r2hi) (OpReg rhi) ] - -- in - return (ChildCode64 code rlo) - 
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do - fn <- getAnyReg expr - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - code = fn r_dst_lo - return ( - ChildCode64 (code `snocOL` - MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi)) - r_dst_lo - ) - -iselExpr64 expr - = pprPanic "iselExpr64(i386)" (ppr expr) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -assignMem_I64Code addrTree valueTree = do - Amode addr addr_code <- getAmode addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - (src, code) <- getSomeReg addrTree - let - rhi = getHiVRegFromLo rlo - -- Big-endian store - mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0)) - mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4)) - return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo) - -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = mkVReg u_dst (cmmTypeSize pk) - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = mkMOV r_src_lo r_dst_lo - mov_hi = mkMOV r_src_hi r_dst_hi - mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg - return (vcode `snocOL` mov_hi `snocOL` mov_lo) -assignReg_I64Code lvalue valueTree - = panic "assignReg_I64Code(sparc): invalid lvalue" - - --- Load a 64 bit word -iselExpr64 (CmmLoad addrTree ty) - | isWord64 ty - = do Amode amode addr_code <- getAmode addrTree - let result - - | AddrRegReg r1 r2 <- amode - = do rlo <- getNewRegNat II32 - tmp <- getNewRegNat II32 - let rhi = getHiVRegFromLo rlo - - return $ ChildCode64 - ( addr_code - `appOL` toOL - [ ADD False False r1 (RIReg r2) tmp - , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi - , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ]) - rlo - - | AddrRegImm r1 (ImmInt i) <- amode - = do rlo <- getNewRegNat II32 - let rhi = getHiVRegFromLo rlo - - return $ ChildCode64 - ( addr_code - `appOL` toOL - [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi - , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ]) - rlo - - result - - --- Add a literal to a 64 bit integer -iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) - = do ChildCode64 code1 r1_lo <- iselExpr64 e1 - let r1_hi = getHiVRegFromLo r1_lo - - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - return $ ChildCode64 - ( toOL - [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo - , ADD True False r1_hi (RIReg g0) r_dst_hi ]) - r_dst_lo - - --- Addition of II64 -iselExpr64 (CmmMachOp (MO_Add width) [e1, e2]) - = do ChildCode64 code1 r1_lo <- iselExpr64 e1 - let r1_hi = getHiVRegFromLo r1_lo - - ChildCode64 code2 r2_lo <- iselExpr64 e2 - let r2_hi = getHiVRegFromLo r2_lo - - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - let code = code1 - `appOL` code2 - `appOL` toOL - [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo - , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ] - - return $ ChildCode64 code r_dst_lo - - -iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_lo = mkVReg uq II32 - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = mkMOV r_src_lo r_dst_lo - mov_hi = mkMOV r_src_hi r_dst_hi - mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg - return ( - ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo - ) - --- Convert something into II64 -iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) - = do - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = 
getHiVRegFromLo r_dst_lo - - -- compute expr and load it into r_dst_lo - (a_reg, a_code) <- getSomeReg expr - - let code = a_code - `appOL` toOL - [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits - , mkRegRegMoveInstr a_reg r_dst_lo ] - - return $ ChildCode64 code r_dst_lo - - -iselExpr64 expr - = pprPanic "iselExpr64(sparc)" (ppr expr) - -#endif /* sparc_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if powerpc_TARGET_ARCH - -getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock) -getI64Amodes addrTree = do - Amode hi_addr addr_code <- getAmode addrTree - case addrOffset hi_addr 4 of - Just lo_addr -> return (hi_addr, lo_addr, addr_code) - Nothing -> do (hi_ptr, code) <- getSomeReg addrTree - return (AddrRegImm hi_ptr (ImmInt 0), - AddrRegImm hi_ptr (ImmInt 4), - code) - -assignMem_I64Code addrTree valueTree = do - (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - let - rhi = getHiVRegFromLo rlo - - -- Big-endian store - mov_hi = ST II32 rhi hi_addr - mov_lo = ST II32 rlo lo_addr - -- in - return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) - -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = mkVReg u_dst II32 - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = MR r_dst_lo r_src_lo - mov_hi = MR r_dst_hi r_src_hi - -- in - return ( - vcode `snocOL` mov_lo `snocOL` mov_hi - ) - -assignReg_I64Code lvalue valueTree - = panic "assignReg_I64Code(powerpc): invalid lvalue" - - --- Don't delete this -- it's very handy for debugging. ---iselExpr64 expr --- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False --- = panic "iselExpr64(???)" - -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do - (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = LD II32 rhi hi_addr - mov_lo = LD II32 rlo lo_addr - return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - -iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty - = return (ChildCode64 nilOL (mkVReg vu II32)) - -iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat II32 - let - half0 = fromIntegral (fromIntegral i :: Word16) - half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16) - half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16) - half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16) - - code = toOL [ - LIS rlo (ImmInt half1), - OR rlo rlo (RIImm $ ImmInt half0), - LIS rhi (ImmInt half3), - OR rlo rlo (RIImm $ ImmInt half2) - ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ ADDC rlo r1lo r2lo, - ADDE rhi r1hi r2hi ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do - (expr_reg,expr_code) <- getSomeReg expr - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = LI rhi (ImmInt 0) - mov_lo = MR rlo expr_reg - return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo -iselExpr64 expr - = pprPanic "iselExpr64(powerpc)" (ppr expr) - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- 
The 'Register' type - --- 'Register's passed up the tree. If the stix code forces the register --- to live in a pre-decided machine register, it comes out as @Fixed@; --- otherwise, it comes out as @Any@, and the parent can decide which --- register to put it in. - -data Register - = Fixed Size Reg InstrBlock - | Any Size (Reg -> InstrBlock) - -swizzleRegisterRep :: Register -> Size -> Register --- Change the width; it's a no-op -swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code -swizzleRegisterRep (Any _ codefn) size = Any size codefn - - --- ----------------------------------------------------------------------------- --- Utils based on getRegister, below - --- The dual to getAnyReg: compute an expression into a register, but --- we don't mind which one it is. -getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) -getSomeReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed _ reg code -> - return (reg, code) - --- ----------------------------------------------------------------------------- --- Grab the Reg for a CmmReg - -getRegisterReg :: CmmReg -> Reg - -getRegisterReg (CmmLocal (LocalReg u pk)) - = mkVReg u (cmmTypeSize pk) - -getRegisterReg (CmmGlobal mid) - = case get_GlobalReg_reg_or_addr mid of - Left (RealReg rrno) -> RealReg rrno - _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) - -- By this stage, the only MagicIds remaining should be the - -- ones which map to a real machine register on this - -- platform. Hence ... - - --- ----------------------------------------------------------------------------- --- Generate code to get a subtree into a Register - --- Don't delete this -- it's very handy for debugging. ---getRegister expr --- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False --- = panic "getRegister(???)" - -getRegister :: CmmExpr -> NatM Register - -#if !x86_64_TARGET_ARCH - -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured - -- register, it can only be used for rip-relative addressing. -getRegister (CmmReg (CmmGlobal PicBaseReg)) - = do - reg <- getPicBaseNat wordSize - return (Fixed wordSize reg nilOL) -#endif - -getRegister (CmmReg reg) - = return (Fixed (cmmTypeSize (cmmRegType reg)) - (getRegisterReg reg) nilOL) - -getRegister tree@(CmmRegOff _ _) - = getRegister (mangleIndexTree tree) - - -#if WORD_SIZE_IN_BITS==32 - -- for 32-bit architectuers, support some 64 -> 32 bit conversions: - -- TO_W_(x), TO_W_(x >> 32) - -getRegister (CmmMachOp (MO_UU_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister (CmmMachOp (MO_SS_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -#endif - --- end of machine-"independent" bit; here we go on the rest... 
- -#if alpha_TARGET_ARCH - -getRegister (StDouble d) - = getBlockIdNat `thenNat` \ lbl -> - getNewRegNat PtrRep `thenNat` \ tmp -> - let code dst = mkSeqInstrs [ - LDATA RoDataSegment lbl [ - DATA TF [ImmLab (rational d)] - ], - LDA tmp (AddrImm (ImmCLbl lbl)), - LD TF dst (AddrReg tmp)] - in - return (Any FF64 code) - -getRegister (StPrim primop [x]) -- unary PrimOps - = case primop of - IntNegOp -> trivialUCode (NEG Q False) x - - NotOp -> trivialUCode NOT x - - FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x - DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x - - OrdOp -> coerceIntCode IntRep x - ChrOp -> chrCode x - - Float2IntOp -> coerceFP2Int x - Int2FloatOp -> coerceInt2FP pr x - Double2IntOp -> coerceFP2Int x - Int2DoubleOp -> coerceInt2FP pr x - - Double2FloatOp -> coerceFltCode x - Float2DoubleOp -> coerceFltCode x - - other_op -> getRegister (StCall fn CCallConv FF64 [x]) - where - fn = case other_op of - FloatExpOp -> fsLit "exp" - FloatLogOp -> fsLit "log" - FloatSqrtOp -> fsLit "sqrt" - FloatSinOp -> fsLit "sin" - FloatCosOp -> fsLit "cos" - FloatTanOp -> fsLit "tan" - FloatAsinOp -> fsLit "asin" - FloatAcosOp -> fsLit "acos" - FloatAtanOp -> fsLit "atan" - FloatSinhOp -> fsLit "sinh" - FloatCoshOp -> fsLit "cosh" - FloatTanhOp -> fsLit "tanh" - DoubleExpOp -> fsLit "exp" - DoubleLogOp -> fsLit "log" - DoubleSqrtOp -> fsLit "sqrt" - DoubleSinOp -> fsLit "sin" - DoubleCosOp -> fsLit "cos" - DoubleTanOp -> fsLit "tan" - DoubleAsinOp -> fsLit "asin" - DoubleAcosOp -> fsLit "acos" - DoubleAtanOp -> fsLit "atan" - DoubleSinhOp -> fsLit "sinh" - DoubleCoshOp -> fsLit "cosh" - DoubleTanhOp -> fsLit "tanh" - where - pr = panic "MachCode.getRegister: no primrep needed for Alpha" - -getRegister (StPrim primop [x, y]) -- dyadic PrimOps - = case primop of - CharGtOp -> trivialCode (CMP LTT) y x - CharGeOp -> trivialCode (CMP LE) y x - CharEqOp -> trivialCode (CMP EQQ) x y - CharNeOp -> int_NE_code x y - CharLtOp -> trivialCode (CMP LTT) x y - CharLeOp -> trivialCode (CMP LE) x y - - IntGtOp -> trivialCode (CMP LTT) y x - IntGeOp -> trivialCode (CMP LE) y x - IntEqOp -> trivialCode (CMP EQQ) x y - IntNeOp -> int_NE_code x y - IntLtOp -> trivialCode (CMP LTT) x y - IntLeOp -> trivialCode (CMP LE) x y - - WordGtOp -> trivialCode (CMP ULT) y x - WordGeOp -> trivialCode (CMP ULE) x y - WordEqOp -> trivialCode (CMP EQQ) x y - WordNeOp -> int_NE_code x y - WordLtOp -> trivialCode (CMP ULT) x y - WordLeOp -> trivialCode (CMP ULE) x y - - AddrGtOp -> trivialCode (CMP ULT) y x - AddrGeOp -> trivialCode (CMP ULE) y x - AddrEqOp -> trivialCode (CMP EQQ) x y - AddrNeOp -> int_NE_code x y - AddrLtOp -> trivialCode (CMP ULT) x y - AddrLeOp -> trivialCode (CMP ULE) x y - - FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y - FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y - FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y - FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y - FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y - FloatLeOp -> cmpF_code (FCMP TF LE) NE x y - - DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y - DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y - DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y - DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y - DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y - DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y - - IntAddOp -> trivialCode (ADD Q False) x y - IntSubOp -> trivialCode (SUB Q False) x y - IntMulOp -> trivialCode (MUL Q False) x y - IntQuotOp -> trivialCode (DIV Q False) x y - IntRemOp -> trivialCode (REM Q False) x y - - WordAddOp -> trivialCode (ADD Q False) x y - WordSubOp -> 
trivialCode (SUB Q False) x y - WordMulOp -> trivialCode (MUL Q False) x y - WordQuotOp -> trivialCode (DIV Q True) x y - WordRemOp -> trivialCode (REM Q True) x y - - FloatAddOp -> trivialFCode W32 (FADD TF) x y - FloatSubOp -> trivialFCode W32 (FSUB TF) x y - FloatMulOp -> trivialFCode W32 (FMUL TF) x y - FloatDivOp -> trivialFCode W32 (FDIV TF) x y - - DoubleAddOp -> trivialFCode W64 (FADD TF) x y - DoubleSubOp -> trivialFCode W64 (FSUB TF) x y - DoubleMulOp -> trivialFCode W64 (FMUL TF) x y - DoubleDivOp -> trivialFCode W64 (FDIV TF) x y - - AddrAddOp -> trivialCode (ADD Q False) x y - AddrSubOp -> trivialCode (SUB Q False) x y - AddrRemOp -> trivialCode (REM Q True) x y - - AndOp -> trivialCode AND x y - OrOp -> trivialCode OR x y - XorOp -> trivialCode XOR x y - SllOp -> trivialCode SLL x y - SrlOp -> trivialCode SRL x y - - ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll" - ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra" - ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl" - - FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y]) - DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y]) - where - {- ------------------------------------------------------------ - Some bizarre special code for getting condition codes into - registers. Integer non-equality is a test for equality - followed by an XOR with 1. (Integer comparisons always set - the result register to 0 or 1.) Floating point comparisons of - any kind leave the result in a floating point register, so we - need to wrangle an integer register out of things. - -} - int_NE_code :: StixTree -> StixTree -> NatM Register - - int_NE_code x y - = trivialCode (CMP EQQ) x y `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst) - in - return (Any IntRep code__2) - - {- ------------------------------------------------------------ - Comments for int_NE_code also apply to cmpF_code - -} - cmpF_code - :: (Reg -> Reg -> Reg -> Instr) - -> Cond - -> StixTree -> StixTree - -> NatM Register - - cmpF_code instr cond x y - = trivialFCode pr instr x y `thenNat` \ register -> - getNewRegNat FF64 `thenNat` \ tmp -> - getBlockIdNat `thenNat` \ lbl -> - let - code = registerCode register tmp - result = registerName register tmp - - code__2 dst = code . mkSeqInstrs [ - OR zeroh (RIImm (ImmInt 1)) dst, - BF cond result (ImmCLbl lbl), - OR zeroh (RIReg zeroh) dst, - NEWBLOCK lbl] - in - return (Any IntRep code__2) - where - pr = panic "trivialU?FCode: does not use PrimRep on Alpha" - ------------------------------------------------------------ - -getRegister (CmmLoad pk mem) - = getAmode mem `thenNat` \ amode -> - let - code = amodeCode amode - src = amodeAddr amode - size = primRepToSize pk - code__2 dst = code . 
mkSeqInstr (LD size dst src) - in - return (Any pk code__2) - -getRegister (StInt i) - | fits8Bits i - = let - code dst = mkSeqInstr (OR zeroh (RIImm src) dst) - in - return (Any IntRep code) - | otherwise - = let - code dst = mkSeqInstr (LDI Q dst src) - in - return (Any IntRep code) - where - src = ImmInt (fromInteger i) - -getRegister leaf - | isJust imm - = let - code dst = mkSeqInstr (LDA dst (AddrImm imm__2)) - in - return (Any PtrRep code) - where - imm = maybeImm leaf - imm__2 = case imm of Just x -> x - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -getRegister (CmmLit (CmmFloat f W32)) = do - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let code dst = - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f W32)] - `consOL` (addr_code `snocOL` - GLD FF32 addr dst) - -- in - return (Any FF32 code) - - -getRegister (CmmLit (CmmFloat d W64)) - | d == 0.0 - = let code dst = unitOL (GLDZ dst) - in return (Any FF64 code) - - | d == 1.0 - = let code dst = unitOL (GLD1 dst) - in return (Any FF64 code) - - | otherwise = do - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let code dst = - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat d W64)] - `consOL` (addr_code `snocOL` - GLD FF64 addr dst) - -- in - return (Any FF64 code) - -#endif /* i386_TARGET_ARCH */ - -#if x86_64_TARGET_ARCH - -getRegister (CmmLit (CmmFloat 0.0 w)) = do - let size = floatSize w - code dst = unitOL (XOR size (OpReg dst) (OpReg dst)) - -- I don't know why there are xorpd, xorps, and pxor instructions. 
- -- They all appear to do the same thing --SDM - return (Any size code) - -getRegister (CmmLit (CmmFloat f w)) = do - lbl <- getNewLabelNat - let code dst = toOL [ - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f w)], - MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - ] - -- in - return (Any size code) - where size = floatSize w - -#endif /* x86_64_TARGET_ARCH */ - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II8) addr - return (Any II32 code) - -getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II8) addr - return (Any II32 code) - -getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II16) addr - return (Any II32 code) - -getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II16) addr - return (Any II32 code) - -#endif - -#if x86_64_TARGET_ARCH - --- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II8) addr - return (Any II64 code) - -getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II8) addr - return (Any II64 code) - -getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II16) addr - return (Any II64 code) - -getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II16) addr - return (Any II64 code) - -getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend - return (Any II64 code) - -getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II32) addr - return (Any II64 code) - -#endif - -#if x86_64_TARGET_ARCH -getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), - CmmLit displacement]) - = return $ Any II64 (\dst -> unitOL $ - LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) -#endif - -#if x86_64_TARGET_ARCH -getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do - x_code <- getAnyReg x - lbl <- getNewLabelNat - let - code dst = x_code dst `appOL` toOL [ - -- This is how gcc does it, so it can't be that bad: - LDATA ReadOnlyData16 [ - CmmAlign 16, - CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x80000000 W32), - CmmStaticLit (CmmInt 0 W32), - CmmStaticLit (CmmInt 0 W32), - CmmStaticLit (CmmInt 0 W32) - ], - XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - -- xorps, so we need the 128-bit constant - -- ToDo: rip-relative - ] - -- - return (Any FF32 code) - -getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do - x_code <- getAnyReg x - lbl <- getNewLabelNat - let - -- This is how gcc does it, so it can't be that bad: - code dst = x_code dst `appOL` toOL [ - LDATA ReadOnlyData16 [ - CmmAlign 16, - CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x8000000000000000 W64), - CmmStaticLit (CmmInt 0 W64) - ], - -- gcc puts an unpck here. Wonder if we need it. 
- XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - -- xorpd, so we need the 128-bit constant - ] - -- - return (Any FF64 code) -#endif - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -getRegister (CmmMachOp mop [x]) -- unary MachOps - = case mop of -#if i386_TARGET_ARCH - MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x - MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x -#endif - - MO_S_Neg w -> triv_ucode NEGI (intSize w) - MO_F_Neg w -> triv_ucode NEGI (floatSize w) - MO_Not w -> triv_ucode NOT (intSize w) - - -- Nop conversions - MO_UU_Conv W32 W8 -> toI8Reg W32 x - MO_SS_Conv W32 W8 -> toI8Reg W32 x - MO_UU_Conv W16 W8 -> toI8Reg W16 x - MO_SS_Conv W16 W8 -> toI8Reg W16 x - MO_UU_Conv W32 W16 -> toI16Reg W32 x - MO_SS_Conv W32 W16 -> toI16Reg W32 x - -#if x86_64_TARGET_ARCH - MO_UU_Conv W64 W32 -> conversionNop II64 x - MO_SS_Conv W64 W32 -> conversionNop II64 x - MO_UU_Conv W64 W16 -> toI16Reg W64 x - MO_SS_Conv W64 W16 -> toI16Reg W64 x - MO_UU_Conv W64 W8 -> toI8Reg W64 x - MO_SS_Conv W64 W8 -> toI8Reg W64 x -#endif - - MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x - MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x - - -- widenings - MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x - MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x - MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x - - MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x - MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x - MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x - -#if x86_64_TARGET_ARCH - MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x - MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x - MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x - MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x - MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x - MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x - -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl. - -- However, we don't want the register allocator to throw it - -- away as an unnecessary reg-to-reg move, so we keep it in - -- the form of a movzl and print it as a movl later. -#endif - -#if i386_TARGET_ARCH - MO_FF_Conv W32 W64 -> conversionNop FF64 x - MO_FF_Conv W64 W32 -> conversionNop FF32 x -#else - MO_FF_Conv W32 W64 -> coerceFP2FP W64 x - MO_FF_Conv W64 W32 -> coerceFP2FP W32 x -#endif - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - other -> pprPanic "getRegister" (pprMachOp mop) - where - triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register - triv_ucode instr size = trivialUCode size (instr size) x - - -- signed or unsigned extension. - integerExtend :: Width -> Width - -> (Size -> Operand -> Operand -> Instr) - -> CmmExpr -> NatM Register - integerExtend from to instr expr = do - (reg,e_code) <- if from == W8 then getByteReg expr - else getSomeReg expr - let - code dst = - e_code `snocOL` - instr (intSize from) (OpReg reg) (OpReg dst) - return (Any (intSize to) code) - - toI8Reg :: Width -> CmmExpr -> NatM Register - toI8Reg new_rep expr - = do codefn <- getAnyReg expr - return (Any (intSize new_rep) codefn) - -- HACK: use getAnyReg to get a byte-addressable register. - -- If the source was a Fixed register, this will add the - -- mov instruction to put it into the desired destination. - -- We're assuming that the destination won't be a fixed - -- non-byte-addressable register; it won't be, because all - -- fixed registers are word-sized. 
- - toI16Reg = toI8Reg -- for now - - conversionNop :: Size -> CmmExpr -> NatM Register - conversionNop new_size expr - = do e_code <- getRegister expr - return (swizzleRegisterRep e_code new_size) - - -getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps - = case mop of - MO_F_Eq w -> condFltReg EQQ x y - MO_F_Ne w -> condFltReg NE x y - MO_F_Gt w -> condFltReg GTT x y - MO_F_Ge w -> condFltReg GE x y - MO_F_Lt w -> condFltReg LTT x y - MO_F_Le w -> condFltReg LE x y - - MO_Eq rep -> condIntReg EQQ x y - MO_Ne rep -> condIntReg NE x y - - MO_S_Gt rep -> condIntReg GTT x y - MO_S_Ge rep -> condIntReg GE x y - MO_S_Lt rep -> condIntReg LTT x y - MO_S_Le rep -> condIntReg LE x y - - MO_U_Gt rep -> condIntReg GU x y - MO_U_Ge rep -> condIntReg GEU x y - MO_U_Lt rep -> condIntReg LU x y - MO_U_Le rep -> condIntReg LEU x y - -#if i386_TARGET_ARCH - MO_F_Add w -> trivialFCode w GADD x y - MO_F_Sub w -> trivialFCode w GSUB x y - MO_F_Quot w -> trivialFCode w GDIV x y - MO_F_Mul w -> trivialFCode w GMUL x y -#endif - -#if x86_64_TARGET_ARCH - MO_F_Add w -> trivialFCode w ADD x y - MO_F_Sub w -> trivialFCode w SUB x y - MO_F_Quot w -> trivialFCode w FDIV x y - MO_F_Mul w -> trivialFCode w MUL x y -#endif - - MO_Add rep -> add_code rep x y - MO_Sub rep -> sub_code rep x y - - MO_S_Quot rep -> div_code rep True True x y - MO_S_Rem rep -> div_code rep True False x y - MO_U_Quot rep -> div_code rep False True x y - MO_U_Rem rep -> div_code rep False False x y - - MO_S_MulMayOflo rep -> imulMayOflo rep x y - - MO_Mul rep -> triv_op rep IMUL - MO_And rep -> triv_op rep AND - MO_Or rep -> triv_op rep OR - MO_Xor rep -> triv_op rep XOR - - {- Shift ops on x86s have constraints on their source, it - either has to be Imm, CL or 1 - => trivialCode is not restrictive enough (sigh.) - -} - MO_Shl rep -> shift_code rep SHL x y {-False-} - MO_U_Shr rep -> shift_code rep SHR x y {-False-} - MO_S_Shr rep -> shift_code rep SAR x y {-False-} - - other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) - where - -------------------- - triv_op width instr = trivialCode width op (Just op) x y - where op = instr (intSize width) - - imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register - imulMayOflo rep a b = do - (a_reg, a_code) <- getNonClobberedReg a - b_code <- getAnyReg b - let - shift_amt = case rep of - W32 -> 31 - W64 -> 63 - _ -> panic "shift_amt" - - size = intSize rep - code = a_code `appOL` b_code eax `appOL` - toOL [ - IMUL2 size (OpReg a_reg), -- result in %edx:%eax - SAR size (OpImm (ImmInt shift_amt)) (OpReg eax), - -- sign extend lower part - SUB size (OpReg edx) (OpReg eax) - -- compare against upper - -- eax==0 if high part == sign extended low part - ] - -- in - return (Fixed size eax code) - - -------------------- - shift_code :: Width - -> (Size -> Operand -> Operand -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register - - {- Case1: shift length as immediate -} - shift_code width instr x y@(CmmLit lit) = do - x_code <- getAnyReg x - let - size = intSize width - code dst - = x_code dst `snocOL` - instr size (OpImm (litToImm lit)) (OpReg dst) - -- in - return (Any size code) - - {- Case2: shift length is complex (non-immediate) - * y must go in %ecx. - * we cannot do y first *and* put its result in %ecx, because - %ecx might be clobbered by x. - * if we do y second, then x cannot be - in a clobbered reg. Also, we cannot clobber x's reg - with the instruction itself. 
- * so we can either: - - do y first, put its result in a fresh tmp, then copy it to %ecx later - - do y second and put its result into %ecx. x gets placed in a fresh - tmp. This is likely to be better, because the reg alloc can - eliminate this reg->reg move here (it won't eliminate the other one, - because the move is into the fixed %ecx). - -} - shift_code width instr x y{-amount-} = do - x_code <- getAnyReg x - let size = intSize width - tmp <- getNewRegNat size - y_code <- getAnyReg y - let - code = x_code tmp `appOL` - y_code ecx `snocOL` - instr size (OpReg ecx) (OpReg tmp) - -- in - return (Fixed size tmp code) - - -------------------- - add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register - add_code rep x (CmmLit (CmmInt y _)) - | is32BitInteger y = add_int rep x y - add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y - where size = intSize rep - - -------------------- - sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register - sub_code rep x (CmmLit (CmmInt y _)) - | is32BitInteger (-y) = add_int rep x (-y) - sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y - - -- our three-operand add instruction: - add_int width x y = do - (x_reg, x_code) <- getSomeReg x - let - size = intSize width - imm = ImmInt (fromInteger y) - code dst - = x_code `snocOL` - LEA size - (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) - (OpReg dst) - -- - return (Any size code) - - ---------------------- - div_code width signed quotient x y = do - (y_op, y_code) <- getRegOrMem y -- cannot be clobbered - x_code <- getAnyReg x - let - size = intSize width - widen | signed = CLTD size - | otherwise = XOR size (OpReg edx) (OpReg edx) - - instr | signed = IDIV - | otherwise = DIV - - code = y_code `appOL` - x_code eax `appOL` - toOL [widen, instr size y_op] - - result | quotient = eax - | otherwise = edx - - -- in - return (Fixed size result code) - - -getRegister (CmmLoad mem pk) - | isFloatType pk - = do - Amode src mem_code <- getAmode mem - let - size = cmmTypeSize pk - code dst = mem_code `snocOL` - IF_ARCH_i386(GLD size src dst, - MOV size (OpAddr src) (OpReg dst)) - return (Any size code) - -#if i386_TARGET_ARCH -getRegister (CmmLoad mem pk) - | not (isWord64 pk) - = do - code <- intLoadCode instr mem - return (Any size code) - where - width = typeWidth pk - size = intSize width - instr = case width of - W8 -> MOVZxL II8 - _other -> MOV size - -- We always zero-extend 8-bit loads, if we - -- can't think of anything better. This is because - -- we can't guarantee access to an 8-bit variant of every register - -- (esi and edi don't have 8-bit variants), so to make things - -- simpler we do our 8-bit arithmetic with full 32-bit registers. -#endif - -#if x86_64_TARGET_ARCH --- Simpler memory load code on x86_64 -getRegister (CmmLoad mem pk) - = do - code <- intLoadCode (MOV size) mem - return (Any size code) - where size = intSize $ typeWidth pk -#endif - -getRegister (CmmLit (CmmInt 0 width)) - = let - size = intSize width - - -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits - adj_size = case size of II64 -> II32; _ -> size - size1 = IF_ARCH_i386( size, adj_size ) - code dst - = unitOL (XOR size1 (OpReg dst) (OpReg dst)) - in - return (Any size code) - -#if x86_64_TARGET_ARCH - -- optimisation for loading small literals on x86_64: take advantage - -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit - -- instruction forms are shorter. 
-getRegister (CmmLit lit) - | isWord64 (cmmLitType lit), not (isBigLit lit) - = let - imm = litToImm lit - code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst)) - in - return (Any II64 code) - where - isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff - isBigLit _ = False - -- note1: not the same as (not.is32BitLit), because that checks for - -- signed literals that fit in 32 bits, but we want unsigned - -- literals here. - -- note2: all labels are small, because we're assuming the - -- small memory model (see gcc docs, -mcmodel=small). -#endif - -getRegister (CmmLit lit) - = let - size = cmmTypeSize (cmmLitType lit) - imm = litToImm lit - code dst = unitOL (MOV size (OpImm imm) (OpReg dst)) - in - return (Any size code) - -getRegister other = pprPanic "getRegister(x86)" (ppr other) - - -intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr - -> NatM (Reg -> InstrBlock) -intLoadCode instr mem = do - Amode src mem_code <- getAmode mem - return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst)) - --- Compute an expression into *any* register, adding the appropriate --- move instruction if necessary. -getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock) -getAnyReg expr = do - r <- getRegister expr - anyReg r - -anyReg :: Register -> NatM (Reg -> InstrBlock) -anyReg (Any _ code) = return code -anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst) - --- A bit like getSomeReg, but we want a reg that can be byte-addressed. --- Fixed registers might not be byte-addressable, so we make sure we've --- got a temporary, inserting an extra reg copy if necessary. -getByteReg :: CmmExpr -> NatM (Reg, InstrBlock) -#if x86_64_TARGET_ARCH -getByteReg = getSomeReg -- all regs are byte-addressable on x86_64 -#else -getByteReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed rep reg code - | isVirtualReg reg -> return (reg,code) - | otherwise -> do - tmp <- getNewRegNat rep - return (tmp, code `snocOL` reg2reg rep reg tmp) - -- ToDo: could optimise slightly by checking for byte-addressable - -- real registers, but that will happen very rarely if at all. -#endif - --- Another variant: this time we want the result in a register that cannot --- be modified by code to evaluate an arbitrary expression. -getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock) -getNonClobberedReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed rep reg code - -- only free regs can be clobbered - | RealReg rr <- reg, isFastTrue (freeReg rr) -> do - tmp <- getNewRegNat rep - return (tmp, code `snocOL` reg2reg rep reg tmp) - | otherwise -> - return (reg, code) - -reg2reg :: Size -> Reg -> Reg -> Instr -reg2reg size src dst -#if i386_TARGET_ARCH - | isFloatSize size = GMOV src dst -#endif - | otherwise = MOV size (OpReg src) (OpReg dst) - -#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - --- getRegister :: CmmExpr -> NatM Register - --- Load a literal float into a float register. --- The actual literal is stored in a new data area, and we load it --- at runtime. 
-getRegister (CmmLit (CmmFloat f W32)) = do - - -- a label for the new data area - lbl <- getNewLabelNat - tmp <- getNewRegNat II32 - - let code dst = toOL [ - -- the data area - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f W32)], - - -- load the literal - SETHI (HI (ImmCLbl lbl)) tmp, - LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] - - return (Any FF32 code) - -getRegister (CmmLit (CmmFloat d W64)) = do - lbl <- getNewLabelNat - tmp <- getNewRegNat II32 - let code dst = toOL [ - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat d W64)], - SETHI (HI (ImmCLbl lbl)) tmp, - LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] - return (Any FF64 code) - -getRegister (CmmMachOp mop [x]) -- unary MachOps - = case mop of - MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x - MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x - - MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x - MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x - - MO_FF_Conv W64 W32-> coerceDbl2Flt x - MO_FF_Conv W32 W64-> coerceFlt2Dbl x - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - -- Conversions which are a nop on sparc - MO_UU_Conv from to - | from == to -> conversionNop (intSize to) x - MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8)) - MO_UU_Conv W32 to -> conversionNop (intSize to) x - MO_SS_Conv W32 to -> conversionNop (intSize to) x - - MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x - MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x - MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x - - -- sign extension - MO_SS_Conv W8 W32 -> integerExtend W8 W32 x - MO_SS_Conv W16 W32 -> integerExtend W16 W32 x - MO_SS_Conv W8 W16 -> integerExtend W8 W16 x - - other_op -> panic ("Unknown unary mach op: " ++ show mop) - where - - -- | sign extend and widen - integerExtend - :: Width -- ^ width of source expression - -> Width -- ^ width of result - -> CmmExpr -- ^ source expression - -> NatM Register - - integerExtend from to expr - = do -- load the expr into some register - (reg, e_code) <- getSomeReg expr - tmp <- getNewRegNat II32 - let bitCount - = case (from, to) of - (W8, W32) -> 24 - (W16, W32) -> 16 - (W8, W16) -> 24 - let code dst - = e_code - - -- local shift word left to load the sign bit - `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp - - -- arithmetic shift right to sign extend - `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst - - return (Any (intSize to) code) - - - conversionNop new_rep expr - = do e_code <- getRegister expr - return (swizzleRegisterRep e_code new_rep) - -getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps - = case mop of - MO_Eq rep -> condIntReg EQQ x y - MO_Ne rep -> condIntReg NE x y - - MO_S_Gt rep -> condIntReg GTT x y - MO_S_Ge rep -> condIntReg GE x y - MO_S_Lt rep -> condIntReg LTT x y - MO_S_Le rep -> condIntReg LE x y - - MO_U_Gt W32 -> condIntReg GTT x y - MO_U_Ge W32 -> condIntReg GE x y - MO_U_Lt W32 -> condIntReg LTT x y - MO_U_Le W32 -> condIntReg LE x y - - MO_U_Gt W16 -> condIntReg GU x y - MO_U_Ge W16 -> condIntReg GEU x y - MO_U_Lt W16 -> condIntReg LU x y - MO_U_Le W16 -> condIntReg LEU x y - - MO_Add W32 -> trivialCode W32 (ADD False False) x y - MO_Sub W32 -> trivialCode W32 (SUB False False) x y - - MO_S_MulMayOflo rep -> imulMayOflo rep x y - - MO_S_Quot W32 -> idiv True False x y - MO_U_Quot W32 -> idiv False False x y - - MO_S_Rem W32 -> irem True x y - MO_U_Rem W32 -> irem False x y - - MO_F_Eq w -> condFltReg EQQ x y - 
MO_F_Ne w -> condFltReg NE x y - - MO_F_Gt w -> condFltReg GTT x y - MO_F_Ge w -> condFltReg GE x y - MO_F_Lt w -> condFltReg LTT x y - MO_F_Le w -> condFltReg LE x y - - MO_F_Add w -> trivialFCode w FADD x y - MO_F_Sub w -> trivialFCode w FSUB x y - MO_F_Mul w -> trivialFCode w FMUL x y - MO_F_Quot w -> trivialFCode w FDIV x y - - MO_And rep -> trivialCode rep (AND False) x y - MO_Or rep -> trivialCode rep (OR False) x y - MO_Xor rep -> trivialCode rep (XOR False) x y - - MO_Mul rep -> trivialCode rep (SMUL False) x y - - MO_Shl rep -> trivialCode rep SLL x y - MO_U_Shr rep -> trivialCode rep SRL x y - MO_S_Shr rep -> trivialCode rep SRA x y - -{- - MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 - [promote x, promote y]) - where promote x = CmmMachOp MO_F32_to_Dbl [x] - MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 - [x, y]) --} - other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) - where - -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) - - - -- | Generate an integer division instruction. - idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register - - -- For unsigned division with a 32 bit numerator, - -- we can just clear the Y register. - idiv False cc x y = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ WRY g0 g0 - , UDIV cc a_reg (RIReg b_reg) dst] - - return (Any II32 code) - - - -- For _signed_ division with a 32 bit numerator, - -- we have to sign extend the numerator into the Y register. - idiv True cc x y = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend - , SRA tmp (RIImm (ImmInt 16)) tmp - - , WRY tmp g0 - , SDIV cc a_reg (RIReg b_reg) dst] - - return (Any II32 code) - - - -- | Do an integer remainder. - -- - -- NOTE: The SPARC v8 architecture manual says that integer division - -- instructions _may_ generate a remainder, depending on the implementation. - -- If so it is _recommended_ that the remainder is placed in the Y register. - -- - -- The UltraSparc 2007 manual says Y is _undefined_ after division. - -- - -- The SPARC T2 doesn't store the remainder, not sure about the others. - -- It's probably best not to worry about it, and just generate our own - -- remainders. - -- - irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register - - -- For unsigned operands: - -- Division is between a 64 bit numerator and a 32 bit denominator, - -- so we still have to clear the Y register. - irem False x y = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp_reg <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ WRY g0 g0 - , UDIV False a_reg (RIReg b_reg) tmp_reg - , UMUL False tmp_reg (RIReg b_reg) tmp_reg - , SUB False False a_reg (RIReg tmp_reg) dst] - - return (Any II32 code) - - - -- For signed operands: - -- Make sure to sign extend into the Y register, or the remainder - -- will have the wrong sign when the numerator is negative. - -- - -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits, - -- not the full 32. Not sure why this is, something to do with overflow? - -- If anyone cares enough about the speed of signed remainder they - -- can work it out themselves (then tell me). 
-- BL 2009/01/20 - - irem True x y = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp1_reg <- getNewRegNat II32 - tmp2_reg <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend - , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend - , WRY tmp1_reg g0 - - , SDIV False a_reg (RIReg b_reg) tmp2_reg - , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg - , SUB False False a_reg (RIReg tmp2_reg) dst] - - return (Any II32 code) - - - imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register - imulMayOflo rep a b = do - (a_reg, a_code) <- getSomeReg a - (b_reg, b_code) <- getSomeReg b - res_lo <- getNewRegNat II32 - res_hi <- getNewRegNat II32 - let - shift_amt = case rep of - W32 -> 31 - W64 -> 63 - _ -> panic "shift_amt" - code dst = a_code `appOL` b_code `appOL` - toOL [ - SMUL False a_reg (RIReg b_reg) res_lo, - RDY res_hi, - SRA res_lo (RIImm (ImmInt shift_amt)) res_lo, - SUB False False res_lo (RIReg res_hi) dst - ] - return (Any II32 code) - -getRegister (CmmLoad mem pk) = do - Amode src code <- getAmode mem - let - code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst - return (Any (cmmTypeSize pk) code__2) - -getRegister (CmmLit (CmmInt i _)) - | fits13Bits i - = let - src = ImmInt (fromInteger i) - code dst = unitOL (OR False g0 (RIImm src) dst) - in - return (Any II32 code) - -getRegister (CmmLit lit) - = let rep = cmmLitType lit - imm = litToImm lit - code dst = toOL [ - SETHI (HI imm) dst, - OR False dst (RIImm (LO imm)) dst] - in return (Any II32 code) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH -getRegister (CmmLoad mem pk) - | not (isWord64 pk) - = do - Amode addr addr_code <- getAmode mem - let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk) - addr_code `snocOL` LD size dst addr - return (Any size code) - where size = cmmTypeSize pk - --- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode mem - return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) - --- Note: there is no Load Byte Arithmetic instruction, so no signed case here - -getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode mem - return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) - -getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode mem - return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) - -getRegister (CmmMachOp mop [x]) -- unary MachOps - = case mop of - MO_Not rep -> triv_ucode_int rep NOT - - MO_F_Neg w -> triv_ucode_float w FNEG - MO_S_Neg w -> triv_ucode_int w NEG - - MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x - MO_FF_Conv W32 W64 -> conversionNop FF64 x - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - MO_SS_Conv from to - | from == to -> conversionNop (intSize to) x - - -- narrowing is a nop: we treat the high bits as undefined - MO_SS_Conv W32 to -> conversionNop (intSize to) x - MO_SS_Conv W16 W8 -> conversionNop II8 x - MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8) - MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16) - - MO_UU_Conv from to - | from == to -> conversionNop (intSize to) x - -- narrowing is a nop: we treat the high bits as undefined - MO_UU_Conv W32 to -> conversionNop (intSize to) x - MO_UU_Conv W16 W8 -> conversionNop II8 x - MO_UU_Conv W8 to -> trivialCode to 
False AND x (CmmLit (CmmInt 255 W32)) - MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32)) - - where - triv_ucode_int width instr = trivialUCode (intSize width) instr x - triv_ucode_float width instr = trivialUCode (floatSize width) instr x - - conversionNop new_size expr - = do e_code <- getRegister expr - return (swizzleRegisterRep e_code new_size) - -getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps - = case mop of - MO_F_Eq w -> condFltReg EQQ x y - MO_F_Ne w -> condFltReg NE x y - MO_F_Gt w -> condFltReg GTT x y - MO_F_Ge w -> condFltReg GE x y - MO_F_Lt w -> condFltReg LTT x y - MO_F_Le w -> condFltReg LE x y - - MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y) - MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y) - - MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y) - MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y) - MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y) - MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y) - - MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y) - - MO_F_Add w -> triv_float w FADD - MO_F_Sub w -> triv_float w FSUB - MO_F_Mul w -> triv_float w FMUL - MO_F_Quot w -> triv_float w FDIV - - -- optimize addition with 32-bit immediate - -- (needed for PIC) - MO_Add W32 -> - case y of - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm) - -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep) - CmmLit lit - -> do - (src, srcCode) <- getSomeReg x - let imm = litToImm lit - code dst = srcCode `appOL` toOL [ - ADDIS dst src (HA imm), - ADD dst dst (RIImm (LO imm)) - ] - return (Any II32 code) - _ -> trivialCode W32 True ADD x y - - MO_Add rep -> trivialCode rep True ADD x y - MO_Sub rep -> - case y of -- subfi ('subtract from' with immediate) doesn't exist - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) - -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) - _ -> trivialCodeNoImm' (intSize rep) SUBF y x - - MO_Mul rep -> trivialCode rep True MULLW x y - - MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y - - MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented" - MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented" - - MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y) - MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y) - - MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y) - MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y) - - MO_And rep -> trivialCode rep False AND x y - MO_Or rep -> trivialCode rep False OR x y - MO_Xor rep -> trivialCode rep False XOR x y - - MO_Shl rep -> trivialCode rep False SLW x y - MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y - MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y - where - triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register - triv_float width instr = trivialCodeNoImm (floatSize width) instr x y - -getRegister (CmmLit (CmmInt i rep)) - | Just imm <- makeImmediate rep True i - = let - code dst = unitOL (LI dst imm) - in - return (Any (intSize rep) code) - 
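The HA/LO immediate pairs used here (LIS/ADDIS followed by ADD or OR) rely on splitting a 32-bit constant into a high-adjusted half and a signed 16-bit low half. A standalone sketch of that arithmetic, under a hypothetical helper name (haLoSplit is not a function in this module):

    import Data.Bits (shiftR)
    import Data.Int (Int16, Int32)

    -- 'lo' is sign-extended from 16 bits by the hardware, so the high
    -- half must be pre-adjusted upward when bit 15 of the constant is set.
    haLoSplit :: Int32 -> (Int32, Int32)
    haLoSplit n = (ha, lo)
      where
        lo = fromIntegral (fromIntegral n :: Int16)   -- signed low 16 bits
        ha = (n - lo) `shiftR` 16                     -- high half, pre-adjusted

    -- Reassembly: (ha `shiftL` 16) + lo == n, modulo 2^32 as on the hardware.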
-getRegister (CmmLit (CmmFloat f frep)) = do - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let size = floatSize frep - code dst = - LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f frep)] - `consOL` (addr_code `snocOL` LD size dst addr) - return (Any size code) - -getRegister (CmmLit lit) - = let rep = cmmLitType lit - imm = litToImm lit - code dst = toOL [ - LIS dst (HA imm), - ADD dst dst (RIImm (LO imm)) - ] - in return (Any (cmmTypeSize rep) code) - -getRegister other = pprPanic "getRegister(ppc)" (pprExpr other) - - -- extend?Rep: wrap integer expression of type rep - -- in a conversion to II32 -extendSExpr W32 x = x -extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x] -extendUExpr W32 x = x -extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x] - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- The 'Amode' type: Memory addressing modes passed up the tree. - -data Amode = Amode AddrMode InstrBlock - -{- -Now, given a tree (the argument to a CmmLoad) that references memory, -produce a suitable addressing mode. - -A Rule of the Game (tm) for Amodes: use of the addr bit must -immediately follow use of the code part, since the code part puts -values in registers which the addr then refers to. So you can't put -anything in between, lest it overwrite some of those registers. If -you need to do some other computation between the code part and use of -the addr bit, first store the effective address from the amode in a -temporary, then do the other computation, and then use the temporary: - - code - LEA amode, tmp - ... other computation ... - ... (tmp) ... --} - -getAmode :: CmmExpr -> NatM Amode -getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -getAmode (StPrim IntSubOp [x, StInt i]) - = getNewRegNat PtrRep `thenNat` \ tmp -> - getRegister x `thenNat` \ register -> - let - code = registerCode register tmp - reg = registerName register tmp - off = ImmInt (-(fromInteger i)) - in - return (Amode (AddrRegImm reg off) code) - -getAmode (StPrim IntAddOp [x, StInt i]) - = getNewRegNat PtrRep `thenNat` \ tmp -> - getRegister x `thenNat` \ register -> - let - code = registerCode register tmp - reg = registerName register tmp - off = ImmInt (fromInteger i) - in - return (Amode (AddrRegImm reg off) code) - -getAmode leaf - | isJust imm - = return (Amode (AddrImm imm__2) id) - where - imm = maybeImm leaf - imm__2 = case imm of Just x -> x - -getAmode other - = getNewRegNat PtrRep `thenNat` \ tmp -> - getRegister other `thenNat` \ register -> - let - code = registerCode register tmp - reg = registerName register tmp - in - return (Amode (AddrReg reg) code) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if x86_64_TARGET_ARCH - -getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), - CmmLit displacement]) - = return $ Amode (ripRel (litToImm displacement)) nilOL - -#endif - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- This is all just ridiculous, since it carefully undoes --- what mangleIndexTree has just done. -getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)]) - | is32BitLit lit - -- ASSERT(rep == II32)??? 
- = do (x_reg, x_code) <- getSomeReg x - let off = ImmInt (-(fromInteger i)) - return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) - -getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)]) - | is32BitLit lit - -- ASSERT(rep == II32)??? - = do (x_reg, x_code) <- getSomeReg x - let off = ImmInt (fromInteger i) - return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) - --- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be --- recognised by the next rule. -getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _), - b@(CmmLit _)]) - = getAmode (CmmMachOp (MO_Add rep) [b,a]) - -getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) - [y, CmmLit (CmmInt shift _)]]) - | shift == 0 || shift == 1 || shift == 2 || shift == 3 - = x86_complex_amode x y shift 0 - -getAmode (CmmMachOp (MO_Add rep) - [x, CmmMachOp (MO_Add _) - [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)], - CmmLit (CmmInt offset _)]]) - | (shift == 0 || shift == 1 || shift == 2 || shift == 3) - && is32BitInteger offset - = x86_complex_amode x y shift offset - -getAmode (CmmMachOp (MO_Add rep) [x,y]) - = x86_complex_amode x y 0 0 - -getAmode (CmmLit lit) | is32BitLit lit - = return (Amode (ImmAddr (litToImm lit) 0) nilOL) - -getAmode expr = do - (reg,code) <- getSomeReg expr - return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code) - - -x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode -x86_complex_amode base index shift offset - = do (x_reg, x_code) <- getNonClobberedReg base - -- x must be in a temp, because it has to stay live over y_code - -- we could compare x_reg and y_reg and do something better here... - (y_reg, y_code) <- getSomeReg index - let - code = x_code `appOL` y_code - base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8 - return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset))) - code) - -#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)]) - | fits13Bits (-i) - = do - (reg, code) <- getSomeReg x - let - off = ImmInt (-(fromInteger i)) - return (Amode (AddrRegImm reg off) code) - - -getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)]) - | fits13Bits i - = do - (reg, code) <- getSomeReg x - let - off = ImmInt (fromInteger i) - return (Amode (AddrRegImm reg off) code) - -getAmode (CmmMachOp (MO_Add rep) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - let - code = codeX `appOL` codeY - return (Amode (AddrRegReg regX regY) code) - -getAmode (CmmLit lit) - = do - let imm__2 = litToImm lit - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - - let code = toOL [ SETHI (HI imm__2) tmp1 - , OR False tmp1 (RIImm (LO imm__2)) tmp2] - - return (Amode (AddrRegReg tmp2 g0) code) - -getAmode other - = do - (reg, code) <- getSomeReg other - let - off = ImmInt 0 - return (Amode (AddrRegImm reg off) code) - -#endif /* sparc_TARGET_ARCH */ - -#ifdef powerpc_TARGET_ARCH -getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W32 True (-i) - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - - -getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W32 True i - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - - -- optimize addition with 32-bit immediate - -- (needed for 
PIC) -getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit]) - = do - tmp <- getNewRegNat II32 - (src, srcCode) <- getSomeReg x - let imm = litToImm lit - code = srcCode `snocOL` ADDIS tmp src (HA imm) - return (Amode (AddrRegImm tmp (LO imm)) code) - -getAmode (CmmLit lit) - = do - tmp <- getNewRegNat II32 - let imm = litToImm lit - code = unitOL (LIS tmp (HA imm)) - return (Amode (AddrRegImm tmp (LO imm)) code) - -getAmode (CmmMachOp (MO_Add W32) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) - -getAmode other - = do - (reg, code) <- getSomeReg other - let - off = ImmInt 0 - return (Amode (AddrRegImm reg off) code) -#endif /* powerpc_TARGET_ARCH */ - --- ----------------------------------------------------------------------------- --- getOperand: sometimes any operand will do. - --- getNonClobberedOperand: the value of the operand will remain valid across --- the computation of an arbitrary expression, unless the expression --- is computed directly into a register which the operand refers to --- (see trivialCode where this function is used for an example). - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) -#if x86_64_TARGET_ARCH -getNonClobberedOperand (CmmLit lit) - | isSuitableFloatingPointLit lit = do - lbl <- getNewLabelNat - let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit lit]) - return (OpAddr (ripRel (ImmCLbl lbl)), code) -#endif -getNonClobberedOperand (CmmLit lit) - | is32BitLit lit && not (isFloatType (cmmLitType lit)) = - return (OpImm (litToImm lit), nilOL) -getNonClobberedOperand (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - (src',save_code) <- - if (amodeCouldBeClobbered src) - then do - tmp <- getNewRegNat wordSize - return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), - unitOL (LEA II32 (OpAddr src) (OpReg tmp))) - else - return (src, nilOL) - return (OpAddr src', save_code `appOL` mem_code) -getNonClobberedOperand e = do - (reg, code) <- getNonClobberedReg e - return (OpReg reg, code) - -amodeCouldBeClobbered :: AddrMode -> Bool -amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode) - -regClobbered (RealReg rr) = isFastTrue (freeReg rr) -regClobbered _ = False - --- getOperand: the operand is not required to remain valid across the --- computation of an arbitrary expression. -getOperand :: CmmExpr -> NatM (Operand, InstrBlock) -#if x86_64_TARGET_ARCH -getOperand (CmmLit lit) - | isSuitableFloatingPointLit lit = do - lbl <- getNewLabelNat - let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit lit]) - return (OpAddr (ripRel (ImmCLbl lbl)), code) -#endif -getOperand (CmmLit lit) - | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do - return (OpImm (litToImm lit), nilOL) -getOperand (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - return (OpAddr src, mem_code) -getOperand e = do - (reg, code) <- getSomeReg e - return (OpReg reg, code) - -isOperand :: CmmExpr -> Bool -isOperand (CmmLoad _ _) = True -isOperand (CmmLit lit) = is32BitLit lit - || isSuitableFloatingPointLit lit -isOperand _ = False - --- if we want a floating-point literal as an operand, we can --- use it directly from memory. However, if the literal is --- zero, we're better off generating it into a register using --- xor. 
-isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 -isSuitableFloatingPointLit _ = False - -getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) -getRegOrMem (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - return (OpAddr src, mem_code) -getRegOrMem e = do - (reg, code) <- getNonClobberedReg e - return (OpReg reg, code) - -#if x86_64_TARGET_ARCH -is32BitLit (CmmInt i W64) = is32BitInteger i - -- assume that labels are in the range 0-2^31-1: this assumes the - -- small memory model (see gcc docs, -mcmodel=small). -#endif -is32BitLit x = True -#endif - -is32BitInteger :: Integer -> Bool -is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 - where i64 = fromIntegral i :: Int64 - -- a CmmInt is intended to be truncated to the appropriate - -- number of bits, so here we truncate it to Int64. This is - -- important because e.g. -1 as a CmmInt might be either - -- -1 or 18446744073709551615. - --- ----------------------------------------------------------------------------- --- The 'CondCode' type: Condition codes passed up the tree. - -data CondCode = CondCode Bool Cond InstrBlock - --- Set up a condition code for a conditional branch. - -getCondCode :: CmmExpr -> NatM CondCode - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH -getCondCode = panic "MachCode.getCondCode: not on Alphas" -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH --- yes, they really do seem to want exactly the same! - -getCondCode (CmmMachOp mop [x, y]) - = - case mop of - MO_F_Eq W32 -> condFltCode EQQ x y - MO_F_Ne W32 -> condFltCode NE x y - MO_F_Gt W32 -> condFltCode GTT x y - MO_F_Ge W32 -> condFltCode GE x y - MO_F_Lt W32 -> condFltCode LTT x y - MO_F_Le W32 -> condFltCode LE x y - - MO_F_Eq W64 -> condFltCode EQQ x y - MO_F_Ne W64 -> condFltCode NE x y - MO_F_Gt W64 -> condFltCode GTT x y - MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode LTT x y - MO_F_Le W64 -> condFltCode LE x y - - MO_Eq rep -> condIntCode EQQ x y - MO_Ne rep -> condIntCode NE x y - - MO_S_Gt rep -> condIntCode GTT x y - MO_S_Ge rep -> condIntCode GE x y - MO_S_Lt rep -> condIntCode LTT x y - MO_S_Le rep -> condIntCode LE x y - - MO_U_Gt rep -> condIntCode GU x y - MO_U_Ge rep -> condIntCode GEU x y - MO_U_Lt rep -> condIntCode LU x y - MO_U_Le rep -> condIntCode LEU x y - - other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) - -getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) - -#elif powerpc_TARGET_ARCH - --- almost the same as everywhere else - but we need to --- extend small integers to 32 bit first - -getCondCode (CmmMachOp mop [x, y]) - = case mop of - MO_F_Eq W32 -> condFltCode EQQ x y - MO_F_Ne W32 -> condFltCode NE x y - MO_F_Gt W32 -> condFltCode GTT x y - MO_F_Ge W32 -> condFltCode GE x y - MO_F_Lt W32 -> condFltCode LTT x y - MO_F_Le W32 -> condFltCode LE x y - - MO_F_Eq W64 -> condFltCode EQQ x y - MO_F_Ne W64 -> condFltCode NE x y - MO_F_Gt W64 -> condFltCode GTT x y - MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode LTT x y - MO_F_Le W64 -> condFltCode LE x y - - MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y) - MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y) - - MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y) - MO_S_Ge rep -> condIntCode GE 
(extendSExpr rep x) (extendSExpr rep y) - MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y) - MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y) - - MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y) - - other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) - -getCondCode other = panic "getCondCode(2)(powerpc)" - - -#endif - - --- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be --- passed back up the tree. - -condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode - -#if alpha_TARGET_ARCH -condIntCode = panic "MachCode.condIntCode: not on Alphas" -condFltCode = panic "MachCode.condFltCode: not on Alphas" -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- memory vs immediate -condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do - Amode x_addr x_code <- getAmode x - let - imm = litToImm lit - code = x_code `snocOL` - CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr) - -- - return (CondCode False cond code) - --- anything vs zero, using a mask --- TODO: Add some sanity checking!!!! -condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk)) - | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit - = do - (x_reg, x_code) <- getSomeReg x - let - code = x_code `snocOL` - TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg) - -- - return (CondCode False cond code) - --- anything vs zero -condIntCode cond x (CmmLit (CmmInt 0 pk)) = do - (x_reg, x_code) <- getSomeReg x - let - code = x_code `snocOL` - TEST (intSize pk) (OpReg x_reg) (OpReg x_reg) - -- - return (CondCode False cond code) - --- anything vs operand -condIntCode cond x y | isOperand y = do - (x_reg, x_code) <- getNonClobberedReg x - (y_op, y_code) <- getOperand y - let - code = x_code `appOL` y_code `snocOL` - CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg) - -- in - return (CondCode False cond code) - --- anything vs anything -condIntCode cond x y = do - (y_reg, y_code) <- getNonClobberedReg y - (x_op, x_code) <- getRegOrMem x - let - code = y_code `appOL` - x_code `snocOL` - CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op - -- in - return (CondCode False cond code) -#endif - -#if i386_TARGET_ARCH -condFltCode cond x y - = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do - (x_reg, x_code) <- getNonClobberedReg x - (y_reg, y_code) <- getSomeReg y - let - code = x_code `appOL` y_code `snocOL` - GCMP cond x_reg y_reg - -- The GCMP insn does the test and sets the zero flag if comparable - -- and true. Hence we always supply EQQ as the condition to test. - return (CondCode True EQQ code) -#endif /* i386_TARGET_ARCH */ - -#if x86_64_TARGET_ARCH --- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be --- an operand, but the right must be a reg. We can probably do better --- than this general case... -condFltCode cond x y = do - (x_reg, x_code) <- getNonClobberedReg x - (y_op, y_code) <- getOperand y - let - code = x_code `appOL` - y_code `snocOL` - CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg) - -- NB(1): we need to use the unsigned comparison operators on the - -- result of this comparison. 
- -- in - return (CondCode True (condToUnsigned cond) code) -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -condIntCode cond x (CmmLit (CmmInt y rep)) - | fits13Bits y - = do - (src1, code) <- getSomeReg x - let - src2 = ImmInt (fromInteger y) - code' = code `snocOL` SUB False True src1 (RIImm src2) g0 - return (CondCode False cond code') - -condIntCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code__2 = code1 `appOL` code2 `snocOL` - SUB False True src1 (RIReg src2) g0 - return (CondCode False cond code__2) - ------------ -condFltCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp <- getNewRegNat FF64 - let - promote x = FxTOy FF32 FF64 x tmp - - pk1 = cmmExprType x - pk2 = cmmExprType y - - code__2 = - if pk1 `cmmEqType` pk2 then - code1 `appOL` code2 `snocOL` - FCMP True (cmmTypeSize pk1) src1 src2 - else if typeWidth pk1 == W32 then - code1 `snocOL` promote src1 `appOL` code2 `snocOL` - FCMP True FF64 tmp src2 - else - code1 `appOL` code2 `snocOL` promote src2 `snocOL` - FCMP True FF64 src1 tmp - return (CondCode True cond code__2) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH --- ###FIXME: I16 and I8! -condIntCode cond x (CmmLit (CmmInt y rep)) - | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y - = do - (src1, code) <- getSomeReg x - let - code' = code `snocOL` - (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2) - return (CondCode False cond code') - -condIntCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code' = code1 `appOL` code2 `snocOL` - (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2) - return (CondCode False cond code') - -condFltCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code' = code1 `appOL` code2 `snocOL` FCMP src1 src2 - code'' = case cond of -- twiddle CR to handle unordered case - GE -> code' `snocOL` CRNOR ltbit eqbit gtbit - LE -> code' `snocOL` CRNOR gtbit eqbit ltbit - _ -> code' - where - ltbit = 0 ; eqbit = 2 ; gtbit = 1 - return (CondCode True cond code'') - -#endif /* powerpc_TARGET_ARCH */ - --- ----------------------------------------------------------------------------- --- Generating assignments - --- Assignments are really at the heart of the whole code generation --- business. Almost all top-level nodes of any real importance are --- assignments, which correspond to loads, stores, or register --- transfers. If we're really lucky, some of the register transfers --- will go away, because we can use the destination register to --- complete the code generation for the right hand side. This only --- fails when the right hand side is forced into a fixed register --- (e.g. the result of a call). 
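As a roadmap for the four generators whose signatures follow, here is roughly how the statement-level driver elsewhere in this module dispatches on an assignment's type; this is a paraphrase under an illustrative name (stmtToInstrsSketch), not the verbatim source:

    stmtToInstrsSketch :: CmmStmt -> NatM InstrBlock
    stmtToInstrsSketch (CmmAssign reg src)
      | isFloatType ty = assignReg_FltCode size reg src
      | isWord64 ty    = assignReg_I64Code reg src       -- 32-bit targets only
      | otherwise      = assignReg_IntCode size reg src
      where ty   = cmmRegType reg
            size = cmmTypeSize ty
    stmtToInstrsSketch (CmmStore addr src)
      | isFloatType ty = assignMem_FltCode size addr src
      | isWord64 ty    = assignMem_I64Code addr src      -- 32-bit targets only
      | otherwise      = assignMem_IntCode size addr src
      where ty   = cmmExprType src
            size = cmmTypeSize ty
    stmtToInstrsSketch _ = panic "stmtToInstrsSketch: assignments only"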
- -assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock - -assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -assignIntCode pk (CmmLoad dst _) src - = getNewRegNat IntRep `thenNat` \ tmp -> - getAmode dst `thenNat` \ amode -> - getRegister src `thenNat` \ register -> - let - code1 = amodeCode amode [] - dst__2 = amodeAddr amode - code2 = registerCode register tmp [] - src__2 = registerName register tmp - sz = primRepToSize pk - code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2) - in - return code__2 - -assignIntCode pk dst src - = getRegister dst `thenNat` \ register1 -> - getRegister src `thenNat` \ register2 -> - let - dst__2 = registerName register1 zeroh - code = registerCode register2 dst__2 - src__2 = registerName register2 dst__2 - code__2 = if isFixed register2 - then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2) - else code - in - return code__2 - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- integer assignment to memory - --- specific case of adding/subtracting an integer to a particular address. --- ToDo: catch other cases where we can use an operation directly on a memory --- address. -assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _, - CmmLit (CmmInt i _)]) - | addr == addr2, pk /= II64 || is32BitInteger i, - Just instr <- check op - = do Amode amode code_addr <- getAmode addr - let code = code_addr `snocOL` - instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode) - return code - where - check (MO_Add _) = Just ADD - check (MO_Sub _) = Just SUB - check _ = Nothing - -- ToDo: more? - --- general case -assignMem_IntCode pk addr src = do - Amode addr code_addr <- getAmode addr - (code_src, op_src) <- get_op_RI src - let - code = code_src `appOL` - code_addr `snocOL` - MOV pk op_src (OpAddr addr) - -- NOTE: op_src is stable, so it will still be valid - -- after code_addr. This may involve the introduction - -- of an extra MOV to a temporary register, but we hope - -- the register allocator will get rid of it. 
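-       -- (For example, when src is a 32-bit literal, get_op_RI below turns
-       -- it straight into an immediate operand, so no MOV is needed at all.)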
- -- - return code - where - get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator - get_op_RI (CmmLit lit) | is32BitLit lit - = return (nilOL, OpImm (litToImm lit)) - get_op_RI op - = do (reg,code) <- getNonClobberedReg op - return (code, OpReg reg) - - --- Assign; dst is a reg, rhs is mem -assignReg_IntCode pk reg (CmmLoad src _) = do - load_code <- intLoadCode (MOV pk) src - return (load_code (getRegisterReg reg)) - --- dst is a reg, but src could be anything -assignReg_IntCode pk reg src = do - code <- getAnyReg src - return (code (getRegisterReg reg)) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -assignMem_IntCode pk addr src = do - (srcReg, code) <- getSomeReg src - Amode dstAddr addr_code <- getAmode addr - return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr - -assignReg_IntCode pk reg src = do - r <- getRegister src - return $ case r of - Any _ code -> code dst - Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst - where - dst = getRegisterReg reg - - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH - -assignMem_IntCode pk addr src = do - (srcReg, code) <- getSomeReg src - Amode dstAddr addr_code <- getAmode addr - return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr - --- dst is a reg, but src could be anything -assignReg_IntCode pk reg src - = do - r <- getRegister src - return $ case r of - Any _ code -> code dst - Fixed _ freg fcode -> fcode `snocOL` MR dst freg - where - dst = getRegisterReg reg - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Floating-point assignments - -#if alpha_TARGET_ARCH - -assignFltCode pk (CmmLoad dst _) src - = getNewRegNat pk `thenNat` \ tmp -> - getAmode dst `thenNat` \ amode -> - getRegister src `thenNat` \ register -> - let - code1 = amodeCode amode [] - dst__2 = amodeAddr amode - code2 = registerCode register tmp [] - src__2 = registerName register tmp - sz = primRepToSize pk - code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2) - in - return code__2 - -assignFltCode pk dst src - = getRegister dst `thenNat` \ register1 -> - getRegister src `thenNat` \ register2 -> - let - dst__2 = registerName register1 zeroh - code = registerCode register2 dst__2 - src__2 = registerName register2 dst__2 - code__2 = if isFixed register2 - then code . 
mkSeqInstr (FMOV src__2 dst__2) - else code - in - return code__2 - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- Floating point assignment to memory -assignMem_FltCode pk addr src = do - (src_reg, src_code) <- getNonClobberedReg src - Amode addr addr_code <- getAmode addr - let - code = src_code `appOL` - addr_code `snocOL` - IF_ARCH_i386(GST pk src_reg addr, - MOV pk (OpReg src_reg) (OpAddr addr)) - return code - --- Floating point assignment to a register/temporary -assignReg_FltCode pk reg src = do - src_code <- getAnyReg src - return (src_code (getRegisterReg reg)) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - --- Floating point assignment to memory -assignMem_FltCode pk addr src = do - Amode dst__2 code1 <- getAmode addr - (src__2, code2) <- getSomeReg src - tmp1 <- getNewRegNat pk - let - pk__2 = cmmExprType src - code__2 = code1 `appOL` code2 `appOL` - if sizeToWidth pk == typeWidth pk__2 - then unitOL (ST pk src__2 dst__2) - else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1 - , ST pk tmp1 dst__2] - return code__2 - --- Floating point assignment to a register/temporary -assignReg_FltCode pk dstCmmReg srcCmmExpr = do - srcRegister <- getRegister srcCmmExpr - let dstReg = getRegisterReg dstCmmReg - - return $ case srcRegister of - Any _ code -> code dstReg - Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH - --- Easy, isn't it? -assignMem_FltCode = assignMem_IntCode -assignReg_FltCode = assignReg_IntCode - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Generating a non-local jump - --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. - -genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -genJump (CmmLabel lbl) - | isAsmTemp lbl = returnInstr (BR target) - | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0] - where - target = ImmCLbl lbl - -genJump tree - = getRegister tree `thenNat` \ register -> - getNewRegNat PtrRep `thenNat` \ tmp -> - let - dst = registerName register pv - code = registerCode register pv - target = registerName register pv - in - if isFixed register then - returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0] - else - return (code . 
mkSeqInstr (JMP zeroh (AddrReg pv) 0)) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -genJump (CmmLoad mem pk) = do - Amode target code <- getAmode mem - return (code `snocOL` JMP (OpAddr target)) - -genJump (CmmLit lit) = do - return (unitOL (JMP (OpImm (litToImm lit)))) - -genJump expr = do - (reg,code) <- getSomeReg expr - return (code `snocOL` JMP (OpReg reg)) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -genJump (CmmLit (CmmLabel lbl)) - = return (toOL [CALL (Left target) 0 True, NOP]) - where - target = ImmCLbl lbl - -genJump tree - = do - (target, code) <- getSomeReg tree - return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH -genJump (CmmLit (CmmLabel lbl)) - = return (unitOL $ JMP lbl) - -genJump tree - = do - (target,code) <- getSomeReg tree - return (code `snocOL` MTCTR target `snocOL` BCTR []) -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Unconditional branches - -genBranch :: BlockId -> NatM InstrBlock - -genBranch = return . toOL . mkBranchInstr - --- ----------------------------------------------------------------------------- --- Conditional jumps - -{- -Conditional jumps are always to local labels, so we can use branch -instructions. We peek at the arguments to decide what kind of -comparison to do. - -ALPHA: For comparisons with 0, we're laughing, because we can just do -the desired conditional branch. - -I386: First, we have to ensure that the condition -codes are set according to the supplied comparison operation. - -SPARC: First, we have to ensure that the condition codes are set -according to the supplied comparison operation. We generate slightly -different code for floating point comparisons, because a floating -point operation cannot directly precede a @BF@. We assume the worst -and fill that slot with a @NOP@. - -SPARC: Do not fill the delay slots here; you will confuse the register -allocator. 
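As a concrete illustration of the SPARC points (see the SPARC genCondJump
below): for a floating-point condition the branch is emitted as

    [NOP, BF cond False bid, NOP]

where the leading NOP keeps the FCMP from directly preceding the BF, and
the trailing NOP leaves the branch's delay slot deliberately unfilled for
the register allocator.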
--} - - -genCondJump - :: BlockId -- the branch target - -> CmmExpr -- the condition on which to branch - -> NatM InstrBlock - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -genCondJump id (StPrim op [x, StInt 0]) - = getRegister x `thenNat` \ register -> - getNewRegNat (registerRep register) - `thenNat` \ tmp -> - let - code = registerCode register tmp - value = registerName register tmp - pk = registerRep register - target = ImmCLbl lbl - in - returnSeq code [BI (cmpOp op) value target] - where - cmpOp CharGtOp = GTT - cmpOp CharGeOp = GE - cmpOp CharEqOp = EQQ - cmpOp CharNeOp = NE - cmpOp CharLtOp = LTT - cmpOp CharLeOp = LE - cmpOp IntGtOp = GTT - cmpOp IntGeOp = GE - cmpOp IntEqOp = EQQ - cmpOp IntNeOp = NE - cmpOp IntLtOp = LTT - cmpOp IntLeOp = LE - cmpOp WordGtOp = NE - cmpOp WordGeOp = ALWAYS - cmpOp WordEqOp = EQQ - cmpOp WordNeOp = NE - cmpOp WordLtOp = NEVER - cmpOp WordLeOp = EQQ - cmpOp AddrGtOp = NE - cmpOp AddrGeOp = ALWAYS - cmpOp AddrEqOp = EQQ - cmpOp AddrNeOp = NE - cmpOp AddrLtOp = NEVER - cmpOp AddrLeOp = EQQ - -genCondJump lbl (StPrim op [x, StDouble 0.0]) - = getRegister x `thenNat` \ register -> - getNewRegNat (registerRep register) - `thenNat` \ tmp -> - let - code = registerCode register tmp - value = registerName register tmp - pk = registerRep register - target = ImmCLbl lbl - in - return (code . mkSeqInstr (BF (cmpOp op) value target)) - where - cmpOp FloatGtOp = GTT - cmpOp FloatGeOp = GE - cmpOp FloatEqOp = EQQ - cmpOp FloatNeOp = NE - cmpOp FloatLtOp = LTT - cmpOp FloatLeOp = LE - cmpOp DoubleGtOp = GTT - cmpOp DoubleGeOp = GE - cmpOp DoubleEqOp = EQQ - cmpOp DoubleNeOp = NE - cmpOp DoubleLtOp = LTT - cmpOp DoubleLeOp = LE - -genCondJump lbl (StPrim op [x, y]) - | fltCmpOp op - = trivialFCode pr instr x y `thenNat` \ register -> - getNewRegNat FF64 `thenNat` \ tmp -> - let - code = registerCode register tmp - result = registerName register tmp - target = ImmCLbl lbl - in - return (code . mkSeqInstr (BF cond result target)) - where - pr = panic "trivialU?FCode: does not use PrimRep on Alpha" - - fltCmpOp op = case op of - FloatGtOp -> True - FloatGeOp -> True - FloatEqOp -> True - FloatNeOp -> True - FloatLtOp -> True - FloatLeOp -> True - DoubleGtOp -> True - DoubleGeOp -> True - DoubleEqOp -> True - DoubleNeOp -> True - DoubleLtOp -> True - DoubleLeOp -> True - _ -> False - (instr, cond) = case op of - FloatGtOp -> (FCMP TF LE, EQQ) - FloatGeOp -> (FCMP TF LTT, EQQ) - FloatEqOp -> (FCMP TF EQQ, NE) - FloatNeOp -> (FCMP TF EQQ, EQQ) - FloatLtOp -> (FCMP TF LTT, NE) - FloatLeOp -> (FCMP TF LE, NE) - DoubleGtOp -> (FCMP TF LE, EQQ) - DoubleGeOp -> (FCMP TF LTT, EQQ) - DoubleEqOp -> (FCMP TF EQQ, NE) - DoubleNeOp -> (FCMP TF EQQ, EQQ) - DoubleLtOp -> (FCMP TF LTT, NE) - DoubleLeOp -> (FCMP TF LE, NE) - -genCondJump lbl (StPrim op [x, y]) - = trivialCode instr x y `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ tmp -> - let - code = registerCode register tmp - result = registerName register tmp - target = ImmCLbl lbl - in - return (code . 
mkSeqInstr (BI cond result target)) - where - (instr, cond) = case op of - CharGtOp -> (CMP LE, EQQ) - CharGeOp -> (CMP LTT, EQQ) - CharEqOp -> (CMP EQQ, NE) - CharNeOp -> (CMP EQQ, EQQ) - CharLtOp -> (CMP LTT, NE) - CharLeOp -> (CMP LE, NE) - IntGtOp -> (CMP LE, EQQ) - IntGeOp -> (CMP LTT, EQQ) - IntEqOp -> (CMP EQQ, NE) - IntNeOp -> (CMP EQQ, EQQ) - IntLtOp -> (CMP LTT, NE) - IntLeOp -> (CMP LE, NE) - WordGtOp -> (CMP ULE, EQQ) - WordGeOp -> (CMP ULT, EQQ) - WordEqOp -> (CMP EQQ, NE) - WordNeOp -> (CMP EQQ, EQQ) - WordLtOp -> (CMP ULT, NE) - WordLeOp -> (CMP ULE, NE) - AddrGtOp -> (CMP ULE, EQQ) - AddrGeOp -> (CMP ULT, EQQ) - AddrEqOp -> (CMP EQQ, NE) - AddrNeOp -> (CMP EQQ, EQQ) - AddrLtOp -> (CMP ULT, NE) - AddrLeOp -> (CMP ULE, NE) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -genCondJump id bool = do - CondCode _ cond code <- getCondCode bool - return (code `snocOL` JXX cond id) - -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if x86_64_TARGET_ARCH - -genCondJump id bool = do - CondCode is_float cond cond_code <- getCondCode bool - if not is_float - then - return (cond_code `snocOL` JXX cond id) - else do - lbl <- getBlockIdNat - - -- see comment with condFltReg - let code = case cond of - NE -> or_unordered - GU -> plain_test - GEU -> plain_test - _ -> and_ordered - - plain_test = unitOL ( - JXX cond id - ) - or_unordered = toOL [ - JXX cond id, - JXX PARITY id - ] - and_ordered = toOL [ - JXX PARITY lbl, - JXX cond id, - JXX ALWAYS lbl, - NEWBLOCK lbl - ] - return (cond_code `appOL` code) - -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -genCondJump bid bool = do - CondCode is_float cond code <- getCondCode bool - return ( - code `appOL` - toOL ( - if is_float - then [NOP, BF cond False bid, NOP] - else [BI cond False bid, NOP] - ) - ) - -#endif /* sparc_TARGET_ARCH */ - - -#if powerpc_TARGET_ARCH - -genCondJump id bool = do - CondCode is_float cond code <- getCondCode bool - return (code `snocOL` BCC cond id) - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Generating C calls - --- Now the biggest nightmare---calls. Most of the nastiness is buried in --- @get_arg@, which moves the arguments to the correct registers/stack --- locations. Apart from that, the code is easy. --- --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. - -genCCall - :: CmmCallTarget -- function to call - -> HintedCmmFormals -- where to put the result - -> HintedCmmActuals -- arguments (of mixed type) - -> NatM InstrBlock - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -ccallResultRegs = - -genCCall fn cconv result_regs args - = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args - `thenNat` \ ((unused,_), argCode) -> - let - nRegs = length allArgRegs - length unused - code = asmSeqThen (map ($ []) argCode) - in - returnSeq code [ - LDA pv (AddrImm (ImmLab (ptext fn))), - JSR ra (AddrReg pv) nRegs, - LDGP gp (AddrReg ra)] - where - ------------------------ - {- Try to get a value into a specific register (or registers) for - a call. The first 6 arguments go into the appropriate - argument register (separate registers for integer and floating - point arguments, but used in lock-step), and the remaining - arguments are dumped to the stack, beginning at 0(sp). 
Our - first argument is a pair of the list of remaining argument - registers to be assigned for this call and the next stack - offset to use for overflowing arguments. This way, - @get_Arg@ can be applied to all of a call's arguments using - @mapAccumLNat@. - -} - get_arg - :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator) - -> StixTree -- Current argument - -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code - - -- We have to use up all of our argument registers first... - - get_arg ((iDst,fDst):dsts, offset) arg - = getRegister arg `thenNat` \ register -> - let - reg = if isFloatType pk then fDst else iDst - code = registerCode register reg - src = registerName register reg - pk = registerRep register - in - return ( - if isFloatType pk then - ((dsts, offset), if isFixed register then - code . mkSeqInstr (FMOV src fDst) - else code) - else - ((dsts, offset), if isFixed register then - code . mkSeqInstr (OR src (RIReg src) iDst) - else code)) - - -- Once we have run out of argument registers, we move to the - -- stack... - - get_arg ([], offset) arg - = getRegister arg `thenNat` \ register -> - getNewRegNat (registerRep register) - `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - pk = registerRep register - sz = primRepToSize pk - in - return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset))) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL - -- write barrier compiles to no code on x86/x86-64; - -- we keep it this long in order to prevent earlier optimisations. - --- we only cope with a single result for foreign calls -genCCall (CmmPrim op) [CmmHinted r _] args = do - l1 <- getNewLabelNat - l2 <- getNewLabelNat - case op of - MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args - MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args - - MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args - MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args - - MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args - MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args - - MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args - MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args - - other_op -> outOfLineFloatOp op r args - where - actuallyInlineFloatOp instr size [CmmHinted x _] - = do res <- trivialUFCode size (instr size) x - any <- anyReg res - return (any (getRegisterReg (CmmLocal r))) - -genCCall target dest_regs args = do - let - sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args) -#if !darwin_TARGET_OS - tot_arg_size = sum sizes -#else - raw_arg_size = sum sizes - tot_arg_size = roundTo 16 raw_arg_size - arg_pad_size = tot_arg_size - raw_arg_size - delta0 <- getDeltaNat - setDeltaNat (delta0 - arg_pad_size) -#endif - - push_codes <- mapM push_arg (reverse args) - delta <- getDeltaNat - - -- in - -- deal with static vs dynamic call targets - (callinsns,cconv) <- - case target of - -- CmmPrim -> ... 
- CmmCallee (CmmLit (CmmLabel lbl)) conv - -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm) []), conv) - where fn_imm = ImmCLbl lbl - CmmCallee expr conv - -> do { (dyn_c, dyn_r) <- get_op expr - ; ASSERT( isWord32 (cmmExprType expr) ) - return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) } - - let push_code -#if darwin_TARGET_OS - | arg_pad_size /= 0 - = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp), - DELTA (delta0 - arg_pad_size)] - `appOL` concatOL push_codes - | otherwise -#endif - = concatOL push_codes - call = callinsns `appOL` - toOL ( - -- Deallocate parameters after call for ccall; - -- but not for stdcall (callee does it) - (if cconv == StdCallConv || tot_arg_size==0 then [] else - [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)]) - ++ - [DELTA (delta + tot_arg_size)] - ) - -- in - setDeltaNat (delta + tot_arg_size) - - let - -- assign the results, if necessary - assign_code [] = nilOL - assign_code [CmmHinted dest _hint] - | isFloatType ty = unitOL (GMOV fake0 r_dest) - | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest), - MOV II32 (OpReg edx) (OpReg r_dest_hi)] - | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest)) - where - ty = localRegType dest - w = typeWidth ty - r_dest_hi = getHiVRegFromLo r_dest - r_dest = getRegisterReg (CmmLocal dest) - assign_code many = panic "genCCall.assign_code many" - - return (push_code `appOL` - call `appOL` - assign_code dest_regs) - - where - arg_size :: CmmType -> Int -- Width in bytes - arg_size ty = widthInBytes (typeWidth ty) - - roundTo a x | x `mod` a == 0 = x - | otherwise = x + a - (x `mod` a) - - - push_arg :: HintedCmmActual {-current argument-} - -> NatM InstrBlock -- code - - push_arg (CmmHinted arg _hint) -- we don't need the hints on x86 - | isWord64 arg_ty = do - ChildCode64 code r_lo <- iselExpr64 arg - delta <- getDeltaNat - setDeltaNat (delta - 8) - let - r_hi = getHiVRegFromLo r_lo - -- in - return ( code `appOL` - toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4), - PUSH II32 (OpReg r_lo), DELTA (delta - 8), - DELTA (delta-8)] - ) - - | otherwise = do - (code, reg) <- get_op arg - delta <- getDeltaNat - let size = arg_size arg_ty -- Byte size - setDeltaNat (delta-size) - if (isFloatType arg_ty) - then return (code `appOL` - toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp), - DELTA (delta-size), - GST (floatSize (typeWidth arg_ty)) - reg (AddrBaseIndex (EABaseReg esp) - EAIndexNone - (ImmInt 0))] - ) - else return (code `snocOL` - PUSH II32 (OpReg reg) `snocOL` - DELTA (delta-size) - ) - where - arg_ty = cmmExprType arg - - ------------ - get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg - get_op op = do - (reg,code) <- getSomeReg op - return (code, reg) - -#endif /* i386_TARGET_ARCH */ - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals - -> NatM InstrBlock -outOfLineFloatOp mop res args - = do - dflags <- getDynFlagsNat - targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl - let target = CmmCallee targetExpr CCallConv - - if isFloat64 (localRegType res) - then - stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn) - else do - uq <- getUniqueNat - let - tmp = LocalReg uq f64 - -- in - code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn) - code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) - return (code1 `appOL` code2) - where - lbl = mkForeignLabel fn Nothing False IsFunction - - fn = case 
mop of - MO_F32_Sqrt -> fsLit "sqrtf" - MO_F32_Sin -> fsLit "sinf" - MO_F32_Cos -> fsLit "cosf" - MO_F32_Tan -> fsLit "tanf" - MO_F32_Exp -> fsLit "expf" - MO_F32_Log -> fsLit "logf" - - MO_F32_Asin -> fsLit "asinf" - MO_F32_Acos -> fsLit "acosf" - MO_F32_Atan -> fsLit "atanf" - - MO_F32_Sinh -> fsLit "sinhf" - MO_F32_Cosh -> fsLit "coshf" - MO_F32_Tanh -> fsLit "tanhf" - MO_F32_Pwr -> fsLit "powf" - - MO_F64_Sqrt -> fsLit "sqrt" - MO_F64_Sin -> fsLit "sin" - MO_F64_Cos -> fsLit "cos" - MO_F64_Tan -> fsLit "tan" - MO_F64_Exp -> fsLit "exp" - MO_F64_Log -> fsLit "log" - - MO_F64_Asin -> fsLit "asin" - MO_F64_Acos -> fsLit "acos" - MO_F64_Atan -> fsLit "atan" - - MO_F64_Sinh -> fsLit "sinh" - MO_F64_Cosh -> fsLit "cosh" - MO_F64_Tanh -> fsLit "tanh" - MO_F64_Pwr -> fsLit "pow" - -#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if x86_64_TARGET_ARCH - -genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL - -- write barrier compiles to no code on x86/x86-64; - -- we keep it this long in order to prevent earlier optimisations. - - -genCCall (CmmPrim op) [CmmHinted r _] args = - outOfLineFloatOp op r args - -genCCall target dest_regs args = do - - -- load up the register arguments - (stack_args, aregs, fregs, load_args_code) - <- load_args args allArgRegs allFPArgRegs nilOL - - let - fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs)) - int_regs_used = reverse (drop (length aregs) (reverse allArgRegs)) - arg_regs = [eax] ++ int_regs_used ++ fp_regs_used - -- for annotating the call instruction with - - sse_regs = length fp_regs_used - - tot_arg_size = arg_size * length stack_args - - -- On entry to the called function, %rsp should be aligned - -- on a 16-byte boundary +8 (i.e. the first stack arg after - -- the return address is 16-byte aligned). In STG land - -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just - -- need to make sure we push a multiple of 16-bytes of args, - -- plus the return address, to get the correct alignment. - -- Urg, this is hard. We need to feed the delta back into - -- the arg pushing code. - (real_size, adjust_rsp) <- - if tot_arg_size `rem` 16 == 0 - then return (tot_arg_size, nilOL) - else do -- we need to adjust... - delta <- getDeltaNat - setDeltaNat (delta-8) - return (tot_arg_size+8, toOL [ - SUB II64 (OpImm (ImmInt 8)) (OpReg rsp), - DELTA (delta-8) - ]) - - -- push the stack args, right to left - push_code <- push_args (reverse stack_args) nilOL - delta <- getDeltaNat - - -- deal with static vs dynamic call targets - (callinsns,cconv) <- - case target of - -- CmmPrim -> ... - CmmCallee (CmmLit (CmmLabel lbl)) conv - -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm) arg_regs), conv) - where fn_imm = ImmCLbl lbl - CmmCallee expr conv - -> do (dyn_r, dyn_c) <- getSomeReg expr - return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) - - let - -- The x86_64 ABI requires us to set %al to the number of SSE - -- registers that contain arguments, if the called routine - -- is a varargs function. We don't know whether it's a - -- varargs function or not, so we have to assume it is. - -- - -- It's not safe to omit this assignment, even if the number - -- of SSE regs in use is zero. If %al is larger than 8 - -- on entry to a varargs function, seg faults ensue. 
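-       -- A concrete example (illustrative, not from this patch): for a
-       -- call like 'pow(x, y)' both doubles arrive in %xmm0/%xmm1, so
-       -- sse_regs == 2 and assign_eax below emits 'movl $2, %eax' just
-       -- before the CALL; a call with no floating-point arguments still
-       -- gets 'movl $0, %eax', because the callee may be varargs.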
- assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax)) - - let call = callinsns `appOL` - toOL ( - -- Deallocate parameters after call for ccall; - -- but not for stdcall (callee does it) - (if cconv == StdCallConv || real_size==0 then [] else - [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)]) - ++ - [DELTA (delta + real_size)] - ) - -- in - setDeltaNat (delta + real_size) - - let - -- assign the results, if necessary - assign_code [] = nilOL - assign_code [CmmHinted dest _hint] = - case typeWidth rep of - W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest)) - W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest)) - _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest)) - where - rep = localRegType dest - r_dest = getRegisterReg (CmmLocal dest) - assign_code many = panic "genCCall.assign_code many" - - return (load_args_code `appOL` - adjust_rsp `appOL` - push_code `appOL` - assign_eax sse_regs `appOL` - call `appOL` - assign_code dest_regs) - - where - arg_size = 8 -- always, at the mo - - load_args :: [CmmHinted CmmExpr] - -> [Reg] -- int regs avail for args - -> [Reg] -- FP regs avail for args - -> InstrBlock - -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock) - load_args args [] [] code = return (args, [], [], code) - -- no more regs to use - load_args [] aregs fregs code = return ([], aregs, fregs, code) - -- no more args to push - load_args ((CmmHinted arg hint) : rest) aregs fregs code - | isFloatType arg_rep = - case fregs of - [] -> push_this_arg - (r:rs) -> do - arg_code <- getAnyReg arg - load_args rest aregs rs (code `appOL` arg_code r) - | otherwise = - case aregs of - [] -> push_this_arg - (r:rs) -> do - arg_code <- getAnyReg arg - load_args rest rs fregs (code `appOL` arg_code r) - where - arg_rep = cmmExprType arg - - push_this_arg = do - (args',ars,frs,code') <- load_args rest aregs fregs code - return ((CmmHinted arg hint):args', ars, frs, code') - - push_args [] code = return code - push_args ((CmmHinted arg hint):rest) code - | isFloatType arg_rep = do - (arg_reg, arg_code) <- getSomeReg arg - delta <- getDeltaNat - setDeltaNat (delta-arg_size) - let code' = code `appOL` arg_code `appOL` toOL [ - SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) , - DELTA (delta-arg_size), - MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))] - push_args rest code' - - | otherwise = do - -- we only ever generate word-sized function arguments. Promotion - -- has already happened: our Int8# type is kept sign-extended - -- in an Int#, for example. - ASSERT(width == W64) return () - (arg_op, arg_code) <- getOperand arg - delta <- getDeltaNat - setDeltaNat (delta-arg_size) - let code' = code `appOL` arg_code `appOL` toOL [ - PUSH II64 arg_op, - DELTA (delta-arg_size)] - push_args rest code' - where - arg_rep = cmmExprType arg - width = typeWidth arg_rep -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH -{- - The SPARC calling convention is an absolute - nightmare. The first 6x32 bits of arguments are mapped into - %o0 through %o5, and the remaining arguments are dumped to the - stack, beginning at [%sp+92]. (Note that %o6 == %sp.) - - If we have to put args on the stack, move %o6==%sp down by - the number of words to go on the stack, to ensure there's enough space. - - According to Fraser and Hanson's lcc book, page 478, fig 17.2, - 16 words above the stack pointer is a word for the address of - a structure return value. 
I use this as a temporary location - for moving values from float to int regs. Certainly it isn't - safe to put anything in the 16 words starting at %sp, since - this area can get trashed at any time due to window overflows - caused by signal handlers. - - A final complication (if the above isn't enough) is that - we can't blithely calculate the arguments one by one into - %o0 .. %o5. Consider the following nested calls: - - fff a (fff b c) - - Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately - the inner call will itself use %o0, which trashes the value put there - in preparation for the outer call. Upshot: we need to calculate the - args into temporary regs, and move those to arg regs or onto the - stack only immediately prior to the call proper. Sigh. - -genCCall - :: CmmCallTarget -- function to call - -> HintedCmmFormals -- where to put the result - -> HintedCmmActuals -- arguments (of mixed type) - -> NatM InstrBlock - --} - - --- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream --- are guaranteed to take place before writes afterwards (unlike on PowerPC). --- Ref: Section 8.4 of the SPARC V9 Architecture manual. --- --- In the SPARC case we don't need a barrier. --- -genCCall (CmmPrim (MO_WriteBarrier)) _ _ - = do return nilOL - -genCCall target dest_regs argsAndHints - = do - -- strip hints from the arg regs - let args :: [CmmExpr] - args = map hintlessCmm argsAndHints - - - -- work out the arguments, and assign them to integer regs - argcode_and_vregs <- mapM arg_to_int_vregs args - let (argcodes, vregss) = unzip argcode_and_vregs - let vregs = concat vregss - - let n_argRegs = length allArgRegs - let n_argRegs_used = min (length vregs) n_argRegs - - - -- deal with static vs dynamic call targets - callinsns <- case target of - CmmCallee (CmmLit (CmmLabel lbl)) conv -> - return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) - - CmmCallee expr conv - -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr - return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) - - CmmPrim mop - -> do res <- outOfLineFloatOp mop - lblOrMopExpr <- case res of - Left lbl -> do - return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) - - Right mopExpr -> do - (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr - return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) - - return lblOrMopExpr - - let argcode = concatOL argcodes - - let (move_sp_down, move_sp_up) - = let diff = length vregs - n_argRegs - nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment - in if nn <= 0 - then (nilOL, nilOL) - else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn))) - - let transfer_code - = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE) - - return - $ argcode `appOL` - move_sp_down `appOL` - transfer_code `appOL` - callinsns `appOL` - unitOL NOP `appOL` - move_sp_up `appOL` - assign_code dest_regs - - --- | Generate code to calculate an argument, and move it into one --- or two integer vregs. 
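--- (Illustrative note: a 64-bit float argument is moved into %f0 and then
--- bounced through the reserved temporary word at [%sp+16] as two 32-bit
--- loads; see the FF64 case below.)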
-arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg]) -arg_to_int_vregs arg - - -- If the expr produces a 64 bit int, then we can just use iselExpr64 - | isWord64 (cmmExprType arg) - = do (ChildCode64 code r_lo) <- iselExpr64 arg - let r_hi = getHiVRegFromLo r_lo - return (code, [r_hi, r_lo]) - - | otherwise - = do (src, code) <- getSomeReg arg - tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg) - let pk = cmmExprType arg - - case cmmTypeSize pk of - - -- Load a 64 bit float return value into two integer regs. - FF64 -> do - v1 <- getNewRegNat II32 - v2 <- getNewRegNat II32 - - let Just f0_high = fPair f0 - - let code2 = - code `snocOL` - FMOV FF64 src f0 `snocOL` - ST FF32 f0 (spRel 16) `snocOL` - LD II32 (spRel 16) v1 `snocOL` - ST FF32 f0_high (spRel 16) `snocOL` - LD II32 (spRel 16) v2 - - return (code2, [v1,v2]) - - -- Load a 32 bit float return value into an integer reg - FF32 -> do - v1 <- getNewRegNat II32 - - let code2 = - code `snocOL` - ST FF32 src (spRel 16) `snocOL` - LD II32 (spRel 16) v1 - - return (code2, [v1]) - - -- Move an integer return value into its destination reg. - other -> do - v1 <- getNewRegNat II32 - - let code2 = - code `snocOL` - OR False g0 (RIReg src) v1 - - return (code2, [v1]) - - --- | Move args from the integer vregs into which they have been --- marshalled, into %o0 .. %o5, and the rest onto the stack. --- -move_final :: [Reg] -> [Reg] -> Int -> [Instr] - --- all args done -move_final [] _ offset - = [] - --- out of aregs; move to stack -move_final (v:vs) [] offset - = ST II32 v (spRel offset) - : move_final vs [] (offset+1) - --- move into an arg (%o[0..5]) reg -move_final (v:vs) (a:az) offset - = OR False g0 (RIReg v) a - : move_final vs az offset - - --- | Assign results returned from the call into their --- destination regs. 
--- -assign_code :: [CmmHinted LocalReg] -> OrdList Instr -assign_code [] = nilOL - -assign_code [CmmHinted dest _hint] - = let rep = localRegType dest - width = typeWidth rep - r_dest = getRegisterReg (CmmLocal dest) - - result - | isFloatType rep - , W32 <- width - = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest - - | isFloatType rep - , W64 <- width - = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest - - | not $ isFloatType rep - , W32 <- width - = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest - - | not $ isFloatType rep - , W64 <- width - , r_dest_hi <- getHiVRegFromLo r_dest - = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi - , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest] - in result - - --- | Generate a call to implement an out-of-line floating point operation -outOfLineFloatOp - :: CallishMachOp - -> NatM (Either CLabel CmmExpr) - -outOfLineFloatOp mop - = do let functionName - = outOfLineFloatOp_table mop - - dflags <- getDynFlagsNat - mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference - $ mkForeignLabel functionName Nothing True IsFunction - - let mopLabelOrExpr - = case mopExpr of - CmmLit (CmmLabel lbl) -> Left lbl - _ -> Right mopExpr - - return mopLabelOrExpr - - --- | Decide what C function to use to implement a CallishMachOp --- -outOfLineFloatOp_table - :: CallishMachOp - -> FastString - -outOfLineFloatOp_table mop - = case mop of - MO_F32_Exp -> fsLit "expf" - MO_F32_Log -> fsLit "logf" - MO_F32_Sqrt -> fsLit "sqrtf" - MO_F32_Pwr -> fsLit "powf" - - MO_F32_Sin -> fsLit "sinf" - MO_F32_Cos -> fsLit "cosf" - MO_F32_Tan -> fsLit "tanf" - - MO_F32_Asin -> fsLit "asinf" - MO_F32_Acos -> fsLit "acosf" - MO_F32_Atan -> fsLit "atanf" - - MO_F32_Sinh -> fsLit "sinhf" - MO_F32_Cosh -> fsLit "coshf" - MO_F32_Tanh -> fsLit "tanhf" - - MO_F64_Exp -> fsLit "exp" - MO_F64_Log -> fsLit "log" - MO_F64_Sqrt -> fsLit "sqrt" - MO_F64_Pwr -> fsLit "pow" - - MO_F64_Sin -> fsLit "sin" - MO_F64_Cos -> fsLit "cos" - MO_F64_Tan -> fsLit "tan" - - MO_F64_Asin -> fsLit "asin" - MO_F64_Acos -> fsLit "acos" - MO_F64_Atan -> fsLit "atan" - - MO_F64_Sinh -> fsLit "sinh" - MO_F64_Cosh -> fsLit "cosh" - MO_F64_Tanh -> fsLit "tanh" - - other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op " - (pprCallishMachOp mop) - - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH - -#if darwin_TARGET_OS || linux_TARGET_OS -{- - The PowerPC calling convention for Darwin/Mac OS X - is described in Apple's document - "Inside Mac OS X - Mach-O Runtime Architecture". - - PowerPC Linux uses the System V Release 4 Calling Convention - for PowerPC. It is described in the - "System V Application Binary Interface PowerPC Processor Supplement". - - Both conventions are similar: - Parameters may be passed in general-purpose registers starting at r3, in - floating point registers starting at f1, or on the stack. - - But there are substantial differences: - * The number of registers used for parameter passing and the exact set of - nonvolatile registers differs (see MachRegs.lhs). - * On Darwin, stack space is always reserved for parameters, even if they are - passed in registers. The called routine may choose to save parameters from - registers to the corresponding space on the stack. - * On Darwin, a corresponding amount of GPRs is skipped when a floating point - parameter is passed in an FPR. - * SysV insists on either passing I64 arguments on the stack, or in two GPRs, - starting with an odd-numbered GPR. It may skip a GPR to achieve this. 
- Darwin just treats an I64 like two separate II32s (high word first). - * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only - 4-byte aligned like everything else on Darwin. - * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on - PowerPC Linux does not agree, so neither do we. - - According to both conventions, The parameter area should be part of the - caller's stack frame, allocated in the caller's prologue code (large enough - to hold the parameter lists for all called routines). The NCG already - uses the stack for register spilling, leaving 64 bytes free at the top. - If we need a larger parameter area than that, we just allocate a new stack - frame just before ccalling. --} - - -genCCall (CmmPrim MO_WriteBarrier) _ _ - = return $ unitOL LWSYNC - -genCCall target dest_regs argsAndHints - = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps) - -- we rely on argument promotion in the codeGen - do - (finalStack,passArgumentsCode,usedRegs) <- passArguments - (zip args argReps) - allArgRegs allFPArgRegs - initialStackOffset - (toOL []) [] - - (labelOrExpr, reduceToFF32) <- case target of - CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False) - CmmCallee expr conv -> return (Right expr, False) - CmmPrim mop -> outOfLineFloatOp mop - - let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode - codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32 - - case labelOrExpr of - Left lbl -> do - return ( codeBefore - `snocOL` BL lbl usedRegs - `appOL` codeAfter) - Right dyn -> do - (dynReg, dynCode) <- getSomeReg dyn - return ( dynCode - `snocOL` MTCTR dynReg - `appOL` codeBefore - `snocOL` BCTRL usedRegs - `appOL` codeAfter) - where -#if darwin_TARGET_OS - initialStackOffset = 24 - -- size of linkage area + size of arguments, in bytes - stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $ - map (widthInBytes . 
typeWidth) argReps -#elif linux_TARGET_OS - initialStackOffset = 8 - stackDelta finalStack = roundTo 16 finalStack -#endif - args = map hintlessCmm argsAndHints - argReps = map cmmExprType args - - roundTo a x | x `mod` a == 0 = x - | otherwise = x + a - (x `mod` a) - - move_sp_down finalStack - | delta > 64 = - toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))), - DELTA (-delta)] - | otherwise = nilOL - where delta = stackDelta finalStack - move_sp_up finalStack - | delta > 64 = - toOL [ADD sp sp (RIImm (ImmInt delta)), - DELTA 0] - | otherwise = nilOL - where delta = stackDelta finalStack - - - passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) - passArguments ((arg,arg_ty):args) gprs fprs stackOffset - accumCode accumUsed | isWord64 arg_ty = - do - ChildCode64 code vr_lo <- iselExpr64 arg - let vr_hi = getHiVRegFromLo vr_lo - -#if darwin_TARGET_OS - passArguments args - (drop 2 gprs) - fprs - (stackOffset+8) - (accumCode `appOL` code - `snocOL` storeWord vr_hi gprs stackOffset - `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4)) - ((take 2 gprs) ++ accumUsed) - where - storeWord vr (gpr:_) offset = MR gpr vr - storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset)) - -#elif linux_TARGET_OS - let stackOffset' = roundTo 8 stackOffset - stackCode = accumCode `appOL` code - `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) - `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) - regCode hireg loreg = - accumCode `appOL` code - `snocOL` MR hireg vr_hi - `snocOL` MR loreg vr_lo - - case gprs of - hireg : loreg : regs | even (length gprs) -> - passArguments args regs fprs stackOffset - (regCode hireg loreg) (hireg : loreg : accumUsed) - _skipped : hireg : loreg : regs -> - passArguments args regs fprs stackOffset - (regCode hireg loreg) (hireg : loreg : accumUsed) - _ -> -- only one or no regs left - passArguments args [] fprs (stackOffset'+8) - stackCode accumUsed -#endif - - passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed - | reg : _ <- regs = do - register <- getRegister arg - let code = case register of - Fixed _ freg fcode -> fcode `snocOL` MR reg freg - Any _ acode -> acode reg - passArguments args - (drop nGprs gprs) - (drop nFprs fprs) -#if darwin_TARGET_OS - -- The Darwin ABI requires that we reserve stack slots for register parameters - (stackOffset + stackBytes) -#elif linux_TARGET_OS - -- ... the SysV ABI doesn't. - stackOffset -#endif - (accumCode `appOL` code) - (reg : accumUsed) - | otherwise = do - (vr, code) <- getSomeReg arg - passArguments args - (drop nGprs gprs) - (drop nFprs fprs) - (stackOffset' + stackBytes) - (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot) - accumUsed - where -#if darwin_TARGET_OS - -- stackOffset is at least 4-byte aligned - -- The Darwin ABI is happy with that. - stackOffset' = stackOffset -#else - -- ... the SysV ABI requires 8-byte alignment for doubles. - stackOffset' | isFloatType rep && typeWidth rep == W64 = - roundTo 8 stackOffset - | otherwise = stackOffset -#endif - stackSlot = AddrRegImm sp (ImmInt stackOffset') - (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of - II32 -> (1, 0, 4, gprs) -#if darwin_TARGET_OS - -- The Darwin ABI requires that we skip a corresponding number of GPRs when - -- we use the FPRs. - FF32 -> (1, 1, 4, fprs) - FF64 -> (2, 1, 8, fprs) -#elif linux_TARGET_OS - -- ... the SysV ABI doesn't. 
- FF32 -> (0, 1, 4, fprs) - FF64 -> (0, 1, 8, fprs) -#endif - - moveResult reduceToFF32 = - case dest_regs of - [] -> nilOL - [CmmHinted dest _hint] - | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) - | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) - | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3, - MR r_dest r4] - | otherwise -> unitOL (MR r_dest r3) - where rep = cmmRegType (CmmLocal dest) - r_dest = getRegisterReg (CmmLocal dest) - - outOfLineFloatOp mop = - do - dflags <- getDynFlagsNat - mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $ - mkForeignLabel functionName Nothing True IsFunction - let mopLabelOrExpr = case mopExpr of - CmmLit (CmmLabel lbl) -> Left lbl - _ -> Right mopExpr - return (mopLabelOrExpr, reduce) - where - (functionName, reduce) = case mop of - MO_F32_Exp -> (fsLit "exp", True) - MO_F32_Log -> (fsLit "log", True) - MO_F32_Sqrt -> (fsLit "sqrt", True) - - MO_F32_Sin -> (fsLit "sin", True) - MO_F32_Cos -> (fsLit "cos", True) - MO_F32_Tan -> (fsLit "tan", True) - - MO_F32_Asin -> (fsLit "asin", True) - MO_F32_Acos -> (fsLit "acos", True) - MO_F32_Atan -> (fsLit "atan", True) - - MO_F32_Sinh -> (fsLit "sinh", True) - MO_F32_Cosh -> (fsLit "cosh", True) - MO_F32_Tanh -> (fsLit "tanh", True) - MO_F32_Pwr -> (fsLit "pow", True) - - MO_F64_Exp -> (fsLit "exp", False) - MO_F64_Log -> (fsLit "log", False) - MO_F64_Sqrt -> (fsLit "sqrt", False) - - MO_F64_Sin -> (fsLit "sin", False) - MO_F64_Cos -> (fsLit "cos", False) - MO_F64_Tan -> (fsLit "tan", False) - - MO_F64_Asin -> (fsLit "asin", False) - MO_F64_Acos -> (fsLit "acos", False) - MO_F64_Atan -> (fsLit "atan", False) - - MO_F64_Sinh -> (fsLit "sinh", False) - MO_F64_Cosh -> (fsLit "cosh", False) - MO_F64_Tanh -> (fsLit "tanh", False) - MO_F64_Pwr -> (fsLit "pow", False) - other -> pprPanic "genCCall(ppc): unknown callish op" - (pprCallishMachOp other) - -#endif /* darwin_TARGET_OS || linux_TARGET_OS */ - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Generating a table-branch - -genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH -genSwitch expr ids - | opt_PIC - = do - (reg,e_code) <- getSomeReg expr - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let - jumpTable = map jumpTableEntryRel ids - - jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 wordWidth) - jumpTableEntryRel (Just (BlockId id)) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) - where blockLabel = mkAsmTempLabel id - - op = OpAddr (AddrBaseIndex (EABaseReg tableReg) - (EAIndex reg wORD_SIZE) (ImmInt 0)) - -#if x86_64_TARGET_ARCH -#if darwin_TARGET_OS - -- on Mac OS X/x86_64, put the jump table in the text section - -- to work around a limitation of the linker. - -- ld64 is unable to handle the relocations for - -- .quad L1 - L0 - -- if L0 is not preceded by a non-anonymous label in its section. - - code = e_code `appOL` t_code `appOL` toOL [ - ADD (intSize wordWidth) op (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ], - LDATA Text (CmmDataLabel lbl : jumpTable) - ] -#else - -- HACK: On x86_64 binutils<2.17 is only able to generate PC32 - -- relocations, hence we only get 32-bit offsets in the jump - -- table. As these offsets are always negative we need to properly - -- sign extend them to 64-bit. 
This hack should be removed in - -- conjunction with the hack in PprMach.hs/pprDataItem once - -- binutils 2.17 is standard. - code = e_code `appOL` t_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - MOVSxL II32 - (OpAddr (AddrBaseIndex (EABaseReg tableReg) - (EAIndex reg wORD_SIZE) (ImmInt 0))) - (OpReg reg), - ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] - ] -#endif -#else - code = e_code `appOL` t_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - ADD (intSize wordWidth) op (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] - ] -#endif - return code - | otherwise - = do - (reg,e_code) <- getSomeReg expr - lbl <- getNewLabelNat - let - jumpTable = map jumpTableEntry ids - op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl)) - code = e_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - JMP_TBL op [ id | Just id <- ids ] - ] - -- in - return code -#elif powerpc_TARGET_ARCH -genSwitch expr ids - | opt_PIC - = do - (reg,e_code) <- getSomeReg expr - tmp <- getNewRegNat II32 - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let - jumpTable = map jumpTableEntryRel ids - - jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 wordWidth) - jumpTableEntryRel (Just (BlockId id)) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) - where blockLabel = mkAsmTempLabel id - - code = e_code `appOL` t_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - SLW tmp reg (RIImm (ImmInt 2)), - LD II32 tmp (AddrRegReg tableReg tmp), - ADD tmp tmp (RIReg tableReg), - MTCTR tmp, - BCTR [ id | Just id <- ids ] - ] - return code - | otherwise - = do - (reg,e_code) <- getSomeReg expr - tmp <- getNewRegNat II32 - lbl <- getNewLabelNat - let - jumpTable = map jumpTableEntry ids - - code = e_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - SLW tmp reg (RIImm (ImmInt 2)), - ADDIS tmp tmp (HA (ImmCLbl lbl)), - LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), - MTCTR tmp, - BCTR [ id | Just id <- ids ] - ] - return code -#elif sparc_TARGET_ARCH -genSwitch expr ids - | opt_PIC - = error "MachCodeGen: sparc genSwitch PIC not finished\n" - - | otherwise - = do (e_reg, e_code) <- getSomeReg expr - - base_reg <- getNewRegNat II32 - offset_reg <- getNewRegNat II32 - dst <- getNewRegNat II32 - - label <- getNewLabelNat - let jumpTable = map jumpTableEntry ids - - return $ e_code `appOL` - toOL - -- the jump table - [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable) - - -- load base of jump table - , SETHI (HI (ImmCLbl label)) base_reg - , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg - - -- the addrs in the table are 32 bits wide.. 
- , SLL e_reg (RIImm $ ImmInt 2) offset_reg - - -- load and jump to the destination - , LD II32 (AddrRegReg base_reg offset_reg) dst - , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids] - , NOP ] - -#else -#error "ToDo: genSwitch" -#endif - - --- | Convert a BlockId to some CmmStatic data -jumpTableEntry :: Maybe BlockId -> CmmStatic -jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) -jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = mkAsmTempLabel id - --- ----------------------------------------------------------------------------- --- Support bits --- ----------------------------------------------------------------------------- - - --- ----------------------------------------------------------------------------- --- 'condIntReg' and 'condFltReg': condition codes into registers - --- Turn those condition codes into integers now (when they appear on --- the right hand side of an assignment). --- --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. - -condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH -condIntReg = panic "MachCode.condIntReg (not on Alpha)" -condFltReg = panic "MachCode.condFltReg (not on Alpha)" -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -condIntReg cond x y = do - CondCode _ cond cond_code <- condIntCode cond x y - tmp <- getNewRegNat II8 - let - code dst = cond_code `appOL` toOL [ - SETCC cond (OpReg tmp), - MOVZxL II8 (OpReg tmp) (OpReg dst) - ] - -- in - return (Any II32 code) - -#endif - -#if i386_TARGET_ARCH - -condFltReg cond x y = do - CondCode _ cond cond_code <- condFltCode cond x y - tmp <- getNewRegNat II8 - let - code dst = cond_code `appOL` toOL [ - SETCC cond (OpReg tmp), - MOVZxL II8 (OpReg tmp) (OpReg dst) - ] - -- in - return (Any II32 code) - -#endif - -#if x86_64_TARGET_ARCH - -condFltReg cond x y = do - CondCode _ cond cond_code <- condFltCode cond x y - tmp1 <- getNewRegNat wordSize - tmp2 <- getNewRegNat wordSize - let - -- We have to worry about unordered operands (eg. comparisons - -- against NaN). If the operands are unordered, the comparison - -- sets the parity flag, carry flag and zero flag. - -- All comparisons are supposed to return false for unordered - -- operands except for !=, which returns true. - -- - -- Optimisation: we don't have to test the parity flag if we - -- know the test has already excluded the unordered case: eg > - -- and >= test for a zero carry flag, which can only occur for - -- ordered operands. - -- - -- ToDo: by reversing comparisons we could avoid testing the - -- parity flag in more cases. 
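-        -- Worked example (illustrative, not from this patch): for 'x > y'
-        -- (GU) with x = NaN, ucomisd sets ZF, PF and CF all to 1, so SETA
-        -- (which needs CF == 0 and ZF == 0) yields 0, the required False
-        -- for unordered operands; this is why GU and GEU can use
-        -- plain_test below. NE uses SETNE (ZF == 0), which is also 0 for
-        -- unordered operands, so or_unordered ORs in SETP (PF == 1) to
-        -- make NaN compare not-equal.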
- - code dst = - cond_code `appOL` - (case cond of - NE -> or_unordered dst - GU -> plain_test dst - GEU -> plain_test dst - _ -> and_ordered dst) - - plain_test dst = toOL [ - SETCC cond (OpReg tmp1), - MOVZxL II8 (OpReg tmp1) (OpReg dst) - ] - or_unordered dst = toOL [ - SETCC cond (OpReg tmp1), - SETCC PARITY (OpReg tmp2), - OR II8 (OpReg tmp1) (OpReg tmp2), - MOVZxL II8 (OpReg tmp2) (OpReg dst) - ] - and_ordered dst = toOL [ - SETCC cond (OpReg tmp1), - SETCC NOTPARITY (OpReg tmp2), - AND II8 (OpReg tmp1) (OpReg tmp2), - MOVZxL II8 (OpReg tmp2) (OpReg dst) - ] - -- in - return (Any II32 code) - -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do - (src, code) <- getSomeReg x - tmp <- getNewRegNat II32 - let - code__2 dst = code `appOL` toOL [ - SUB False True g0 (RIReg src) g0, - SUB True False g0 (RIImm (ImmInt (-1))) dst] - return (Any II32 code__2) - -condIntReg EQQ x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - let - code__2 dst = code1 `appOL` code2 `appOL` toOL [ - XOR False src1 (RIReg src2) dst, - SUB False True g0 (RIReg dst) g0, - SUB True False g0 (RIImm (ImmInt (-1))) dst] - return (Any II32 code__2) - -condIntReg NE x (CmmLit (CmmInt 0 d)) = do - (src, code) <- getSomeReg x - tmp <- getNewRegNat II32 - let - code__2 dst = code `appOL` toOL [ - SUB False True g0 (RIReg src) g0, - ADD True False g0 (RIImm (ImmInt 0)) dst] - return (Any II32 code__2) - -condIntReg NE x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - let - code__2 dst = code1 `appOL` code2 `appOL` toOL [ - XOR False src1 (RIReg src2) dst, - SUB False True g0 (RIReg dst) g0, - ADD True False g0 (RIImm (ImmInt 0)) dst] - return (Any II32 code__2) - -condIntReg cond x y = do - bid1@(BlockId lbl1) <- getBlockIdNat - bid2@(BlockId lbl2) <- getBlockIdNat - CondCode _ cond cond_code <- condIntCode cond x y - let - code__2 dst = cond_code `appOL` toOL [ - BI cond False bid1, NOP, - OR False g0 (RIImm (ImmInt 0)) dst, - BI ALWAYS False bid2, NOP, - NEWBLOCK bid1, - OR False g0 (RIImm (ImmInt 1)) dst, - NEWBLOCK bid2] - return (Any II32 code__2) - -condFltReg cond x y = do - bid1@(BlockId lbl1) <- getBlockIdNat - bid2@(BlockId lbl2) <- getBlockIdNat - CondCode _ cond cond_code <- condFltCode cond x y - let - code__2 dst = cond_code `appOL` toOL [ - NOP, - BF cond False bid1, NOP, - OR False g0 (RIImm (ImmInt 0)) dst, - BI ALWAYS False bid2, NOP, - NEWBLOCK bid1, - OR False g0 (RIImm (ImmInt 1)) dst, - NEWBLOCK bid2] - return (Any II32 code__2) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH -condReg getCond = do - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat - CondCode _ cond cond_code <- getCond - let -{- code dst = cond_code `appOL` toOL [ - BCC cond lbl1, - LI dst (ImmInt 0), - BCC ALWAYS lbl2, - NEWBLOCK lbl1, - LI dst (ImmInt 1), - BCC ALWAYS lbl2, - NEWBLOCK lbl2 - ]-} - code dst = cond_code - `appOL` negate_code - `appOL` toOL [ - MFCR dst, - RLWINM dst dst (bit + 1) 31 31 - ] - - negate_code | do_negate = unitOL (CRNOR bit bit bit) - | otherwise = nilOL - - (bit, do_negate) = case cond of - LTT -> (0, False) - LE -> (1, True) - EQQ -> (2, False) - GE -> (0, True) - GTT -> (1, False) - - NE -> (2, True) - - LU -> (0, False) - LEU -> (1, True) - GEU -> (0, True) - GU -> (1, False) - - return (Any II32 code) - -condIntReg cond x y = condReg 
(condIntCode cond x y) -condFltReg cond x y = condReg (condFltCode cond x y) -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- 'trivial*Code': deal with trivial instructions - --- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', --- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. --- Only look for constants on the right hand side, because that's --- where the generic optimizer will have put them. - --- Similarly, for unary instructions, we don't have to worry about --- matching an StInt as the argument, because genericOpt will already --- have handled the constant-folding. - -trivialCode - :: Width -- Int only - -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr) - ,IF_ARCH_i386 ((Operand -> Operand -> Instr) - -> Maybe (Operand -> Operand -> Instr) - ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr) - -> Maybe (Operand -> Operand -> Instr) - ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr) - ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr) - ,))))) - -> CmmExpr -> CmmExpr -- the two arguments - -> NatM Register - -#ifndef powerpc_TARGET_ARCH -trivialFCode - :: Width -- Floating point only - -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr) - ,)))) - -> CmmExpr -> CmmExpr -- the two arguments - -> NatM Register -#endif - -trivialUCode - :: Size - -> IF_ARCH_alpha((RI -> Reg -> Instr) - ,IF_ARCH_i386 ((Operand -> Instr) - ,IF_ARCH_x86_64 ((Operand -> Instr) - ,IF_ARCH_sparc((RI -> Reg -> Instr) - ,IF_ARCH_powerpc((Reg -> Reg -> Instr) - ,))))) - -> CmmExpr -- the one argument - -> NatM Register - -#ifndef powerpc_TARGET_ARCH -trivialUFCode - :: Size - -> IF_ARCH_alpha((Reg -> Reg -> Instr) - ,IF_ARCH_i386 ((Reg -> Reg -> Instr) - ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr) - ,IF_ARCH_sparc((Reg -> Reg -> Instr) - ,)))) - -> CmmExpr -- the one argument - -> NatM Register -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -trivialCode instr x (StInt y) - | fits8Bits y - = getRegister x `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ tmp -> - let - code = registerCode register tmp - src1 = registerName register tmp - src2 = ImmInt (fromInteger y) - code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst) - in - return (Any IntRep code__2) - -trivialCode instr x y - = getRegister x `thenNat` \ register1 -> - getRegister y `thenNat` \ register2 -> - getNewRegNat IntRep `thenNat` \ tmp1 -> - getNewRegNat IntRep `thenNat` \ tmp2 -> - let - code1 = registerCode register1 tmp1 [] - src1 = registerName register1 tmp1 - code2 = registerCode register2 tmp2 [] - src2 = registerName register2 tmp2 - code__2 dst = asmSeqThen [code1, code2] . - mkSeqInstr (instr src1 (RIReg src2) dst) - in - return (Any IntRep code__2) - ------------- -trivialUCode instr x - = getRegister x `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - code__2 dst = code . 
mkSeqInstr (instr (RIReg src) dst) - in - return (Any IntRep code__2) - ------------- -trivialFCode _ instr x y - = getRegister x `thenNat` \ register1 -> - getRegister y `thenNat` \ register2 -> - getNewRegNat FF64 `thenNat` \ tmp1 -> - getNewRegNat FF64 `thenNat` \ tmp2 -> - let - code1 = registerCode register1 tmp1 - src1 = registerName register1 tmp1 - - code2 = registerCode register2 tmp2 - src2 = registerName register2 tmp2 - - code__2 dst = asmSeqThen [code1 [], code2 []] . - mkSeqInstr (instr src1 src2 dst) - in - return (Any FF64 code__2) - -trivialUFCode _ instr x - = getRegister x `thenNat` \ register -> - getNewRegNat FF64 `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - code__2 dst = code . mkSeqInstr (instr src dst) - in - return (Any FF64 code__2) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -{- -The Rules of the Game are: - -* You cannot assume anything about the destination register dst; - it may be anything, including a fixed reg. - -* You may compute an operand into a fixed reg, but you may not - subsequently change the contents of that fixed reg. If you - want to do so, first copy the value either to a temporary - or into dst. You are free to modify dst even if it happens - to be a fixed reg -- that's not your problem. - -* You cannot assume that a fixed reg will stay live over an - arbitrary computation. The same applies to the dst reg. - -* Temporary regs obtained from getNewRegNat are distinct from - each other and from all other regs, and stay live over - arbitrary computations. - --------------------- - -SDM's version of The Rules: - -* If getRegister returns Any, that means it can generate correct - code which places the result in any register, period. Even if that - register happens to be read during the computation. - - Corollary #1: this means that if you are generating code for an - operation with two arbitrary operands, you cannot assign the result - of the first operand into the destination register before computing - the second operand. The second operand might require the old value - of the destination register. - - Corollary #2: A function might be able to generate more efficient - code if it knows the destination register is a new temporary (and - therefore not read by any of the sub-computations). - -* If getRegister returns Any, then the code it generates may modify only: - (a) fresh temporaries - (b) the destination register - (c) known registers (eg. %ecx is used by shifts) - In particular, it may *not* modify global registers, unless the global - register happens to be the destination register. --} - -trivialCode width instr (Just revinstr) (CmmLit lit_a) b - | is32BitLit lit_a = do - b_code <- getAnyReg b - let - code dst - = b_code dst `snocOL` - revinstr (OpImm (litToImm lit_a)) (OpReg dst) - -- in - return (Any (intSize width) code) - -trivialCode width instr maybe_revinstr a b - = genTrivialCode (intSize width) instr a b - --- This is re-used for floating pt instructions too. -genTrivialCode rep instr a b = do - (b_op, b_code) <- getNonClobberedOperand b - a_code <- getAnyReg a - tmp <- getNewRegNat rep - let - -- We want the value of b to stay alive across the computation of a. - -- But, we want to calculate a straight into the destination register, - -- because the instruction only has two operands (dst := dst `op` src). 
- -- The troublesome case is when the result of b is in the same register - -- as the destination reg. In this case, we have to save b in a - -- new temporary across the computation of a. - code dst - | dst `regClashesWithOp` b_op = - b_code `appOL` - unitOL (MOV rep b_op (OpReg tmp)) `appOL` - a_code dst `snocOL` - instr (OpReg tmp) (OpReg dst) - | otherwise = - b_code `appOL` - a_code dst `snocOL` - instr b_op (OpReg dst) - -- in - return (Any rep code) - -reg `regClashesWithOp` OpReg reg2 = reg == reg2 -reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode) -reg `regClashesWithOp` _ = False - ------------ - -trivialUCode rep instr x = do - x_code <- getAnyReg x - let - code dst = - x_code dst `snocOL` - instr (OpReg dst) - return (Any rep code) - ------------ - -#if i386_TARGET_ARCH - -trivialFCode width instr x y = do - (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too - (y_reg, y_code) <- getSomeReg y - let - size = floatSize width - code dst = - x_code `appOL` - y_code `snocOL` - instr size x_reg y_reg dst - return (Any size code) - -#endif - -#if x86_64_TARGET_ARCH -trivialFCode pk instr x y - = genTrivialCode size (instr size) x y - where size = floatSize pk -#endif - -------------- - -trivialUFCode size instr x = do - (x_reg, x_code) <- getSomeReg x - let - code dst = - x_code `snocOL` - instr x_reg dst - -- in - return (Any size code) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -trivialCode pk instr x (CmmLit (CmmInt y d)) - | fits13Bits y - = do - (src1, code) <- getSomeReg x - tmp <- getNewRegNat II32 - let - src2 = ImmInt (fromInteger y) - code__2 dst = code `snocOL` instr src1 (RIImm src2) dst - return (Any II32 code__2) - -trivialCode pk instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - let - code__2 dst = code1 `appOL` code2 `snocOL` - instr src1 (RIReg src2) dst - return (Any II32 code__2) - ------------- -trivialFCode pk instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x) - tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y) - tmp <- getNewRegNat FF64 - let - promote x = FxTOy FF32 FF64 x tmp - - pk1 = cmmExprType x - pk2 = cmmExprType y - - code__2 dst = - if pk1 `cmmEqType` pk2 then - code1 `appOL` code2 `snocOL` - instr (floatSize pk) src1 src2 dst - else if typeWidth pk1 == W32 then - code1 `snocOL` promote src1 `appOL` code2 `snocOL` - instr FF64 tmp src2 dst - else - code1 `appOL` code2 `snocOL` promote src2 `snocOL` - instr FF64 src1 tmp dst - return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) - code__2) - ------------- -trivialUCode size instr x = do - (src, code) <- getSomeReg x - tmp <- getNewRegNat size - let - code__2 dst = code `snocOL` instr (RIReg src) dst - return (Any size code__2) - -------------- -trivialUFCode pk instr x = do - (src, code) <- getSomeReg x - tmp <- getNewRegNat pk - let - code__2 dst = code `snocOL` instr src dst - return (Any pk code__2) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH - -{- -Wolfgang's PowerPC version of The Rules: - -A slightly modified version of The Rules to take advantage of the fact -that PowerPC instructions work on all registers and don't implicitly -clobber any fixed registers. - -* The only expression for which getRegister returns Fixed is (CmmReg reg). 
- -* If getRegister returns Any, then the code it generates may modify only: - (a) fresh temporaries - (b) the destination register - It may *not* modify global registers, unless the global - register happens to be the destination register. - It may not clobber any other registers. In fact, only ccalls clobber any - fixed registers. - Also, it may not modify the counter register (used by genCCall). - - Corollary: If a getRegister for a subexpression returns Fixed, you need - not move it to a fresh temporary before evaluating the next subexpression. - The Fixed register won't be modified. - Therefore, we don't need a counterpart for the x86's getStableReg on PPC. - -* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on - the value of the destination register. --} - -trivialCode rep signed instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate rep signed y - = do - (src1, code1) <- getSomeReg x - let code dst = code1 `snocOL` instr dst src1 (RIImm imm) - return (Any (intSize rep) code) - -trivialCode rep signed instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) - return (Any (intSize rep) code) - -trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm' size instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 - return (Any size code) - -trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y - -trivialUCode rep instr x = do - (src, code) <- getSomeReg x - let code' dst = code `snocOL` instr dst src - return (Any rep code') - --- There is no "remainder" instruction on the PPC, so we have to do --- it the hard way. --- The "div" parameter is the division instruction to use (DIVW or DIVWU) - -remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -remainderCode rep div x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `appOL` toOL [ - div dst src1 src2, - MULLW dst dst (RIReg src2), - SUBF dst dst src1 - ] - return (Any (intSize rep) code) - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Coercing to/from integer/floating-point... - --- When going to integer, we truncate (round towards 0). - --- @coerce(Int2FP|FP2Int)@ are more complicated integer/float --- conversions. We have to store temporaries in memory to move --- between the integer and the floating point register sets. - --- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we --- pretend, on sparc at least, that double and float regs are seperate --- kinds, so the value has to be computed into one kind before being --- explicitly "converted" to live in the other kind. 
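(An illustrative sketch, not part of this patch: the instruction selector reaches these coercion helpers from the Cmm conversion MachOps, roughly as follows, using the MO_SF_Conv and MO_FS_Conv constructors from Cmm.)

    -- hypothetical dispatch, following the shape of getRegister elsewhere:
    getRegister (CmmMachOp (MO_SF_Conv from to) [x]) = coerceInt2FP from to x
    getRegister (CmmMachOp (MO_FS_Conv from to) [x]) = coerceFP2Int from to x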
- -coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register - -#if sparc_TARGET_ARCH -coerceDbl2Flt :: CmmExpr -> NatM Register -coerceFlt2Dbl :: CmmExpr -> NatM Register -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -coerceInt2FP _ x - = getRegister x `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ reg -> - let - code = registerCode register reg - src = registerName register reg - - code__2 dst = code . mkSeqInstrs [ - ST Q src (spRel 0), - LD TF dst (spRel 0), - CVTxy Q TF dst dst] - in - return (Any FF64 code__2) - ------------- -coerceFP2Int x - = getRegister x `thenNat` \ register -> - getNewRegNat FF64 `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - - code__2 dst = code . mkSeqInstrs [ - CVTxy TF Q src tmp, - ST TF tmp (spRel 0), - LD Q dst (spRel 0)] - in - return (Any IntRep code__2) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -coerceInt2FP from to x = do - (x_reg, x_code) <- getSomeReg x - let - opc = case to of W32 -> GITOF; W64 -> GITOD - code dst = x_code `snocOL` opc x_reg dst - -- ToDo: works for non-II32 reps? - return (Any (floatSize to) code) - ------------- - -coerceFP2Int from to x = do - (x_reg, x_code) <- getSomeReg x - let - opc = case from of W32 -> GFTOI; W64 -> GDTOI - code dst = x_code `snocOL` opc x_reg dst - -- ToDo: works for non-II32 reps? - -- in - return (Any (intSize to) code) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if x86_64_TARGET_ARCH - -coerceFP2Int from to x = do - (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand - let - opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ - code dst = x_code `snocOL` opc x_op dst - -- in - return (Any (intSize to) code) -- works even if the destination rep is <II32 - -coerceInt2FP from to x = do - (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand - let - opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD - code dst = x_code `snocOL` opc x_op dst - -- in - return (Any (floatSize to) code) -- works even if the destination rep is <II32 - -coerceFP2FP :: Width -> CmmExpr -> NatM Register -coerceFP2FP to x = do - (x_reg, x_code) <- getSomeReg x - let - opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD - code dst = x_code `snocOL` opc x_reg dst - -- in - return (Any (floatSize to) code) -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -coerceInt2FP width1 width2 x = do - (src, code) <- getSomeReg x - let - code__2 dst = code `appOL` toOL [ - ST (intSize width1) src (spRel (-2)), - LD (intSize width1) (spRel (-2)) dst, - FxTOy (intSize width1) (floatSize width2) dst dst] - return (Any (floatSize $ width2) code__2) - - --- | Coerce a floating point value to integer --- --- NOTE: On sparc v9 there are no instructions to move a value from an --- FP register directly to an int register, so we have to use a load/store. --- -coerceFP2Int width1 width2 x - = do let fsize1 = floatSize width1 - fsize2 = floatSize width2 - - isize2 = intSize width2 - - (fsrc, code) <- getSomeReg x - fdst <- getNewRegNat fsize2 - - let code2 dst - = code - `appOL` toOL - -- convert float to int format, leaving it in a float reg. - [ FxTOy fsize1 isize2 fsrc fdst - - -- store the int into mem, then load it back to move - -- it into an actual int reg.
- , ST fsize2 fdst (spRel (-2)) - , LD isize2 (spRel (-2)) dst] - - return (Any isize2 code2) - ------------- -coerceDbl2Flt x = do - (src, code) <- getSomeReg x - return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) - ------------- -coerceFlt2Dbl x = do - (src, code) <- getSomeReg x - return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst)) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH -coerceInt2FP fromRep toRep x = do - (src, code) <- getSomeReg x - lbl <- getNewLabelNat - itmp <- getNewRegNat II32 - ftmp <- getNewRegNat FF64 - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let - code' dst = code `appOL` maybe_exts `appOL` toOL [ - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x43300000 W32), - CmmStaticLit (CmmInt 0x80000000 W32)], - XORIS itmp src (ImmInt 0x8000), - ST II32 itmp (spRel 3), - LIS itmp (ImmInt 0x4330), - ST II32 itmp (spRel 2), - LD FF64 ftmp (spRel 2) - ] `appOL` addr_code `appOL` toOL [ - LD FF64 dst addr, - FSUB FF64 dst ftmp dst - ] `appOL` maybe_frsp dst - - maybe_exts = case fromRep of - W8 -> unitOL $ EXTS II8 src src - W16 -> unitOL $ EXTS II16 src src - W32 -> nilOL - maybe_frsp dst = case toRep of - W32 -> unitOL $ FRSP dst dst - W64 -> nilOL - return (Any (floatSize toRep) code') - -coerceFP2Int fromRep toRep x = do - -- the reps don't really matter: F*->FF64 and II32->I* are no-ops - (src, code) <- getSomeReg x - tmp <- getNewRegNat FF64 - let - code' dst = code `appOL` toOL [ - -- convert to int in FP reg - FCTIWZ tmp src, - -- store value (64bit) from FP to stack - ST FF64 tmp (spRel 2), - -- read low word of value (high word is undefined) - LD II32 dst (spRel 3)] - return (Any (intSize toRep) code') -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- eXTRA_STK_ARGS_HERE - --- We (allegedly) put the first six C-call arguments in registers; --- where do we start putting the rest of them? 
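(Illustrative only: under this convention the k-th overflow argument is stored to a stack slot in the style of

    ST sz arg (spRel (eXTRA_STK_ARGS_HERE + k))

using the spRel addressing helper seen elsewhere in this file; the constant below supplies the starting offset.)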
- --- Moved from Instrs (SDM): - -#if alpha_TARGET_ARCH || sparc_TARGET_ARCH -eXTRA_STK_ARGS_HERE :: Int -eXTRA_STK_ARGS_HERE - = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???)) -#endif diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs index d19cda4..ed59d2b 100644 --- a/compiler/nativeGen/NCGMonad.hs +++ b/compiler/nativeGen/NCGMonad.hs @@ -10,28 +10,43 @@ module NCGMonad ( NatM_State(..), mkNatM_State, NatM, -- instance Monad - initNat, addImportNat, getUniqueNat, - mapAccumLNat, setDeltaNat, getDeltaNat, - getBlockIdNat, getNewLabelNat, getNewRegNat, getNewRegPairNat, - getPicBaseMaybeNat, getPicBaseNat, getDynFlagsNat - ) where + initNat, + addImportNat, + getUniqueNat, + mapAccumLNat, + setDeltaNat, + getDeltaNat, + getBlockIdNat, + getNewLabelNat, + getNewRegNat, + getNewRegPairNat, + getPicBaseMaybeNat, + getPicBaseNat, + getDynFlagsNat +) + +where #include "HsVersions.h" +import Reg +import Size +import TargetReg + import BlockId import CLabel ( CLabel, mkAsmTempLabel ) -import Regs import UniqSupply import Unique ( Unique ) import DynFlags -data NatM_State = NatM_State { - natm_us :: UniqSupply, - natm_delta :: Int, - natm_imports :: [(CLabel)], - natm_pic :: Maybe Reg, - natm_dflags :: DynFlags - } +data NatM_State + = NatM_State { + natm_us :: UniqSupply, + natm_delta :: Int, + natm_imports :: [(CLabel)], + natm_pic :: Maybe Reg, + natm_dflags :: DynFlags + } newtype NatM result = NatM (NatM_State -> (result, NatM_State)) @@ -39,22 +54,27 @@ unNat :: NatM a -> NatM_State -> (a, NatM_State) unNat (NatM a) = a mkNatM_State :: UniqSupply -> Int -> DynFlags -> NatM_State -mkNatM_State us delta dflags = NatM_State us delta [] Nothing dflags +mkNatM_State us delta dflags + = NatM_State us delta [] Nothing dflags initNat :: NatM_State -> NatM a -> (a, NatM_State) -initNat init_st m = case unNat m init_st of { (r,st) -> (r,st) } +initNat init_st m + = case unNat m init_st of { (r,st) -> (r,st) } + instance Monad NatM where (>>=) = thenNat return = returnNat + thenNat :: NatM a -> (a -> NatM b) -> NatM b thenNat expr cont - = NatM $ \st -> case unNat expr st of + = NatM $ \st -> case unNat expr st of (result, st') -> unNat (cont result) st' returnNat :: a -> NatM a -returnNat result = NatM $ \st -> (result, st) +returnNat result + = NatM $ \st -> (result, st) mapAccumLNat :: (acc -> x -> NatM (acc, y)) -> acc @@ -75,43 +95,64 @@ getUniqueNat = NatM $ \ (NatM_State us delta imports pic dflags) -> getDynFlagsNat :: NatM DynFlags -getDynFlagsNat = NatM $ \ (NatM_State us delta imports pic dflags) -> - (dflags, (NatM_State us delta imports pic dflags)) +getDynFlagsNat + = NatM $ \ (NatM_State us delta imports pic dflags) -> + (dflags, (NatM_State us delta imports pic dflags)) + getDeltaNat :: NatM Int -getDeltaNat = NatM $ \ st -> (natm_delta st, st) +getDeltaNat + = NatM $ \ st -> (natm_delta st, st) + setDeltaNat :: Int -> NatM () -setDeltaNat delta = NatM $ \ (NatM_State us _ imports pic dflags) -> - ((), NatM_State us delta imports pic dflags) +setDeltaNat delta + = NatM $ \ (NatM_State us _ imports pic dflags) -> + ((), NatM_State us delta imports pic dflags) + addImportNat :: CLabel -> NatM () -addImportNat imp = NatM $ \ (NatM_State us delta imports pic dflags) -> - ((), NatM_State us delta (imp:imports) pic dflags) +addImportNat imp + = NatM $ \ (NatM_State us delta imports pic dflags) -> + ((), NatM_State us delta (imp:imports) pic dflags) + getBlockIdNat :: NatM BlockId -getBlockIdNat = do u <- getUniqueNat; return (BlockId u) +getBlockIdNat + = do u <- 
getUniqueNat + return (BlockId u) + getNewLabelNat :: NatM CLabel -getNewLabelNat = do u <- getUniqueNat; return (mkAsmTempLabel u) +getNewLabelNat + = do u <- getUniqueNat + return (mkAsmTempLabel u) + getNewRegNat :: Size -> NatM Reg -getNewRegNat rep = do u <- getUniqueNat; return (mkVReg u rep) +getNewRegNat rep + = do u <- getUniqueNat + return (targetMkVReg u rep) + getNewRegPairNat :: Size -> NatM (Reg,Reg) -getNewRegPairNat rep = do - u <- getUniqueNat - let lo = mkVReg u rep; hi = getHiVRegFromLo lo - return (lo,hi) +getNewRegPairNat rep + = do u <- getUniqueNat + let lo = targetMkVReg u rep; hi = getHiVRegFromLo lo + return (lo,hi) + getPicBaseMaybeNat :: NatM (Maybe Reg) -getPicBaseMaybeNat = NatM (\state -> (natm_pic state, state)) +getPicBaseMaybeNat + = NatM (\state -> (natm_pic state, state)) + getPicBaseNat :: Size -> NatM Reg -getPicBaseNat rep = do - mbPicBase <- getPicBaseMaybeNat - case mbPicBase of - Just picBase -> return picBase - Nothing -> do - reg <- getNewRegNat rep - NatM (\state -> (reg, state { natm_pic = Just reg })) +getPicBaseNat rep + = do mbPicBase <- getPicBaseMaybeNat + case mbPicBase of + Just picBase -> return picBase + Nothing + -> do + reg <- getNewRegNat rep + NatM (\state -> (reg, state { natm_pic = Just reg })) diff --git a/compiler/nativeGen/PositionIndependentCode.hs b/compiler/nativeGen/PIC.hs similarity index 57% rename from compiler/nativeGen/PositionIndependentCode.hs rename to compiler/nativeGen/PIC.hs index a1e11d8..98e4f9f 100644 --- a/compiler/nativeGen/PositionIndependentCode.hs +++ b/compiler/nativeGen/PIC.hs @@ -1,24 +1,9 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module PositionIndependentCode ( - cmmMakeDynamicReference, - ReferenceKind(..), - needImportedSymbols, - pprImportedSymbol, - pprGotDeclaration, - initializePicBase - ) where - -#include "HsVersions.h" - {- This module handles generation of position independent code and dynamic-linking related issues for the native code generator. + + This depends on both the architecture and the OS, so we define it here + instead of in one of the architecture specific modules. Things outside this module which are related to this: @@ -53,7 +38,30 @@ module PositionIndependentCode ( and ppc-linux). -} -#include "nativeGen/NCG.h" +module PIC ( + cmmMakeDynamicReference, + ReferenceKind(..), + needImportedSymbols, + pprImportedSymbol, + pprGotDeclaration, + + initializePicBase_ppc, + initializePicBase_x86 +) + +where + +import qualified PPC.Instr as PPC +import qualified PPC.Regs as PPC + +import qualified X86.Instr as X86 + +import Platform +import Instruction +import Size +import Reg +import NCGMonad + import Cmm import CLabel ( CLabel, pprCLabel, @@ -61,13 +69,8 @@ import CLabel ( CLabel, pprCLabel, dynamicLinkerLabelInfo, mkPicBaseLabel, labelDynamic, externallyVisibleCLabel ) -#if linux_TARGET_OS import CLabel ( mkForeignLabel ) -#endif -import Regs -import Instrs -import NCGMonad ( NatM, getNewRegNat, getNewLabelNat ) import StaticFlags ( opt_PIC, opt_Static ) import BasicTypes @@ -80,8 +83,7 @@ import DynFlags import FastString --- The most important function here is cmmMakeDynamicReference. - +-------------------------------------------------------------------------------- -- It gets called by the cmmToCmm pass for every CmmLabel in the Cmm -- code.
It does The Right Thing(tm) to convert the CmmLabel into a -- position-independent, dynamic-linking-aware reference to the thing @@ -94,10 +96,12 @@ import FastString -- - addImportCmmOpt for the CmmOptM monad -- - addImportNat for the NatM monad. -data ReferenceKind = DataReference - | CallReference - | JumpReference - deriving(Eq) +data ReferenceKind + = DataReference + | CallReference + | JumpReference + deriving(Eq) + cmmMakeDynamicReference :: Monad m => DynFlags @@ -110,80 +114,98 @@ cmmMakeDynamicReference cmmMakeDynamicReference dflags addImport referenceKind lbl | Just _ <- dynamicLinkerLabelInfo lbl = return $ CmmLit $ CmmLabel lbl -- already processed it, pass through - | otherwise = case howToAccessLabel dflags referenceKind lbl of + + | otherwise + = case howToAccessLabel + dflags + (platformArch $ targetPlatform dflags) + (platformOS $ targetPlatform dflags) + referenceKind lbl of + AccessViaStub -> do let stub = mkDynamicLinkerLabel CodeStub lbl addImport stub return $ CmmLit $ CmmLabel stub + AccessViaSymbolPtr -> do let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl addImport symbolPtr - return $ CmmLoad (cmmMakePicReference symbolPtr) bWord + return $ CmmLoad (cmmMakePicReference dflags symbolPtr) bWord + AccessDirectly -> case referenceKind of -- for data, we might have to make some calculations: - DataReference -> return $ cmmMakePicReference lbl + DataReference -> return $ cmmMakePicReference dflags lbl -- all currently supported processors support -- PC-relative branch and call instructions, -- so just jump there if it's a call or a jump _ -> return $ CmmLit $ CmmLabel lbl - --- ------------------------------------------------------------------- - + + +-- ----------------------------------------------------------------------------- -- Create a position independent reference to a label. -- (but do not bother with dynamic linking). -- We calculate the label's address by adding some (platform-dependent) -- offset to our base register; this offset is calculated by -- the function picRelative in the platform-dependent part below. 
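(A minimal usage sketch, not from the patch itself: the platform tests below now come from the Platform record carried by DynFlags, through the accessors this commit introduces, instead of from CPP. accessFor is a hypothetical name.)

    accessFor :: DynFlags -> ReferenceKind -> CLabel -> LabelAccessStyle
    accessFor dflags kind lbl
        = howToAccessLabel dflags
            (platformArch $ targetPlatform dflags)
            (platformOS   $ targetPlatform dflags)
            kind lbl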
-cmmMakePicReference :: CLabel -> CmmExpr - -#if !mingw32_TARGET_OS +cmmMakePicReference :: DynFlags -> CLabel -> CmmExpr +cmmMakePicReference dflags lbl + -- Windows doesn't need PIC, -- everything gets relocated at runtime + | OSMinGW32 <- platformOS $ targetPlatform dflags + = CmmLit $ CmmLabel lbl -cmmMakePicReference lbl - | (opt_PIC || not opt_Static) && absoluteLabel lbl = CmmMachOp (MO_Add wordWidth) [ - CmmReg (CmmGlobal PicBaseReg), - CmmLit $ picRelative lbl - ] - where - absoluteLabel lbl = case dynamicLinkerLabelInfo lbl of - Just (GotSymbolPtr, _) -> False - Just (GotSymbolOffset, _) -> False - _ -> True -#endif -cmmMakePicReference lbl = CmmLit $ CmmLabel lbl + | (opt_PIC || not opt_Static) && absoluteLabel lbl + = CmmMachOp (MO_Add wordWidth) + [ CmmReg (CmmGlobal PicBaseReg) + , CmmLit $ picRelative + (platformArch $ targetPlatform dflags) + (platformOS $ targetPlatform dflags) + lbl ] --- =================================================================== --- Platform dependent stuff --- =================================================================== + | otherwise + = CmmLit $ CmmLabel lbl + + +absoluteLabel :: CLabel -> Bool +absoluteLabel lbl + = case dynamicLinkerLabelInfo lbl of + Just (GotSymbolPtr, _) -> False + Just (GotSymbolOffset, _) -> False + _ -> True + +-------------------------------------------------------------------------------- -- Knowledge about how special dynamic linker labels like symbol -- pointers, code stubs and GOT offsets look like is located in the -- module CLabel. --- ------------------------------------------------------------------- - -- We have to decide which labels need to be accessed -- indirectly or via a piece of stub code. +data LabelAccessStyle + = AccessViaStub + | AccessViaSymbolPtr + | AccessDirectly -data LabelAccessStyle = AccessViaStub - | AccessViaSymbolPtr - | AccessDirectly +howToAccessLabel + :: DynFlags -> Arch -> OS -> ReferenceKind -> CLabel -> LabelAccessStyle -howToAccessLabel :: DynFlags -> ReferenceKind -> CLabel -> LabelAccessStyle - -#if mingw32_TARGET_OS -- Windows -- -- We need to access *exactly* those things that -- are imported from a DLL via an __imp_* label. -- There are no stubs for imported code. +-- +howToAccessLabel dflags _ OSMinGW32 _ lbl + | labelDynamic (thisPackage dflags) lbl + = AccessViaSymbolPtr + + | otherwise + = AccessDirectly + -howToAccessLabel dflags _ lbl | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr - | otherwise = AccessDirectly -#elif darwin_TARGET_OS -- Mach-O (Darwin, Mac OS X) -- -- Indirect access is required in the following cases: @@ -191,54 +213,48 @@ howToAccessLabel dflags _ lbl | labelDynamic (thisPackage dflags) lbl = AccessVi -- * (not on x86_64) data from a different module, if we're generating PIC code -- It is always possible to access something indirectly, -- even when it's not necessary. +-- +howToAccessLabel dflags arch OSDarwin DataReference lbl + -- data access to a dynamic library goes via a symbol pointer + | labelDynamic (thisPackage dflags) lbl + = AccessViaSymbolPtr + + -- when generating PIC code, all cross-module data references + -- must go via a symbol pointer, too, because the assembler + -- cannot generate code for a label difference where one + -- label is undefined. Doesn't apply to x86_64. + -- Unfortunately, we don't know whether it's cross-module, + -- so we do it for all externally visible labels.
+ -- This is a slight waste of time and space, but otherwise + -- we'd need to pass the current Module all the way in to + -- this function. + | arch /= ArchX86_64 + , opt_PIC && externallyVisibleCLabel lbl + = AccessViaSymbolPtr + + | otherwise + = AccessDirectly + +howToAccessLabel dflags arch OSDarwin JumpReference lbl + -- dyld code stubs don't work for tailcalls because the + -- stack alignment is only right for regular calls. + -- Therefore, we have to go via a symbol pointer: + | arch == ArchX86 || arch == ArchX86_64 + , labelDynamic (thisPackage dflags) lbl + = AccessViaSymbolPtr + + +howToAccessLabel dflags arch OSDarwin _ lbl + -- Code stubs are the usual method of choice for imported code; + -- not needed on x86_64 because Apple's new linker, ld64, generates + -- them automatically. + | arch /= ArchX86_64 + , labelDynamic (thisPackage dflags) lbl + = AccessViaStub + + | otherwise + = AccessDirectly -howToAccessLabel dflags DataReference lbl - -- data access to a dynamic library goes via a symbol pointer - | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr - -#if !x86_64_TARGET_ARCH - -- when generating PIC code, all cross-module data references must - -- must go via a symbol pointer, too, because the assembler - -- cannot generate code for a label difference where one - -- label is undefined. Doesn't apply t x86_64. - -- Unfortunately, we don't know whether it's cross-module, - -- so we do it for all externally visible labels. - -- This is a slight waste of time and space, but otherwise - -- we'd need to pass the current Module all the way in to - -- this function. - | opt_PIC && externallyVisibleCLabel lbl = AccessViaSymbolPtr -#endif - | otherwise = AccessDirectly - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -- dyld code stubs don't work for tailcalls because the - -- stack alignment is only right for regular calls. - -- Therefore, we have to go via a symbol pointer: -howToAccessLabel dflags JumpReference lbl - | labelDynamic (thisPackage dflags) lbl - = AccessViaSymbolPtr -#endif - -howToAccessLabel dflags _ lbl -#if !x86_64_TARGET_ARCH - -- Code stubs are the usual method of choice for imported code; - -- not needed on x86_64 because Apple's new linker, ld64, generates - -- them automatically. - | labelDynamic (thisPackage dflags) lbl - = AccessViaStub -#endif - | otherwise - = AccessDirectly - - -#elif linux_TARGET_OS && powerpc64_TARGET_ARCH --- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC - -howToAccessLabel _ DataReference lbl = AccessViaSymbolPtr -howToAccessLabel _ _ lbl = AccessDirectly -- actually, .label instead of label - -#elif linux_TARGET_OS -- ELF (Linux) -- -- ELF tries to pretend to the main application code that dynamic linking does @@ -250,63 +266,82 @@ howToAccessLabel _ _ lbl = AccessDirectly -- actually, .label instead of label -- from position independent code. It is also required from the main program -- when dynamic libraries containing Haskell code are used. -howToAccessLabel _ _ lbl +howToAccessLabel _ ArchPPC_64 OSLinux kind _ + + -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC + | DataReference <- kind + = AccessViaSymbolPtr + + -- actually, .label instead of label + | otherwise + = AccessDirectly + +howToAccessLabel _ _ OSLinux _ _ -- no PIC -> the dynamic linker does everything for us; -- if we don't dynamically link to Haskell code, -- it actually manages to do so without messing things up.
- | not opt_PIC && opt_Static = AccessDirectly - -howToAccessLabel dflags DataReference lbl - -- A dynamic label needs to be accessed via a symbol pointer. - | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr -#if powerpc_TARGET_ARCH + | not opt_PIC && opt_Static + = AccessDirectly + +howToAccessLabel dflags arch OSLinux DataReference lbl + -- A dynamic label needs to be accessed via a symbol pointer. + | labelDynamic (thisPackage dflags) lbl + = AccessViaSymbolPtr + -- For PowerPC32 -fPIC, we have to access even static data -- via a symbol pointer (see below for an explanation why -- PowerPC32 Linux is especially broken). - | opt_PIC = AccessViaSymbolPtr -#endif - | otherwise = AccessDirectly - - --- In most cases, we have to avoid symbol stubs on ELF, for the following reasons: --- * on i386, the position-independent symbol stubs in the Procedure Linkage Table --- require the address of the GOT to be loaded into register %ebx on entry. --- * The linker will take any reference to the symbol stub as a hint that --- the label in question is a code label. When linking executables, this --- will cause the linker to replace even data references to the label with --- references to the symbol stub. - --- This leaves calling a (foreign) function from non-PIC code --- (AccessDirectly, because we get an implicit symbol stub) --- and calling functions from PIC code on non-i386 platforms (via a symbol stub) - -howToAccessLabel dflags CallReference lbl - | labelDynamic (thisPackage dflags) lbl && not opt_PIC - = AccessDirectly -#if !i386_TARGET_ARCH - | labelDynamic (thisPackage dflags) lbl && opt_PIC - = AccessViaStub -#endif - -howToAccessLabel dflags _ lbl - | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr - | otherwise = AccessDirectly -#else --- + | arch == ArchPPC + , opt_PIC + = AccessViaSymbolPtr + + | otherwise + = AccessDirectly + + + -- In most cases, we have to avoid symbol stubs on ELF, for the following reasons: + -- * on i386, the position-independent symbol stubs in the Procedure Linkage Table + -- require the address of the GOT to be loaded into register %ebx on entry. + -- * The linker will take any reference to the symbol stub as a hint that + -- the label in question is a code label. When linking executables, this + -- will cause the linker to replace even data references to the label with + -- references to the symbol stub. + + -- This leaves calling a (foreign) function from non-PIC code + -- (AccessDirectly, because we get an implicit symbol stub) + -- and calling functions from PIC code on non-i386 platforms (via a symbol stub) + +howToAccessLabel dflags arch OSLinux CallReference lbl + | labelDynamic (thisPackage dflags) lbl && not opt_PIC + = AccessDirectly + + | arch /= ArchX86 + , labelDynamic (thisPackage dflags) lbl && opt_PIC + = AccessViaStub + +howToAccessLabel dflags _ OSLinux _ lbl + | labelDynamic (thisPackage dflags) lbl + = AccessViaSymbolPtr + + | otherwise + = AccessDirectly + -- all other platforms --- -howToAccessLabel _ _ _ - | not opt_PIC = AccessDirectly - | otherwise = panic "howToAccessLabel: PIC not defined for this platform" -#endif +howToAccessLabel _ _ _ _ _ + | not opt_PIC + = AccessDirectly + + | otherwise + = panic "howToAccessLabel: PIC not defined for this platform" + + -- ------------------------------------------------------------------- +-- | Says what we have to add to our 'PIC base register' in order to +-- get the address of a label.
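(For example, following the cases below and not adding to the patch: on Darwin the result is a label difference against the PIC base label,

    picRelative ArchPPC OSDarwin lbl == CmmLabelDiffOff lbl mkPicBaseLabel 0

while on ELF Linux it becomes a @got / @gotoff style label built with mkDynamicLinkerLabel.)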
--- What do we have to add to our 'PIC base register' in order to --- get the address of a label? +picRelative :: Arch -> OS -> CLabel -> CmmLit -picRelative :: CLabel -> CmmLit -#if darwin_TARGET_OS && !x86_64_TARGET_ARCH -- Darwin, but not x86_64: -- The PIC base register points to the PIC base label at the beginning -- of the current CmmTop. We just have to use a label difference to @@ -314,21 +349,21 @@ picRelative :: CLabel -> CmmLit -- We have already made sure that all labels that are not from the current -- module are accessed indirectly ('as' can't calculate differences between -- undefined labels). +picRelative arch OSDarwin lbl + | arch /= ArchX86_64 + = CmmLabelDiffOff lbl mkPicBaseLabel 0 + -picRelative lbl - = CmmLabelDiffOff lbl mkPicBaseLabel 0 - -#elif powerpc_TARGET_ARCH && linux_TARGET_OS -- PowerPC Linux: -- The PIC base register points to our fake GOT. Use a label difference -- to get the offset. -- We have made sure that *everything* is accessed indirectly, so this -- is only used for offsets from the GOT to symbol pointers inside the -- GOT. -picRelative lbl - = CmmLabelDiffOff lbl gotLabel 0 +picRelative ArchPPC OSLinux lbl + = CmmLabelDiffOff lbl gotLabel 0 + -#elif linux_TARGET_OS || (darwin_TARGET_OS && x86_64_TARGET_ARCH) -- Most Linux versions: -- The PIC base register points to the GOT. Use foo@got for symbol -- pointers, and foo@gotoff for everything else. @@ -336,61 +371,116 @@ picRelative lbl -- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers, -- and a GotSymbolOffset label for other things. -- For reasons of tradition, the symbol offset label is written as a plain label. +picRelative arch os lbl + | os == OSLinux || (os == OSDarwin && arch == ArchX86_64) + = let result + | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl + = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl' -picRelative lbl - | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl - = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl' - | otherwise - = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl + | otherwise + = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl + + in result + +picRelative _ _ _ + = panic "PositionIndependentCode.picRelative undefined for this platform" -#else -picRelative lbl = panic "PositionIndependentCode.picRelative" -#endif --- ------------------------------------------------------------------- --- What do we have to add to every assembly file we generate? +-------------------------------------------------------------------------------- -- utility function for pretty-printing asm-labels, -- copied from PprMach -asmSDoc d = Outputable.withPprStyleDoc ( - Outputable.mkCodeStyle Outputable.AsmStyle) d -pprCLabel_asm l = asmSDoc (pprCLabel l) +-- +asmSDoc :: Outputable.SDoc -> Doc +asmSDoc d + = Outputable.withPprStyleDoc + (Outputable.mkCodeStyle Outputable.AsmStyle) d + +pprCLabel_asm :: CLabel -> Doc +pprCLabel_asm l + = asmSDoc (pprCLabel l) + + +needImportedSymbols :: Arch -> OS -> Bool +needImportedSymbols arch os + | os == OSDarwin + , arch /= ArchX86_64 + = True + + -- PowerPC Linux: -fPIC or -dynamic + | os == OSLinux + , arch == ArchPPC + = opt_PIC || not opt_Static + + -- i386 (and others?): -dynamic but not -fPIC + | os == OSLinux + , arch /= ArchPPC_64 + = not opt_Static && not opt_PIC + + | otherwise + = False +-- gotLabel +-- The label used to refer to our "fake GOT" from +-- position-independent code. 
+gotLabel :: CLabel +gotLabel + = mkForeignLabel -- HACK: it's not really foreign + (fsLit ".LCTOC1") Nothing False IsData -#if darwin_TARGET_OS && !x86_64_TARGET_ARCH -needImportedSymbols = True +-------------------------------------------------------------------------------- -- We don't need to declare any offset tables. -- However, for PIC on x86, we need a small helper function. -#if i386_TARGET_ARCH -pprGotDeclaration - | opt_PIC - = vcat [ - ptext (sLit ".section __TEXT,__textcoal_nt,coalesced,no_toc"), - ptext (sLit ".weak_definition ___i686.get_pc_thunk.ax"), - ptext (sLit ".private_extern ___i686.get_pc_thunk.ax"), - ptext (sLit "___i686.get_pc_thunk.ax:"), - ptext (sLit "\tmovl (%esp), %eax"), - ptext (sLit "\tret") - ] - | otherwise = Pretty.empty -#else -pprGotDeclaration = Pretty.empty -#endif +pprGotDeclaration :: Arch -> OS -> Doc +pprGotDeclaration ArchX86 OSDarwin + | opt_PIC + = vcat [ + ptext (sLit ".section __TEXT,__textcoal_nt,coalesced,no_toc"), + ptext (sLit ".weak_definition ___i686.get_pc_thunk.ax"), + ptext (sLit ".private_extern ___i686.get_pc_thunk.ax"), + ptext (sLit "___i686.get_pc_thunk.ax:"), + ptext (sLit "\tmovl (%esp), %eax"), + ptext (sLit "\tret") ] + + | otherwise + = empty + + +-- pprGotDeclaration +-- Output whatever needs to be output once per .s file. +-- The .LCTOC1 label is defined to point 32768 bytes into the table, +-- to make the most of the PPC's 16-bit displacements. +-- Only needed for PIC. +pprGotDeclaration arch OSLinux + | arch /= ArchPPC_64 + , not opt_PIC + = Pretty.empty + + | arch /= ArchPPC_64 + = vcat [ + ptext (sLit ".section \".got2\",\"aw\""), + ptext (sLit ".LCTOC1 = .+32768") ] + +pprGotDeclaration _ _ + = panic "pprGotDeclaration: no match" + +-------------------------------------------------------------------------------- -- On Darwin, we have to generate our own stub code for lazy binding.. -- For each processor architecture, there are two versions, one for PIC -- and one for non-PIC. 
-- -- Whenever you change something in this assembler output, make sure -- the splitter in driver/split/ghc-split.lprl recognizes the new output -pprImportedSymbol importedLbl -#if powerpc_TARGET_ARCH - | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl - = case opt_PIC of - False -> + +pprImportedSymbol :: Arch -> OS -> CLabel -> Doc +pprImportedSymbol ArchPPC OSDarwin importedLbl + | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl + = case opt_PIC of + False -> vcat [ ptext (sLit ".symbol_stub"), ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"), @@ -404,7 +494,7 @@ pprImportedSymbol importedLbl <> ptext (sLit "$lazy_ptr)"), ptext (sLit "\tbctr") ] - True -> + True -> vcat [ ptext (sLit ".section __TEXT,__picsymbolstub1,") <> ptext (sLit "symbol_stubs,pure_instructions,32"), @@ -424,16 +514,27 @@ pprImportedSymbol importedLbl ptext (sLit "\tmtctr r12"), ptext (sLit "\tbctr") ] - $+$ vcat [ - ptext (sLit ".lazy_symbol_pointer"), - ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"), - ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl, - ptext (sLit "\t.long dyld_stub_binding_helper") - ] -#elif i386_TARGET_ARCH - | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl - = case opt_PIC of - False -> + $+$ vcat [ + ptext (sLit ".lazy_symbol_pointer"), + ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"), + ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl, + ptext (sLit "\t.long dyld_stub_binding_helper")] + + | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl + = vcat [ + ptext (sLit ".non_lazy_symbol_pointer"), + char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"), + ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl, + ptext (sLit "\t.long\t0")] + + | otherwise + = empty + + +pprImportedSymbol ArchX86 OSDarwin importedLbl + | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl + = case opt_PIC of + False -> vcat [ ptext (sLit ".symbol_stub"), ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"), @@ -446,7 +547,7 @@ pprImportedSymbol importedLbl <> ptext (sLit "$lazy_ptr"), ptext (sLit "\tjmp dyld_stub_binding_helper") ] - True -> + True -> vcat [ ptext (sLit ".section __TEXT,__picsymbolstub2,") <> ptext (sLit "symbol_stubs,pure_instructions,25"), @@ -464,27 +565,28 @@ pprImportedSymbol importedLbl ptext (sLit "\tpushl %eax"), ptext (sLit "\tjmp dyld_stub_binding_helper") ] - $+$ vcat [ ptext (sLit ".section __DATA, __la_sym_ptr") + $+$ vcat [ ptext (sLit ".section __DATA, __la_sym_ptr") <> (if opt_PIC then int 2 else int 3) <> ptext (sLit ",lazy_symbol_pointers"), - ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"), - ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl, - ptext (sLit "\t.long L") <> pprCLabel_asm lbl - <> ptext (sLit "$stub_binder") - ] -#endif --- We also have to declare our symbol pointers ourselves: - | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl - = vcat [ - ptext (sLit ".non_lazy_symbol_pointer"), - char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"), - ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl, - ptext (sLit "\t.long\t0") - ] - - | otherwise = empty - -#elif linux_TARGET_OS && !powerpc64_TARGET_ARCH + ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"), + ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl, + ptext (sLit "\t.long L") <> pprCLabel_asm lbl + <> ptext (sLit "$stub_binder")] + + | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl + = vcat 
[ + ptext (sLit ".non_lazy_symbol_pointer"), + char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"), + ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl, + ptext (sLit "\t.long\t0")] + + | otherwise + = empty + + +pprImportedSymbol _ OSDarwin _ + = empty + -- ELF / Linux -- @@ -514,64 +616,29 @@ pprImportedSymbol importedLbl -- When needImportedSymbols is defined, -- the NCG will keep track of all DynamicLinkerLabels it uses -- and output each of them using pprImportedSymbol. -#if powerpc_TARGET_ARCH - -- PowerPC Linux: -fPIC or -dynamic -needImportedSymbols = opt_PIC || not opt_Static -#else - -- i386 (and others?): -dynamic but not -fPIC -needImportedSymbols = not opt_Static && not opt_PIC -#endif --- gotLabel --- The label used to refer to our "fake GOT" from --- position-independent code. -gotLabel = mkForeignLabel -- HACK: it's not really foreign - (fsLit ".LCTOC1") Nothing False IsData +pprImportedSymbol ArchPPC_64 OSLinux _ + = empty --- pprGotDeclaration --- Output whatever needs to be output once per .s file. --- The .LCTOC1 label is defined to point 32768 bytes into the table, --- to make the most of the PPC's 16-bit displacements. --- Only needed for PIC. - -pprGotDeclaration - | not opt_PIC = Pretty.empty - | otherwise = vcat [ - ptext (sLit ".section \".got2\",\"aw\""), - ptext (sLit ".LCTOC1 = .+32768") - ] - --- We generate one .long/.quad literal for every symbol we import; --- the dynamic linker will relocate those addresses. - -pprImportedSymbol importedLbl - | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl - = vcat [ - ptext (sLit ".section \".got2\", \"aw\""), - ptext (sLit ".LC_") <> pprCLabel_asm lbl <> char ':', - ptext symbolSize <+> pprCLabel_asm lbl - ] - --- PLT code stubs are generated automatically by the dynamic linker. - | otherwise = empty - where - symbolSize = case wordWidth of +pprImportedSymbol _ OSLinux importedLbl + | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl + = let symbolSize = case wordWidth of W32 -> sLit "\t.long" W64 -> sLit "\t.quad" _ -> panic "Unknown wordRep in pprImportedSymbol" -#else - --- For all other currently supported platforms, we don't need to do --- anything at all. + in vcat [ + ptext (sLit ".section \".got2\", \"aw\""), + ptext (sLit ".LC_") <> pprCLabel_asm lbl <> char ':', + ptext symbolSize <+> pprCLabel_asm lbl ] -needImportedSymbols = False -pprGotDeclaration = Pretty.empty -pprImportedSymbol _ = empty -#endif + -- PLT code stubs are generated automatically by the dynamic linker. + | otherwise = empty --- ------------------------------------------------------------------- +pprImportedSymbol _ _ _ + = panic "PIC.pprImportedSymbol: no match" +-------------------------------------------------------------------------------- -- Generate code to calculate the address that should be put in the -- PIC base register. -- This is called by MachCodeGen for every CmmProc that accessed the @@ -581,10 +648,6 @@ pprImportedSymbol _ = empty -- It is assumed that the first NatCmmTop in the input list is a Proc -- and the rest are CmmDatas. -initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop] - -#if darwin_TARGET_OS - -- Darwin is simple: just fetch the address of a local label. 
-- The FETCHPC pseudo-instruction is expanded to multiple instructions -- during pretty-printing so that we don't have to deal with the @@ -598,12 +661,7 @@ initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop] -- call 1f -- 1: popl %picReg -initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics) - = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics) - where BasicBlock bID insns = head blocks - b' = BasicBlock bID (FETCHPC picReg : insns) -#elif powerpc_TARGET_ARCH && linux_TARGET_OS -- Get a pointer to our own fake GOT, which is defined on a per-module basis. -- This is exactly how GCC does it, and it's quite horrible: @@ -612,7 +670,13 @@ initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics) -- define in .text space right next to the proc. This .long literal contains -- the (32-bit) offset from our local label to our global offset table -- (.LCTOC1 aka gotOffLabel). -initializePicBase picReg + +initializePicBase_ppc + :: Arch -> OS -> Reg + -> [NatCmmTop PPC.Instr] + -> NatM [NatCmmTop PPC.Instr] + +initializePicBase_ppc ArchPPC OSLinux picReg (CmmProc info lab params (ListGraph blocks) : statics) = do gotOffLabel <- getNewLabelNat @@ -624,16 +688,33 @@ initializePicBase picReg mkPicBaseLabel 0) ] - offsetToOffset = ImmConstantDiff (ImmCLbl gotOffLabel) - (ImmCLbl mkPicBaseLabel) - BasicBlock bID insns = head blocks - b' = BasicBlock bID (FETCHPC picReg - : LD wordSize tmp - (AddrRegImm picReg offsetToOffset) - : ADD picReg picReg (RIReg tmp) + offsetToOffset + = PPC.ImmConstantDiff + (PPC.ImmCLbl gotOffLabel) + (PPC.ImmCLbl mkPicBaseLabel) + + BasicBlock bID insns + = head blocks + + b' = BasicBlock bID (PPC.FETCHPC picReg + : PPC.LD PPC.archWordSize tmp + (PPC.AddrRegImm picReg offsetToOffset) + : PPC.ADD picReg picReg (PPC.RIReg tmp) : insns) + return (CmmProc info lab params (ListGraph (b' : tail blocks)) : gotOffset : statics) -#elif i386_TARGET_ARCH && linux_TARGET_OS + +initializePicBase_ppc ArchPPC OSDarwin picReg + (CmmProc info lab params (ListGraph blocks) : statics) + = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics) + + where BasicBlock bID insns = head blocks + b' = BasicBlock bID (PPC.FETCHPC picReg : insns) + + +initializePicBase_ppc _ _ _ _ + = panic "initializePicBase_ppc: not needed" + -- We cheat a bit here by defining a pseudo-instruction named FETCHGOT -- which pretty-prints as: @@ -642,13 +723,24 @@ initializePicBase picReg -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %picReg -- (See PprMach.lhs) -initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics) +initializePicBase_x86 + :: Arch -> OS -> Reg + -> [NatCmmTop X86.Instr] + -> NatM [NatCmmTop X86.Instr] + +initializePicBase_x86 ArchX86 OSLinux picReg + (CmmProc info lab params (ListGraph blocks) : statics) = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics) where BasicBlock bID insns = head blocks - b' = BasicBlock bID (FETCHGOT picReg : insns) + b' = BasicBlock bID (X86.FETCHGOT picReg : insns) + +initializePicBase_x86 ArchX86 OSDarwin picReg + (CmmProc info lab params (ListGraph blocks) : statics) + = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics) + + where BasicBlock bID insns = head blocks + b' = BasicBlock bID (X86.FETCHPC picReg : insns) -#else -initializePicBase picReg proc = panic "initializePicBase" +initializePicBase_x86 _ _ _ _ + = panic "initializePicBase_x86: not needed" --- mingw32_TARGET_OS: not needed, won't be called -#endif diff 
--git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs new file mode 100644 index 0000000..6661a3e --- /dev/null +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -0,0 +1,1364 @@ +{-# OPTIONS -w #-} + +----------------------------------------------------------------------------- +-- +-- Generating machine code (instruction selection) +-- +-- (c) The University of Glasgow 1996-2004 +-- +----------------------------------------------------------------------------- + +-- This is a big module, but, if you pay attention to +-- (a) the sectioning, (b) the type signatures, and +-- (c) the #if blah_TARGET_ARCH} things, the +-- structure should not be too overwhelming. + +module PPC.CodeGen ( + cmmTopCodeGen, + InstrBlock +) + +where + +#include "HsVersions.h" +#include "nativeGen/NCG.h" +#include "MachDeps.h" + +-- NCG stuff: +import PPC.Instr +import PPC.Cond +import PPC.Regs +import PPC.RegInfo +import NCGMonad +import Instruction +import PIC +import Size +import RegClass +import Reg +import Platform + +-- Our intermediate code: +import BlockId +import PprCmm ( pprExpr ) +import Cmm +import CLabel + +-- The rest: +import StaticFlags ( opt_PIC ) +import OrdList +import qualified Outputable as O +import Outputable +import DynFlags + +import Control.Monad ( mapAndUnzipM ) +import Data.Bits +import Data.Int +import Data.Word + +-- ----------------------------------------------------------------------------- +-- Top-level of the instruction selector + +-- | 'InstrBlock's are the insn sequences generated by the insn selectors. +-- They are really trees of insns to facilitate fast appending, where a +-- left-to-right traversal (pre-order?) yields the insns in the correct +-- order. + +cmmTopCodeGen + :: DynFlags + -> RawCmmTop + -> NatM [NatCmmTop Instr] + +cmmTopCodeGen dflags (CmmProc info lab params (ListGraph blocks)) = do + (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks + picBaseMb <- getPicBaseMaybeNat + let proc = CmmProc info lab params (ListGraph $ concat nat_blocks) + tops = proc : concat statics + os = platformOS $ targetPlatform dflags + case picBaseMb of + Just picBase -> initializePicBase_ppc ArchPPC os picBase tops + Nothing -> return tops + +cmmTopCodeGen dflags (CmmData sec dat) = do + return [CmmData sec dat] -- no translation, we just use CmmStatic + +basicBlockCodeGen + :: CmmBasicBlock + -> NatM ( [NatBasicBlock Instr] + , [NatCmmTop Instr]) + +basicBlockCodeGen (BasicBlock id stmts) = do + instrs <- stmtsToInstrs stmts + -- code generation may introduce new basic block boundaries, which + -- are indicated by the NEWBLOCK instruction. We must split up the + -- instruction stream into basic blocks again. Also, we extract + -- LDATAs here too. 
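(An illustrative example, not part of the patch: an instruction stream such as

    [ i1, NEWBLOCK b2, i2, LDATA sec d, i3 ]

is rebuilt by the fold below as BasicBlock id [i1] followed by BasicBlock b2 [i2, i3], with the CmmData floated out into the statics list.)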
+ let + (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs + + mkBlocks (NEWBLOCK id) (instrs,blocks,statics) + = ([], BasicBlock id instrs : blocks, statics) + mkBlocks (LDATA sec dat) (instrs,blocks,statics) + = (instrs, blocks, CmmData sec dat:statics) + mkBlocks instr (instrs,blocks,statics) + = (instr:instrs, blocks, statics) + -- in + return (BasicBlock id top : other_blocks, statics) + +stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock +stmtsToInstrs stmts + = do instrss <- mapM stmtToInstrs stmts + return (concatOL instrss) + +stmtToInstrs :: CmmStmt -> NatM InstrBlock +stmtToInstrs stmt = case stmt of + CmmNop -> return nilOL + CmmComment s -> return (unitOL (COMMENT s)) + + CmmAssign reg src + | isFloatType ty -> assignReg_FltCode size reg src +#if WORD_SIZE_IN_BITS==32 + | isWord64 ty -> assignReg_I64Code reg src +#endif + | otherwise -> assignReg_IntCode size reg src + where ty = cmmRegType reg + size = cmmTypeSize ty + + CmmStore addr src + | isFloatType ty -> assignMem_FltCode size addr src +#if WORD_SIZE_IN_BITS==32 + | isWord64 ty -> assignMem_I64Code addr src +#endif + | otherwise -> assignMem_IntCode size addr src + where ty = cmmExprType src + size = cmmTypeSize ty + + CmmCall target result_regs args _ _ + -> genCCall target result_regs args + + CmmBranch id -> genBranch id + CmmCondBranch arg id -> genCondJump id arg + CmmSwitch arg ids -> genSwitch arg ids + CmmJump arg params -> genJump arg + CmmReturn params -> + panic "stmtToInstrs: return statement should have been cps'd away" + + +-------------------------------------------------------------------------------- +-- | 'InstrBlock's are the insn sequences generated by the insn selectors. +-- They are really trees of insns to facilitate fast appending, where a +-- left-to-right traversal yields the insns in the correct order. +-- +type InstrBlock + = OrdList Instr + + +-- | Register's passed up the tree. If the stix code forces the register +-- to live in a pre-decided machine register, it comes out as @Fixed@; +-- otherwise, it comes out as @Any@, and the parent can decide which +-- register to put it in. +-- +data Register + = Fixed Size Reg InstrBlock + | Any Size (Reg -> InstrBlock) + + +swizzleRegisterRep :: Register -> Size -> Register +swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code +swizzleRegisterRep (Any _ codefn) size = Any size codefn + + +-- | Grab the Reg for a CmmReg +getRegisterReg :: CmmReg -> Reg + +getRegisterReg (CmmLocal (LocalReg u pk)) + = mkVReg u (cmmTypeSize pk) + +getRegisterReg (CmmGlobal mid) + = case get_GlobalReg_reg_or_addr mid of + Left (RealReg rrno) -> RealReg rrno + _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence ... + + +{- +Now, given a tree (the argument to an CmmLoad) that references memory, +produce a suitable addressing mode. + +A Rule of the Game (tm) for Amodes: use of the addr bit must +immediately follow use of the code part, since the code part puts +values in registers which the addr then refers to. So you can't put +anything in between, lest it overwrite some of those registers. If +you need to do some other computation between the code part and use of +the addr bit, first store the effective address from the amode in a +temporary, then do the other computation, and then use the temporary: + + code + LEA amode, tmp + ... other computation ... + ... (tmp) ... 
+-} + + +-- | Check whether an integer will fit in 32 bits. +-- A CmmInt is intended to be truncated to the appropriate +-- number of bits, so here we truncate it to Int64. This is +-- important because e.g. -1 as a CmmInt might be either +-- -1 or 18446744073709551615. +-- +is32BitInteger :: Integer -> Bool +is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 + where i64 = fromIntegral i :: Int64 + + +-- | Convert a BlockId to some CmmStatic data +jumpTableEntry :: Maybe BlockId -> CmmStatic +jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) +jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) + where blockLabel = mkAsmTempLabel id + + + +-- ----------------------------------------------------------------------------- +-- General things for putting together code sequences + +-- Expand CmmRegOff. ToDo: should we do it this way around, or convert +-- CmmExprs into CmmRegOff? +mangleIndexTree :: CmmExpr -> CmmExpr +mangleIndexTree (CmmRegOff reg off) + = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] + where width = typeWidth (cmmRegType reg) + +mangleIndexTree _ + = panic "PPC.CodeGen.mangleIndexTree: no match" + +-- ----------------------------------------------------------------------------- +-- Code gen for 64-bit arithmetic on 32-bit platforms + +{- +Simple support for generating 64-bit code (ie, 64 bit values and 64 +bit assignments) on 32-bit platforms. Unlike the main code generator +we merely shoot for generating working code as simply as possible, and +pay little attention to code quality. Specifically, there is no +attempt to deal cleverly with the fixed-vs-floating register +distinction; all values are generated into (pairs of) floating +registers, even if this would mean some redundant reg-reg moves as a +result. Only one of the VRegUniques is returned, since it will be +of the VRegUniqueLo form, and the upper-half VReg can be determined +by applying getHiVRegFromLo to it. +-} + +data ChildCode64 -- a.k.a "Register64" + = ChildCode64 + InstrBlock -- code + Reg -- the lower 32-bit temporary which contains the + -- result; use getHiVRegFromLo to find the other + -- VRegUnique. Rules of this simplified insn + -- selection game are therefore that the returned + -- Reg may be modified + + +-- | The dual to getAnyReg: compute an expression into a register, but +-- we don't mind which one it is. 
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+	tmp <- getNewRegNat rep
+	return (tmp, code tmp)
+    Fixed _ reg code ->
+	return (reg, code)
+
+getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
+getI64Amodes addrTree = do
+    Amode hi_addr addr_code <- getAmode addrTree
+    case addrOffset hi_addr 4 of
+       Just lo_addr -> return (hi_addr, lo_addr, addr_code)
+       Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
+			  return (AddrRegImm hi_ptr (ImmInt 0),
+				  AddrRegImm hi_ptr (ImmInt 4),
+				  code)
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+	(hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+	ChildCode64 vcode rlo <- iselExpr64 valueTree
+	let
+		rhi = getHiVRegFromLo rlo
+
+		-- Big-endian store
+		mov_hi = ST II32 rhi hi_addr
+		mov_lo = ST II32 rlo lo_addr
+	-- in
+	return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+   let
+	r_dst_lo = mkVReg u_dst II32
+	r_dst_hi = getHiVRegFromLo r_dst_lo
+	r_src_hi = getHiVRegFromLo r_src_lo
+	mov_lo = MR r_dst_lo r_src_lo
+	mov_hi = MR r_dst_hi r_src_hi
+   -- in
+   return (
+        vcode `snocOL` mov_lo `snocOL` mov_hi
+     )
+
+assignReg_I64Code lvalue valueTree
+   = panic "assignReg_I64Code(powerpc): invalid lvalue"
+
+
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+    (rlo, rhi) <- getNewRegPairNat II32
+    let mov_hi = LD II32 rhi hi_addr
+        mov_lo = LD II32 rlo lo_addr
+    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+                         rlo
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
+   = return (ChildCode64 nilOL (mkVReg vu II32))
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+  (rlo,rhi) <- getNewRegPairNat II32
+  let
+	half0 = fromIntegral (fromIntegral i :: Word16)
+	half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
+	half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
+	half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
+
+	-- build the low word in rlo and the high word in rhi
+	code = toOL [
+		LIS rlo (ImmInt half1),
+		OR rlo rlo (RIImm $ ImmInt half0),
+		LIS rhi (ImmInt half3),
+		OR rhi rhi (RIImm $ ImmInt half2)
+		]
+  -- in
+  return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+   ChildCode64 code1 r1lo <- iselExpr64 e1
+   ChildCode64 code2 r2lo <- iselExpr64 e2
+   (rlo,rhi) <- getNewRegPairNat II32
+   let
+	r1hi = getHiVRegFromLo r1lo
+	r2hi = getHiVRegFromLo r2lo
+	code =  code1 `appOL`
+		code2 `appOL`
+		toOL [ ADDC rlo r1lo r2lo,
+		       ADDE rhi r1hi r2hi ]
+   -- in
+   return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
+    (expr_reg,expr_code) <- getSomeReg expr
+    (rlo, rhi) <- getNewRegPairNat II32
+    let mov_hi = LI rhi (ImmInt 0)
+        mov_lo = MR rlo expr_reg
+    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
+                         rlo
+iselExpr64 expr
+   = pprPanic "iselExpr64(powerpc)" (ppr expr)
+
+
+
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg)
+  = return (Fixed (cmmTypeSize (cmmRegType reg))
+                  (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+  = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+    --   TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp 
(MO_UU_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + +#endif + + +getRegister (CmmLoad mem pk) + | not (isWord64 pk) + = do + Amode addr addr_code <- getAmode mem + let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk) + addr_code `snocOL` LD size dst addr + return (Any size code) + where size = cmmTypeSize pk + +-- catch simple cases of zero- or sign-extended load +getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) + +-- Note: there is no Load Byte Arithmetic instruction, so no signed case here + +getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) + +getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do + Amode addr addr_code <- getAmode mem + return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) + +getRegister (CmmMachOp mop [x]) -- unary MachOps + = case mop of + MO_Not rep -> triv_ucode_int rep NOT + + MO_F_Neg w -> triv_ucode_float w FNEG + MO_S_Neg w -> triv_ucode_int w NEG + + MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x + MO_FF_Conv W32 W64 -> conversionNop FF64 x + + MO_FS_Conv from to -> coerceFP2Int from to x + MO_SF_Conv from to -> coerceInt2FP from to x + + MO_SS_Conv from to + | from == to -> conversionNop (intSize to) x + + -- narrowing is a nop: we treat the high bits as undefined + MO_SS_Conv W32 to -> conversionNop (intSize to) x + MO_SS_Conv W16 W8 -> conversionNop II8 x + MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8) + MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16) + + MO_UU_Conv from to + | from == to -> conversionNop (intSize to) x + -- narrowing is a nop: we treat the high bits as undefined + MO_UU_Conv W32 to -> conversionNop (intSize to) x + MO_UU_Conv W16 W8 -> conversionNop II8 x + MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32)) + MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32)) + _ -> panic "PPC.CodeGen.getRegister: no match" + + where + triv_ucode_int width instr = trivialUCode (intSize width) instr x + triv_ucode_float width instr = trivialUCode (floatSize width) instr x + + conversionNop new_size expr + = do e_code <- getRegister expr + return (swizzleRegisterRep e_code new_size) + +getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps + = case mop of + MO_F_Eq w -> condFltReg EQQ x y + MO_F_Ne w -> condFltReg NE x y + MO_F_Gt w -> condFltReg GTT x y + MO_F_Ge w -> condFltReg GE x y + MO_F_Lt w -> condFltReg LTT x y + MO_F_Le w -> condFltReg LE x y + + MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y) + MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y) + + MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y) + MO_S_Lt rep -> condIntReg LTT (extendSExpr 
rep x) (extendSExpr rep y)
+      MO_S_Le rep   -> condIntReg LE  (extendSExpr rep x) (extendSExpr rep y)
+
+      MO_U_Gt rep   -> condIntReg GU  (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Ge rep   -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Lt rep   -> condIntReg LU  (extendUExpr rep x) (extendUExpr rep y)
+      MO_U_Le rep   -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_F_Add w  -> triv_float w FADD
+      MO_F_Sub w  -> triv_float w FSUB
+      MO_F_Mul w  -> triv_float w FMUL
+      MO_F_Quot w -> triv_float w FDIV
+
+         -- optimize addition with 32-bit immediate
+         -- (needed for PIC)
+      MO_Add W32 ->
+        case y of
+          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
+            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
+          CmmLit lit
+            -> do
+                (src, srcCode) <- getSomeReg x
+                let imm = litToImm lit
+                    code dst = srcCode `appOL` toOL [
+                                    ADDIS dst src (HA imm),
+                                    ADD dst dst (RIImm (LO imm))
+                                ]
+                return (Any II32 code)
+          _ -> trivialCode W32 True ADD x y
+
+      MO_Add rep -> trivialCode rep True ADD x y
+      MO_Sub rep ->
+        case y of    -- subfi ('subtract from' with immediate) doesn't exist
+          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
+            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
+          _ -> trivialCodeNoImm' (intSize rep) SUBF y x
+
+      MO_Mul rep -> trivialCode rep True MULLW x y
+
+      MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
+
+      MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
+      MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
+
+      MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
+      MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+      MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+      MO_And rep   -> trivialCode rep False AND x y
+      MO_Or rep    -> trivialCode rep False OR x y
+      MO_Xor rep   -> trivialCode rep False XOR x y
+
+      MO_Shl rep   -> trivialCode rep False SLW x y
+      MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
+      MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
+      _		-> panic "PPC.CodeGen.getRegister: no match"
+
+  where
+    triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
+    triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
+
+getRegister (CmmLit (CmmInt i rep))
+  | Just imm <- makeImmediate rep True i
+  = let
+	code dst = unitOL (LI dst imm)
+    in
+	return (Any (intSize rep) code)
+
+getRegister (CmmLit (CmmFloat f frep)) = do
+    lbl <- getNewLabelNat
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let size = floatSize frep
+        code dst =
+	    LDATA ReadOnlyData  [CmmDataLabel lbl,
+				 CmmStaticLit (CmmFloat f frep)]
+            `consOL` (addr_code `snocOL` LD size dst addr)
+    return (Any size code)
+
+getRegister (CmmLit lit)
+  = let rep = cmmLitType lit
+        imm = litToImm lit
+        code dst = toOL [
+              LIS dst (HA imm),
+              ADD dst dst (RIImm (LO imm))
+          ]
+    in return (Any (cmmTypeSize rep) code)
+
+getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
+
+    -- extend?Rep: wrap integer expression of type rep
+    -- in a conversion to W32
+extendSExpr W32 x = x
+extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
+extendUExpr W32 x = x
+extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
+
+-- 
-----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+
+data Amode
+	= Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to a CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to.  So you can't put
+anything in between, lest it overwrite some of those registers.  If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+    code
+    LEA amode, tmp
+    ... other computation ...
+    ... (tmp) ...
+-}
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
+  | Just off <- makeImmediate W32 True (-i)
+  = do
+        (reg, code) <- getSomeReg x
+        return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
+  | Just off <- makeImmediate W32 True i
+  = do
+        (reg, code) <- getSomeReg x
+        return (Amode (AddrRegImm reg off) code)
+
+   -- optimize addition with 32-bit immediate
+   -- (needed for PIC)
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
+  = do
+        tmp <- getNewRegNat II32
+        (src, srcCode) <- getSomeReg x
+        let imm = litToImm lit
+            code = srcCode `snocOL` ADDIS tmp src (HA imm)
+        return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmLit lit)
+  = do
+        tmp <- getNewRegNat II32
+        let imm = litToImm lit
+            code = unitOL (LIS tmp (HA imm))
+        return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmMachOp (MO_Add W32) [x, y])
+  = do
+        (regX, codeX) <- getSomeReg x
+        (regY, codeY) <- getSomeReg y
+        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+
+getAmode other
+  = do
+        (reg, code) <- getSomeReg other
+        let
+            off = ImmInt 0
+        return (Amode (AddrRegImm reg off) code)
+
+
+
+-- The 'CondCode' type:  Condition codes passed up the tree.
+data CondCode
+	= CondCode Bool Cond InstrBlock
+
+-- Set up a condition code for a conditional branch.
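+--
+-- A sketch of the intended reading (the Bool records whether this is
+-- a floating-point comparison): a value such as
+--
+--     CondCode True GE code
+--
+-- says that 'code' sets up the condition register so that a following
+-- BCC GE implements a floating-point >=, including the unordered-case
+-- fixup applied in condFltCode below.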
+ +getCondCode :: CmmExpr -> NatM CondCode + +-- almost the same as everywhere else - but we need to +-- extend small integers to 32 bit first + +getCondCode (CmmMachOp mop [x, y]) + = case mop of + MO_F_Eq W32 -> condFltCode EQQ x y + MO_F_Ne W32 -> condFltCode NE x y + MO_F_Gt W32 -> condFltCode GTT x y + MO_F_Ge W32 -> condFltCode GE x y + MO_F_Lt W32 -> condFltCode LTT x y + MO_F_Le W32 -> condFltCode LE x y + + MO_F_Eq W64 -> condFltCode EQQ x y + MO_F_Ne W64 -> condFltCode NE x y + MO_F_Gt W64 -> condFltCode GTT x y + MO_F_Ge W64 -> condFltCode GE x y + MO_F_Lt W64 -> condFltCode LTT x y + MO_F_Le W64 -> condFltCode LE x y + + MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y) + MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y) + + MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y) + MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y) + MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y) + + MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y) + MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y) + + other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) + +getCondCode other = panic "getCondCode(2)(powerpc)" + + + +-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be +-- passed back up the tree. + +condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode + +-- ###FIXME: I16 and I8! +condIntCode cond x (CmmLit (CmmInt y rep)) + | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y + = do + (src1, code) <- getSomeReg x + let + code' = code `snocOL` + (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2) + return (CondCode False cond code') + +condIntCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code' = code1 `appOL` code2 `snocOL` + (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2) + return (CondCode False cond code') + +condFltCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code' = code1 `appOL` code2 `snocOL` FCMP src1 src2 + code'' = case cond of -- twiddle CR to handle unordered case + GE -> code' `snocOL` CRNOR ltbit eqbit gtbit + LE -> code' `snocOL` CRNOR gtbit eqbit ltbit + _ -> code' + where + ltbit = 0 ; eqbit = 2 ; gtbit = 1 + return (CondCode True cond code'') + + + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). 
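+--
+-- For example (with a hypothetical assignment x := y + 1): the RHS
+-- comes back from getRegister as Any, and applying its code function
+-- to x's own register emits something like
+--
+--     ADD x y (RIImm (ImmInt 1))
+--
+-- directly, so the register transfer disappears. Only a Fixed result
+-- (e.g. from a ccall) still needs an explicit MR into the destination.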
+ +assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_IntCode pk addr src = do + (srcReg, code) <- getSomeReg src + Amode dstAddr addr_code <- getAmode addr + return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr + +-- dst is a reg, but src could be anything +assignReg_IntCode _ reg src + = do + r <- getRegister src + return $ case r of + Any _ code -> code dst + Fixed _ freg fcode -> fcode `snocOL` MR dst freg + where + dst = getRegisterReg reg + + + +-- Easy, isn't it? +assignMem_FltCode = assignMem_IntCode +assignReg_FltCode = assignReg_IntCode + + + +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock + +genJump (CmmLit (CmmLabel lbl)) + = return (unitOL $ JMP lbl) + +genJump tree + = do + (target,code) <- getSomeReg tree + return (code `snocOL` MTCTR target `snocOL` BCTR []) + + +-- ----------------------------------------------------------------------------- +-- Unconditional branches +genBranch :: BlockId -> NatM InstrBlock +genBranch = return . toOL . mkJumpInstr + + +-- ----------------------------------------------------------------------------- +-- Conditional jumps + +{- +Conditional jumps are always to local labels, so we can use branch +instructions. We peek at the arguments to decide what kind of +comparison to do. + +SPARC: First, we have to ensure that the condition codes are set +according to the supplied comparison operation. We generate slightly +different code for floating point comparisons, because a floating +point operation cannot directly precede a @BF@. We assume the worst +and fill that slot with a @NOP@. + +SPARC: Do not fill the delay slots here; you will confuse the register +allocator. +-} + + +genCondJump + :: BlockId -- the branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock + +genCondJump id bool = do + CondCode _ cond code <- getCondCode bool + return (code `snocOL` BCC cond id) + + + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +genCCall + :: CmmCallTarget -- function to call + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) + -> NatM InstrBlock + + +#if darwin_TARGET_OS || linux_TARGET_OS +{- + The PowerPC calling convention for Darwin/Mac OS X + is described in Apple's document + "Inside Mac OS X - Mach-O Runtime Architecture". + + PowerPC Linux uses the System V Release 4 Calling Convention + for PowerPC. It is described in the + "System V Application Binary Interface PowerPC Processor Supplement". + + Both conventions are similar: + Parameters may be passed in general-purpose registers starting at r3, in + floating point registers starting at f1, or on the stack. + + But there are substantial differences: + * The number of registers used for parameter passing and the exact set of + nonvolatile registers differs (see MachRegs.lhs). + * On Darwin, stack space is always reserved for parameters, even if they are + passed in registers. 
The called routine may choose to save parameters from
+    registers to the corresponding space on the stack.
+  * On Darwin, a corresponding number of GPRs is skipped when a floating point
+    parameter is passed in an FPR.
+  * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
+    starting with an odd-numbered GPR. It may skip a GPR to achieve this.
+    Darwin just treats an I64 like two separate II32s (high word first).
+  * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
+    4-byte aligned like everything else on Darwin.
+  * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
+    PowerPC Linux does not agree, so neither do we.
+
+  According to both conventions, the parameter area should be part of the
+  caller's stack frame, allocated in the caller's prologue code (large enough
+  to hold the parameter lists for all called routines). The NCG already
+  uses the stack for register spilling, leaving 64 bytes free at the top.
+  If we need a larger parameter area than that, we allocate a new stack
+  frame just before ccalling.
+-}
+
+
+genCCall (CmmPrim MO_WriteBarrier) _ _
+ = return $ unitOL LWSYNC
+
+genCCall target dest_regs argsAndHints
+  = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
+        -- we rely on argument promotion in the codeGen
+    do
+        (finalStack,passArgumentsCode,usedRegs) <- passArguments
+                                                        (zip args argReps)
+                                                        allArgRegs allFPArgRegs
+                                                        initialStackOffset
+                                                        (toOL []) []
+
+        (labelOrExpr, reduceToFF32) <- case target of
+            CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+            CmmCallee expr conv -> return  (Right expr, False)
+            CmmPrim mop -> outOfLineFloatOp mop
+
+        let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
+            codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
+
+        case labelOrExpr of
+            Left lbl -> do
+		return (	 codeBefore
+			`snocOL` BL lbl usedRegs
+			`appOL`	 codeAfter)
+            Right dyn -> do
+		(dynReg, dynCode) <- getSomeReg dyn
+		return (	 dynCode
+			`snocOL` MTCTR dynReg
+			`appOL`	 codeBefore
+			`snocOL` BCTRL usedRegs
+			`appOL`	 codeAfter)
+    where
+#if darwin_TARGET_OS
+        initialStackOffset = 24
+	    -- size of linkage area + size of arguments, in bytes
+	stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
+	                         map (widthInBytes . 
typeWidth) argReps +#elif linux_TARGET_OS + initialStackOffset = 8 + stackDelta finalStack = roundTo 16 finalStack +#endif + args = map hintlessCmm argsAndHints + argReps = map cmmExprType args + + roundTo a x | x `mod` a == 0 = x + | otherwise = x + a - (x `mod` a) + + move_sp_down finalStack + | delta > 64 = + toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))), + DELTA (-delta)] + | otherwise = nilOL + where delta = stackDelta finalStack + move_sp_up finalStack + | delta > 64 = + toOL [ADD sp sp (RIImm (ImmInt delta)), + DELTA 0] + | otherwise = nilOL + where delta = stackDelta finalStack + + + passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) + passArguments ((arg,arg_ty):args) gprs fprs stackOffset + accumCode accumUsed | isWord64 arg_ty = + do + ChildCode64 code vr_lo <- iselExpr64 arg + let vr_hi = getHiVRegFromLo vr_lo + +#if darwin_TARGET_OS + passArguments args + (drop 2 gprs) + fprs + (stackOffset+8) + (accumCode `appOL` code + `snocOL` storeWord vr_hi gprs stackOffset + `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4)) + ((take 2 gprs) ++ accumUsed) + where + storeWord vr (gpr:_) offset = MR gpr vr + storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset)) + +#elif linux_TARGET_OS + let stackOffset' = roundTo 8 stackOffset + stackCode = accumCode `appOL` code + `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) + `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) + regCode hireg loreg = + accumCode `appOL` code + `snocOL` MR hireg vr_hi + `snocOL` MR loreg vr_lo + + case gprs of + hireg : loreg : regs | even (length gprs) -> + passArguments args regs fprs stackOffset + (regCode hireg loreg) (hireg : loreg : accumUsed) + _skipped : hireg : loreg : regs -> + passArguments args regs fprs stackOffset + (regCode hireg loreg) (hireg : loreg : accumUsed) + _ -> -- only one or no regs left + passArguments args [] fprs (stackOffset'+8) + stackCode accumUsed +#endif + + passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed + | reg : _ <- regs = do + register <- getRegister arg + let code = case register of + Fixed _ freg fcode -> fcode `snocOL` MR reg freg + Any _ acode -> acode reg + passArguments args + (drop nGprs gprs) + (drop nFprs fprs) +#if darwin_TARGET_OS + -- The Darwin ABI requires that we reserve stack slots for register parameters + (stackOffset + stackBytes) +#elif linux_TARGET_OS + -- ... the SysV ABI doesn't. + stackOffset +#endif + (accumCode `appOL` code) + (reg : accumUsed) + | otherwise = do + (vr, code) <- getSomeReg arg + passArguments args + (drop nGprs gprs) + (drop nFprs fprs) + (stackOffset' + stackBytes) + (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot) + accumUsed + where +#if darwin_TARGET_OS + -- stackOffset is at least 4-byte aligned + -- The Darwin ABI is happy with that. + stackOffset' = stackOffset +#else + -- ... the SysV ABI requires 8-byte alignment for doubles. + stackOffset' | isFloatType rep && typeWidth rep == W64 = + roundTo 8 stackOffset + | otherwise = stackOffset +#endif + stackSlot = AddrRegImm sp (ImmInt stackOffset') + (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of + II32 -> (1, 0, 4, gprs) +#if darwin_TARGET_OS + -- The Darwin ABI requires that we skip a corresponding number of GPRs when + -- we use the FPRs. + FF32 -> (1, 1, 4, fprs) + FF64 -> (2, 1, 8, fprs) +#elif linux_TARGET_OS + -- ... the SysV ABI doesn't. 
+ FF32 -> (0, 1, 4, fprs) + FF64 -> (0, 1, 8, fprs) +#endif + + moveResult reduceToFF32 = + case dest_regs of + [] -> nilOL + [CmmHinted dest _hint] + | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) + | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) + | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3, + MR r_dest r4] + | otherwise -> unitOL (MR r_dest r3) + where rep = cmmRegType (CmmLocal dest) + r_dest = getRegisterReg (CmmLocal dest) + + outOfLineFloatOp mop = + do + dflags <- getDynFlagsNat + mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $ + mkForeignLabel functionName Nothing True + let mopLabelOrExpr = case mopExpr of + CmmLit (CmmLabel lbl) -> Left lbl + _ -> Right mopExpr + return (mopLabelOrExpr, reduce) + where + (functionName, reduce) = case mop of + MO_F32_Exp -> (fsLit "exp", True) + MO_F32_Log -> (fsLit "log", True) + MO_F32_Sqrt -> (fsLit "sqrt", True) + + MO_F32_Sin -> (fsLit "sin", True) + MO_F32_Cos -> (fsLit "cos", True) + MO_F32_Tan -> (fsLit "tan", True) + + MO_F32_Asin -> (fsLit "asin", True) + MO_F32_Acos -> (fsLit "acos", True) + MO_F32_Atan -> (fsLit "atan", True) + + MO_F32_Sinh -> (fsLit "sinh", True) + MO_F32_Cosh -> (fsLit "cosh", True) + MO_F32_Tanh -> (fsLit "tanh", True) + MO_F32_Pwr -> (fsLit "pow", True) + + MO_F64_Exp -> (fsLit "exp", False) + MO_F64_Log -> (fsLit "log", False) + MO_F64_Sqrt -> (fsLit "sqrt", False) + + MO_F64_Sin -> (fsLit "sin", False) + MO_F64_Cos -> (fsLit "cos", False) + MO_F64_Tan -> (fsLit "tan", False) + + MO_F64_Asin -> (fsLit "asin", False) + MO_F64_Acos -> (fsLit "acos", False) + MO_F64_Atan -> (fsLit "atan", False) + + MO_F64_Sinh -> (fsLit "sinh", False) + MO_F64_Cosh -> (fsLit "cosh", False) + MO_F64_Tanh -> (fsLit "tanh", False) + MO_F64_Pwr -> (fsLit "pow", False) + other -> pprPanic "genCCall(ppc): unknown callish op" + (pprCallishMachOp other) + +#else /* darwin_TARGET_OS || linux_TARGET_OS */ +genCCall = panic "PPC.CodeGen.genCCall: not defined for this os" +#endif + + +-- ----------------------------------------------------------------------------- +-- Generating a table-branch + +genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock +genSwitch expr ids + | opt_PIC + = do + (reg,e_code) <- getSomeReg expr + tmp <- getNewRegNat II32 + lbl <- getNewLabelNat + dflags <- getDynFlagsNat + dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl + (tableReg,t_code) <- getSomeReg $ dynRef + let + jumpTable = map jumpTableEntryRel ids + + jumpTableEntryRel Nothing + = CmmStaticLit (CmmInt 0 wordWidth) + jumpTableEntryRel (Just (BlockId id)) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) + where blockLabel = mkAsmTempLabel id + + code = e_code `appOL` t_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + SLW tmp reg (RIImm (ImmInt 2)), + LD II32 tmp (AddrRegReg tableReg tmp), + ADD tmp tmp (RIReg tableReg), + MTCTR tmp, + BCTR [ id | Just id <- ids ] + ] + return code + | otherwise + = do + (reg,e_code) <- getSomeReg expr + tmp <- getNewRegNat II32 + lbl <- getNewLabelNat + let + jumpTable = map jumpTableEntry ids + + code = e_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + SLW tmp reg (RIImm (ImmInt 2)), + ADDIS tmp tmp (HA (ImmCLbl lbl)), + LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), + MTCTR tmp, + BCTR [ id | Just id <- ids ] + ] + return code + + +-- ----------------------------------------------------------------------------- +-- 'condIntReg' and 'condFltReg': condition codes into registers + +-- 
Turn those condition codes into integers now (when they appear
+-- on the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condReg :: NatM CondCode -> NatM Register
+condReg getCond = do
+    CondCode _ cond cond_code <- getCond
+    let
+{-        code dst = cond_code `appOL` toOL [
+                BCC cond lbl1,
+                LI dst (ImmInt 0),
+                BCC ALWAYS lbl2,
+                NEWBLOCK lbl1,
+                LI dst (ImmInt 1),
+                BCC ALWAYS lbl2,
+                NEWBLOCK lbl2
+            ]-}
+        code dst = cond_code
+            `appOL` negate_code
+            `appOL` toOL [
+                MFCR dst,
+                RLWINM dst dst (bit + 1) 31 31
+            ]
+
+        negate_code | do_negate = unitOL (CRNOR bit bit bit)
+                    | otherwise = nilOL
+
+        (bit, do_negate) = case cond of
+            LTT -> (0, False)
+            LE  -> (1, True)
+            EQQ -> (2, False)
+            GE  -> (0, True)
+            GTT -> (1, False)
+
+            NE  -> (2, True)
+
+            LU  -> (0, False)
+            LEU -> (1, True)
+            GEU -> (0, True)
+            GU  -> (1, False)
+            _   -> panic "PPC.CodeGen.condReg: no match"
+
+    return (Any II32 code)
+
+condIntReg cond x y = condReg (condIntCode cond x y)
+condFltReg cond x y = condReg (condFltCode cond x y)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+
+
+{-
+Wolfgang's PowerPC version of The Rules:
+
+A slightly modified version of The Rules to take advantage of the fact
+that PowerPC instructions work on all registers and don't implicitly
+clobber any fixed registers.
+
+* The only expression for which getRegister returns Fixed is (CmmReg reg).
+
+* If getRegister returns Any, then the code it generates may modify only:
+	(a) fresh temporaries
+	(b) the destination register
+  It may *not* modify global registers, unless the global
+  register happens to be the destination register.
+  It may not clobber any other registers. In fact, only ccalls clobber any
+  fixed registers.
+  Also, it may not modify the counter register (used by genCCall).
+
+  Corollary: If a getRegister for a subexpression returns Fixed, you need
+  not move it to a fresh temporary before evaluating the next subexpression.
+  The Fixed register won't be modified.
+  Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
+
+* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
+  the value of the destination register.
+-} + +trivialCode + :: Width + -> Bool + -> (Reg -> Reg -> RI -> Instr) + -> CmmExpr + -> CmmExpr + -> NatM Register + +trivialCode rep signed instr x (CmmLit (CmmInt y _)) + | Just imm <- makeImmediate rep signed y + = do + (src1, code1) <- getSomeReg x + let code dst = code1 `snocOL` instr dst src1 (RIImm imm) + return (Any (intSize rep) code) + +trivialCode rep _ instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) + return (Any (intSize rep) code) + +trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +trivialCodeNoImm' size instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 + return (Any size code) + +trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y + + +trivialUCode + :: Size + -> (Reg -> Reg -> Instr) + -> CmmExpr + -> NatM Register +trivialUCode rep instr x = do + (src, code) <- getSomeReg x + let code' dst = code `snocOL` instr dst src + return (Any rep code') + +-- There is no "remainder" instruction on the PPC, so we have to do +-- it the hard way. +-- The "div" parameter is the division instruction to use (DIVW or DIVWU) + +remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +remainderCode rep div x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `appOL` toOL [ + div dst src1 src2, + MULLW dst dst (RIReg src2), + SUBF dst dst src1 + ] + return (Any (intSize rep) code) + + +coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register +coerceInt2FP fromRep toRep x = do + (src, code) <- getSomeReg x + lbl <- getNewLabelNat + itmp <- getNewRegNat II32 + ftmp <- getNewRegNat FF64 + dflags <- getDynFlagsNat + dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl + Amode addr addr_code <- getAmode dynRef + let + code' dst = code `appOL` maybe_exts `appOL` toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmInt 0x43300000 W32), + CmmStaticLit (CmmInt 0x80000000 W32)], + XORIS itmp src (ImmInt 0x8000), + ST II32 itmp (spRel 3), + LIS itmp (ImmInt 0x4330), + ST II32 itmp (spRel 2), + LD FF64 ftmp (spRel 2) + ] `appOL` addr_code `appOL` toOL [ + LD FF64 dst addr, + FSUB FF64 dst ftmp dst + ] `appOL` maybe_frsp dst + + maybe_exts = case fromRep of + W8 -> unitOL $ EXTS II8 src src + W16 -> unitOL $ EXTS II16 src src + W32 -> nilOL + _ -> panic "PPC.CodeGen.coerceInt2FP: no match" + + maybe_frsp dst + = case toRep of + W32 -> unitOL $ FRSP dst dst + W64 -> nilOL + _ -> panic "PPC.CodeGen.coerceInt2FP: no match" + + return (Any (floatSize toRep) code') + +coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register +coerceFP2Int _ toRep x = do + -- the reps don't really matter: F*->FF64 and II32->I* are no-ops + (src, code) <- getSomeReg x + tmp <- getNewRegNat FF64 + let + code' dst = code `appOL` toOL [ + -- convert to int in FP reg + FCTIWZ tmp src, + -- store value (64bit) from FP to stack + ST FF64 tmp (spRel 2), + -- read low word of value (high word is undefined) + LD II32 dst (spRel 3)] + return (Any (intSize toRep) code') diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs new file mode 100644 index 0000000..7345ee5 --- /dev/null +++ 
b/compiler/nativeGen/PPC/Cond.hs @@ -0,0 +1,62 @@ + +module PPC.Cond ( + Cond(..), + condNegate, + condUnsigned, + condToSigned, + condToUnsigned, +) + +where + +import Panic + +data Cond + = ALWAYS + | EQQ + | GE + | GEU + | GTT + | GU + | LE + | LEU + | LTT + | LU + | NE + deriving Eq + + +condNegate :: Cond -> Cond +condNegate ALWAYS = panic "condNegate: ALWAYS" +condNegate EQQ = NE +condNegate GE = LTT +condNegate GEU = LU +condNegate GTT = LE +condNegate GU = LEU +condNegate LE = GTT +condNegate LEU = GU +condNegate LTT = GE +condNegate LU = GEU +condNegate NE = EQQ + +-- Condition utils +condUnsigned :: Cond -> Bool +condUnsigned GU = True +condUnsigned LU = True +condUnsigned GEU = True +condUnsigned LEU = True +condUnsigned _ = False + +condToSigned :: Cond -> Cond +condToSigned GU = GTT +condToSigned LU = LTT +condToSigned GEU = GE +condToSigned LEU = LE +condToSigned x = x + +condToUnsigned :: Cond -> Cond +condToUnsigned GTT = GU +condToUnsigned LTT = LU +condToUnsigned GE = GEU +condToUnsigned LE = LEU +condToUnsigned x = x diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index 85aa494..55affc6 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -10,49 +10,50 @@ #include "nativeGen/NCG.h" module PPC.Instr ( - Cond(..), - condNegate, + archWordSize, RI(..), - Instr(..) + Instr(..), + maxSpillSlots ) where -import BlockId import PPC.Regs -import RegsBase +import PPC.Cond +import Instruction +import Size +import RegClass +import Reg + +import Constants (rESERVED_C_STACK_BYTES) +import BlockId import Cmm -import Outputable import FastString import CLabel +import Outputable +import FastBool + +-------------------------------------------------------------------------------- +-- Size of a PPC memory address, in bytes. +-- +archWordSize :: Size +archWordSize = II32 -data Cond - = ALWAYS - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - deriving Eq - - -condNegate :: Cond -> Cond -condNegate ALWAYS = panic "condNegate: ALWAYS" -condNegate EQQ = NE -condNegate GE = LTT -condNegate GEU = LU -condNegate GTT = LE -condNegate GU = LEU -condNegate LE = GTT -condNegate LEU = GU -condNegate LTT = GE -condNegate LU = GEU -condNegate NE = EQQ + +-- | Instruction instance for powerpc +instance Instruction Instr where + regUsageOfInstr = ppc_regUsageOfInstr + patchRegsOfInstr = ppc_patchRegsOfInstr + isJumpishInstr = ppc_isJumpishInstr + jumpDestsOfInstr = ppc_jumpDestsOfInstr + patchJumpInstr = ppc_patchJumpInstr + mkSpillInstr = ppc_mkSpillInstr + mkLoadInstr = ppc_mkLoadInstr + takeDeltaInstr = ppc_takeDeltaInstr + isMetaInstr = ppc_isMetaInstr + mkRegRegMoveInstr = ppc_mkRegRegMoveInstr + takeRegRegMoveInstr = ppc_takeRegRegMoveInstr + mkJumpInstr = ppc_mkJumpInstr -- ----------------------------------------------------------------------------- @@ -85,12 +86,6 @@ data Instr -- benefit of subsequent passes | DELTA Int - -- | spill this reg to a stack slot - | SPILL Reg Int - - -- | reload this reg from a stack slot - | RELOAD Int Reg - -- Loads and stores. | LD Size Reg AddrMode -- Load size, dst, src | LA Size Reg AddrMode -- Load arithmetic size, dst, src @@ -165,3 +160,293 @@ data Instr -- bcl to next insn, mflr reg | LWSYNC -- memory barrier + + +-- | Get the registers that are being used by this instruction. +-- regUsage doesn't need to do any trickery for jumps and such. +-- Just state precisely the regs read and written by that insn. 
+-- The consequences of control flow transfers, as far as register +-- allocation goes, are taken care of by the register allocator. +-- +ppc_regUsageOfInstr :: Instr -> RegUsage +ppc_regUsageOfInstr instr + = case instr of + LD _ reg addr -> usage (regAddr addr, [reg]) + LA _ reg addr -> usage (regAddr addr, [reg]) + ST _ reg addr -> usage (reg : regAddr addr, []) + STU _ reg addr -> usage (reg : regAddr addr, []) + LIS reg _ -> usage ([], [reg]) + LI reg _ -> usage ([], [reg]) + MR reg1 reg2 -> usage ([reg2], [reg1]) + CMP _ reg ri -> usage (reg : regRI ri,[]) + CMPL _ reg ri -> usage (reg : regRI ri,[]) + BCC _ _ -> noUsage + BCCFAR _ _ -> noUsage + MTCTR reg -> usage ([reg],[]) + BCTR _ -> noUsage + BL _ params -> usage (params, callClobberedRegs) + BCTRL params -> usage (params, callClobberedRegs) + ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) + SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) + MULLW_MayOflo reg1 reg2 reg3 + -> usage ([reg2,reg3], [reg1]) + AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + XORIS reg1 reg2 _ -> usage ([reg2], [reg1]) + EXTS _ reg1 reg2 -> usage ([reg2], [reg1]) + NEG reg1 reg2 -> usage ([reg2], [reg1]) + NOT reg1 reg2 -> usage ([reg2], [reg1]) + SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + RLWINM reg1 reg2 _ _ _ + -> usage ([reg2], [reg1]) + FADD _ r1 r2 r3 -> usage ([r2,r3], [r1]) + FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1]) + FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1]) + FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1]) + FNEG r1 r2 -> usage ([r2], [r1]) + FCMP r1 r2 -> usage ([r1,r2], []) + FCTIWZ r1 r2 -> usage ([r2], [r1]) + FRSP r1 r2 -> usage ([r2], [r1]) + MFCR reg -> usage ([], [reg]) + MFLR reg -> usage ([], [reg]) + FETCHPC reg -> usage ([], [reg]) + _ -> noUsage + where + usage (src, dst) = RU (filter interesting src) + (filter interesting dst) + regAddr (AddrRegReg r1 r2) = [r1, r2] + regAddr (AddrRegImm r1 _) = [r1] + + regRI (RIReg r) = [r] + regRI _ = [] + +interesting :: Reg -> Bool +interesting (VirtualRegI _) = True +interesting (VirtualRegHi _) = True +interesting (VirtualRegF _) = True +interesting (VirtualRegD _) = True +interesting (RealReg i) = isFastTrue (freeReg i) + + + + +-- | Apply a given mapping to all the register references in this +-- instruction. 
+ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+ppc_patchRegsOfInstr instr env
+ = case instr of
+        LD    sz reg addr   -> LD sz (env reg) (fixAddr addr)
+        LA    sz reg addr   -> LA sz (env reg) (fixAddr addr)
+        ST    sz reg addr   -> ST sz (env reg) (fixAddr addr)
+        STU   sz reg addr   -> STU sz (env reg) (fixAddr addr)
+        LIS   reg imm       -> LIS (env reg) imm
+        LI    reg imm       -> LI (env reg) imm
+        MR    reg1 reg2     -> MR (env reg1) (env reg2)
+        CMP   sz reg ri     -> CMP sz (env reg) (fixRI ri)
+        CMPL  sz reg ri     -> CMPL sz (env reg) (fixRI ri)
+        BCC   cond lbl      -> BCC cond lbl
+        BCCFAR cond lbl     -> BCCFAR cond lbl
+        MTCTR reg           -> MTCTR (env reg)
+        BCTR  targets       -> BCTR targets
+        BL    imm argRegs   -> BL imm argRegs    -- argument regs
+        BCTRL argRegs       -> BCTRL argRegs     -- cannot be remapped
+        ADD   reg1 reg2 ri  -> ADD (env reg1) (env reg2) (fixRI ri)
+        ADDC  reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
+        ADDE  reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
+        ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
+        SUBF  reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
+        MULLW reg1 reg2 ri  -> MULLW (env reg1) (env reg2) (fixRI ri)
+        DIVW  reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
+        DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
+        MULLW_MayOflo reg1 reg2 reg3
+                            -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
+        AND   reg1 reg2 ri  -> AND (env reg1) (env reg2) (fixRI ri)
+        OR    reg1 reg2 ri  -> OR (env reg1) (env reg2) (fixRI ri)
+        XOR   reg1 reg2 ri  -> XOR (env reg1) (env reg2) (fixRI ri)
+        XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
+        EXTS  sz reg1 reg2  -> EXTS sz (env reg1) (env reg2)
+        NEG   reg1 reg2     -> NEG (env reg1) (env reg2)
+        NOT   reg1 reg2     -> NOT (env reg1) (env reg2)
+        SLW   reg1 reg2 ri  -> SLW (env reg1) (env reg2) (fixRI ri)
+        SRW   reg1 reg2 ri  -> SRW (env reg1) (env reg2) (fixRI ri)
+        SRAW  reg1 reg2 ri  -> SRAW (env reg1) (env reg2) (fixRI ri)
+        RLWINM reg1 reg2 sh mb me
+                            -> RLWINM (env reg1) (env reg2) sh mb me
+        FADD  sz r1 r2 r3   -> FADD sz (env r1) (env r2) (env r3)
+        FSUB  sz r1 r2 r3   -> FSUB sz (env r1) (env r2) (env r3)
+        FMUL  sz r1 r2 r3   -> FMUL sz (env r1) (env r2) (env r3)
+        FDIV  sz r1 r2 r3   -> FDIV sz (env r1) (env r2) (env r3)
+        FNEG  r1 r2         -> FNEG (env r1) (env r2)
+        FCMP  r1 r2         -> FCMP (env r1) (env r2)
+        FCTIWZ r1 r2        -> FCTIWZ (env r1) (env r2)
+        FRSP  r1 r2         -> FRSP (env r1) (env r2)
+        MFCR  reg           -> MFCR (env reg)
+        MFLR  reg           -> MFLR (env reg)
+        FETCHPC reg         -> FETCHPC (env reg)
+        _                   -> instr
+    where
+        fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+        fixAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i
+
+        fixRI (RIReg r) = RIReg (env r)
+        fixRI other     = other
+
+
+--------------------------------------------------------------------------------
+-- | Checks whether this instruction is a jump/branch instruction.
+--	One that can change the flow of control in a way that the
+--	register allocator needs to worry about.
+ppc_isJumpishInstr :: Instr -> Bool
+ppc_isJumpishInstr instr
+ = case instr of
+	BCC{}		-> True
+	BCCFAR{}	-> True
+	BCTR{}		-> True
+	BCTRL{}		-> True
+	BL{}		-> True
+	JMP{}		-> True
+	_		-> False
+
+
+-- | Find the destinations of this jump/branch instruction;
+--	used by the register allocator to follow control flow.
+ppc_jumpDestsOfInstr :: Instr -> [BlockId]
+ppc_jumpDestsOfInstr insn
+  = case insn of
+        BCC _ id        -> [id]
+        BCCFAR _ id     -> [id]
+        BCTR targets    -> targets
+	_		-> []
+
+
+-- | Change the destination of this jump instruction.
+--	Used in the linear allocator when adding fixup blocks for join
+--	points.
+ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+ppc_patchJumpInstr insn patchF
+  = case insn of
+        BCC cc id	-> BCC cc (patchF id)
+        BCCFAR cc id	-> BCCFAR cc (patchF id)
+        BCTR _		-> error "Cannot patch BCTR"
+	_		-> insn
+
+
+-- -----------------------------------------------------------------------------
+
+-- | An instruction to spill a register into a spill slot.
+ppc_mkSpillInstr
+   :: Reg		-- register to spill
+   -> Int		-- current stack delta
+   -> Int		-- spill slot to use
+   -> Instr
+
+ppc_mkSpillInstr reg delta slot
+  = let	off     = spillSlotToOffset slot
+    in
+    let sz = case regClass reg of
+                RcInteger -> II32
+                RcDouble  -> FF64
+		_	  -> panic "PPC.Instr.mkSpillInstr: no match"
+    in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
+
+
+ppc_mkLoadInstr
+   :: Reg		-- register to load
+   -> Int		-- current stack delta
+   -> Int		-- spill slot to use
+   -> Instr
+
+ppc_mkLoadInstr reg delta slot
+  = let off     = spillSlotToOffset slot
+    in
+    let sz = case regClass reg of
+                RcInteger -> II32
+                RcDouble  -> FF64
+		_	  -> panic "PPC.Instr.mkLoadInstr: no match"
+    in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+
+
+spillSlotSize :: Int
+spillSlotSize = 8
+
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
+
+-- convert a spill slot number to a *byte* offset, with no sign:
+-- decide on a per arch basis whether you are spilling above or below
+-- the C stack pointer.
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+   | slot >= 0 && slot < maxSpillSlots
+   = 64 + spillSlotSize * slot
+   | otherwise
+   = pprPanic "spillSlotToOffset:"
+	      (   text "invalid spill location: " <> int slot
+	      $$  text "maxSpillSlots: "          <> int maxSpillSlots)
+
+
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+ppc_takeDeltaInstr
+	:: Instr
+	-> Maybe Int
+
+ppc_takeDeltaInstr instr
+ = case instr of
+	DELTA i		-> Just i
+	_		-> Nothing
+
+
+ppc_isMetaInstr
+	:: Instr
+	-> Bool
+
+ppc_isMetaInstr instr
+ = case instr of
+	COMMENT{}	-> True
+	LDATA{}		-> True
+	NEWBLOCK{}	-> True
+	DELTA{}		-> True
+	_		-> False
+
+
+-- | Copy the value in a register to another one.
+--	Must work for all register classes.
+ppc_mkRegRegMoveInstr
+	:: Reg
+	-> Reg
+	-> Instr
+
+ppc_mkRegRegMoveInstr src dst
+	= MR dst src
+
+
+-- | Make an unconditional jump instruction.
+--	For architectures with branch delay slots, it's OK to put
+--	a NOP after the jump. Don't fill the delay slot with an
+--	instruction that references regs or you'll confuse the
+--	linear allocator.
+ppc_mkJumpInstr
+	:: BlockId
+	-> [Instr]
+
+ppc_mkJumpInstr id
+	= [BCC ALWAYS id]
+
+
+-- | Take the source and destination from this reg -> reg move instruction
+--	or Nothing if it's not one
+ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+ppc_takeRegRegMoveInstr (MR dst src) = Just (src,dst)
+ppc_takeRegRegMoveInstr _  = Nothing
+
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index ac83600..f12d32a 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -7,12 +7,15 @@
 -----------------------------------------------------------------------------
 
 module PPC.Ppr (
+	pprNatCmmTop,
+	pprBasicBlock,
+	pprSectionHeader,
+	pprData,
+	pprInstr,
 	pprUserReg,
 	pprSize,
 	pprImm,
-	pprSectionHeader,
 	pprDataItem,
-	pprInstr
 )
 
 where
@@ -20,26 +23,134 @@ where
 #include "nativeGen/NCG.h"
 #include "HsVersions.h"
 
-import RegsBase
-import PprBase
 import PPC.Regs
 import PPC.Instr
+import PPC.Cond
+import PprBase
+import Instruction
+import Size
+import Reg
+import RegClass
 
 import BlockId
 import Cmm
 
-import CLabel	( mkAsmTempLabel )
+import CLabel
 
 import Unique		( pprUnique )
 import Pretty
 import FastString
 import qualified Outputable
-import Outputable	( panic )
+import Outputable	( Outputable, panic )
 
-import Data.Word(Word32)
+import Data.Word
 import Data.Bits
 
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) =
+  pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
+  pprSectionHeader Text $$
+  (if null info then -- blocks guaranteed not null, so label needed
+       pprLabel lbl
+   else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+            pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                <> char ':' $$
+#endif
+       vcat (map pprData info) $$
+       pprLabel (entryLblToInfoLbl lbl)
+  ) $$
+  vcat (map pprBasicBlock blocks)
+     -- above: Even the first block gets a label, because with branch-chain
+     -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+        -- If we are using the .subsections_via_symbols directive
+        -- (available on recent versions of Darwin),
+        -- we have to make sure that there is some kind of reference
+        -- from the entry code to a label on the _top_ of the info table,
+        -- so that the linker will not think it is unreferenced and dead-strip
+        -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info) + then text "\t.long " + <+> pprCLabel_asm (entryLblToInfoLbl lbl) + <+> char '-' + <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) + else empty +#endif + + +pprBasicBlock :: NatBasicBlock Instr -> Doc +pprBasicBlock (BasicBlock (BlockId id) instrs) = + pprLabel (mkAsmTempLabel id) $$ + vcat (map pprInstr instrs) + + +pprData :: CmmStatic -> Doc +pprData (CmmAlign bytes) = pprAlign bytes +pprData (CmmDataLabel lbl) = pprLabel lbl +pprData (CmmString str) = pprASCII str +pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes +pprData (CmmStaticLit lit) = pprDataItem lit + +pprGloblDecl :: CLabel -> Doc +pprGloblDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext IF_ARCH_sparc((sLit ".global "), + (sLit ".globl ")) <> + pprCLabel_asm lbl + +pprTypeAndSizeDecl :: CLabel -> Doc +#if linux_TARGET_OS +pprTypeAndSizeDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext (sLit ".type ") <> + pprCLabel_asm lbl <> ptext (sLit ", @object") +#else +pprTypeAndSizeDecl _ + = empty +#endif + +pprLabel :: CLabel -> Doc +pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':') + + +pprASCII :: [Word8] -> Doc +pprASCII str + = vcat (map do1 str) $$ do1 0 + where + do1 :: Word8 -> Doc + do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w) + +pprAlign :: Int -> Doc +pprAlign bytes = + ptext (sLit ".align ") <> int pow2 + where + pow2 = log2 bytes + + log2 :: Int -> Int -- cache the common ones + log2 1 = 0 + log2 2 = 1 + log2 4 = 2 + log2 8 = 3 + log2 n = 1 + log2 (n `quot` 2) + + +-- ----------------------------------------------------------------------------- +-- pprInstr: print an 'Instr' + +instance Outputable Instr where + ppr instr = Outputable.docToSDoc $ pprInstr instr + + pprUserReg :: Reg -> Doc pprUserReg = pprReg @@ -255,7 +366,7 @@ pprInstr (NEWBLOCK _) pprInstr (LDATA _ _) = panic "PprMach.pprInstr: LDATA" - +{- pprInstr (SPILL reg slot) = hcat [ ptext (sLit "\tSPILL"), @@ -271,6 +382,7 @@ pprInstr (RELOAD slot reg) ptext (sLit "SLOT") <> parens (int slot), comma, pprReg reg] +-} pprInstr (LD sz reg addr) = hcat [ char '\t', diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs index ea882a0..b2806c7 100644 --- a/compiler/nativeGen/PPC/RegInfo.hs +++ b/compiler/nativeGen/PPC/RegInfo.hs @@ -7,27 +7,14 @@ ----------------------------------------------------------------------------- module PPC.RegInfo ( - RegUsage(..), - noUsage, - regUsage, - patchRegs, - jumpDests, - isJumpish, - patchJump, - isRegRegMove, + mkVReg, - JumpDest(..), + JumpDest, canShortcut, shortcutJump, - mkSpillInstr, - mkLoadInstr, - mkRegRegMoveInstr, - mkBranchInstr, - - spillSlotSize, - maxSpillSlots, - spillSlotToOffset + shortcutStatic, + regDotColor ) where @@ -35,203 +22,29 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" -import BlockId -import RegsBase import PPC.Regs import PPC.Instr -import Outputable -import Constants ( rESERVED_C_STACK_BYTES ) -import FastBool - -data RegUsage = RU [Reg] [Reg] - -noUsage :: RegUsage -noUsage = RU [] [] - -regUsage :: Instr -> RegUsage -regUsage instr = case instr of - SPILL reg _ -> usage ([reg], []) - RELOAD _ reg -> usage ([], [reg]) - - LD _ reg addr -> usage (regAddr addr, [reg]) - LA _ reg addr -> usage (regAddr addr, [reg]) - ST _ reg addr -> usage (reg : regAddr addr, []) - STU _ reg addr -> usage (reg : regAddr addr, []) - LIS reg _ -> usage ([], [reg]) - LI reg _ -> usage ([], [reg]) - MR reg1 
reg2 -> usage ([reg2], [reg1]) - CMP _ reg ri -> usage (reg : regRI ri,[]) - CMPL _ reg ri -> usage (reg : regRI ri,[]) - BCC _ _ -> noUsage - BCCFAR _ _ -> noUsage - MTCTR reg -> usage ([reg],[]) - BCTR _ -> noUsage - BL _ params -> usage (params, callClobberedRegs) - BCTRL params -> usage (params, callClobberedRegs) - ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) - SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1]) - MULLW_MayOflo reg1 reg2 reg3 - -> usage ([reg2,reg3], [reg1]) - AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - XORIS reg1 reg2 _ -> usage ([reg2], [reg1]) - EXTS _ reg1 reg2 -> usage ([reg2], [reg1]) - NEG reg1 reg2 -> usage ([reg2], [reg1]) - NOT reg1 reg2 -> usage ([reg2], [reg1]) - SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - RLWINM reg1 reg2 _ _ _ - -> usage ([reg2], [reg1]) - FADD _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FNEG r1 r2 -> usage ([r2], [r1]) - FCMP r1 r2 -> usage ([r1,r2], []) - FCTIWZ r1 r2 -> usage ([r2], [r1]) - FRSP r1 r2 -> usage ([r2], [r1]) - MFCR reg -> usage ([], [reg]) - MFLR reg -> usage ([], [reg]) - FETCHPC reg -> usage ([], [reg]) - _ -> noUsage - where - usage (src, dst) = RU (filter interesting src) - (filter interesting dst) - regAddr (AddrRegReg r1 r2) = [r1, r2] - regAddr (AddrRegImm r1 _) = [r1] - - regRI (RIReg r) = [r] - regRI _ = [] - -interesting :: Reg -> Bool -interesting (VirtualRegI _) = True -interesting (VirtualRegHi _) = True -interesting (VirtualRegF _) = True -interesting (VirtualRegD _) = True -interesting (RealReg i) = isFastTrue (freeReg i) - - --- ----------------------------------------------------------------------------- --- 'patchRegs' function - --- 'patchRegs' takes an instruction and applies the given mapping to --- all the register references. 
- -patchRegs :: Instr -> (Reg -> Reg) -> Instr -patchRegs instr env = case instr of - SPILL reg slot -> SPILL (env reg) slot - RELOAD slot reg -> RELOAD slot (env reg) - - LD sz reg addr -> LD sz (env reg) (fixAddr addr) - LA sz reg addr -> LA sz (env reg) (fixAddr addr) - ST sz reg addr -> ST sz (env reg) (fixAddr addr) - STU sz reg addr -> STU sz (env reg) (fixAddr addr) - LIS reg imm -> LIS (env reg) imm - LI reg imm -> LI (env reg) imm - MR reg1 reg2 -> MR (env reg1) (env reg2) - CMP sz reg ri -> CMP sz (env reg) (fixRI ri) - CMPL sz reg ri -> CMPL sz (env reg) (fixRI ri) - BCC cond lbl -> BCC cond lbl - BCCFAR cond lbl -> BCCFAR cond lbl - MTCTR reg -> MTCTR (env reg) - BCTR targets -> BCTR targets - BL imm argRegs -> BL imm argRegs -- argument regs - BCTRL argRegs -> BCTRL argRegs -- cannot be remapped - ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) - ADDC reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3) - ADDE reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3) - ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm - SUBF reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3) - MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri) - DIVW reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3) - DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3) - MULLW_MayOflo reg1 reg2 reg3 - -> MULLW_MayOflo (env reg1) (env reg2) (env reg3) - AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri) - OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri) - XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri) - XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm - EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2) - NEG reg1 reg2 -> NEG (env reg1) (env reg2) - NOT reg1 reg2 -> NOT (env reg1) (env reg2) - SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri) - SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri) - SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri) - RLWINM reg1 reg2 sh mb me - -> RLWINM (env reg1) (env reg2) sh mb me - FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3) - FSUB sz r1 r2 r3 -> FSUB sz (env r1) (env r2) (env r3) - FMUL sz r1 r2 r3 -> FMUL sz (env r1) (env r2) (env r3) - FDIV sz r1 r2 r3 -> FDIV sz (env r1) (env r2) (env r3) - FNEG r1 r2 -> FNEG (env r1) (env r2) - FCMP r1 r2 -> FCMP (env r1) (env r2) - FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2) - FRSP r1 r2 -> FRSP (env r1) (env r2) - MFCR reg -> MFCR (env reg) - MFLR reg -> MFLR (env reg) - FETCHPC reg -> FETCHPC (env reg) - _ -> instr - where - fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) - fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i - - fixRI (RIReg r) = RIReg (env r) - fixRI other = other - +import RegClass +import Reg +import Size +import BlockId +import Cmm +import CLabel -jumpDests :: Instr -> [BlockId] -> [BlockId] -jumpDests insn acc - = case insn of - BCC _ id -> id : acc - BCCFAR _ id -> id : acc - BCTR targets -> targets ++ acc - _ -> acc - - --- | Check whether a particular instruction is a jump, branch or call instruction (jumpish) --- We can't just use jumpDests above because the jump might take its arg, --- so the instr won't contain a blockid. --- -isJumpish :: Instr -> Bool -isJumpish instr - = case instr of - BCC{} -> True - BCCFAR{} -> True - BCTR{} -> True - BCTRL{} -> True - BL{} -> True - JMP{} -> True - _ -> False - --- | Change the destination of this jump instruction --- Used in joinToTargets in the linear allocator, when emitting fixup code --- for join points. 
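The interface also changes shape in this move: the deleted patchJump below takes the old and new BlockIds explicitly, while its class-method replacement patchJumpInstr takes a function over destinations. A call site translates roughly as follows; the second form is essentially what JoinToTargets does further down in this patch:

    -- before: rewrite a jump to 'old' so that it targets 'new'
    instr' = patchJump instr old new

    -- after: the same rewrite, phrased as a mapping over each destination
    instr' = patchJumpInstr instr (\bid -> if bid == old then new else bid)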
-patchJump :: Instr -> BlockId -> BlockId -> Instr -patchJump insn old new - = case insn of - BCC cc id - | id == old -> BCC cc new - - BCCFAR cc id - | id == old -> BCCFAR cc new - - BCTR _ -> error "Cannot patch BCTR" +import Outputable +import Unique - _ -> insn +mkVReg :: Unique -> Size -> Reg +mkVReg u size + | not (isFloatSize size) = VirtualRegI u + | otherwise + = case size of + FF32 -> VirtualRegD u + FF64 -> VirtualRegD u + _ -> panic "mkVReg" -isRegRegMove :: Instr -> Maybe (Reg,Reg) -isRegRegMove (MR dst src) = Just (src,dst) -isRegRegMove _ = Nothing data JumpDest = DestBlockId BlockId | DestImm Imm @@ -243,71 +56,39 @@ shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr shortcutJump _ other = other +-- Here because it knows about JumpDest +shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic +shortcutStatic fn (CmmStaticLit (CmmLabel lab)) + | Just uq <- maybeAsmTemp lab + = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq))) --- ----------------------------------------------------------------------------- --- Generating spill instructions - -mkSpillInstr - :: Reg -- register to spill - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr -mkSpillInstr reg delta slot - = let off = spillSlotToOffset slot - in - let sz = case regClass reg of - RcInteger -> II32 - RcDouble -> FF64 - RcFloat -> panic "PPC.RegInfo.mkSpillInstr: no match" - in ST sz reg (AddrRegImm sp (ImmInt (off-delta))) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) + | Just uq <- maybeAsmTemp lbl1 + = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off) + -- slightly dodgy, we're ignoring the second label, but this + -- works with the way we use CmmLabelDiffOff for jump tables now. +shortcutStatic _ other_static + = other_static -mkLoadInstr - :: Reg -- register to load - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr -mkLoadInstr reg delta slot - = let off = spillSlotToOffset slot - in - let sz = case regClass reg of - RcInteger -> II32 - RcDouble -> FF64 - RcFloat -> panic "PPC.RegInfo.mkSpillInstr: no match" - in LD sz reg (AddrRegImm sp (ImmInt (off-delta))) +shortBlockId + :: (BlockId -> Maybe JumpDest) + -> BlockId + -> CLabel +shortBlockId fn blockid@(BlockId uq) = + case fn blockid of + Nothing -> mkAsmTempLabel uq + Just (DestBlockId blockid') -> shortBlockId fn blockid' + Just (DestImm (ImmCLbl lbl)) -> lbl + _other -> panic "shortBlockId" -mkRegRegMoveInstr - :: Reg - -> Reg - -> Instr -mkRegRegMoveInstr src dst - = MR dst src -mkBranchInstr - :: BlockId - -> [Instr] - -mkBranchInstr id = [BCC ALWAYS id] - - - -spillSlotSize :: Int -spillSlotSize = 8 - -maxSpillSlots :: Int -maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1 - --- convert a spill slot number to a *byte* offset, with no sign: --- decide on a per arch basis whether you are spilling above or below --- the C stack pointer. 
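To make the slot arithmetic concrete, here is a worked example against the PPC definitions being removed below (spillSlotSize = 8, slots laid out from 64 bytes above the stack pointer); the numbers are illustrative only, since each architecture decides its own slot size and layout:

    -- spillSlotToOffset 0 == 64 + 8 * 0 == 64
    -- spillSlotToOffset 5 == 64 + 8 * 5 == 104
    -- so with a current stack delta of 32, spilling an RcDouble reg
    -- to slot 5 produces:
    ST FF64 reg (AddrRegImm sp (ImmInt 72))    -- 104 - 32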
-spillSlotToOffset :: Int -> Int -spillSlotToOffset slot - | slot >= 0 && slot < maxSpillSlots - = 64 + spillSlotSize * slot - | otherwise - = pprPanic "spillSlotToOffset:" - ( text "invalid spill location: " <> int slot - $$ text "maxSpillSlots: " <> int maxSpillSlots) +regDotColor :: Reg -> SDoc +regDotColor reg + = case regClass reg of + RcInteger -> text "blue" + RcFloat -> text "red" + RcDouble -> text "green" diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs index d6993b2..80c68dd 100644 --- a/compiler/nativeGen/PPC/Regs.hs +++ b/compiler/nativeGen/PPC/Regs.hs @@ -5,16 +5,6 @@ -- ----------------------------------------------------------------------------- module PPC.Regs ( - -- sizes - Size(..), - intSize, - floatSize, - isFloatSize, - wordSize, - cmmTypeSize, - sizeToWidth, - mkVReg, - -- immediates Imm(..), strImmLit, @@ -42,7 +32,10 @@ module PPC.Regs ( -- horrow show freeReg, - globalRegMaybe + globalRegMaybe, + get_GlobalReg_reg_or_addr, + allocatableRegs + ) where @@ -51,78 +44,22 @@ where #include "HsVersions.h" #include "../includes/MachRegs.h" -import RegsBase +import Reg +import RegClass +import CgUtils ( get_GlobalReg_addr ) import BlockId import Cmm import CLabel ( CLabel ) import Pretty import Outputable ( Outputable(..), pprPanic, panic ) import qualified Outputable -import Unique import Constants import FastBool import Data.Word ( Word8, Word16, Word32 ) import Data.Int ( Int8, Int16, Int32 ) --- sizes ----------------------------------------------------------------------- --- For these three, the "size" also gives the int/float --- distinction, because the instructions for int/float --- differ only in their suffices -data Size - = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80 - deriving Eq - -intSize, floatSize :: Width -> Size -intSize W8 = II8 -intSize W16 = II16 -intSize W32 = II32 -intSize W64 = II64 -intSize other = pprPanic "MachInstrs.intSize" (ppr other) - -floatSize W32 = FF32 -floatSize W64 = FF64 -floatSize other = pprPanic "MachInstrs.intSize" (ppr other) - - -isFloatSize :: Size -> Bool -isFloatSize FF32 = True -isFloatSize FF64 = True -isFloatSize FF80 = True -isFloatSize _ = False - - -wordSize :: Size -wordSize = intSize wordWidth - - -cmmTypeSize :: CmmType -> Size -cmmTypeSize ty - | isFloatType ty = floatSize (typeWidth ty) - | otherwise = intSize (typeWidth ty) - - -sizeToWidth :: Size -> Width -sizeToWidth II8 = W8 -sizeToWidth II16 = W16 -sizeToWidth II32 = W32 -sizeToWidth II64 = W64 -sizeToWidth FF32 = W32 -sizeToWidth FF64 = W64 -sizeToWidth _ = panic "MachInstrs.sizeToWidth" - - -mkVReg :: Unique -> Size -> Reg -mkVReg u size - | not (isFloatSize size) = VirtualRegI u - | otherwise - = case size of - FF32 -> VirtualRegD u - FF64 -> VirtualRegD u - _ -> panic "mkVReg" - - -- immediates ------------------------------------------------------------------ data Imm @@ -490,7 +427,7 @@ freeReg REG_Hp = fastBool False #ifdef REG_HpLim freeReg REG_HpLim = fastBool False #endif -freeReg n = fastBool True +freeReg _ = fastBool True -- | Returns 'Nothing' if this global register is not stored @@ -582,3 +519,26 @@ freeReg _ = 0# globalRegMaybe _ = panic "PPC.Regs.globalRegMaybe: not defined" #endif /* powerpc_TARGET_ARCH */ + + +-- We map STG registers onto appropriate CmmExprs. Either they map +-- to real machine registers or stored as offsets from BaseReg. 
Given +-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real +-- register it is in, on this platform, or a CmmExpr denoting the +-- address in the register table holding it. +-- (See also get_GlobalReg_addr in CgUtils.) + +get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr +get_GlobalReg_reg_or_addr mid + = case globalRegMaybe mid of + Just rr -> Left rr + Nothing -> Right (get_GlobalReg_addr mid) + + +-- allocatableRegs is allMachRegNos with the fixed-use regs removed. +-- i.e., these are the regs for which we are prepared to allow the +-- register allocator to attempt to map VRegs to. +allocatableRegs :: [RegNo] +allocatableRegs + = let isFree i = isFastTrue (freeReg i) + in filter isFree allMachRegNos diff --git a/compiler/nativeGen/Platform.hs b/compiler/nativeGen/Platform.hs new file mode 100644 index 0000000..8b01f5c --- /dev/null +++ b/compiler/nativeGen/Platform.hs @@ -0,0 +1,92 @@ + +-- | A description of the platform we're compiling for. +-- Used by the native code generator. +-- In the future, this module should be the only one that references +-- the evil #defines for each TARGET_ARCH and TARGET_OS +-- +module Platform ( + Platform(..), + Arch(..), + OS(..), + + defaultTargetPlatform +) + +where + +#include "HsVersions.h" + + +-- | Contains enough information for the native code generator to emit +-- code for this platform. +data Platform + = Platform + { platformArch :: Arch + , platformOS :: OS } + + +-- | Architectures that the native code generator knows about. +-- TODO: It might be nice to extend these constructors with information +-- about what instruction set extensions an architecture might support. +-- +data Arch + = ArchAlpha + | ArchX86 + | ArchX86_64 + | ArchPPC + | ArchPPC_64 + | ArchSPARC + deriving (Show, Eq) + + +-- | Operating systems that the native code generator knows about. +-- Having OSUnknown should produce a sensible default, but no promises. +data OS + = OSUnknown + | OSLinux + | OSDarwin + | OSSolaris + | OSMinGW32 + deriving (Show, Eq) + + +-- | This is the target platform as far as the #ifdefs are concerned. +-- These are set in includes/ghcplatform.h by the autoconf scripts +defaultTargetPlatform :: Platform +defaultTargetPlatform + = Platform defaultTargetArch defaultTargetOS + + +-- | Move the evil TARGET_ARCH #ifdefs into Haskell land. +defaultTargetArch :: Arch +#if alpha_TARGET_ARCH +defaultTargetArch = ArchAlpha +#elif i386_TARGET_ARCH +defaultTargetArch = ArchX86 +#elif x86_64_TARGET_ARCH +defaultTargetArch = ArchX86_64 +#elif powerpc_TARGET_ARCH +defaultTargetArch = ArchPPC +#elif powerpc64_TARGET_ARCH +defaultTargetArch = ArchPPC_64 +#elif sparc_TARGET_ARCH +defaultTargetArch = ArchSPARC +#else +#error "Platform.buildArch: undefined" +#endif + + +-- | Move the evil TARGET_OS #ifdefs into Haskell land. +defaultTargetOS :: OS +#if linux_TARGET_OS +defaultTargetOS = OSLinux +#elif darwin_TARGET_OS +defaultTargetOS = OSDarwin +#elif solaris_TARGET_OS +defaultTargetOS = OSSolaris +#elif mingw32_TARGET_OS +defaultTargetOS = OSMinGW32 +#else +defaultTargetOS = OSUnknown +#endif + diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs deleted file mode 100644 index 532d852..0000000 --- a/compiler/nativeGen/PprMach.hs +++ /dev/null @@ -1,183 +0,0 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. 
See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - ------------------------------------------------------------------------------ --- --- Pretty-printing assembly language --- --- (c) The University of Glasgow 1993-2005 --- ------------------------------------------------------------------------------ - --- We start with the @pprXXX@s with some cross-platform commonality --- (e.g., 'pprReg'); we conclude with the no-commonality monster, --- 'pprInstr'. - -#include "nativeGen/NCG.h" - -module PprMach ( - pprNatCmmTop, pprBasicBlock, pprSectionHeader, pprData, - pprInstr, pprSize, pprUserReg, pprImm - ) where - -#include "HsVersions.h" - -import PprBase - -import BlockId -import Cmm -import Regs -- may differ per-platform -import Instrs -import Regs - -import CLabel ( CLabel, pprCLabel, externallyVisibleCLabel, - labelDynamic, mkAsmTempLabel, entryLblToInfoLbl ) -#if HAVE_SUBSECTIONS_VIA_SYMBOLS -import CLabel ( mkDeadStripPreventer ) -#endif - -import Panic ( panic ) -import Unique ( pprUnique ) -import Pretty -import FastString -import qualified Outputable -import Outputable ( Outputable, pprPanic, ppr, docToSDoc) - -import Data.Array.ST -import Data.Word ( Word8 ) -import Control.Monad.ST -import Data.Char ( chr, ord ) -import Data.Maybe ( isJust ) - - -#if alpha_TARGET_ARCH -import Alpha.Ppr -#elif powerpc_TARGET_ARCH -import PPC.Ppr -#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH -import X86.Ppr -#elif sparc_TARGET_ARCH -import SPARC.Ppr -#else -#error "Regs: not defined for this architecture" -#endif - - - --- ----------------------------------------------------------------------------- --- Printing this stuff out - -pprNatCmmTop :: NatCmmTop -> Doc -pprNatCmmTop (CmmData section dats) = - pprSectionHeader section $$ vcat (map pprData dats) - - -- special case for split markers: -pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl - -pprNatCmmTop (CmmProc info lbl params (ListGraph blocks)) = - pprSectionHeader Text $$ - (if null info then -- blocks guaranteed not null, so label needed - pprLabel lbl - else -#if HAVE_SUBSECTIONS_VIA_SYMBOLS - pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) - <> char ':' $$ -#endif - vcat (map pprData info) $$ - pprLabel (entryLblToInfoLbl lbl) - ) $$ - vcat (map pprBasicBlock blocks) - -- above: Even the first block gets a label, because with branch-chain - -- elimination, it might be the target of a goto. -#if HAVE_SUBSECTIONS_VIA_SYMBOLS - -- If we are using the .subsections_via_symbols directive - -- (available on recent versions of Darwin), - -- we have to make sure that there is some kind of reference - -- from the entry code to a label on the _top_ of of the info table, - -- so that the linker will not think it is unreferenced and dead-strip - -- it. That's why the label is called a DeadStripPreventer (_dsp). 
- $$ if not (null info) - then text "\t.long " - <+> pprCLabel_asm (entryLblToInfoLbl lbl) - <+> char '-' - <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) - else empty -#endif - - -pprBasicBlock :: NatBasicBlock -> Doc -pprBasicBlock (BasicBlock (BlockId id) instrs) = - pprLabel (mkAsmTempLabel id) $$ - vcat (map pprInstr instrs) - - -pprData :: CmmStatic -> Doc -pprData (CmmAlign bytes) = pprAlign bytes -pprData (CmmDataLabel lbl) = pprLabel lbl -pprData (CmmString str) = pprASCII str -pprData (CmmUninitialised bytes) = ptext (sLit s) <> int bytes - where s = -#if defined(solaris2_TARGET_OS) - ".skip " -#else - ".space " -#endif -pprData (CmmStaticLit lit) = pprDataItem lit - -pprGloblDecl :: CLabel -> Doc -pprGloblDecl lbl - | not (externallyVisibleCLabel lbl) = empty - | otherwise = ptext IF_ARCH_sparc((sLit ".global "), - (sLit ".globl ")) <> - pprCLabel_asm lbl - -pprTypeAndSizeDecl :: CLabel -> Doc -pprTypeAndSizeDecl lbl -#if linux_TARGET_OS - | not (externallyVisibleCLabel lbl) = empty - | otherwise = ptext (sLit ".type ") <> - pprCLabel_asm lbl <> ptext (sLit ", @object") -#else - = empty -#endif - -pprLabel :: CLabel -> Doc -pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':') - - -pprASCII str - = vcat (map do1 str) $$ do1 0 - where - do1 :: Word8 -> Doc - do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w) - -pprAlign bytes = - IF_ARCH_alpha(ptext (sLit ".align ") <> int pow2, - IF_ARCH_i386(ptext (sLit ".align ") <> int IF_OS_darwin(pow2,bytes), - IF_ARCH_x86_64(ptext (sLit ".align ") <> int IF_OS_darwin(pow2,bytes), - IF_ARCH_sparc(ptext (sLit ".align ") <> int bytes, - IF_ARCH_powerpc(ptext (sLit ".align ") <> int pow2,))))) - where - pow2 = log2 bytes - - log2 :: Int -> Int -- cache the common ones - log2 1 = 0 - log2 2 = 1 - log2 4 = 2 - log2 8 = 3 - log2 n = 1 + log2 (n `quot` 2) - - --- ----------------------------------------------------------------------------- --- pprInstr: print an 'Instr' - -instance Outputable Instr where - ppr instr = Outputable.docToSDoc $ pprInstr instr - - - - diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs new file mode 100644 index 0000000..1a341bb --- /dev/null +++ b/compiler/nativeGen/Reg.hs @@ -0,0 +1,113 @@ + +-- | An architecture independent description of a register. +-- This needs to stay architecture independent because it is used +-- by NCGMonad and the register allocators, which are shared +-- by all architectures. +-- +module Reg ( + RegNo, + Reg(..), + isRealReg, + unRealReg, + isVirtualReg, + renameVirtualReg, + getHiVRegFromLo +) + +where + +import Outputable +import Unique +import Panic + +-- | An identifier for a real machine register. +type RegNo + = Int + +-- RealRegs are machine regs which are available for allocation, in +-- the usual way. We know what class they are, because that's part of +-- the processor's architecture. + +-- VirtualRegs are virtual registers. The register allocator will +-- eventually have to map them into RealRegs, or into spill slots. +-- +-- VirtualRegs are allocated on the fly, usually to represent a single +-- value in the abstract assembly code (i.e. dynamic registers are +-- usually single assignment). 
+--
+-- With the new register allocator, the
+-- single assignment restriction isn't necessary to get correct code,
+-- although a better register allocation will result if single
+-- assignment is used -- because the allocator maps a VirtualReg into
+-- a single RealReg, even if the VirtualReg has multiple live ranges.
+
+-- Virtual regs can be of either class, so that info is attached.
+data Reg
+ = RealReg {-# UNPACK #-} !RegNo
+ | VirtualRegI {-# UNPACK #-} !Unique
+ | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
+ | VirtualRegF {-# UNPACK #-} !Unique
+ | VirtualRegD {-# UNPACK #-} !Unique
+ deriving (Eq, Ord)
+
+
+-- We like to have Uniques for Reg so that we can make UniqFM and UniqSets
+-- in the register allocator.
+instance Uniquable Reg where
+ getUnique (RealReg i) = mkUnique 'C' i
+ getUnique (VirtualRegI u) = u
+ getUnique (VirtualRegHi u) = u
+ getUnique (VirtualRegF u) = u
+ getUnique (VirtualRegD u) = u
+
+
+-- | Print a reg in a generic manner
+-- If you want the architecture specific names, then use the pprReg
+-- function from the appropriate Ppr module.
+instance Outputable Reg where
+ ppr reg
+ = case reg of
+ RealReg i -> text "%r" <> int i
+ VirtualRegI u -> text "%vI_" <> pprUnique u
+ VirtualRegHi u -> text "%vHi_" <> pprUnique u
+ VirtualRegF u -> text "%vF_" <> pprUnique u
+ VirtualRegD u -> text "%vD_" <> pprUnique u
+
+
+
+isRealReg :: Reg -> Bool
+isRealReg = not . isVirtualReg
+
+-- | Take the RegNo from a real reg
+unRealReg :: Reg -> RegNo
+unRealReg (RealReg i) = i
+unRealReg _ = panic "unRealReg on VirtualReg"
+
+isVirtualReg :: Reg -> Bool
+isVirtualReg (RealReg _) = False
+isVirtualReg (VirtualRegI _) = True
+isVirtualReg (VirtualRegHi _) = True
+isVirtualReg (VirtualRegF _) = True
+isVirtualReg (VirtualRegD _) = True
+
+
+renameVirtualReg :: Unique -> Reg -> Reg
+renameVirtualReg u r
+ = case r of
+ RealReg _ -> error "renameVirtualReg: can't change unique on a real reg"
+ VirtualRegI _ -> VirtualRegI u
+ VirtualRegHi _ -> VirtualRegHi u
+ VirtualRegF _ -> VirtualRegF u
+ VirtualRegD _ -> VirtualRegD u
+
+-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
+-- when supplied with the vreg for the lower-half of the quantity.
+-- (NB. Not reversible).
+getHiVRegFromLo :: Reg -> Reg
+getHiVRegFromLo (VirtualRegI u)
+ = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H'
+
+getHiVRegFromLo _
+ = panic "Reg.getHiVRegFromLo"
+
+
diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
index 18e4b0e..8521e92 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
@@ -8,11 +8,11 @@ module RegAlloc.Graph.Coalesce (
 where

-import Cmm
-import Regs
 import RegAlloc.Liveness
-import RegAllocInfo
+import Instruction
+import Reg

+import Cmm
 import Bag
 import UniqFM
 import UniqSet
@@ -26,7 +26,11 @@ import Data.List
 -- then the mov only serves to join live ranges. The two regs can be renamed to be
 -- the same and the move instruction safely erased.

-regCoalesce :: [LiveCmmTop] -> UniqSM [LiveCmmTop]
+regCoalesce
+ :: Instruction instr
+ => [LiveCmmTop instr]
+ -> UniqSM [LiveCmmTop instr]
+
 regCoalesce code
 = do
 let joins = foldl' unionBags emptyBag
@@ -57,7 +61,11 @@ sinkReg fm r
 -- During a mov, if the source reg dies and the destination reg is born
 -- then we can rename the two regs to the same thing and eliminate the move.
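Concretely: in a fragment like the pseudo-assembly below, v1's last use is the move itself and v2 is born at it, so slurpJoinMovs (next) records the pair (v1, v2), and the coalescer can rename both to a single vreg and erase the MR outright. This sketch uses PPC mnemonics, but the pass itself relies only on takeRegRegMoveInstr and the liveness annotations:

    ADD v1, v1, 4      -- last use: v1 dies here (liveDieRead)
    MR  v2, v1         -- v2 is born here (liveBorn), so coalescable
    ST  II32 v2, ...   -- afterwards, a use of the single merged vreg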
-- -slurpJoinMovs :: LiveCmmTop -> Bag (Reg, Reg) +slurpJoinMovs + :: Instruction instr + => LiveCmmTop instr + -> Bag (Reg, Reg) + slurpJoinMovs live = slurpCmm emptyBag live where @@ -68,7 +76,7 @@ slurpJoinMovs live slurpLI rs (Instr _ Nothing) = rs slurpLI rs (Instr instr (Just live)) - | Just (r1, r2) <- isRegRegMove instr + | Just (r1, r2) <- takeRegRegMoveInstr instr , elementOfUniqSet r1 $ liveDieRead live , elementOfUniqSet r2 $ liveBorn live @@ -80,4 +88,7 @@ slurpJoinMovs live | otherwise = rs + slurpLI rs SPILL{} = rs + slurpLI rs RELOAD{} = rs + diff --git a/compiler/nativeGen/RegAlloc/Graph/Main.hs b/compiler/nativeGen/RegAlloc/Graph/Main.hs index fe99aba..2e58461 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Main.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Main.hs @@ -5,8 +5,7 @@ -- module RegAlloc.Graph.Main ( - regAlloc, - regDotColor + regAlloc ) where @@ -17,9 +16,12 @@ import RegAlloc.Graph.Spill import RegAlloc.Graph.SpillClean import RegAlloc.Graph.SpillCost import RegAlloc.Graph.Stats -import Regs -import Instrs -import PprMach +import RegAlloc.Graph.TrivColorable +import Instruction +import TargetReg +import RegClass +import Reg + import UniqSupply import UniqSet @@ -43,18 +45,26 @@ maxSpinCount = 10 -- | The top level of the graph coloring register allocator. -- regAlloc - :: DynFlags + :: (Outputable instr, Instruction instr) + => DynFlags -> UniqFM (UniqSet Reg) -- ^ the registers we can use for allocation -> UniqSet Int -- ^ the set of available spill slots. - -> [LiveCmmTop] -- ^ code annotated with liveness information. - -> UniqSM ( [NatCmmTop], [RegAllocStats] ) + -> [LiveCmmTop instr] -- ^ code annotated with liveness information. + -> UniqSM ( [NatCmmTop instr], [RegAllocStats instr] ) -- ^ code with registers allocated and stats for each stage of -- allocation regAlloc dflags regsFree slotsFree code = do + -- TODO: the regClass function is currently hard coded to the default target + -- architecture. Would prefer to determine this from dflags. + -- There are other uses of targetRegClass later in this module. + let triv = trivColorable targetRegClass + (code_final, debug_codeGraphs, _) - <- regAlloc_spin dflags 0 trivColorable regsFree slotsFree [] code + <- regAlloc_spin dflags 0 + triv + regsFree slotsFree [] code return ( code_final , reverse debug_codeGraphs ) @@ -74,7 +84,7 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code $ pprPanic "regAlloc_spin: max build/spill cycle count exceeded." ( text "It looks like the register allocator is stuck in an infinite loop." $$ text "max cycles = " <> int maxSpinCount - $$ text "regsFree = " <> (hcat $ punctuate space $ map (docToSDoc . 
pprUserReg) + $$ text "regsFree = " <> (hcat $ punctuate space $ map ppr $ uniqSetToList $ unionManyUniqSets $ eltsUFM regsFree) $$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree)) @@ -139,12 +149,12 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code -- clean out unneeded SPILL/RELOADs let code_spillclean = map cleanSpills code_patched - -- strip off liveness information - let code_nat = map stripLive code_spillclean + -- strip off liveness information, + -- and rewrite SPILL/RELOAD pseudos into real instructions along the way + let code_final = map stripLive code_spillclean - -- rewrite SPILL/RELOAD pseudos into real instructions - let spillNatTop = mapGenBlockTop spillNatBlock - let code_final = map spillNatTop code_nat +-- let spillNatTop = mapGenBlockTop spillNatBlock +-- let code_final = map spillNatTop code_nat -- record what happened in this stage for debugging let stat = @@ -213,7 +223,8 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code -- | Build a graph from the liveness and coalesce information in this code. buildGraph - :: [LiveCmmTop] + :: Instruction instr + => [LiveCmmTop instr] -> UniqSM (Color.Graph Reg RegClass Reg) buildGraph code @@ -248,8 +259,8 @@ graphAddConflictSet set graph = let reals = filterUFM isRealReg set virtuals = filterUFM (not . isRealReg) set - graph1 = Color.addConflicts virtuals regClass graph - graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 regClass r2) + graph1 = Color.addConflicts virtuals targetRegClass graph + graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 targetRegClass r2) graph1 [ (a, b) | a <- uniqSetToList virtuals @@ -276,13 +287,14 @@ graphAddCoalesce (r1, r2) graph | otherwise = Color.addCoalesce (regWithClass r1) (regWithClass r2) graph - where regWithClass r = (r, regClass r) + where regWithClass r = (r, targetRegClass r) -- | Patch registers in code using the reg -> reg mapping in this graph. patchRegsFromGraph - :: Color.Graph Reg RegClass Reg - -> LiveCmmTop -> LiveCmmTop + :: (Outputable instr, Instruction instr) + => Color.Graph Reg RegClass Reg + -> LiveCmmTop instr -> LiveCmmTop instr patchRegsFromGraph graph code = let @@ -303,7 +315,7 @@ patchRegsFromGraph graph code = pprPanic "patchRegsFromGraph: register mapping failed." ( text "There is no node in the graph for register " <> ppr reg $$ ppr code - $$ Color.dotGraph (\_ -> text "white") trivColorable graph) + $$ Color.dotGraph (\_ -> text "white") (trivColorable targetRegClass) graph) in patchEraseLive patchF code diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs index b5a6451..e6e5622 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Spill.hs @@ -10,9 +10,8 @@ module RegAlloc.Graph.Spill ( where import RegAlloc.Liveness -import RegAllocInfo -import Regs -import Instrs +import Instruction +import Reg import Cmm import State @@ -35,11 +34,12 @@ import Data.Maybe -- address the spill slot directly. 
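A sketch of what the spiller produces: if vreg v is assigned slot 3, an instruction that both reads and writes v is bracketed by a reload and a spill, and patched to use a fresh vreg. The SPILL and RELOAD pseudos are expanded into real load and store instructions only later, when stripLive runs (see the change to RegAlloc.Graph.Main above):

    -- before regSpill:        -- after regSpill:
    ADD v, v, 4                RELOAD (SLOT 3), v'   -- v is read: reload first
                               ADD v', v', 4         -- patched to fresh vreg v'
                               SPILL v', (SLOT 3)    -- v is written: spill after

Pseudo-assembly for illustration; the real code works over LiveInstr, as in regSpill_instr below.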
-- regSpill - :: [LiveCmmTop] -- ^ the code + :: Instruction instr + => [LiveCmmTop instr] -- ^ the code -> UniqSet Int -- ^ available stack slots -> UniqSet Reg -- ^ the regs to spill -> UniqSM - ([LiveCmmTop] -- code will spill instructions + ([LiveCmmTop instr] -- code will spill instructions , UniqSet Int -- left over slots , SpillStats ) -- stats about what happened during spilling @@ -75,6 +75,20 @@ regSpill_block regSlotMap (BasicBlock i instrs) = do instrss' <- mapM (regSpill_instr regSlotMap) instrs return $ BasicBlock i (concat instrss') + +regSpill_instr + :: Instruction instr + => UniqFM Int + -> LiveInstr instr -> SpillM [LiveInstr instr] + +-- | The thing we're spilling shouldn't already have spill or reloads in it +regSpill_instr _ SPILL{} + = panic "regSpill_instr: unexpected SPILL" + +regSpill_instr _ RELOAD{} + = panic "regSpill_instr: unexpected RELOAD" + + regSpill_instr _ li@(Instr _ Nothing) = do return [li] @@ -82,7 +96,7 @@ regSpill_instr regSlotMap (Instr instr (Just _)) = do -- work out which regs are read and written in this instr - let RU rlRead rlWritten = regUsage instr + let RU rlRead rlWritten = regUsageOfInstr instr -- sometimes a register is listed as being read more than once, -- nub this so we don't end up inserting two lots of spill code. @@ -109,9 +123,9 @@ regSpill_instr regSlotMap let postfixes = concat mPostfixes -- final code - let instrs' = map (\i -> Instr i Nothing) prefixes - ++ [ Instr instr3 Nothing ] - ++ map (\i -> Instr i Nothing) postfixes + let instrs' = prefixes + ++ [Instr instr3 Nothing] + ++ postfixes return {- $ pprTrace "* regSpill_instr spill" @@ -139,6 +153,7 @@ spillRead regSlotMap instr reg | otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg" + spillWrite regSlotMap instr reg | Just slot <- lookupUFM regSlotMap reg = do (instr', nReg) <- patchInstr reg instr @@ -152,6 +167,7 @@ spillWrite regSlotMap instr reg | otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg" + spillModify regSlotMap instr reg | Just slot <- lookupUFM regSlotMap reg = do (instr', nReg) <- patchInstr reg instr @@ -168,19 +184,25 @@ spillModify regSlotMap instr reg -- | rewrite uses of this virtual reg in an instr to use a different virtual reg -patchInstr :: Reg -> Instr -> SpillM (Instr, Reg) +patchInstr + :: Instruction instr + => Reg -> instr -> SpillM (instr, Reg) + patchInstr reg instr = do nUnique <- newUnique let nReg = renameVirtualReg nUnique reg let instr' = patchReg1 reg nReg instr return (instr', nReg) -patchReg1 :: Reg -> Reg -> Instr -> Instr +patchReg1 + :: Instruction instr + => Reg -> Reg -> instr -> instr + patchReg1 old new instr = let patchF r | r == old = new | otherwise = r - in patchRegs instr patchF + in patchRegsOfInstr instr patchF ------------------------------------------------------ diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs index b68648b..4f129c4 100644 --- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs +++ b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs @@ -29,13 +29,12 @@ module RegAlloc.Graph.SpillClean ( ) where -import BlockId import RegAlloc.Liveness -import RegAllocInfo -import Regs -import Instrs -import Cmm +import Instruction +import Reg +import BlockId +import Cmm import UniqSet import UniqFM import Unique @@ -51,12 +50,19 @@ type Slot = Int -- | Clean out unneeded spill\/reloads from this top level thing. 
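The two main rewrites the cleaner performs, sketched on the same pseudo-instructions; cleanForward implements the first, cleanBackward the second:

    -- forward: a reload from a slot known to still hold v1's value
    -- becomes a reg-reg move (which may itself be erased later)
    SPILL  v1, (SLOT 3)             SPILL v1, (SLOT 3)
    RELOAD (SLOT 3), v2     ==>     MR v2, v1

    -- backward: a spill to a slot that is never reloaded from is dropped
    SPILL  v1, (SLOT 4)     ==>     (erased)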
-cleanSpills :: LiveCmmTop -> LiveCmmTop +cleanSpills + :: Instruction instr + => LiveCmmTop instr -> LiveCmmTop instr + cleanSpills cmm = evalState (cleanSpin 0 cmm) initCleanS -- | do one pass of cleaning -cleanSpin :: Int -> LiveCmmTop -> CleanM LiveCmmTop +cleanSpin + :: Instruction instr + => Int + -> LiveCmmTop instr + -> CleanM (LiveCmmTop instr) {- cleanSpin spinCount code @@ -103,7 +109,11 @@ cleanSpin spinCount code -- | Clean one basic block -cleanBlockForward :: LiveBasicBlock -> CleanM LiveBasicBlock +cleanBlockForward + :: Instruction instr + => LiveBasicBlock instr + -> CleanM (LiveBasicBlock instr) + cleanBlockForward (BasicBlock blockId instrs) = do -- see if we have a valid association for the entry to this block @@ -116,7 +126,11 @@ cleanBlockForward (BasicBlock blockId instrs) return $ BasicBlock blockId instrs_reload -cleanBlockBackward :: LiveBasicBlock -> CleanM LiveBasicBlock +cleanBlockBackward + :: Instruction instr + => LiveBasicBlock instr + -> CleanM (LiveBasicBlock instr) + cleanBlockBackward (BasicBlock blockId instrs) = do instrs_spill <- cleanBackward emptyUniqSet [] instrs return $ BasicBlock blockId instrs_spill @@ -130,11 +144,12 @@ cleanBlockBackward (BasicBlock blockId instrs) -- then we don't need to do the reload. -- cleanForward - :: BlockId -- ^ the block that we're currently in - -> Assoc Store -- ^ two store locations are associated if they have the same value - -> [LiveInstr] -- ^ acc - -> [LiveInstr] -- ^ instrs to clean (in backwards order) - -> CleanM [LiveInstr] -- ^ cleaned instrs (in forward order) + :: Instruction instr + => BlockId -- ^ the block that we're currently in + -> Assoc Store -- ^ two store locations are associated if they have the same value + -> [LiveInstr instr] -- ^ acc + -> [LiveInstr instr] -- ^ instrs to clean (in backwards order) + -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in forward order) cleanForward _ _ acc [] = return acc @@ -142,19 +157,19 @@ cleanForward _ _ acc [] -- write out live range joins via spill slots to just a spill and a reg-reg move -- hopefully the spill will be also be cleaned in the next pass -- -cleanForward blockId assoc acc (Instr i1 live1 : Instr i2 _ : instrs) +cleanForward blockId assoc acc (li1 : li2 : instrs) - | SPILL reg1 slot1 <- i1 - , RELOAD slot2 reg2 <- i2 + | SPILL reg1 slot1 <- li1 + , RELOAD slot2 reg2 <- li2 , slot1 == slot2 = do modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 } cleanForward blockId assoc acc - (Instr i1 live1 : Instr (mkRegRegMoveInstr reg1 reg2) Nothing : instrs) + (li1 : Instr (mkRegRegMoveInstr reg1 reg2) Nothing : instrs) cleanForward blockId assoc acc (li@(Instr i1 _) : instrs) - | Just (r1, r2) <- isRegRegMove i1 + | Just (r1, r2) <- takeRegRegMoveInstr i1 = if r1 == r2 -- erase any left over nop reg reg moves while we're here -- this will also catch any nop moves that the "write out live range joins" case above @@ -170,38 +185,50 @@ cleanForward blockId assoc acc (li@(Instr i1 _) : instrs) cleanForward blockId assoc' (li : acc) instrs -cleanForward blockId assoc acc (li@(Instr instr _) : instrs) +cleanForward blockId assoc acc (li : instrs) -- update association due to the spill - | SPILL reg slot <- instr + | SPILL reg slot <- li = let assoc' = addAssoc (SReg reg) (SSlot slot) $ delAssoc (SSlot slot) $ assoc in cleanForward blockId assoc' (li : acc) instrs -- clean a reload instr - | RELOAD{} <- instr + | RELOAD{} <- li = do (assoc', mli) <- cleanReload blockId assoc li case mli of Nothing -> cleanForward blockId assoc' acc 
instrs Just li' -> cleanForward blockId assoc' (li' : acc) instrs -- remember the association over a jump - | targets <- jumpDests instr [] + | Instr instr _ <- li + , targets <- jumpDestsOfInstr instr , not $ null targets = do mapM_ (accJumpValid assoc) targets cleanForward blockId assoc (li : acc) instrs -- writing to a reg changes its value. - | RU _ written <- regUsage instr + | Instr instr _ <- li + , RU _ written <- regUsageOfInstr instr = let assoc' = foldr delAssoc assoc (map SReg $ nub written) in cleanForward blockId assoc' (li : acc) instrs +-- bogus, to stop pattern match warning +cleanForward _ _ _ _ + = panic "RegAlloc.Graph.SpillClean.cleanForward: no match" + -- | Try and rewrite a reload instruction to something more pleasing -- -cleanReload :: BlockId -> Assoc Store -> LiveInstr -> CleanM (Assoc Store, Maybe LiveInstr) -cleanReload blockId assoc li@(Instr (RELOAD slot reg) _) +cleanReload + :: Instruction instr + => BlockId + -> Assoc Store + -> LiveInstr instr + -> CleanM (Assoc Store, Maybe (LiveInstr instr)) + +cleanReload blockId assoc li@(RELOAD slot reg) -- if the reg we're reloading already has the same value as the slot -- then we can erase the instruction outright @@ -264,10 +291,10 @@ cleanReload _ _ _ -- we should really be updating the noReloads set as we cross jumps also. -- cleanBackward - :: UniqSet Int -- ^ slots that have been spilled, but not reloaded from - -> [LiveInstr] -- ^ acc - -> [LiveInstr] -- ^ instrs to clean (in forwards order) - -> CleanM [LiveInstr] -- ^ cleaned instrs (in backwards order) + :: UniqSet Int -- ^ slots that have been spilled, but not reloaded from + -> [LiveInstr instr] -- ^ acc + -> [LiveInstr instr] -- ^ instrs to clean (in forwards order) + -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in backwards order) cleanBackward noReloads acc lis @@ -277,15 +304,15 @@ cleanBackward noReloads acc lis cleanBackward' _ _ acc [] = return acc -cleanBackward' reloadedBy noReloads acc (li@(Instr instr _) : instrs) +cleanBackward' reloadedBy noReloads acc (li : instrs) -- if nothing ever reloads from this slot then we don't need the spill - | SPILL _ slot <- instr + | SPILL _ slot <- li , Nothing <- lookupUFM reloadedBy (SSlot slot) = do modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 } cleanBackward noReloads acc instrs - | SPILL _ slot <- instr + | SPILL _ slot <- li = if elementOfUniqSet slot noReloads -- we can erase this spill because the slot won't be read until after the next one @@ -299,7 +326,7 @@ cleanBackward' reloadedBy noReloads acc (li@(Instr instr _) : instrs) cleanBackward noReloads' (li : acc) instrs -- if we reload from a slot then it's no longer unused - | RELOAD slot _ <- instr + | RELOAD slot _ <- li , noReloads' <- delOneFromUniqSet noReloads slot = cleanBackward noReloads' (li : acc) instrs diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs index 1d37cf7..d4dd75a 100644 --- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs +++ b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs @@ -16,14 +16,16 @@ module RegAlloc.Graph.SpillCost ( where -import GraphBase import RegAlloc.Liveness -import RegAllocInfo -import Instrs -import Regs +import Instruction +import RegClass +import Reg + +import GraphBase + + import BlockId import Cmm - import UniqFM import UniqSet import Outputable @@ -62,7 +64,8 @@ plusSpillCostRecord (r1, a1, b1, c1) (r2, a2, b2, c2) -- and the number of instructions it was live on entry to (lifetime) -- slurpSpillCostInfo - :: LiveCmmTop 
+ :: (Outputable instr, Instruction instr) + => LiveCmmTop instr -> SpillCostInfo slurpSpillCostInfo cmm @@ -89,11 +92,14 @@ slurpSpillCostInfo cmm = return () -- skip over comment and delta pseudo instrs - countLIs rsLive (Instr instr Nothing : lis) - | COMMENT{} <- instr + countLIs rsLive (SPILL{} : lis) + = countLIs rsLive lis + + countLIs rsLive (RELOAD{} : lis) = countLIs rsLive lis - | DELTA{} <- instr + countLIs rsLive (Instr instr Nothing : lis) + | isMetaInstr instr = countLIs rsLive lis | otherwise @@ -106,7 +112,7 @@ slurpSpillCostInfo cmm mapM_ incLifetime $ uniqSetToList rsLiveEntry -- increment counts for what regs were read/written from - let (RU read written) = regUsage instr + let (RU read written) = regUsageOfInstr instr mapM_ incUses $ filter (not . isRealReg) $ nub read mapM_ incDefs $ filter (not . isRealReg) $ nub written @@ -226,8 +232,11 @@ lifeMapFromSpillCostInfo info -- | Work out the degree (number of neighbors) of this node which have the same class. -nodeDegree :: Graph Reg RegClass Reg -> Reg -> Int -nodeDegree graph reg +nodeDegree + :: (Reg -> RegClass) + -> Graph Reg RegClass Reg -> Reg -> Int + +nodeDegree regClass graph reg | Just node <- lookupUFM (graphMap graph) reg , virtConflicts <- length $ filter (\r -> regClass r == regClass reg) $ uniqSetToList $ nodeConflicts node @@ -238,12 +247,17 @@ nodeDegree graph reg -- | Show a spill cost record, including the degree from the graph and final calulated spill cos -pprSpillCostRecord :: Graph Reg RegClass Reg -> SpillCostRecord -> SDoc -pprSpillCostRecord graph (reg, uses, defs, life) +pprSpillCostRecord + :: (Reg -> RegClass) + -> (Reg -> SDoc) + -> Graph Reg RegClass Reg -> SpillCostRecord -> SDoc + +pprSpillCostRecord regClass pprReg graph (reg, uses, defs, life) = hsep - [ ppr reg + [ pprReg reg , ppr uses , ppr defs , ppr life - , ppr $ nodeDegree graph reg - , text $ show $ (fromIntegral (uses + defs) / fromIntegral (nodeDegree graph reg) :: Float) ] + , ppr $ nodeDegree regClass graph reg + , text $ show $ (fromIntegral (uses + defs) + / fromIntegral (nodeDegree regClass graph reg) :: Float) ] diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs index 8082f9e..5e3dd32 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Stats.hs @@ -5,7 +5,6 @@ module RegAlloc.Graph.Stats ( RegAllocStats (..), - regDotColor, pprStats, pprStatsSpills, @@ -22,13 +21,13 @@ where import qualified GraphColor as Color import RegAlloc.Liveness -import RegAllocInfo import RegAlloc.Graph.Spill import RegAlloc.Graph.SpillCost -import Regs -import Instrs -import Cmm +import Instruction +import RegClass +import Reg +import Cmm import Outputable import UniqFM import UniqSet @@ -36,11 +35,11 @@ import State import Data.List -data RegAllocStats +data RegAllocStats instr -- initial graph = RegAllocStatsStart - { raLiveCmm :: [LiveCmmTop] -- ^ initial code, with liveness + { raLiveCmm :: [LiveCmmTop instr] -- ^ initial code, with liveness , raGraph :: Color.Graph Reg RegClass Reg -- ^ the initial, uncolored graph , raSpillCosts :: SpillCostInfo } -- ^ information to help choose which regs to spill @@ -50,35 +49,35 @@ data RegAllocStats , raCoalesced :: UniqFM Reg -- ^ the regs that were coaleced , raSpillStats :: SpillStats -- ^ spiller stats , raSpillCosts :: SpillCostInfo -- ^ number of instrs each reg lives for - , raSpilled :: [LiveCmmTop] } -- ^ code with spill instructions added + , raSpilled :: [LiveCmmTop instr] } -- ^ code with spill 
instructions added -- a successful coloring | RegAllocStatsColored { raGraph :: Color.Graph Reg RegClass Reg -- ^ the uncolored graph , raGraphColored :: Color.Graph Reg RegClass Reg -- ^ the coalesced and colored graph , raCoalesced :: UniqFM Reg -- ^ the regs that were coaleced - , raPatched :: [LiveCmmTop] -- ^ code with vregs replaced by hregs - , raSpillClean :: [LiveCmmTop] -- ^ code with unneeded spill\/reloads cleaned out - , raFinal :: [NatCmmTop] -- ^ final code + , raPatched :: [LiveCmmTop instr] -- ^ code with vregs replaced by hregs + , raSpillClean :: [LiveCmmTop instr] -- ^ code with unneeded spill\/reloads cleaned out + , raFinal :: [NatCmmTop instr] -- ^ final code , raSRMs :: (Int, Int, Int) } -- ^ spill\/reload\/reg-reg moves present in this code -instance Outputable RegAllocStats where +instance Outputable instr => Outputable (RegAllocStats instr) where ppr (s@RegAllocStatsStart{}) = text "# Start" $$ text "# Native code with liveness information." $$ ppr (raLiveCmm s) $$ text "" - $$ text "# Initial register conflict graph." - $$ Color.dotGraph regDotColor trivColorable (raGraph s) +-- $$ text "# Initial register conflict graph." +-- $$ Color.dotGraph regDotColor trivColorable (raGraph s) ppr (s@RegAllocStatsSpill{}) = text "# Spill" - $$ text "# Register conflict graph." - $$ Color.dotGraph regDotColor trivColorable (raGraph s) - $$ text "" +-- $$ text "# Register conflict graph." +-- $$ Color.dotGraph regDotColor trivColorable (raGraph s) +-- $$ text "" $$ (if (not $ isNullUFM $ raCoalesced s) then text "# Registers coalesced." @@ -86,9 +85,9 @@ instance Outputable RegAllocStats where $$ text "" else empty) - $$ text "# Spill costs. reg uses defs lifetime degree cost" - $$ vcat (map (pprSpillCostRecord (raGraph s)) $ eltsUFM $ raSpillCosts s) - $$ text "" +-- $$ text "# Spill costs. reg uses defs lifetime degree cost" +-- $$ vcat (map (pprSpillCostRecord (raGraph s)) $ eltsUFM $ raSpillCosts s) +-- $$ text "" $$ text "# Spills inserted." $$ ppr (raSpillStats s) @@ -101,13 +100,13 @@ instance Outputable RegAllocStats where ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) }) = text "# Colored" - $$ text "# Register conflict graph (initial)." - $$ Color.dotGraph regDotColor trivColorable (raGraph s) - $$ text "" +-- $$ text "# Register conflict graph (initial)." +-- $$ Color.dotGraph regDotColor trivColorable (raGraph s) +-- $$ text "" - $$ text "# Register conflict graph (colored)." - $$ Color.dotGraph regDotColor trivColorable (raGraphColored s) - $$ text "" +-- $$ text "# Register conflict graph (colored)." +-- $$ Color.dotGraph regDotColor trivColorable (raGraphColored s) +-- $$ text "" $$ (if (not $ isNullUFM $ raCoalesced s) then text "# Registers coalesced." @@ -133,7 +132,7 @@ instance Outputable RegAllocStats where $$ text "" -- | Do all the different analysis on this list of RegAllocStats -pprStats :: [RegAllocStats] -> Color.Graph Reg RegClass Reg -> SDoc +pprStats :: [RegAllocStats instr] -> Color.Graph Reg RegClass Reg -> SDoc pprStats stats graph = let outSpills = pprStatsSpills stats outLife = pprStatsLifetimes stats @@ -145,7 +144,7 @@ pprStats stats graph -- | Dump a table of how many spill loads \/ stores were inserted for each vreg. pprStatsSpills - :: [RegAllocStats] -> SDoc + :: [RegAllocStats instr] -> SDoc pprStatsSpills stats = let @@ -163,7 +162,7 @@ pprStatsSpills stats -- | Dump a table of how long vregs tend to live for in the initial code. 
pprStatsLifetimes - :: [RegAllocStats] -> SDoc + :: [RegAllocStats instr] -> SDoc pprStatsLifetimes stats = let info = foldl' plusSpillCostInfo zeroSpillCostInfo @@ -191,7 +190,7 @@ binLifetimeCount fm -- | Dump a table of how many conflicts vregs tend to have in the initial code. pprStatsConflict - :: [RegAllocStats] -> SDoc + :: [RegAllocStats instr] -> SDoc pprStatsConflict stats = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2))) @@ -208,7 +207,7 @@ pprStatsConflict stats -- | For every vreg, dump it's how many conflicts it has and its lifetime -- good for making a scatter plot. pprStatsLifeConflict - :: [RegAllocStats] + :: [RegAllocStats instr] -> Color.Graph Reg RegClass Reg -- ^ global register conflict graph -> SDoc @@ -238,7 +237,10 @@ pprStatsLifeConflict stats graph -- | Count spill/reload/reg-reg moves. -- Lets us see how well the register allocator has done. -- -countSRMs :: LiveCmmTop -> (Int, Int, Int) +countSRMs + :: Instruction instr + => LiveCmmTop instr -> (Int, Int, Int) + countSRMs cmm = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0) @@ -246,16 +248,17 @@ countSRM_block (BasicBlock i instrs) = do instrs' <- mapM countSRM_instr instrs return $ BasicBlock i instrs' -countSRM_instr li@(Instr instr _) - | SPILL _ _ <- instr +countSRM_instr li + | SPILL _ _ <- li = do modify $ \(s, r, m) -> (s + 1, r, m) return li - | RELOAD _ _ <- instr + | RELOAD _ _ <- li = do modify $ \(s, r, m) -> (s, r + 1, m) return li - | Just _ <- isRegRegMove instr + | Instr instr _ <- li + , Just _ <- takeRegRegMoveInstr instr = do modify $ \(s, r, m) -> (s, r, m + 1) return li @@ -266,77 +269,9 @@ countSRM_instr li@(Instr instr _) addSRM (s1, r1, m1) (s2, r2, m2) = (s1+s2, r1+r2, m1+m2) ------ --- Register colors for drawing conflict graphs --- Keep this out of MachRegs.hs because it's specific to the graph coloring allocator. 
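The per-architecture colour tables deleted just below move out to the arch-specific RegInfo modules; the PPC copy of regDotColor was added earlier in this patch. That is also why the Color.dotGraph dumps in the Outputable instance above are commented out for now: this module no longer has a single global regDotColor and trivColorable in scope. One plausible way to bring the dumps back would be a dispatching wrapper in TargetReg; note that the name targetRegDotColor here is hypothetical and not part of this patch:

    ppr (s@RegAllocStatsStart{})
        = ...
        $$ Color.dotGraph targetRegDotColor
                          (trivColorable targetRegClass)
                          (raGraph s)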
- - --- reg colors for x86 -#if i386_TARGET_ARCH -regDotColor :: Reg -> SDoc -regDotColor reg - = let Just str = lookupUFM regColors reg - in text str - -regColors - = listToUFM - $ [ (eax, "#00ff00") - , (ebx, "#0000ff") - , (ecx, "#00ffff") - , (edx, "#0080ff") - - , (fake0, "#ff00ff") - , (fake1, "#ff00aa") - , (fake2, "#aa00ff") - , (fake3, "#aa00aa") - , (fake4, "#ff0055") - , (fake5, "#5500ff") ] - - --- reg colors for x86_64 -#elif x86_64_TARGET_ARCH -regDotColor :: Reg -> SDoc -regDotColor reg - = let Just str = lookupUFM regColors reg - in text str - -regColors - = listToUFM - $ [ (rax, "#00ff00"), (eax, "#00ff00") - , (rbx, "#0000ff"), (ebx, "#0000ff") - , (rcx, "#00ffff"), (ecx, "#00ffff") - , (rdx, "#0080ff"), (edx, "#00ffff") - , (r8, "#00ff80") - , (r9, "#008080") - , (r10, "#0040ff") - , (r11, "#00ff40") - , (r12, "#008040") - , (r13, "#004080") - , (r14, "#004040") - , (r15, "#002080") ] - - ++ zip (map RealReg [16..31]) (repeat "red") - - --- reg colors for ppc -#elif powerpc_TARGET_ARCH -regDotColor :: Reg -> SDoc -regDotColor reg - = case regClass reg of - RcInteger -> text "blue" - RcFloat -> text "red" - RcDouble -> text "green" - -#elif sparc_TARGET_ARCH -regDotColor :: Reg -> SDoc -regDotColor reg - = case regClass reg of - RcInteger -> text "blue" - RcFloat -> text "red" - RcDouble -> text "green" -#else -#error ToDo: regDotColor -#endif + + + {- diff --git a/compiler/nativeGen/Regs.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs similarity index 55% rename from compiler/nativeGen/Regs.hs rename to compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs index 5239520..6a7211d 100644 --- a/compiler/nativeGen/Regs.hs +++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs @@ -1,164 +1,21 @@ --- ----------------------------------------------------------------------------- --- --- (c) The University of Glasgow 1994-2004 --- --- Machine-specific info about registers. --- --- Also includes stuff about immediate operands, which are --- often/usually quite entangled with registers. --- --- ----------------------------------------------------------------------------- - -#include "nativeGen/NCG.h" - -module Regs ( - -------------------------------- - -- Generic things, shared by all architectures. - module RegsBase, - getHiVRegFromLo, - get_GlobalReg_reg_or_addr, - allocatableRegs, - allocatableRegsInClass, - trivColorable, - - -------------------------------- - -- Things that are defined by the arch specific module. 
- -- - - -- sizes - Size(..), - intSize, - floatSize, - isFloatSize, - wordSize, - cmmTypeSize, - sizeToWidth, - mkVReg, - - -- immediates - Imm(..), - strImmLit, - litToImm, - - -- addressing modes - AddrMode(..), - addrOffset, - - -- registers - spRel, - argRegs, - allArgRegs, - callClobberedRegs, - allMachRegNos, - regClass, - showReg, - - -- machine specific things -#if powerpc_TARGET_ARCH - allFPArgRegs, - fits16Bits, - makeImmediate, - fReg, - sp, r3, r4, r27, r28, f1, f20, f21, - -#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH - EABase(..), EAIndex(..), addrModeRegs, - - eax, ebx, ecx, edx, esi, edi, ebp, esp, - fake0, fake1, fake2, fake3, fake4, fake5, - rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp, - r8, r9, r10, r11, r12, r13, r14, r15, - xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, - xmm, - - ripRel, - allFPArgRegs, -#elif sparc_TARGET_ARCH - fpRel, - fits13Bits, - largeOffsetError, - gReg, iReg, lReg, oReg, fReg, - fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27, - nCG_FirstFloatReg, -#endif - -- horror show - freeReg, - globalRegMaybe -) +module RegAlloc.Graph.TrivColorable ( + trivColorable, +) where #include "HsVersions.h" -#include "../includes/MachRegs.h" - -import Cmm -import CgUtils ( get_GlobalReg_addr ) -import Outputable ( Outputable(..), pprPanic ) -import qualified Outputable -import Panic -import Unique -import UniqSet -import FastTypes -import FastBool -import UniqFM - - -import RegsBase - -#if alpha_TARGET_ARCH -import Alpha.Regs -#elif powerpc_TARGET_ARCH -import PPC.Regs -#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH -import X86.Regs -#elif sparc_TARGET_ARCH -import SPARC.Regs -#else -#error "Regs: not defined for this architecture" -#endif +import RegClass +import Reg +import GraphBase -instance Show Reg where - show (RealReg i) = showReg i - show (VirtualRegI u) = "%vI_" ++ show u - show (VirtualRegHi u) = "%vHi_" ++ show u - show (VirtualRegF u) = "%vF_" ++ show u - show (VirtualRegD u) = "%vD_" ++ show u - -instance Outputable Reg where - ppr r = Outputable.text (show r) - - --- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform --- when supplied with the vreg for the lower-half of the quantity. --- (NB. Not reversible). -getHiVRegFromLo :: Reg -> Reg -getHiVRegFromLo (VirtualRegI u) - = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H' - -getHiVRegFromLo other - = pprPanic "getHiVRegFromLo" (ppr other) - --- ----------------------------------------------------------------------------- --- Global registers - --- We map STG registers onto appropriate CmmExprs. Either they map --- to real machine registers or stored as offsets from BaseReg. Given --- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real --- register it is in, on this platform, or a CmmExpr denoting the --- address in the register table holding it. --- (See also get_GlobalReg_addr in CgUtils.) - -get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr -get_GlobalReg_reg_or_addr mid - = case globalRegMaybe mid of - Just rr -> Left rr - Nothing -> Right (get_GlobalReg_addr mid) - +import UniqFM +import FastTypes +{- -- allocatableRegs is allMachRegNos with the fixed-use regs removed. -- i.e., these are the regs for which we are prepared to allow the -- register allocator to attempt to map VRegs to. 
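The substantive change to this file is in the next hunk: trivColorable no longer consults a hard-wired regClass but receives the classifier as its first argument, only becoming a Triv Reg RegClass Reg after partial application. The caller picks the classifier; RegAlloc.Graph.Main, earlier in this patch, instantiates it like so (TargetReg's own source is not shown in this excerpt):

    import TargetReg (targetRegClass)

    -- in regAlloc:
    let triv = trivColorable targetRegClass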
@@ -187,7 +44,7 @@ allocatableRegsDouble :: Int allocatableRegsDouble = length $ filter (\r -> regClass r == RcDouble) $ map RealReg allocatableRegs - +-} -- trivColorable --------------------------------------------------------------- @@ -277,8 +134,11 @@ worst n classN classC #error ToDo: define ALLOCATABLE_REGS_INTEGER and ALLOCATABLE_REGS_DOUBLE #endif -trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool -trivColorable _ conflicts exclusions +trivColorable + :: (Reg -> RegClass) + -> Triv Reg RegClass Reg + +trivColorable regClass _ conflicts exclusions = {-# SCC "trivColorable" #-} let isSqueesed cI cF ufm @@ -314,5 +174,3 @@ trivColorable _ conflicts exclusions (# True, _, _ #) -> False - - diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs index 60d0175..45fd640 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Base.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Base.hs @@ -21,7 +21,7 @@ where import RegAlloc.Linear.FreeRegs import RegAlloc.Linear.StackMap import RegAlloc.Liveness -import Regs +import Reg import Outputable import Unique diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs index bee8c98..b357160 100644 --- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs @@ -5,7 +5,8 @@ module RegAlloc.Linear.FreeRegs ( releaseReg, initFreeRegs, getFreeRegs, - allocateReg + allocateReg, + maxSpillSlots ) #include "HsVersions.h" @@ -27,12 +28,15 @@ where #if defined(powerpc_TARGET_ARCH) import RegAlloc.Linear.PPC.FreeRegs +import PPC.Instr (maxSpillSlots) #elif defined(sparc_TARGET_ARCH) import RegAlloc.Linear.SPARC.FreeRegs +import SPARC.Instr (maxSpillSlots) #elif defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH) import RegAlloc.Linear.X86.FreeRegs +import X86.Instr (maxSpillSlots) #else #error "RegAlloc.Linear.FreeRegs not defined for this architecture." diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs index d3f821b..7d2cbcd 100644 --- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs +++ b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs @@ -19,13 +19,11 @@ import RegAlloc.Linear.State import RegAlloc.Linear.Base import RegAlloc.Linear.FreeRegs import RegAlloc.Liveness +import Instruction +import Reg import BlockId -import Instrs -import Regs -import RegAllocInfo import Cmm hiding (RegSet) - import Digraph import Outputable import Unique @@ -37,39 +35,41 @@ import UniqSet -- vregs are in the correct regs for its destination. -- joinToTargets - :: BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs + :: Instruction instr + => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs -- that are known to be live on the entry to each block. -> BlockId -- ^ id of the current block - -> Instr -- ^ branch instr on the end of the source block. + -> instr -- ^ branch instr on the end of the source block. - -> RegM ([NatBasicBlock] -- fresh blocks of fixup code. - , Instr) -- the original branch instruction, but maybe patched to jump + -> RegM ([NatBasicBlock instr] -- fresh blocks of fixup code. + , instr) -- the original branch instruction, but maybe patched to jump -- to a fixup block first. joinToTargets block_live id instr -- we only need to worry about jump instructions. 
-	| not $ isJumpish instr
+	| not $ isJumpishInstr instr
 	= return ([], instr)

 	| otherwise
-	= joinToTargets' block_live [] id instr (jumpDests instr [])
+	= joinToTargets' block_live [] id instr (jumpDestsOfInstr instr)

 -----
 joinToTargets'
-	:: BlockMap RegSet		-- ^ maps the unique of the blockid to the set of vregs
+	:: Instruction instr
+	=> BlockMap RegSet		-- ^ maps the unique of the blockid to the set of vregs
 					--	that are known to be live on the entry to each block.

-	-> [NatBasicBlock]		-- ^ acc blocks of fixup code.
+	-> [NatBasicBlock instr]	-- ^ acc blocks of fixup code.

 	-> BlockId			-- ^ id of the current block
-	-> Instr			-- ^ branch instr on the end of the source block.
+	-> instr			-- ^ branch instr on the end of the source block.

 	-> [BlockId]			-- ^ branch destinations still to consider.

-	-> RegM ( [NatBasicBlock]
-		, Instr)
+	-> RegM ( [NatBasicBlock instr]
+		, instr)

 -- no more targets to consider. all done.
 joinToTargets' _ new_blocks _ instr []
@@ -173,7 +173,7 @@ joinToTargets_again
 	--	then that will jump to our original destination.
 	fixup_block_id <- getUniqueR
 	let	block	= BasicBlock (BlockId fixup_block_id)
-			$ fixUpInstrs ++ mkBranchInstr dest
+			$ fixUpInstrs ++ mkJumpInstr dest

 {-	pprTrace
 		("joinToTargets: fixup code is:")
@@ -187,7 +187,11 @@ joinToTargets_again
 			-- patch the original branch instruction so it goes to our
 			--	fixup block instead.
-			 _	-> let	instr'	= patchJump instr dest (BlockId fixup_block_id)
+			 _	-> let	instr'	= patchJumpInstr instr
+						(\bid -> if bid == dest
+							then BlockId fixup_block_id
+							else bid)
+
 				   in	joinToTargets' block_live (block : new_blocks) block_id instr' dests

@@ -256,7 +260,9 @@ expandNode vreg src dst
 -- 	destinations. We have eliminated any possibility of single-node
 --	cycles in expandNode above.
 --
-handleComponent :: Int -> Instr -> SCC (Unique, Loc, [Loc]) -> RegM [Instr]
+handleComponent
+	:: Instruction instr
+	=> Int -> instr -> SCC (Unique, Loc, [Loc]) -> RegM [instr]

 -- If the graph is acyclic then we won't get the swapping problem below.
 -- 	In this case we can just do the moves directly, and avoid having to
@@ -305,11 +311,12 @@ handleComponent _ _ (CyclicSCC _)

 -- | Move a vreg between these two locations.
 --
 makeMove
-	:: Int 		-- ^ current C stack delta.
+	:: Instruction instr
+	=> Int 		-- ^ current C stack delta.
 	-> Unique 	-- ^ unique of the vreg that we're moving.
 	-> Loc 		-- ^ source location.
 	-> Loc 		-- ^ destination location.
-	-> RegM Instr	-- ^ move instruction.
+	-> RegM instr	-- ^ move instruction.
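 -- As an aside, a sketch of the Instruction class that the signatures in
 -- this module now range over. The method names are the ones used in this
 -- patch; the exact types are assumptions, and the real definition lives
 -- in the new Instruction module:
 --
 --	class Instruction instr where
 --		regUsageOfInstr		:: instr -> RegUsage
 --		patchRegsOfInstr	:: instr -> (Reg -> Reg) -> instr
 --		isJumpishInstr		:: instr -> Bool
 --		jumpDestsOfInstr	:: instr -> [BlockId]
 --		patchJumpInstr		:: instr -> (BlockId -> BlockId) -> instr
 --		mkSpillInstr		:: Reg -> Int -> Int -> instr
 --		mkLoadInstr		:: Reg -> Int -> Int -> instr
 --		takeDeltaInstr		:: instr -> Maybe Int
 --		isMetaInstr		:: instr -> Bool
 --		mkRegRegMoveInstr	:: Reg -> Reg -> instr
 --		takeRegRegMoveInstr	:: instr -> Maybe (Reg, Reg)
 --		mkJumpInstr		:: BlockId -> [instr]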
makeMove _ vreg (InReg src) (InReg dst) = do recordSpill (SpillJoinRR vreg) diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs index bfd9ca5..47529d2 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs @@ -96,14 +96,14 @@ import RegAlloc.Linear.StackMap import RegAlloc.Linear.FreeRegs import RegAlloc.Linear.Stats import RegAlloc.Linear.JoinToTargets +import TargetReg import RegAlloc.Liveness +import Instruction +import Reg -- import PprMach import BlockId -import Regs -import Instrs -import RegAllocInfo import Cmm hiding (RegSet) import Digraph @@ -112,7 +112,6 @@ import UniqSet import UniqFM import UniqSupply import Outputable -import FastString import Data.Maybe import Data.List @@ -126,8 +125,9 @@ import Control.Monad -- Allocate registers regAlloc - :: LiveCmmTop - -> UniqSM (NatCmmTop, Maybe RegAllocStats) + :: (Outputable instr, Instruction instr) + => LiveCmmTop instr + -> UniqSM (NatCmmTop instr, Maybe RegAllocStats) regAlloc (CmmData sec d) = return @@ -171,10 +171,11 @@ regAlloc (CmmProc _ _ _ _) -- an entry in the block map or it is the first block. -- linearRegAlloc - :: BlockId -- ^ the first block + :: (Outputable instr, Instruction instr) + => BlockId -- ^ the first block -> BlockMap RegSet -- ^ live regs on entry to each basic block - -> [SCC LiveBasicBlock] -- ^ instructions annotated with "deaths" - -> UniqSM ([NatBasicBlock], RegAllocStats) + -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths" + -> UniqSM ([NatBasicBlock instr], RegAllocStats) linearRegAlloc first_id block_live sccs = do us <- getUs @@ -234,9 +235,10 @@ process first_id block_live (b@(BasicBlock id _) : blocks) next_round accum -- | Do register allocation on this basic block -- processBlock - :: BlockMap RegSet -- ^ live regs on entry to each basic block - -> LiveBasicBlock -- ^ block to do register allocation on - -> RegM [NatBasicBlock] -- ^ block with registers allocated + :: (Outputable instr, Instruction instr) + => BlockMap RegSet -- ^ live regs on entry to each basic block + -> LiveBasicBlock instr -- ^ block to do register allocation on + -> RegM [NatBasicBlock instr] -- ^ block with registers allocated processBlock block_live (BasicBlock id instrs) = do initBlock id @@ -265,20 +267,21 @@ initBlock id -- | Do allocation for a sequence of instructions. linearRA - :: BlockMap RegSet -- ^ map of what vregs are live on entry to each block. - -> [Instr] -- ^ accumulator for instructions already processed. - -> [NatBasicBlock] -- ^ accumulator for blocks of fixup code. - -> BlockId -- ^ id of the current block, for debugging. - -> [LiveInstr] -- ^ liveness annotated instructions in this block. + :: (Outputable instr, Instruction instr) + => BlockMap RegSet -- ^ map of what vregs are live on entry to each block. + -> [instr] -- ^ accumulator for instructions already processed. + -> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code. + -> BlockId -- ^ id of the current block, for debugging. + -> [LiveInstr instr] -- ^ liveness annotated instructions in this block. - -> RegM ( [Instr] -- instructions after register allocation - , [NatBasicBlock]) -- fresh blocks of fixup code. + -> RegM ( [instr] -- instructions after register allocation + , [NatBasicBlock instr]) -- fresh blocks of fixup code. linearRA _ accInstr accFixup _ [] = return - ( reverse accInstr -- instrs need to be returned in the correct order. 
-		, accFixup)			-- it doesn't matter what order the fixup blocks are returned in.
+		( reverse accInstr	-- instrs need to be returned in the correct order.
+		, accFixup)		-- it doesn't matter what order the fixup blocks are returned in.

 linearRA block_live accInstr accFixups id (instr:instrs)
@@ -291,21 +294,24 @@ linearRA block_live accInstr accFixups id (instr:instrs)

 -- | Do allocation for a single instruction.
 raInsn
-	:: BlockMap RegSet		-- ^ map of what vregs are live on entry to each block.
-	-> [Instr]			-- ^ accumulator for instructions already processed.
-	-> BlockId			-- ^ the id of the current block, for debugging
-	-> LiveInstr			-- ^ the instr to have its regs allocated, with liveness info.
+	:: (Outputable instr, Instruction instr)
+	=> BlockMap RegSet		-- ^ map of what vregs are live on entry to each block.
+	-> [instr]			-- ^ accumulator for instructions already processed.
+	-> BlockId			-- ^ the id of the current block, for debugging
+	-> LiveInstr instr		-- ^ the instr to have its regs allocated, with liveness info.
 	-> RegM
-		( [Instr]		-- new instructions
-		, [NatBasicBlock])	-- extra fixup blocks
+		( [instr]		-- new instructions
+		, [NatBasicBlock instr])	-- extra fixup blocks

-raInsn _     new_instrs _ (Instr (COMMENT _) Nothing)
-	= return (new_instrs, [])
+raInsn _     new_instrs _ (Instr ii Nothing)
+	| Just n	<- takeDeltaInstr ii
+	= do	setDeltaR n
+		return (new_instrs, [])
+
+raInsn _     new_instrs _ (Instr ii Nothing)
+	| isMetaInstr ii
+	= return (new_instrs, [])

-raInsn _     new_instrs _ (Instr (DELTA n) Nothing)
-	= do
-	setDeltaR n
-	return (new_instrs, [])

 raInsn block_live new_instrs id (Instr instr (Just live))
 	= do
@@ -318,7 +324,7 @@ raInsn block_live new_instrs id (Instr instr (Just live))
 	-- then we can eliminate the instruction.
 	-- (we can't eliminate it if the source register is on the stack, because
 	--  we do not want to use one spill slot for different virtual registers)
-	case isRegRegMove instr of
+	case takeRegRegMoveInstr instr of
 	 Just (src,dst)	| src `elementOfUniqSet` (liveDieRead live),
 			  isVirtualReg dst,
 			  not (dst `elemUFM` assig),
@@ -354,7 +360,7 @@ raInsn _ _ _ instr

 genRaInsn block_live new_instrs block_id instr r_dying w_dying =
-    case regUsage instr of { RU read written ->
+    case regUsageOfInstr instr of { RU read written ->
     case partition isRealReg written of { (real_written1,virt_written) ->
     do
     let
@@ -410,7 +416,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying =
 			| (t,r) <- zip virt_read r_allocd
 					 ++ zip virt_written w_allocd ]

-	patched_instr = patchRegs adjusted_instr patchLookup
+	patched_instr = patchRegsOfInstr adjusted_instr patchLookup
 	patchLookup x = case lookupUFM patch_map x of
 				Nothing -> x
 				Just y  -> y
@@ -424,7 +430,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying =
 	-- erase reg->reg moves where the source and destination are the same.
 	--	If the src temp didn't die in this instr but happened to be allocated
 	--	to the same real reg as the destination, then we can erase the move anyway.
-	let	squashed_instr	= case isRegRegMove patched_instr of
+	let	squashed_instr	= case takeRegRegMoveInstr patched_instr of
 				Just (src, dst)
 				 | src == dst	-> []
 				_		-> [patched_instr]
@@ -473,10 +479,11 @@ for allocateRegs on the temps *written*,
 -}

 saveClobberedTemps
-   :: [RegNo]		   -- real registers clobbered by this instruction
-   -> [Reg]		   -- registers which are no longer live after this insn
-   -> RegM [Instr]	   -- return: instructions to spill any temps that will
-			   -- be clobbered.
+ :: Instruction instr + => [RegNo] -- real registers clobbered by this instruction + -> [Reg] -- registers which are no longer live after this insn + -> RegM [instr] -- return: instructions to spill any temps that will + -- be clobbered. saveClobberedTemps [] _ = return [] -- common case saveClobberedTemps clobbered dying = do @@ -498,7 +505,7 @@ saveClobberedTemps clobbered dying = do recordSpill (SpillClobber temp) let new_assign = addToUFM assig temp (InBoth reg slot) - clobber new_assign (spill : COMMENT (fsLit "spill clobber") : instrs) rest + clobber new_assign (spill : {- COMMENT (fsLit "spill clobber") : -} instrs) rest clobberRegs :: [RegNo] -> RegM () clobberRegs [] = return () -- common case @@ -533,12 +540,13 @@ clobberRegs clobbered = do -- the list of free registers and free stack slots. allocateRegsAndSpill - :: Bool -- True <=> reading (load up spilled regs) + :: Instruction instr + => Bool -- True <=> reading (load up spilled regs) -> [Reg] -- don't push these out - -> [Instr] -- spill insns + -> [instr] -- spill insns -> [RegNo] -- real registers allocated (accum.) -> [Reg] -- temps to allocate - -> RegM ([Instr], [RegNo]) + -> RegM ([instr], [RegNo]) allocateRegsAndSpill _ _ spills alloc [] = return (spills,reverse alloc) @@ -563,7 +571,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do loc -> do freeregs <- getFreeRegsR - case getFreeRegs (regClass r) freeregs of + case getFreeRegs (targetRegClass r) freeregs of -- case (2): we have a free register my_reg:_ -> {- pprTrace "alloc" (ppr r <+> ppr my_reg <+> ppr freeClass) $ -} @@ -582,10 +590,10 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do keep' = map getUnique keep candidates1 = [ (temp,reg,mem) | (temp, InBoth reg mem) <- ufmToList assig, - temp `notElem` keep', regClass (RealReg reg) == regClass r ] + temp `notElem` keep', targetRegClass (RealReg reg) == targetRegClass r ] candidates2 = [ (temp,reg) | (temp, InReg reg) <- ufmToList assig, - temp `notElem` keep', regClass (RealReg reg) == regClass r ] + temp `notElem` keep', targetRegClass (RealReg reg) == targetRegClass r ] -- in ASSERT2(not (null candidates1 && null candidates2), text (show freeregs) <+> ppr r <+> ppr assig) do @@ -622,8 +630,8 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do (spill_insn, slot) <- spillR (RealReg my_reg) temp_to_push_out let spill_store = (if reading then id else reverse) - [ COMMENT (fsLit "spill alloc") - , spill_insn ] + [ -- COMMENT (fsLit "spill alloc") + spill_insn ] -- record that this temp was spilled recordSpill (SpillAlloc temp_to_push_out) @@ -643,18 +651,19 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do -- | Load up a spilled temporary if we need to. 
loadTemp - :: Bool + :: Instruction instr + => Bool -> Reg -- the temp being loaded -> Maybe Loc -- the current location of this temp -> RegNo -- the hreg to load the temp into - -> [Instr] - -> RegM [Instr] + -> [instr] + -> RegM [instr] loadTemp True vreg (Just (InMem slot)) hreg spills = do insn <- loadR (RealReg hreg) slot recordSpill (SpillLoad $ getUnique vreg) - return $ COMMENT (fsLit "spill load") : insn : spills + return $ {- COMMENT (fsLit "spill load") : -} insn : spills loadTemp _ _ _ _ spills = return spills diff --git a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs index 6d8809d..878bfe3 100644 --- a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs @@ -3,7 +3,9 @@ module RegAlloc.Linear.PPC.FreeRegs where -import Regs +import PPC.Regs +import RegClass +import Reg import Outputable diff --git a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs index aa716b5..5514056 100644 --- a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs @@ -3,7 +3,9 @@ module RegAlloc.Linear.SPARC.FreeRegs where -import Regs +import SPARC.Regs +import RegClass +import Reg import Outputable import FastBool diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs index 5656941..62bf6ad 100644 --- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs +++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs @@ -19,7 +19,7 @@ module RegAlloc.Linear.StackMap ( where -import RegAllocInfo (maxSpillSlots) +import RegAlloc.Linear.FreeRegs import Outputable import UniqFM diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs index 94a8f7b..b9f7049 100644 --- a/compiler/nativeGen/RegAlloc/Linear/State.hs +++ b/compiler/nativeGen/RegAlloc/Linear/State.hs @@ -34,11 +34,8 @@ import RegAlloc.Linear.StackMap import RegAlloc.Linear.Base import RegAlloc.Linear.FreeRegs import RegAlloc.Liveness - - -import Instrs -import Regs -import RegAllocInfo +import Instruction +import Reg import Unique import UniqSupply @@ -85,14 +82,19 @@ makeRAStats state { ra_spillInstrs = binSpillReasons (ra_spills state) } -spillR :: Reg -> Unique -> RegM (Instr, Int) +spillR :: Instruction instr + => Reg -> Unique -> RegM (instr, Int) + spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} -> let (stack',slot) = getStackSlotFor stack temp instr = mkSpillInstr reg delta slot in (# s{ra_stack=stack'}, (instr,slot) #) -loadR :: Reg -> Int -> RegM Instr + +loadR :: Instruction instr + => Reg -> Int -> RegM instr + loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} -> (# s, mkLoadInstr reg delta slot #) diff --git a/compiler/nativeGen/RegAlloc/Linear/Stats.hs b/compiler/nativeGen/RegAlloc/Linear/Stats.hs index 95bf8ed..137168e 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Stats.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Stats.hs @@ -8,9 +8,8 @@ where import RegAlloc.Linear.Base import RegAlloc.Liveness +import Instruction -import RegAllocInfo -import Instrs import Cmm (GenBasicBlock(..)) import UniqFM @@ -36,7 +35,10 @@ binSpillReasons reasons -- | Count reg-reg moves remaining in this code. 
-countRegRegMovesNat :: NatCmmTop -> Int +countRegRegMovesNat + :: Instruction instr + => NatCmmTop instr -> Int + countRegRegMovesNat cmm = execState (mapGenBlockTopM countBlock cmm) 0 where @@ -45,7 +47,7 @@ countRegRegMovesNat cmm return b countInstr instr - | Just _ <- isRegRegMove instr + | Just _ <- takeRegRegMoveInstr instr = do modify (+ 1) return instr @@ -54,7 +56,10 @@ countRegRegMovesNat cmm -- | Pretty print some RegAllocStats -pprStats :: [NatCmmTop] -> [RegAllocStats] -> SDoc +pprStats + :: Instruction instr + => [NatCmmTop instr] -> [RegAllocStats] -> SDoc + pprStats code statss = let -- sum up all the instrs inserted by the spiller spills = foldl' (plusUFM_C (zipWith (+))) diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs index 1306deb..eedaca8 100644 --- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs +++ b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs @@ -3,7 +3,9 @@ module RegAlloc.Linear.X86.FreeRegs where -import Regs +import X86.Regs +import RegClass +import Reg import Data.Word import Data.Bits diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs index 8445034..8faab5a 100644 --- a/compiler/nativeGen/RegAlloc/Liveness.hs +++ b/compiler/nativeGen/RegAlloc/Liveness.hs @@ -20,7 +20,7 @@ module RegAlloc.Liveness ( mapBlockTop, mapBlockTopM, mapGenBlockTop, mapGenBlockTopM, stripLive, - spillNatBlock, + stripLiveBlock, slurpConflicts, slurpReloadCoalesce, eraseDeltasLive, @@ -30,12 +30,13 @@ module RegAlloc.Liveness ( ) where + +import Reg +import Instruction + import BlockId -import Regs -import Instrs -import PprMach -import RegAllocInfo import Cmm hiding (RegSet) +import PprCmm() import Digraph import Outputable @@ -65,18 +66,25 @@ emptyBlockMap = emptyBlockEnv -- | A top level thing which carries liveness information. -type LiveCmmTop +type LiveCmmTop instr = GenCmmTop CmmStatic LiveInfo - (ListGraph (GenBasicBlock LiveInstr)) + (ListGraph (GenBasicBlock (LiveInstr instr))) -- the "instructions" here are actually more blocks, -- single blocks are acyclic -- multiple blocks are taken to be cyclic. -- | An instruction with liveness information. -data LiveInstr - = Instr Instr (Maybe Liveness) +data LiveInstr instr + = Instr instr (Maybe Liveness) + + -- | spill this reg to a stack slot + | SPILL Reg Int + + -- | reload this reg from a stack slot + | RELOAD Int Reg + -- | Liveness information. -- The regs which die are ones which are no longer live in the *next* instruction @@ -100,11 +108,28 @@ data LiveInfo (BlockMap RegSet) -- argument locals live on entry to this block -- | A basic block with liveness information. -type LiveBasicBlock - = GenBasicBlock LiveInstr +type LiveBasicBlock instr + = GenBasicBlock (LiveInstr instr) + + +instance Outputable instr + => Outputable (LiveInstr instr) where + ppr (SPILL reg slot) + = hcat [ + ptext (sLit "\tSPILL"), + char ' ', + ppr reg, + comma, + ptext (sLit "SLOT") <> parens (int slot)] + + ppr (RELOAD slot reg) + = hcat [ + ptext (sLit "\tRELOAD"), + char ' ', + ptext (sLit "SLOT") <> parens (int slot), + comma, + ppr reg] - -instance Outputable LiveInstr where ppr (Instr instr Nothing) = ppr instr @@ -120,8 +145,7 @@ instance Outputable LiveInstr where where pprRegs :: SDoc -> RegSet -> SDoc pprRegs name regs | isEmptyUniqSet regs = empty - | otherwise = name <> (hcat $ punctuate space $ map (docToSDoc . 
pprUserReg) $ uniqSetToList regs) - + | otherwise = name <> (hcat $ punctuate space $ map ppr $ uniqSetToList regs) instance Outputable LiveInfo where ppr (LiveInfo static firstId liveOnEntry) @@ -130,11 +154,12 @@ instance Outputable LiveInfo where $$ text "# liveOnEntry = " <> ppr liveOnEntry + -- | map a function across all the basic blocks in this code -- mapBlockTop - :: (LiveBasicBlock -> LiveBasicBlock) - -> LiveCmmTop -> LiveCmmTop + :: (LiveBasicBlock instr -> LiveBasicBlock instr) + -> LiveCmmTop instr -> LiveCmmTop instr mapBlockTop f cmm = evalState (mapBlockTopM (\x -> return $ f x) cmm) () @@ -144,8 +169,8 @@ mapBlockTop f cmm -- mapBlockTopM :: Monad m - => (LiveBasicBlock -> m LiveBasicBlock) - -> LiveCmmTop -> m LiveCmmTop + => (LiveBasicBlock instr -> m (LiveBasicBlock instr)) + -> LiveCmmTop instr -> m (LiveCmmTop instr) mapBlockTopM _ cmm@(CmmData{}) = return cmm @@ -187,7 +212,11 @@ mapGenBlockTopM f (CmmProc header label params (ListGraph blocks)) -- Slurping of conflicts and moves is wrapped up together so we don't have -- to make two passes over the same code when we want to build the graph. -- -slurpConflicts :: LiveCmmTop -> (Bag (UniqSet Reg), Bag (Reg, Reg)) +slurpConflicts + :: Instruction instr + => LiveCmmTop instr + -> (Bag (UniqSet Reg), Bag (Reg, Reg)) + slurpConflicts live = slurpCmm (emptyBag, emptyBag) live @@ -205,12 +234,20 @@ slurpConflicts live = (consBag rsLiveEntry conflicts, moves) | otherwise - = panic "RegLiveness.slurpBlock: bad block" + = panic "Liveness.slurpConflicts: bad block" slurpLIs rsLive (conflicts, moves) [] = (consBag rsLive conflicts, moves) - slurpLIs rsLive rs (Instr _ Nothing : lis) = slurpLIs rsLive rs lis + slurpLIs rsLive rs (Instr _ Nothing : lis) + = slurpLIs rsLive rs lis + + -- we're not expecting to be slurping conflicts from spilled code + slurpLIs _ _ (SPILL _ _ : _) + = panic "Liveness.slurpConflicts: unexpected SPILL" + + slurpLIs _ _ (RELOAD _ _ : _) + = panic "Liveness.slurpConflicts: unexpected RELOAD" slurpLIs rsLiveEntry (conflicts, moves) (Instr instr (Just live) : lis) = let @@ -234,7 +271,7 @@ slurpConflicts live -- rsConflicts = unionUniqSets rsLiveNext rsOrphans - in case isRegRegMove instr of + in case takeRegRegMoveInstr instr of Just rr -> slurpLIs rsLiveNext ( consBag rsConflicts conflicts , consBag rr moves) lis @@ -254,7 +291,11 @@ slurpConflicts live -- the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move. 
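 -- For example (a sketch): in an instruction stream like
 --
 --	SPILL  v1, SLOT(4)
 --	...
 --	RELOAD SLOT(4), v2
 --
 -- slurpReloadCoalesce below records the edge (v2, v1); if the allocator
 -- can then put v1 and v2 in the same real register, the spill\/reload
 -- pair degenerates into a no-op reg-reg move that can be erased.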
--
--
-slurpReloadCoalesce :: LiveCmmTop -> Bag (Reg, Reg)
+slurpReloadCoalesce
+	:: Instruction instr
+	=> LiveCmmTop instr
+	-> Bag (Reg, Reg)
+
 slurpReloadCoalesce live
 	= slurpCmm emptyBag live
@@ -285,23 +326,24 @@ slurpReloadCoalesce live
 		(_, mMoves)	<- mapAccumLM slurpLI slotMap instrs
 		return $ listToBag $ catMaybes mMoves

-	slurpLI :: UniqFM Reg 				-- current slotMap
-		-> LiveInstr
+	slurpLI :: Instruction instr
+		=> UniqFM Reg 				-- current slotMap
+		-> LiveInstr instr
 		-> State (UniqFM [UniqFM Reg]) 		-- blockId -> [slot -> reg]
 							-- 	for tracking slotMaps across jumps

 		( UniqFM Reg		-- new slotMap
 		, Maybe (Reg, Reg))	-- maybe a new coalesce edge

-	slurpLI slotMap (Instr instr _)
+	slurpLI slotMap li

 		-- remember what reg was stored into the slot
-		| SPILL reg slot	<- instr
+		| SPILL reg slot	<- li
 		, slotMap'		<- addToUFM slotMap slot reg
 		= return (slotMap', Nothing)

 		-- add an edge between this reg and the last one stored into the slot
-		| RELOAD slot reg	<- instr
+		| RELOAD slot reg	<- li
 		= case lookupUFM slotMap slot of
 			Just reg2
 			 | reg /= reg2	-> return (slotMap, Just (reg, reg2))
@@ -310,7 +352,8 @@ slurpReloadCoalesce live
 			Nothing		-> return (slotMap, Nothing)

 		-- if we hit a jump, remember the current slotMap
-		| targets	<- jumpDests instr []
+		| Instr instr _	<- li
+		, targets	<- jumpDestsOfInstr instr
 		, not $ null targets
 		= do	mapM_ (accSlotMap slotMap) targets
 			return (slotMap, Nothing)
@@ -340,7 +383,11 @@ slurpReloadCoalesce live

 -- | Strip away liveness information, yielding NatCmmTop
-stripLive :: LiveCmmTop -> NatCmmTop
+stripLive
+	:: Instruction instr
+	=> LiveCmmTop instr
+	-> NatCmmTop instr
+
 stripLive live
 	= stripCmm live
@@ -349,26 +396,26 @@ stripLive live
 		= CmmProc info label params
                           (ListGraph $ concatMap stripComp comps)

-	stripComp  (BasicBlock _ blocks)	= map stripBlock blocks
-	stripBlock (BasicBlock i instrs)	= BasicBlock i (map stripLI instrs)
-	stripLI	   (Instr instr _)		= instr
+	stripComp  (BasicBlock _ blocks)	= map stripLiveBlock blocks

--- | Make real spill instructions out of SPILL, RELOAD pseudos
+-- | Strip away liveness information from a basic block,
+--	and make real spill instructions out of SPILL, RELOAD pseudos along the way.

-spillNatBlock :: NatBasicBlock -> NatBasicBlock
-spillNatBlock (BasicBlock i is)
+stripLiveBlock
+	:: Instruction instr
+	=> LiveBasicBlock instr
+	-> NatBasicBlock instr
+
+stripLiveBlock (BasicBlock i lis)
 =	BasicBlock i instrs'
+
 	where	(instrs', _)
-		= runState (spillNat [] is) 0
+		= runState (spillNat [] lis) 0

 	spillNat acc []
 	 = 	return (reverse acc)

-	spillNat acc (DELTA i : instrs)
-	 = do	put i
-		spillNat acc instrs
-
 	spillNat acc (SPILL reg slot : instrs)
 	 = do	delta	<- get
 		spillNat (mkSpillInstr reg delta slot : acc) instrs
@@ -377,22 +424,28 @@ spillNatBlock (BasicBlock i is)
 	 = do	delta	<- get
 		spillNat (mkLoadInstr reg delta slot : acc) instrs

-	spillNat acc (instr : instrs)
+	spillNat acc (Instr instr _ : instrs)
+	 | Just i	<- takeDeltaInstr instr
+	 = do	put i
+		spillNat acc instrs
+
+	spillNat acc (Instr instr _ : instrs)
 	 =	spillNat (instr : acc) instrs


 -- | Erase Delta instructions.
-eraseDeltasLive :: LiveCmmTop -> LiveCmmTop
+eraseDeltasLive
+	:: Instruction instr
+	=> LiveCmmTop instr
+	-> LiveCmmTop instr
+
 eraseDeltasLive cmm
 	= mapBlockTop eraseBlock cmm
 	where
-	isDelta (DELTA _)	= True
-	isDelta _		= False
-
 	eraseBlock (BasicBlock id lis)
 		= BasicBlock id
-		$ filter (\(Instr i _) -> not $ isDelta i)
+		$ filter (\(Instr i _) -> not $ isJust $ takeDeltaInstr i)
 		$ lis
@@ -401,8 +454,9 @@ eraseDeltasLive cmm
 --	also erase reg -> reg moves when the destination dies in this instr.

 patchEraseLive
-	:: (Reg -> Reg)
-	-> LiveCmmTop -> LiveCmmTop
+	:: Instruction instr
+	=> (Reg -> Reg)
+	-> LiveCmmTop instr -> LiveCmmTop instr

 patchEraseLive patchF cmm
 	= patchCmm cmm
@@ -427,7 +481,7 @@ patchEraseLive patchF cmm

 	patchInstrs (li : lis)
 		| Instr i (Just live)	<- li'
-		, Just (r1, r2)	<- isRegRegMove i
+		, Just (r1, r2)	<- takeRegRegMoveInstr i
 		, eatMe r1 r2 live
 		= patchInstrs lis
@@ -451,30 +505,38 @@ patchEraseLive patchF cmm
 -- | Patch registers in this LiveInstr, including the liveness information.
 --
 patchRegsLiveInstr
-	:: (Reg -> Reg)
-	-> LiveInstr -> LiveInstr
+	:: Instruction instr
+	=> (Reg -> Reg)
+	-> LiveInstr instr -> LiveInstr instr

 patchRegsLiveInstr patchF li
 	= case li of
 		Instr instr Nothing
-		 -> Instr (patchRegs instr patchF) Nothing
+		 -> Instr (patchRegsOfInstr instr patchF) Nothing

 		Instr instr (Just live)
 		 -> Instr
-			(patchRegs instr patchF)
+			(patchRegsOfInstr instr patchF)
 			(Just live
 				{ -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
 				  liveBorn	= mkUniqSet $ map patchF $ uniqSetToList $ liveBorn live
 				, liveDieRead	= mkUniqSet $ map patchF $ uniqSetToList $ liveDieRead live
 				, liveDieWrite	= mkUniqSet $ map patchF $ uniqSetToList $ liveDieWrite live })

+		SPILL reg slot
+		 -> SPILL (patchF reg) slot
+
+		RELOAD slot reg
+		 -> RELOAD slot (patchF reg)
+

---------------------------------------------------------------------------------
-- Annotate code with register liveness information
--
 regLiveness
-	:: NatCmmTop
-	-> UniqSM LiveCmmTop
+	:: Instruction instr
+	=> NatCmmTop instr
+	-> UniqSM (LiveCmmTop instr)

 regLiveness (CmmData i d)
 	= returnUs $ CmmData i d
@@ -501,11 +563,15 @@ regLiveness (CmmProc info lbl params (ListGraph blocks@(first : _)))
 			lbl params (ListGraph liveBlocks)

-sccBlocks :: [NatBasicBlock] -> [SCC NatBasicBlock]
+sccBlocks
+	:: Instruction instr
+	=> [NatBasicBlock instr]
+	-> [SCC (NatBasicBlock instr)]
+
 sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
 	where
-	getOutEdges :: [Instr] -> [BlockId]
-	getOutEdges instrs = foldl' (\a x -> jumpDests x a) [] instrs
+	getOutEdges :: Instruction instr => [instr] -> [BlockId]
+	getOutEdges instrs = concat $ map jumpDestsOfInstr instrs

 	graph = [ (block, getUnique id, map getUnique (getOutEdges instrs))
 		| block@(BasicBlock id instrs) <- blocks ]
@@ -515,12 +581,13 @@ sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
 -- Computing liveness

 computeLiveness
-	:: [SCC NatBasicBlock]
-	-> ([SCC LiveBasicBlock],	-- instructions annotated with list of registers
-					-- which are "dead after this instruction".
-	    BlockMap RegSet)		-- blocks annontated with set of live registers
-					-- on entry to the block.
-
+	:: Instruction instr
+	=> [SCC (NatBasicBlock instr)]
+	-> ([SCC (LiveBasicBlock instr)],	-- instructions annotated with list of registers
+						-- which are "dead after this instruction".
+	    BlockMap RegSet)			-- blocks annotated with set of live registers
+						-- on entry to the block.
+ -- NOTE: on entry, the SCCs are in "reverse" order: later blocks may transfer -- control to earlier ones only. The SCCs returned are in the *opposite* -- order, which is exactly what we want for the next pass. @@ -530,10 +597,12 @@ computeLiveness sccs livenessSCCs - :: BlockMap RegSet - -> [SCC LiveBasicBlock] -- accum - -> [SCC NatBasicBlock] - -> ([SCC LiveBasicBlock], BlockMap RegSet) + :: Instruction instr + => BlockMap RegSet + -> [SCC (LiveBasicBlock instr)] -- accum + -> [SCC (NatBasicBlock instr)] + -> ( [SCC (LiveBasicBlock instr)] + , BlockMap RegSet) livenessSCCs blockmap done [] = (done, blockmap) @@ -561,8 +630,11 @@ livenessSCCs blockmap done (a, panic "RegLiveness.livenessSCCs") - linearLiveness :: BlockMap RegSet -> [NatBasicBlock] - -> (BlockMap RegSet, [LiveBasicBlock]) + linearLiveness + :: Instruction instr + => BlockMap RegSet -> [NatBasicBlock instr] + -> (BlockMap RegSet, [LiveBasicBlock instr]) + linearLiveness = mapAccumL livenessBlock -- probably the least efficient way to compare two @@ -578,9 +650,10 @@ livenessSCCs blockmap done -- | Annotate a basic block with register liveness information. -- livenessBlock - :: BlockMap RegSet - -> NatBasicBlock - -> (BlockMap RegSet, LiveBasicBlock) + :: Instruction instr + => BlockMap RegSet + -> NatBasicBlock instr + -> (BlockMap RegSet, LiveBasicBlock instr) livenessBlock blockmap (BasicBlock block_id instrs) = let @@ -598,8 +671,9 @@ livenessBlock blockmap (BasicBlock block_id instrs) -- filling in when regs are born livenessForward - :: RegSet -- regs live on this instr - -> [LiveInstr] -> [LiveInstr] + :: Instruction instr + => RegSet -- regs live on this instr + -> [LiveInstr instr] -> [LiveInstr instr] livenessForward _ [] = [] livenessForward rsLiveEntry (li@(Instr instr mLive) : lis) @@ -607,7 +681,7 @@ livenessForward rsLiveEntry (li@(Instr instr mLive) : lis) = li : livenessForward rsLiveEntry lis | Just live <- mLive - , RU _ written <- regUsage instr + , RU _ written <- regUsageOfInstr instr = let -- Regs that are written to but weren't live on entry to this instruction -- are recorded as being born here. 
@@ -628,11 +702,12 @@ livenessForward _ _ = panic "RegLiveness.livenessForward: no match" -- filling in when regs die, and what regs are live across each instruction livenessBack - :: RegSet -- regs live on this instr + :: Instruction instr + => RegSet -- regs live on this instr -> BlockMap RegSet -- regs live on entry to other BBs - -> [LiveInstr] -- instructions (accum) - -> [Instr] -- instructions - -> (RegSet, [LiveInstr]) + -> [LiveInstr instr] -- instructions (accum) + -> [instr] -- instructions + -> (RegSet, [LiveInstr instr]) livenessBack liveregs _ done [] = (liveregs, done) @@ -640,32 +715,37 @@ livenessBack liveregs blockmap acc (instr : instrs) = let (liveregs', instr') = liveness1 liveregs blockmap instr in livenessBack liveregs' blockmap (instr' : acc) instrs --- don't bother tagging comments or deltas with liveness -liveness1 :: RegSet -> BlockMap RegSet -> Instr -> (RegSet, LiveInstr) -liveness1 liveregs _ (instr@COMMENT{}) - = (liveregs, Instr instr Nothing) -liveness1 liveregs _ (instr@DELTA{}) +-- don't bother tagging comments or deltas with liveness +liveness1 + :: Instruction instr + => RegSet + -> BlockMap RegSet + -> instr + -> (RegSet, LiveInstr instr) + +liveness1 liveregs _ instr + | isMetaInstr instr = (liveregs, Instr instr Nothing) liveness1 liveregs blockmap instr - | not_a_branch - = (liveregs1, Instr instr + | not_a_branch + = (liveregs1, Instr instr (Just $ Liveness { liveBorn = emptyUniqSet , liveDieRead = mkUniqSet r_dying , liveDieWrite = mkUniqSet w_dying })) - | otherwise - = (liveregs_br, Instr instr + | otherwise + = (liveregs_br, Instr instr (Just $ Liveness { liveBorn = emptyUniqSet , liveDieRead = mkUniqSet r_dying_br , liveDieWrite = mkUniqSet w_dying })) - where - RU read written = regUsage instr + where + RU read written = regUsageOfInstr instr -- registers that were written here are dead going backwards. -- registers that were read here are live going backwards. @@ -682,7 +762,7 @@ liveness1 liveregs blockmap instr -- union in the live regs from all the jump destinations of this -- instruction. - targets = jumpDests instr [] -- where we go from here + targets = jumpDestsOfInstr instr -- where we go from here not_a_branch = null targets targetLiveRegs target diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs deleted file mode 100644 index f0cb8b5..0000000 --- a/compiler/nativeGen/RegAllocInfo.hs +++ /dev/null @@ -1,94 +0,0 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. 
See
--http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Machine-specific parts of the register allocator
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
-
-module RegAllocInfo (
-	-- shared code
-	shortcutStatic,
-
-	-- machine specific
-	RegUsage(..),
-	noUsage,
-	regUsage,
-	patchRegs,
-	jumpDests,
-	isJumpish,
-	patchJump,
-	isRegRegMove,
-
-	JumpDest,
-	canShortcut,
-	shortcutJump,
-
-	mkSpillInstr,
-	mkLoadInstr,
-	mkRegRegMoveInstr,
-	mkBranchInstr,
-
-	maxSpillSlots,
-	spillSlotToOffset
-	) where
-
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-
-import BlockId
-import Cmm
-import CLabel
-import Instrs
-import Regs
-import Outputable
-import Constants	( rESERVED_C_STACK_BYTES )
-import FastBool
-
-#if alpha_TARGET_ARCH
-import Alpha.RegInfo
-
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.RegInfo
-
-#elif powerpc_TARGET_ARCH
-import PPC.RegInfo
-
-#elif sparc_TARGET_ARCH
-import SPARC.RegInfo
-
-#endif
-
-
--- Here because it knows about JumpDest
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
-  | Just uq <- maybeAsmTemp lab
-  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
-  | Just uq <- maybeAsmTemp lbl1
-  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
-        -- slightly dodgy, we're ignoring the second label, but this
-        -- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic fn other_static
-        = other_static
-
-shortBlockId fn blockid@(BlockId uq) =
-   case fn blockid of
-      Nothing -> mkAsmTempLabel uq
-      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
-      Just (DestImm (ImmCLbl lbl)) -> lbl
-      _other -> panic "shortBlockId"
-
-
-
-
-
diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs
new file mode 100644
index 0000000..8b6b2d4
--- /dev/null
+++ b/compiler/nativeGen/RegClass.hs
@@ -0,0 +1,31 @@
+
+-- | An architecture independent description of a register's class.
+module RegClass
+	( RegClass (..) )
+
+where
+
+import Outputable
+import Unique
+
+
+-- | The class of a register.
+--	Used in the register allocator.
+--	We treat all registers in a class as being interchangeable.
+--
+data RegClass
+	= RcInteger
+	| RcFloat
+	| RcDouble
+	deriving Eq
+
+
+instance Uniquable RegClass where
+	getUnique RcInteger	= mkUnique 'L' 0
+	getUnique RcFloat	= mkUnique 'L' 1
+	getUnique RcDouble	= mkUnique 'L' 2
+
+instance Outputable RegClass where
+	ppr RcInteger	= Outputable.text "I"
+	ppr RcFloat	= Outputable.text "F"
+	ppr RcDouble	= Outputable.text "D"
diff --git a/compiler/nativeGen/RegsBase.hs b/compiler/nativeGen/RegsBase.hs
deleted file mode 100644
index 00c87cb..0000000
--- a/compiler/nativeGen/RegsBase.hs
+++ /dev/null
@@ -1,105 +0,0 @@
-
-module RegsBase (
-	RegNo,
-	Reg(..),
-	isRealReg,
-	unRealReg,
-	isVirtualReg,
-	renameVirtualReg,
-
-	RegClass(..)
-)
-
-where
-
-import Outputable	( Outputable(..) )
-import qualified Outputable
-import Panic
-import Unique
-
--- ---------------------------------------------------------------------------
--- Registers
-
--- RealRegs are machine regs which are available for allocation, in
--- the usual way.  We know what class they are, because that's part of
--- the processor's architecture.
- --- VirtualRegs are virtual registers. The register allocator will --- eventually have to map them into RealRegs, or into spill slots. --- VirtualRegs are allocated on the fly, usually to represent a single --- value in the abstract assembly code (i.e. dynamic registers are --- usually single assignment). With the new register allocator, the --- single assignment restriction isn't necessary to get correct code, --- although a better register allocation will result if single --- assignment is used -- because the allocator maps a VirtualReg into --- a single RealReg, even if the VirtualReg has multiple live ranges. - --- Virtual regs can be of either class, so that info is attached. - -type RegNo - = Int - -data Reg - = RealReg {-# UNPACK #-} !RegNo - | VirtualRegI {-# UNPACK #-} !Unique - | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register - | VirtualRegF {-# UNPACK #-} !Unique - | VirtualRegD {-# UNPACK #-} !Unique - deriving (Eq, Ord) - - --- We like to have Uniques for Reg so that we can make UniqFM and UniqSets --- in the register allocator. -instance Uniquable Reg where - getUnique (RealReg i) = mkUnique 'C' i - getUnique (VirtualRegI u) = u - getUnique (VirtualRegHi u) = u - getUnique (VirtualRegF u) = u - getUnique (VirtualRegD u) = u - - -isRealReg :: Reg -> Bool -isRealReg = not . isVirtualReg - --- | Take the RegNo from a real reg -unRealReg :: Reg -> RegNo -unRealReg (RealReg i) = i -unRealReg _ = panic "unRealReg on VirtualReg" - -isVirtualReg :: Reg -> Bool -isVirtualReg (RealReg _) = False -isVirtualReg (VirtualRegI _) = True -isVirtualReg (VirtualRegHi _) = True -isVirtualReg (VirtualRegF _) = True -isVirtualReg (VirtualRegD _) = True - - -renameVirtualReg :: Unique -> Reg -> Reg -renameVirtualReg u r - = case r of - RealReg _ -> error "renameVirtualReg: can't change unique on a real reg" - VirtualRegI _ -> VirtualRegI u - VirtualRegHi _ -> VirtualRegHi u - VirtualRegF _ -> VirtualRegF u - VirtualRegD _ -> VirtualRegD u - - --- RegClass -------------------------------------------------------------------- -data RegClass - = RcInteger - | RcFloat - | RcDouble - deriving Eq - -instance Uniquable RegClass where - getUnique RcInteger = mkUnique 'L' 0 - getUnique RcFloat = mkUnique 'L' 1 - getUnique RcDouble = mkUnique 'L' 2 - -instance Outputable RegClass where - ppr RcInteger = Outputable.text "I" - ppr RcFloat = Outputable.text "F" - ppr RcDouble = Outputable.text "D" - - - diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs new file mode 100644 index 0000000..d921c12 --- /dev/null +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -0,0 +1,1545 @@ +{-# OPTIONS -w #-} +----------------------------------------------------------------------------- +-- +-- Generating machine code (instruction selection) +-- +-- (c) The University of Glasgow 1996-2004 +-- +----------------------------------------------------------------------------- + +module SPARC.CodeGen ( + cmmTopCodeGen, + InstrBlock +) + +where + +#include "HsVersions.h" +#include "nativeGen/NCG.h" +#include "MachDeps.h" + +-- NCG stuff: +import SPARC.Instr +import SPARC.Cond +import SPARC.Regs +import SPARC.RegInfo +import Instruction +import Size +import Reg +import PIC +import NCGMonad + +-- Our intermediate code: +import BlockId +import Cmm +import CLabel + +-- The rest: +import BasicTypes +import StaticFlags ( opt_PIC ) +import OrdList +import qualified Outputable as O +import Outputable +import FastString + +import Control.Monad ( mapAndUnzipM ) +import Data.Int +import 
DynFlags

+-- | Top level code generation
+cmmTopCodeGen
+	:: DynFlags
+	-> RawCmmTop
+	-> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen _
+	(CmmProc info lab params (ListGraph blocks))
+ = do
+	(nat_blocks,statics)	<- mapAndUnzipM basicBlockCodeGen blocks
+
+--	picBaseMb	<- getPicBaseMaybeNat
+	let proc	= CmmProc info lab params (ListGraph $ concat nat_blocks)
+	let tops	= proc : concat statics
+
+--	case picBaseMb of
+--	 Just picBase -> initializePicBase picBase tops
+--	 Nothing -> return tops
+
+	return tops
+
+
+cmmTopCodeGen _ (CmmData sec dat) = do
+	return [CmmData sec dat]  -- no translation, we just use CmmStatic
+
+
+
+basicBlockCodeGen
+	:: CmmBasicBlock
+	-> NatM ( [NatBasicBlock Instr]
+		, [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+  instrs <- stmtsToInstrs stmts
+  -- code generation may introduce new basic block boundaries, which
+  -- are indicated by the NEWBLOCK instruction.  We must split up the
+  -- instruction stream into basic blocks again.  Also, we extract
+  -- LDATAs here too.
+  let
+	(top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+	mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+	  = ([], BasicBlock id instrs : blocks, statics)
+	mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+	  = (instrs, blocks, CmmData sec dat:statics)
+	mkBlocks instr (instrs,blocks,statics)
+	  = (instr:instrs, blocks, statics)
+  -- in
+  return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+   = do instrss <- mapM stmtToInstrs stmts
+        return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+    CmmNop	   -> return nilOL
+    CmmComment s   -> return (unitOL (COMMENT s))
+
+    CmmAssign reg src
+      | isFloatType ty	-> assignReg_FltCode size reg src
+      | isWord64 ty	-> assignReg_I64Code      reg src
+      | otherwise	-> assignReg_IntCode size reg src
+	where ty = cmmRegType reg
+	      size = cmmTypeSize ty
+
+    CmmStore addr src
+      | isFloatType ty	-> assignMem_FltCode size addr src
+      | isWord64 ty	-> assignMem_I64Code      addr src
+      | otherwise	-> assignMem_IntCode size addr src
+	where ty = cmmExprType src
+	      size = cmmTypeSize ty
+
+    CmmCall target result_regs args _ _
+       -> genCCall target result_regs args
+
+    CmmBranch id	-> genBranch id
+    CmmCondBranch arg id	-> genCondJump id arg
+    CmmSwitch arg ids	-> genSwitch arg ids
+    CmmJump arg _	-> genJump arg
+
+    CmmReturn _
+     -> panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+--	They are really trees of insns to facilitate fast appending, where a
+--	left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+	= OrdList Instr
+
+
+-- | Condition codes passed up the tree.
+--
+data CondCode
+	= CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+--	Reg is the lower 32-bit temporary which contains the result.
+--	Use getHiVRegFromLo to find the other VRegUnique.
+--
+--	Rules of this simplified insn selection game are therefore that
+--	the returned Reg may be modified
+--
+data ChildCode64
+   = ChildCode64
+        InstrBlock
+        Reg
+
+
+-- | Registers passed up the tree.  If the stix code forces the register
+--	to live in a pre-decided machine register, it comes out as @Fixed@;
+--	otherwise, it comes out as @Any@, and the parent can decide which
+--	register to put it in.
+-- +data Register + = Fixed Size Reg InstrBlock + | Any Size (Reg -> InstrBlock) + + +swizzleRegisterRep :: Register -> Size -> Register +swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code +swizzleRegisterRep (Any _ codefn) size = Any size codefn + + +-- | Grab the Reg for a CmmReg +getRegisterReg :: CmmReg -> Reg + +getRegisterReg (CmmLocal (LocalReg u pk)) + = mkVReg u (cmmTypeSize pk) + +getRegisterReg (CmmGlobal mid) + = case get_GlobalReg_reg_or_addr mid of + Left (RealReg rrno) -> RealReg rrno + _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence ... + + +-- | Memory addressing modes passed up the tree. +data Amode + = Amode AddrMode InstrBlock + +{- +Now, given a tree (the argument to an CmmLoad) that references memory, +produce a suitable addressing mode. + +A Rule of the Game (tm) for Amodes: use of the addr bit must +immediately follow use of the code part, since the code part puts +values in registers which the addr then refers to. So you can't put +anything in between, lest it overwrite some of those registers. If +you need to do some other computation between the code part and use of +the addr bit, first store the effective address from the amode in a +temporary, then do the other computation, and then use the temporary: + + code + LEA amode, tmp + ... other computation ... + ... (tmp) ... +-} + + +-- | Check whether an integer will fit in 32 bits. +-- A CmmInt is intended to be truncated to the appropriate +-- number of bits, so here we truncate it to Int64. This is +-- important because e.g. -1 as a CmmInt might be either +-- -1 or 18446744073709551615. +-- +is32BitInteger :: Integer -> Bool +is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 + where i64 = fromIntegral i :: Int64 + + +-- | Convert a BlockId to some CmmStatic data +jumpTableEntry :: Maybe BlockId -> CmmStatic +jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) +jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) + where blockLabel = mkAsmTempLabel id + + + + +-- ----------------------------------------------------------------------------- +-- General things for putting together code sequences + +-- Expand CmmRegOff. ToDo: should we do it this way around, or convert +-- CmmExprs into CmmRegOff? 
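+-- For example (a sketch of the rewrite done below):
+--
+--	CmmRegOff reg 4
+--	  ==>  CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt 4 w)]
+--
+-- where w is the width of reg's type, so the ordinary getRegister
+-- and getAmode cases can then handle the addition.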
+mangleIndexTree :: CmmExpr -> CmmExpr +mangleIndexTree (CmmRegOff reg off) + = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] + where width = typeWidth (cmmRegType reg) + + +assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock +assignMem_I64Code addrTree valueTree = do + Amode _ addr_code <- getAmode addrTree + ChildCode64 vcode rlo <- iselExpr64 valueTree + + (src, code) <- getSomeReg addrTree + let + rhi = getHiVRegFromLo rlo + -- Big-endian store + mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0)) + mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4)) + + return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo) + + +assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do + ChildCode64 vcode r_src_lo <- iselExpr64 valueTree + let + r_dst_lo = mkVReg u_dst (cmmTypeSize pk) + r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = mkMOV r_src_lo r_dst_lo + mov_hi = mkMOV r_src_hi r_dst_hi + mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg + return (vcode `snocOL` mov_hi `snocOL` mov_lo) +assignReg_I64Code lvalue valueTree + = panic "assignReg_I64Code(sparc): invalid lvalue" + + +-- Load a 64 bit word +iselExpr64 (CmmLoad addrTree ty) + | isWord64 ty + = do Amode amode addr_code <- getAmode addrTree + let result + + | AddrRegReg r1 r2 <- amode + = do rlo <- getNewRegNat II32 + tmp <- getNewRegNat II32 + let rhi = getHiVRegFromLo rlo + + return $ ChildCode64 + ( addr_code + `appOL` toOL + [ ADD False False r1 (RIReg r2) tmp + , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi + , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ]) + rlo + + | AddrRegImm r1 (ImmInt i) <- amode + = do rlo <- getNewRegNat II32 + let rhi = getHiVRegFromLo rlo + + return $ ChildCode64 + ( addr_code + `appOL` toOL + [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi + , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ]) + rlo + + result + + +-- Add a literal to a 64 bit integer +iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) + = do ChildCode64 code1 r1_lo <- iselExpr64 e1 + let r1_hi = getHiVRegFromLo r1_lo + + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + return $ ChildCode64 + ( toOL + [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo + , ADD True False r1_hi (RIReg g0) r_dst_hi ]) + r_dst_lo + + +-- Addition of II64 +iselExpr64 (CmmMachOp (MO_Add width) [e1, e2]) + = do ChildCode64 code1 r1_lo <- iselExpr64 e1 + let r1_hi = getHiVRegFromLo r1_lo + + ChildCode64 code2 r2_lo <- iselExpr64 e2 + let r2_hi = getHiVRegFromLo r2_lo + + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + let code = code1 + `appOL` code2 + `appOL` toOL + [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo + , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ] + + return $ ChildCode64 code r_dst_lo + + +iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_lo = mkVReg uq II32 + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = mkMOV r_src_lo r_dst_lo + mov_hi = mkMOV r_src_hi r_dst_hi + mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg + return ( + ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo + ) + +-- Convert something into II64 +iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) + = do + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + -- compute expr and load it into r_dst_lo + (a_reg, a_code) <- getSomeReg expr + + let code = a_code + `appOL` toOL + [ 
mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits + , mkRegRegMoveInstr a_reg r_dst_lo ] + + return $ ChildCode64 code r_dst_lo + + +iselExpr64 expr + = pprPanic "iselExpr64(sparc)" (ppr expr) + + +-- | The dual to getAnyReg: compute an expression into a register, but +-- we don't mind which one it is. +getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) +getSomeReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed _ reg code -> + return (reg, code) + + +-- +getRegister :: CmmExpr -> NatM Register + +getRegister (CmmReg reg) + = return (Fixed (cmmTypeSize (cmmRegType reg)) + (getRegisterReg reg) nilOL) + +getRegister tree@(CmmRegOff _ _) + = getRegister (mangleIndexTree tree) + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + +getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed II32 rlo code + + + +-- Load a literal float into a float register. +-- The actual literal is stored in a new data area, and we load it +-- at runtime. +getRegister (CmmLit (CmmFloat f W32)) = do + + -- a label for the new data area + lbl <- getNewLabelNat + tmp <- getNewRegNat II32 + + let code dst = toOL [ + -- the data area + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat f W32)], + + -- load the literal + SETHI (HI (ImmCLbl lbl)) tmp, + LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] + + return (Any FF32 code) + +getRegister (CmmLit (CmmFloat d W64)) = do + lbl <- getNewLabelNat + tmp <- getNewRegNat II32 + let code dst = toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat d W64)], + SETHI (HI (ImmCLbl lbl)) tmp, + LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] + return (Any FF64 code) + +getRegister (CmmMachOp mop [x]) -- unary MachOps + = case mop of + MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x + MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x + + MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x + MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x + + MO_FF_Conv W64 W32-> coerceDbl2Flt x + MO_FF_Conv W32 W64-> coerceFlt2Dbl x + + MO_FS_Conv from to -> coerceFP2Int from to x + MO_SF_Conv from to -> coerceInt2FP from to x + + -- Conversions which are a nop on sparc + MO_UU_Conv from to + | from == to -> conversionNop (intSize to) x + MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8)) + MO_UU_Conv W32 to -> conversionNop (intSize to) x + MO_SS_Conv W32 to -> conversionNop (intSize to) x + + MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x + MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x + MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x + + -- sign extension + MO_SS_Conv W8 W32 -> integerExtend W8 W32 x + MO_SS_Conv W16 W32 -> integerExtend W16 W32 x + MO_SS_Conv W8 W16 -> integerExtend W8 W16 x + + other_op -> panic ("Unknown unary mach op: " ++ show mop) + where + + -- | sign extend and widen + integerExtend + :: Width -- ^ width of source expression + -> Width -- ^ width of result + -> CmmExpr -- ^ source 
expression + -> NatM Register + + integerExtend from to expr + = do -- load the expr into some register + (reg, e_code) <- getSomeReg expr + tmp <- getNewRegNat II32 + let bitCount + = case (from, to) of + (W8, W32) -> 24 + (W16, W32) -> 16 + (W8, W16) -> 24 + let code dst + = e_code + + -- local shift word left to load the sign bit + `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp + + -- arithmetic shift right to sign extend + `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst + + return (Any (intSize to) code) + + + conversionNop new_rep expr + = do e_code <- getRegister expr + return (swizzleRegisterRep e_code new_rep) + +getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps + = case mop of + MO_Eq rep -> condIntReg EQQ x y + MO_Ne rep -> condIntReg NE x y + + MO_S_Gt rep -> condIntReg GTT x y + MO_S_Ge rep -> condIntReg GE x y + MO_S_Lt rep -> condIntReg LTT x y + MO_S_Le rep -> condIntReg LE x y + + MO_U_Gt W32 -> condIntReg GTT x y + MO_U_Ge W32 -> condIntReg GE x y + MO_U_Lt W32 -> condIntReg LTT x y + MO_U_Le W32 -> condIntReg LE x y + + MO_U_Gt W16 -> condIntReg GU x y + MO_U_Ge W16 -> condIntReg GEU x y + MO_U_Lt W16 -> condIntReg LU x y + MO_U_Le W16 -> condIntReg LEU x y + + MO_Add W32 -> trivialCode W32 (ADD False False) x y + MO_Sub W32 -> trivialCode W32 (SUB False False) x y + + MO_S_MulMayOflo rep -> imulMayOflo rep x y + + MO_S_Quot W32 -> idiv True False x y + MO_U_Quot W32 -> idiv False False x y + + MO_S_Rem W32 -> irem True x y + MO_U_Rem W32 -> irem False x y + + MO_F_Eq w -> condFltReg EQQ x y + MO_F_Ne w -> condFltReg NE x y + + MO_F_Gt w -> condFltReg GTT x y + MO_F_Ge w -> condFltReg GE x y + MO_F_Lt w -> condFltReg LTT x y + MO_F_Le w -> condFltReg LE x y + + MO_F_Add w -> trivialFCode w FADD x y + MO_F_Sub w -> trivialFCode w FSUB x y + MO_F_Mul w -> trivialFCode w FMUL x y + MO_F_Quot w -> trivialFCode w FDIV x y + + MO_And rep -> trivialCode rep (AND False) x y + MO_Or rep -> trivialCode rep (OR False) x y + MO_Xor rep -> trivialCode rep (XOR False) x y + + MO_Mul rep -> trivialCode rep (SMUL False) x y + + MO_Shl rep -> trivialCode rep SLL x y + MO_U_Shr rep -> trivialCode rep SRL x y + MO_S_Shr rep -> trivialCode rep SRA x y + +{- + MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 + [promote x, promote y]) + where promote x = CmmMachOp MO_F32_to_Dbl [x] + MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 + [x, y]) +-} + other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) + where + -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) + + + -- | Generate an integer division instruction. + idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register + + -- For unsigned division with a 32 bit numerator, + -- we can just clear the Y register. + idiv False cc x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ WRY g0 g0 + , UDIV cc a_reg (RIReg b_reg) dst] + + return (Any II32 code) + + + -- For _signed_ division with a 32 bit numerator, + -- we have to sign extend the numerator into the Y register. + idiv True cc x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend + , SRA tmp (RIImm (ImmInt 16)) tmp + + , WRY tmp g0 + , SDIV cc a_reg (RIReg b_reg) dst] + + return (Any II32 code) + + + -- | Do an integer remainder. 
+ -- + -- NOTE: The SPARC v8 architecture manual says that integer division + -- instructions _may_ generate a remainder, depending on the implementation. + -- If so it is _recommended_ that the remainder is placed in the Y register. + -- + -- The UltraSparc 2007 manual says Y is _undefined_ after division. + -- + -- The SPARC T2 doesn't store the remainder, not sure about the others. + -- It's probably best not to worry about it, and just generate our own + -- remainders. + -- + irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register + + -- For unsigned operands: + -- Division is between a 64 bit numerator and a 32 bit denominator, + -- so we still have to clear the Y register. + irem False x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp_reg <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ WRY g0 g0 + , UDIV False a_reg (RIReg b_reg) tmp_reg + , UMUL False tmp_reg (RIReg b_reg) tmp_reg + , SUB False False a_reg (RIReg tmp_reg) dst] + + return (Any II32 code) + + + -- For signed operands: + -- Make sure to sign extend into the Y register, or the remainder + -- will have the wrong sign when the numerator is negative. + -- + -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits, + -- not the full 32. Not sure why this is, something to do with overflow? + -- If anyone cares enough about the speed of signed remainder they + -- can work it out themselves (then tell me). -- BL 2009/01/20 + + irem True x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp1_reg <- getNewRegNat II32 + tmp2_reg <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend + , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend + , WRY tmp1_reg g0 + + , SDIV False a_reg (RIReg b_reg) tmp2_reg + , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg + , SUB False False a_reg (RIReg tmp2_reg) dst] + + return (Any II32 code) + + + imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register + imulMayOflo rep a b = do + (a_reg, a_code) <- getSomeReg a + (b_reg, b_code) <- getSomeReg b + res_lo <- getNewRegNat II32 + res_hi <- getNewRegNat II32 + let + shift_amt = case rep of + W32 -> 31 + W64 -> 63 + _ -> panic "shift_amt" + code dst = a_code `appOL` b_code `appOL` + toOL [ + SMUL False a_reg (RIReg b_reg) res_lo, + RDY res_hi, + SRA res_lo (RIImm (ImmInt shift_amt)) res_lo, + SUB False False res_lo (RIReg res_hi) dst + ] + return (Any II32 code) + +getRegister (CmmLoad mem pk) = do + Amode src code <- getAmode mem + let + code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst + return (Any (cmmTypeSize pk) code__2) + +getRegister (CmmLit (CmmInt i _)) + | fits13Bits i + = let + src = ImmInt (fromInteger i) + code dst = unitOL (OR False g0 (RIImm src) dst) + in + return (Any II32 code) + +getRegister (CmmLit lit) + = let rep = cmmLitType lit + imm = litToImm lit + code dst = toOL [ + SETHI (HI imm) dst, + OR False dst (RIImm (LO imm)) dst] + in return (Any II32 code) + + + +getAmode :: CmmExpr -> NatM Amode +getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) + +getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)]) + | fits13Bits (-i) + = do + (reg, code) <- getSomeReg x + let + off = ImmInt (-(fromInteger i)) + return (Amode (AddrRegImm reg off) code) + + +getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)]) + | fits13Bits i + = do + (reg, code) <- getSomeReg x + let + off = ImmInt (fromInteger i) + return 
(Amode (AddrRegImm reg off) code) + +getAmode (CmmMachOp (MO_Add rep) [x, y]) + = do + (regX, codeX) <- getSomeReg x + (regY, codeY) <- getSomeReg y + let + code = codeX `appOL` codeY + return (Amode (AddrRegReg regX regY) code) + +getAmode (CmmLit lit) + = do + let imm__2 = litToImm lit + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + + let code = toOL [ SETHI (HI imm__2) tmp1 + , OR False tmp1 (RIImm (LO imm__2)) tmp2] + + return (Amode (AddrRegReg tmp2 g0) code) + +getAmode other + = do + (reg, code) <- getSomeReg other + let + off = ImmInt 0 + return (Amode (AddrRegImm reg off) code) + + +getCondCode :: CmmExpr -> NatM CondCode +getCondCode (CmmMachOp mop [x, y]) + = + case mop of + MO_F_Eq W32 -> condFltCode EQQ x y + MO_F_Ne W32 -> condFltCode NE x y + MO_F_Gt W32 -> condFltCode GTT x y + MO_F_Ge W32 -> condFltCode GE x y + MO_F_Lt W32 -> condFltCode LTT x y + MO_F_Le W32 -> condFltCode LE x y + + MO_F_Eq W64 -> condFltCode EQQ x y + MO_F_Ne W64 -> condFltCode NE x y + MO_F_Gt W64 -> condFltCode GTT x y + MO_F_Ge W64 -> condFltCode GE x y + MO_F_Lt W64 -> condFltCode LTT x y + MO_F_Le W64 -> condFltCode LE x y + + MO_Eq rep -> condIntCode EQQ x y + MO_Ne rep -> condIntCode NE x y + + MO_S_Gt rep -> condIntCode GTT x y + MO_S_Ge rep -> condIntCode GE x y + MO_S_Lt rep -> condIntCode LTT x y + MO_S_Le rep -> condIntCode LE x y + + MO_U_Gt rep -> condIntCode GU x y + MO_U_Ge rep -> condIntCode GEU x y + MO_U_Lt rep -> condIntCode LU x y + MO_U_Le rep -> condIntCode LEU x y + + other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) + +getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) + + + + + +-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be +-- passed back up the tree. + +condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode +condIntCode cond x (CmmLit (CmmInt y rep)) + | fits13Bits y + = do + (src1, code) <- getSomeReg x + let + src2 = ImmInt (fromInteger y) + code' = code `snocOL` SUB False True src1 (RIImm src2) g0 + return (CondCode False cond code') + +condIntCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + code__2 = code1 `appOL` code2 `snocOL` + SUB False True src1 (RIReg src2) g0 + return (CondCode False cond code__2) + + +condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode +condFltCode cond x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp <- getNewRegNat FF64 + let + promote x = FxTOy FF32 FF64 x tmp + + pk1 = cmmExprType x + pk2 = cmmExprType y + + code__2 = + if pk1 `cmmEqType` pk2 then + code1 `appOL` code2 `snocOL` + FCMP True (cmmTypeSize pk1) src1 src2 + else if typeWidth pk1 == W32 then + code1 `snocOL` promote src1 `appOL` code2 `snocOL` + FCMP True FF64 tmp src2 + else + code1 `appOL` code2 `snocOL` promote src2 `snocOL` + FCMP True FF64 src1 tmp + return (CondCode True cond code__2) + + + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). 
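
-- [Editorial sketch, for illustration only; not part of this patch.
--  The two Register flavours drive that decision: with 'Any' the caller
--  picks the destination and the code continuation writes straight into
--  it, while 'Fixed' pins the value to a particular reg, which then has
--  to be moved out. 'useRegister' is a hypothetical helper showing the
--  pattern; assignReg_IntCode below does exactly this.
--
--      useRegister :: Register -> Reg -> InstrBlock
--      useRegister (Any _ mkCode)     dst = mkCode dst
--      useRegister (Fixed _ src code) dst
--              = code `snocOL` OR False g0 (RIReg src) dst
-- ]
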
+ +assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignMem_IntCode pk addr src = do + (srcReg, code) <- getSomeReg src + Amode dstAddr addr_code <- getAmode addr + return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr + + +assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock +assignReg_IntCode pk reg src = do + r <- getRegister src + return $ case r of + Any _ code -> code dst + Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst + where + dst = getRegisterReg reg + + + +-- Floating point assignment to memory +assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignMem_FltCode pk addr src = do + Amode dst__2 code1 <- getAmode addr + (src__2, code2) <- getSomeReg src + tmp1 <- getNewRegNat pk + let + pk__2 = cmmExprType src + code__2 = code1 `appOL` code2 `appOL` + if sizeToWidth pk == typeWidth pk__2 + then unitOL (ST pk src__2 dst__2) + else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1 + , ST pk tmp1 dst__2] + return code__2 + +-- Floating point assignment to a register/temporary +assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock +assignReg_FltCode pk dstCmmReg srcCmmExpr = do + srcRegister <- getRegister srcCmmExpr + let dstReg = getRegisterReg dstCmmReg + + return $ case srcRegister of + Any _ code -> code dstReg + Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg + + + + +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock + +genJump (CmmLit (CmmLabel lbl)) + = return (toOL [CALL (Left target) 0 True, NOP]) + where + target = ImmCLbl lbl + +genJump tree + = do + (target, code) <- getSomeReg tree + return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP) + +-- ----------------------------------------------------------------------------- +-- Unconditional branches + +genBranch :: BlockId -> NatM InstrBlock +genBranch = return . toOL . mkJumpInstr + + +-- ----------------------------------------------------------------------------- +-- Conditional jumps + +{- +Conditional jumps are always to local labels, so we can use branch +instructions. We peek at the arguments to decide what kind of +comparison to do. + +SPARC: First, we have to ensure that the condition codes are set +according to the supplied comparison operation. We generate slightly +different code for floating point comparisons, because a floating +point operation cannot directly precede a @BF@. We assume the worst +and fill that slot with a @NOP@. + +SPARC: Do not fill the delay slots here; you will confuse the register +allocator. +-} + + +genCondJump + :: BlockId -- the branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock + + + +genCondJump bid bool = do + CondCode is_float cond code <- getCondCode bool + return ( + code `appOL` + toOL ( + if is_float + then [NOP, BF cond False bid, NOP] + else [BI cond False bid, NOP] + ) + ) + + + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. 
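
-- [Editorial illustration, not part of this patch: what an unfilled
--  delay slot looks like in the emitted assembly. Every branch or call
--  emitted here is followed by an explicit NOP (see genCondJump above
--  and sparc_mkJumpInstr later in this patch), so the allocator sees
--  one self-contained instruction per slot. The label name is made up
--  for the example:
--
--      ba      .LcA8           ! roughly what BI ALWAYS False bid prints as
--      nop                     ! delay slot, deliberately left empty
-- ]
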
+ +genCCall + :: CmmCallTarget -- function to call + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) + -> NatM InstrBlock + + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +{- + The SPARC calling convention is an absolute + nightmare. The first 6x32 bits of arguments are mapped into + %o0 through %o5, and the remaining arguments are dumped to the + stack, beginning at [%sp+92]. (Note that %o6 == %sp.) + + If we have to put args on the stack, move %o6==%sp down by + the number of words to go on the stack, to ensure there's enough space. + + According to Fraser and Hanson's lcc book, page 478, fig 17.2, + 16 words above the stack pointer is a word for the address of + a structure return value. I use this as a temporary location + for moving values from float to int regs. Certainly it isn't + safe to put anything in the 16 words starting at %sp, since + this area can get trashed at any time due to window overflows + caused by signal handlers. + + A final complication (if the above isn't enough) is that + we can't blithely calculate the arguments one by one into + %o0 .. %o5. Consider the following nested calls: + + fff a (fff b c) + + Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately + the inner call will itself use %o0, which trashes the value put there + in preparation for the outer call. Upshot: we need to calculate the + args into temporary regs, and move those to arg regs or onto the + stack only immediately prior to the call proper. Sigh. + +genCCall + :: CmmCallTarget -- function to call + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) + -> NatM InstrBlock + +-} + + +-- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream +-- are guaranteed to take place before writes afterwards (unlike on PowerPC). +-- Ref: Section 8.4 of the SPARC V9 Architecture manual. +-- +-- In the SPARC case we don't need a barrier. 
+-- +genCCall (CmmPrim (MO_WriteBarrier)) _ _ + = do return nilOL + +genCCall target dest_regs argsAndHints + = do + -- strip hints from the arg regs + let args :: [CmmExpr] + args = map hintlessCmm argsAndHints + + + -- work out the arguments, and assign them to integer regs + argcode_and_vregs <- mapM arg_to_int_vregs args + let (argcodes, vregss) = unzip argcode_and_vregs + let vregs = concat vregss + + let n_argRegs = length allArgRegs + let n_argRegs_used = min (length vregs) n_argRegs + + + -- deal with static vs dynamic call targets + callinsns <- case target of + CmmCallee (CmmLit (CmmLabel lbl)) conv -> + return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) + + CmmCallee expr conv + -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr + return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) + + CmmPrim mop + -> do res <- outOfLineFloatOp mop + lblOrMopExpr <- case res of + Left lbl -> do + return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) + + Right mopExpr -> do + (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr + return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) + + return lblOrMopExpr + + let argcode = concatOL argcodes + + let (move_sp_down, move_sp_up) + = let diff = length vregs - n_argRegs + nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment + in if nn <= 0 + then (nilOL, nilOL) + else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn))) + + let transfer_code + = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE) + + return + $ argcode `appOL` + move_sp_down `appOL` + transfer_code `appOL` + callinsns `appOL` + unitOL NOP `appOL` + move_sp_up `appOL` + assign_code dest_regs + + +-- | Generate code to calculate an argument, and move it into one +-- or two integer vregs. +arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg]) +arg_to_int_vregs arg + + -- If the expr produces a 64 bit int, then we can just use iselExpr64 + | isWord64 (cmmExprType arg) + = do (ChildCode64 code r_lo) <- iselExpr64 arg + let r_hi = getHiVRegFromLo r_lo + return (code, [r_hi, r_lo]) + + | otherwise + = do (src, code) <- getSomeReg arg + tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg) + let pk = cmmExprType arg + + case cmmTypeSize pk of + + -- Load a 64 bit float return value into two integer regs. + FF64 -> do + v1 <- getNewRegNat II32 + v2 <- getNewRegNat II32 + + let Just f0_high = fPair f0 + + let code2 = + code `snocOL` + FMOV FF64 src f0 `snocOL` + ST FF32 f0 (spRel 16) `snocOL` + LD II32 (spRel 16) v1 `snocOL` + ST FF32 f0_high (spRel 16) `snocOL` + LD II32 (spRel 16) v2 + + return (code2, [v1,v2]) + + -- Load a 32 bit float return value into an integer reg + FF32 -> do + v1 <- getNewRegNat II32 + + let code2 = + code `snocOL` + ST FF32 src (spRel 16) `snocOL` + LD II32 (spRel 16) v1 + + return (code2, [v1]) + + -- Move an integer return value into its destination reg. + other -> do + v1 <- getNewRegNat II32 + + let code2 = + code `snocOL` + OR False g0 (RIReg src) v1 + + return (code2, [v1]) + + +-- | Move args from the integer vregs into which they have been +-- marshalled, into %o0 .. %o5, and the rest onto the stack. 
+
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ offset
+        = []
+
+-- out of aregs; move to stack
+move_final (v:vs) [] offset
+        = ST II32 v (spRel offset)
+        : move_final vs [] (offset+1)
+
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset
+        = OR False g0 (RIReg v) a
+        : move_final vs az offset
+
+
+-- | Assign results returned from the call into their
+--      destination regs.
+--
+assign_code :: [CmmHinted LocalReg] -> OrdList Instr
+assign_code [] = nilOL
+
+assign_code [CmmHinted dest _hint]
+ = let  rep     = localRegType dest
+        width   = typeWidth rep
+        r_dest  = getRegisterReg (CmmLocal dest)
+
+        result
+                | isFloatType rep
+                , W32   <- width
+                = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+
+                | isFloatType rep
+                , W64   <- width
+                = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+
+                | not $ isFloatType rep
+                , W32   <- width
+                = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+
+                | not $ isFloatType rep
+                , W64           <- width
+                , r_dest_hi     <- getHiVRegFromLo r_dest
+                = toOL  [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
+                        , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
+   in   result
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineFloatOp
+        :: CallishMachOp
+        -> NatM (Either CLabel CmmExpr)
+
+outOfLineFloatOp mop
+ = do   let functionName
+                = outOfLineFloatOp_table mop
+
+        dflags  <- getDynFlagsNat
+        mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
+                $  mkForeignLabel functionName Nothing True IsFunction
+
+        let mopLabelOrExpr
+                = case mopExpr of
+                        CmmLit (CmmLabel lbl)   -> Left lbl
+                        _                       -> Right mopExpr
+
+        return mopLabelOrExpr
+
+
+-- | Decide what C function to use to implement a CallishMachOp
+--
+outOfLineFloatOp_table
+        :: CallishMachOp
+        -> FastString
+
+outOfLineFloatOp_table mop
+ = case mop of
+        MO_F32_Exp    -> fsLit "expf"
+        MO_F32_Log    -> fsLit "logf"
+        MO_F32_Sqrt   -> fsLit "sqrtf"
+        MO_F32_Pwr    -> fsLit "powf"
+
+        MO_F32_Sin    -> fsLit "sinf"
+        MO_F32_Cos    -> fsLit "cosf"
+        MO_F32_Tan    -> fsLit "tanf"
+
+        MO_F32_Asin   -> fsLit "asinf"
+        MO_F32_Acos   -> fsLit "acosf"
+        MO_F32_Atan   -> fsLit "atanf"
+
+        MO_F32_Sinh   -> fsLit "sinhf"
+        MO_F32_Cosh   -> fsLit "coshf"
+        MO_F32_Tanh   -> fsLit "tanhf"
+
+        MO_F64_Exp    -> fsLit "exp"
+        MO_F64_Log    -> fsLit "log"
+        MO_F64_Sqrt   -> fsLit "sqrt"
+        MO_F64_Pwr    -> fsLit "pow"
+
+        MO_F64_Sin    -> fsLit "sin"
+        MO_F64_Cos    -> fsLit "cos"
+        MO_F64_Tan    -> fsLit "tan"
+
+        MO_F64_Asin   -> fsLit "asin"
+        MO_F64_Acos   -> fsLit "acos"
+        MO_F64_Atan   -> fsLit "atan"
+
+        MO_F64_Sinh   -> fsLit "sinh"
+        MO_F64_Cosh   -> fsLit "cosh"
+        MO_F64_Tanh   -> fsLit "tanh"
+
+        other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
+                        (pprCallishMachOp mop)
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids
+        | opt_PIC
+        = error "MachCodeGen: sparc genSwitch PIC not finished\n"
+
+        | otherwise
+        = do    (e_reg, e_code) <- getSomeReg expr
+
+                base_reg        <- getNewRegNat II32
+                offset_reg      <- getNewRegNat II32
+                dst             <- getNewRegNat II32
+
+                label           <- getNewLabelNat
+                let jumpTable   = map jumpTableEntry ids
+
+                return $ e_code `appOL`
+                 toOL
+                        -- the jump table
+                        [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
+
+                        -- load base of jump table
+                        , SETHI (HI (ImmCLbl label)) base_reg
+                        , OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+
+                        -- the addrs in the table are 32 bits wide..
+ , SLL e_reg (RIImm $ ImmInt 2) offset_reg + + -- load and jump to the destination + , LD II32 (AddrRegReg base_reg offset_reg) dst + , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids] + , NOP ] + + + +-- ----------------------------------------------------------------------------- +-- 'condIntReg' and 'condFltReg': condition codes into registers + +-- Turn those condition codes into integers now (when they appear on +-- the right hand side of an assignment). +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register + +condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat II32 + let + code__2 dst = code `appOL` toOL [ + SUB False True g0 (RIReg src) g0, + SUB True False g0 (RIImm (ImmInt (-1))) dst] + return (Any II32 code__2) + +condIntReg EQQ x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + let + code__2 dst = code1 `appOL` code2 `appOL` toOL [ + XOR False src1 (RIReg src2) dst, + SUB False True g0 (RIReg dst) g0, + SUB True False g0 (RIImm (ImmInt (-1))) dst] + return (Any II32 code__2) + +condIntReg NE x (CmmLit (CmmInt 0 d)) = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat II32 + let + code__2 dst = code `appOL` toOL [ + SUB False True g0 (RIReg src) g0, + ADD True False g0 (RIImm (ImmInt 0)) dst] + return (Any II32 code__2) + +condIntReg NE x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + let + code__2 dst = code1 `appOL` code2 `appOL` toOL [ + XOR False src1 (RIReg src2) dst, + SUB False True g0 (RIReg dst) g0, + ADD True False g0 (RIImm (ImmInt 0)) dst] + return (Any II32 code__2) + +condIntReg cond x y = do + bid1@(BlockId lbl1) <- getBlockIdNat + bid2@(BlockId lbl2) <- getBlockIdNat + CondCode _ cond cond_code <- condIntCode cond x y + let + code__2 dst = cond_code `appOL` toOL [ + BI cond False bid1, NOP, + OR False g0 (RIImm (ImmInt 0)) dst, + BI ALWAYS False bid2, NOP, + NEWBLOCK bid1, + OR False g0 (RIImm (ImmInt 1)) dst, + NEWBLOCK bid2] + return (Any II32 code__2) + +condFltReg cond x y = do + bid1@(BlockId lbl1) <- getBlockIdNat + bid2@(BlockId lbl2) <- getBlockIdNat + CondCode _ cond cond_code <- condFltCode cond x y + let + code__2 dst = cond_code `appOL` toOL [ + NOP, + BF cond False bid1, NOP, + OR False g0 (RIImm (ImmInt 0)) dst, + BI ALWAYS False bid2, NOP, + NEWBLOCK bid1, + OR False g0 (RIImm (ImmInt 1)) dst, + NEWBLOCK bid2] + return (Any II32 code__2) + + + +-- ----------------------------------------------------------------------------- +-- 'trivial*Code': deal with trivial instructions + +-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', +-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. +-- Only look for constants on the right hand side, because that's +-- where the generic optimizer will have put them. + +-- Similarly, for unary instructions, we don't have to worry about +-- matching an StInt as the argument, because genericOpt will already +-- have handled the constant-folding. 
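
-- [Editorial note, for illustration: the fits13Bits guard used above
--  and below exists because SPARC arithmetic instructions only accept a
--  13-bit signed immediate. A sketch of the predicate, assuming the
--  definition exported from SPARC.Regs:
--
--      fits13Bits :: Integral a => a -> Bool
--      fits13Bits x = x >= -4096 && x < 4096
--
--  Wider constants have to be built with SETHI for the high 22 bits
--  plus OR for the low 10, as in getRegister (CmmLit lit) earlier.
-- ]
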
+ +trivialCode pk instr x (CmmLit (CmmInt y d)) + | fits13Bits y + = do + (src1, code) <- getSomeReg x + tmp <- getNewRegNat II32 + let + src2 = ImmInt (fromInteger y) + code__2 dst = code `snocOL` instr src1 (RIImm src2) dst + return (Any II32 code__2) + +trivialCode pk instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + let + code__2 dst = code1 `appOL` code2 `snocOL` + instr src1 (RIReg src2) dst + return (Any II32 code__2) + +------------ +trivialFCode pk instr x y = do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x) + tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y) + tmp <- getNewRegNat FF64 + let + promote x = FxTOy FF32 FF64 x tmp + + pk1 = cmmExprType x + pk2 = cmmExprType y + + code__2 dst = + if pk1 `cmmEqType` pk2 then + code1 `appOL` code2 `snocOL` + instr (floatSize pk) src1 src2 dst + else if typeWidth pk1 == W32 then + code1 `snocOL` promote src1 `appOL` code2 `snocOL` + instr FF64 tmp src2 dst + else + code1 `appOL` code2 `snocOL` promote src2 `snocOL` + instr FF64 src1 tmp dst + return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) + code__2) + +------------ +trivialUCode size instr x = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat size + let + code__2 dst = code `snocOL` instr (RIReg src) dst + return (Any size code__2) + +------------- +trivialUFCode pk instr x = do + (src, code) <- getSomeReg x + tmp <- getNewRegNat pk + let + code__2 dst = code `snocOL` instr src dst + return (Any pk code__2) + + + +coerceDbl2Flt :: CmmExpr -> NatM Register +coerceFlt2Dbl :: CmmExpr -> NatM Register + + +coerceInt2FP width1 width2 x = do + (src, code) <- getSomeReg x + let + code__2 dst = code `appOL` toOL [ + ST (intSize width1) src (spRel (-2)), + LD (intSize width1) (spRel (-2)) dst, + FxTOy (intSize width1) (floatSize width2) dst dst] + return (Any (floatSize $ width2) code__2) + + +-- | Coerce a floating point value to integer +-- +-- NOTE: On sparc v9 there are no instructions to move a value from an +-- FP register directly to an int register, so we have to use a load/store. +-- +coerceFP2Int width1 width2 x + = do let fsize1 = floatSize width1 + fsize2 = floatSize width2 + + isize2 = intSize width2 + + (fsrc, code) <- getSomeReg x + fdst <- getNewRegNat fsize2 + + let code2 dst + = code + `appOL` toOL + -- convert float to int format, leaving it in a float reg. + [ FxTOy fsize1 isize2 fsrc fdst + + -- store the int into mem, then load it back to move + -- it into an actual int reg. + , ST fsize2 fdst (spRel (-2)) + , LD isize2 (spRel (-2)) dst] + + return (Any isize2 code2) + +------------ +coerceDbl2Flt x = do + (src, code) <- getSomeReg x + return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) + +------------ +coerceFlt2Dbl x = do + (src, code) <- getSomeReg x + return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst)) + + + +-- eXTRA_STK_ARGS_HERE + +-- We (allegedly) put the first six C-call arguments in registers; +-- where do we start putting the rest of them? + +-- Moved from Instrs (SDM): + +eXTRA_STK_ARGS_HERE :: Int +eXTRA_STK_ARGS_HERE + = 23 diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs new file mode 100644 index 0000000..d0f12ef --- /dev/null +++ b/compiler/nativeGen/SPARC/Cond.hs @@ -0,0 +1,53 @@ + +module SPARC.Cond ( + Cond(..), + condUnsigned, + condToSigned, + condToUnsigned +) + +where + +-- | Branch condition codes. 
+data Cond + = ALWAYS + | EQQ + | GE + | GEU + | GTT + | GU + | LE + | LEU + | LTT + | LU + | NE + | NEG + | NEVER + | POS + | VC + | VS + deriving Eq + + +condUnsigned :: Cond -> Bool +condUnsigned GU = True +condUnsigned LU = True +condUnsigned GEU = True +condUnsigned LEU = True +condUnsigned _ = False + + +condToSigned :: Cond -> Cond +condToSigned GU = GTT +condToSigned LU = LTT +condToSigned GEU = GE +condToSigned LEU = LE +condToSigned x = x + + +condToUnsigned :: Cond -> Cond +condToUnsigned GTT = GU +condToUnsigned LTT = LU +condToUnsigned GE = GEU +condToUnsigned LE = LEU +condToUnsigned x = x diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs index 9c33231..6dc6477 100644 --- a/compiler/nativeGen/SPARC/Instr.hs +++ b/compiler/nativeGen/SPARC/Instr.hs @@ -10,56 +10,55 @@ #include "nativeGen/NCG.h" module SPARC.Instr ( - Cond(..), RI(..), Instr(..), - riZero, - fpRelEA, - moveSp, - fPair, + maxSpillSlots ) where -import BlockId -import RegsBase import SPARC.Regs +import SPARC.Cond +import Instruction +import RegClass +import Reg +import Size + +import BlockId import Cmm import Outputable -import Constants ( wORD_SIZE ) +import Constants (rESERVED_C_STACK_BYTES ) import FastString +import FastBool import GHC.Exts --- | Branch condition codes. -data Cond - = ALWAYS - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - | NEG - | NEVER - | POS - | VC - | VS - deriving Eq - - -- | Register or immediate data RI = RIReg Reg | RIImm Imm --- | SPARC isntruction set. +-- | instance for sparc instruction set +instance Instruction Instr where + regUsageOfInstr = sparc_regUsageOfInstr + patchRegsOfInstr = sparc_patchRegsOfInstr + isJumpishInstr = sparc_isJumpishInstr + jumpDestsOfInstr = sparc_jumpDestsOfInstr + patchJumpInstr = sparc_patchJumpInstr + mkSpillInstr = sparc_mkSpillInstr + mkLoadInstr = sparc_mkLoadInstr + takeDeltaInstr = sparc_takeDeltaInstr + isMetaInstr = sparc_isMetaInstr + mkRegRegMoveInstr = sparc_mkRegRegMoveInstr + takeRegRegMoveInstr = sparc_takeRegRegMoveInstr + mkJumpInstr = sparc_mkJumpInstr + + +-- | SPARC instruction set. +-- Not complete. This is only the ones we need. +-- data Instr -- meta ops -------------------------------------------------- @@ -78,12 +77,6 @@ data Instr -- specify current stack offset for benefit of subsequent passes. | DELTA Int - -- | spill this reg to a stack slot - | SPILL Reg Int - - -- | reload this reg from a stack slot - | RELOAD Int Reg - -- real instrs ----------------------------------------------- -- Loads and stores. | LD Size AddrMode Reg -- size, src, dst @@ -157,39 +150,290 @@ data Instr | CALL (Either Imm Reg) Int Bool -- target, args, terminal --- | Check if a RI represents a zero value. --- - a literal zero --- - register %g0, which is always zero. +-- | regUsage returns the sets of src and destination registers used +-- by a particular instruction. Machine registers that are +-- pre-allocated to stgRegs are filtered out, because they are +-- uninteresting from a register allocation standpoint. (We wouldn't +-- want them to end up on the free list!) As far as we are concerned, +-- the fixed registers simply don't exist (for allocation purposes, +-- anyway). + +-- regUsage doesn't need to do any trickery for jumps and such. Just +-- state precisely the regs read and written by that insn. The +-- consequences of control flow transfers, as far as register +-- allocation goes, are taken care of by the register allocator. 
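
-- [Editorial worked example, not part of this patch: for a
--  three-address add
--
--      ADD False False r1 (RIReg r2) r3
--
--  the table below yields usage (r1 : regRI (RIReg r2), [r3]), that is
--  RU [r1, r2] [r3]: reads r1 and r2, writes r3, after the
--  'interesting' filter has dropped any pre-allocated machine regs.
-- ]
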
+--
+sparc_regUsageOfInstr :: Instr -> RegUsage
+sparc_regUsageOfInstr instr
+ = case instr of
+    LD    _ addr reg            -> usage (regAddr addr,         [reg])
+    ST    _ reg addr            -> usage (reg : regAddr addr,   [])
+    ADD   _ _ r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    SUB   _ _ r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    UMUL    _ r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    SMUL    _ r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    UDIV    _ r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    SDIV    _ r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    RDY       rd                -> usage ([],                   [rd])
+    WRY       r1 r2             -> usage ([r1, r2],             [])
+    AND   _   r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    ANDN  _   r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    OR    _   r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    ORN   _   r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    XOR   _   r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    XNOR  _   r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    SLL       r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    SRL       r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    SRA       r1 ar r2          -> usage (r1 : regRI ar,        [r2])
+    SETHI _   reg               -> usage ([],                   [reg])
+    FABS  _   r1 r2             -> usage ([r1],                 [r2])
+    FADD  _   r1 r2 r3          -> usage ([r1, r2],             [r3])
+    FCMP  _ _ r1 r2             -> usage ([r1, r2],             [])
+    FDIV  _   r1 r2 r3          -> usage ([r1, r2],             [r3])
+    FMOV  _   r1 r2             -> usage ([r1],                 [r2])
+    FMUL  _   r1 r2 r3          -> usage ([r1, r2],             [r3])
+    FNEG  _   r1 r2             -> usage ([r1],                 [r2])
+    FSQRT _   r1 r2             -> usage ([r1],                 [r2])
+    FSUB  _   r1 r2 r3          -> usage ([r1, r2],             [r3])
+    FxTOy _ _ r1 r2             -> usage ([r1],                 [r2])
+
+    JMP     addr                -> usage (regAddr addr, [])
+    JMP_TBL addr _              -> usage (regAddr addr, [])
+
+    CALL  (Left _  )  _ True    -> noUsage
+    CALL  (Left _  )  n False   -> usage (argRegs n, callClobberedRegs)
+    CALL  (Right reg) _ True    -> usage ([reg], [])
+    CALL  (Right reg) n False   -> usage (reg : (argRegs n), callClobberedRegs)
+    _                           -> noUsage
+
+  where
+        usage (src, dst)
+         = RU (filter interesting src) (filter interesting dst)
+
+        regAddr (AddrRegReg r1 r2)      = [r1, r2]
+        regAddr (AddrRegImm r1 _)       = [r1]
+
+        regRI (RIReg r) = [r]
+        regRI _         = []
+
+
+-- | Interesting regs are virtuals, or ones that are allocatable
+--      by the register allocator.
+interesting :: Reg -> Bool
+interesting reg
+ = case reg of
+        VirtualRegI  _  -> True
+        VirtualRegHi _  -> True
+        VirtualRegF  _  -> True
+        VirtualRegD  _  -> True
+        RealReg i       -> isFastTrue (freeReg i)
+
+
+
+-- | Apply a given mapping to all the register references in this instruction.
+sparc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr +sparc_patchRegsOfInstr instr env = case instr of + LD sz addr reg -> LD sz (fixAddr addr) (env reg) + ST sz reg addr -> ST sz (env reg) (fixAddr addr) + + ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2) + SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2) + UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2) + SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2) + UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2) + SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2) + RDY rd -> RDY (env rd) + WRY r1 r2 -> WRY (env r1) (env r2) + AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2) + ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2) + OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2) + ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2) + XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2) + XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2) + SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2) + SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2) + SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2) + + SETHI imm reg -> SETHI imm (env reg) + + FABS s r1 r2 -> FABS s (env r1) (env r2) + FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3) + FCMP e s r1 r2 -> FCMP e s (env r1) (env r2) + FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3) + FMOV s r1 r2 -> FMOV s (env r1) (env r2) + FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3) + FNEG s r1 r2 -> FNEG s (env r1) (env r2) + FSQRT s r1 r2 -> FSQRT s (env r1) (env r2) + FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3) + FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2) + + JMP addr -> JMP (fixAddr addr) + JMP_TBL addr ids -> JMP_TBL (fixAddr addr) ids + + CALL (Left i) n t -> CALL (Left i) n t + CALL (Right r) n t -> CALL (Right (env r)) n t + _ -> instr + + where + fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) + fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i + + fixRI (RIReg r) = RIReg (env r) + fixRI other = other + + +-------------------------------------------------------------------------------- +sparc_isJumpishInstr :: Instr -> Bool +sparc_isJumpishInstr instr + = case instr of + BI{} -> True + BF{} -> True + JMP{} -> True + JMP_TBL{} -> True + CALL{} -> True + _ -> False + +sparc_jumpDestsOfInstr :: Instr -> [BlockId] +sparc_jumpDestsOfInstr insn + = case insn of + BI _ _ id -> [id] + BF _ _ id -> [id] + JMP_TBL _ ids -> ids + _ -> [] + + +sparc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr +sparc_patchJumpInstr insn patchF + = case insn of + BI cc annul id -> BI cc annul (patchF id) + BF cc annul id -> BF cc annul (patchF id) + _ -> insn + + +-------------------------------------------------------------------------------- +-- | Make a spill instruction. +-- On SPARC we spill below frame pointer leaving 2 words/spill +sparc_mkSpillInstr + :: Reg -- ^ register to spill + -> Int -- ^ current stack delta + -> Int -- ^ spill slot to use + -> Instr + +sparc_mkSpillInstr reg _ slot + = let off = spillSlotToOffset slot + off_w = 1 + (off `div` 4) + sz = case regClass reg of + RcInteger -> II32 + RcFloat -> FF32 + RcDouble -> FF64 + + in ST sz reg (fpRel (negate off_w)) + + +-- | Make a spill reload instruction. 
+sparc_mkLoadInstr + :: Reg -- ^ register to load + -> Int -- ^ current stack delta + -> Int -- ^ spill slot to use + -> Instr + +sparc_mkLoadInstr reg _ slot + = let off = spillSlotToOffset slot + off_w = 1 + (off `div` 4) + sz = case regClass reg of + RcInteger -> II32 + RcFloat -> FF32 + RcDouble -> FF64 + + in LD sz (fpRel (- off_w)) reg + +-- | Convert a spill slot number to a *byte* offset, with no sign. -- -riZero :: RI -> Bool -riZero (RIImm (ImmInt 0)) = True -riZero (RIImm (ImmInteger 0)) = True -riZero (RIReg (RealReg 0)) = True -riZero _ = False +spillSlotToOffset :: Int -> Int +spillSlotToOffset slot + | slot >= 0 && slot < maxSpillSlots + = 64 + spillSlotSize * slot + + | otherwise + = pprPanic "spillSlotToOffset:" + ( text "invalid spill location: " <> int slot + $$ text "maxSpillSlots: " <> int maxSpillSlots) + + +-- | We need 8 bytes because our largest registers are 64 bit. +spillSlotSize :: Int +spillSlotSize = 8 --- | Calculate the effective address which would be used by the --- corresponding fpRel sequence. fpRel is in MachRegs.lhs, --- alas -- can't have fpRelEA here because of module dependencies. -fpRelEA :: Int -> Reg -> Instr -fpRelEA n dst - = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst +-- | The maximum number of spill slots available on the C stack. +-- If we use up all of the slots, then we're screwed. +maxSpillSlots :: Int +maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1 --- | Code to shift the stack pointer by n words. -moveSp :: Int -> Instr -moveSp n - = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp +-------------------------------------------------------------------------------- +-- | See if this instruction is telling us the current C stack delta +sparc_takeDeltaInstr + :: Instr + -> Maybe Int + +sparc_takeDeltaInstr instr + = case instr of + DELTA i -> Just i + _ -> Nothing + + +sparc_isMetaInstr + :: Instr + -> Bool + +sparc_isMetaInstr instr + = case instr of + COMMENT{} -> True + LDATA{} -> True + NEWBLOCK{} -> True + DELTA{} -> True + _ -> False + + +-- | Make a reg-reg move instruction. +-- On SPARC v8 there are no instructions to move directly between +-- floating point and integer regs. If we need to do that then we +-- have to go via memory. +-- +sparc_mkRegRegMoveInstr + :: Reg + -> Reg + -> Instr + +sparc_mkRegRegMoveInstr src dst + = case regClass src of + RcInteger -> ADD False False src (RIReg g0) dst + RcDouble -> FMOV FF64 src dst + RcFloat -> FMOV FF32 src dst + + +-- | Check whether an instruction represents a reg-reg move. +-- The register allocator attempts to eliminate reg->reg moves whenever it can, +-- by assigning the src and dest temporaries to the same real register. +-- +sparc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg) +sparc_takeRegRegMoveInstr instr + = case instr of + ADD False False src (RIReg src2) dst + | g0 == src2 -> Just (src, dst) + + FMOV FF64 src dst -> Just (src, dst) + FMOV FF32 src dst -> Just (src, dst) + _ -> Nothing --- | Produce the second-half-of-a-double register given the first half. -fPair :: Reg -> Maybe Reg -fPair (RealReg n) - | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1)) +-- | Make an unconditional branch instruction. +sparc_mkJumpInstr + :: BlockId + -> [Instr] -fPair (VirtualRegD u) - = Just (VirtualRegHi u) +sparc_mkJumpInstr id + = [BI ALWAYS False id + , NOP] -- fill the branch delay slot. 
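
-- [Editorial worked example of the spill layout above, for
--  illustration only: slot 0 lives at byte offset 64 + 8*0 = 64, which
--  is word offset 1 + 64 `div` 4 = 17 below the frame pointer, so
--  spilling an integer reg to slot 0 produces
--
--      ST II32 reg (fpRel (-17))
--
--  and the matching reload is LD II32 (fpRel (-17)) reg.
-- ]
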
-fPair _
-       = trace ("MachInstrs.fPair: can't get high half of supposed double reg ")
-               Nothing
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
index 7d64df1..a0d5fff 100644
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -7,12 +7,15 @@
 -----------------------------------------------------------------------------
 module SPARC.Ppr (
+       pprNatCmmTop,
+       pprBasicBlock,
+       pprSectionHeader,
+       pprData,
+       pprInstr,
        pprUserReg,
        pprSize,
        pprImm,
-       pprSectionHeader,
-       pprDataItem,
-       pprInstr
+       pprDataItem
 )
 where
@@ -20,20 +23,119 @@ where
 #include "HsVersions.h"
 #include "nativeGen/NCG.h"
 
-import PprBase
-import RegsBase
 import SPARC.Regs
+import SPARC.RegInfo
 import SPARC.Instr
+import SPARC.Cond
+import Instruction
+import Reg
+import Size
+import PprBase
+
 import BlockId
 import Cmm
-
 import CLabel
-import Panic   ( panic )
 import Unique  ( pprUnique )
+import qualified Outputable
+import Outputable (Outputable, panic)
 import Pretty
 import FastString
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) =
+  pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
+  pprSectionHeader Text $$
+  (if null info then -- blocks guaranteed not null, so label needed
+       pprLabel lbl
+   else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+            pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+                <> char ':' $$
+#endif
+       vcat (map pprData info) $$
+       pprLabel (entryLblToInfoLbl lbl)
+  ) $$
+  vcat (map pprBasicBlock blocks)
+     -- above: Even the first block gets a label, because with branch-chain
+     -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+        -- If we are using the .subsections_via_symbols directive
+        -- (available on recent versions of Darwin),
+        -- we have to make sure that there is some kind of reference
+        -- from the entry code to a label on the _top_ of the info table,
+        -- so that the linker will not think it is unreferenced and dead-strip
+        -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info) + then text "\t.long " + <+> pprCLabel_asm (entryLblToInfoLbl lbl) + <+> char '-' + <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) + else empty +#endif + + +pprBasicBlock :: NatBasicBlock Instr -> Doc +pprBasicBlock (BasicBlock (BlockId id) instrs) = + pprLabel (mkAsmTempLabel id) $$ + vcat (map pprInstr instrs) + + +pprData :: CmmStatic -> Doc +pprData (CmmAlign bytes) = pprAlign bytes +pprData (CmmDataLabel lbl) = pprLabel lbl +pprData (CmmString str) = pprASCII str +pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes +pprData (CmmStaticLit lit) = pprDataItem lit + +pprGloblDecl :: CLabel -> Doc +pprGloblDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext IF_ARCH_sparc((sLit ".global "), + (sLit ".globl ")) <> + pprCLabel_asm lbl + +pprTypeAndSizeDecl :: CLabel -> Doc +#if linux_TARGET_OS +pprTypeAndSizeDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext (sLit ".type ") <> + pprCLabel_asm lbl <> ptext (sLit ", @object") +#else +pprTypeAndSizeDecl _ + = empty +#endif + +pprLabel :: CLabel -> Doc +pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':') + + +pprASCII :: [Word8] -> Doc +pprASCII str + = vcat (map do1 str) $$ do1 0 + where + do1 :: Word8 -> Doc + do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w) + +pprAlign :: Int -> Doc +pprAlign bytes = + ptext (sLit ".align ") <> int bytes + + +-- ----------------------------------------------------------------------------- +-- pprInstr: print an 'Instr' + +instance Outputable Instr where + ppr instr = Outputable.docToSDoc $ pprInstr instr -- | Pretty print a register. @@ -101,12 +203,13 @@ pprSize :: Size -> Doc pprSize x = ptext (case x of - II8 -> sLit "ub" - II16 -> sLit "uh" - II32 -> sLit "" - II64 -> sLit "d" - FF32 -> sLit "" - FF64 -> sLit "d") + II8 -> sLit "ub" + II16 -> sLit "uh" + II32 -> sLit "" + II64 -> sLit "d" + FF32 -> sLit "" + FF64 -> sLit "d" + _ -> panic "SPARC.Ppr.pprSize: no match") -- | Pretty print a size for an instruction suffix. @@ -120,7 +223,8 @@ pprStSize x II32 -> sLit "" II64 -> sLit "x" FF32 -> sLit "" - FF64 -> sLit "d") + FF64 -> sLit "d" + _ -> panic "SPARC.Ppr.pprSize: no match") -- | Pretty print a condition code. 
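
-- [Editorial example, not part of this patch: what the pprData clauses
--  above render, following the definitions in this file:
--
--      CmmUninitialised 8      ==>     .skip 8
--      CmmAlign 4              ==>     .align 4
--      CmmString "hi"          ==>     .byte 104
--                                      .byte 105
--                                      .byte 0
--
--  (pprASCII always terminates the string with a trailing zero byte.)
-- ]
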
@@ -258,6 +362,7 @@ pprInstr (NEWBLOCK _) pprInstr (LDATA _ _) = panic "PprMach.pprInstr: LDATA" +{- pprInstr (SPILL reg slot) = hcat [ ptext (sLit "\tSPILL"), @@ -273,7 +378,7 @@ pprInstr (RELOAD slot reg) ptext (sLit "SLOT") <> parens (int slot), comma, pprReg reg] - +-} -- a clumsy hack for now, to handle possible double alignment problems -- even clumsier, to allow for RegReg regs that show when doing indexed diff --git a/compiler/nativeGen/SPARC/RegInfo.hs b/compiler/nativeGen/SPARC/RegInfo.hs index 8f8a977..025e302 100644 --- a/compiler/nativeGen/SPARC/RegInfo.hs +++ b/compiler/nativeGen/SPARC/RegInfo.hs @@ -8,241 +8,115 @@ ----------------------------------------------------------------------------- module SPARC.RegInfo ( - -- machine specific - RegUsage(..), - noUsage, - regUsage, - patchRegs, - jumpDests, - isJumpish, - patchJump, - isRegRegMove, + mkVReg, + + riZero, + fpRelEA, + moveSp, + fPair, + + shortcutStatic, + regDotColor, JumpDest(..), canShortcut, - shortcutJump, - - mkSpillInstr, - mkLoadInstr, - mkRegRegMoveInstr, - mkBranchInstr, - - spillSlotSize, - maxSpillSlots, - spillSlotToOffset + shortcutJump, ) where -#include "nativeGen/NCG.h" -#include "HsVersions.h" - import SPARC.Instr import SPARC.Regs -import RegsBase +import RegClass +import Reg +import Size +import Constants (wORD_SIZE) +import Cmm +import CLabel import BlockId import Outputable -import Constants ( rESERVED_C_STACK_BYTES ) -import FastBool - - --- | Represents what regs are read and written to in an instruction. --- -data RegUsage - = RU [Reg] -- regs read from - [Reg] -- regs written to - - --- | No regs read or written to. -noUsage :: RegUsage -noUsage = RU [] [] - - --- | regUsage returns the sets of src and destination registers used --- by a particular instruction. Machine registers that are --- pre-allocated to stgRegs are filtered out, because they are --- uninteresting from a register allocation standpoint. (We wouldn't --- want them to end up on the free list!) As far as we are concerned, --- the fixed registers simply don't exist (for allocation purposes, --- anyway). - --- regUsage doesn't need to do any trickery for jumps and such. Just --- state precisely the regs read and written by that insn. The --- consequences of control flow transfers, as far as register --- allocation goes, are taken care of by the register allocator. 
--- -regUsage :: Instr -> RegUsage -regUsage instr - = case instr of - SPILL reg _ -> usage ([reg], []) - RELOAD _ reg -> usage ([], [reg]) - - LD _ addr reg -> usage (regAddr addr, [reg]) - ST _ reg addr -> usage (reg : regAddr addr, []) - ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - RDY rd -> usage ([], [rd]) - WRY r1 r2 -> usage ([r1, r2], []) - AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SLL r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SRL r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SRA r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SETHI _ reg -> usage ([], [reg]) - FABS _ r1 r2 -> usage ([r1], [r2]) - FADD _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FCMP _ _ r1 r2 -> usage ([r1, r2], []) - FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FMOV _ r1 r2 -> usage ([r1], [r2]) - FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FNEG _ r1 r2 -> usage ([r1], [r2]) - FSQRT _ r1 r2 -> usage ([r1], [r2]) - FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FxTOy _ _ r1 r2 -> usage ([r1], [r2]) - - JMP addr -> usage (regAddr addr, []) - JMP_TBL addr _ -> usage (regAddr addr, []) - - CALL (Left _ ) _ True -> noUsage - CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs) - CALL (Right reg) _ True -> usage ([reg], []) - CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs) - _ -> noUsage - - where - usage (src, dst) - = RU (filter interesting src) (filter interesting dst) - - regAddr (AddrRegReg r1 r2) = [r1, r2] - regAddr (AddrRegImm r1 _) = [r1] +import Unique - regRI (RIReg r) = [r] - regRI _ = [] +-- | Make a virtual reg with this size. +mkVReg :: Unique -> Size -> Reg +mkVReg u size + | not (isFloatSize size) + = VirtualRegI u --- | Interesting regs are virtuals, or ones that are allocatable --- by the register allocator. -interesting :: Reg -> Bool -interesting reg - = case reg of - VirtualRegI _ -> True - VirtualRegHi _ -> True - VirtualRegF _ -> True - VirtualRegD _ -> True - RealReg i -> isFastTrue (freeReg i) - - - --- | Apply a given mapping to tall the register references in this instruction. 
- -patchRegs :: Instr -> (Reg -> Reg) -> Instr -patchRegs instr env = case instr of - SPILL reg slot -> SPILL (env reg) slot - RELOAD slot reg -> RELOAD slot (env reg) - - LD sz addr reg -> LD sz (fixAddr addr) (env reg) - ST sz reg addr -> ST sz (env reg) (fixAddr addr) - - ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2) - SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2) - UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2) - SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2) - UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2) - SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2) - RDY rd -> RDY (env rd) - WRY r1 r2 -> WRY (env r1) (env r2) - AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2) - ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2) - OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2) - ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2) - XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2) - XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2) - SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2) - SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2) - SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2) - - SETHI imm reg -> SETHI imm (env reg) - - FABS s r1 r2 -> FABS s (env r1) (env r2) - FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3) - FCMP e s r1 r2 -> FCMP e s (env r1) (env r2) - FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3) - FMOV s r1 r2 -> FMOV s (env r1) (env r2) - FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3) - FNEG s r1 r2 -> FNEG s (env r1) (env r2) - FSQRT s r1 r2 -> FSQRT s (env r1) (env r2) - FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3) - FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2) - - JMP addr -> JMP (fixAddr addr) - JMP_TBL addr ids -> JMP_TBL (fixAddr addr) ids - - CALL (Left i) n t -> CALL (Left i) n t - CALL (Right r) n t -> CALL (Right (env r)) n t - _ -> instr - - where - fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) - fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i - - fixRI (RIReg r) = RIReg (env r) - fixRI other = other - - --- ----------------------------------------------------------------------------- --- Determine the possible destinations from the current instruction. - --- (we always assume that the next instruction is also a valid destination; --- if this isn't the case then the jump should be at the end of the basic --- block). - -jumpDests :: Instr -> [BlockId] -> [BlockId] -jumpDests insn acc - = case insn of - BI _ _ id -> id : acc - BF _ _ id -> id : acc - JMP_TBL _ ids -> ids ++ acc - _other -> acc + | otherwise + = case size of + FF32 -> VirtualRegF u + FF64 -> VirtualRegD u + _ -> panic "mkVReg" --- | Check whether a particular instruction is a jump, branch or call instruction (jumpish) --- We can't just use jumpDests above because the jump might take its arg, --- so the instr won't contain a blockid. +-- | Check if a RI represents a zero value. +-- - a literal zero +-- - register %g0, which is always zero. -- -isJumpish :: Instr -> Bool -isJumpish instr - = case instr of - BI{} -> True - BF{} -> True - JMP{} -> True - JMP_TBL{} -> True - CALL{} -> True - _ -> False - - --- | Change the destination of this jump instruction --- Used in joinToTargets in the linear allocator, when emitting fixup code --- for join points. 
-patchJump :: Instr -> BlockId -> BlockId -> Instr -patchJump insn old new - = case insn of - BI cc annul id - | id == old -> BI cc annul new - - BF cc annul id - | id == old -> BF cc annul new - - _other -> insn - +riZero :: RI -> Bool +riZero (RIImm (ImmInt 0)) = True +riZero (RIImm (ImmInteger 0)) = True +riZero (RIReg (RealReg 0)) = True +riZero _ = False + + +-- | Calculate the effective address which would be used by the +-- corresponding fpRel sequence. fpRel is in MachRegs.lhs, +-- alas -- can't have fpRelEA here because of module dependencies. +fpRelEA :: Int -> Reg -> Instr +fpRelEA n dst + = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst + + +-- | Code to shift the stack pointer by n words. +moveSp :: Int -> Instr +moveSp n + = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp + + +-- | Produce the second-half-of-a-double register given the first half. +fPair :: Reg -> Maybe Reg +fPair (RealReg n) + | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1)) + +fPair (VirtualRegD u) + = Just (VirtualRegHi u) + +fPair _ + = trace ("MachInstrs.fPair: can't get high half of supposed double reg ") + Nothing + +-- Here because it knows about JumpDest +shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic +shortcutStatic fn (CmmStaticLit (CmmLabel lab)) + | Just uq <- maybeAsmTemp lab + = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq))) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) + | Just uq <- maybeAsmTemp lbl1 + = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off) + -- slightly dodgy, we're ignoring the second label, but this + -- works with the way we use CmmLabelDiffOff for jump tables now. +shortcutStatic _ other_static + = other_static + +shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel +shortBlockId fn blockid@(BlockId uq) = + case fn blockid of + Nothing -> mkAsmTempLabel uq + Just (DestBlockId blockid') -> shortBlockId fn blockid' + Just (DestImm (ImmCLbl lbl)) -> lbl + _other -> panic "shortBlockId" + + +regDotColor :: Reg -> SDoc +regDotColor reg + = case regClass reg of + RcInteger -> text "blue" + RcFloat -> text "red" + RcDouble -> text "green" @@ -253,108 +127,3 @@ canShortcut _ = Nothing shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr shortcutJump _ other = other - - - --- | Make a spill instruction. --- On SPARC we spill below frame pointer leaving 2 words/spill -mkSpillInstr - :: Reg -- ^ register to spill - -> Int -- ^ current stack delta - -> Int -- ^ spill slot to use - -> Instr - -mkSpillInstr reg _ slot - = let off = spillSlotToOffset slot - off_w = 1 + (off `div` 4) - sz = case regClass reg of - RcInteger -> II32 - RcFloat -> FF32 - RcDouble -> FF64 - - in ST sz reg (fpRel (negate off_w)) - - --- | Make a spill reload instruction. -mkLoadInstr - :: Reg -- ^ register to load - -> Int -- ^ current stack delta - -> Int -- ^ spill slot to use - -> Instr - -mkLoadInstr reg _ slot - = let off = spillSlotToOffset slot - off_w = 1 + (off `div` 4) - sz = case regClass reg of - RcInteger -> II32 - RcFloat -> FF32 - RcDouble -> FF64 - - in LD sz (fpRel (- off_w)) reg - - --- | Make a reg-reg move instruction. --- On SPARC v8 there are no instructions to move directly between --- floating point and integer regs. If we need to do that then we --- have to go via memory. 
--- -mkRegRegMoveInstr - :: Reg - -> Reg - -> Instr - -mkRegRegMoveInstr src dst - = case regClass src of - RcInteger -> ADD False False src (RIReg g0) dst - RcDouble -> FMOV FF64 src dst - RcFloat -> FMOV FF32 src dst - - --- | Check whether an instruction represents a reg-reg move. --- The register allocator attempts to eliminate reg->reg moves whenever it can, --- by assigning the src and dest temporaries to the same real register. --- -isRegRegMove :: Instr -> Maybe (Reg,Reg) -isRegRegMove instr - = case instr of - ADD False False src (RIReg src2) dst - | g0 == src2 -> Just (src, dst) - - FMOV FF64 src dst -> Just (src, dst) - FMOV FF32 src dst -> Just (src, dst) - _ -> Nothing - - --- | Make an unconditional branch instruction. -mkBranchInstr - :: BlockId - -> [Instr] - -mkBranchInstr id - = [BI ALWAYS False id - , NOP] -- fill the branch delay slot. - - --- | TODO: Why do we need 8 bytes per slot?? -BL 2009/02 -spillSlotSize :: Int -spillSlotSize = 8 - - --- | The maximum number of spill slots available on the C stack. --- If we use up all of the slots, then we're screwed. -maxSpillSlots :: Int -maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1 - - --- | Convert a spill slot number to a *byte* offset, with no sign. --- -spillSlotToOffset :: Int -> Int -spillSlotToOffset slot - | slot >= 0 && slot < maxSpillSlots - = 64 + spillSlotSize * slot - - | otherwise - = pprPanic "spillSlotToOffset:" - ( text "invalid spill location: " <> int slot - $$ text "maxSpillSlots: " <> int maxSpillSlots) - diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs index 987fc2d..1fb6a01 100644 --- a/compiler/nativeGen/SPARC/Regs.hs +++ b/compiler/nativeGen/SPARC/Regs.hs @@ -5,17 +5,6 @@ -- ----------------------------------------------------------------------------- module SPARC.Regs ( - - -- sizes - Size(..), - intSize, - floatSize, - isFloatSize, - wordSize, - cmmTypeSize, - sizeToWidth, - mkVReg, - -- immediate values Imm(..), strImmLit, @@ -39,113 +28,33 @@ module SPARC.Regs ( fits13Bits, largeOffsetError, gReg, iReg, lReg, oReg, fReg, - fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27, + fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f22, f26, f27, nCG_FirstFloatReg, - -- horror show + -- allocatable freeReg, - globalRegMaybe + allocatableRegs, + globalRegMaybe, + + get_GlobalReg_reg_or_addr ) where -#include "nativeGen/NCG.h" -#include "HsVersions.h" -#include "../includes/MachRegs.h" -import RegsBase +import Reg +import RegClass +import CgUtils ( get_GlobalReg_addr ) import BlockId import Cmm import CLabel ( CLabel ) import Pretty -import Outputable ( Outputable(..), pprPanic, panic ) +import Outputable ( panic ) import qualified Outputable -import Unique import Constants import FastBool --- sizes ----------------------------------------------------------------------- - --- | A 'Size' also includes format information, such as whether --- the word is signed or unsigned. --- -data Size - = II8 -- byte (signed) - | II16 -- halfword (signed, 2 bytes) - | II32 -- word (4 bytes) - | II64 -- word (8 bytes) - | FF32 -- IEEE single-precision floating pt - | FF64 -- IEEE single-precision floating pt - deriving Eq - - --- | Get the integer size of this width. -intSize :: Width -> Size -intSize width - = case width of - W8 -> II8 - W16 -> II16 - W32 -> II32 - W64 -> II64 - other -> pprPanic "SPARC.Regs.intSize" (ppr other) - - --- | Get the float size of this width. 
-floatSize :: Width -> Size
-floatSize width
- = case width of
-       W32     -> FF32
-       W64     -> FF64
-       other   -> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Check if a size represents a floating point value.
-isFloatSize :: Size -> Bool
-isFloatSize size
- = case size of
-       FF32    -> True
-       FF64    -> True
-       _       -> False
-
-
--- | Size of a machine word.
---     This is big enough to hold a pointer.
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
--- | Convert a Cmm type to a Size.
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty
-       | isFloatType ty        = floatSize (typeWidth ty)
-       | otherwise             = intSize (typeWidth ty)
-
-
--- | Get the Width of a Size.
-sizeToWidth :: Size -> Width
-sizeToWidth size
- = case size of
-       II8     -> W8
-       II16    -> W16
-       II32    -> W32
-       II64    -> W64
-       FF32    -> W32
-       FF64    -> W64
-
-
--- | Make a virtual reg with this size.
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
-       | not (isFloatSize size)
-       = VirtualRegI u
-
-       | otherwise
-       = case size of
-               FF32    -> VirtualRegF u
-               FF64    -> VirtualRegD u
-               _       -> panic "mkVReg"
-
 
 -- immediates ------------------------------------------------------------------
@@ -390,48 +299,13 @@ o1 = RealReg (oReg 1)
 
 f0 = RealReg (fReg 0)
 
+-- | We use the first few float regs as double precision.
+--     This is the RegNo of the first float reg we use as single precision.
+--
 nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg NCG_FirstFloatReg
-#else
-nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg f22
-#endif
-
-
--- horror show -----------------------------------------------------------------
-#if sparc_TARGET_ARCH
-#define g0 0
-#define g1 1
-#define g2 2
-#define g3 3
-#define g4 4
-#define g5 5
-#define g6 6
-#define g7 7
-#define o0 8
-#define o1 9
-#define o2 10
-#define o3 11
-#define o4 12
-#define o5 13
-#define o6 14
-#define o7 15
-#define l0 16
-#define l1 17
-#define l2 18
-#define l3 19
-#define l4 20
-#define l5 21
-#define l6 22
-#define l7 23
-#define i0 24
-#define i1 25
-#define i2 26
-#define i3 27
-#define i4 28
-#define i5 29
-#define i6 30
-#define i7 31
+nCG_FirstFloatReg = 54
+
+
 
 -- | Check whether a machine register is free for allocation.
 --     This needs to match the info in includes/MachRegs.h otherwise modules
@@ -445,7 +319,11 @@ freeReg regno
 
        -- %g1(r1) - %g4(r4) are allocable -----------------
 
-freeReg :: RegNo -> FastBool
+       -- %g5(r5) - %g7(r7)
+       --      are reserved for the OS
+       5       -> fastBool False
+       6       -> fastBool False
+       7       -> fastBool False
 
        -- %o0(r8) - %o5(r13) are allocable ----------------
 
@@ -507,7 +385,15 @@ freeReg :: RegNo -> FastBool
        -- regs not matched above are allocable.
        _       -> fastBool True
 
-
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs for which we are prepared to allow the
+-- register allocator to attempt to map VRegs to.
+allocatableRegs :: [RegNo]
+allocatableRegs
+   = let isFree i = isFastTrue (freeReg i)
+     in  filter isFree allMachRegNos
 
 
 -- | Returns Just the real register that a global register is stored in.
@@ -539,15 +425,20 @@ globalRegMaybe gg
 
        Hp      -> Just (RealReg 27)    -- %i3
        HpLim   -> Just (RealReg 28)    -- %i4
 
-globalRegMaybe :: GlobalReg -> Maybe Reg
-
-
-
-#else
-
-freeReg _      = 0#
-globalRegMaybe = panic "SPARC.Regs.globalRegMaybe: not defined"
+       BaseReg -> Just (RealReg 25)    -- %i1
+
+       _       -> Nothing
 
-#endif
+-- We map STG registers onto appropriate CmmExprs.  Either they map
+-- to real machine registers or are stored as offsets from BaseReg.  Given
+-- Given a GlobalReg, get_GlobalReg_reg_or_addr produces either the
+-- real register it is in on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+   = case globalRegMaybe mid of
+        Just rr -> Left rr
+        Nothing -> Right (get_GlobalReg_addr mid)
diff --git a/compiler/nativeGen/Size.hs b/compiler/nativeGen/Size.hs
new file mode 100644
index 0000000..3be5430
--- /dev/null
+++ b/compiler/nativeGen/Size.hs
@@ -0,0 +1,103 @@
+-- | Sizes on this architecture
+--      A Size is a combination of width and class
+--
+--      TODO: Rename this to "Format" instead of "Size" to reflect
+--            the fact that it represents floating point vs integer.
+--
+--      TODO: Signed vs unsigned?
+--
+--      TODO: This module is currently shared by all architectures because
+--            NCGMonad needs to know about it to make a VReg. It would be better
+--            to have architecture specific formats, and do the overloading
+--            properly. e.g. SPARC doesn't care about FF80.
+--
+module Size (
+        Size(..),
+        intSize,
+        floatSize,
+        isFloatSize,
+        cmmTypeSize,
+        sizeToWidth
+)
+
+where
+
+import Cmm
+import Outputable
+
+-- It looks very like the old MachRep, but it's now of purely local
+-- significance, here in the native code generator.  You can change it
+-- without global consequences.
+--
+-- A major use is as an opcode qualifier; thus the opcode
+--      mov.l a b
+-- might be encoded
+--      MOV II32 a b
+-- where the Size field encodes the ".l" part.
+
+-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
+--       here.  I've removed them from the x86 version, we'll see what happens --SDM
+
+-- ToDo: quite a few occurrences of Size could usefully be replaced by Width
+
+data Size
+        = II8
+        | II16
+        | II32
+        | II64
+        | FF32
+        | FF64
+        | FF80
+        deriving (Show, Eq)
+
+
+-- | Get the integer size of this width.
+intSize :: Width -> Size
+intSize width
+ = case width of
+        W8      -> II8
+        W16     -> II16
+        W32     -> II32
+        W64     -> II64
+        other   -> pprPanic "Size.intSize" (ppr other)
+
+
+-- | Get the float size of this width.
+floatSize :: Width -> Size
+floatSize width
+ = case width of
+        W32     -> FF32
+        W64     -> FF64
+        other   -> pprPanic "Size.floatSize" (ppr other)
+
+
+-- | Check if a size represents a floating point value.
+isFloatSize :: Size -> Bool
+isFloatSize size
+ = case size of
+        FF32    -> True
+        FF64    -> True
+        FF80    -> True
+        _       -> False
+
+
+-- | Convert a Cmm type to a Size.
+cmmTypeSize :: CmmType -> Size
+cmmTypeSize ty
+        | isFloatType ty        = floatSize (typeWidth ty)
+        | otherwise             = intSize (typeWidth ty)
+
+
+-- | Get the Width of a Size.
+sizeToWidth :: Size -> Width
+sizeToWidth size
+ = case size of
+        II8     -> W8
+        II16    -> W16
+        II32    -> W32
+        II64    -> W64
+        FF32    -> W32
+        FF64    -> W64
+        FF80    -> W80
+
+
diff --git a/compiler/nativeGen/TargetReg.hs b/compiler/nativeGen/TargetReg.hs
new file mode 100644
index 0000000..2643b00
--- /dev/null
+++ b/compiler/nativeGen/TargetReg.hs
@@ -0,0 +1,101 @@
+
+-- | Hard wired things related to registers.
+--      This module is preventing the native code generator from being
+--      able to emit code for non-host architectures.
+--
+--      TODO: Do a better job of the overloading, and eliminate this module.
+--      We'd probably do better with a Register type class, and hook this to
+--      Instruction somehow.
+--
+--      TODO: We should also make arch specific versions of RegAlloc.Graph.TrivColorable
+
+module TargetReg (
+        targetRegClass,
+        targetMkVReg,
+        targetWordSize,
+        targetRegDotColor
+)
+
+where
+
+#include "HsVersions.h"
+
+import Reg
+import RegClass
+import Size
+
+import CmmExpr  (wordWidth)
+import Outputable
+import Unique
+
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import qualified X86.Regs       as X86
+import qualified X86.RegInfo    as X86
+
+#elif powerpc_TARGET_ARCH
+import qualified PPC.Regs       as PPC
+import qualified PPC.RegInfo    as PPC
+
+#elif sparc_TARGET_ARCH
+import qualified SPARC.Regs     as SPARC
+import qualified SPARC.RegInfo  as SPARC
+
+
+#else
+#error "RegAlloc.Graph.TargetReg: not defined"
+#endif
+
+-- x86 -------------------------------------------------------------------------
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = X86.regClass
+
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = X86.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = X86.regDotColor
+
+
+-- ppc -------------------------------------------------------------------------
+#elif powerpc_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = PPC.regClass
+
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = PPC.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = PPC.regDotColor
+
+
+-- sparc -----------------------------------------------------------------------
+#elif sparc_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = SPARC.regClass
+
+-- | Size of a machine word.
+--      This is big enough to hold a pointer.
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = SPARC.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = SPARC.regDotColor
+
+--------------------------------------------------------------------------------
+#else
+#error "RegAlloc.Graph.TargetReg: not defined"
+#endif
+
+
+
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
new file mode 100644
index 0000000..43495a4
--- /dev/null
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -0,0 +1,2313 @@
+{-# OPTIONS -w #-}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+ +module X86.CodeGen ( + cmmTopCodeGen, + InstrBlock +) + +where + +#include "HsVersions.h" +#include "nativeGen/NCG.h" +#include "MachDeps.h" + +-- NCG stuff: +import X86.Instr +import X86.Cond +import X86.Regs +import X86.RegInfo +import X86.Ppr +import Instruction +import PIC +import NCGMonad +import Size +import Reg +import RegClass +import Platform + +-- Our intermediate code: +import BasicTypes +import BlockId +import PprCmm ( pprExpr ) +import Cmm +import CLabel +import ClosureInfo ( C_SRT(..) ) + +-- The rest: +import StaticFlags ( opt_PIC ) +import ForeignCall ( CCallConv(..) ) +import OrdList +import Pretty +import qualified Outputable as O +import Outputable +import FastString +import FastBool ( isFastTrue ) +import Constants ( wORD_SIZE ) +import DynFlags + +import Debug.Trace ( trace ) + +import Control.Monad ( mapAndUnzipM ) +import Data.Maybe ( fromJust ) +import Data.Bits +import Data.Word +import Data.Int + + +cmmTopCodeGen + :: DynFlags + -> RawCmmTop + -> NatM [NatCmmTop Instr] + +cmmTopCodeGen dynflags + (CmmProc info lab params (ListGraph blocks)) = do + (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks + picBaseMb <- getPicBaseMaybeNat + let proc = CmmProc info lab params (ListGraph $ concat nat_blocks) + tops = proc : concat statics + os = platformOS $ targetPlatform dynflags + + case picBaseMb of + Just picBase -> initializePicBase_x86 ArchX86 os picBase tops + Nothing -> return tops + +cmmTopCodeGen _ (CmmData sec dat) = do + return [CmmData sec dat] -- no translation, we just use CmmStatic + + +basicBlockCodeGen + :: CmmBasicBlock + -> NatM ( [NatBasicBlock Instr] + , [NatCmmTop Instr]) + +basicBlockCodeGen (BasicBlock id stmts) = do + instrs <- stmtsToInstrs stmts + -- code generation may introduce new basic block boundaries, which + -- are indicated by the NEWBLOCK instruction. We must split up the + -- instruction stream into basic blocks again. Also, we extract + -- LDATAs here too. 
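+  -- As a sketch of the fold below (names as in mkBlocks): an input
+  -- stream like
+  --
+  --     [i1, NEWBLOCK b2, i2, LDATA sec d, i3]
+  --
+  -- comes out as the head block [i1], a new block  BasicBlock b2 [i2,i3],
+  -- and the static  CmmData sec d  lifted out to one side.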
+  let
+        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+          = ([], BasicBlock id instrs : blocks, statics)
+        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+          = (instrs, blocks, CmmData sec dat:statics)
+        mkBlocks instr (instrs,blocks,statics)
+          = (instr:instrs, blocks, statics)
+  -- in
+  return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+   = do instrss <- mapM stmtToInstrs stmts
+        return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+    CmmNop         -> return nilOL
+    CmmComment s   -> return (unitOL (COMMENT s))
+
+    CmmAssign reg src
+      | isFloatType ty -> assignReg_FltCode size reg src
+#if WORD_SIZE_IN_BITS==32
+      | isWord64 ty    -> assignReg_I64Code reg src
+#endif
+      | otherwise      -> assignReg_IntCode size reg src
+        where ty = cmmRegType reg
+              size = cmmTypeSize ty
+
+    CmmStore addr src
+      | isFloatType ty -> assignMem_FltCode size addr src
+#if WORD_SIZE_IN_BITS==32
+      | isWord64 ty    -> assignMem_I64Code addr src
+#endif
+      | otherwise      -> assignMem_IntCode size addr src
+        where ty = cmmExprType src
+              size = cmmTypeSize ty
+
+    CmmCall target result_regs args _ _
+       -> genCCall target result_regs args
+
+    CmmBranch id          -> genBranch id
+    CmmCondBranch arg id  -> genCondJump id arg
+    CmmSwitch arg ids     -> genSwitch arg ids
+    CmmJump arg params    -> genJump arg
+    CmmReturn params      ->
+      panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+--      They are really trees of insns to facilitate fast appending, where a
+--      left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+        = OrdList Instr
+
+
+-- | Condition codes passed up the tree.
+--
+data CondCode
+        = CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+--      Reg is the lower 32-bit temporary which contains the result.
+--      Use getHiVRegFromLo to find the other VRegUnique.
+--
+--      Rules of this simplified insn selection game are therefore that
+--      the returned Reg may be modified
+--
+data ChildCode64
+   = ChildCode64
+        InstrBlock
+        Reg
+
+
+-- | Registers passed up the tree.  If the stix code forces the register
+--      to live in a pre-decided machine register, it comes out as @Fixed@;
+--      otherwise, it comes out as @Any@, and the parent can decide which
+--      register to put it in.
+--
+data Register
+        = Fixed Size Reg InstrBlock
+        | Any   Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn)     size = Any   size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+  = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+  = case get_GlobalReg_reg_or_addr mid of
+       Left (RealReg rrno) -> RealReg rrno
+       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+          -- By this stage, the only MagicIds remaining should be the
+          -- ones which map to a real machine register on this
+          -- platform.  Hence ...
+
+
+-- | Memory addressing modes passed up the tree.
+data Amode
+        = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to a CmmLoad) that references memory,
+produce a suitable addressing mode.
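+
+For example, a load from  CmmRegOff r 4  should come out as the amode
+4(r) with no code part at all, while a computed address first runs its
+code part to get the base into a register.  (A sketch; the precise
+cases are in getAmode, further down.)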
+ +A Rule of the Game (tm) for Amodes: use of the addr bit must +immediately follow use of the code part, since the code part puts +values in registers which the addr then refers to. So you can't put +anything in between, lest it overwrite some of those registers. If +you need to do some other computation between the code part and use of +the addr bit, first store the effective address from the amode in a +temporary, then do the other computation, and then use the temporary: + + code + LEA amode, tmp + ... other computation ... + ... (tmp) ... +-} + + +-- | Check whether an integer will fit in 32 bits. +-- A CmmInt is intended to be truncated to the appropriate +-- number of bits, so here we truncate it to Int64. This is +-- important because e.g. -1 as a CmmInt might be either +-- -1 or 18446744073709551615. +-- +is32BitInteger :: Integer -> Bool +is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 + where i64 = fromIntegral i :: Int64 + + +-- | Convert a BlockId to some CmmStatic data +jumpTableEntry :: Maybe BlockId -> CmmStatic +jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) +jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) + where blockLabel = mkAsmTempLabel id + + +-- ----------------------------------------------------------------------------- +-- General things for putting together code sequences + +-- Expand CmmRegOff. ToDo: should we do it this way around, or convert +-- CmmExprs into CmmRegOff? +mangleIndexTree :: CmmExpr -> CmmExpr +mangleIndexTree (CmmRegOff reg off) + = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] + where width = typeWidth (cmmRegType reg) + +-- | The dual to getAnyReg: compute an expression into a register, but +-- we don't mind which one it is. 
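+--
+-- A typical use, as a sketch (this is exactly what genJump does below):
+--
+--      (reg, code) <- getSomeReg expr
+--      return (code `snocOL` JMP (OpReg reg))
+--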
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) +getSomeReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed _ reg code -> + return (reg, code) + + + + + +assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock +assignMem_I64Code addrTree valueTree = do + Amode addr addr_code <- getAmode addrTree + ChildCode64 vcode rlo <- iselExpr64 valueTree + let + rhi = getHiVRegFromLo rlo + + -- Little-endian store + mov_lo = MOV II32 (OpReg rlo) (OpAddr addr) + mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4))) + -- in + return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) + + +assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do + ChildCode64 vcode r_src_lo <- iselExpr64 valueTree + let + r_dst_lo = mkVReg u_dst II32 + r_dst_hi = getHiVRegFromLo r_dst_lo + r_src_hi = getHiVRegFromLo r_src_lo + mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo) + mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi) + -- in + return ( + vcode `snocOL` mov_lo `snocOL` mov_hi + ) + +assignReg_I64Code lvalue valueTree + = panic "assignReg_I64Code(i386): invalid lvalue" + + + + +iselExpr64 :: CmmExpr -> NatM ChildCode64 +iselExpr64 (CmmLit (CmmInt i _)) = do + (rlo,rhi) <- getNewRegPairNat II32 + let + r = fromIntegral (fromIntegral i :: Word32) + q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) + code = toOL [ + MOV II32 (OpImm (ImmInteger r)) (OpReg rlo), + MOV II32 (OpImm (ImmInteger q)) (OpReg rhi) + ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do + Amode addr addr_code <- getAmode addrTree + (rlo,rhi) <- getNewRegPairNat II32 + let + mov_lo = MOV II32 (OpAddr addr) (OpReg rlo) + mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi) + -- in + return ( + ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) + rlo + ) + +iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty + = return (ChildCode64 nilOL (mkVReg vu II32)) + +-- we handle addition, but rather badly +iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + (rlo,rhi) <- getNewRegPairNat II32 + let + r = fromIntegral (fromIntegral i :: Word32) + q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) + r1hi = getHiVRegFromLo r1lo + code = code1 `appOL` + toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), + ADD II32 (OpImm (ImmInteger r)) (OpReg rlo), + MOV II32 (OpReg r1hi) (OpReg rhi), + ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + ChildCode64 code2 r2lo <- iselExpr64 e2 + (rlo,rhi) <- getNewRegPairNat II32 + let + r1hi = getHiVRegFromLo r1lo + r2hi = getHiVRegFromLo r2lo + code = code1 `appOL` + code2 `appOL` + toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), + ADD II32 (OpReg r2lo) (OpReg rlo), + MOV II32 (OpReg r1hi) (OpReg rhi), + ADC II32 (OpReg r2hi) (OpReg rhi) ] + -- in + return (ChildCode64 code rlo) + +iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do + fn <- getAnyReg expr + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + code = fn r_dst_lo + return ( + ChildCode64 (code `snocOL` + MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi)) + r_dst_lo + ) + +iselExpr64 expr + = pprPanic "iselExpr64(i386)" (ppr expr) + + + 
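+-- As a sketch, the two-argument add case above comes out as x86
+-- assembly roughly like this (writing the virtual registers as if
+-- they were real ones):
+--
+--      movl    %r1lo, %rlo
+--      addl    %r2lo, %rlo
+--      movl    %r1hi, %rhi
+--      adcl    %r2hi, %rhi
+--
+-- the carry from the low-word addl feeds the high-word adcl.
+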
+
+--------------------------------------------------------------------------------
+getRegister :: CmmExpr -> NatM Register
+
+#if !x86_64_TARGET_ARCH
+    -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+    -- register, it can only be used for rip-relative addressing.
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+  = do
+      reg <- getPicBaseNat archWordSize
+      return (Fixed archWordSize reg nilOL)
+#endif
+
+getRegister (CmmReg reg)
+  = return (Fixed (cmmTypeSize (cmmRegType reg))
+                  (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+  = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+    -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+#endif
+
+
+
+
+#if i386_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f W32)) = do
+    lbl <- getNewLabelNat
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let code dst =
+            LDATA ReadOnlyData
+                        [CmmDataLabel lbl,
+                         CmmStaticLit (CmmFloat f W32)]
+            `consOL` (addr_code `snocOL`
+            GLD FF32 addr dst)
+    -- in
+    return (Any FF32 code)
+
+
+getRegister (CmmLit (CmmFloat d W64))
+  | d == 0.0
+  = let code dst = unitOL (GLDZ dst)
+    in  return (Any FF64 code)
+
+  | d == 1.0
+  = let code dst = unitOL (GLD1 dst)
+    in  return (Any FF64 code)
+
+  | otherwise = do
+    lbl <- getNewLabelNat
+    dflags <- getDynFlagsNat
+    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    Amode addr addr_code <- getAmode dynRef
+    let code dst =
+            LDATA ReadOnlyData
+                        [CmmDataLabel lbl,
+                         CmmStaticLit (CmmFloat d W64)]
+            `consOL` (addr_code `snocOL`
+            GLD FF64 addr dst)
+    -- in
+    return (Any FF64 code)
+
+#endif /* i386_TARGET_ARCH */
+
+
+
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmLit (CmmFloat 0.0 w)) = do
+   let size = floatSize w
+       code dst = unitOL  (XOR size (OpReg dst) (OpReg dst))
+        -- I don't know why there are xorpd, xorps, and pxor instructions.
+ -- They all appear to do the same thing --SDM + return (Any size code) + +getRegister (CmmLit (CmmFloat f w)) = do + lbl <- getNewLabelNat + let code dst = toOL [ + LDATA ReadOnlyData + [CmmDataLabel lbl, + CmmStaticLit (CmmFloat f w)], + MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + ] + -- in + return (Any size code) + where size = floatSize w + +#endif /* x86_64_TARGET_ARCH */ + + + + + +-- catch simple cases of zero- or sign-extended load +getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL II8) addr + return (Any II32 code) + +getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II8) addr + return (Any II32 code) + +getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL II16) addr + return (Any II32 code) + +getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II16) addr + return (Any II32 code) + + +#if x86_64_TARGET_ARCH + +-- catch simple cases of zero- or sign-extended load +getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL II8) addr + return (Any II64 code) + +getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II8) addr + return (Any II64 code) + +getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL II16) addr + return (Any II64 code) + +getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II16) addr + return (Any II64 code) + +getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend + return (Any II64 code) + +getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II32) addr + return (Any II64 code) + +getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), + CmmLit displacement]) + = return $ Any II64 (\dst -> unitOL $ + LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) + +getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do + x_code <- getAnyReg x + lbl <- getNewLabelNat + let + code dst = x_code dst `appOL` toOL [ + -- This is how gcc does it, so it can't be that bad: + LDATA ReadOnlyData16 [ + CmmAlign 16, + CmmDataLabel lbl, + CmmStaticLit (CmmInt 0x80000000 W32), + CmmStaticLit (CmmInt 0 W32), + CmmStaticLit (CmmInt 0 W32), + CmmStaticLit (CmmInt 0 W32) + ], + XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + -- xorps, so we need the 128-bit constant + -- ToDo: rip-relative + ] + -- + return (Any FF32 code) + +getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do + x_code <- getAnyReg x + lbl <- getNewLabelNat + let + -- This is how gcc does it, so it can't be that bad: + code dst = x_code dst `appOL` toOL [ + LDATA ReadOnlyData16 [ + CmmAlign 16, + CmmDataLabel lbl, + CmmStaticLit (CmmInt 0x8000000000000000 W64), + CmmStaticLit (CmmInt 0 W64) + ], + -- gcc puts an unpck here. Wonder if we need it. 
+ XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + -- xorpd, so we need the 128-bit constant + ] + -- + return (Any FF64 code) + +#endif /* x86_64_TARGET_ARCH */ + + + + + +getRegister (CmmMachOp mop [x]) -- unary MachOps + = case mop of +#if i386_TARGET_ARCH + MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x + MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x +#endif + + MO_S_Neg w -> triv_ucode NEGI (intSize w) + MO_F_Neg w -> triv_ucode NEGI (floatSize w) + MO_Not w -> triv_ucode NOT (intSize w) + + -- Nop conversions + MO_UU_Conv W32 W8 -> toI8Reg W32 x + MO_SS_Conv W32 W8 -> toI8Reg W32 x + MO_UU_Conv W16 W8 -> toI8Reg W16 x + MO_SS_Conv W16 W8 -> toI8Reg W16 x + MO_UU_Conv W32 W16 -> toI16Reg W32 x + MO_SS_Conv W32 W16 -> toI16Reg W32 x + +#if x86_64_TARGET_ARCH + MO_UU_Conv W64 W32 -> conversionNop II64 x + MO_SS_Conv W64 W32 -> conversionNop II64 x + MO_UU_Conv W64 W16 -> toI16Reg W64 x + MO_SS_Conv W64 W16 -> toI16Reg W64 x + MO_UU_Conv W64 W8 -> toI8Reg W64 x + MO_SS_Conv W64 W8 -> toI8Reg W64 x +#endif + + MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x + MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x + + -- widenings + MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x + MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x + MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x + + MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x + MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x + MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x + +#if x86_64_TARGET_ARCH + MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x + MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x + MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x + MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x + MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x + MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x + -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl. + -- However, we don't want the register allocator to throw it + -- away as an unnecessary reg-to-reg move, so we keep it in + -- the form of a movzl and print it as a movl later. +#endif + +#if i386_TARGET_ARCH + MO_FF_Conv W32 W64 -> conversionNop FF64 x + MO_FF_Conv W64 W32 -> conversionNop FF32 x +#else + MO_FF_Conv W32 W64 -> coerceFP2FP W64 x + MO_FF_Conv W64 W32 -> coerceFP2FP W32 x +#endif + + MO_FS_Conv from to -> coerceFP2Int from to x + MO_SF_Conv from to -> coerceInt2FP from to x + + other -> pprPanic "getRegister" (pprMachOp mop) + where + triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register + triv_ucode instr size = trivialUCode size (instr size) x + + -- signed or unsigned extension. + integerExtend :: Width -> Width + -> (Size -> Operand -> Operand -> Instr) + -> CmmExpr -> NatM Register + integerExtend from to instr expr = do + (reg,e_code) <- if from == W8 then getByteReg expr + else getSomeReg expr + let + code dst = + e_code `snocOL` + instr (intSize from) (OpReg reg) (OpReg dst) + return (Any (intSize to) code) + + toI8Reg :: Width -> CmmExpr -> NatM Register + toI8Reg new_rep expr + = do codefn <- getAnyReg expr + return (Any (intSize new_rep) codefn) + -- HACK: use getAnyReg to get a byte-addressable register. + -- If the source was a Fixed register, this will add the + -- mov instruction to put it into the desired destination. + -- We're assuming that the destination won't be a fixed + -- non-byte-addressable register; it won't be, because all + -- fixed registers are word-sized. 
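+        -- (On i386 only %eax, %ebx, %ecx and %edx have 8-bit variants;
+        -- %esi and %edi do not, which is what makes byte operations
+        -- fiddly here.)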
+ + toI16Reg = toI8Reg -- for now + + conversionNop :: Size -> CmmExpr -> NatM Register + conversionNop new_size expr + = do e_code <- getRegister expr + return (swizzleRegisterRep e_code new_size) + + +getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps + = case mop of + MO_F_Eq w -> condFltReg EQQ x y + MO_F_Ne w -> condFltReg NE x y + MO_F_Gt w -> condFltReg GTT x y + MO_F_Ge w -> condFltReg GE x y + MO_F_Lt w -> condFltReg LTT x y + MO_F_Le w -> condFltReg LE x y + + MO_Eq rep -> condIntReg EQQ x y + MO_Ne rep -> condIntReg NE x y + + MO_S_Gt rep -> condIntReg GTT x y + MO_S_Ge rep -> condIntReg GE x y + MO_S_Lt rep -> condIntReg LTT x y + MO_S_Le rep -> condIntReg LE x y + + MO_U_Gt rep -> condIntReg GU x y + MO_U_Ge rep -> condIntReg GEU x y + MO_U_Lt rep -> condIntReg LU x y + MO_U_Le rep -> condIntReg LEU x y + +#if i386_TARGET_ARCH + MO_F_Add w -> trivialFCode w GADD x y + MO_F_Sub w -> trivialFCode w GSUB x y + MO_F_Quot w -> trivialFCode w GDIV x y + MO_F_Mul w -> trivialFCode w GMUL x y +#endif + +#if x86_64_TARGET_ARCH + MO_F_Add w -> trivialFCode w ADD x y + MO_F_Sub w -> trivialFCode w SUB x y + MO_F_Quot w -> trivialFCode w FDIV x y + MO_F_Mul w -> trivialFCode w MUL x y +#endif + + MO_Add rep -> add_code rep x y + MO_Sub rep -> sub_code rep x y + + MO_S_Quot rep -> div_code rep True True x y + MO_S_Rem rep -> div_code rep True False x y + MO_U_Quot rep -> div_code rep False True x y + MO_U_Rem rep -> div_code rep False False x y + + MO_S_MulMayOflo rep -> imulMayOflo rep x y + + MO_Mul rep -> triv_op rep IMUL + MO_And rep -> triv_op rep AND + MO_Or rep -> triv_op rep OR + MO_Xor rep -> triv_op rep XOR + + {- Shift ops on x86s have constraints on their source, it + either has to be Imm, CL or 1 + => trivialCode is not restrictive enough (sigh.) + -} + MO_Shl rep -> shift_code rep SHL x y {-False-} + MO_U_Shr rep -> shift_code rep SHR x y {-False-} + MO_S_Shr rep -> shift_code rep SAR x y {-False-} + + other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) + where + -------------------- + triv_op width instr = trivialCode width op (Just op) x y + where op = instr (intSize width) + + imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register + imulMayOflo rep a b = do + (a_reg, a_code) <- getNonClobberedReg a + b_code <- getAnyReg b + let + shift_amt = case rep of + W32 -> 31 + W64 -> 63 + _ -> panic "shift_amt" + + size = intSize rep + code = a_code `appOL` b_code eax `appOL` + toOL [ + IMUL2 size (OpReg a_reg), -- result in %edx:%eax + SAR size (OpImm (ImmInt shift_amt)) (OpReg eax), + -- sign extend lower part + SUB size (OpReg edx) (OpReg eax) + -- compare against upper + -- eax==0 if high part == sign extended low part + ] + -- in + return (Fixed size eax code) + + -------------------- + shift_code :: Width + -> (Size -> Operand -> Operand -> Instr) + -> CmmExpr + -> CmmExpr + -> NatM Register + + {- Case1: shift length as immediate -} + shift_code width instr x y@(CmmLit lit) = do + x_code <- getAnyReg x + let + size = intSize width + code dst + = x_code dst `snocOL` + instr size (OpImm (litToImm lit)) (OpReg dst) + -- in + return (Any size code) + + {- Case2: shift length is complex (non-immediate) + * y must go in %ecx. + * we cannot do y first *and* put its result in %ecx, because + %ecx might be clobbered by x. + * if we do y second, then x cannot be + in a clobbered reg. Also, we cannot clobber x's reg + with the instruction itself. 
+    * so we can either:
+       - do y first, put its result in a fresh tmp, then copy it to %ecx later
+       - do y second and put its result into %ecx.  x gets placed in a fresh
+         tmp.  This is likely to be better, because the reg alloc can
+         eliminate this reg->reg move here (it won't eliminate the other one,
+         because the move is into the fixed %ecx).
+   -}
+    shift_code width instr x y{-amount-} = do
+        x_code <- getAnyReg x
+        let size = intSize width
+        tmp <- getNewRegNat size
+        y_code <- getAnyReg y
+        let
+           code = x_code tmp `appOL`
+                  y_code ecx `snocOL`
+                  instr size (OpReg ecx) (OpReg tmp)
+        -- in
+        return (Fixed size tmp code)
+
+    --------------------
+    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    add_code rep x (CmmLit (CmmInt y _))
+        | is32BitInteger y = add_int rep x y
+    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
+      where size = intSize rep
+
+    --------------------
+    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    sub_code rep x (CmmLit (CmmInt y _))
+        | is32BitInteger (-y) = add_int rep x (-y)
+    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
+
+    -- our three-operand add instruction:
+    add_int width x y = do
+        (x_reg, x_code) <- getSomeReg x
+        let
+            size = intSize width
+            imm = ImmInt (fromInteger y)
+            code dst
+               = x_code `snocOL`
+                 LEA size
+                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
+                        (OpReg dst)
+        --
+        return (Any size code)
+
+    ----------------------
+    div_code width signed quotient x y = do
+           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
+           x_code <- getAnyReg x
+           let
+             size = intSize width
+             widen | signed    = CLTD size
+                   | otherwise = XOR size (OpReg edx) (OpReg edx)
+
+             instr | signed    = IDIV
+                   | otherwise = DIV
+
+             code = y_code `appOL`
+                    x_code eax `appOL`
+                    toOL [widen, instr size y_op]
+
+             result | quotient  = eax
+                    | otherwise = edx
+
+           -- in
+           return (Fixed size result code)
+
+
+getRegister (CmmLoad mem pk)
+  | isFloatType pk
+  = do
+    Amode src mem_code <- getAmode mem
+    let
+        size = cmmTypeSize pk
+        code dst = mem_code `snocOL`
+                   IF_ARCH_i386(GLD size src dst,
+                                MOV size (OpAddr src) (OpReg dst))
+    return (Any size code)
+
+#if i386_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+  | not (isWord64 pk)
+  = do
+    code <- intLoadCode instr mem
+    return (Any size code)
+  where
+    width = typeWidth pk
+    size = intSize width
+    instr = case width of
+                W8     -> MOVZxL II8
+                _other -> MOV size
+        -- We always zero-extend 8-bit loads, if we
+        -- can't think of anything better.  This is because
+        -- we can't guarantee access to an 8-bit variant of every register
+        -- (esi and edi don't have 8-bit variants), so to make things
+        -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+  = do
+    code <- intLoadCode (MOV size) mem
+    return (Any size code)
+  where size = intSize $ typeWidth pk
+#endif
+
+getRegister (CmmLit (CmmInt 0 width))
+  = let
+        size = intSize width
+
+        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+        adj_size = case size of II64 -> II32; _ -> size
+        size1 = IF_ARCH_i386( size, adj_size )
+        code dst
+           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
+    in
+        return (Any size code)
+
+#if x86_64_TARGET_ARCH
+  -- optimisation for loading small literals on x86_64: take advantage
+  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+  -- instruction forms are shorter.
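+  -- For example,  movl $1, %eax  is 5 bytes and the hardware zeroes
+  -- the top half of %rax anyway, whereas  movq $1, %rax  needs a REX
+  -- prefix and is 7 bytes.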
+getRegister (CmmLit lit) + | isWord64 (cmmLitType lit), not (isBigLit lit) + = let + imm = litToImm lit + code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst)) + in + return (Any II64 code) + where + isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff + isBigLit _ = False + -- note1: not the same as (not.is32BitLit), because that checks for + -- signed literals that fit in 32 bits, but we want unsigned + -- literals here. + -- note2: all labels are small, because we're assuming the + -- small memory model (see gcc docs, -mcmodel=small). +#endif + +getRegister (CmmLit lit) + = let + size = cmmTypeSize (cmmLitType lit) + imm = litToImm lit + code dst = unitOL (MOV size (OpImm imm) (OpReg dst)) + in + return (Any size code) + +getRegister other = pprPanic "getRegister(x86)" (ppr other) + + +intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr + -> NatM (Reg -> InstrBlock) +intLoadCode instr mem = do + Amode src mem_code <- getAmode mem + return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst)) + +-- Compute an expression into *any* register, adding the appropriate +-- move instruction if necessary. +getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock) +getAnyReg expr = do + r <- getRegister expr + anyReg r + +anyReg :: Register -> NatM (Reg -> InstrBlock) +anyReg (Any _ code) = return code +anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst) + +-- A bit like getSomeReg, but we want a reg that can be byte-addressed. +-- Fixed registers might not be byte-addressable, so we make sure we've +-- got a temporary, inserting an extra reg copy if necessary. +getByteReg :: CmmExpr -> NatM (Reg, InstrBlock) +#if x86_64_TARGET_ARCH +getByteReg = getSomeReg -- all regs are byte-addressable on x86_64 +#else +getByteReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed rep reg code + | isVirtualReg reg -> return (reg,code) + | otherwise -> do + tmp <- getNewRegNat rep + return (tmp, code `snocOL` reg2reg rep reg tmp) + -- ToDo: could optimise slightly by checking for byte-addressable + -- real registers, but that will happen very rarely if at all. +#endif + +-- Another variant: this time we want the result in a register that cannot +-- be modified by code to evaluate an arbitrary expression. +getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock) +getNonClobberedReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, code tmp) + Fixed rep reg code + -- only free regs can be clobbered + | RealReg rr <- reg, isFastTrue (freeReg rr) -> do + tmp <- getNewRegNat rep + return (tmp, code `snocOL` reg2reg rep reg tmp) + | otherwise -> + return (reg, code) + +reg2reg :: Size -> Reg -> Reg -> Instr +reg2reg size src dst +#if i386_TARGET_ARCH + | isFloatSize size = GMOV src dst +#endif + | otherwise = MOV size (OpReg src) (OpReg dst) + + + +-------------------------------------------------------------------------------- +getAmode :: CmmExpr -> NatM Amode +getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) + +#if x86_64_TARGET_ARCH + +getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), + CmmLit displacement]) + = return $ Amode (ripRel (litToImm displacement)) nilOL + +#endif + + +-- This is all just ridiculous, since it carefully undoes +-- what mangleIndexTree has just done. +getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)]) + | is32BitLit lit + -- ASSERT(rep == II32)??? 
+  = do (x_reg, x_code) <- getSomeReg x
+       let off = ImmInt (-(fromInteger i))
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+  | is32BitLit lit
+  -- ASSERT(rep == II32)???
+  = do (x_reg, x_code) <- getSomeReg x
+       let off = ImmInt (fromInteger i)
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+-- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n)  so it will be
+-- recognised by the next rule.
+getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
+                                  b@(CmmLit _)])
+  = getAmode (CmmMachOp (MO_Add rep) [b,a])
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
+                                        [y, CmmLit (CmmInt shift _)]])
+  | shift == 0 || shift == 1 || shift == 2 || shift == 3
+  = x86_complex_amode x y shift 0
+
+getAmode (CmmMachOp (MO_Add rep)
+                [x, CmmMachOp (MO_Add _)
+                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
+                         CmmLit (CmmInt offset _)]])
+  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
+  && is32BitInteger offset
+  = x86_complex_amode x y shift offset
+
+getAmode (CmmMachOp (MO_Add rep) [x,y])
+  = x86_complex_amode x y 0 0
+
+getAmode (CmmLit lit) | is32BitLit lit
+  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
+
+getAmode expr = do
+  (reg,code) <- getSomeReg expr
+  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
+
+
+x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
+x86_complex_amode base index shift offset
+  = do (x_reg, x_code) <- getNonClobberedReg base
+        -- x must be in a temp, because it has to stay live over y_code
+        -- we could compare x_reg and y_reg and do something better here...
+       (y_reg, y_code) <- getSomeReg index
+       let
+           code = x_code `appOL` y_code
+           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
+       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
+               code)
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- getOperand: sometimes any operand will do.
+
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
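+--
+-- Roughly: if the operand is a load whose amode mentions a register
+-- that might get clobbered (a free real register), we first save the
+-- effective address into a fresh temporary,
+--
+--      LEA II32 (OpAddr src) (OpReg tmp)
+--
+-- and hand back an operand that goes through tmp instead.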
+ +getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) +#if x86_64_TARGET_ARCH +getNonClobberedOperand (CmmLit lit) + | isSuitableFloatingPointLit lit = do + lbl <- getNewLabelNat + let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit lit]) + return (OpAddr (ripRel (ImmCLbl lbl)), code) +#endif +getNonClobberedOperand (CmmLit lit) + | is32BitLit lit && not (isFloatType (cmmLitType lit)) = + return (OpImm (litToImm lit), nilOL) +getNonClobberedOperand (CmmLoad mem pk) + | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do + Amode src mem_code <- getAmode mem + (src',save_code) <- + if (amodeCouldBeClobbered src) + then do + tmp <- getNewRegNat archWordSize + return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), + unitOL (LEA II32 (OpAddr src) (OpReg tmp))) + else + return (src, nilOL) + return (OpAddr src', save_code `appOL` mem_code) +getNonClobberedOperand e = do + (reg, code) <- getNonClobberedReg e + return (OpReg reg, code) + +amodeCouldBeClobbered :: AddrMode -> Bool +amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode) + +regClobbered (RealReg rr) = isFastTrue (freeReg rr) +regClobbered _ = False + +-- getOperand: the operand is not required to remain valid across the +-- computation of an arbitrary expression. +getOperand :: CmmExpr -> NatM (Operand, InstrBlock) +#if x86_64_TARGET_ARCH +getOperand (CmmLit lit) + | isSuitableFloatingPointLit lit = do + lbl <- getNewLabelNat + let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit lit]) + return (OpAddr (ripRel (ImmCLbl lbl)), code) +#endif +getOperand (CmmLit lit) + | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do + return (OpImm (litToImm lit), nilOL) +getOperand (CmmLoad mem pk) + | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do + Amode src mem_code <- getAmode mem + return (OpAddr src, mem_code) +getOperand e = do + (reg, code) <- getSomeReg e + return (OpReg reg, code) + +isOperand :: CmmExpr -> Bool +isOperand (CmmLoad _ _) = True +isOperand (CmmLit lit) = is32BitLit lit + || isSuitableFloatingPointLit lit +isOperand _ = False + +-- if we want a floating-point literal as an operand, we can +-- use it directly from memory. However, if the literal is +-- zero, we're better off generating it into a register using +-- xor. +isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 +isSuitableFloatingPointLit _ = False + +getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) +getRegOrMem (CmmLoad mem pk) + | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do + Amode src mem_code <- getAmode mem + return (OpAddr src, mem_code) +getRegOrMem e = do + (reg, code) <- getNonClobberedReg e + return (OpReg reg, code) + +#if x86_64_TARGET_ARCH +is32BitLit (CmmInt i W64) = is32BitInteger i + -- assume that labels are in the range 0-2^31-1: this assumes the + -- small memory model (see gcc docs, -mcmodel=small). +#endif +is32BitLit x = True + + + + +-- Set up a condition code for a conditional branch. + +getCondCode :: CmmExpr -> NatM CondCode + +-- yes, they really do seem to want exactly the same! 
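+--
+-- e.g. a signed comparison  MO_S_Lt  maps to  condIntCode LTT x y,
+-- which emits a CMP and passes the LTT condition up for the branch
+-- or set-condition instruction to use.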
+ +getCondCode (CmmMachOp mop [x, y]) + = + case mop of + MO_F_Eq W32 -> condFltCode EQQ x y + MO_F_Ne W32 -> condFltCode NE x y + MO_F_Gt W32 -> condFltCode GTT x y + MO_F_Ge W32 -> condFltCode GE x y + MO_F_Lt W32 -> condFltCode LTT x y + MO_F_Le W32 -> condFltCode LE x y + + MO_F_Eq W64 -> condFltCode EQQ x y + MO_F_Ne W64 -> condFltCode NE x y + MO_F_Gt W64 -> condFltCode GTT x y + MO_F_Ge W64 -> condFltCode GE x y + MO_F_Lt W64 -> condFltCode LTT x y + MO_F_Le W64 -> condFltCode LE x y + + MO_Eq rep -> condIntCode EQQ x y + MO_Ne rep -> condIntCode NE x y + + MO_S_Gt rep -> condIntCode GTT x y + MO_S_Ge rep -> condIntCode GE x y + MO_S_Lt rep -> condIntCode LTT x y + MO_S_Le rep -> condIntCode LE x y + + MO_U_Gt rep -> condIntCode GU x y + MO_U_Ge rep -> condIntCode GEU x y + MO_U_Lt rep -> condIntCode LU x y + MO_U_Le rep -> condIntCode LEU x y + + other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) + +getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) + + + + +-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be +-- passed back up the tree. + +condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode + +-- memory vs immediate +condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do + Amode x_addr x_code <- getAmode x + let + imm = litToImm lit + code = x_code `snocOL` + CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr) + -- + return (CondCode False cond code) + +-- anything vs zero, using a mask +-- TODO: Add some sanity checking!!!! +condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk)) + | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit + = do + (x_reg, x_code) <- getSomeReg x + let + code = x_code `snocOL` + TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg) + -- + return (CondCode False cond code) + +-- anything vs zero +condIntCode cond x (CmmLit (CmmInt 0 pk)) = do + (x_reg, x_code) <- getSomeReg x + let + code = x_code `snocOL` + TEST (intSize pk) (OpReg x_reg) (OpReg x_reg) + -- + return (CondCode False cond code) + +-- anything vs operand +condIntCode cond x y | isOperand y = do + (x_reg, x_code) <- getNonClobberedReg x + (y_op, y_code) <- getOperand y + let + code = x_code `appOL` y_code `snocOL` + CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg) + -- in + return (CondCode False cond code) + +-- anything vs anything +condIntCode cond x y = do + (y_reg, y_code) <- getNonClobberedReg y + (x_op, x_code) <- getRegOrMem x + let + code = y_code `appOL` + x_code `snocOL` + CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op + -- in + return (CondCode False cond code) + + + +-------------------------------------------------------------------------------- +condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode + +#if i386_TARGET_ARCH +condFltCode cond x y + = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do + (x_reg, x_code) <- getNonClobberedReg x + (y_reg, y_code) <- getSomeReg y + let + code = x_code `appOL` y_code `snocOL` + GCMP cond x_reg y_reg + -- The GCMP insn does the test and sets the zero flag if comparable + -- and true. Hence we always supply EQQ as the condition to test. + return (CondCode True EQQ code) + +#elif x86_64_TARGET_ARCH +-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be +-- an operand, but the right must be a reg. We can probably do better +-- than this general case... 
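+-- As a sketch, a double-precision comparison  x `op` y  becomes
+--
+--      ucomisd y_op, x_reg
+--
+-- and the branch that consumes the result must then use the *unsigned*
+-- condition (e.g. JB rather than JL), hence the condToUnsigned below.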
+condFltCode cond x y = do + (x_reg, x_code) <- getNonClobberedReg x + (y_op, y_code) <- getOperand y + let + code = x_code `appOL` + y_code `snocOL` + CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg) + -- NB(1): we need to use the unsigned comparison operators on the + -- result of this comparison. + -- in + return (CondCode True (condToUnsigned cond) code) + +#else +condFltCode = panic "X86.condFltCode: not defined" + +#endif + + + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). + +assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock + + +-- integer assignment to memory + +-- specific case of adding/subtracting an integer to a particular address. +-- ToDo: catch other cases where we can use an operation directly on a memory +-- address. +assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _, + CmmLit (CmmInt i _)]) + | addr == addr2, pk /= II64 || is32BitInteger i, + Just instr <- check op + = do Amode amode code_addr <- getAmode addr + let code = code_addr `snocOL` + instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode) + return code + where + check (MO_Add _) = Just ADD + check (MO_Sub _) = Just SUB + check _ = Nothing + -- ToDo: more? + +-- general case +assignMem_IntCode pk addr src = do + Amode addr code_addr <- getAmode addr + (code_src, op_src) <- get_op_RI src + let + code = code_src `appOL` + code_addr `snocOL` + MOV pk op_src (OpAddr addr) + -- NOTE: op_src is stable, so it will still be valid + -- after code_addr. This may involve the introduction + -- of an extra MOV to a temporary register, but we hope + -- the register allocator will get rid of it. 
+ -- + return code + where + get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator + get_op_RI (CmmLit lit) | is32BitLit lit + = return (nilOL, OpImm (litToImm lit)) + get_op_RI op + = do (reg,code) <- getNonClobberedReg op + return (code, OpReg reg) + + +-- Assign; dst is a reg, rhs is mem +assignReg_IntCode pk reg (CmmLoad src _) = do + load_code <- intLoadCode (MOV pk) src + return (load_code (getRegisterReg reg)) + +-- dst is a reg, but src could be anything +assignReg_IntCode pk reg src = do + code <- getAnyReg src + return (code (getRegisterReg reg)) + + +-- Floating point assignment to memory +assignMem_FltCode pk addr src = do + (src_reg, src_code) <- getNonClobberedReg src + Amode addr addr_code <- getAmode addr + let + code = src_code `appOL` + addr_code `snocOL` + IF_ARCH_i386(GST pk src_reg addr, + MOV pk (OpReg src_reg) (OpAddr addr)) + return code + +-- Floating point assignment to a register/temporary +assignReg_FltCode pk reg src = do + src_code <- getAnyReg src + return (src_code (getRegisterReg reg)) + + +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock + +genJump (CmmLoad mem pk) = do + Amode target code <- getAmode mem + return (code `snocOL` JMP (OpAddr target)) + +genJump (CmmLit lit) = do + return (unitOL (JMP (OpImm (litToImm lit)))) + +genJump expr = do + (reg,code) <- getSomeReg expr + return (code `snocOL` JMP (OpReg reg)) + + +-- ----------------------------------------------------------------------------- +-- Unconditional branches + +genBranch :: BlockId -> NatM InstrBlock +genBranch = return . toOL . mkJumpInstr + + + +-- ----------------------------------------------------------------------------- +-- Conditional jumps + +{- +Conditional jumps are always to local labels, so we can use branch +instructions. We peek at the arguments to decide what kind of +comparison to do. + +I386: First, we have to ensure that the condition +codes are set according to the supplied comparison operation. +-} + +genCondJump + :: BlockId -- the branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock + +#if i386_TARGET_ARCH +genCondJump id bool = do + CondCode _ cond code <- getCondCode bool + return (code `snocOL` JXX cond id) + +#elif x86_64_TARGET_ARCH +genCondJump id bool = do + CondCode is_float cond cond_code <- getCondCode bool + if not is_float + then + return (cond_code `snocOL` JXX cond id) + else do + lbl <- getBlockIdNat + + -- see comment with condFltReg + let code = case cond of + NE -> or_unordered + GU -> plain_test + GEU -> plain_test + _ -> and_ordered + + plain_test = unitOL ( + JXX cond id + ) + or_unordered = toOL [ + JXX cond id, + JXX PARITY id + ] + and_ordered = toOL [ + JXX PARITY lbl, + JXX cond id, + JXX ALWAYS lbl, + NEWBLOCK lbl + ] + return (cond_code `appOL` code) + +#else +genCondJump = panic "X86.genCondJump: not defined" + +#endif + + + + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. 
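+--
+-- In outline, on i386 a ccall  f(x,y)  comes out as
+--
+--      <code to push y>        -- args are pushed right to left
+--      <code to push x>
+--      call f
+--      addl $8, %esp           -- caller pops args (not for stdcall)
+--
+-- with DELTA pseudo-instructions recording each stack adjustment
+-- for the benefit of the register allocator's spill slots.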
+ +genCCall + :: CmmCallTarget -- function to call + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) + -> NatM InstrBlock + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if i386_TARGET_ARCH + +genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL + -- write barrier compiles to no code on x86/x86-64; + -- we keep it this long in order to prevent earlier optimisations. + +-- we only cope with a single result for foreign calls +genCCall (CmmPrim op) [CmmHinted r _] args = do + l1 <- getNewLabelNat + l2 <- getNewLabelNat + case op of + MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args + MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args + + MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args + MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args + + MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args + MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args + + MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args + MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args + + other_op -> outOfLineFloatOp op r args + where + actuallyInlineFloatOp instr size [CmmHinted x _] + = do res <- trivialUFCode size (instr size) x + any <- anyReg res + return (any (getRegisterReg (CmmLocal r))) + +genCCall target dest_regs args = do + let + sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args) +#if !darwin_TARGET_OS + tot_arg_size = sum sizes +#else + raw_arg_size = sum sizes + tot_arg_size = roundTo 16 raw_arg_size + arg_pad_size = tot_arg_size - raw_arg_size + delta0 <- getDeltaNat + setDeltaNat (delta0 - arg_pad_size) +#endif + + push_codes <- mapM push_arg (reverse args) + delta <- getDeltaNat + + -- in + -- deal with static vs dynamic call targets + (callinsns,cconv) <- + case target of + -- CmmPrim -> ... 
+ CmmCallee (CmmLit (CmmLabel lbl)) conv + -> -- ToDo: stdcall arg sizes + return (unitOL (CALL (Left fn_imm) []), conv) + where fn_imm = ImmCLbl lbl + CmmCallee expr conv + -> do { (dyn_c, dyn_r) <- get_op expr + ; ASSERT( isWord32 (cmmExprType expr) ) + return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) } + + let push_code +#if darwin_TARGET_OS + | arg_pad_size /= 0 + = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp), + DELTA (delta0 - arg_pad_size)] + `appOL` concatOL push_codes + | otherwise +#endif + = concatOL push_codes + call = callinsns `appOL` + toOL ( + -- Deallocate parameters after call for ccall; + -- but not for stdcall (callee does it) + (if cconv == StdCallConv || tot_arg_size==0 then [] else + [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)]) + ++ + [DELTA (delta + tot_arg_size)] + ) + -- in + setDeltaNat (delta + tot_arg_size) + + let + -- assign the results, if necessary + assign_code [] = nilOL + assign_code [CmmHinted dest _hint] + | isFloatType ty = unitOL (GMOV fake0 r_dest) + | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest), + MOV II32 (OpReg edx) (OpReg r_dest_hi)] + | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest)) + where + ty = localRegType dest + w = typeWidth ty + r_dest_hi = getHiVRegFromLo r_dest + r_dest = getRegisterReg (CmmLocal dest) + assign_code many = panic "genCCall.assign_code many" + + return (push_code `appOL` + call `appOL` + assign_code dest_regs) + + where + arg_size :: CmmType -> Int -- Width in bytes + arg_size ty = widthInBytes (typeWidth ty) + + roundTo a x | x `mod` a == 0 = x + | otherwise = x + a - (x `mod` a) + + + push_arg :: HintedCmmActual {-current argument-} + -> NatM InstrBlock -- code + + push_arg (CmmHinted arg _hint) -- we don't need the hints on x86 + | isWord64 arg_ty = do + ChildCode64 code r_lo <- iselExpr64 arg + delta <- getDeltaNat + setDeltaNat (delta - 8) + let + r_hi = getHiVRegFromLo r_lo + -- in + return ( code `appOL` + toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4), + PUSH II32 (OpReg r_lo), DELTA (delta - 8), + DELTA (delta-8)] + ) + + | otherwise = do + (code, reg) <- get_op arg + delta <- getDeltaNat + let size = arg_size arg_ty -- Byte size + setDeltaNat (delta-size) + if (isFloatType arg_ty) + then return (code `appOL` + toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp), + DELTA (delta-size), + GST (floatSize (typeWidth arg_ty)) + reg (AddrBaseIndex (EABaseReg esp) + EAIndexNone + (ImmInt 0))] + ) + else return (code `snocOL` + PUSH II32 (OpReg reg) `snocOL` + DELTA (delta-size) + ) + where + arg_ty = cmmExprType arg + + ------------ + get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg + get_op op = do + (reg,code) <- getSomeReg op + return (code, reg) + +#elif x86_64_TARGET_ARCH + +genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL + -- write barrier compiles to no code on x86/x86-64; + -- we keep it this long in order to prevent earlier optimisations. 
+
+
+genCCall (CmmPrim op) [CmmHinted r _] args =
+  outOfLineFloatOp op r args
+
+genCCall target dest_regs args = do
+
+        -- load up the register arguments
+    (stack_args, aregs, fregs, load_args_code)
+         <- load_args args allArgRegs allFPArgRegs nilOL
+
+    let
+        fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
+        int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+        arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
+                -- for annotating the call instruction
+
+        sse_regs = length fp_regs_used
+
+        tot_arg_size = arg_size * length stack_args
+
+        -- On entry to the called function, %rsp should be aligned
+        -- on a 16-byte boundary +8 (i.e. the first stack arg after
+        -- the return address is 16-byte aligned).  In STG land
+        -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+        -- need to make sure we push a multiple of 16-bytes of args,
+        -- plus the return address, to get the correct alignment.
+        -- Urg, this is hard.  We need to feed the delta back into
+        -- the arg pushing code.
+    (real_size, adjust_rsp) <-
+        if tot_arg_size `rem` 16 == 0
+            then return (tot_arg_size, nilOL)
+            else do -- we need to adjust...
+                delta <- getDeltaNat
+                setDeltaNat (delta-8)
+                return (tot_arg_size+8, toOL [
+                                SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
+                                DELTA (delta-8)
+                        ])
+
+        -- push the stack args, right to left
+    push_code <- push_args (reverse stack_args) nilOL
+    delta <- getDeltaNat
+
+    -- deal with static vs dynamic call targets
+    (callinsns,cconv) <-
+      case target of
+        -- CmmPrim -> ...
+        CmmCallee (CmmLit (CmmLabel lbl)) conv
+           -> -- ToDo: stdcall arg sizes
+              return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+           where fn_imm = ImmCLbl lbl
+        CmmCallee expr conv
+           -> do (dyn_r, dyn_c) <- getSomeReg expr
+                 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+    let
+        -- The x86_64 ABI requires us to set %al to the number of SSE
+        -- registers that contain arguments, if the called routine
+        -- is a varargs function.  We don't know whether it's a
+        -- varargs function or not, so we have to assume it is.
+        --
+        -- It's not safe to omit this assignment, even if the number
+        -- of SSE regs in use is zero.  If %al is larger than 8
+        -- on entry to a varargs function, seg faults ensue.
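+        -- So with, say, two SSE registers carrying arguments, we emit
+        --
+        --      movl $2, %eax
+        --
+        -- immediately before the call (see assign_eax sse_regs below).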
+ assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
+
+ let call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || real_size==0 then [] else
+ [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + real_size)]
+ )
+ -- in
+ setDeltaNat (delta + real_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [CmmHinted dest _hint] =
+ case typeWidth rep of
+ W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
+ W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
+ _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
+ where
+ rep = localRegType dest
+ r_dest = getRegisterReg (CmmLocal dest)
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (load_args_code `appOL`
+ adjust_rsp `appOL`
+ push_code `appOL`
+ assign_eax sse_regs `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size = 8 -- always, at the mo
+
+ load_args :: [CmmHinted CmmExpr]
+ -> [Reg] -- int regs avail for args
+ -> [Reg] -- FP regs avail for args
+ -> InstrBlock
+ -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
+ load_args args [] [] code = return (args, [], [], code)
+ -- no more regs to use
+ load_args [] aregs fregs code = return ([], aregs, fregs, code)
+ -- no more args to push
+ load_args ((CmmHinted arg hint) : rest) aregs fregs code
+ | isFloatType arg_rep =
+ case fregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest aregs rs (code `appOL` arg_code r)
+ | otherwise =
+ case aregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest rs fregs (code `appOL` arg_code r)
+ where
+ arg_rep = cmmExprType arg
+
+ push_this_arg = do
+ (args',ars,frs,code') <- load_args rest aregs fregs code
+ return ((CmmHinted arg hint):args', ars, frs, code')
+
+ push_args [] code = return code
+ push_args ((CmmHinted arg hint):rest) code
+ | isFloatType arg_rep = do
+ (arg_reg, arg_code) <- getSomeReg arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` arg_code `appOL` toOL [
+ SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+ DELTA (delta-arg_size),
+ MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
+ push_args rest code'
+
+ | otherwise = do
+ -- we only ever generate word-sized function arguments. Promotion
+ -- has already happened: our Int8# type is kept sign-extended
+ -- in an Int#, for example.
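-- (Sketch, not part of the patch: what "promotion has already
-- happened" means. A sub-word argument is widened to a full word,
-- with the sign extension done up front, before any call is
-- generated; that is why the assertion just below can insist on
-- W64.)

import Data.Int (Int8, Int64)

promoteI8 :: Int8 -> Int64
promoteI8 = fromIntegral   -- sign-extending widen

-- e.g. promoteI8 (-1) == -1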
+ ASSERT(width == W64) return () + (arg_op, arg_code) <- getOperand arg + delta <- getDeltaNat + setDeltaNat (delta-arg_size) + let code' = code `appOL` arg_code `appOL` toOL [ + PUSH II64 arg_op, + DELTA (delta-arg_size)] + push_args rest code' + where + arg_rep = cmmExprType arg + width = typeWidth arg_rep + +#else +genCCall = panic "X86.genCCAll: not defined" + +#endif /* x86_64_TARGET_ARCH */ + + + + +outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock +outOfLineFloatOp mop res args + = do + dflags <- getDynFlagsNat + targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl + let target = CmmCallee targetExpr CCallConv + + if isFloat64 (localRegType res) + then + stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn) + else do + uq <- getUniqueNat + let + tmp = LocalReg uq f64 + -- in + code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn) + code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) + return (code1 `appOL` code2) + where + lbl = mkForeignLabel fn Nothing False IsFunction + + fn = case mop of + MO_F32_Sqrt -> fsLit "sqrtf" + MO_F32_Sin -> fsLit "sinf" + MO_F32_Cos -> fsLit "cosf" + MO_F32_Tan -> fsLit "tanf" + MO_F32_Exp -> fsLit "expf" + MO_F32_Log -> fsLit "logf" + + MO_F32_Asin -> fsLit "asinf" + MO_F32_Acos -> fsLit "acosf" + MO_F32_Atan -> fsLit "atanf" + + MO_F32_Sinh -> fsLit "sinhf" + MO_F32_Cosh -> fsLit "coshf" + MO_F32_Tanh -> fsLit "tanhf" + MO_F32_Pwr -> fsLit "powf" + + MO_F64_Sqrt -> fsLit "sqrt" + MO_F64_Sin -> fsLit "sin" + MO_F64_Cos -> fsLit "cos" + MO_F64_Tan -> fsLit "tan" + MO_F64_Exp -> fsLit "exp" + MO_F64_Log -> fsLit "log" + + MO_F64_Asin -> fsLit "asin" + MO_F64_Acos -> fsLit "acos" + MO_F64_Atan -> fsLit "atan" + + MO_F64_Sinh -> fsLit "sinh" + MO_F64_Cosh -> fsLit "cosh" + MO_F64_Tanh -> fsLit "tanh" + MO_F64_Pwr -> fsLit "pow" + + + + + +-- ----------------------------------------------------------------------------- +-- Generating a table-branch + +genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock + +genSwitch expr ids + | opt_PIC + = do + (reg,e_code) <- getSomeReg expr + lbl <- getNewLabelNat + dflags <- getDynFlagsNat + dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl + (tableReg,t_code) <- getSomeReg $ dynRef + let + jumpTable = map jumpTableEntryRel ids + + jumpTableEntryRel Nothing + = CmmStaticLit (CmmInt 0 wordWidth) + jumpTableEntryRel (Just (BlockId id)) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) + where blockLabel = mkAsmTempLabel id + + op = OpAddr (AddrBaseIndex (EABaseReg tableReg) + (EAIndex reg wORD_SIZE) (ImmInt 0)) + +#if x86_64_TARGET_ARCH +#if darwin_TARGET_OS + -- on Mac OS X/x86_64, put the jump table in the text section + -- to work around a limitation of the linker. + -- ld64 is unable to handle the relocations for + -- .quad L1 - L0 + -- if L0 is not preceded by a non-anonymous label in its section. + + code = e_code `appOL` t_code `appOL` toOL [ + ADD (intSize wordWidth) op (OpReg tableReg), + JMP_TBL (OpReg tableReg) [ id | Just id <- ids ], + LDATA Text (CmmDataLabel lbl : jumpTable) + ] +#else + -- HACK: On x86_64 binutils<2.17 is only able to generate PC32 + -- relocations, hence we only get 32-bit offsets in the jump + -- table. As these offsets are always negative we need to properly + -- sign extend them to 64-bit. This hack should be removed in + -- conjunction with the hack in PprMach.hs/pprDataItem once + -- binutils 2.17 is standard. 
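-- (Sketch, not part of the patch: the sign extension that the
-- MOVSxL below performs, in plain Haskell. The 32-bit entries in
-- the jump table are negative label differences, so the widening to
-- 64 bits must preserve the sign.)

import Data.Int (Int32, Int64)

signExtend32 :: Int32 -> Int64
signExtend32 = fromIntegral

-- e.g. signExtend32 (-100) == -100, where a zero-extending widen
-- would give 4294967196.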
+ code = e_code `appOL` t_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + MOVSxL II32 + (OpAddr (AddrBaseIndex (EABaseReg tableReg) + (EAIndex reg wORD_SIZE) (ImmInt 0))) + (OpReg reg), + ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg), + JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] + ] +#endif +#else + code = e_code `appOL` t_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + ADD (intSize wordWidth) op (OpReg tableReg), + JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] + ] +#endif + return code + | otherwise + = do + (reg,e_code) <- getSomeReg expr + lbl <- getNewLabelNat + let + jumpTable = map jumpTableEntry ids + op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl)) + code = e_code `appOL` toOL [ + LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), + JMP_TBL op [ id | Just id <- ids ] + ] + -- in + return code + + +-- ----------------------------------------------------------------------------- +-- 'condIntReg' and 'condFltReg': condition codes into registers + +-- Turn those condition codes into integers now (when they appear on +-- the right hand side of an assignment). +-- +-- (If applicable) Do not fill the delay slots here; you will confuse the +-- register allocator. + +condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register + +condIntReg cond x y = do + CondCode _ cond cond_code <- condIntCode cond x y + tmp <- getNewRegNat II8 + let + code dst = cond_code `appOL` toOL [ + SETCC cond (OpReg tmp), + MOVZxL II8 (OpReg tmp) (OpReg dst) + ] + -- in + return (Any II32 code) + + + +condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register + +#if i386_TARGET_ARCH +condFltReg cond x y = do + CondCode _ cond cond_code <- condFltCode cond x y + tmp <- getNewRegNat II8 + let + code dst = cond_code `appOL` toOL [ + SETCC cond (OpReg tmp), + MOVZxL II8 (OpReg tmp) (OpReg dst) + ] + -- in + return (Any II32 code) + +#elif x86_64_TARGET_ARCH +condFltReg cond x y = do + CondCode _ cond cond_code <- condFltCode cond x y + tmp1 <- getNewRegNat wordSize + tmp2 <- getNewRegNat wordSize + let + -- We have to worry about unordered operands (eg. comparisons + -- against NaN). If the operands are unordered, the comparison + -- sets the parity flag, carry flag and zero flag. + -- All comparisons are supposed to return false for unordered + -- operands except for !=, which returns true. + -- + -- Optimisation: we don't have to test the parity flag if we + -- know the test has already excluded the unordered case: eg > + -- and >= test for a zero carry flag, which can only occur for + -- ordered operands. + -- + -- ToDo: by reversing comparisons we could avoid testing the + -- parity flag in more cases. 
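-- (Sketch, not part of the patch: the three-way fix-up dispatch in
-- 'code dst' below as a standalone table, using the Cond type from
-- the new X86.Cond module. NE must OR in the parity bit so that NaN
-- operands yield True for /=; GU and GEU already exclude unordered
-- operands via the carry flag; everything else ANDs with the
-- NOTPARITY test.)

import X86.Cond (Cond(..))

data FltFixup = PlainTest | OrUnordered | AndOrdered
        deriving (Eq, Show)

fltFixup :: Cond -> FltFixup
fltFixup NE  = OrUnordered
fltFixup GU  = PlainTest
fltFixup GEU = PlainTest
fltFixup _   = AndOrdered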
+ + code dst = + cond_code `appOL` + (case cond of + NE -> or_unordered dst + GU -> plain_test dst + GEU -> plain_test dst + _ -> and_ordered dst) + + plain_test dst = toOL [ + SETCC cond (OpReg tmp1), + MOVZxL II8 (OpReg tmp1) (OpReg dst) + ] + or_unordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC PARITY (OpReg tmp2), + OR II8 (OpReg tmp1) (OpReg tmp2), + MOVZxL II8 (OpReg tmp2) (OpReg dst) + ] + and_ordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC NOTPARITY (OpReg tmp2), + AND II8 (OpReg tmp1) (OpReg tmp2), + MOVZxL II8 (OpReg tmp2) (OpReg dst) + ] + -- in + return (Any II32 code) + +#else +condFltReg = panic "X86.condFltReg: not defined" + +#endif + + + + +-- ----------------------------------------------------------------------------- +-- 'trivial*Code': deal with trivial instructions + +-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', +-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. +-- Only look for constants on the right hand side, because that's +-- where the generic optimizer will have put them. + +-- Similarly, for unary instructions, we don't have to worry about +-- matching an StInt as the argument, because genericOpt will already +-- have handled the constant-folding. + + +{- +The Rules of the Game are: + +* You cannot assume anything about the destination register dst; + it may be anything, including a fixed reg. + +* You may compute an operand into a fixed reg, but you may not + subsequently change the contents of that fixed reg. If you + want to do so, first copy the value either to a temporary + or into dst. You are free to modify dst even if it happens + to be a fixed reg -- that's not your problem. + +* You cannot assume that a fixed reg will stay live over an + arbitrary computation. The same applies to the dst reg. + +* Temporary regs obtained from getNewRegNat are distinct from + each other and from all other regs, and stay live over + arbitrary computations. + +-------------------- + +SDM's version of The Rules: + +* If getRegister returns Any, that means it can generate correct + code which places the result in any register, period. Even if that + register happens to be read during the computation. + + Corollary #1: this means that if you are generating code for an + operation with two arbitrary operands, you cannot assign the result + of the first operand into the destination register before computing + the second operand. The second operand might require the old value + of the destination register. + + Corollary #2: A function might be able to generate more efficient + code if it knows the destination register is a new temporary (and + therefore not read by any of the sub-computations). + +* If getRegister returns Any, then the code it generates may modify only: + (a) fresh temporaries + (b) the destination register + (c) known registers (eg. %ecx is used by shifts) + In particular, it may *not* modify global registers, unless the global + register happens to be the destination register. +-} + +trivialCode width instr (Just revinstr) (CmmLit lit_a) b + | is32BitLit lit_a = do + b_code <- getAnyReg b + let + code dst + = b_code dst `snocOL` + revinstr (OpImm (litToImm lit_a)) (OpReg dst) + -- in + return (Any (intSize width) code) + +trivialCode width instr maybe_revinstr a b + = genTrivialCode (intSize width) instr a b + +-- This is re-used for floating pt instructions too. 
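-- (Sketch, not part of the patch: what the 'revinstr' case of
-- trivialCode above buys. For dst := lit `op` b with a 32-bit
-- literal on the left, b is computed straight into dst and the
-- reversed-operand form applies the literal, saving a register.
-- A Haskell stand-in for the two-address semantics:)

addImmSketch :: Int -> Int -> Int
addImmSketch imm dst = dst + imm        -- models ADD $imm, dst

litLeftAdd :: Int -> Int -> Int
litLeftAdd lit b = addImmSketch lit b   -- b plays the role of dst

-- e.g. litLeftAdd 5 7 == 12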
+genTrivialCode rep instr a b = do
+ (b_op, b_code) <- getNonClobberedOperand b
+ a_code <- getAnyReg a
+ tmp <- getNewRegNat rep
+ let
+ -- We want the value of b to stay alive across the computation of a.
+ -- But, we want to calculate a straight into the destination register,
+ -- because the instruction only has two operands (dst := dst `op` src).
+ -- The troublesome case is when the result of b is in the same register
+ -- as the destination reg. In this case, we have to save b in a
+ -- new temporary across the computation of a.
+ code dst
+ | dst `regClashesWithOp` b_op =
+ b_code `appOL`
+ unitOL (MOV rep b_op (OpReg tmp)) `appOL`
+ a_code dst `snocOL`
+ instr (OpReg tmp) (OpReg dst)
+ | otherwise =
+ b_code `appOL`
+ a_code dst `snocOL`
+ instr b_op (OpReg dst)
+ -- in
+ return (Any rep code)
+
+reg `regClashesWithOp` OpReg reg2 = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
+
+-----------
+
+trivialUCode rep instr x = do
+ x_code <- getAnyReg x
+ let
+ code dst =
+ x_code dst `snocOL`
+ instr (OpReg dst)
+ return (Any rep code)
+
+-----------
+
+#if i386_TARGET_ARCH
+
+trivialFCode width instr x y = do
+ (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
+ (y_reg, y_code) <- getSomeReg y
+ let
+ size = floatSize width
+ code dst =
+ x_code `appOL`
+ y_code `snocOL`
+ instr size x_reg y_reg dst
+ return (Any size code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+trivialFCode pk instr x y
+ = genTrivialCode size (instr size) x y
+ where size = floatSize pk
+#endif
+
+trivialUFCode size instr x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code dst =
+ x_code `snocOL`
+ instr x_reg dst
+ -- in
+ return (Any size code)
+
+
+--------------------------------------------------------------------------------
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+coerceInt2FP from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of W32 -> GITOF; W64 -> GITOD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ return (Any (floatSize to) code)
+
+#elif x86_64_TARGET_ARCH
+coerceInt2FP from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any (floatSize to) code) -- works even if the destination rep is <II32
+
+#endif
+
+--------------------------------------------------------------------------------
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+coerceFP2Int from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case from of W32 -> GFTOI; W64 -> GDTOI
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ -- in
+ return (Any (intSize to) code)
+
+#elif x86_64_TARGET_ARCH
+coerceFP2Int from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any (intSize to) code) -- works even if the destination rep is <II32
+
+#endif
+
+--------------------------------------------------------------------------------
+coerceFP2FP :: Width -> CmmExpr -> NatM Register
+
+#if x86_64_TARGET_ARCH
+coerceFP2FP to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- in
+ return (Any (floatSize to) code)
+
+#else
+coerceFP2FP = panic "X86.coerceFP2FP: not defined"
+
+#endif
+
+
+
diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs
new file mode 100644
index 0000000..60e40b9
--- /dev/null
+++ b/compiler/nativeGen/X86/Cond.hs
@@ -0,0 +1,52 @@
+
+module X86.Cond (
+ Cond(..),
+ condUnsigned,
+ condToSigned,
+ condToUnsigned
+)
+
+where
+
+data Cond
+ = ALWAYS -- What's really used? ToDo
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | POS
+ | CARRY
+ | OFLO
+ | PARITY
+ | NOTPARITY
+
+
+condUnsigned :: Cond -> Bool
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+
+condToSigned :: Cond -> Cond
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 0dea1dd..b4b6fb5 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -12,34 +12,46 @@ module X86.Instr
where
-import BlockId
+import X86.Cond
import X86.Regs
-import RegsBase
+import Instruction
+import Size
+import RegClass
+import Reg
+
+import BlockId
import Cmm
import FastString
+import FastBool
import CLabel
import Panic
-data Cond
- = ALWAYS -- What's really used? ToDo
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | POS
- | CARRY
- | OFLO
- | PARITY
- | NOTPARITY
- deriving (Eq)
+-- Size of an x86 memory address, in bytes.
+--
+archWordSize :: Size
+#if i386_TARGET_ARCH
+archWordSize = II32
+#elif x86_64_TARGET_ARCH
+archWordSize = II64
+#else
+archWordSize = panic "X86.Instr.archWordSize: not defined"
+#endif
+
+-- | Instruction instance for x86 instruction set.
+instance Instruction Instr where
+ regUsageOfInstr = x86_regUsageOfInstr
+ patchRegsOfInstr = x86_patchRegsOfInstr
+ isJumpishInstr = x86_isJumpishInstr
+ jumpDestsOfInstr = x86_jumpDestsOfInstr
+ patchJumpInstr = x86_patchJumpInstr
+ mkSpillInstr = x86_mkSpillInstr
+ mkLoadInstr = x86_mkLoadInstr
+ takeDeltaInstr = x86_takeDeltaInstr
+ isMetaInstr = x86_isMetaInstr
+ mkRegRegMoveInstr = x86_mkRegRegMoveInstr
+ takeRegRegMoveInstr = x86_takeRegRegMoveInstr
+ mkJumpInstr = x86_mkJumpInstr
-- -----------------------------------------------------------------------------
@@ -154,13 +166,6 @@ data Instr
-- benefit of subsequent passes
| DELTA Int
- -- | spill this reg to a stack slot
- | SPILL Reg Int
-
- -- | reload this reg from a stack slot
- | RELOAD Int Reg
-
-
 -- Moves.
| MOV Size Operand Operand | MOVZxL Size Operand Operand -- size is the size of operand 1 @@ -301,7 +306,436 @@ data Operand -i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr] +x86_regUsageOfInstr :: Instr -> RegUsage +x86_regUsageOfInstr instr + = case instr of + MOV _ src dst -> usageRW src dst + MOVZxL _ src dst -> usageRW src dst + MOVSxL _ src dst -> usageRW src dst + LEA _ src dst -> usageRW src dst + ADD _ src dst -> usageRM src dst + ADC _ src dst -> usageRM src dst + SUB _ src dst -> usageRM src dst + IMUL _ src dst -> usageRM src dst + IMUL2 _ src -> mkRU (eax:use_R src) [eax,edx] + MUL _ src dst -> usageRM src dst + DIV _ op -> mkRU (eax:edx:use_R op) [eax,edx] + IDIV _ op -> mkRU (eax:edx:use_R op) [eax,edx] + AND _ src dst -> usageRM src dst + OR _ src dst -> usageRM src dst + + XOR _ (OpReg src) (OpReg dst) + | src == dst -> mkRU [] [dst] + + XOR _ src dst -> usageRM src dst + NOT _ op -> usageM op + NEGI _ op -> usageM op + SHL _ imm dst -> usageRM imm dst + SAR _ imm dst -> usageRM imm dst + SHR _ imm dst -> usageRM imm dst + BT _ _ src -> mkRUR (use_R src) + + PUSH _ op -> mkRUR (use_R op) + POP _ op -> mkRU [] (def_W op) + TEST _ src dst -> mkRUR (use_R src ++ use_R dst) + CMP _ src dst -> mkRUR (use_R src ++ use_R dst) + SETCC _ op -> mkRU [] (def_W op) + JXX _ _ -> mkRU [] [] + JXX_GBL _ _ -> mkRU [] [] + JMP op -> mkRUR (use_R op) + JMP_TBL op _ -> mkRUR (use_R op) + CALL (Left _) params -> mkRU params callClobberedRegs + CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs + CLTD _ -> mkRU [eax] [edx] + NOP -> mkRU [] [] + +#if i386_TARGET_ARCH + GMOV src dst -> mkRU [src] [dst] + GLD _ src dst -> mkRU (use_EA src) [dst] + GST _ src dst -> mkRUR (src : use_EA dst) + + GLDZ dst -> mkRU [] [dst] + GLD1 dst -> mkRU [] [dst] + + GFTOI src dst -> mkRU [src] [dst] + GDTOI src dst -> mkRU [src] [dst] + + GITOF src dst -> mkRU [src] [dst] + GITOD src dst -> mkRU [src] [dst] + + GADD _ s1 s2 dst -> mkRU [s1,s2] [dst] + GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst] + GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst] + GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst] + + GCMP _ src1 src2 -> mkRUR [src1,src2] + GABS _ src dst -> mkRU [src] [dst] + GNEG _ src dst -> mkRU [src] [dst] + GSQRT _ src dst -> mkRU [src] [dst] + GSIN _ _ _ src dst -> mkRU [src] [dst] + GCOS _ _ _ src dst -> mkRU [src] [dst] + GTAN _ _ _ src dst -> mkRU [src] [dst] +#endif + +#if x86_64_TARGET_ARCH + CVTSS2SD src dst -> mkRU [src] [dst] + CVTSD2SS src dst -> mkRU [src] [dst] + CVTTSS2SIQ src dst -> mkRU (use_R src) [dst] + CVTTSD2SIQ src dst -> mkRU (use_R src) [dst] + CVTSI2SS src dst -> mkRU (use_R src) [dst] + CVTSI2SD src dst -> mkRU (use_R src) [dst] + FDIV _ src dst -> usageRM src dst +#endif + + FETCHGOT reg -> mkRU [] [reg] + FETCHPC reg -> mkRU [] [reg] + + COMMENT _ -> noUsage + DELTA _ -> noUsage + + _other -> panic "regUsage: unrecognised instr" + + where + -- 2 operand form; first operand Read; second Written + usageRW :: Operand -> Operand -> RegUsage + usageRW op (OpReg reg) = mkRU (use_R op) [reg] + usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea) + usageRW _ _ = panic "X86.RegInfo.usageRW: no match" + + -- 2 operand form; first operand Read; second Modified + usageRM :: Operand -> Operand -> RegUsage + usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg] + usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea) + usageRM _ _ = panic "X86.RegInfo.usageRM: no match" + + -- 1 operand form; operand Modified + usageM :: Operand -> RegUsage + usageM (OpReg reg) = mkRU [reg] [reg] + 
usageM (OpAddr ea) = mkRUR (use_EA ea) + usageM _ = panic "X86.RegInfo.usageM: no match" + + -- Registers defd when an operand is written. + def_W (OpReg reg) = [reg] + def_W (OpAddr _ ) = [] + def_W _ = panic "X86.RegInfo.def_W: no match" + + -- Registers used when an operand is read. + use_R (OpReg reg) = [reg] + use_R (OpImm _) = [] + use_R (OpAddr ea) = use_EA ea + + -- Registers used to compute an effective address. + use_EA (ImmAddr _ _) = [] + use_EA (AddrBaseIndex base index _) = + use_base base $! use_index index + where use_base (EABaseReg r) x = r : x + use_base _ x = x + use_index EAIndexNone = [] + use_index (EAIndex i _) = [i] + + mkRUR src = src' `seq` RU src' [] + where src' = filter interesting src + + mkRU src dst = src' `seq` dst' `seq` RU src' dst' + where src' = filter interesting src + dst' = filter interesting dst + +interesting :: Reg -> Bool +interesting (VirtualRegI _) = True +interesting (VirtualRegHi _) = True +interesting (VirtualRegF _) = True +interesting (VirtualRegD _) = True +interesting (RealReg i) = isFastTrue (freeReg i) + + + + +x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr +x86_patchRegsOfInstr instr env + = case instr of + MOV sz src dst -> patch2 (MOV sz) src dst + MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst + MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst + LEA sz src dst -> patch2 (LEA sz) src dst + ADD sz src dst -> patch2 (ADD sz) src dst + ADC sz src dst -> patch2 (ADC sz) src dst + SUB sz src dst -> patch2 (SUB sz) src dst + IMUL sz src dst -> patch2 (IMUL sz) src dst + IMUL2 sz src -> patch1 (IMUL2 sz) src + MUL sz src dst -> patch2 (MUL sz) src dst + IDIV sz op -> patch1 (IDIV sz) op + DIV sz op -> patch1 (DIV sz) op + AND sz src dst -> patch2 (AND sz) src dst + OR sz src dst -> patch2 (OR sz) src dst + XOR sz src dst -> patch2 (XOR sz) src dst + NOT sz op -> patch1 (NOT sz) op + NEGI sz op -> patch1 (NEGI sz) op + SHL sz imm dst -> patch1 (SHL sz imm) dst + SAR sz imm dst -> patch1 (SAR sz imm) dst + SHR sz imm dst -> patch1 (SHR sz imm) dst + BT sz imm src -> patch1 (BT sz imm) src + TEST sz src dst -> patch2 (TEST sz) src dst + CMP sz src dst -> patch2 (CMP sz) src dst + PUSH sz op -> patch1 (PUSH sz) op + POP sz op -> patch1 (POP sz) op + SETCC cond op -> patch1 (SETCC cond) op + JMP op -> patch1 JMP op + JMP_TBL op ids -> patch1 JMP_TBL op $ ids + +#if i386_TARGET_ARCH + GMOV src dst -> GMOV (env src) (env dst) + GLD sz src dst -> GLD sz (lookupAddr src) (env dst) + GST sz src dst -> GST sz (env src) (lookupAddr dst) + + GLDZ dst -> GLDZ (env dst) + GLD1 dst -> GLD1 (env dst) + + GFTOI src dst -> GFTOI (env src) (env dst) + GDTOI src dst -> GDTOI (env src) (env dst) + + GITOF src dst -> GITOF (env src) (env dst) + GITOD src dst -> GITOD (env src) (env dst) + + GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst) + GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst) + GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst) + GDIV sz s1 s2 dst -> GDIV sz (env s1) (env s2) (env dst) + + GCMP sz src1 src2 -> GCMP sz (env src1) (env src2) + GABS sz src dst -> GABS sz (env src) (env dst) + GNEG sz src dst -> GNEG sz (env src) (env dst) + GSQRT sz src dst -> GSQRT sz (env src) (env dst) + GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst) + GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst) + GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst) +#endif + +#if x86_64_TARGET_ARCH + CVTSS2SD src dst -> CVTSS2SD (env src) (env dst) + CVTSD2SS src dst -> CVTSD2SS (env src) (env dst) + CVTTSS2SIQ src dst 
-> CVTTSS2SIQ (patchOp src) (env dst) + CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst) + CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst) + CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst) + FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst) +#endif + + CALL (Left _) _ -> instr + CALL (Right reg) p -> CALL (Right (env reg)) p + + FETCHGOT reg -> FETCHGOT (env reg) + FETCHPC reg -> FETCHPC (env reg) + + NOP -> instr + COMMENT _ -> instr + DELTA _ -> instr + + JXX _ _ -> instr + JXX_GBL _ _ -> instr + CLTD _ -> instr + + _other -> panic "patchRegs: unrecognised instr" + + where + patch1 insn op = insn $! patchOp op + patch2 insn src dst = (insn $! patchOp src) $! patchOp dst + + patchOp (OpReg reg) = OpReg $! env reg + patchOp (OpImm imm) = OpImm imm + patchOp (OpAddr ea) = OpAddr $! lookupAddr ea + + lookupAddr (ImmAddr imm off) = ImmAddr imm off + lookupAddr (AddrBaseIndex base index disp) + = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp + where + lookupBase EABaseNone = EABaseNone + lookupBase EABaseRip = EABaseRip + lookupBase (EABaseReg r) = EABaseReg (env r) + + lookupIndex EAIndexNone = EAIndexNone + lookupIndex (EAIndex r i) = EAIndex (env r) i + + +-------------------------------------------------------------------------------- +x86_isJumpishInstr + :: Instr -> Bool + +x86_isJumpishInstr instr + = case instr of + JMP{} -> True + JXX{} -> True + JXX_GBL{} -> True + JMP_TBL{} -> True + CALL{} -> True + _ -> False + + +x86_jumpDestsOfInstr + :: Instr + -> [BlockId] + +x86_jumpDestsOfInstr insn + = case insn of + JXX _ id -> [id] + JMP_TBL _ ids -> ids + _ -> [] + + +x86_patchJumpInstr + :: Instr -> (BlockId -> BlockId) -> Instr + +x86_patchJumpInstr insn patchF + = case insn of + JXX cc id -> JXX cc (patchF id) + JMP_TBL _ _ -> error "Cannot patch JMP_TBL" + _ -> insn + + + + +-- ----------------------------------------------------------------------------- +-- | Make a spill instruction. +x86_mkSpillInstr + :: Reg -- register to spill + -> Int -- current stack delta + -> Int -- spill slot to use + -> Instr + +#if i386_TARGET_ARCH +x86_mkSpillInstr reg delta slot + = let off = spillSlotToOffset slot + in + let off_w = (off-delta) `div` 4 + in case regClass reg of + RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w)) + _ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -} + +#elif x86_64_TARGET_ARCH +x86_mkSpillInstr reg delta slot + = let off = spillSlotToOffset slot + in + let off_w = (off-delta) `div` 8 + in case regClass reg of + RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w)) + RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w)) + _ -> panic "X86.mkSpillInstr: no match" + -- ToDo: will it work to always spill as a double? + -- does that cause a stall if the data was a float? +#else +x86_mkSpillInstr _ _ _ + = panic "X86.RegInfo.mkSpillInstr: not defined for this architecture." +#endif + + +-- | Make a spill reload instruction. 
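-- (Sketch, not part of the patch: the offset arithmetic shared by
-- x86_mkSpillInstr above and x86_mkLoadInstr below. The slot's
-- offset is adjusted by the current stack delta and scaled from
-- bytes to words.)

reloadOffWords :: Int -> Int -> Int -> Int
reloadOffWords wordBytes off delta = (off - delta) `div` wordBytes

-- e.g. on i386 (4-byte words), a slot at offset 64 with delta -8
-- is addressed as spRel (reloadOffWords 4 64 (-8)) == spRel 18.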
+x86_mkLoadInstr + :: Reg -- register to load + -> Int -- current stack delta + -> Int -- spill slot to use + -> Instr + +#if i386_TARGET_ARCH +x86_mkLoadInstr reg delta slot + = let off = spillSlotToOffset slot + in + let off_w = (off-delta) `div` 4 + in case regClass reg of { + RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg); + _ -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -} +#elif x86_64_TARGET_ARCH +x86_mkLoadInstr reg delta slot + = let off = spillSlotToOffset slot + in + let off_w = (off-delta) `div` 8 + in case regClass reg of + RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg) + _ -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg) +#else +x86_mkLoadInstr _ _ _ + = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture." +#endif + + +-------------------------------------------------------------------------------- + +-- | See if this instruction is telling us the current C stack delta +x86_takeDeltaInstr + :: Instr + -> Maybe Int + +x86_takeDeltaInstr instr + = case instr of + DELTA i -> Just i + _ -> Nothing + + +x86_isMetaInstr + :: Instr + -> Bool + +x86_isMetaInstr instr + = case instr of + COMMENT{} -> True + LDATA{} -> True + NEWBLOCK{} -> True + DELTA{} -> True + _ -> False + + + +-- | Make a reg-reg move instruction. +-- On SPARC v8 there are no instructions to move directly between +-- floating point and integer regs. If we need to do that then we +-- have to go via memory. +-- +x86_mkRegRegMoveInstr + :: Reg + -> Reg + -> Instr + +x86_mkRegRegMoveInstr src dst + = case regClass src of +#if i386_TARGET_ARCH + RcInteger -> MOV II32 (OpReg src) (OpReg dst) + RcDouble -> GMOV src dst + RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match" +#else + RcInteger -> MOV II64 (OpReg src) (OpReg dst) + RcDouble -> MOV FF64 (OpReg src) (OpReg dst) + RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match" +#endif + + +-- | Check whether an instruction represents a reg-reg move. +-- The register allocator attempts to eliminate reg->reg moves whenever it can, +-- by assigning the src and dest temporaries to the same real register. +-- +x86_takeRegRegMoveInstr + :: Instr + -> Maybe (Reg,Reg) + +x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2)) + = Just (r1,r2) + +x86_takeRegRegMoveInstr _ = Nothing + + +-- | Make an unconditional branch instruction. 
+x86_mkJumpInstr + :: BlockId + -> [Instr] + +x86_mkJumpInstr id + = [JXX ALWAYS id] + + + + + +i386_insert_ffrees + :: [GenBasicBlock Instr] + -> [GenBasicBlock Instr] + i386_insert_ffrees blocks | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ]) = map ffree_before_nonlocal_transfers blocks diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index c0ad496..3f181fc 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -7,12 +7,15 @@ ----------------------------------------------------------------------------- module X86.Ppr ( + pprNatCmmTop, + pprBasicBlock, + pprSectionHeader, + pprData, + pprInstr, pprUserReg, pprSize, pprImm, - pprSectionHeader, pprDataItem, - pprInstr ) where @@ -20,24 +23,145 @@ where #include "HsVersions.h" #include "nativeGen/NCG.h" -import PprBase -import RegsBase import X86.Regs import X86.Instr +import X86.Cond +import Instruction +import Size +import Reg +import PprBase + import BlockId import Cmm - -import CLabel ( CLabel, mkAsmTempLabel ) -#if HAVE_SUBSECTIONS_VIA_SYMBOLS -import CLabel ( mkDeadStripPreventer ) -#endif - +import CLabel import Unique ( pprUnique ) import Pretty import FastString import qualified Outputable -import Outputable (panic) +import Outputable (panic, Outputable) + +import Data.Word + + + +-- ----------------------------------------------------------------------------- +-- Printing this stuff out + +pprNatCmmTop :: NatCmmTop Instr -> Doc +pprNatCmmTop (CmmData section dats) = + pprSectionHeader section $$ vcat (map pprData dats) + + -- special case for split markers: +pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl + +pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) = + pprSectionHeader Text $$ + (if null info then -- blocks guaranteed not null, so label needed + pprLabel lbl + else +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) + <> char ':' $$ +#endif + vcat (map pprData info) $$ + pprLabel (entryLblToInfoLbl lbl) + ) $$ + vcat (map pprBasicBlock blocks) + -- above: Even the first block gets a label, because with branch-chain + -- elimination, it might be the target of a goto. +#if HAVE_SUBSECTIONS_VIA_SYMBOLS + -- If we are using the .subsections_via_symbols directive + -- (available on recent versions of Darwin), + -- we have to make sure that there is some kind of reference + -- from the entry code to a label on the _top_ of of the info table, + -- so that the linker will not think it is unreferenced and dead-strip + -- it. That's why the label is called a DeadStripPreventer (_dsp). 
+ $$ if not (null info) + then text "\t.long " + <+> pprCLabel_asm (entryLblToInfoLbl lbl) + <+> char '-' + <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl) + else empty +#endif + + +pprBasicBlock :: NatBasicBlock Instr -> Doc +pprBasicBlock (BasicBlock (BlockId id) instrs) = + pprLabel (mkAsmTempLabel id) $$ + vcat (map pprInstr instrs) + + +pprData :: CmmStatic -> Doc +pprData (CmmAlign bytes) = pprAlign bytes +pprData (CmmDataLabel lbl) = pprLabel lbl +pprData (CmmString str) = pprASCII str +pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes +pprData (CmmStaticLit lit) = pprDataItem lit + +pprGloblDecl :: CLabel -> Doc +pprGloblDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext IF_ARCH_sparc((sLit ".global "), + (sLit ".globl ")) <> + pprCLabel_asm lbl + +pprTypeAndSizeDecl :: CLabel -> Doc +#if linux_TARGET_OS +pprTypeAndSizeDecl lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = ptext (sLit ".type ") <> + pprCLabel_asm lbl <> ptext (sLit ", @object") +#else +pprTypeAndSizeDecl _ + = empty +#endif + +pprLabel :: CLabel -> Doc +pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':') + + +pprASCII :: [Word8] -> Doc +pprASCII str + = vcat (map do1 str) $$ do1 0 + where + do1 :: Word8 -> Doc + do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w) + +pprAlign :: Int -> Doc + + +pprAlign bytes + = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes) + where + +#if darwin_TARGET_OS + pow2 = log2 bytes + + log2 :: Int -> Int -- cache the common ones + log2 1 = 0 + log2 2 = 1 + log2 4 = 2 + log2 8 = 3 + log2 n = 1 + log2 (n `quot` 2) +#endif + +-- ----------------------------------------------------------------------------- +-- pprInstr: print an 'Instr' + +instance Outputable Instr where + ppr instr = Outputable.docToSDoc $ pprInstr instr + + + + + + + + + + + #if i386_TARGET_ARCH || x86_64_TARGET_ARCH pprUserReg :: Reg -> Doc @@ -49,7 +173,6 @@ pprUserReg = panic "X86.Ppr.pprUserReg: not defined" #endif - pprReg :: Size -> Reg -> Doc pprReg s r @@ -228,7 +351,7 @@ pprAddr (AddrBaseIndex base index displacement) = let pp_disp = ppr_disp displacement pp_off p = pp_disp <> char '(' <> p <> char ')' - pp_reg r = pprReg wordSize r + pp_reg r = pprReg archWordSize r in case (base, index) of (EABaseNone, EAIndexNone) -> pp_disp @@ -384,6 +507,7 @@ pprInstr (NEWBLOCK _) pprInstr (LDATA _ _) = panic "PprMach.pprInstr: LDATA" +{- pprInstr (SPILL reg slot) = hcat [ ptext (sLit "\tSPILL"), @@ -399,6 +523,7 @@ pprInstr (RELOAD slot reg) ptext (sLit "SLOT") <> parens (int slot), comma, pprUserReg reg] +-} pprInstr (MOV size src dst) = pprSizeOpOp (sLit "mov") size src dst @@ -414,7 +539,7 @@ pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src -- the remaining zero-extension to 64 bits is automatic, and the 32-bit -- instruction is shorter. -pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes wordSize src dst +pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst -- here we do some patching, since the physical registers are only set late -- in the code generation. 
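-- (Sketch, not part of the patch: the byte-count-to-exponent
-- conversion pprAlign above performs for Darwin, where .align takes
-- a power of two rather than a byte count.)

log2Sketch :: Int -> Int
log2Sketch 1 = 0
log2Sketch n = 1 + log2Sketch (n `quot` 2)

-- e.g. log2Sketch 8 == 3, so an 8-byte alignment is emitted as
-- ".align 3" on Darwin and ".align 8" elsewhere.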
@@ -497,10 +622,10 @@ pprInstr (JXX cond (BlockId id)) pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm) pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm) -pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand wordSize op) +pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op) pprInstr (JMP_TBL op _) = pprInstr (JMP op) pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm) -pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg wordSize reg) +pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg) pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op @@ -941,9 +1066,9 @@ pprRegReg :: LitString -> Reg -> Reg -> Doc pprRegReg name reg1 reg2 = hcat [ pprMnemonic_ name, - pprReg wordSize reg1, + pprReg archWordSize reg1, comma, - pprReg wordSize reg2 + pprReg archWordSize reg2 ] @@ -951,9 +1076,9 @@ pprOpReg :: LitString -> Operand -> Reg -> Doc pprOpReg name op1 reg2 = hcat [ pprMnemonic_ name, - pprOperand wordSize op1, + pprOperand archWordSize op1, comma, - pprReg wordSize reg2 + pprReg archWordSize reg2 ] diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs index 39bc6de..58d063b 100644 --- a/compiler/nativeGen/X86/RegInfo.hs +++ b/compiler/nativeGen/X86/RegInfo.hs @@ -1,26 +1,17 @@ module X86.RegInfo ( - RegUsage(..), - noUsage, - regUsage, - patchRegs, - jumpDests, - isJumpish, - patchJump, - isRegRegMove, - - JumpDest(..), + mkVReg, + + JumpDest, canShortcut, shortcutJump, - mkSpillInstr, - mkLoadInstr, - mkRegRegMoveInstr, - mkBranchInstr, - spillSlotSize, maxSpillSlots, - spillSlotToOffset + spillSlotToOffset, + + shortcutStatic, + regDotColor ) where @@ -29,341 +20,26 @@ where #include "HsVersions.h" import X86.Instr +import X86.Cond import X86.Regs -import RegsBase +import Size +import Reg +import Cmm +import CLabel import BlockId import Outputable import Constants ( rESERVED_C_STACK_BYTES ) -import FastBool - - --- ----------------------------------------------------------------------------- --- RegUsage type - --- @regUsage@ returns the sets of src and destination registers used --- by a particular instruction. Machine registers that are --- pre-allocated to stgRegs are filtered out, because they are --- uninteresting from a register allocation standpoint. (We wouldn't --- want them to end up on the free list!) As far as we are concerned, --- the fixed registers simply don't exist (for allocation purposes, --- anyway). - --- regUsage doesn't need to do any trickery for jumps and such. Just --- state precisely the regs read and written by that insn. The --- consequences of control flow transfers, as far as register --- allocation goes, are taken care of by the register allocator. 
- -data RegUsage = RU [Reg] [Reg] - -noUsage :: RegUsage -noUsage = RU [] [] - - -regUsage :: Instr -> RegUsage -regUsage instr = case instr of - MOV _ src dst -> usageRW src dst - MOVZxL _ src dst -> usageRW src dst - MOVSxL _ src dst -> usageRW src dst - LEA _ src dst -> usageRW src dst - ADD _ src dst -> usageRM src dst - ADC _ src dst -> usageRM src dst - SUB _ src dst -> usageRM src dst - IMUL _ src dst -> usageRM src dst - IMUL2 _ src -> mkRU (eax:use_R src) [eax,edx] - MUL _ src dst -> usageRM src dst - DIV _ op -> mkRU (eax:edx:use_R op) [eax,edx] - IDIV _ op -> mkRU (eax:edx:use_R op) [eax,edx] - AND _ src dst -> usageRM src dst - OR _ src dst -> usageRM src dst - - XOR _ (OpReg src) (OpReg dst) - | src == dst -> mkRU [] [dst] - - XOR _ src dst -> usageRM src dst - NOT _ op -> usageM op - NEGI _ op -> usageM op - SHL _ imm dst -> usageRM imm dst - SAR _ imm dst -> usageRM imm dst - SHR _ imm dst -> usageRM imm dst - BT _ _ src -> mkRUR (use_R src) - - PUSH _ op -> mkRUR (use_R op) - POP _ op -> mkRU [] (def_W op) - TEST _ src dst -> mkRUR (use_R src ++ use_R dst) - CMP _ src dst -> mkRUR (use_R src ++ use_R dst) - SETCC _ op -> mkRU [] (def_W op) - JXX _ _ -> mkRU [] [] - JXX_GBL _ _ -> mkRU [] [] - JMP op -> mkRUR (use_R op) - JMP_TBL op _ -> mkRUR (use_R op) - CALL (Left _) params -> mkRU params callClobberedRegs - CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs - CLTD _ -> mkRU [eax] [edx] - NOP -> mkRU [] [] - -#if i386_TARGET_ARCH - GMOV src dst -> mkRU [src] [dst] - GLD _ src dst -> mkRU (use_EA src) [dst] - GST _ src dst -> mkRUR (src : use_EA dst) - - GLDZ dst -> mkRU [] [dst] - GLD1 dst -> mkRU [] [dst] - - GFTOI src dst -> mkRU [src] [dst] - GDTOI src dst -> mkRU [src] [dst] - - GITOF src dst -> mkRU [src] [dst] - GITOD src dst -> mkRU [src] [dst] - - GADD _ s1 s2 dst -> mkRU [s1,s2] [dst] - GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst] - GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst] - GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst] - - GCMP _ src1 src2 -> mkRUR [src1,src2] - GABS _ src dst -> mkRU [src] [dst] - GNEG _ src dst -> mkRU [src] [dst] - GSQRT _ src dst -> mkRU [src] [dst] - GSIN _ _ _ src dst -> mkRU [src] [dst] - GCOS _ _ _ src dst -> mkRU [src] [dst] - GTAN _ _ _ src dst -> mkRU [src] [dst] -#endif - -#if x86_64_TARGET_ARCH - CVTSS2SD src dst -> mkRU [src] [dst] - CVTSD2SS src dst -> mkRU [src] [dst] - CVTTSS2SIQ src dst -> mkRU (use_R src) [dst] - CVTTSD2SIQ src dst -> mkRU (use_R src) [dst] - CVTSI2SS src dst -> mkRU (use_R src) [dst] - CVTSI2SD src dst -> mkRU (use_R src) [dst] - FDIV _ src dst -> usageRM src dst -#endif - - FETCHGOT reg -> mkRU [] [reg] - FETCHPC reg -> mkRU [] [reg] - - COMMENT _ -> noUsage - DELTA _ -> noUsage - SPILL reg _ -> mkRU [reg] [] - RELOAD _ reg -> mkRU [] [reg] - - _other -> panic "regUsage: unrecognised instr" - - where - -- 2 operand form; first operand Read; second Written - usageRW :: Operand -> Operand -> RegUsage - usageRW op (OpReg reg) = mkRU (use_R op) [reg] - usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea) - usageRW _ _ = panic "X86.RegInfo.usageRW: no match" - - -- 2 operand form; first operand Read; second Modified - usageRM :: Operand -> Operand -> RegUsage - usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg] - usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea) - usageRM _ _ = panic "X86.RegInfo.usageRM: no match" - - -- 1 operand form; operand Modified - usageM :: Operand -> RegUsage - usageM (OpReg reg) = mkRU [reg] [reg] - usageM (OpAddr ea) = mkRUR (use_EA ea) - usageM _ = panic "X86.RegInfo.usageM: 
no match" - - -- Registers defd when an operand is written. - def_W (OpReg reg) = [reg] - def_W (OpAddr _ ) = [] - def_W _ = panic "X86.RegInfo.def_W: no match" - - -- Registers used when an operand is read. - use_R (OpReg reg) = [reg] - use_R (OpImm _) = [] - use_R (OpAddr ea) = use_EA ea - - -- Registers used to compute an effective address. - use_EA (ImmAddr _ _) = [] - use_EA (AddrBaseIndex base index _) = - use_base base $! use_index index - where use_base (EABaseReg r) x = r : x - use_base _ x = x - use_index EAIndexNone = [] - use_index (EAIndex i _) = [i] - - mkRUR src = src' `seq` RU src' [] - where src' = filter interesting src - - mkRU src dst = src' `seq` dst' `seq` RU src' dst' - where src' = filter interesting src - dst' = filter interesting dst - -interesting :: Reg -> Bool -interesting (VirtualRegI _) = True -interesting (VirtualRegHi _) = True -interesting (VirtualRegF _) = True -interesting (VirtualRegD _) = True -interesting (RealReg i) = isFastTrue (freeReg i) - - - - --- ----------------------------------------------------------------------------- --- 'patchRegs' function - --- 'patchRegs' takes an instruction and applies the given mapping to --- all the register references. - -patchRegs :: Instr -> (Reg -> Reg) -> Instr -patchRegs instr env = case instr of - MOV sz src dst -> patch2 (MOV sz) src dst - MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst - MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst - LEA sz src dst -> patch2 (LEA sz) src dst - ADD sz src dst -> patch2 (ADD sz) src dst - ADC sz src dst -> patch2 (ADC sz) src dst - SUB sz src dst -> patch2 (SUB sz) src dst - IMUL sz src dst -> patch2 (IMUL sz) src dst - IMUL2 sz src -> patch1 (IMUL2 sz) src - MUL sz src dst -> patch2 (MUL sz) src dst - IDIV sz op -> patch1 (IDIV sz) op - DIV sz op -> patch1 (DIV sz) op - AND sz src dst -> patch2 (AND sz) src dst - OR sz src dst -> patch2 (OR sz) src dst - XOR sz src dst -> patch2 (XOR sz) src dst - NOT sz op -> patch1 (NOT sz) op - NEGI sz op -> patch1 (NEGI sz) op - SHL sz imm dst -> patch1 (SHL sz imm) dst - SAR sz imm dst -> patch1 (SAR sz imm) dst - SHR sz imm dst -> patch1 (SHR sz imm) dst - BT sz imm src -> patch1 (BT sz imm) src - TEST sz src dst -> patch2 (TEST sz) src dst - CMP sz src dst -> patch2 (CMP sz) src dst - PUSH sz op -> patch1 (PUSH sz) op - POP sz op -> patch1 (POP sz) op - SETCC cond op -> patch1 (SETCC cond) op - JMP op -> patch1 JMP op - JMP_TBL op ids -> patch1 JMP_TBL op $ ids - -#if i386_TARGET_ARCH - GMOV src dst -> GMOV (env src) (env dst) - GLD sz src dst -> GLD sz (lookupAddr src) (env dst) - GST sz src dst -> GST sz (env src) (lookupAddr dst) - - GLDZ dst -> GLDZ (env dst) - GLD1 dst -> GLD1 (env dst) - - GFTOI src dst -> GFTOI (env src) (env dst) - GDTOI src dst -> GDTOI (env src) (env dst) - - GITOF src dst -> GITOF (env src) (env dst) - GITOD src dst -> GITOD (env src) (env dst) - - GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst) - GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst) - GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst) - GDIV sz s1 s2 dst -> GDIV sz (env s1) (env s2) (env dst) - - GCMP sz src1 src2 -> GCMP sz (env src1) (env src2) - GABS sz src dst -> GABS sz (env src) (env dst) - GNEG sz src dst -> GNEG sz (env src) (env dst) - GSQRT sz src dst -> GSQRT sz (env src) (env dst) - GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst) - GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst) - GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst) -#endif - -#if x86_64_TARGET_ARCH - CVTSS2SD 
src dst -> CVTSS2SD (env src) (env dst) - CVTSD2SS src dst -> CVTSD2SS (env src) (env dst) - CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst) - CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst) - CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst) - CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst) - FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst) -#endif - - CALL (Left _) _ -> instr - CALL (Right reg) p -> CALL (Right (env reg)) p - - FETCHGOT reg -> FETCHGOT (env reg) - FETCHPC reg -> FETCHPC (env reg) - - NOP -> instr - COMMENT _ -> instr - DELTA _ -> instr - SPILL reg slot -> SPILL (env reg) slot - RELOAD slot reg -> RELOAD slot (env reg) - - JXX _ _ -> instr - JXX_GBL _ _ -> instr - CLTD _ -> instr - - _other -> panic "patchRegs: unrecognised instr" - - where - patch1 insn op = insn $! patchOp op - patch2 insn src dst = (insn $! patchOp src) $! patchOp dst - - patchOp (OpReg reg) = OpReg $! env reg - patchOp (OpImm imm) = OpImm imm - patchOp (OpAddr ea) = OpAddr $! lookupAddr ea - - lookupAddr (ImmAddr imm off) = ImmAddr imm off - lookupAddr (AddrBaseIndex base index disp) - = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp - where - lookupBase EABaseNone = EABaseNone - lookupBase EABaseRip = EABaseRip - lookupBase (EABaseReg r) = EABaseReg (env r) - - lookupIndex EAIndexNone = EAIndexNone - lookupIndex (EAIndex r i) = EAIndex (env r) i - - --- ----------------------------------------------------------------------------- --- Determine the possible destinations from the current instruction. - --- (we always assume that the next instruction is also a valid destination; --- if this isn't the case then the jump should be at the end of the basic --- block). - -jumpDests :: Instr -> [BlockId] -> [BlockId] -jumpDests insn acc - = case insn of - JXX _ id -> id : acc - JMP_TBL _ ids -> ids ++ acc - _ -> acc - - -isJumpish :: Instr -> Bool -isJumpish instr - = case instr of - JMP{} -> True - JXX{} -> True - JXX_GBL{} -> True - JMP_TBL{} -> True - CALL{} -> True - _ -> False - --- | Change the destination of this jump instruction --- Used in joinToTargets in the linear allocator, when emitting fixup code --- for join points. -patchJump :: Instr -> BlockId -> BlockId -> Instr -patchJump insn old new - = case insn of - JXX cc id | id == old -> JXX cc new - JMP_TBL _ _ -> error "Cannot patch JMP_TBL" - _other -> insn - - --- ----------------------------------------------------------------------------- --- Detecting reg->reg moves - --- The register allocator attempts to eliminate reg->reg moves whenever it can, --- by assigning the src and dest temporaries to the same real register. 
- -isRegRegMove :: Instr -> Maybe (Reg,Reg) -isRegRegMove (MOV _ (OpReg r1) (OpReg r2)) = Just (r1,r2) -isRegRegMove _ = Nothing +import Unique +mkVReg :: Unique -> Size -> Reg +mkVReg u size + | not (isFloatSize size) = VirtualRegI u + | otherwise + = case size of + FF32 -> VirtualRegD u + FF64 -> VirtualRegD u + _ -> panic "mkVReg" data JumpDest = DestBlockId BlockId | DestImm Imm @@ -386,92 +62,6 @@ shortcutJump _ other = other --- ----------------------------------------------------------------------------- --- Generating spill instructions - -mkSpillInstr - :: Reg -- register to spill - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr - -#if i386_TARGET_ARCH -mkSpillInstr reg delta slot - = let off = spillSlotToOffset slot - in - let off_w = (off-delta) `div` 4 - in case regClass reg of - RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w)) - _ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -} - -#elif x86_64_TARGET_ARCH -mkSpillInstr reg delta slot - = let off = spillSlotToOffset slot - in - let off_w = (off-delta) `div` 8 - in case regClass reg of - RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w)) - RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w)) - RcFloat -> panic "mkSpillInstr/RcFloat" - -- ToDo: will it work to always spill as a double? - -- does that cause a stall if the data was a float? -#else -mkSpillInstr _ _ _ - = panic "X86.RegInfo.mkSpillInstr: not defined for this architecture." -#endif - - -mkLoadInstr - :: Reg -- register to load - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr -#if i386_TARGET_ARCH -mkLoadInstr reg delta slot - = let off = spillSlotToOffset slot - in - let off_w = (off-delta) `div` 4 - in case regClass reg of { - RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg); - _ -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -} -#elif x86_64_TARGET_ARCH -mkLoadInstr reg delta slot - = let off = spillSlotToOffset slot - in - let off_w = (off-delta) `div` 8 - in case regClass reg of - RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg) - _ -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg) -#else -mkLoadInstr _ _ _ - = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture." -#endif - - - -mkRegRegMoveInstr - :: Reg - -> Reg - -> Instr -mkRegRegMoveInstr src dst - = case regClass src of - RcInteger -> MOV wordSize (OpReg src) (OpReg dst) -#if i386_TARGET_ARCH - RcDouble -> GMOV src dst - RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match" -#else - RcDouble -> MOV FF64 (OpReg src) (OpReg dst) - RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match" -#endif - - -mkBranchInstr - :: BlockId - -> [Instr] - -mkBranchInstr id = [JXX ALWAYS id] - - spillSlotSize :: Int spillSlotSize = IF_ARCH_i386(12, 8) @@ -489,3 +79,82 @@ spillSlotToOffset slot = pprPanic "spillSlotToOffset:" ( text "invalid spill location: " <> int slot $$ text "maxSpillSlots: " <> int maxSpillSlots) + + +-- Here because it knows about JumpDest +shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic +shortcutStatic fn (CmmStaticLit (CmmLabel lab)) + | Just uq <- maybeAsmTemp lab + = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq))) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) + | Just uq <- maybeAsmTemp lbl1 + = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off) + -- slightly dodgy, we're ignoring the second label, but this + -- works with the way we use CmmLabelDiffOff for jump tables now. 
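-- (Sketch, not part of the patch: the chain-following that
-- shortBlockId below performs over shortcutted jump destinations,
-- as a standalone function over Ints.)

followChain :: (Int -> Maybe Int) -> Int -> Int
followChain next b = maybe b (followChain next) (next b)

-- e.g. with next 1 = Just 2; next 2 = Just 5; next _ = Nothing,
-- followChain next 1 == 5.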
+ +shortcutStatic _ other_static + = other_static + +shortBlockId + :: (BlockId -> Maybe JumpDest) + -> BlockId + -> CLabel + +shortBlockId fn blockid@(BlockId uq) = + case fn blockid of + Nothing -> mkAsmTempLabel uq + Just (DestBlockId blockid') -> shortBlockId fn blockid' + Just (DestImm (ImmCLbl lbl)) -> lbl + _other -> panic "shortBlockId" + + + +-- reg colors for x86 +#if i386_TARGET_ARCH +regDotColor :: Reg -> SDoc +regDotColor reg + = let Just str = lookupUFM regColors reg + in text str + +regColors + = listToUFM + $ [ (eax, "#00ff00") + , (ebx, "#0000ff") + , (ecx, "#00ffff") + , (edx, "#0080ff") + + , (fake0, "#ff00ff") + , (fake1, "#ff00aa") + , (fake2, "#aa00ff") + , (fake3, "#aa00aa") + , (fake4, "#ff0055") + , (fake5, "#5500ff") ] + + +-- reg colors for x86_64 +#elif x86_64_TARGET_ARCH +regDotColor :: Reg -> SDoc +regDotColor reg + = let Just str = lookupUFM regColors reg + in text str + +regColors + = listToUFM + $ [ (rax, "#00ff00"), (eax, "#00ff00") + , (rbx, "#0000ff"), (ebx, "#0000ff") + , (rcx, "#00ffff"), (ecx, "#00ffff") + , (rdx, "#0080ff"), (edx, "#00ffff") + , (r8, "#00ff80") + , (r9, "#008080") + , (r10, "#0040ff") + , (r11, "#00ff40") + , (r12, "#008040") + , (r13, "#004080") + , (r14, "#004040") + , (r15, "#002080") ] + + ++ zip (map RealReg [16..31]) (repeat "red") +#else +regDotColor :: Reg -> SDoc +regDotColor = panic "not defined" +#endif diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs index 1f1c724..87564b8 100644 --- a/compiler/nativeGen/X86/Regs.hs +++ b/compiler/nativeGen/X86/Regs.hs @@ -1,15 +1,4 @@ module X86.Regs ( - - -- sizes - Size(..), - intSize, - floatSize, - isFloatSize, - wordSize, - cmmTypeSize, - sizeToWidth, - mkVReg, - -- immediates Imm(..), strImmLit, @@ -45,7 +34,10 @@ module X86.Regs ( -- horror show freeReg, - globalRegMaybe + globalRegMaybe, + + get_GlobalReg_reg_or_addr, + allocatableRegs ) where @@ -60,93 +52,22 @@ where #include "../includes/MachRegs.h" -import RegsBase +import Reg +import RegClass +import CgUtils ( get_GlobalReg_addr ) import BlockId import Cmm import CLabel ( CLabel ) import Pretty -import Outputable ( Outputable(..), pprPanic, panic ) +import Outputable ( panic ) import qualified Outputable -import Unique import FastBool #if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH) import Constants #endif --- ----------------------------------------------------------------------------- --- Sizes on this architecture --- --- A Size is usually a combination of width and class - --- It looks very like the old MachRep, but it's now of purely local --- significance, here in the native code generator. You can change it --- without global consequences. --- --- A major use is as an opcode qualifier; thus the opcode --- mov.l a b --- might be encoded --- MOV II32 a b --- where the Size field encodes the ".l" part. - --- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes --- here. 
I've removed them from the x86 version, we'll see what happens --SDM - --- ToDo: quite a few occurrences of Size could usefully be replaced by Width - -data Size - = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80 - deriving Eq - -intSize, floatSize :: Width -> Size -intSize W8 = II8 -intSize W16 = II16 -intSize W32 = II32 -intSize W64 = II64 -intSize other = pprPanic "MachInstrs.intSize" (ppr other) - - -floatSize W32 = FF32 -floatSize W64 = FF64 -floatSize other = pprPanic "MachInstrs.intSize" (ppr other) - - -isFloatSize :: Size -> Bool -isFloatSize FF32 = True -isFloatSize FF64 = True -isFloatSize FF80 = True -isFloatSize _ = False - - -wordSize :: Size -wordSize = intSize wordWidth - - -cmmTypeSize :: CmmType -> Size -cmmTypeSize ty | isFloatType ty = floatSize (typeWidth ty) - | otherwise = intSize (typeWidth ty) - - -sizeToWidth :: Size -> Width -sizeToWidth II8 = W8 -sizeToWidth II16 = W16 -sizeToWidth II32 = W32 -sizeToWidth II64 = W64 -sizeToWidth FF32 = W32 -sizeToWidth FF64 = W64 -sizeToWidth _ = panic "MachInstrs.sizeToWidth" - - -mkVReg :: Unique -> Size -> Reg -mkVReg u size - | not (isFloatSize size) = VirtualRegI u - | otherwise - = case size of - FF32 -> VirtualRegD u - FF64 -> VirtualRegD u - _ -> panic "mkVReg" - -- ----------------------------------------------------------------------------- -- Immediates @@ -699,4 +620,26 @@ callClobberedRegs = panic "X86.Regs.globalRegMaybe: not defined" #endif +-- We map STG registers onto appropriate CmmExprs. Either they map +-- to real machine registers or stored as offsets from BaseReg. Given +-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real +-- register it is in, on this platform, or a CmmExpr denoting the +-- address in the register table holding it. +-- (See also get_GlobalReg_addr in CgUtils.) + +get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr +get_GlobalReg_reg_or_addr mid + = case globalRegMaybe mid of + Just rr -> Left rr + Nothing -> Right (get_GlobalReg_addr mid) + + +-- allocatableRegs is allMachRegNos with the fixed-use regs removed. +-- i.e., these are the regs for which we are prepared to allow the +-- register allocator to attempt to map VRegs to. +allocatableRegs :: [RegNo] +allocatableRegs + = let isFree i = isFastTrue (freeReg i) + in filter isFree allMachRegNos +
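-- (Usage sketch, not part of the patch: how a caller would branch on
-- get_GlobalReg_reg_or_addr above; 'describeGlobal' is a
-- hypothetical helper, assuming this module's imports.)

describeGlobal :: GlobalReg -> String
describeGlobal gr
    = case get_GlobalReg_reg_or_addr gr of
        Left _rr    -> "pinned to a real machine register"
        Right _addr -> "kept in the register table, via get_GlobalReg_addr"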