X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FX86%2FInstr.hs;h=a96452b9f16f152fc39b2eb5cd4924a23351e026;hb=6cec61d14a324285dbb8ce73d4c7215f1f8d6766;hp=7e11d233639d4398ce00a2fd49779661964b0ffc;hpb=9d9eef1f78e25c716e2c0c7559005b730f425231;p=ghc-hetmet.git

diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 7e11d23..a96452b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -21,14 +21,15 @@ import Reg
 import TargetReg
 
 import BlockId
-import Cmm
+import OldCmm
 import FastString
 import FastBool
 import Outputable
 import Constants (rESERVED_C_STACK_BYTES)
 
 import CLabel
-import Panic
+import UniqSet
+import Unique
 
 -- Size of a PPC memory address, in bytes.
 --
@@ -101,7 +102,7 @@ Hence GLDZ and GLD1.  Bwahahahahahahaha!
 -}
 
 {-
-MORE FLOATING POINT MUSINGS...
+Note [x86 Floating point precision]
 
 Intel's internal floating point registers are by default 80 bit
 extended precision.  This means that all operations done on values in
@@ -140,11 +141,12 @@ This is what gcc does.  Spilling at 80 bits requires taking up a full
 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 the alignment problems.
 
-In the future, we'll use the SSE registers for floating point.  This
-requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
-precision float ops), which means P4 or Xeon and above.  Using SSE
-will solve all these problems, because the SSE registers use fixed 32
-bit or 64 bit precision.
+In the future [edit: now available in GHC 7.0.1, with the -msse2
+flag], we'll use the SSE registers for floating point.  This requires
+a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
+float ops), which means P4 or Xeon and above.  Using SSE will solve
+all these problems, because the SSE registers use fixed 32 bit or 64
+bit precision.
 
 --SDM 1/2003
 -}
@@ -226,6 +228,8 @@ data Instr
 	| GITOF      Reg Reg -- src(intreg), dst(fpreg)
 	| GITOD      Reg Reg -- src(intreg), dst(fpreg)
 
+	| GDTOF      Reg Reg -- src(fpreg), dst(fpreg)
+
 	| GADD       Size Reg Reg Reg -- src1, src2, dst
 	| GDIV       Size Reg Reg Reg -- src1, src2, dst
 	| GSUB       Size Reg Reg Reg -- src1, src2, dst
@@ -252,10 +256,10 @@ data Instr
 	-- use MOV for moving (either movss or movsd (movlpd better?))
 	| CVTSS2SD   Reg Reg            -- F32 to F64
 	| CVTSD2SS   Reg Reg            -- F64 to F32
-	| CVTTSS2SIQ Operand Reg        -- F32 to I32/I64 (with truncation)
-	| CVTTSD2SIQ Operand Reg        -- F64 to I32/I64 (with truncation)
-	| CVTSI2SS   Operand Reg        -- I32/I64 to F32
-	| CVTSI2SD   Operand Reg        -- I32/I64 to F64
+	| CVTTSS2SIQ Size Operand Reg   -- F32 to I32/I64 (with truncation)
+	| CVTTSD2SIQ Size Operand Reg   -- F64 to I32/I64 (with truncation)
+	| CVTSI2SS   Size Operand Reg   -- I32/I64 to F32
+	| CVTSI2SD   Size Operand Reg   -- I32/I64 to F64
 
 	-- use ADD & SUB for arithmetic.  In both cases, operands
 	-- are Operand Reg.
@@ -352,7 +356,6 @@ x86_regUsageOfInstr instr
     CLTD   _             -> mkRU [eax] [edx]
     NOP                  -> mkRU [] []
 
-#if i386_TARGET_ARCH
    GMOV   src dst       -> mkRU [src] [dst]
     GLD    _ src dst     -> mkRU (use_EA src) [dst]
     GST    _ src dst     -> mkRUR (src : use_EA dst)
@@ -366,6 +369,8 @@ x86_regUsageOfInstr instr
     GITOF  src dst       -> mkRU [src] [dst]
     GITOD  src dst       -> mkRU [src] [dst]
 
+    GDTOF  src dst       -> mkRU [src] [dst]
+
     GADD   _ s1 s2 dst   -> mkRU [s1,s2] [dst]
     GSUB   _ s1 s2 dst   -> mkRU [s1,s2] [dst]
     GMUL   _ s1 s2 dst   -> mkRU [s1,s2] [dst]
@@ -378,17 +383,14 @@ x86_regUsageOfInstr instr
     GSIN   _ _ _ src dst -> mkRU [src] [dst]
     GCOS   _ _ _ src dst -> mkRU [src] [dst]
     GTAN   _ _ _ src dst -> mkRU [src] [dst]
-#endif
 
-#if x86_64_TARGET_ARCH
     CVTSS2SD     src dst -> mkRU [src] [dst]
     CVTSD2SS     src dst -> mkRU [src] [dst]
-    CVTTSS2SIQ   src dst -> mkRU (use_R src) [dst]
-    CVTTSD2SIQ   src dst -> mkRU (use_R src) [dst]
-    CVTSI2SS     src dst -> mkRU (use_R src) [dst]
-    CVTSI2SD     src dst -> mkRU (use_R src) [dst]
+    CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
+    CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
+    CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
     FDIV       _ src dst -> usageRM src dst
-#endif
 
     FETCHGOT reg         -> mkRU [] [reg]
     FETCHPC  reg         -> mkRU [] [reg]
@@ -482,7 +484,6 @@ x86_patchRegsOfInstr instr env
     JMP op               -> patch1 JMP op
     JMP_TBL op ids       -> patch1 JMP_TBL op $ ids
 
-#if i386_TARGET_ARCH
     GMOV src dst         -> GMOV (env src) (env dst)
     GLD  sz src dst      -> GLD sz (lookupAddr src) (env dst)
     GST  sz src dst      -> GST sz (env src) (lookupAddr dst)
@@ -496,6 +497,8 @@ x86_patchRegsOfInstr instr env
     GITOF src dst        -> GITOF (env src) (env dst)
     GITOD src dst        -> GITOD (env src) (env dst)
 
+    GDTOF src dst        -> GDTOF (env src) (env dst)
+
     GADD sz s1 s2 dst    -> GADD sz (env s1) (env s2) (env dst)
     GSUB sz s1 s2 dst    -> GSUB sz (env s1) (env s2) (env dst)
     GMUL sz s1 s2 dst    -> GMUL sz (env s1) (env s2) (env dst)
@@ -508,17 +511,14 @@ x86_patchRegsOfInstr instr env
     GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
     GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
     GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
-#endif
 
-#if x86_64_TARGET_ARCH
     CVTSS2SD src dst      -> CVTSS2SD (env src) (env dst)
     CVTSD2SS src dst      -> CVTSD2SS (env src) (env dst)
-    CVTTSS2SIQ src dst    -> CVTTSS2SIQ (patchOp src) (env dst)
-    CVTTSD2SIQ src dst    -> CVTTSD2SIQ (patchOp src) (env dst)
-    CVTSI2SS src dst      -> CVTSI2SS (patchOp src) (env dst)
-    CVTSI2SD src dst      -> CVTSI2SD (patchOp src) (env dst)
+    CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
+    CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
+    CVTSI2SS sz src dst   -> CVTSI2SS sz (patchOp src) (env dst)
+    CVTSI2SD sz src dst   -> CVTSI2SD sz (patchOp src) (env dst)
     FDIV sz src dst       -> FDIV sz (patchOp src) (patchOp dst)
-#endif
 
     CALL (Left _)  _      -> instr
     CALL (Right reg) p    -> CALL (Right (env reg)) p
@@ -537,7 +537,9 @@ x86_patchRegsOfInstr instr env
     _other               -> panic "patchRegs: unrecognised instr"
 
   where
+    patch1 :: (Operand -> a) -> Operand -> a
     patch1 insn op      = insn $! patchOp op
+    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 
     patchOp (OpReg  reg) = OpReg $! env reg
@@ -601,30 +603,16 @@ x86_mkSpillInstr
 	-> Int    -- spill slot to use
 	-> Instr
 
-#if i386_TARGET_ARCH
-x86_mkSpillInstr reg delta slot
-  = let off     = spillSlotToOffset slot
-    in
-    let off_w   = (off-delta) `div` 4
-    in case targetClassOfReg reg of
-           RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
-           _         -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
-
-#elif x86_64_TARGET_ARCH
 x86_mkSpillInstr reg delta slot
   = let off     = spillSlotToOffset slot
     in
-    let off_w   = (off-delta) `div` 8
+    let off_w   = (off-delta) `div` IF_ARCH_i386(4,8)
     in case targetClassOfReg reg of
-           RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
-           RcDouble  -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
+           RcInteger   -> MOV IF_ARCH_i386(II32,II64)
+                              (OpReg reg) (OpAddr (spRel off_w))
+           RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
+           RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
           _           -> panic "X86.mkSpillInstr: no match"
-        -- ToDo: will it work to always spill as a double?
-        --       does that cause a stall if the data was a float?
-#else
-x86_mkSpillInstr _ _ _
-        = panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
-#endif
 
 
 -- | Make a spill reload instruction.
@@ -634,26 +622,16 @@ x86_mkLoadInstr
 	-> Int    -- spill slot to use
 	-> Instr
 
-#if i386_TARGET_ARCH
-x86_mkLoadInstr reg delta slot
-  = let off     = spillSlotToOffset slot
-    in
-        let off_w = (off-delta) `div` 4
-        in case targetClassOfReg reg of {
-              RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
-              _         -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
-#elif x86_64_TARGET_ARCH
 x86_mkLoadInstr reg delta slot
  = let off     = spillSlotToOffset slot
    in
-        let off_w = (off-delta) `div` 8
+        let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
        in case targetClassOfReg reg of
-              RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
-              _         -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
-#else
-x86_mkLoadInstr _ _ _
-        = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
-#endif
+              RcInteger   -> MOV IF_ARCH_i386(II32,II64)
+                                 (OpAddr (spRel off_w)) (OpReg reg)
+              RcDouble    -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
+              RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
+              _           -> panic "X86.x86_mkLoadInstr"
 
 spillSlotSize :: Int
 spillSlotSize = IF_ARCH_i386(12, 8)
@@ -714,14 +692,12 @@ x86_mkRegRegMoveInstr src dst
  = case targetClassOfReg src of
 #if i386_TARGET_ARCH
 	RcInteger -> MOV II32 (OpReg src) (OpReg dst)
-	RcDouble  -> GMOV src dst
-	RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 #else
 	RcInteger -> MOV II64 (OpReg src) (OpReg dst)
-	RcDouble  -> MOV FF64 (OpReg src) (OpReg dst)
-	RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 #endif
-
+	RcDouble    -> GMOV src dst
+	RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
+	_           -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 
 -- | Check whether an instruction represents a reg-reg move.
 --	The register allocator attempts to eliminate reg->reg moves whenever it can,
@@ -765,6 +741,7 @@ i386_insert_ffrees blocks
  where p insn r  = case insn of
                      CALL _ _    -> GFREE : insn : r
                      JMP _       -> GFREE : insn : r
+                     JXX_GBL _ _ -> GFREE : insn : r
                      _           -> insn : r
 
 -- if you ever add a new FP insn to the fake x86 FP insn set,
@@ -779,8 +756,9 @@ is_G_instr instr
 	GLD1{}      -> True
 	GFTOI{}     -> True
 	GDTOI{}     -> True
-	GITOF{} -> True
-	GITOD{} -> True
+	GITOF{}     -> True
+	GITOD{}     -> True
+	GDTOF{}     -> True
 	GADD{}      -> True
 	GDIV{}      -> True
 	GSUB{}      -> True
@@ -800,31 +778,32 @@ data JumpDest = DestBlockId BlockId | DestImm Imm
 
 
 canShortcut :: Instr -> Maybe JumpDest
-canShortcut (JXX ALWAYS id) = Just (DestBlockId id)
-canShortcut (JMP (OpImm imm)) = Just (DestImm imm)
-canShortcut _ = Nothing
+canShortcut (JXX ALWAYS id)   = Just (DestBlockId id)
+canShortcut (JMP (OpImm imm)) = Just (DestImm imm)
+canShortcut _                 = Nothing
 
--- The helper ensures that we don't follow cycles.
+-- This helper shortcuts a sequence of branches.
+-- The blockset helps avoid following cycles.
 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
+shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
   where shortcutJump' fn seen insn@(JXX cc id) =
-          if elemBlockSet id seen then insn
+          if setMember id seen then insn
           else case fn id of
                  Nothing                -> insn
                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
-               where seen' = extendBlockSet seen id
+               where seen' = setInsert id seen
         shortcutJump' _ _ other = other
 
 
 -- Here because it knows about JumpDest
 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
   | Just uq <- maybeAsmTemp lab
-  = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+  = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (mkBlockId uq)))
 
 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
   | Just uq <- maybeAsmTemp lbl1
-  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+  = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (mkBlockId uq)) lbl2 off)
         -- slightly dodgy, we're ignoring the second label, but this
         -- works with the way we use CmmLabelDiffOff for jump tables now.
@@ -833,12 +812,15 @@ shortcutStatic _ other_static
 	= other_static
 
 shortBlockId :: (BlockId -> Maybe JumpDest)
+	-> UniqSet Unique
 	-> BlockId
 	-> CLabel
 
-shortBlockId fn blockid@(BlockId uq) =
-   case fn blockid of
-      Nothing                      -> mkAsmTempLabel uq
-      Just (DestBlockId blockid')  -> shortBlockId fn blockid'
-      Just (DestImm (ImmCLbl lbl)) -> lbl
-      _other                       -> panic "shortBlockId"
+shortBlockId fn seen blockid =
+   case (elementOfUniqSet uq seen, fn blockid) of
+     (True, _)    -> mkAsmTempLabel uq
+     (_, Nothing) -> mkAsmTempLabel uq
+     (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
+     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
+     (_, _other) -> panic "shortBlockId"
+   where uq = getUnique blockid
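A note on the behaviour discussed in Note [x86 Floating point precision] above: the effect of carrying intermediates at 80 bits is easy to observe from ordinary Haskell. The standalone sketch below is not part of the patch and the function name is made up; what it actually prints depends on which code generator and flags are in use (x87 vs. -msse2, -fexcess-precision, optimisation level), which is exactly the sensitivity the note describes.

    module Main where

    -- Illustrative only.  With genuine 64-bit double arithmetic (SSE2),
    -- the intermediate product 1.0e308 * 10 overflows to Infinity, so the
    -- whole expression evaluates to Infinity.  If the intermediate stays
    -- in an 80-bit x87 register (larger exponent range), it does not
    -- overflow and the result comes back as roughly 1.0e308.
    roundTrip :: Double -> Double
    roundTrip x = (x * 10) / 10
    {-# NOINLINE roundTrip #-}   -- discourage constant folding at compile time

    main :: IO ()
    main = print (roundTrip 1.0e308)

Spilling the intermediate to a 64-bit slot forces the overflow, which is why the note insists on spilling x87 values at the full 80 bits.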
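The cycle handling added to shortcutJump and shortBlockId can also be modelled in isolation. The sketch below is not the NCG code: it stands in Int and Data.Set for BlockId and BlockSet purely for illustration, but it follows the same idea of threading a 'seen' set through the chase so that a loop of branch-only blocks terminates.

    module Main where

    import qualified Data.Set as Set

    -- Follow a chain of unconditional branches through branch-only blocks,
    -- carrying the set of blocks already visited so a cycle cannot send us
    -- round forever.
    shortcut :: (Int -> Maybe Int) -> Int -> Int
    shortcut next = go Set.empty
      where
        go seen b
          | b `Set.member` seen = b           -- already chased through here: stop
          | otherwise           = case next b of
              Nothing -> b                    -- a real block: this is the target
              Just b' -> go (Set.insert b seen) b'

    main :: IO ()
    main = do
      print (shortcut (`lookup` [(1, 2), (2, 3)]) 1)  -- chain 1 -> 2 -> 3: prints 3
      print (shortcut (`lookup` [(4, 5), (5, 4)]) 4)  -- cycle 4 <-> 5: prints 4, terminates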