X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FnativeGen%2FMachCodeGen.hs;h=f700fbcb7184ab7c2e5098b95cbd479bf6c0cc13;hb=baf9d8508f3fd2d042dcbe19b68f4b49c12f4769;hp=3a0dfad9e7dfa0832f24cf3d6ebea22739c03e49;hpb=14a5aadb84c34dbe2bee129ed80fdfa1fb12e3e0;p=ghc-hetmet.git diff --git a/ghc/compiler/nativeGen/MachCodeGen.hs b/ghc/compiler/nativeGen/MachCodeGen.hs index 3a0dfad..f700fbc 100644 --- a/ghc/compiler/nativeGen/MachCodeGen.hs +++ b/ghc/compiler/nativeGen/MachCodeGen.hs @@ -513,17 +513,17 @@ getRegisterReg (CmmGlobal mid) getRegister :: CmmExpr -> NatM Register +getRegister (CmmReg (CmmGlobal PicBaseReg)) + = do + reg <- getPicBaseNat wordRep + return (Fixed wordRep reg nilOL) + getRegister (CmmReg reg) = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL) getRegister tree@(CmmRegOff _ _) = getRegister (mangleIndexTree tree) -getRegister CmmPicBaseReg - = do - reg <- getPicBaseNat wordRep - return (Fixed wordRep reg nilOL) - -- end of machine-"independent" bit; here we go on the rest... #if alpha_TARGET_ARCH @@ -809,8 +809,7 @@ getRegister (CmmLit (CmmFloat f rep)) = do LDATA ReadOnlyData [CmmDataLabel lbl, CmmStaticLit (CmmFloat f rep)], - MOV rep (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst) - -- ToDo: should use %rip-relative + MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) ] -- in return (Any rep code) @@ -869,9 +868,10 @@ getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do #if x86_64_TARGET_ARCH getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do + x_code <- getAnyReg x lbl <- getNewLabelNat let - code dst = toOL [ + code dst = x_code dst `appOL` toOL [ -- This is how gcc does it, so it can't be that bad: LDATA ReadOnlyData16 [ CmmAlign 16, @@ -881,7 +881,7 @@ getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do CmmStaticLit (CmmInt 0 I32), CmmStaticLit (CmmInt 0 I32) ], - XOR F32 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst) + XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) -- xorps, so we need the 128-bit constant -- ToDo: rip-relative ] @@ -889,10 +889,11 @@ getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do return (Any F32 code) getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do + x_code <- getAnyReg x lbl <- getNewLabelNat let -- This is how gcc does it, so it can't be that bad: - code dst = toOL [ + code dst = x_code dst `appOL` toOL [ LDATA ReadOnlyData16 [ CmmAlign 16, CmmDataLabel lbl, @@ -900,9 +901,8 @@ getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do CmmStaticLit (CmmInt 0 I64) ], -- gcc puts an unpck here. Wonder if we need it. - XOR F64 (OpAddr (ImmAddr (ImmCLbl lbl) 0)) (OpReg dst) + XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) -- xorpd, so we need the 128-bit constant - -- ToDo: rip-relative ] -- return (Any F64 code) @@ -1153,7 +1153,7 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps code dst = x_code `snocOL` LEA rep - (OpAddr (AddrBaseIndex (Just x_reg) Nothing imm)) + (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) (OpReg dst) -- return (Any rep code) @@ -1839,14 +1839,14 @@ getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)]) -- ASSERT(rep == I32)??? = do (x_reg, x_code) <- getSomeReg x let off = ImmInt (-(fromInteger i)) - return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code) + return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)]) | not (is64BitLit lit) -- ASSERT(rep == I32)??? 
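-- A note on the addressing-mode change running through the hunks above
-- and below: the (Maybe Reg) base and (Maybe (Reg,Int)) index fields of
-- AddrBaseIndex are replaced by explicit constructors.  A minimal sketch
-- of the shape these hunks assume (the real definitions live elsewhere
-- in the NCG; EABaseRip and the body of ripRel are inferred from their
-- uses, not shown in this diff):
--
--     data EABase  = EABaseNone | EABaseReg Reg | EABaseRip
--     data EAIndex = EAIndexNone | EAIndex Reg Int
--
--     ripRel :: Imm -> AddrMode
--     ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm
--
-- The extra base constructor is what lets ripRel express the
-- %rip-relative addressing now used for the floating-point constants in
-- getRegister above; the old Maybe encoding had no way to say
-- "base = instruction pointer", hence the retired
-- "ToDo: should use %rip-relative" comments.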
= do (x_reg, x_code) <- getSomeReg x let off = ImmInt (fromInteger i) - return (Amode (AddrBaseIndex (Just x_reg) Nothing off) x_code) + return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be -- recognised by the next rule. @@ -1864,7 +1864,7 @@ getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) let code = x_code `appOL` y_code base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8 - return (Amode (AddrBaseIndex (Just x_reg) (Just (y_reg,base)) (ImmInt 0)) + return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0)) code) getAmode (CmmLit lit) | not (is64BitLit lit) @@ -1872,7 +1872,7 @@ getAmode (CmmLit lit) | not (is64BitLit lit) getAmode expr = do (reg,code) <- getSomeReg expr - return (Amode (AddrBaseIndex (Just reg) Nothing (ImmInt 0)) code) + return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code) #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ @@ -1996,6 +1996,14 @@ getAmode other #if i386_TARGET_ARCH || x86_64_TARGET_ARCH getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) +#if x86_64_TARGET_ARCH +getNonClobberedOperand (CmmLit lit) + | isSuitableFloatingPointLit lit = do + lbl <- getNewLabelNat + let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit lit]) + return (OpAddr (ripRel (ImmCLbl lbl)), code) +#endif getNonClobberedOperand (CmmLit lit) | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = return (OpImm (litToImm lit), nilOL) @@ -2006,7 +2014,7 @@ getNonClobberedOperand (CmmLoad mem pk) if (amodeCouldBeClobbered src) then do tmp <- getNewRegNat wordRep - return (AddrBaseIndex (Just tmp) Nothing (ImmInt 0), + return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), unitOL (LEA I32 (OpAddr src) (OpReg tmp))) else return (src, nilOL) @@ -2024,23 +2032,38 @@ regClobbered _ = False -- getOperand: the operand is not required to remain valid across the -- computation of an arbitrary expression. getOperand :: CmmExpr -> NatM (Operand, InstrBlock) +#if x86_64_TARGET_ARCH getOperand (CmmLit lit) - | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = + | isSuitableFloatingPointLit lit = do + lbl <- getNewLabelNat + let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, + CmmStaticLit lit]) + return (OpAddr (ripRel (ImmCLbl lbl)), code) +#endif +getOperand (CmmLit lit) + | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do return (OpImm (litToImm lit), nilOL) getOperand (CmmLoad mem pk) | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do Amode src mem_code <- getAmode mem return (OpAddr src, mem_code) getOperand e = do - (reg, code) <- getNonClobberedReg e + (reg, code) <- getSomeReg e return (OpReg reg, code) isOperand :: CmmExpr -> Bool isOperand (CmmLoad _ _) = True -isOperand (CmmLit lit) = not (is64BitLit lit) && - not (isFloatingRep (cmmLitRep lit)) +isOperand (CmmLit lit) = not (is64BitLit lit) + || isSuitableFloatingPointLit lit isOperand _ = False +-- if we want a floating-point literal as an operand, we can +-- use it directly from memory. However, if the literal is +-- zero, we're better off generating it into a register using +-- xor. 
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 +isSuitableFloatingPointLit _ = False + getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) getRegOrMem (CmmLoad mem pk) | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do @@ -2236,10 +2259,10 @@ condFltCode cond x y = do code = x_code `appOL` y_code `snocOL` CMP (cmmExprRep x) y_op (OpReg x_reg) - -- in - return (CondCode False (condToUnsigned cond) code) - -- we need to use the unsigned comparison operators on the + -- NB(1): we need to use the unsigned comparison operators on the -- result of this comparison. + -- in + return (CondCode True (condToUnsigned cond) code) #endif -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2886,14 +2909,51 @@ genCondJump lbl (StPrim op [x, y]) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH +#if i386_TARGET_ARCH genCondJump id bool = do CondCode _ cond code <- getCondCode bool return (code `snocOL` JXX cond id) -#endif /* i386_TARGET_ARCH */ +#endif +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#if x86_64_TARGET_ARCH + +genCondJump id bool = do + CondCode is_float cond cond_code <- getCondCode bool + if not is_float + then + return (cond_code `snocOL` JXX cond id) + else do + lbl <- getBlockIdNat + + -- see comment with condFltReg + let code = case cond of + NE -> or_unordered + GU -> plain_test + GEU -> plain_test + _ -> and_ordered + + plain_test = unitOL ( + JXX cond id + ) + or_unordered = toOL [ + JXX cond id, + JXX PARITY id + ] + and_ordered = toOL [ + JXX PARITY lbl, + JXX cond id, + JXX ALWAYS lbl, + NEWBLOCK lbl + ] + return (cond_code `appOL` code) + +#endif + +-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if sparc_TARGET_ARCH @@ -3048,12 +3108,12 @@ genCCall target dest_regs args vols = do -- CmmPrim -> ... 
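-- The CondCode change in condFltCode above (False -> True) records that
-- the condition came from a floating-point comparison, and the x86_64
-- genCondJump above matches on that flag to decide whether the
-- unordered (NaN) case needs special treatment.  A worked example,
-- assuming the float CMP here is an SSE ucomiss/ucomisd, which sets
-- ZF = PF = CF = 1 for unordered operands:
--
--   x /= y (NE):  JNE alone would read ZF=1 as "equal", so or_unordered
--                 also takes the jump when PF=1, making NaN /= x True.
--   x >  y (GU):  JA tests CF=0 && ZF=0, and CF=1 whenever the operands
--                 are unordered, so plain_test needs no parity check
--                 (likewise GEU/JAE, which tests CF=0 alone).
--   x == y (EQ):  JE would wrongly jump for NaNs; and_ordered sends the
--                 PF=1 case around the conditional jump via lbl.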
CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm)), conv) + return (unitOL (CALL (Left fn_imm) []), conv) where fn_imm = ImmCLbl lbl CmmForeignCall expr conv -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr ASSERT(dyn_rep == I32) - return (dyn_c `snocOL` CALL (Right dyn_r), conv) + return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) let push_code = concatOL push_codes call = callinsns `appOL` @@ -3120,8 +3180,8 @@ genCCall target dest_regs args vols = do code `appOL` toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp), DELTA (delta-size), - GST sz reg (AddrBaseIndex (Just esp) - Nothing + GST sz reg (AddrBaseIndex (EABaseReg esp) + EAIndexNone (ImmInt 0))] ) else return (size, @@ -3138,6 +3198,9 @@ genCCall target dest_regs args vols = do (reg,code) <- getSomeReg op return (code, reg, cmmExprRep op) +#endif /* i386_TARGET_ARCH */ + +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)] -> Maybe [GlobalReg] -> NatM InstrBlock @@ -3161,6 +3224,10 @@ outOfLineFloatOp mop res args vols lbl = CmmLabel (mkForeignLabel fn Nothing False) fn = case mop of + MO_F32_Sqrt -> FSLIT("sqrt") + MO_F32_Sin -> FSLIT("sin") + MO_F32_Cos -> FSLIT("cos") + MO_F32_Tan -> FSLIT("tan") MO_F32_Exp -> FSLIT("exp") MO_F32_Log -> FSLIT("log") @@ -3173,6 +3240,10 @@ outOfLineFloatOp mop res args vols MO_F32_Tanh -> FSLIT("tanh") MO_F32_Pwr -> FSLIT("pow") + MO_F64_Sqrt -> FSLIT("sqrt") + MO_F64_Sin -> FSLIT("sin") + MO_F64_Cos -> FSLIT("cos") + MO_F64_Tan -> FSLIT("tan") MO_F64_Exp -> FSLIT("exp") MO_F64_Log -> FSLIT("log") @@ -3187,22 +3258,29 @@ outOfLineFloatOp mop res args vols other -> pprPanic "outOfLineFloatOp" (pprCallishMachOp mop) -#endif /* i386_TARGET_ARCH */ +#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if x86_64_TARGET_ARCH genCCall (CmmPrim op) [(r,_)] args vols = - panic "genCCall(CmmPrim)(x86_64)" + outOfLineFloatOp op r args vols genCCall target dest_regs args vols = do -- load up the register arguments - (stack_args, sse_regs, load_args_code) - <- load_args args allArgRegs allFPArgRegs 0 nilOL + (stack_args, aregs, fregs, load_args_code) + <- load_args args allArgRegs allFPArgRegs nilOL let + fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs)) + int_regs_used = reverse (drop (length aregs) (reverse allArgRegs)) + arg_regs = int_regs_used ++ fp_regs_used + -- for annotating the call instruction with + + sse_regs = length fp_regs_used + tot_arg_size = arg_size * length stack_args -- On entry to the called function, %rsp should be aligned @@ -3234,11 +3312,11 @@ genCCall target dest_regs args vols = do -- CmmPrim -> ... 
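-- CALL now carries a second field: the list of registers used to pass
-- arguments ([] on i386 above, where every argument goes on the stack).
-- Presumably this is so the register allocator sees those registers as
-- live across the call.  On x86_64 the used registers are recovered from
-- the leftover lists that load_args returns: if allFPArgRegs is, say,
-- [xmm0 .. xmm7] and five registers come back unused, then
--
--     fp_regs_used = reverse (drop 5 (reverse allFPArgRegs))
--                  = [xmm0, xmm1, xmm2]
--
-- i.e. exactly the prefix that was consumed, and sse_regs (the count
-- that must end up in %al for varargs calls, per the ABI comment below)
-- is just its length.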
CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm)), conv) + return (unitOL (CALL (Left fn_imm) arg_regs), conv) where fn_imm = ImmCLbl lbl CmmForeignCall expr conv -> do (dyn_r, dyn_c) <- getSomeReg expr - return (dyn_c `snocOL` CALL (Right dyn_r), conv) + return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) let -- The x86_64 ABI requires us to set %al to the number of SSE @@ -3289,31 +3367,31 @@ genCCall target dest_regs args vols = do load_args :: [(CmmExpr,MachHint)] -> [Reg] -- int regs avail for args -> [Reg] -- FP regs avail for args - -> Int -> InstrBlock - -> NatM ([(CmmExpr,MachHint)],Int,InstrBlock) - load_args args [] [] sse_regs code = return (args, sse_regs, code) + -> InstrBlock + -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock) + load_args args [] [] code = return (args, [], [], code) -- no more regs to use - load_args [] aregs fregs sse_regs code = return ([],sse_regs,code) + load_args [] aregs fregs code = return ([], aregs, fregs, code) -- no more args to push - load_args ((arg,hint) : rest) aregs fregs sse_regs code + load_args ((arg,hint) : rest) aregs fregs code | isFloatingRep arg_rep = case fregs of [] -> push_this_arg (r:rs) -> do arg_code <- getAnyReg arg - load_args rest aregs rs (sse_regs+1) (code `appOL` arg_code r) + load_args rest aregs rs (code `appOL` arg_code r) | otherwise = case aregs of [] -> push_this_arg (r:rs) -> do arg_code <- getAnyReg arg - load_args rest rs fregs sse_regs (code `appOL` arg_code r) + load_args rest rs fregs (code `appOL` arg_code r) where arg_rep = cmmExprRep arg push_this_arg = do - (args',sse',code') <- load_args rest aregs fregs sse_regs code - return ((arg,hint):args', sse', code') + (args',ars,frs,code') <- load_args rest aregs fregs code + return ((arg,hint):args', ars, frs, code') push_args [] code = return code push_args ((arg,hint):rest) code @@ -3749,7 +3827,7 @@ genSwitch expr ids = do lbl <- getNewLabelNat let jumpTable = map jumpTableEntry ids - op = OpAddr (AddrBaseIndex Nothing (Just (reg,wORD_SIZE)) (ImmCLbl lbl)) + op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl)) code = e_code `appOL` toOL [ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), JMP_TBL op [ id | Just id <- ids ] @@ -3841,38 +3919,77 @@ condIntReg cond x y = do let code dst = cond_code `appOL` toOL [ SETCC cond (OpReg tmp), - MOV I32 (OpReg tmp) (OpReg dst), - AND I32 (OpImm (ImmInt 1)) (OpReg dst) + MOVZxL I8 (OpReg tmp) (OpReg dst) ] - -- NB. (1) Tha AND is needed here because the x86 only - -- sets the low byte in the SETCC instruction. - -- NB. (2) The extra temporary register is a hack to - -- work around the fact that the setcc instructions only - -- accept byte registers. dst might not be a byte-able reg, - -- but currently all free registers are byte-able, so we're - -- guaranteed that a new temporary is byte-able. -- in return (Any I32 code) +#endif + +#if i386_TARGET_ARCH condFltReg cond x y = do - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat CondCode _ cond cond_code <- condFltCode cond x y - let - code dst = cond_code `appOL` toOL [ - JXX cond lbl1, - MOV I32 (OpImm (ImmInt 0)) (OpReg dst), - JXX ALWAYS lbl2, - NEWBLOCK lbl1, - MOV I32 (OpImm (ImmInt 1)) (OpReg dst), - JXX ALWAYS lbl2, - NEWBLOCK lbl2] - -- SIGH, have to split up this block somehow... 
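-- SETcc writes only the low byte of its operand, which is why the old
-- condIntReg needed the MOV + AND pair (NB (1) above).  The rewritten
-- versions zero-extend that byte with a single MOVZxL I8 instead; the
-- temporary survives because SETcc also only accepts byte-addressable
-- registers (old NB (2)), and the new i386 condFltReg below requests it
-- at I8, presumably to make the byte-register requirement visible to the
-- register allocator rather than relying on the old "all free registers
-- are byte-able" assumption.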
+ tmp <- getNewRegNat I8 + let + code dst = cond_code `appOL` toOL [ + SETCC cond (OpReg tmp), + MOVZxL I8 (OpReg tmp) (OpReg dst) + ] -- in return (Any I32 code) -#endif /* i386_TARGET_ARCH */ +#endif + +#if x86_64_TARGET_ARCH + +condFltReg cond x y = do + CondCode _ cond cond_code <- condFltCode cond x y + tmp1 <- getNewRegNat wordRep + tmp2 <- getNewRegNat wordRep + let + -- We have to worry about unordered operands (eg. comparisons + -- against NaN). If the operands are unordered, the comparison + -- sets the parity flag, carry flag and zero flag. + -- All comparisons are supposed to return false for unordered + -- operands except for !=, which returns true. + -- + -- Optimisation: we don't have to test the parity flag if we + -- know the test has already excluded the unordered case: eg > + -- and >= test for a zero carry flag, which can only occur for + -- ordered operands. + -- + -- ToDo: by reversing comparisons we could avoid testing the + -- parity flag in more cases. + + code dst = + cond_code `appOL` + (case cond of + NE -> or_unordered dst + GU -> plain_test dst + GEU -> plain_test dst + _ -> and_ordered dst) + + plain_test dst = toOL [ + SETCC cond (OpReg tmp1), + MOVZxL I8 (OpReg tmp1) (OpReg dst) + ] + or_unordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC PARITY (OpReg tmp2), + OR I8 (OpReg tmp1) (OpReg tmp2), + MOVZxL I8 (OpReg tmp2) (OpReg dst) + ] + and_ordered dst = toOL [ + SETCC cond (OpReg tmp1), + SETCC NOTPARITY (OpReg tmp2), + AND I8 (OpReg tmp1) (OpReg tmp2), + MOVZxL I8 (OpReg tmp2) (OpReg dst) + ] + -- in + return (Any I32 code) + +#endif -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -4210,7 +4327,10 @@ trivialCode rep instr (Just revinstr) (CmmLit lit_a) b -- in return (Any rep code) -trivialCode rep instr maybe_revinstr a b = do +trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b + +-- This is re-used for floating pt instructions too. +genTrivialCode rep instr a b = do (b_op, b_code) <- getNonClobberedOperand b a_code <- getAnyReg a tmp <- getNewRegNat rep @@ -4222,7 +4342,7 @@ trivialCode rep instr maybe_revinstr a b = do -- as the destination reg. In this case, we have to save b in a -- new temporary across the computation of a. code dst - | dst `clashesWith` b_op = + | dst `regClashesWithOp` b_op = b_code `appOL` unitOL (MOV rep b_op (OpReg tmp)) `appOL` a_code dst `snocOL` @@ -4233,10 +4353,10 @@ trivialCode rep instr maybe_revinstr a b = do instr b_op (OpReg dst) -- in return (Any rep code) - where - reg `clashesWith` OpReg reg2 = reg == reg2 - reg `clashesWith` OpAddr amode = any (==reg) (addrModeRegs amode) - reg `clashesWith` _ = False + +reg `regClashesWithOp` OpReg reg2 = reg == reg2 +reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode) +reg `regClashesWithOp` _ = False ----------- @@ -4268,19 +4388,7 @@ trivialFCode pk instr x y = do #if x86_64_TARGET_ARCH --- We use the 2-operand SSE2 floating pt instructions. ToDo: improve on --- this by using some of the special cases in trivialCode above. 
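-- Factoring the body of trivialCode out as genTrivialCode pays off just
-- below: the two-operand SSE2 floating-point instructions overwrite
-- their destination exactly like the integer ones, so the x86_64
-- trivialFCode collapses to
--
--     trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y
--
-- and inherits the destination-clash handling (regClashesWithOp) for
-- free; that is precisely what the removed "ToDo: improve on this by
-- using some of the special cases in trivialCode" was asking for.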
-trivialFCode pk instr x y = do - (y_reg, y_code) <- getNonClobberedReg y -- these work for float regs too - x_code <- getAnyReg x - let - code dst = - y_code `appOL` - x_code dst `snocOL` - instr pk (IF_ARCH_x86_64(OpReg,) y_reg) - (IF_ARCH_x86_64(OpReg,) dst) - -- in - return (Any pk code) +trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y #endif @@ -4547,7 +4655,7 @@ coerceFP2Int from to x = do coerceFP2Int from to x = do (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand let - opc = case to of F32 -> CVTSS2SI; F64 -> CVTSD2SI + opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI code dst = x_code `snocOL` opc x_op dst -- in return (Any to code) -- works even if the destination rep is
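-- The coerceFP2Int change above is a straight bug fix: CVTSS2SI converts
-- from single precision and CVTSD2SI from double, so the opcode is
-- determined by the *source* rep.  The old `case to of` scrutinised the
-- integer destination rep (I32/I64) against F32/F64 patterns, which
-- could never match, so any float-to-int coercion on x86_64 would
-- presumably have died with a pattern-match failure.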