X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FMachCodeGen.hs;h=b29b59e8bbc69fe167fbf1f81e9a324f32ce5f05;hb=6e1e37433632160a73d30ad785476a1ef0ba14a8;hp=90ce6b5bf8cca25cfb2bbe5ca2ebaa16d7ecbb96;hpb=0065d5ab628975892cea1ec7303f968c3338cbe1;p=ghc-hetmet.git diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 90ce6b5..b29b59e 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -21,7 +21,7 @@ module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where import MachInstrs import MachRegs import NCGMonad -import PositionIndependentCode ( cmmMakeDynamicReference, initializePicBase ) +import PositionIndependentCode import RegAllocInfo ( mkBranchInstr ) -- Our intermediate code: @@ -29,6 +29,7 @@ import PprCmm ( pprExpr ) import Cmm import MachOp import CLabel +import ClosureInfo ( C_SRT(..) ) -- The rest: import StaticFlags ( opt_PIC ) @@ -41,14 +42,14 @@ import FastTypes ( isFastTrue ) import Constants ( wORD_SIZE ) #ifdef DEBUG -import Outputable ( assertPanic ) -import TRACE ( trace ) +import Debug.Trace ( trace ) #endif import Control.Monad ( mapAndUnzipM ) -import Maybe ( fromJust ) -import DATA_BITS -import DATA_WORD +import Data.Maybe ( fromJust ) +import Data.Bits +import Data.Word +import Data.Int -- ----------------------------------------------------------------------------- -- Top-level of the instruction selector @@ -60,7 +61,7 @@ import DATA_WORD type InstrBlock = OrdList Instr -cmmTopCodeGen :: CmmTop -> NatM [NatCmmTop] +cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop] cmmTopCodeGen (CmmProc info lab params blocks) = do (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks picBaseMb <- getPicBaseMaybeNat @@ -118,8 +119,8 @@ stmtToInstrs stmt = case stmt of | otherwise -> assignMem_IntCode kind addr src where kind = cmmExprRep src - CmmCall target result_regs args vols - -> genCCall target result_regs args vols + CmmCall target result_regs args _ + -> genCCall target result_regs args CmmBranch id -> genBranch id CmmCondBranch arg id -> genCondJump id arg @@ -187,7 +188,7 @@ assignMem_I64Code addrTree valueTree = do return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do +assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do ChildCode64 vcode r_src_lo <- iselExpr64 valueTree let r_dst_lo = mkVReg u_dst I32 @@ -229,7 +230,7 @@ iselExpr64 (CmmLoad addrTree I64) = do rlo ) -iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64))) +iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _))) = return (ChildCode64 nilOL (mkVReg vu I32)) -- we handle addition, but rather badly @@ -264,6 +265,17 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do -- in return (ChildCode64 code rlo) +iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do + fn <- getAnyReg expr + r_dst_lo <- getNewRegNat I32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + code = fn r_dst_lo + return ( + ChildCode64 (code `snocOL` + MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi)) + r_dst_lo + ) + iselExpr64 expr = pprPanic "iselExpr64(i386)" (ppr expr) @@ -357,7 +369,7 @@ assignMem_I64Code addrTree valueTree = do -- in return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do +assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do ChildCode64 vcode r_src_lo <- iselExpr64 valueTree let r_dst_lo = mkVReg u_dst I32 @@ -387,7 +399,7 @@ iselExpr64 (CmmLoad addrTree I64) = do return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rlo -iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64))) +iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _))) = return (ChildCode64 nilOL (mkVReg vu I32)) iselExpr64 (CmmLit (CmmInt i _)) = do @@ -464,7 +476,7 @@ getSomeReg expr = do getRegisterReg :: CmmReg -> Reg -getRegisterReg (CmmLocal (LocalReg u pk)) +getRegisterReg (CmmLocal (LocalReg u pk _)) = mkVReg u pk getRegisterReg (CmmGlobal mid) @@ -486,10 +498,14 @@ getRegisterReg (CmmGlobal mid) getRegister :: CmmExpr -> NatM Register +#if !x86_64_TARGET_ARCH + -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured + -- register, it can only be used for rip-relative addressing. getRegister (CmmReg (CmmGlobal PicBaseReg)) = do reg <- getPicBaseNat wordRep return (Fixed wordRep reg nilOL) +#endif getRegister (CmmReg reg) = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL) @@ -497,6 +513,31 @@ getRegister (CmmReg reg) getRegister tree@(CmmRegOff _ _) = getRegister (mangleIndexTree tree) + +#if WORD_SIZE_IN_BITS==32 + -- for 32-bit architectuers, support some 64 -> 32 bit conversions: + -- TO_W_(x), TO_W_(x >> 32) + +getRegister (CmmMachOp (MO_U_Conv I64 I32) + [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed I32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_S_Conv I64 I32) + [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed I32 (getHiVRegFromLo rlo) code + +getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed I32 rlo code + +getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do + ChildCode64 code rlo <- iselExpr64 x + return $ Fixed I32 rlo code + +#endif + -- end of machine-"independent" bit; here we go on the rest... #if alpha_TARGET_ARCH @@ -736,7 +777,7 @@ getRegister leaf getRegister (CmmLit (CmmFloat f F32)) = do lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference addImportNat False lbl + dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl Amode addr addr_code <- getAmode dynRef let code dst = LDATA ReadOnlyData @@ -759,7 +800,7 @@ getRegister (CmmLit (CmmFloat d F64)) | otherwise = do lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference addImportNat False lbl + dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl Amode addr addr_code <- getAmode dynRef let code dst = LDATA ReadOnlyData @@ -844,6 +885,13 @@ getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do #endif #if x86_64_TARGET_ARCH +getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg), + CmmLit displacement]) + = return $ Any I64 (\dst -> unitOL $ + LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) +#endif + +#if x86_64_TARGET_ARCH getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do x_code <- getAnyReg x lbl <- getNewLabelNat @@ -898,21 +946,19 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps MO_Not rep -> trivialUCode rep (NOT rep) x -- Nop conversions - -- TODO: these are only nops if the arg is not a fixed register that - -- can't be byte-addressed. - MO_U_Conv I32 I8 -> conversionNop I32 x - MO_S_Conv I32 I8 -> conversionNop I32 x - MO_U_Conv I16 I8 -> conversionNop I16 x - MO_S_Conv I16 I8 -> conversionNop I16 x - MO_U_Conv I32 I16 -> conversionNop I32 x - MO_S_Conv I32 I16 -> conversionNop I32 x + MO_U_Conv I32 I8 -> toI8Reg I32 x + MO_S_Conv I32 I8 -> toI8Reg I32 x + MO_U_Conv I16 I8 -> toI8Reg I16 x + MO_S_Conv I16 I8 -> toI8Reg I16 x + MO_U_Conv I32 I16 -> toI16Reg I32 x + MO_S_Conv I32 I16 -> toI16Reg I32 x #if x86_64_TARGET_ARCH MO_U_Conv I64 I32 -> conversionNop I64 x MO_S_Conv I64 I32 -> conversionNop I64 x - MO_U_Conv I64 I16 -> conversionNop I64 x - MO_S_Conv I64 I16 -> conversionNop I64 x - MO_U_Conv I64 I8 -> conversionNop I64 x - MO_S_Conv I64 I8 -> conversionNop I64 x + MO_U_Conv I64 I16 -> toI16Reg I64 x + MO_S_Conv I64 I16 -> toI16Reg I64 x + MO_U_Conv I64 I8 -> toI8Reg I64 x + MO_S_Conv I64 I8 -> toI8Reg I64 x #endif MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x @@ -964,6 +1010,18 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps instr from (OpReg reg) (OpReg dst) return (Any to code) + toI8Reg new_rep expr + = do codefn <- getAnyReg expr + return (Any new_rep codefn) + -- HACK: use getAnyReg to get a byte-addressable register. + -- If the source was a Fixed register, this will add the + -- mov instruction to put it into the desired destination. + -- We're assuming that the destination won't be a fixed + -- non-byte-addressable register; it won't be, because all + -- fixed registers are word-sized. + + toI16Reg = toI8Reg -- for now + conversionNop new_rep expr = do e_code <- getRegister expr return (swizzleRegisterRep e_code new_rep) @@ -1101,16 +1159,30 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps -- in return (Any rep code) - {- Case2: shift length is complex (non-immediate) -} + {- Case2: shift length is complex (non-immediate) + * y must go in %ecx. + * we cannot do y first *and* put its result in %ecx, because + %ecx might be clobbered by x. + * if we do y second, then x cannot be + in a clobbered reg. Also, we cannot clobber x's reg + with the instruction itself. + * so we can either: + - do y first, put its result in a fresh tmp, then copy it to %ecx later + - do y second and put its result into %ecx. x gets placed in a fresh + tmp. This is likely to be better, becuase the reg alloc can + eliminate this reg->reg move here (it won't eliminate the other one, + because the move is into the fixed %ecx). + -} shift_code rep instr x y{-amount-} = do - (x_reg, x_code) <- getNonClobberedReg x + x_code <- getAnyReg x + tmp <- getNewRegNat rep y_code <- getAnyReg y let - code = x_code `appOL` + code = x_code tmp `appOL` y_code ecx `snocOL` - instr (OpReg ecx) (OpReg x_reg) + instr (OpReg ecx) (OpReg tmp) -- in - return (Fixed rep x_reg code) + return (Fixed rep tmp code) -------------------- add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register @@ -1648,7 +1720,7 @@ getRegister (CmmLit (CmmInt i rep)) getRegister (CmmLit (CmmFloat f frep)) = do lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference addImportNat False lbl + dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl Amode addr addr_code <- getAmode dynRef let code dst = LDATA ReadOnlyData [CmmDataLabel lbl, @@ -1747,6 +1819,14 @@ getAmode other -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +#if x86_64_TARGET_ARCH + +getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg), + CmmLit displacement]) + = return $ Amode (ripRel (litToImm displacement)) nilOL + +#endif + #if i386_TARGET_ARCH || x86_64_TARGET_ARCH -- This is all just ridiculous, since it carefully undoes @@ -1774,15 +1854,18 @@ getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _), getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)]]) | shift == 0 || shift == 1 || shift == 2 || shift == 3 - = do (x_reg, x_code) <- getNonClobberedReg x - -- x must be in a temp, because it has to stay live over y_code - -- we could compre x_reg and y_reg and do something better here... - (y_reg, y_code) <- getSomeReg y - let - code = x_code `appOL` y_code - base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8 - return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt 0)) - code) + = x86_complex_amode x y shift 0 + +getAmode (CmmMachOp (MO_Add rep) + [x, CmmMachOp (MO_Add _) + [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)], + CmmLit (CmmInt offset _)]]) + | shift == 0 || shift == 1 || shift == 2 || shift == 3 + && not (is64BitInteger offset) + = x86_complex_amode x y shift offset + +getAmode (CmmMachOp (MO_Add rep) [x,y]) + = x86_complex_amode x y 0 0 getAmode (CmmLit lit) | not (is64BitLit lit) = return (Amode (ImmAddr (litToImm lit) 0) nilOL) @@ -1791,6 +1874,19 @@ getAmode expr = do (reg,code) <- getSomeReg expr return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code) + +x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode +x86_complex_amode base index shift offset + = do (x_reg, x_code) <- getNonClobberedReg base + -- x must be in a temp, because it has to stay live over y_code + -- we could compre x_reg and y_reg and do something better here... + (y_reg, y_code) <- getSomeReg index + let + code = x_code `appOL` y_code + base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8 + return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset))) + code) + #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1983,7 +2079,12 @@ is64BitLit x = False #endif is64BitInteger :: Integer -> Bool -is64BitInteger i = i > 0x7fffffff || i < -0x80000000 +is64BitInteger i = i64 > 0x7fffffff || i64 < -0x80000000 + where i64 = fromIntegral i :: Int64 + -- a CmmInt is intended to be truncated to the appropriate + -- number of bits, so here we truncate it to Int64. This is + -- important because e.g. -1 as a CmmInt might be either + -- -1 or 18446744073709551615. -- ----------------------------------------------------------------------------- -- The 'CondCode' type: Condition codes passed up the tree. @@ -2006,7 +2107,7 @@ getCondCode = panic "MachCode.getCondCode: not on Alphas" -- yes, they really do seem to want exactly the same! getCondCode (CmmMachOp mop [x, y]) - = ASSERT (cmmExprRep x /= I8) -- tmp, not set up to handle 8-bit comparisons + = case mop of MO_Eq F32 -> condFltCode EQQ x y MO_Ne F32 -> condFltCode NE x y @@ -2037,7 +2138,7 @@ getCondCode (CmmMachOp mop [x, y]) MO_U_Lt rep -> condIntCode LU x y MO_U_Le rep -> condIntCode LEU x y - other -> pprPanic "getCondCode(x86,sparc)" (pprMachOp mop) + other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) @@ -2305,6 +2406,25 @@ assignIntCode pk dst src #if i386_TARGET_ARCH || x86_64_TARGET_ARCH -- integer assignment to memory + +-- specific case of adding/subtracting an integer to a particular address. +-- ToDo: catch other cases where we can use an operation directly on a memory +-- address. +assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _, + CmmLit (CmmInt i _)]) + | addr == addr2, pk /= I64 || not (is64BitInteger i), + Just instr <- check op + = do Amode amode code_addr <- getAmode addr + let code = code_addr `snocOL` + instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode) + return code + where + check (MO_Add _) = Just ADD + check (MO_Sub _) = Just SUB + check _ = Nothing + -- ToDo: more? + +-- general case assignMem_IntCode pk addr src = do Amode addr code_addr <- getAmode addr (code_src, op_src) <- get_op_RI src @@ -2818,9 +2938,8 @@ genCondJump id bool = do genCCall :: CmmCallTarget -- function to call - -> [(CmmReg,MachHint)] -- where to put the result - -> [(CmmExpr,MachHint)] -- arguments (of mixed type) - -> Maybe [GlobalReg] -- volatile regs to save + -> CmmHintFormals -- where to put the result + -> CmmActuals -- arguments (of mixed type) -> NatM InstrBlock -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2899,8 +3018,12 @@ genCCall fn cconv result_regs args #if i386_TARGET_ARCH +genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL + -- write barrier compiles to no code on x86/x86-64; + -- we keep it this long in order to prevent earlier optimisations. + -- we only cope with a single result for foreign calls -genCCall (CmmPrim op) [(r,_)] args vols = do +genCCall (CmmPrim op) [(r,_)] args = do case op of MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args @@ -2914,14 +3037,14 @@ genCCall (CmmPrim op) [(r,_)] args vols = do MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args - other_op -> outOfLineFloatOp op r args vols + other_op -> outOfLineFloatOp op r args where actuallyInlineFloatOp rep instr [(x,_)] = do res <- trivialUFCode rep instr x any <- anyReg res - return (any (getRegisterReg r)) + return (any (getRegisterReg (CmmLocal r))) -genCCall target dest_regs args vols = do +genCCall target dest_regs args = do let sizes = map (arg_size . cmmExprRep . fst) (reverse args) #if !darwin_TARGET_OS @@ -2984,8 +3107,8 @@ genCCall target dest_regs args vols = do rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest)) where r_dest_hi = getHiVRegFromLo r_dest - rep = cmmRegRep dest - r_dest = getRegisterReg dest + rep = localRegRep dest + r_dest = getRegisterReg (CmmLocal dest) assign_code many = panic "genCCall.assign_code many" return (push_code `appOL` @@ -3049,26 +3172,26 @@ genCCall target dest_regs args vols = do #if i386_TARGET_ARCH || x86_64_TARGET_ARCH -outOfLineFloatOp :: CallishMachOp -> CmmReg -> [(CmmExpr,MachHint)] - -> Maybe [GlobalReg] -> NatM InstrBlock -outOfLineFloatOp mop res args vols +outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals + -> NatM InstrBlock +outOfLineFloatOp mop res args = do - targetExpr <- cmmMakeDynamicReference addImportNat True lbl + targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl let target = CmmForeignCall targetExpr CCallConv - if cmmRegRep res == F64 + if localRegRep res == F64 then - stmtToInstrs (CmmCall target [(res,FloatHint)] args vols) + stmtToInstrs (CmmCall target [(res,FloatHint)] args CmmUnsafe) else do uq <- getUniqueNat let - tmp = CmmLocal (LocalReg uq F64) + tmp = LocalReg uq F64 KindNonPtr -- in - code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args vols) - code2 <- stmtToInstrs (CmmAssign res (CmmReg tmp)) + code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args CmmUnsafe) + code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) return (code1 `appOL` code2) where - lbl = mkForeignLabel fn Nothing True + lbl = mkForeignLabel fn Nothing False fn = case mop of MO_F32_Sqrt -> FSLIT("sqrtf") @@ -3109,10 +3232,14 @@ outOfLineFloatOp mop res args vols #if x86_64_TARGET_ARCH -genCCall (CmmPrim op) [(r,_)] args vols = - outOfLineFloatOp op r args vols +genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL + -- write barrier compiles to no code on x86/x86-64; + -- we keep it this long in order to prevent earlier optimisations. + +genCCall (CmmPrim op) [(r,_)] args = + outOfLineFloatOp op r args -genCCall target dest_regs args vols = do +genCCall target dest_regs args = do -- load up the register arguments (stack_args, aregs, fregs, load_args_code) @@ -3121,7 +3248,7 @@ genCCall target dest_regs args vols = do let fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs)) int_regs_used = reverse (drop (length aregs) (reverse allArgRegs)) - arg_regs = int_regs_used ++ fp_regs_used + arg_regs = [eax] ++ int_regs_used ++ fp_regs_used -- for annotating the call instruction with sse_regs = length fp_regs_used @@ -3195,8 +3322,8 @@ genCCall target dest_regs args vols = do F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest)) rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest)) where - rep = cmmRegRep dest - r_dest = getRegisterReg dest + rep = localRegRep dest + r_dest = getRegisterReg (CmmLocal dest) assign_code many = panic "genCCall.assign_code many" return (load_args_code `appOL` @@ -3244,10 +3371,10 @@ genCCall target dest_regs args vols = do (arg_reg, arg_code) <- getSomeReg arg delta <- getDeltaNat setDeltaNat (delta-arg_size) - let code' = code `appOL` toOL [ - MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0)), + let code' = code `appOL` arg_code `appOL` toOL [ SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) , - DELTA (delta-arg_size)] + DELTA (delta-arg_size), + MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))] push_args rest code' | otherwise = do @@ -3298,7 +3425,7 @@ genCCall target dest_regs args vols = do stack only immediately prior to the call proper. Sigh. -} -genCCall target dest_regs argsAndHints vols = do +genCCall target dest_regs argsAndHints = do let args = map fst argsAndHints argcode_and_vregs <- mapM arg_to_int_vregs args @@ -3405,7 +3532,7 @@ genCCall target dest_regs argsAndHints vols = do ) outOfLineFloatOp mop = do - mopExpr <- cmmMakeDynamicReference addImportNat True $ + mopExpr <- cmmMakeDynamicReference addImportNat CallReference $ mkForeignLabel functionName Nothing True let mopLabelOrExpr = case mopExpr of CmmLit (CmmLabel lbl) -> Left lbl @@ -3490,7 +3617,11 @@ outOfLineFloatOp mop = frame just before ccalling. -} -genCCall target dest_regs argsAndHints vols + +genCCall (CmmPrim MO_WriteBarrier) _ _ + = return $ unitOL LWSYNC + +genCCall target dest_regs argsAndHints = ASSERT (not $ any (`elem` [I8,I16]) argReps) -- we rely on argument promotion in the codeGen do @@ -3651,12 +3782,12 @@ genCCall target dest_regs argsAndHints vols | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3, MR r_dest r4] | otherwise -> unitOL (MR r_dest r3) - where rep = cmmRegRep dest - r_dest = getRegisterReg dest + where rep = cmmRegRep (CmmLocal dest) + r_dest = getRegisterReg (CmmLocal dest) outOfLineFloatOp mop = do - mopExpr <- cmmMakeDynamicReference addImportNat True $ + mopExpr <- cmmMakeDynamicReference addImportNat CallReference $ mkForeignLabel functionName Nothing True let mopLabelOrExpr = case mopExpr of CmmLit (CmmLabel lbl) -> Left lbl @@ -3716,7 +3847,7 @@ genSwitch expr ids = do (reg,e_code) <- getSomeReg expr lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference addImportNat False lbl + dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl (tableReg,t_code) <- getSomeReg $ dynRef let jumpTable = map jumpTableEntryRel ids @@ -3730,11 +3861,25 @@ genSwitch expr ids op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg wORD_SIZE) (ImmInt 0)) +#if x86_64_TARGET_ARCH && darwin_TARGET_OS + -- on Mac OS X/x86_64, put the jump table in the text section + -- to work around a limitation of the linker. + -- ld64 is unable to handle the relocations for + -- .quad L1 - L0 + -- if L0 is not preceded by a non-anonymous label in its section. + + code = e_code `appOL` t_code `appOL` toOL [ + ADD wordRep op (OpReg tableReg), + JMP_TBL (OpReg tableReg) [ id | Just id <- ids ], + LDATA Text (CmmDataLabel lbl : jumpTable) + ] +#else code = e_code `appOL` t_code `appOL` toOL [ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), ADD wordRep op (OpReg tableReg), JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] ] +#endif return code | otherwise = do @@ -3756,7 +3901,7 @@ genSwitch expr ids (reg,e_code) <- getSomeReg expr tmp <- getNewRegNat I32 lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference addImportNat False lbl + dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl (tableReg,t_code) <- getSomeReg $ dynRef let jumpTable = map jumpTableEntryRel ids @@ -4442,6 +4587,8 @@ remainderCode rep div x y = do -- ----------------------------------------------------------------------------- -- Coercing to/from integer/floating-point... +-- When going to integer, we truncate (round towards 0). + -- @coerce(Int2FP|FP2Int)@ are more complicated integer/float -- conversions. We have to store temporaries in memory to move -- between the integer and the floating point register sets. @@ -4527,7 +4674,7 @@ coerceFP2Int from to x = do coerceFP2Int from to x = do (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand let - opc = case from of F32 -> CVTSS2SI; F64 -> CVTSD2SI + opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ code dst = x_code `snocOL` opc x_op dst -- in return (Any to code) -- works even if the destination rep is