X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FMachCodeGen.hs;h=b685c9db7f14094a49350957fd4c07f32025a8d1;hb=70800c2246c28ca1cc8152876372b9a62cac84ed;hp=90285bf82cd469a217d1532cd01ea9f9491237e4;hpb=8db404018f101a182c92cc724a8de08cf5ab10ba;p=ghc-hetmet.git diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 90285bf..b685c9d 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -31,6 +31,7 @@ import NCGMonad import PositionIndependentCode import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr ) import MachRegs +import PprMach -- Our intermediate code: import BlockId @@ -58,6 +59,7 @@ import Data.Bits import Data.Word import Data.Int + -- ----------------------------------------------------------------------------- -- Top-level of the instruction selector @@ -321,21 +323,53 @@ assignReg_I64Code lvalue valueTree = panic "assignReg_I64Code(sparc): invalid lvalue" --- Don't delete this -- it's very handy for debugging. ---iselExpr64 expr --- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False --- = panic "iselExpr64(???)" +-- Load a 64 bit word +iselExpr64 (CmmLoad addrTree ty) + | isWord64 ty + = do Amode amode addr_code <- getAmode addrTree + let result + + | AddrRegReg r1 r2 <- amode + = do rlo <- getNewRegNat II32 + tmp <- getNewRegNat II32 + let rhi = getHiVRegFromLo rlo + + return $ ChildCode64 + ( addr_code + `appOL` toOL + [ ADD False False r1 (RIReg r2) tmp + , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi + , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ]) + rlo + + | AddrRegImm r1 (ImmInt i) <- amode + = do rlo <- getNewRegNat II32 + let rhi = getHiVRegFromLo rlo + + return $ ChildCode64 + ( addr_code + `appOL` toOL + [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi + , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ]) + rlo + + result + + +-- Add a literal to a 64 bit integer +iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) + = do ChildCode64 code1 r1_lo <- iselExpr64 e1 + let r1_hi = getHiVRegFromLo r1_lo + + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + return $ ChildCode64 + ( toOL + [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo + , ADD True False r1_hi (RIReg g0) r_dst_hi ]) + r_dst_lo -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do - Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree - rlo <- getNewRegNat II32 - let rhi = getHiVRegFromLo rlo - mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi - mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo - return ( - ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) - rlo - ) iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do r_dst_lo <- getNewRegNat II32 @@ -1449,23 +1483,46 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps MO_UU_Conv W32 to -> conversionNop (intSize to) x MO_SS_Conv W32 to -> conversionNop (intSize to) x - -- widenings - MO_UU_Conv W8 W32 -> integerExtend False W8 W32 x - MO_UU_Conv W16 W32 -> integerExtend False W16 W32 x - MO_UU_Conv W8 W16 -> integerExtend False W8 W16 x - MO_SS_Conv W16 W32 -> integerExtend True W16 W32 x + MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x + MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x + MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x + + -- sign extension + MO_SS_Conv W8 W32 -> integerExtend W8 W32 x + MO_SS_Conv W16 W32 -> integerExtend W16 W32 x + MO_SS_Conv W8 W16 -> integerExtend W8 W16 x - other_op -> panic "Unknown unary mach op" + other_op -> panic ("Unknown unary mach op: " ++ show mop) where - -- XXX SLL/SRL? - integerExtend signed from to expr = do - (reg, e_code) <- getSomeReg expr - let - code dst = - e_code `snocOL` - ((if signed then SRA else SRL) - reg (RIImm (ImmInt 0)) dst) - return (Any (intSize to) code) + + -- | sign extend and widen + integerExtend + :: Width -- ^ width of source expression + -> Width -- ^ width of result + -> CmmExpr -- ^ source expression + -> NatM Register + + integerExtend from to expr + = do -- load the expr into some register + (reg, e_code) <- getSomeReg expr + tmp <- getNewRegNat II32 + let bitCount + = case (from, to) of + (W8, W32) -> 24 + (W16, W32) -> 16 + (W8, W16) -> 24 + let code dst + = e_code + + -- local shift word left to load the sign bit + `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp + + -- arithmetic shift right to sign extend + `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst + + return (Any (intSize to) code) + + conversionNop new_rep expr = do e_code <- getRegister expr return (swizzleRegisterRep e_code new_rep) @@ -1494,14 +1551,13 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps MO_Sub W32 -> trivialCode W32 (SUB False False) x y MO_S_MulMayOflo rep -> imulMayOflo rep x y -{- - -- ToDo: teach about V8+ SPARC div instructions - MO_S_Quot W32 -> idiv FSLIT(".div") x y - MO_S_Rem W32 -> idiv FSLIT(".rem") x y - MO_U_Quot W32 -> idiv FSLIT(".udiv") x y - MO_U_Rem W32 -> idiv FSLIT(".urem") x y --} + MO_S_Quot W32 -> idiv True False x y + MO_U_Quot W32 -> idiv False False x y + + MO_S_Rem W32 -> irem True x y + MO_U_Rem W32 -> irem False x y + MO_F_Eq w -> condFltReg EQQ x y MO_F_Ne w -> condFltReg NE x y @@ -1534,9 +1590,115 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps -} other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) where - --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) + -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) + + + -- | Generate an integer division instruction. + idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register + + -- For unsigned division with a 32 bit numerator, + -- we can just clear the Y register. + idiv False cc x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ WRY g0 g0 + , UDIV cc a_reg (RIReg b_reg) dst] + + return (Any II32 code) + + + -- For _signed_ division with a 32 bit numerator, + -- we have to sign extend the numerator into the Y register. + idiv True cc x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend + , SRA tmp (RIImm (ImmInt 16)) tmp + + , WRY tmp g0 + , SDIV cc a_reg (RIReg b_reg) dst] + + return (Any II32 code) + + + -- | Do an integer remainder. + -- + -- NOTE: The SPARC v8 architecture manual says that integer division + -- instructions _may_ generate a remainder, depending on the implementation. + -- If so it is _recommended_ that the remainder is placed in the Y register. + -- + -- The UltraSparc 2007 manual says Y is _undefined_ after division. + -- + -- The SPARC T2 doesn't store the remainder, not sure about the others. + -- It's probably best not to worry about it, and just generate our own + -- remainders. + -- + irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register + + -- For unsigned operands: + -- Division is between a 64 bit numerator and a 32 bit denominator, + -- so we still have to clear the Y register. + irem False x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp_reg <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ WRY g0 g0 + , UDIV False a_reg (RIReg b_reg) tmp_reg + , UMUL False tmp_reg (RIReg b_reg) tmp_reg + , SUB False False a_reg (RIReg tmp_reg) dst] + + return (Any II32 code) + + + -- For signed operands: + -- Make sure to sign extend into the Y register, or the remainder + -- will have the wrong sign when the numerator is negative. + -- + -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits, + -- not the full 32. Not sure why this is, something to do with overflow? + -- If anyone cares enough about the speed of signed remainder they + -- can work it out themselves (then tell me). -- BL 2009/01/20 + + irem True x y = do + (a_reg, a_code) <- getSomeReg x + (b_reg, b_code) <- getSomeReg y + + tmp1_reg <- getNewRegNat II32 + tmp2_reg <- getNewRegNat II32 + + let code dst + = a_code + `appOL` b_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend + , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend + , WRY tmp1_reg g0 + + , SDIV False a_reg (RIReg b_reg) tmp2_reg + , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg + , SUB False False a_reg (RIReg tmp2_reg) dst] + + return (Any II32 code) + - -------------------- imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register imulMayOflo rep a b = do (a_reg, a_code) <- getSomeReg a @@ -1928,15 +2090,16 @@ getAmode (CmmMachOp (MO_Add rep) [x, y]) code = codeX `appOL` codeY return (Amode (AddrRegReg regX regY) code) --- XXX Is this same as "leaf" in Stix? getAmode (CmmLit lit) = do - tmp <- getNewRegNat II32 - let - code = unitOL (SETHI (HI imm__2) tmp) - return (Amode (AddrRegImm tmp (LO imm__2)) code) - where - imm__2 = litToImm lit + let imm__2 = litToImm lit + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 + + let code = toOL [ SETHI (HI imm__2) tmp1 + , OR False tmp1 (RIImm (LO imm__2)) tmp2] + + return (Amode (AddrRegReg tmp2 g0) code) getAmode other = do @@ -2592,15 +2755,13 @@ assignMem_FltCode pk addr src = do return code__2 -- Floating point assignment to a register/temporary --- ToDo: Verify correctness -assignReg_FltCode pk reg src = do - r <- getRegister src - v1 <- getNewRegNat pk - return $ case r of - Any _ code -> code dst - Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg v1 - where - dst = getRegisterReg reg +assignReg_FltCode pk dstCmmReg srcCmmExpr = do + srcRegister <- getRegister srcCmmExpr + let dstReg = getRegisterReg dstCmmReg + + return $ case srcRegister of + Any _ code -> code dstReg + Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg #endif /* sparc_TARGET_ARCH */ @@ -3506,6 +3667,12 @@ genCCall target dest_regs argsAndHints = do | not $ isFloatType rep , W32 <- width = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest + + | not $ isFloatType rep + , W64 <- width + , r_dest_hi <- getHiVRegFromLo r_dest + = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi + , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest] in result @@ -4012,15 +4179,43 @@ genSwitch expr ids return code #elif sparc_TARGET_ARCH genSwitch expr ids - | opt_PIC - = error "MachCodeGen: sparc genSwitch PIC not finished\n" + | opt_PIC + = error "MachCodeGen: sparc genSwitch PIC not finished\n" - | otherwise - = error "MachCodeGen: sparc genSwitch non-PIC not finished\n" + | otherwise + = do (e_reg, e_code) <- getSomeReg expr + + base_reg <- getNewRegNat II32 + offset_reg <- getNewRegNat II32 + dst <- getNewRegNat II32 + + label <- getNewLabelNat + let jumpTable = map jumpTableEntry ids + + return $ e_code `appOL` + toOL + -- the jump table + [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable) + + -- load base of jump table + , SETHI (HI (ImmCLbl label)) base_reg + , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg + + -- the addrs in the table are 32 bits wide.. + , SLL e_reg (RIImm $ ImmInt 2) offset_reg + + -- load and jump to the destination + , LD II32 (AddrRegReg base_reg offset_reg) dst + , JMP (AddrRegImm dst (ImmInt 0)) + , NOP ] + #else #error "ToDo: genSwitch" #endif + +-- | Convert a BlockId to some CmmStatic data +jumpTableEntry :: Maybe BlockId -> CmmStatic jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) where blockLabel = mkAsmTempLabel id @@ -4793,21 +4988,33 @@ coerceInt2FP width1 width2 x = do FxTOy (intSize width1) (floatSize width2) dst dst] return (Any (floatSize $ width2) code__2) ------------- -coerceFP2Int width1 width2 x = do - let pk = intSize width1 - fprep = floatSize width2 - (src, code) <- getSomeReg x - reg <- getNewRegNat fprep - tmp <- getNewRegNat pk - let - code__2 dst = ASSERT(fprep == FF64 || fprep == FF32) - code `appOL` toOL [ - FxTOy fprep pk src tmp, - ST pk tmp (spRel (-2)), - LD pk (spRel (-2)) dst] - return (Any pk code__2) +-- | Coerce a floating point value to integer +-- +-- NOTE: On sparc v9 there are no instructions to move a value from an +-- FP register directly to an int register, so we have to use a load/store. +-- +coerceFP2Int width1 width2 x + = do let fsize1 = floatSize width1 + fsize2 = floatSize width2 + + isize2 = intSize width2 + + (fsrc, code) <- getSomeReg x + fdst <- getNewRegNat fsize2 + + let code2 dst + = code + `appOL` toOL + -- convert float to int format, leaving it in a float reg. + [ FxTOy fsize1 isize2 fsrc fdst + + -- store the int into mem, then load it back to move + -- it into an actual int reg. + , ST fsize2 fdst (spRel (-2)) + , LD isize2 (spRel (-2)) dst] + + return (Any isize2 code2) ------------ coerceDbl2Flt x = do