X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FnativeGen%2FMachCodeGen.hs;fp=compiler%2FnativeGen%2FMachCodeGen.hs;h=0000000000000000000000000000000000000000;hb=b04a210e26ca57242fd052f2aa91011a80b76299;hp=d94a906bbddd79223b88f8b40e5c6a922825f8ce;hpb=77ed23d51b968505b3ad8541c075657ae94f0ea3;p=ghc-hetmet.git diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs deleted file mode 100644 index d94a906..0000000 --- a/compiler/nativeGen/MachCodeGen.hs +++ /dev/null @@ -1,5199 +0,0 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - ------------------------------------------------------------------------------ --- --- Generating machine code (instruction selection) --- --- (c) The University of Glasgow 1996-2004 --- ------------------------------------------------------------------------------ - --- This is a big module, but, if you pay attention to --- (a) the sectioning, (b) the type signatures, and --- (c) the #if blah_TARGET_ARCH} things, the --- structure should not be too overwhelming. - -module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where - -#include "HsVersions.h" -#include "nativeGen/NCG.h" -#include "MachDeps.h" - --- NCG stuff: -import Instrs -import Regs -import NCGMonad -import PositionIndependentCode -import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr ) -import PprMach - --- Our intermediate code: -import BlockId -import PprCmm ( pprExpr ) -import Cmm -import CLabel -import ClosureInfo ( C_SRT(..) ) - --- The rest: -import BasicTypes -import StaticFlags ( opt_PIC ) -import ForeignCall ( CCallConv(..) ) -import OrdList -import Pretty -import qualified Outputable as O -import Outputable -import FastString -import FastBool ( isFastTrue ) -import Constants ( wORD_SIZE ) - -import Debug.Trace ( trace ) - -import Control.Monad ( mapAndUnzipM ) -import Data.Maybe ( fromJust ) -import Data.Bits -import Data.Word -import Data.Int - - --- ----------------------------------------------------------------------------- --- Top-level of the instruction selector - --- | 'InstrBlock's are the insn sequences generated by the insn selectors. --- They are really trees of insns to facilitate fast appending, where a --- left-to-right traversal (pre-order?) yields the insns in the correct --- order. - -type InstrBlock = OrdList Instr - -cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop] -cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do - (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks - picBaseMb <- getPicBaseMaybeNat - let proc = CmmProc info lab params (ListGraph $ concat nat_blocks) - tops = proc : concat statics - case picBaseMb of - Just picBase -> initializePicBase picBase tops - Nothing -> return tops - -cmmTopCodeGen (CmmData sec dat) = do - return [CmmData sec dat] -- no translation, we just use CmmStatic - -basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop]) -basicBlockCodeGen (BasicBlock id stmts) = do - instrs <- stmtsToInstrs stmts - -- code generation may introduce new basic block boundaries, which - -- are indicated by the NEWBLOCK instruction. We must split up the - -- instruction stream into basic blocks again. Also, we extract - -- LDATAs here too. - let - (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs - - mkBlocks (NEWBLOCK id) (instrs,blocks,statics) - = ([], BasicBlock id instrs : blocks, statics) - mkBlocks (LDATA sec dat) (instrs,blocks,statics) - = (instrs, blocks, CmmData sec dat:statics) - mkBlocks instr (instrs,blocks,statics) - = (instr:instrs, blocks, statics) - -- in - return (BasicBlock id top : other_blocks, statics) - -stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock -stmtsToInstrs stmts - = do instrss <- mapM stmtToInstrs stmts - return (concatOL instrss) - -stmtToInstrs :: CmmStmt -> NatM InstrBlock -stmtToInstrs stmt = case stmt of - CmmNop -> return nilOL - CmmComment s -> return (unitOL (COMMENT s)) - - CmmAssign reg src - | isFloatType ty -> assignReg_FltCode size reg src -#if WORD_SIZE_IN_BITS==32 - | isWord64 ty -> assignReg_I64Code reg src -#endif - | otherwise -> assignReg_IntCode size reg src - where ty = cmmRegType reg - size = cmmTypeSize ty - - CmmStore addr src - | isFloatType ty -> assignMem_FltCode size addr src -#if WORD_SIZE_IN_BITS==32 - | isWord64 ty -> assignMem_I64Code addr src -#endif - | otherwise -> assignMem_IntCode size addr src - where ty = cmmExprType src - size = cmmTypeSize ty - - CmmCall target result_regs args _ _ - -> genCCall target result_regs args - - CmmBranch id -> genBranch id - CmmCondBranch arg id -> genCondJump id arg - CmmSwitch arg ids -> genSwitch arg ids - CmmJump arg params -> genJump arg - CmmReturn params -> - panic "stmtToInstrs: return statement should have been cps'd away" - --- ----------------------------------------------------------------------------- --- General things for putting together code sequences - --- Expand CmmRegOff. ToDo: should we do it this way around, or convert --- CmmExprs into CmmRegOff? -mangleIndexTree :: CmmExpr -> CmmExpr -mangleIndexTree (CmmRegOff reg off) - = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] - where width = typeWidth (cmmRegType reg) - --- ----------------------------------------------------------------------------- --- Code gen for 64-bit arithmetic on 32-bit platforms - -{- -Simple support for generating 64-bit code (ie, 64 bit values and 64 -bit assignments) on 32-bit platforms. Unlike the main code generator -we merely shoot for generating working code as simply as possible, and -pay little attention to code quality. Specifically, there is no -attempt to deal cleverly with the fixed-vs-floating register -distinction; all values are generated into (pairs of) floating -registers, even if this would mean some redundant reg-reg moves as a -result. Only one of the VRegUniques is returned, since it will be -of the VRegUniqueLo form, and the upper-half VReg can be determined -by applying getHiVRegFromLo to it. --} - -data ChildCode64 -- a.k.a "Register64" - = ChildCode64 - InstrBlock -- code - Reg -- the lower 32-bit temporary which contains the - -- result; use getHiVRegFromLo to find the other - -- VRegUnique. Rules of this simplified insn - -- selection game are therefore that the returned - -- Reg may be modified - -#if WORD_SIZE_IN_BITS==32 -assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock -#endif - -#ifndef x86_64_TARGET_ARCH -iselExpr64 :: CmmExpr -> NatM ChildCode64 -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -assignMem_I64Code addrTree valueTree = do - Amode addr addr_code <- getAmode addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - let - rhi = getHiVRegFromLo rlo - - -- Little-endian store - mov_lo = MOV II32 (OpReg rlo) (OpAddr addr) - mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4))) - -- in - return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) - - -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = mkVReg u_dst II32 - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo) - mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi) - -- in - return ( - vcode `snocOL` mov_lo `snocOL` mov_hi - ) - -assignReg_I64Code lvalue valueTree - = panic "assignReg_I64Code(i386): invalid lvalue" - ------------- - -iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat II32 - let - r = fromIntegral (fromIntegral i :: Word32) - q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) - code = toOL [ - MOV II32 (OpImm (ImmInteger r)) (OpReg rlo), - MOV II32 (OpImm (ImmInteger q)) (OpReg rhi) - ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do - Amode addr addr_code <- getAmode addrTree - (rlo,rhi) <- getNewRegPairNat II32 - let - mov_lo = MOV II32 (OpAddr addr) (OpReg rlo) - mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi) - -- in - return ( - ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - ) - -iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty - = return (ChildCode64 nilOL (mkVReg vu II32)) - --- we handle addition, but rather badly -iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - (rlo,rhi) <- getNewRegPairNat II32 - let - r = fromIntegral (fromIntegral i :: Word32) - q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) - r1hi = getHiVRegFromLo r1lo - code = code1 `appOL` - toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), - ADD II32 (OpImm (ImmInteger r)) (OpReg rlo), - MOV II32 (OpReg r1hi) (OpReg rhi), - ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), - ADD II32 (OpReg r2lo) (OpReg rlo), - MOV II32 (OpReg r1hi) (OpReg rhi), - ADC II32 (OpReg r2hi) (OpReg rhi) ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do - fn <- getAnyReg expr - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - code = fn r_dst_lo - return ( - ChildCode64 (code `snocOL` - MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi)) - r_dst_lo - ) - -iselExpr64 expr - = pprPanic "iselExpr64(i386)" (ppr expr) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -assignMem_I64Code addrTree valueTree = do - Amode addr addr_code <- getAmode addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - (src, code) <- getSomeReg addrTree - let - rhi = getHiVRegFromLo rlo - -- Big-endian store - mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0)) - mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4)) - return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo) - -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = mkVReg u_dst (cmmTypeSize pk) - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = mkMOV r_src_lo r_dst_lo - mov_hi = mkMOV r_src_hi r_dst_hi - mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg - return (vcode `snocOL` mov_hi `snocOL` mov_lo) -assignReg_I64Code lvalue valueTree - = panic "assignReg_I64Code(sparc): invalid lvalue" - - --- Load a 64 bit word -iselExpr64 (CmmLoad addrTree ty) - | isWord64 ty - = do Amode amode addr_code <- getAmode addrTree - let result - - | AddrRegReg r1 r2 <- amode - = do rlo <- getNewRegNat II32 - tmp <- getNewRegNat II32 - let rhi = getHiVRegFromLo rlo - - return $ ChildCode64 - ( addr_code - `appOL` toOL - [ ADD False False r1 (RIReg r2) tmp - , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi - , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ]) - rlo - - | AddrRegImm r1 (ImmInt i) <- amode - = do rlo <- getNewRegNat II32 - let rhi = getHiVRegFromLo rlo - - return $ ChildCode64 - ( addr_code - `appOL` toOL - [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi - , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ]) - rlo - - result - - --- Add a literal to a 64 bit integer -iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) - = do ChildCode64 code1 r1_lo <- iselExpr64 e1 - let r1_hi = getHiVRegFromLo r1_lo - - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - return $ ChildCode64 - ( toOL - [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo - , ADD True False r1_hi (RIReg g0) r_dst_hi ]) - r_dst_lo - - --- Addition of II64 -iselExpr64 (CmmMachOp (MO_Add width) [e1, e2]) - = do ChildCode64 code1 r1_lo <- iselExpr64 e1 - let r1_hi = getHiVRegFromLo r1_lo - - ChildCode64 code2 r2_lo <- iselExpr64 e2 - let r2_hi = getHiVRegFromLo r2_lo - - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - let code = code1 - `appOL` code2 - `appOL` toOL - [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo - , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ] - - return $ ChildCode64 code r_dst_lo - - -iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_lo = mkVReg uq II32 - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = mkMOV r_src_lo r_dst_lo - mov_hi = mkMOV r_src_hi r_dst_hi - mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg - return ( - ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo - ) - --- Convert something into II64 -iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) - = do - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - -- compute expr and load it into r_dst_lo - (a_reg, a_code) <- getSomeReg expr - - let code = a_code - `appOL` toOL - [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits - , mkRegRegMoveInstr a_reg r_dst_lo ] - - return $ ChildCode64 code r_dst_lo - - -iselExpr64 expr - = pprPanic "iselExpr64(sparc)" (ppr expr) - -#endif /* sparc_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if powerpc_TARGET_ARCH - -getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock) -getI64Amodes addrTree = do - Amode hi_addr addr_code <- getAmode addrTree - case addrOffset hi_addr 4 of - Just lo_addr -> return (hi_addr, lo_addr, addr_code) - Nothing -> do (hi_ptr, code) <- getSomeReg addrTree - return (AddrRegImm hi_ptr (ImmInt 0), - AddrRegImm hi_ptr (ImmInt 4), - code) - -assignMem_I64Code addrTree valueTree = do - (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - let - rhi = getHiVRegFromLo rlo - - -- Big-endian store - mov_hi = ST II32 rhi hi_addr - mov_lo = ST II32 rlo lo_addr - -- in - return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) - -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = mkVReg u_dst II32 - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = MR r_dst_lo r_src_lo - mov_hi = MR r_dst_hi r_src_hi - -- in - return ( - vcode `snocOL` mov_lo `snocOL` mov_hi - ) - -assignReg_I64Code lvalue valueTree - = panic "assignReg_I64Code(powerpc): invalid lvalue" - - --- Don't delete this -- it's very handy for debugging. ---iselExpr64 expr --- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False --- = panic "iselExpr64(???)" - -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do - (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = LD II32 rhi hi_addr - mov_lo = LD II32 rlo lo_addr - return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - -iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty - = return (ChildCode64 nilOL (mkVReg vu II32)) - -iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat II32 - let - half0 = fromIntegral (fromIntegral i :: Word16) - half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16) - half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16) - half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16) - - code = toOL [ - LIS rlo (ImmInt half1), - OR rlo rlo (RIImm $ ImmInt half0), - LIS rhi (ImmInt half3), - OR rlo rlo (RIImm $ ImmInt half2) - ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ ADDC rlo r1lo r2lo, - ADDE rhi r1hi r2hi ] - -- in - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do - (expr_reg,expr_code) <- getSomeReg expr - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = LI rhi (ImmInt 0) - mov_lo = MR rlo expr_reg - return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo -iselExpr64 expr - = pprPanic "iselExpr64(powerpc)" (ppr expr) - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- The 'Register' type - --- 'Register's passed up the tree. If the stix code forces the register --- to live in a pre-decided machine register, it comes out as @Fixed@; --- otherwise, it comes out as @Any@, and the parent can decide which --- register to put it in. - -data Register - = Fixed Size Reg InstrBlock - | Any Size (Reg -> InstrBlock) - -swizzleRegisterRep :: Register -> Size -> Register --- Change the width; it's a no-op -swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code -swizzleRegisterRep (Any _ codefn) size = Any size codefn - - --- ----------------------------------------------------------------------------- --- Utils based on getRegister, below - --- The dual to getAnyReg: compute an expression into a register, but --- we don't mind which one it is. -getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) -getSomeReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed _ reg code -> - return (reg, code) - --- ----------------------------------------------------------------------------- --- Grab the Reg for a CmmReg - -getRegisterReg :: CmmReg -> Reg - -getRegisterReg (CmmLocal (LocalReg u pk)) - = mkVReg u (cmmTypeSize pk) - -getRegisterReg (CmmGlobal mid) - = case get_GlobalReg_reg_or_addr mid of - Left (RealReg rrno) -> RealReg rrno - _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) - -- By this stage, the only MagicIds remaining should be the - -- ones which map to a real machine register on this - -- platform. Hence ... - - --- ----------------------------------------------------------------------------- --- Generate code to get a subtree into a Register - --- Don't delete this -- it's very handy for debugging. ---getRegister expr --- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False --- = panic "getRegister(???)" - -getRegister :: CmmExpr -> NatM Register - -#if !x86_64_TARGET_ARCH - -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured - -- register, it can only be used for rip-relative addressing. -getRegister (CmmReg (CmmGlobal PicBaseReg)) - = do - reg <- getPicBaseNat wordSize - return (Fixed wordSize reg nilOL) -#endif - -getRegister (CmmReg reg) - = return (Fixed (cmmTypeSize (cmmRegType reg)) - (getRegisterReg reg) nilOL) - -getRegister tree@(CmmRegOff _ _) - = getRegister (mangleIndexTree tree) - - -#if WORD_SIZE_IN_BITS==32 - -- for 32-bit architectuers, support some 64 -> 32 bit conversions: - -- TO_W_(x), TO_W_(x >> 32) - -getRegister (CmmMachOp (MO_UU_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister (CmmMachOp (MO_SS_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -#endif - --- end of machine-"independent" bit; here we go on the rest... - -#if alpha_TARGET_ARCH - -getRegister (StDouble d) - = getBlockIdNat `thenNat` \ lbl -> - getNewRegNat PtrRep `thenNat` \ tmp -> - let code dst = mkSeqInstrs [ - LDATA RoDataSegment lbl [ - DATA TF [ImmLab (rational d)] - ], - LDA tmp (AddrImm (ImmCLbl lbl)), - LD TF dst (AddrReg tmp)] - in - return (Any FF64 code) - -getRegister (StPrim primop [x]) -- unary PrimOps - = case primop of - IntNegOp -> trivialUCode (NEG Q False) x - - NotOp -> trivialUCode NOT x - - FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x - DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x - - OrdOp -> coerceIntCode IntRep x - ChrOp -> chrCode x - - Float2IntOp -> coerceFP2Int x - Int2FloatOp -> coerceInt2FP pr x - Double2IntOp -> coerceFP2Int x - Int2DoubleOp -> coerceInt2FP pr x - - Double2FloatOp -> coerceFltCode x - Float2DoubleOp -> coerceFltCode x - - other_op -> getRegister (StCall fn CCallConv FF64 [x]) - where - fn = case other_op of - FloatExpOp -> fsLit "exp" - FloatLogOp -> fsLit "log" - FloatSqrtOp -> fsLit "sqrt" - FloatSinOp -> fsLit "sin" - FloatCosOp -> fsLit "cos" - FloatTanOp -> fsLit "tan" - FloatAsinOp -> fsLit "asin" - FloatAcosOp -> fsLit "acos" - FloatAtanOp -> fsLit "atan" - FloatSinhOp -> fsLit "sinh" - FloatCoshOp -> fsLit "cosh" - FloatTanhOp -> fsLit "tanh" - DoubleExpOp -> fsLit "exp" - DoubleLogOp -> fsLit "log" - DoubleSqrtOp -> fsLit "sqrt" - DoubleSinOp -> fsLit "sin" - DoubleCosOp -> fsLit "cos" - DoubleTanOp -> fsLit "tan" - DoubleAsinOp -> fsLit "asin" - DoubleAcosOp -> fsLit "acos" - DoubleAtanOp -> fsLit "atan" - DoubleSinhOp -> fsLit "sinh" - DoubleCoshOp -> fsLit "cosh" - DoubleTanhOp -> fsLit "tanh" - where - pr = panic "MachCode.getRegister: no primrep needed for Alpha" - -getRegister (StPrim primop [x, y]) -- dyadic PrimOps - = case primop of - CharGtOp -> trivialCode (CMP LTT) y x - CharGeOp -> trivialCode (CMP LE) y x - CharEqOp -> trivialCode (CMP EQQ) x y - CharNeOp -> int_NE_code x y - CharLtOp -> trivialCode (CMP LTT) x y - CharLeOp -> trivialCode (CMP LE) x y - - IntGtOp -> trivialCode (CMP LTT) y x - IntGeOp -> trivialCode (CMP LE) y x - IntEqOp -> trivialCode (CMP EQQ) x y - IntNeOp -> int_NE_code x y - IntLtOp -> trivialCode (CMP LTT) x y - IntLeOp -> trivialCode (CMP LE) x y - - WordGtOp -> trivialCode (CMP ULT) y x - WordGeOp -> trivialCode (CMP ULE) x y - WordEqOp -> trivialCode (CMP EQQ) x y - WordNeOp -> int_NE_code x y - WordLtOp -> trivialCode (CMP ULT) x y - WordLeOp -> trivialCode (CMP ULE) x y - - AddrGtOp -> trivialCode (CMP ULT) y x - AddrGeOp -> trivialCode (CMP ULE) y x - AddrEqOp -> trivialCode (CMP EQQ) x y - AddrNeOp -> int_NE_code x y - AddrLtOp -> trivialCode (CMP ULT) x y - AddrLeOp -> trivialCode (CMP ULE) x y - - FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y - FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y - FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y - FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y - FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y - FloatLeOp -> cmpF_code (FCMP TF LE) NE x y - - DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y - DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y - DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y - DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y - DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y - DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y - - IntAddOp -> trivialCode (ADD Q False) x y - IntSubOp -> trivialCode (SUB Q False) x y - IntMulOp -> trivialCode (MUL Q False) x y - IntQuotOp -> trivialCode (DIV Q False) x y - IntRemOp -> trivialCode (REM Q False) x y - - WordAddOp -> trivialCode (ADD Q False) x y - WordSubOp -> trivialCode (SUB Q False) x y - WordMulOp -> trivialCode (MUL Q False) x y - WordQuotOp -> trivialCode (DIV Q True) x y - WordRemOp -> trivialCode (REM Q True) x y - - FloatAddOp -> trivialFCode W32 (FADD TF) x y - FloatSubOp -> trivialFCode W32 (FSUB TF) x y - FloatMulOp -> trivialFCode W32 (FMUL TF) x y - FloatDivOp -> trivialFCode W32 (FDIV TF) x y - - DoubleAddOp -> trivialFCode W64 (FADD TF) x y - DoubleSubOp -> trivialFCode W64 (FSUB TF) x y - DoubleMulOp -> trivialFCode W64 (FMUL TF) x y - DoubleDivOp -> trivialFCode W64 (FDIV TF) x y - - AddrAddOp -> trivialCode (ADD Q False) x y - AddrSubOp -> trivialCode (SUB Q False) x y - AddrRemOp -> trivialCode (REM Q True) x y - - AndOp -> trivialCode AND x y - OrOp -> trivialCode OR x y - XorOp -> trivialCode XOR x y - SllOp -> trivialCode SLL x y - SrlOp -> trivialCode SRL x y - - ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll" - ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra" - ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl" - - FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y]) - DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y]) - where - {- ------------------------------------------------------------ - Some bizarre special code for getting condition codes into - registers. Integer non-equality is a test for equality - followed by an XOR with 1. (Integer comparisons always set - the result register to 0 or 1.) Floating point comparisons of - any kind leave the result in a floating point register, so we - need to wrangle an integer register out of things. - -} - int_NE_code :: StixTree -> StixTree -> NatM Register - - int_NE_code x y - = trivialCode (CMP EQQ) x y `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst) - in - return (Any IntRep code__2) - - {- ------------------------------------------------------------ - Comments for int_NE_code also apply to cmpF_code - -} - cmpF_code - :: (Reg -> Reg -> Reg -> Instr) - -> Cond - -> StixTree -> StixTree - -> NatM Register - - cmpF_code instr cond x y - = trivialFCode pr instr x y `thenNat` \ register -> - getNewRegNat FF64 `thenNat` \ tmp -> - getBlockIdNat `thenNat` \ lbl -> - let - code = registerCode register tmp - result = registerName register tmp - - code__2 dst = code . mkSeqInstrs [ - OR zeroh (RIImm (ImmInt 1)) dst, - BF cond result (ImmCLbl lbl), - OR zeroh (RIReg zeroh) dst, - NEWBLOCK lbl] - in - return (Any IntRep code__2) - where - pr = panic "trivialU?FCode: does not use PrimRep on Alpha" - ------------------------------------------------------------ - -getRegister (CmmLoad pk mem) - = getAmode mem `thenNat` \ amode -> - let - code = amodeCode amode - src = amodeAddr amode - size = primRepToSize pk - code__2 dst = code . mkSeqInstr (LD size dst src) - in - return (Any pk code__2) - -getRegister (StInt i) - | fits8Bits i - = let - code dst = mkSeqInstr (OR zeroh (RIImm src) dst) - in - return (Any IntRep code) - | otherwise - = let - code dst = mkSeqInstr (LDI Q dst src) - in - return (Any IntRep code) - where - src = ImmInt (fromInteger i) - -getRegister leaf - | isJust imm - = let - code dst = mkSeqInstr (LDA dst (AddrImm imm__2)) - in - return (Any PtrRep code) - where - imm = maybeImm leaf - imm__2 = case imm of Just x -> x - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -getRegister (CmmLit (CmmFloat f W32)) = do - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let code dst = - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f W32)] - `consOL` (addr_code `snocOL` - GLD FF32 addr dst) - -- in - return (Any FF32 code) - - -getRegister (CmmLit (CmmFloat d W64)) - | d == 0.0 - = let code dst = unitOL (GLDZ dst) - in return (Any FF64 code) - - | d == 1.0 - = let code dst = unitOL (GLD1 dst) - in return (Any FF64 code) - - | otherwise = do - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let code dst = - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat d W64)] - `consOL` (addr_code `snocOL` - GLD FF64 addr dst) - -- in - return (Any FF64 code) - -#endif /* i386_TARGET_ARCH */ - -#if x86_64_TARGET_ARCH - -getRegister (CmmLit (CmmFloat 0.0 w)) = do - let size = floatSize w - code dst = unitOL (XOR size (OpReg dst) (OpReg dst)) - -- I don't know why there are xorpd, xorps, and pxor instructions. - -- They all appear to do the same thing --SDM - return (Any size code) - -getRegister (CmmLit (CmmFloat f w)) = do - lbl <- getNewLabelNat - let code dst = toOL [ - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f w)], - MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - ] - -- in - return (Any size code) - where size = floatSize w - -#endif /* x86_64_TARGET_ARCH */ - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II8) addr - return (Any II32 code) - -getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II8) addr - return (Any II32 code) - -getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II16) addr - return (Any II32 code) - -getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II16) addr - return (Any II32 code) - -#endif - -#if x86_64_TARGET_ARCH - --- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II8) addr - return (Any II64 code) - -getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II8) addr - return (Any II64 code) - -getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II16) addr - return (Any II64 code) - -getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II16) addr - return (Any II64 code) - -getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend - return (Any II64 code) - -getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II32) addr - return (Any II64 code) - -#endif - -#if x86_64_TARGET_ARCH -getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), - CmmLit displacement]) - = return $ Any II64 (\dst -> unitOL $ - LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) -#endif - -#if x86_64_TARGET_ARCH -getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do - x_code <- getAnyReg x - lbl <- getNewLabelNat - let - code dst = x_code dst `appOL` toOL [ - -- This is how gcc does it, so it can't be that bad: - LDATA ReadOnlyData16 [ - CmmAlign 16, - CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x80000000 W32), - CmmStaticLit (CmmInt 0 W32), - CmmStaticLit (CmmInt 0 W32), - CmmStaticLit (CmmInt 0 W32) - ], - XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - -- xorps, so we need the 128-bit constant - -- ToDo: rip-relative - ] - -- - return (Any FF32 code) - -getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do - x_code <- getAnyReg x - lbl <- getNewLabelNat - let - -- This is how gcc does it, so it can't be that bad: - code dst = x_code dst `appOL` toOL [ - LDATA ReadOnlyData16 [ - CmmAlign 16, - CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x8000000000000000 W64), - CmmStaticLit (CmmInt 0 W64) - ], - -- gcc puts an unpck here. Wonder if we need it. - XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) - -- xorpd, so we need the 128-bit constant - ] - -- - return (Any FF64 code) -#endif - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -getRegister (CmmMachOp mop [x]) -- unary MachOps - = case mop of -#if i386_TARGET_ARCH - MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x - MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x -#endif - - MO_S_Neg w -> triv_ucode NEGI (intSize w) - MO_F_Neg w -> triv_ucode NEGI (floatSize w) - MO_Not w -> triv_ucode NOT (intSize w) - - -- Nop conversions - MO_UU_Conv W32 W8 -> toI8Reg W32 x - MO_SS_Conv W32 W8 -> toI8Reg W32 x - MO_UU_Conv W16 W8 -> toI8Reg W16 x - MO_SS_Conv W16 W8 -> toI8Reg W16 x - MO_UU_Conv W32 W16 -> toI16Reg W32 x - MO_SS_Conv W32 W16 -> toI16Reg W32 x - -#if x86_64_TARGET_ARCH - MO_UU_Conv W64 W32 -> conversionNop II64 x - MO_SS_Conv W64 W32 -> conversionNop II64 x - MO_UU_Conv W64 W16 -> toI16Reg W64 x - MO_SS_Conv W64 W16 -> toI16Reg W64 x - MO_UU_Conv W64 W8 -> toI8Reg W64 x - MO_SS_Conv W64 W8 -> toI8Reg W64 x -#endif - - MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x - MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x - - -- widenings - MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x - MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x - MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x - - MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x - MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x - MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x - -#if x86_64_TARGET_ARCH - MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x - MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x - MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x - MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x - MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x - MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x - -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl. - -- However, we don't want the register allocator to throw it - -- away as an unnecessary reg-to-reg move, so we keep it in - -- the form of a movzl and print it as a movl later. -#endif - -#if i386_TARGET_ARCH - MO_FF_Conv W32 W64 -> conversionNop FF64 x - MO_FF_Conv W64 W32 -> conversionNop FF32 x -#else - MO_FF_Conv W32 W64 -> coerceFP2FP W64 x - MO_FF_Conv W64 W32 -> coerceFP2FP W32 x -#endif - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - other -> pprPanic "getRegister" (pprMachOp mop) - where - triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register - triv_ucode instr size = trivialUCode size (instr size) x - - -- signed or unsigned extension. - integerExtend :: Width -> Width - -> (Size -> Operand -> Operand -> Instr) - -> CmmExpr -> NatM Register - integerExtend from to instr expr = do - (reg,e_code) <- if from == W8 then getByteReg expr - else getSomeReg expr - let - code dst = - e_code `snocOL` - instr (intSize from) (OpReg reg) (OpReg dst) - return (Any (intSize to) code) - - toI8Reg :: Width -> CmmExpr -> NatM Register - toI8Reg new_rep expr - = do codefn <- getAnyReg expr - return (Any (intSize new_rep) codefn) - -- HACK: use getAnyReg to get a byte-addressable register. - -- If the source was a Fixed register, this will add the - -- mov instruction to put it into the desired destination. - -- We're assuming that the destination won't be a fixed - -- non-byte-addressable register; it won't be, because all - -- fixed registers are word-sized. - - toI16Reg = toI8Reg -- for now - - conversionNop :: Size -> CmmExpr -> NatM Register - conversionNop new_size expr - = do e_code <- getRegister expr - return (swizzleRegisterRep e_code new_size) - - -getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps - = case mop of - MO_F_Eq w -> condFltReg EQQ x y - MO_F_Ne w -> condFltReg NE x y - MO_F_Gt w -> condFltReg GTT x y - MO_F_Ge w -> condFltReg GE x y - MO_F_Lt w -> condFltReg LTT x y - MO_F_Le w -> condFltReg LE x y - - MO_Eq rep -> condIntReg EQQ x y - MO_Ne rep -> condIntReg NE x y - - MO_S_Gt rep -> condIntReg GTT x y - MO_S_Ge rep -> condIntReg GE x y - MO_S_Lt rep -> condIntReg LTT x y - MO_S_Le rep -> condIntReg LE x y - - MO_U_Gt rep -> condIntReg GU x y - MO_U_Ge rep -> condIntReg GEU x y - MO_U_Lt rep -> condIntReg LU x y - MO_U_Le rep -> condIntReg LEU x y - -#if i386_TARGET_ARCH - MO_F_Add w -> trivialFCode w GADD x y - MO_F_Sub w -> trivialFCode w GSUB x y - MO_F_Quot w -> trivialFCode w GDIV x y - MO_F_Mul w -> trivialFCode w GMUL x y -#endif - -#if x86_64_TARGET_ARCH - MO_F_Add w -> trivialFCode w ADD x y - MO_F_Sub w -> trivialFCode w SUB x y - MO_F_Quot w -> trivialFCode w FDIV x y - MO_F_Mul w -> trivialFCode w MUL x y -#endif - - MO_Add rep -> add_code rep x y - MO_Sub rep -> sub_code rep x y - - MO_S_Quot rep -> div_code rep True True x y - MO_S_Rem rep -> div_code rep True False x y - MO_U_Quot rep -> div_code rep False True x y - MO_U_Rem rep -> div_code rep False False x y - - MO_S_MulMayOflo rep -> imulMayOflo rep x y - - MO_Mul rep -> triv_op rep IMUL - MO_And rep -> triv_op rep AND - MO_Or rep -> triv_op rep OR - MO_Xor rep -> triv_op rep XOR - - {- Shift ops on x86s have constraints on their source, it - either has to be Imm, CL or 1 - => trivialCode is not restrictive enough (sigh.) - -} - MO_Shl rep -> shift_code rep SHL x y {-False-} - MO_U_Shr rep -> shift_code rep SHR x y {-False-} - MO_S_Shr rep -> shift_code rep SAR x y {-False-} - - other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) - where - -------------------- - triv_op width instr = trivialCode width op (Just op) x y - where op = instr (intSize width) - - imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register - imulMayOflo rep a b = do - (a_reg, a_code) <- getNonClobberedReg a - b_code <- getAnyReg b - let - shift_amt = case rep of - W32 -> 31 - W64 -> 63 - _ -> panic "shift_amt" - - size = intSize rep - code = a_code `appOL` b_code eax `appOL` - toOL [ - IMUL2 size (OpReg a_reg), -- result in %edx:%eax - SAR size (OpImm (ImmInt shift_amt)) (OpReg eax), - -- sign extend lower part - SUB size (OpReg edx) (OpReg eax) - -- compare against upper - -- eax==0 if high part == sign extended low part - ] - -- in - return (Fixed size eax code) - - -------------------- - shift_code :: Width - -> (Size -> Operand -> Operand -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register - - {- Case1: shift length as immediate -} - shift_code width instr x y@(CmmLit lit) = do - x_code <- getAnyReg x - let - size = intSize width - code dst - = x_code dst `snocOL` - instr size (OpImm (litToImm lit)) (OpReg dst) - -- in - return (Any size code) - - {- Case2: shift length is complex (non-immediate) - * y must go in %ecx. - * we cannot do y first *and* put its result in %ecx, because - %ecx might be clobbered by x. - * if we do y second, then x cannot be - in a clobbered reg. Also, we cannot clobber x's reg - with the instruction itself. - * so we can either: - - do y first, put its result in a fresh tmp, then copy it to %ecx later - - do y second and put its result into %ecx. x gets placed in a fresh - tmp. This is likely to be better, becuase the reg alloc can - eliminate this reg->reg move here (it won't eliminate the other one, - because the move is into the fixed %ecx). - -} - shift_code width instr x y{-amount-} = do - x_code <- getAnyReg x - let size = intSize width - tmp <- getNewRegNat size - y_code <- getAnyReg y - let - code = x_code tmp `appOL` - y_code ecx `snocOL` - instr size (OpReg ecx) (OpReg tmp) - -- in - return (Fixed size tmp code) - - -------------------- - add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register - add_code rep x (CmmLit (CmmInt y _)) - | is32BitInteger y = add_int rep x y - add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y - where size = intSize rep - - -------------------- - sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register - sub_code rep x (CmmLit (CmmInt y _)) - | is32BitInteger (-y) = add_int rep x (-y) - sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y - - -- our three-operand add instruction: - add_int width x y = do - (x_reg, x_code) <- getSomeReg x - let - size = intSize width - imm = ImmInt (fromInteger y) - code dst - = x_code `snocOL` - LEA size - (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) - (OpReg dst) - -- - return (Any size code) - - ---------------------- - div_code width signed quotient x y = do - (y_op, y_code) <- getRegOrMem y -- cannot be clobbered - x_code <- getAnyReg x - let - size = intSize width - widen | signed = CLTD size - | otherwise = XOR size (OpReg edx) (OpReg edx) - - instr | signed = IDIV - | otherwise = DIV - - code = y_code `appOL` - x_code eax `appOL` - toOL [widen, instr size y_op] - - result | quotient = eax - | otherwise = edx - - -- in - return (Fixed size result code) - - -getRegister (CmmLoad mem pk) - | isFloatType pk - = do - Amode src mem_code <- getAmode mem - let - size = cmmTypeSize pk - code dst = mem_code `snocOL` - IF_ARCH_i386(GLD size src dst, - MOV size (OpAddr src) (OpReg dst)) - return (Any size code) - -#if i386_TARGET_ARCH -getRegister (CmmLoad mem pk) - | not (isWord64 pk) - = do - code <- intLoadCode instr mem - return (Any size code) - where - width = typeWidth pk - size = intSize width - instr = case width of - W8 -> MOVZxL II8 - _other -> MOV size - -- We always zero-extend 8-bit loads, if we - -- can't think of anything better. This is because - -- we can't guarantee access to an 8-bit variant of every register - -- (esi and edi don't have 8-bit variants), so to make things - -- simpler we do our 8-bit arithmetic with full 32-bit registers. -#endif - -#if x86_64_TARGET_ARCH --- Simpler memory load code on x86_64 -getRegister (CmmLoad mem pk) - = do - code <- intLoadCode (MOV size) mem - return (Any size code) - where size = intSize $ typeWidth pk -#endif - -getRegister (CmmLit (CmmInt 0 width)) - = let - size = intSize width - - -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits - adj_size = case size of II64 -> II32; _ -> size - size1 = IF_ARCH_i386( size, adj_size ) - code dst - = unitOL (XOR size1 (OpReg dst) (OpReg dst)) - in - return (Any size code) - -#if x86_64_TARGET_ARCH - -- optimisation for loading small literals on x86_64: take advantage - -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit - -- instruction forms are shorter. -getRegister (CmmLit lit) - | isWord64 (cmmLitType lit), not (isBigLit lit) - = let - imm = litToImm lit - code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst)) - in - return (Any II64 code) - where - isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff - isBigLit _ = False - -- note1: not the same as (not.is32BitLit), because that checks for - -- signed literals that fit in 32 bits, but we want unsigned - -- literals here. - -- note2: all labels are small, because we're assuming the - -- small memory model (see gcc docs, -mcmodel=small). -#endif - -getRegister (CmmLit lit) - = let - size = cmmTypeSize (cmmLitType lit) - imm = litToImm lit - code dst = unitOL (MOV size (OpImm imm) (OpReg dst)) - in - return (Any size code) - -getRegister other = pprPanic "getRegister(x86)" (ppr other) - - -intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr - -> NatM (Reg -> InstrBlock) -intLoadCode instr mem = do - Amode src mem_code <- getAmode mem - return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst)) - --- Compute an expression into *any* register, adding the appropriate --- move instruction if necessary. -getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock) -getAnyReg expr = do - r <- getRegister expr - anyReg r - -anyReg :: Register -> NatM (Reg -> InstrBlock) -anyReg (Any _ code) = return code -anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst) - --- A bit like getSomeReg, but we want a reg that can be byte-addressed. --- Fixed registers might not be byte-addressable, so we make sure we've --- got a temporary, inserting an extra reg copy if necessary. -getByteReg :: CmmExpr -> NatM (Reg, InstrBlock) -#if x86_64_TARGET_ARCH -getByteReg = getSomeReg -- all regs are byte-addressable on x86_64 -#else -getByteReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed rep reg code - | isVirtualReg reg -> return (reg,code) - | otherwise -> do - tmp <- getNewRegNat rep - return (tmp, code `snocOL` reg2reg rep reg tmp) - -- ToDo: could optimise slightly by checking for byte-addressable - -- real registers, but that will happen very rarely if at all. -#endif - --- Another variant: this time we want the result in a register that cannot --- be modified by code to evaluate an arbitrary expression. -getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock) -getNonClobberedReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed rep reg code - -- only free regs can be clobbered - | RealReg rr <- reg, isFastTrue (freeReg rr) -> do - tmp <- getNewRegNat rep - return (tmp, code `snocOL` reg2reg rep reg tmp) - | otherwise -> - return (reg, code) - -reg2reg :: Size -> Reg -> Reg -> Instr -reg2reg size src dst -#if i386_TARGET_ARCH - | isFloatSize size = GMOV src dst -#endif - | otherwise = MOV size (OpReg src) (OpReg dst) - -#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - --- getRegister :: CmmExpr -> NatM Register - --- Load a literal float into a float register. --- The actual literal is stored in a new data area, and we load it --- at runtime. -getRegister (CmmLit (CmmFloat f W32)) = do - - -- a label for the new data area - lbl <- getNewLabelNat - tmp <- getNewRegNat II32 - - let code dst = toOL [ - -- the data area - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f W32)], - - -- load the literal - SETHI (HI (ImmCLbl lbl)) tmp, - LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] - - return (Any FF32 code) - -getRegister (CmmLit (CmmFloat d W64)) = do - lbl <- getNewLabelNat - tmp <- getNewRegNat II32 - let code dst = toOL [ - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmFloat d W64)], - SETHI (HI (ImmCLbl lbl)) tmp, - LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] - return (Any FF64 code) - -getRegister (CmmMachOp mop [x]) -- unary MachOps - = case mop of - MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x - MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x - - MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x - MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x - - MO_FF_Conv W64 W32-> coerceDbl2Flt x - MO_FF_Conv W32 W64-> coerceFlt2Dbl x - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - -- Conversions which are a nop on sparc - MO_UU_Conv from to - | from == to -> conversionNop (intSize to) x - MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8)) - MO_UU_Conv W32 to -> conversionNop (intSize to) x - MO_SS_Conv W32 to -> conversionNop (intSize to) x - - MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x - MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x - MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x - - -- sign extension - MO_SS_Conv W8 W32 -> integerExtend W8 W32 x - MO_SS_Conv W16 W32 -> integerExtend W16 W32 x - MO_SS_Conv W8 W16 -> integerExtend W8 W16 x - - other_op -> panic ("Unknown unary mach op: " ++ show mop) - where - - -- | sign extend and widen - integerExtend - :: Width -- ^ width of source expression - -> Width -- ^ width of result - -> CmmExpr -- ^ source expression - -> NatM Register - - integerExtend from to expr - = do -- load the expr into some register - (reg, e_code) <- getSomeReg expr - tmp <- getNewRegNat II32 - let bitCount - = case (from, to) of - (W8, W32) -> 24 - (W16, W32) -> 16 - (W8, W16) -> 24 - let code dst - = e_code - - -- local shift word left to load the sign bit - `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp - - -- arithmetic shift right to sign extend - `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst - - return (Any (intSize to) code) - - - conversionNop new_rep expr - = do e_code <- getRegister expr - return (swizzleRegisterRep e_code new_rep) - -getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps - = case mop of - MO_Eq rep -> condIntReg EQQ x y - MO_Ne rep -> condIntReg NE x y - - MO_S_Gt rep -> condIntReg GTT x y - MO_S_Ge rep -> condIntReg GE x y - MO_S_Lt rep -> condIntReg LTT x y - MO_S_Le rep -> condIntReg LE x y - - MO_U_Gt W32 -> condIntReg GTT x y - MO_U_Ge W32 -> condIntReg GE x y - MO_U_Lt W32 -> condIntReg LTT x y - MO_U_Le W32 -> condIntReg LE x y - - MO_U_Gt W16 -> condIntReg GU x y - MO_U_Ge W16 -> condIntReg GEU x y - MO_U_Lt W16 -> condIntReg LU x y - MO_U_Le W16 -> condIntReg LEU x y - - MO_Add W32 -> trivialCode W32 (ADD False False) x y - MO_Sub W32 -> trivialCode W32 (SUB False False) x y - - MO_S_MulMayOflo rep -> imulMayOflo rep x y - - MO_S_Quot W32 -> idiv True False x y - MO_U_Quot W32 -> idiv False False x y - - MO_S_Rem W32 -> irem True x y - MO_U_Rem W32 -> irem False x y - - MO_F_Eq w -> condFltReg EQQ x y - MO_F_Ne w -> condFltReg NE x y - - MO_F_Gt w -> condFltReg GTT x y - MO_F_Ge w -> condFltReg GE x y - MO_F_Lt w -> condFltReg LTT x y - MO_F_Le w -> condFltReg LE x y - - MO_F_Add w -> trivialFCode w FADD x y - MO_F_Sub w -> trivialFCode w FSUB x y - MO_F_Mul w -> trivialFCode w FMUL x y - MO_F_Quot w -> trivialFCode w FDIV x y - - MO_And rep -> trivialCode rep (AND False) x y - MO_Or rep -> trivialCode rep (OR False) x y - MO_Xor rep -> trivialCode rep (XOR False) x y - - MO_Mul rep -> trivialCode rep (SMUL False) x y - - MO_Shl rep -> trivialCode rep SLL x y - MO_U_Shr rep -> trivialCode rep SRL x y - MO_S_Shr rep -> trivialCode rep SRA x y - -{- - MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 - [promote x, promote y]) - where promote x = CmmMachOp MO_F32_to_Dbl [x] - MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 - [x, y]) --} - other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) - where - -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) - - - -- | Generate an integer division instruction. - idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register - - -- For unsigned division with a 32 bit numerator, - -- we can just clear the Y register. - idiv False cc x y = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ WRY g0 g0 - , UDIV cc a_reg (RIReg b_reg) dst] - - return (Any II32 code) - - - -- For _signed_ division with a 32 bit numerator, - -- we have to sign extend the numerator into the Y register. - idiv True cc x y = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend - , SRA tmp (RIImm (ImmInt 16)) tmp - - , WRY tmp g0 - , SDIV cc a_reg (RIReg b_reg) dst] - - return (Any II32 code) - - - -- | Do an integer remainder. - -- - -- NOTE: The SPARC v8 architecture manual says that integer division - -- instructions _may_ generate a remainder, depending on the implementation. - -- If so it is _recommended_ that the remainder is placed in the Y register. - -- - -- The UltraSparc 2007 manual says Y is _undefined_ after division. - -- - -- The SPARC T2 doesn't store the remainder, not sure about the others. - -- It's probably best not to worry about it, and just generate our own - -- remainders. - -- - irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register - - -- For unsigned operands: - -- Division is between a 64 bit numerator and a 32 bit denominator, - -- so we still have to clear the Y register. - irem False x y = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp_reg <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ WRY g0 g0 - , UDIV False a_reg (RIReg b_reg) tmp_reg - , UMUL False tmp_reg (RIReg b_reg) tmp_reg - , SUB False False a_reg (RIReg tmp_reg) dst] - - return (Any II32 code) - - - -- For signed operands: - -- Make sure to sign extend into the Y register, or the remainder - -- will have the wrong sign when the numerator is negative. - -- - -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits, - -- not the full 32. Not sure why this is, something to do with overflow? - -- If anyone cares enough about the speed of signed remainder they - -- can work it out themselves (then tell me). -- BL 2009/01/20 - - irem True x y = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp1_reg <- getNewRegNat II32 - tmp2_reg <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend - , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend - , WRY tmp1_reg g0 - - , SDIV False a_reg (RIReg b_reg) tmp2_reg - , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg - , SUB False False a_reg (RIReg tmp2_reg) dst] - - return (Any II32 code) - - - imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register - imulMayOflo rep a b = do - (a_reg, a_code) <- getSomeReg a - (b_reg, b_code) <- getSomeReg b - res_lo <- getNewRegNat II32 - res_hi <- getNewRegNat II32 - let - shift_amt = case rep of - W32 -> 31 - W64 -> 63 - _ -> panic "shift_amt" - code dst = a_code `appOL` b_code `appOL` - toOL [ - SMUL False a_reg (RIReg b_reg) res_lo, - RDY res_hi, - SRA res_lo (RIImm (ImmInt shift_amt)) res_lo, - SUB False False res_lo (RIReg res_hi) dst - ] - return (Any II32 code) - -getRegister (CmmLoad mem pk) = do - Amode src code <- getAmode mem - let - code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst - return (Any (cmmTypeSize pk) code__2) - -getRegister (CmmLit (CmmInt i _)) - | fits13Bits i - = let - src = ImmInt (fromInteger i) - code dst = unitOL (OR False g0 (RIImm src) dst) - in - return (Any II32 code) - -getRegister (CmmLit lit) - = let rep = cmmLitType lit - imm = litToImm lit - code dst = toOL [ - SETHI (HI imm) dst, - OR False dst (RIImm (LO imm)) dst] - in return (Any II32 code) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH -getRegister (CmmLoad mem pk) - | not (isWord64 pk) - = do - Amode addr addr_code <- getAmode mem - let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk) - addr_code `snocOL` LD size dst addr - return (Any size code) - where size = cmmTypeSize pk - --- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode mem - return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) - --- Note: there is no Load Byte Arithmetic instruction, so no signed case here - -getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode mem - return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) - -getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode mem - return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) - -getRegister (CmmMachOp mop [x]) -- unary MachOps - = case mop of - MO_Not rep -> triv_ucode_int rep NOT - - MO_F_Neg w -> triv_ucode_float w FNEG - MO_S_Neg w -> triv_ucode_int w NEG - - MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x - MO_FF_Conv W32 W64 -> conversionNop FF64 x - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - MO_SS_Conv from to - | from == to -> conversionNop (intSize to) x - - -- narrowing is a nop: we treat the high bits as undefined - MO_SS_Conv W32 to -> conversionNop (intSize to) x - MO_SS_Conv W16 W8 -> conversionNop II8 x - MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8) - MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16) - - MO_UU_Conv from to - | from == to -> conversionNop (intSize to) x - -- narrowing is a nop: we treat the high bits as undefined - MO_UU_Conv W32 to -> conversionNop (intSize to) x - MO_UU_Conv W16 W8 -> conversionNop II8 x - MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32)) - MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32)) - - where - triv_ucode_int width instr = trivialUCode (intSize width) instr x - triv_ucode_float width instr = trivialUCode (floatSize width) instr x - - conversionNop new_size expr - = do e_code <- getRegister expr - return (swizzleRegisterRep e_code new_size) - -getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps - = case mop of - MO_F_Eq w -> condFltReg EQQ x y - MO_F_Ne w -> condFltReg NE x y - MO_F_Gt w -> condFltReg GTT x y - MO_F_Ge w -> condFltReg GE x y - MO_F_Lt w -> condFltReg LTT x y - MO_F_Le w -> condFltReg LE x y - - MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y) - MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y) - - MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y) - MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y) - MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y) - MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y) - - MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y) - - MO_F_Add w -> triv_float w FADD - MO_F_Sub w -> triv_float w FSUB - MO_F_Mul w -> triv_float w FMUL - MO_F_Quot w -> triv_float w FDIV - - -- optimize addition with 32-bit immediate - -- (needed for PIC) - MO_Add W32 -> - case y of - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm) - -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep) - CmmLit lit - -> do - (src, srcCode) <- getSomeReg x - let imm = litToImm lit - code dst = srcCode `appOL` toOL [ - ADDIS dst src (HA imm), - ADD dst dst (RIImm (LO imm)) - ] - return (Any II32 code) - _ -> trivialCode W32 True ADD x y - - MO_Add rep -> trivialCode rep True ADD x y - MO_Sub rep -> - case y of -- subfi ('substract from' with immediate) doesn't exist - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) - -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) - _ -> trivialCodeNoImm' (intSize rep) SUBF y x - - MO_Mul rep -> trivialCode rep True MULLW x y - - MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y - - MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented" - MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented" - - MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y) - MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y) - - MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y) - MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y) - - MO_And rep -> trivialCode rep False AND x y - MO_Or rep -> trivialCode rep False OR x y - MO_Xor rep -> trivialCode rep False XOR x y - - MO_Shl rep -> trivialCode rep False SLW x y - MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y - MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y - where - triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register - triv_float width instr = trivialCodeNoImm (floatSize width) instr x y - -getRegister (CmmLit (CmmInt i rep)) - | Just imm <- makeImmediate rep True i - = let - code dst = unitOL (LI dst imm) - in - return (Any (intSize rep) code) - -getRegister (CmmLit (CmmFloat f frep)) = do - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let size = floatSize frep - code dst = - LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f frep)] - `consOL` (addr_code `snocOL` LD size dst addr) - return (Any size code) - -getRegister (CmmLit lit) - = let rep = cmmLitType lit - imm = litToImm lit - code dst = toOL [ - LIS dst (HA imm), - ADD dst dst (RIImm (LO imm)) - ] - in return (Any (cmmTypeSize rep) code) - -getRegister other = pprPanic "getRegister(ppc)" (pprExpr other) - - -- extend?Rep: wrap integer expression of type rep - -- in a conversion to II32 -extendSExpr W32 x = x -extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x] -extendUExpr W32 x = x -extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x] - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- The 'Amode' type: Memory addressing modes passed up the tree. - -data Amode = Amode AddrMode InstrBlock - -{- -Now, given a tree (the argument to an CmmLoad) that references memory, -produce a suitable addressing mode. - -A Rule of the Game (tm) for Amodes: use of the addr bit must -immediately follow use of the code part, since the code part puts -values in registers which the addr then refers to. So you can't put -anything in between, lest it overwrite some of those registers. If -you need to do some other computation between the code part and use of -the addr bit, first store the effective address from the amode in a -temporary, then do the other computation, and then use the temporary: - - code - LEA amode, tmp - ... other computation ... - ... (tmp) ... --} - -getAmode :: CmmExpr -> NatM Amode -getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree) - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -getAmode (StPrim IntSubOp [x, StInt i]) - = getNewRegNat PtrRep `thenNat` \ tmp -> - getRegister x `thenNat` \ register -> - let - code = registerCode register tmp - reg = registerName register tmp - off = ImmInt (-(fromInteger i)) - in - return (Amode (AddrRegImm reg off) code) - -getAmode (StPrim IntAddOp [x, StInt i]) - = getNewRegNat PtrRep `thenNat` \ tmp -> - getRegister x `thenNat` \ register -> - let - code = registerCode register tmp - reg = registerName register tmp - off = ImmInt (fromInteger i) - in - return (Amode (AddrRegImm reg off) code) - -getAmode leaf - | isJust imm - = return (Amode (AddrImm imm__2) id) - where - imm = maybeImm leaf - imm__2 = case imm of Just x -> x - -getAmode other - = getNewRegNat PtrRep `thenNat` \ tmp -> - getRegister other `thenNat` \ register -> - let - code = registerCode register tmp - reg = registerName register tmp - in - return (Amode (AddrReg reg) code) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if x86_64_TARGET_ARCH - -getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), - CmmLit displacement]) - = return $ Amode (ripRel (litToImm displacement)) nilOL - -#endif - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- This is all just ridiculous, since it carefully undoes --- what mangleIndexTree has just done. -getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)]) - | is32BitLit lit - -- ASSERT(rep == II32)??? - = do (x_reg, x_code) <- getSomeReg x - let off = ImmInt (-(fromInteger i)) - return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) - -getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)]) - | is32BitLit lit - -- ASSERT(rep == II32)??? - = do (x_reg, x_code) <- getSomeReg x - let off = ImmInt (fromInteger i) - return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) - --- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be --- recognised by the next rule. -getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _), - b@(CmmLit _)]) - = getAmode (CmmMachOp (MO_Add rep) [b,a]) - -getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _) - [y, CmmLit (CmmInt shift _)]]) - | shift == 0 || shift == 1 || shift == 2 || shift == 3 - = x86_complex_amode x y shift 0 - -getAmode (CmmMachOp (MO_Add rep) - [x, CmmMachOp (MO_Add _) - [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)], - CmmLit (CmmInt offset _)]]) - | shift == 0 || shift == 1 || shift == 2 || shift == 3 - && is32BitInteger offset - = x86_complex_amode x y shift offset - -getAmode (CmmMachOp (MO_Add rep) [x,y]) - = x86_complex_amode x y 0 0 - -getAmode (CmmLit lit) | is32BitLit lit - = return (Amode (ImmAddr (litToImm lit) 0) nilOL) - -getAmode expr = do - (reg,code) <- getSomeReg expr - return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code) - - -x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode -x86_complex_amode base index shift offset - = do (x_reg, x_code) <- getNonClobberedReg base - -- x must be in a temp, because it has to stay live over y_code - -- we could compre x_reg and y_reg and do something better here... - (y_reg, y_code) <- getSomeReg index - let - code = x_code `appOL` y_code - base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8 - return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset))) - code) - -#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)]) - | fits13Bits (-i) - = do - (reg, code) <- getSomeReg x - let - off = ImmInt (-(fromInteger i)) - return (Amode (AddrRegImm reg off) code) - - -getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)]) - | fits13Bits i - = do - (reg, code) <- getSomeReg x - let - off = ImmInt (fromInteger i) - return (Amode (AddrRegImm reg off) code) - -getAmode (CmmMachOp (MO_Add rep) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - let - code = codeX `appOL` codeY - return (Amode (AddrRegReg regX regY) code) - -getAmode (CmmLit lit) - = do - let imm__2 = litToImm lit - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - - let code = toOL [ SETHI (HI imm__2) tmp1 - , OR False tmp1 (RIImm (LO imm__2)) tmp2] - - return (Amode (AddrRegReg tmp2 g0) code) - -getAmode other - = do - (reg, code) <- getSomeReg other - let - off = ImmInt 0 - return (Amode (AddrRegImm reg off) code) - -#endif /* sparc_TARGET_ARCH */ - -#ifdef powerpc_TARGET_ARCH -getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W32 True (-i) - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - - -getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W32 True i - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - - -- optimize addition with 32-bit immediate - -- (needed for PIC) -getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit]) - = do - tmp <- getNewRegNat II32 - (src, srcCode) <- getSomeReg x - let imm = litToImm lit - code = srcCode `snocOL` ADDIS tmp src (HA imm) - return (Amode (AddrRegImm tmp (LO imm)) code) - -getAmode (CmmLit lit) - = do - tmp <- getNewRegNat II32 - let imm = litToImm lit - code = unitOL (LIS tmp (HA imm)) - return (Amode (AddrRegImm tmp (LO imm)) code) - -getAmode (CmmMachOp (MO_Add W32) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) - -getAmode other - = do - (reg, code) <- getSomeReg other - let - off = ImmInt 0 - return (Amode (AddrRegImm reg off) code) -#endif /* powerpc_TARGET_ARCH */ - --- ----------------------------------------------------------------------------- --- getOperand: sometimes any operand will do. - --- getNonClobberedOperand: the value of the operand will remain valid across --- the computation of an arbitrary expression, unless the expression --- is computed directly into a register which the operand refers to --- (see trivialCode where this function is used for an example). - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) -#if x86_64_TARGET_ARCH -getNonClobberedOperand (CmmLit lit) - | isSuitableFloatingPointLit lit = do - lbl <- getNewLabelNat - let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit lit]) - return (OpAddr (ripRel (ImmCLbl lbl)), code) -#endif -getNonClobberedOperand (CmmLit lit) - | is32BitLit lit && not (isFloatType (cmmLitType lit)) = - return (OpImm (litToImm lit), nilOL) -getNonClobberedOperand (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - (src',save_code) <- - if (amodeCouldBeClobbered src) - then do - tmp <- getNewRegNat wordSize - return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), - unitOL (LEA II32 (OpAddr src) (OpReg tmp))) - else - return (src, nilOL) - return (OpAddr src', save_code `appOL` mem_code) -getNonClobberedOperand e = do - (reg, code) <- getNonClobberedReg e - return (OpReg reg, code) - -amodeCouldBeClobbered :: AddrMode -> Bool -amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode) - -regClobbered (RealReg rr) = isFastTrue (freeReg rr) -regClobbered _ = False - --- getOperand: the operand is not required to remain valid across the --- computation of an arbitrary expression. -getOperand :: CmmExpr -> NatM (Operand, InstrBlock) -#if x86_64_TARGET_ARCH -getOperand (CmmLit lit) - | isSuitableFloatingPointLit lit = do - lbl <- getNewLabelNat - let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit lit]) - return (OpAddr (ripRel (ImmCLbl lbl)), code) -#endif -getOperand (CmmLit lit) - | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do - return (OpImm (litToImm lit), nilOL) -getOperand (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - return (OpAddr src, mem_code) -getOperand e = do - (reg, code) <- getSomeReg e - return (OpReg reg, code) - -isOperand :: CmmExpr -> Bool -isOperand (CmmLoad _ _) = True -isOperand (CmmLit lit) = is32BitLit lit - || isSuitableFloatingPointLit lit -isOperand _ = False - --- if we want a floating-point literal as an operand, we can --- use it directly from memory. However, if the literal is --- zero, we're better off generating it into a register using --- xor. -isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 -isSuitableFloatingPointLit _ = False - -getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) -getRegOrMem (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do - Amode src mem_code <- getAmode mem - return (OpAddr src, mem_code) -getRegOrMem e = do - (reg, code) <- getNonClobberedReg e - return (OpReg reg, code) - -#if x86_64_TARGET_ARCH -is32BitLit (CmmInt i W64) = is32BitInteger i - -- assume that labels are in the range 0-2^31-1: this assumes the - -- small memory model (see gcc docs, -mcmodel=small). -#endif -is32BitLit x = True -#endif - -is32BitInteger :: Integer -> Bool -is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 - where i64 = fromIntegral i :: Int64 - -- a CmmInt is intended to be truncated to the appropriate - -- number of bits, so here we truncate it to Int64. This is - -- important because e.g. -1 as a CmmInt might be either - -- -1 or 18446744073709551615. - --- ----------------------------------------------------------------------------- --- The 'CondCode' type: Condition codes passed up the tree. - -data CondCode = CondCode Bool Cond InstrBlock - --- Set up a condition code for a conditional branch. - -getCondCode :: CmmExpr -> NatM CondCode - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH -getCondCode = panic "MachCode.getCondCode: not on Alphas" -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH --- yes, they really do seem to want exactly the same! - -getCondCode (CmmMachOp mop [x, y]) - = - case mop of - MO_F_Eq W32 -> condFltCode EQQ x y - MO_F_Ne W32 -> condFltCode NE x y - MO_F_Gt W32 -> condFltCode GTT x y - MO_F_Ge W32 -> condFltCode GE x y - MO_F_Lt W32 -> condFltCode LTT x y - MO_F_Le W32 -> condFltCode LE x y - - MO_F_Eq W64 -> condFltCode EQQ x y - MO_F_Ne W64 -> condFltCode NE x y - MO_F_Gt W64 -> condFltCode GTT x y - MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode LTT x y - MO_F_Le W64 -> condFltCode LE x y - - MO_Eq rep -> condIntCode EQQ x y - MO_Ne rep -> condIntCode NE x y - - MO_S_Gt rep -> condIntCode GTT x y - MO_S_Ge rep -> condIntCode GE x y - MO_S_Lt rep -> condIntCode LTT x y - MO_S_Le rep -> condIntCode LE x y - - MO_U_Gt rep -> condIntCode GU x y - MO_U_Ge rep -> condIntCode GEU x y - MO_U_Lt rep -> condIntCode LU x y - MO_U_Le rep -> condIntCode LEU x y - - other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) - -getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) - -#elif powerpc_TARGET_ARCH - --- almost the same as everywhere else - but we need to --- extend small integers to 32 bit first - -getCondCode (CmmMachOp mop [x, y]) - = case mop of - MO_F_Eq W32 -> condFltCode EQQ x y - MO_F_Ne W32 -> condFltCode NE x y - MO_F_Gt W32 -> condFltCode GTT x y - MO_F_Ge W32 -> condFltCode GE x y - MO_F_Lt W32 -> condFltCode LTT x y - MO_F_Le W32 -> condFltCode LE x y - - MO_F_Eq W64 -> condFltCode EQQ x y - MO_F_Ne W64 -> condFltCode NE x y - MO_F_Gt W64 -> condFltCode GTT x y - MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode LTT x y - MO_F_Le W64 -> condFltCode LE x y - - MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y) - MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y) - - MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y) - MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y) - MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y) - MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y) - - MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y) - MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y) - - other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) - -getCondCode other = panic "getCondCode(2)(powerpc)" - - -#endif - - --- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be --- passed back up the tree. - -condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode - -#if alpha_TARGET_ARCH -condIntCode = panic "MachCode.condIntCode: not on Alphas" -condFltCode = panic "MachCode.condFltCode: not on Alphas" -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- memory vs immediate -condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do - Amode x_addr x_code <- getAmode x - let - imm = litToImm lit - code = x_code `snocOL` - CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr) - -- - return (CondCode False cond code) - --- anything vs zero, using a mask --- TODO: Add some sanity checking!!!! -condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk)) - | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit - = do - (x_reg, x_code) <- getSomeReg x - let - code = x_code `snocOL` - TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg) - -- - return (CondCode False cond code) - --- anything vs zero -condIntCode cond x (CmmLit (CmmInt 0 pk)) = do - (x_reg, x_code) <- getSomeReg x - let - code = x_code `snocOL` - TEST (intSize pk) (OpReg x_reg) (OpReg x_reg) - -- - return (CondCode False cond code) - --- anything vs operand -condIntCode cond x y | isOperand y = do - (x_reg, x_code) <- getNonClobberedReg x - (y_op, y_code) <- getOperand y - let - code = x_code `appOL` y_code `snocOL` - CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg) - -- in - return (CondCode False cond code) - --- anything vs anything -condIntCode cond x y = do - (y_reg, y_code) <- getNonClobberedReg y - (x_op, x_code) <- getRegOrMem x - let - code = y_code `appOL` - x_code `snocOL` - CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op - -- in - return (CondCode False cond code) -#endif - -#if i386_TARGET_ARCH -condFltCode cond x y - = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do - (x_reg, x_code) <- getNonClobberedReg x - (y_reg, y_code) <- getSomeReg y - let - code = x_code `appOL` y_code `snocOL` - GCMP cond x_reg y_reg - -- The GCMP insn does the test and sets the zero flag if comparable - -- and true. Hence we always supply EQQ as the condition to test. - return (CondCode True EQQ code) -#endif /* i386_TARGET_ARCH */ - -#if x86_64_TARGET_ARCH --- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be --- an operand, but the right must be a reg. We can probably do better --- than this general case... -condFltCode cond x y = do - (x_reg, x_code) <- getNonClobberedReg x - (y_op, y_code) <- getOperand y - let - code = x_code `appOL` - y_code `snocOL` - CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg) - -- NB(1): we need to use the unsigned comparison operators on the - -- result of this comparison. - -- in - return (CondCode True (condToUnsigned cond) code) -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -condIntCode cond x (CmmLit (CmmInt y rep)) - | fits13Bits y - = do - (src1, code) <- getSomeReg x - let - src2 = ImmInt (fromInteger y) - code' = code `snocOL` SUB False True src1 (RIImm src2) g0 - return (CondCode False cond code') - -condIntCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code__2 = code1 `appOL` code2 `snocOL` - SUB False True src1 (RIReg src2) g0 - return (CondCode False cond code__2) - ------------ -condFltCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp <- getNewRegNat FF64 - let - promote x = FxTOy FF32 FF64 x tmp - - pk1 = cmmExprType x - pk2 = cmmExprType y - - code__2 = - if pk1 `cmmEqType` pk2 then - code1 `appOL` code2 `snocOL` - FCMP True (cmmTypeSize pk1) src1 src2 - else if typeWidth pk1 == W32 then - code1 `snocOL` promote src1 `appOL` code2 `snocOL` - FCMP True FF64 tmp src2 - else - code1 `appOL` code2 `snocOL` promote src2 `snocOL` - FCMP True FF64 src1 tmp - return (CondCode True cond code__2) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH --- ###FIXME: I16 and I8! -condIntCode cond x (CmmLit (CmmInt y rep)) - | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y - = do - (src1, code) <- getSomeReg x - let - code' = code `snocOL` - (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2) - return (CondCode False cond code') - -condIntCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code' = code1 `appOL` code2 `snocOL` - (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2) - return (CondCode False cond code') - -condFltCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code' = code1 `appOL` code2 `snocOL` FCMP src1 src2 - code'' = case cond of -- twiddle CR to handle unordered case - GE -> code' `snocOL` CRNOR ltbit eqbit gtbit - LE -> code' `snocOL` CRNOR gtbit eqbit ltbit - _ -> code' - where - ltbit = 0 ; eqbit = 2 ; gtbit = 1 - return (CondCode True cond code'') - -#endif /* powerpc_TARGET_ARCH */ - --- ----------------------------------------------------------------------------- --- Generating assignments - --- Assignments are really at the heart of the whole code generation --- business. Almost all top-level nodes of any real importance are --- assignments, which correspond to loads, stores, or register --- transfers. If we're really lucky, some of the register transfers --- will go away, because we can use the destination register to --- complete the code generation for the right hand side. This only --- fails when the right hand side is forced into a fixed register --- (e.g. the result of a call). - -assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock - -assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -assignIntCode pk (CmmLoad dst _) src - = getNewRegNat IntRep `thenNat` \ tmp -> - getAmode dst `thenNat` \ amode -> - getRegister src `thenNat` \ register -> - let - code1 = amodeCode amode [] - dst__2 = amodeAddr amode - code2 = registerCode register tmp [] - src__2 = registerName register tmp - sz = primRepToSize pk - code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2) - in - return code__2 - -assignIntCode pk dst src - = getRegister dst `thenNat` \ register1 -> - getRegister src `thenNat` \ register2 -> - let - dst__2 = registerName register1 zeroh - code = registerCode register2 dst__2 - src__2 = registerName register2 dst__2 - code__2 = if isFixed register2 - then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2) - else code - in - return code__2 - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- integer assignment to memory - --- specific case of adding/subtracting an integer to a particular address. --- ToDo: catch other cases where we can use an operation directly on a memory --- address. -assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _, - CmmLit (CmmInt i _)]) - | addr == addr2, pk /= II64 || is32BitInteger i, - Just instr <- check op - = do Amode amode code_addr <- getAmode addr - let code = code_addr `snocOL` - instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode) - return code - where - check (MO_Add _) = Just ADD - check (MO_Sub _) = Just SUB - check _ = Nothing - -- ToDo: more? - --- general case -assignMem_IntCode pk addr src = do - Amode addr code_addr <- getAmode addr - (code_src, op_src) <- get_op_RI src - let - code = code_src `appOL` - code_addr `snocOL` - MOV pk op_src (OpAddr addr) - -- NOTE: op_src is stable, so it will still be valid - -- after code_addr. This may involve the introduction - -- of an extra MOV to a temporary register, but we hope - -- the register allocator will get rid of it. - -- - return code - where - get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operator - get_op_RI (CmmLit lit) | is32BitLit lit - = return (nilOL, OpImm (litToImm lit)) - get_op_RI op - = do (reg,code) <- getNonClobberedReg op - return (code, OpReg reg) - - --- Assign; dst is a reg, rhs is mem -assignReg_IntCode pk reg (CmmLoad src _) = do - load_code <- intLoadCode (MOV pk) src - return (load_code (getRegisterReg reg)) - --- dst is a reg, but src could be anything -assignReg_IntCode pk reg src = do - code <- getAnyReg src - return (code (getRegisterReg reg)) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -assignMem_IntCode pk addr src = do - (srcReg, code) <- getSomeReg src - Amode dstAddr addr_code <- getAmode addr - return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr - -assignReg_IntCode pk reg src = do - r <- getRegister src - return $ case r of - Any _ code -> code dst - Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst - where - dst = getRegisterReg reg - - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH - -assignMem_IntCode pk addr src = do - (srcReg, code) <- getSomeReg src - Amode dstAddr addr_code <- getAmode addr - return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr - --- dst is a reg, but src could be anything -assignReg_IntCode pk reg src - = do - r <- getRegister src - return $ case r of - Any _ code -> code dst - Fixed _ freg fcode -> fcode `snocOL` MR dst freg - where - dst = getRegisterReg reg - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Floating-point assignments - -#if alpha_TARGET_ARCH - -assignFltCode pk (CmmLoad dst _) src - = getNewRegNat pk `thenNat` \ tmp -> - getAmode dst `thenNat` \ amode -> - getRegister src `thenNat` \ register -> - let - code1 = amodeCode amode [] - dst__2 = amodeAddr amode - code2 = registerCode register tmp [] - src__2 = registerName register tmp - sz = primRepToSize pk - code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2) - in - return code__2 - -assignFltCode pk dst src - = getRegister dst `thenNat` \ register1 -> - getRegister src `thenNat` \ register2 -> - let - dst__2 = registerName register1 zeroh - code = registerCode register2 dst__2 - src__2 = registerName register2 dst__2 - code__2 = if isFixed register2 - then code . mkSeqInstr (FMOV src__2 dst__2) - else code - in - return code__2 - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - --- Floating point assignment to memory -assignMem_FltCode pk addr src = do - (src_reg, src_code) <- getNonClobberedReg src - Amode addr addr_code <- getAmode addr - let - code = src_code `appOL` - addr_code `snocOL` - IF_ARCH_i386(GST pk src_reg addr, - MOV pk (OpReg src_reg) (OpAddr addr)) - return code - --- Floating point assignment to a register/temporary -assignReg_FltCode pk reg src = do - src_code <- getAnyReg src - return (src_code (getRegisterReg reg)) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - --- Floating point assignment to memory -assignMem_FltCode pk addr src = do - Amode dst__2 code1 <- getAmode addr - (src__2, code2) <- getSomeReg src - tmp1 <- getNewRegNat pk - let - pk__2 = cmmExprType src - code__2 = code1 `appOL` code2 `appOL` - if sizeToWidth pk == typeWidth pk__2 - then unitOL (ST pk src__2 dst__2) - else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1 - , ST pk tmp1 dst__2] - return code__2 - --- Floating point assignment to a register/temporary -assignReg_FltCode pk dstCmmReg srcCmmExpr = do - srcRegister <- getRegister srcCmmExpr - let dstReg = getRegisterReg dstCmmReg - - return $ case srcRegister of - Any _ code -> code dstReg - Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH - --- Easy, isn't it? -assignMem_FltCode = assignMem_IntCode -assignReg_FltCode = assignReg_IntCode - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Generating an non-local jump - --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. - -genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -genJump (CmmLabel lbl) - | isAsmTemp lbl = returnInstr (BR target) - | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0] - where - target = ImmCLbl lbl - -genJump tree - = getRegister tree `thenNat` \ register -> - getNewRegNat PtrRep `thenNat` \ tmp -> - let - dst = registerName register pv - code = registerCode register pv - target = registerName register pv - in - if isFixed register then - returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0] - else - return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0)) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -genJump (CmmLoad mem pk) = do - Amode target code <- getAmode mem - return (code `snocOL` JMP (OpAddr target)) - -genJump (CmmLit lit) = do - return (unitOL (JMP (OpImm (litToImm lit)))) - -genJump expr = do - (reg,code) <- getSomeReg expr - return (code `snocOL` JMP (OpReg reg)) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -genJump (CmmLit (CmmLabel lbl)) - = return (toOL [CALL (Left target) 0 True, NOP]) - where - target = ImmCLbl lbl - -genJump tree - = do - (target, code) <- getSomeReg tree - return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH -genJump (CmmLit (CmmLabel lbl)) - = return (unitOL $ JMP lbl) - -genJump tree - = do - (target,code) <- getSomeReg tree - return (code `snocOL` MTCTR target `snocOL` BCTR []) -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Unconditional branches - -genBranch :: BlockId -> NatM InstrBlock - -genBranch = return . toOL . mkBranchInstr - --- ----------------------------------------------------------------------------- --- Conditional jumps - -{- -Conditional jumps are always to local labels, so we can use branch -instructions. We peek at the arguments to decide what kind of -comparison to do. - -ALPHA: For comparisons with 0, we're laughing, because we can just do -the desired conditional branch. - -I386: First, we have to ensure that the condition -codes are set according to the supplied comparison operation. - -SPARC: First, we have to ensure that the condition codes are set -according to the supplied comparison operation. We generate slightly -different code for floating point comparisons, because a floating -point operation cannot directly precede a @BF@. We assume the worst -and fill that slot with a @NOP@. - -SPARC: Do not fill the delay slots here; you will confuse the register -allocator. --} - - -genCondJump - :: BlockId -- the branch target - -> CmmExpr -- the condition on which to branch - -> NatM InstrBlock - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -genCondJump id (StPrim op [x, StInt 0]) - = getRegister x `thenNat` \ register -> - getNewRegNat (registerRep register) - `thenNat` \ tmp -> - let - code = registerCode register tmp - value = registerName register tmp - pk = registerRep register - target = ImmCLbl lbl - in - returnSeq code [BI (cmpOp op) value target] - where - cmpOp CharGtOp = GTT - cmpOp CharGeOp = GE - cmpOp CharEqOp = EQQ - cmpOp CharNeOp = NE - cmpOp CharLtOp = LTT - cmpOp CharLeOp = LE - cmpOp IntGtOp = GTT - cmpOp IntGeOp = GE - cmpOp IntEqOp = EQQ - cmpOp IntNeOp = NE - cmpOp IntLtOp = LTT - cmpOp IntLeOp = LE - cmpOp WordGtOp = NE - cmpOp WordGeOp = ALWAYS - cmpOp WordEqOp = EQQ - cmpOp WordNeOp = NE - cmpOp WordLtOp = NEVER - cmpOp WordLeOp = EQQ - cmpOp AddrGtOp = NE - cmpOp AddrGeOp = ALWAYS - cmpOp AddrEqOp = EQQ - cmpOp AddrNeOp = NE - cmpOp AddrLtOp = NEVER - cmpOp AddrLeOp = EQQ - -genCondJump lbl (StPrim op [x, StDouble 0.0]) - = getRegister x `thenNat` \ register -> - getNewRegNat (registerRep register) - `thenNat` \ tmp -> - let - code = registerCode register tmp - value = registerName register tmp - pk = registerRep register - target = ImmCLbl lbl - in - return (code . mkSeqInstr (BF (cmpOp op) value target)) - where - cmpOp FloatGtOp = GTT - cmpOp FloatGeOp = GE - cmpOp FloatEqOp = EQQ - cmpOp FloatNeOp = NE - cmpOp FloatLtOp = LTT - cmpOp FloatLeOp = LE - cmpOp DoubleGtOp = GTT - cmpOp DoubleGeOp = GE - cmpOp DoubleEqOp = EQQ - cmpOp DoubleNeOp = NE - cmpOp DoubleLtOp = LTT - cmpOp DoubleLeOp = LE - -genCondJump lbl (StPrim op [x, y]) - | fltCmpOp op - = trivialFCode pr instr x y `thenNat` \ register -> - getNewRegNat FF64 `thenNat` \ tmp -> - let - code = registerCode register tmp - result = registerName register tmp - target = ImmCLbl lbl - in - return (code . mkSeqInstr (BF cond result target)) - where - pr = panic "trivialU?FCode: does not use PrimRep on Alpha" - - fltCmpOp op = case op of - FloatGtOp -> True - FloatGeOp -> True - FloatEqOp -> True - FloatNeOp -> True - FloatLtOp -> True - FloatLeOp -> True - DoubleGtOp -> True - DoubleGeOp -> True - DoubleEqOp -> True - DoubleNeOp -> True - DoubleLtOp -> True - DoubleLeOp -> True - _ -> False - (instr, cond) = case op of - FloatGtOp -> (FCMP TF LE, EQQ) - FloatGeOp -> (FCMP TF LTT, EQQ) - FloatEqOp -> (FCMP TF EQQ, NE) - FloatNeOp -> (FCMP TF EQQ, EQQ) - FloatLtOp -> (FCMP TF LTT, NE) - FloatLeOp -> (FCMP TF LE, NE) - DoubleGtOp -> (FCMP TF LE, EQQ) - DoubleGeOp -> (FCMP TF LTT, EQQ) - DoubleEqOp -> (FCMP TF EQQ, NE) - DoubleNeOp -> (FCMP TF EQQ, EQQ) - DoubleLtOp -> (FCMP TF LTT, NE) - DoubleLeOp -> (FCMP TF LE, NE) - -genCondJump lbl (StPrim op [x, y]) - = trivialCode instr x y `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ tmp -> - let - code = registerCode register tmp - result = registerName register tmp - target = ImmCLbl lbl - in - return (code . mkSeqInstr (BI cond result target)) - where - (instr, cond) = case op of - CharGtOp -> (CMP LE, EQQ) - CharGeOp -> (CMP LTT, EQQ) - CharEqOp -> (CMP EQQ, NE) - CharNeOp -> (CMP EQQ, EQQ) - CharLtOp -> (CMP LTT, NE) - CharLeOp -> (CMP LE, NE) - IntGtOp -> (CMP LE, EQQ) - IntGeOp -> (CMP LTT, EQQ) - IntEqOp -> (CMP EQQ, NE) - IntNeOp -> (CMP EQQ, EQQ) - IntLtOp -> (CMP LTT, NE) - IntLeOp -> (CMP LE, NE) - WordGtOp -> (CMP ULE, EQQ) - WordGeOp -> (CMP ULT, EQQ) - WordEqOp -> (CMP EQQ, NE) - WordNeOp -> (CMP EQQ, EQQ) - WordLtOp -> (CMP ULT, NE) - WordLeOp -> (CMP ULE, NE) - AddrGtOp -> (CMP ULE, EQQ) - AddrGeOp -> (CMP ULT, EQQ) - AddrEqOp -> (CMP EQQ, NE) - AddrNeOp -> (CMP EQQ, EQQ) - AddrLtOp -> (CMP ULT, NE) - AddrLeOp -> (CMP ULE, NE) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -genCondJump id bool = do - CondCode _ cond code <- getCondCode bool - return (code `snocOL` JXX cond id) - -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if x86_64_TARGET_ARCH - -genCondJump id bool = do - CondCode is_float cond cond_code <- getCondCode bool - if not is_float - then - return (cond_code `snocOL` JXX cond id) - else do - lbl <- getBlockIdNat - - -- see comment with condFltReg - let code = case cond of - NE -> or_unordered - GU -> plain_test - GEU -> plain_test - _ -> and_ordered - - plain_test = unitOL ( - JXX cond id - ) - or_unordered = toOL [ - JXX cond id, - JXX PARITY id - ] - and_ordered = toOL [ - JXX PARITY lbl, - JXX cond id, - JXX ALWAYS lbl, - NEWBLOCK lbl - ] - return (cond_code `appOL` code) - -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -genCondJump bid bool = do - CondCode is_float cond code <- getCondCode bool - return ( - code `appOL` - toOL ( - if is_float - then [NOP, BF cond False bid, NOP] - else [BI cond False bid, NOP] - ) - ) - -#endif /* sparc_TARGET_ARCH */ - - -#if powerpc_TARGET_ARCH - -genCondJump id bool = do - CondCode is_float cond code <- getCondCode bool - return (code `snocOL` BCC cond id) - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Generating C calls - --- Now the biggest nightmare---calls. Most of the nastiness is buried in --- @get_arg@, which moves the arguments to the correct registers/stack --- locations. Apart from that, the code is easy. --- --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. - -genCCall - :: CmmCallTarget -- function to call - -> HintedCmmFormals -- where to put the result - -> HintedCmmActuals -- arguments (of mixed type) - -> NatM InstrBlock - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -ccallResultRegs = - -genCCall fn cconv result_regs args - = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args - `thenNat` \ ((unused,_), argCode) -> - let - nRegs = length allArgRegs - length unused - code = asmSeqThen (map ($ []) argCode) - in - returnSeq code [ - LDA pv (AddrImm (ImmLab (ptext fn))), - JSR ra (AddrReg pv) nRegs, - LDGP gp (AddrReg ra)] - where - ------------------------ - {- Try to get a value into a specific register (or registers) for - a call. The first 6 arguments go into the appropriate - argument register (separate registers for integer and floating - point arguments, but used in lock-step), and the remaining - arguments are dumped to the stack, beginning at 0(sp). Our - first argument is a pair of the list of remaining argument - registers to be assigned for this call and the next stack - offset to use for overflowing arguments. This way, - @get_Arg@ can be applied to all of a call's arguments using - @mapAccumLNat@. - -} - get_arg - :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator) - -> StixTree -- Current argument - -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code - - -- We have to use up all of our argument registers first... - - get_arg ((iDst,fDst):dsts, offset) arg - = getRegister arg `thenNat` \ register -> - let - reg = if isFloatType pk then fDst else iDst - code = registerCode register reg - src = registerName register reg - pk = registerRep register - in - return ( - if isFloatType pk then - ((dsts, offset), if isFixed register then - code . mkSeqInstr (FMOV src fDst) - else code) - else - ((dsts, offset), if isFixed register then - code . mkSeqInstr (OR src (RIReg src) iDst) - else code)) - - -- Once we have run out of argument registers, we move to the - -- stack... - - get_arg ([], offset) arg - = getRegister arg `thenNat` \ register -> - getNewRegNat (registerRep register) - `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - pk = registerRep register - sz = primRepToSize pk - in - return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset))) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL - -- write barrier compiles to no code on x86/x86-64; - -- we keep it this long in order to prevent earlier optimisations. - --- we only cope with a single result for foreign calls -genCCall (CmmPrim op) [CmmHinted r _] args = do - l1 <- getNewLabelNat - l2 <- getNewLabelNat - case op of - MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args - MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args - - MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args - MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args - - MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args - MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args - - MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args - MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args - - other_op -> outOfLineFloatOp op r args - where - actuallyInlineFloatOp instr size [CmmHinted x _] - = do res <- trivialUFCode size (instr size) x - any <- anyReg res - return (any (getRegisterReg (CmmLocal r))) - -genCCall target dest_regs args = do - let - sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args) -#if !darwin_TARGET_OS - tot_arg_size = sum sizes -#else - raw_arg_size = sum sizes - tot_arg_size = roundTo 16 raw_arg_size - arg_pad_size = tot_arg_size - raw_arg_size - delta0 <- getDeltaNat - setDeltaNat (delta0 - arg_pad_size) -#endif - - push_codes <- mapM push_arg (reverse args) - delta <- getDeltaNat - - -- in - -- deal with static vs dynamic call targets - (callinsns,cconv) <- - case target of - -- CmmPrim -> ... - CmmCallee (CmmLit (CmmLabel lbl)) conv - -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm) []), conv) - where fn_imm = ImmCLbl lbl - CmmCallee expr conv - -> do { (dyn_c, dyn_r) <- get_op expr - ; ASSERT( isWord32 (cmmExprType expr) ) - return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) } - - let push_code -#if darwin_TARGET_OS - | arg_pad_size /= 0 - = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp), - DELTA (delta0 - arg_pad_size)] - `appOL` concatOL push_codes - | otherwise -#endif - = concatOL push_codes - call = callinsns `appOL` - toOL ( - -- Deallocate parameters after call for ccall; - -- but not for stdcall (callee does it) - (if cconv == StdCallConv || tot_arg_size==0 then [] else - [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)]) - ++ - [DELTA (delta + tot_arg_size)] - ) - -- in - setDeltaNat (delta + tot_arg_size) - - let - -- assign the results, if necessary - assign_code [] = nilOL - assign_code [CmmHinted dest _hint] - | isFloatType ty = unitOL (GMOV fake0 r_dest) - | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest), - MOV II32 (OpReg edx) (OpReg r_dest_hi)] - | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest)) - where - ty = localRegType dest - w = typeWidth ty - r_dest_hi = getHiVRegFromLo r_dest - r_dest = getRegisterReg (CmmLocal dest) - assign_code many = panic "genCCall.assign_code many" - - return (push_code `appOL` - call `appOL` - assign_code dest_regs) - - where - arg_size :: CmmType -> Int -- Width in bytes - arg_size ty = widthInBytes (typeWidth ty) - - roundTo a x | x `mod` a == 0 = x - | otherwise = x + a - (x `mod` a) - - - push_arg :: HintedCmmActual {-current argument-} - -> NatM InstrBlock -- code - - push_arg (CmmHinted arg _hint) -- we don't need the hints on x86 - | isWord64 arg_ty = do - ChildCode64 code r_lo <- iselExpr64 arg - delta <- getDeltaNat - setDeltaNat (delta - 8) - let - r_hi = getHiVRegFromLo r_lo - -- in - return ( code `appOL` - toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4), - PUSH II32 (OpReg r_lo), DELTA (delta - 8), - DELTA (delta-8)] - ) - - | otherwise = do - (code, reg) <- get_op arg - delta <- getDeltaNat - let size = arg_size arg_ty -- Byte size - setDeltaNat (delta-size) - if (isFloatType arg_ty) - then return (code `appOL` - toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp), - DELTA (delta-size), - GST (floatSize (typeWidth arg_ty)) - reg (AddrBaseIndex (EABaseReg esp) - EAIndexNone - (ImmInt 0))] - ) - else return (code `snocOL` - PUSH II32 (OpReg reg) `snocOL` - DELTA (delta-size) - ) - where - arg_ty = cmmExprType arg - - ------------ - get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg - get_op op = do - (reg,code) <- getSomeReg op - return (code, reg) - -#endif /* i386_TARGET_ARCH */ - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals - -> NatM InstrBlock -outOfLineFloatOp mop res args - = do - dflags <- getDynFlagsNat - targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl - let target = CmmCallee targetExpr CCallConv - - if isFloat64 (localRegType res) - then - stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn) - else do - uq <- getUniqueNat - let - tmp = LocalReg uq f64 - -- in - code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn) - code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) - return (code1 `appOL` code2) - where - lbl = mkForeignLabel fn Nothing False IsFunction - - fn = case mop of - MO_F32_Sqrt -> fsLit "sqrtf" - MO_F32_Sin -> fsLit "sinf" - MO_F32_Cos -> fsLit "cosf" - MO_F32_Tan -> fsLit "tanf" - MO_F32_Exp -> fsLit "expf" - MO_F32_Log -> fsLit "logf" - - MO_F32_Asin -> fsLit "asinf" - MO_F32_Acos -> fsLit "acosf" - MO_F32_Atan -> fsLit "atanf" - - MO_F32_Sinh -> fsLit "sinhf" - MO_F32_Cosh -> fsLit "coshf" - MO_F32_Tanh -> fsLit "tanhf" - MO_F32_Pwr -> fsLit "powf" - - MO_F64_Sqrt -> fsLit "sqrt" - MO_F64_Sin -> fsLit "sin" - MO_F64_Cos -> fsLit "cos" - MO_F64_Tan -> fsLit "tan" - MO_F64_Exp -> fsLit "exp" - MO_F64_Log -> fsLit "log" - - MO_F64_Asin -> fsLit "asin" - MO_F64_Acos -> fsLit "acos" - MO_F64_Atan -> fsLit "atan" - - MO_F64_Sinh -> fsLit "sinh" - MO_F64_Cosh -> fsLit "cosh" - MO_F64_Tanh -> fsLit "tanh" - MO_F64_Pwr -> fsLit "pow" - -#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if x86_64_TARGET_ARCH - -genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL - -- write barrier compiles to no code on x86/x86-64; - -- we keep it this long in order to prevent earlier optimisations. - - -genCCall (CmmPrim op) [CmmHinted r _] args = - outOfLineFloatOp op r args - -genCCall target dest_regs args = do - - -- load up the register arguments - (stack_args, aregs, fregs, load_args_code) - <- load_args args allArgRegs allFPArgRegs nilOL - - let - fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs)) - int_regs_used = reverse (drop (length aregs) (reverse allArgRegs)) - arg_regs = [eax] ++ int_regs_used ++ fp_regs_used - -- for annotating the call instruction with - - sse_regs = length fp_regs_used - - tot_arg_size = arg_size * length stack_args - - -- On entry to the called function, %rsp should be aligned - -- on a 16-byte boundary +8 (i.e. the first stack arg after - -- the return address is 16-byte aligned). In STG land - -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just - -- need to make sure we push a multiple of 16-bytes of args, - -- plus the return address, to get the correct alignment. - -- Urg, this is hard. We need to feed the delta back into - -- the arg pushing code. - (real_size, adjust_rsp) <- - if tot_arg_size `rem` 16 == 0 - then return (tot_arg_size, nilOL) - else do -- we need to adjust... - delta <- getDeltaNat - setDeltaNat (delta-8) - return (tot_arg_size+8, toOL [ - SUB II64 (OpImm (ImmInt 8)) (OpReg rsp), - DELTA (delta-8) - ]) - - -- push the stack args, right to left - push_code <- push_args (reverse stack_args) nilOL - delta <- getDeltaNat - - -- deal with static vs dynamic call targets - (callinsns,cconv) <- - case target of - -- CmmPrim -> ... - CmmCallee (CmmLit (CmmLabel lbl)) conv - -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm) arg_regs), conv) - where fn_imm = ImmCLbl lbl - CmmCallee expr conv - -> do (dyn_r, dyn_c) <- getSomeReg expr - return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) - - let - -- The x86_64 ABI requires us to set %al to the number of SSE - -- registers that contain arguments, if the called routine - -- is a varargs function. We don't know whether it's a - -- varargs function or not, so we have to assume it is. - -- - -- It's not safe to omit this assignment, even if the number - -- of SSE regs in use is zero. If %al is larger than 8 - -- on entry to a varargs function, seg faults ensue. - assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax)) - - let call = callinsns `appOL` - toOL ( - -- Deallocate parameters after call for ccall; - -- but not for stdcall (callee does it) - (if cconv == StdCallConv || real_size==0 then [] else - [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)]) - ++ - [DELTA (delta + real_size)] - ) - -- in - setDeltaNat (delta + real_size) - - let - -- assign the results, if necessary - assign_code [] = nilOL - assign_code [CmmHinted dest _hint] = - case typeWidth rep of - W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest)) - W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest)) - _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest)) - where - rep = localRegType dest - r_dest = getRegisterReg (CmmLocal dest) - assign_code many = panic "genCCall.assign_code many" - - return (load_args_code `appOL` - adjust_rsp `appOL` - push_code `appOL` - assign_eax sse_regs `appOL` - call `appOL` - assign_code dest_regs) - - where - arg_size = 8 -- always, at the mo - - load_args :: [CmmHinted CmmExpr] - -> [Reg] -- int regs avail for args - -> [Reg] -- FP regs avail for args - -> InstrBlock - -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock) - load_args args [] [] code = return (args, [], [], code) - -- no more regs to use - load_args [] aregs fregs code = return ([], aregs, fregs, code) - -- no more args to push - load_args ((CmmHinted arg hint) : rest) aregs fregs code - | isFloatType arg_rep = - case fregs of - [] -> push_this_arg - (r:rs) -> do - arg_code <- getAnyReg arg - load_args rest aregs rs (code `appOL` arg_code r) - | otherwise = - case aregs of - [] -> push_this_arg - (r:rs) -> do - arg_code <- getAnyReg arg - load_args rest rs fregs (code `appOL` arg_code r) - where - arg_rep = cmmExprType arg - - push_this_arg = do - (args',ars,frs,code') <- load_args rest aregs fregs code - return ((CmmHinted arg hint):args', ars, frs, code') - - push_args [] code = return code - push_args ((CmmHinted arg hint):rest) code - | isFloatType arg_rep = do - (arg_reg, arg_code) <- getSomeReg arg - delta <- getDeltaNat - setDeltaNat (delta-arg_size) - let code' = code `appOL` arg_code `appOL` toOL [ - SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) , - DELTA (delta-arg_size), - MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))] - push_args rest code' - - | otherwise = do - -- we only ever generate word-sized function arguments. Promotion - -- has already happened: our Int8# type is kept sign-extended - -- in an Int#, for example. - ASSERT(width == W64) return () - (arg_op, arg_code) <- getOperand arg - delta <- getDeltaNat - setDeltaNat (delta-arg_size) - let code' = code `appOL` arg_code `appOL` toOL [ - PUSH II64 arg_op, - DELTA (delta-arg_size)] - push_args rest code' - where - arg_rep = cmmExprType arg - width = typeWidth arg_rep -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH -{- - The SPARC calling convention is an absolute - nightmare. The first 6x32 bits of arguments are mapped into - %o0 through %o5, and the remaining arguments are dumped to the - stack, beginning at [%sp+92]. (Note that %o6 == %sp.) - - If we have to put args on the stack, move %o6==%sp down by - the number of words to go on the stack, to ensure there's enough space. - - According to Fraser and Hanson's lcc book, page 478, fig 17.2, - 16 words above the stack pointer is a word for the address of - a structure return value. I use this as a temporary location - for moving values from float to int regs. Certainly it isn't - safe to put anything in the 16 words starting at %sp, since - this area can get trashed at any time due to window overflows - caused by signal handlers. - - A final complication (if the above isn't enough) is that - we can't blithely calculate the arguments one by one into - %o0 .. %o5. Consider the following nested calls: - - fff a (fff b c) - - Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately - the inner call will itself use %o0, which trashes the value put there - in preparation for the outer call. Upshot: we need to calculate the - args into temporary regs, and move those to arg regs or onto the - stack only immediately prior to the call proper. Sigh. - -genCCall - :: CmmCallTarget -- function to call - -> HintedCmmFormals -- where to put the result - -> HintedCmmActuals -- arguments (of mixed type) - -> NatM InstrBlock - --} - - --- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream --- are guaranteed to take place before writes afterwards (unlike on PowerPC). --- Ref: Section 8.4 of the SPARC V9 Architecture manual. --- --- In the SPARC case we don't need a barrier. --- -genCCall (CmmPrim (MO_WriteBarrier)) _ _ - = do return nilOL - -genCCall target dest_regs argsAndHints - = do - -- strip hints from the arg regs - let args :: [CmmExpr] - args = map hintlessCmm argsAndHints - - - -- work out the arguments, and assign them to integer regs - argcode_and_vregs <- mapM arg_to_int_vregs args - let (argcodes, vregss) = unzip argcode_and_vregs - let vregs = concat vregss - - let n_argRegs = length allArgRegs - let n_argRegs_used = min (length vregs) n_argRegs - - - -- deal with static vs dynamic call targets - callinsns <- case target of - CmmCallee (CmmLit (CmmLabel lbl)) conv -> - return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) - - CmmCallee expr conv - -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr - return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) - - CmmPrim mop - -> do res <- outOfLineFloatOp mop - lblOrMopExpr <- case res of - Left lbl -> do - return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) - - Right mopExpr -> do - (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr - return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) - - return lblOrMopExpr - - let argcode = concatOL argcodes - - let (move_sp_down, move_sp_up) - = let diff = length vregs - n_argRegs - nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment - in if nn <= 0 - then (nilOL, nilOL) - else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn))) - - let transfer_code - = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE) - - return - $ argcode `appOL` - move_sp_down `appOL` - transfer_code `appOL` - callinsns `appOL` - unitOL NOP `appOL` - move_sp_up `appOL` - assign_code dest_regs - - --- | Generate code to calculate an argument, and move it into one --- or two integer vregs. -arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg]) -arg_to_int_vregs arg - - -- If the expr produces a 64 bit int, then we can just use iselExpr64 - | isWord64 (cmmExprType arg) - = do (ChildCode64 code r_lo) <- iselExpr64 arg - let r_hi = getHiVRegFromLo r_lo - return (code, [r_hi, r_lo]) - - | otherwise - = do (src, code) <- getSomeReg arg - tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg) - let pk = cmmExprType arg - - case cmmTypeSize pk of - - -- Load a 64 bit float return value into two integer regs. - FF64 -> do - v1 <- getNewRegNat II32 - v2 <- getNewRegNat II32 - - let Just f0_high = fPair f0 - - let code2 = - code `snocOL` - FMOV FF64 src f0 `snocOL` - ST FF32 f0 (spRel 16) `snocOL` - LD II32 (spRel 16) v1 `snocOL` - ST FF32 f0_high (spRel 16) `snocOL` - LD II32 (spRel 16) v2 - - return (code2, [v1,v2]) - - -- Load a 32 bit float return value into an integer reg - FF32 -> do - v1 <- getNewRegNat II32 - - let code2 = - code `snocOL` - ST FF32 src (spRel 16) `snocOL` - LD II32 (spRel 16) v1 - - return (code2, [v1]) - - -- Move an integer return value into its destination reg. - other -> do - v1 <- getNewRegNat II32 - - let code2 = - code `snocOL` - OR False g0 (RIReg src) v1 - - return (code2, [v1]) - - --- | Move args from the integer vregs into which they have been --- marshalled, into %o0 .. %o5, and the rest onto the stack. --- -move_final :: [Reg] -> [Reg] -> Int -> [Instr] - --- all args done -move_final [] _ offset - = [] - --- out of aregs; move to stack -move_final (v:vs) [] offset - = ST II32 v (spRel offset) - : move_final vs [] (offset+1) - --- move into an arg (%o[0..5]) reg -move_final (v:vs) (a:az) offset - = OR False g0 (RIReg v) a - : move_final vs az offset - - --- | Assign results returned from the call into their --- desination regs. --- -assign_code :: [CmmHinted LocalReg] -> OrdList Instr -assign_code [] = nilOL - -assign_code [CmmHinted dest _hint] - = let rep = localRegType dest - width = typeWidth rep - r_dest = getRegisterReg (CmmLocal dest) - - result - | isFloatType rep - , W32 <- width - = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest - - | isFloatType rep - , W64 <- width - = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest - - | not $ isFloatType rep - , W32 <- width - = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest - - | not $ isFloatType rep - , W64 <- width - , r_dest_hi <- getHiVRegFromLo r_dest - = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi - , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest] - in result - - --- | Generate a call to implement an out-of-line floating point operation -outOfLineFloatOp - :: CallishMachOp - -> NatM (Either CLabel CmmExpr) - -outOfLineFloatOp mop - = do let functionName - = outOfLineFloatOp_table mop - - dflags <- getDynFlagsNat - mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference - $ mkForeignLabel functionName Nothing True IsFunction - - let mopLabelOrExpr - = case mopExpr of - CmmLit (CmmLabel lbl) -> Left lbl - _ -> Right mopExpr - - return mopLabelOrExpr - - --- | Decide what C function to use to implement a CallishMachOp --- -outOfLineFloatOp_table - :: CallishMachOp - -> FastString - -outOfLineFloatOp_table mop - = case mop of - MO_F32_Exp -> fsLit "expf" - MO_F32_Log -> fsLit "logf" - MO_F32_Sqrt -> fsLit "sqrtf" - MO_F32_Pwr -> fsLit "powf" - - MO_F32_Sin -> fsLit "sinf" - MO_F32_Cos -> fsLit "cosf" - MO_F32_Tan -> fsLit "tanf" - - MO_F32_Asin -> fsLit "asinf" - MO_F32_Acos -> fsLit "acosf" - MO_F32_Atan -> fsLit "atanf" - - MO_F32_Sinh -> fsLit "sinhf" - MO_F32_Cosh -> fsLit "coshf" - MO_F32_Tanh -> fsLit "tanhf" - - MO_F64_Exp -> fsLit "exp" - MO_F64_Log -> fsLit "log" - MO_F64_Sqrt -> fsLit "sqrt" - MO_F64_Pwr -> fsLit "pow" - - MO_F64_Sin -> fsLit "sin" - MO_F64_Cos -> fsLit "cos" - MO_F64_Tan -> fsLit "tan" - - MO_F64_Asin -> fsLit "asin" - MO_F64_Acos -> fsLit "acos" - MO_F64_Atan -> fsLit "atan" - - MO_F64_Sinh -> fsLit "sinh" - MO_F64_Cosh -> fsLit "cosh" - MO_F64_Tanh -> fsLit "tanh" - - other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op " - (pprCallishMachOp mop) - - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH - -#if darwin_TARGET_OS || linux_TARGET_OS -{- - The PowerPC calling convention for Darwin/Mac OS X - is described in Apple's document - "Inside Mac OS X - Mach-O Runtime Architecture". - - PowerPC Linux uses the System V Release 4 Calling Convention - for PowerPC. It is described in the - "System V Application Binary Interface PowerPC Processor Supplement". - - Both conventions are similar: - Parameters may be passed in general-purpose registers starting at r3, in - floating point registers starting at f1, or on the stack. - - But there are substantial differences: - * The number of registers used for parameter passing and the exact set of - nonvolatile registers differs (see MachRegs.lhs). - * On Darwin, stack space is always reserved for parameters, even if they are - passed in registers. The called routine may choose to save parameters from - registers to the corresponding space on the stack. - * On Darwin, a corresponding amount of GPRs is skipped when a floating point - parameter is passed in an FPR. - * SysV insists on either passing I64 arguments on the stack, or in two GPRs, - starting with an odd-numbered GPR. It may skip a GPR to achieve this. - Darwin just treats an I64 like two separate II32s (high word first). - * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only - 4-byte aligned like everything else on Darwin. - * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on - PowerPC Linux does not agree, so neither do we. - - According to both conventions, The parameter area should be part of the - caller's stack frame, allocated in the caller's prologue code (large enough - to hold the parameter lists for all called routines). The NCG already - uses the stack for register spilling, leaving 64 bytes free at the top. - If we need a larger parameter area than that, we just allocate a new stack - frame just before ccalling. --} - - -genCCall (CmmPrim MO_WriteBarrier) _ _ - = return $ unitOL LWSYNC - -genCCall target dest_regs argsAndHints - = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps) - -- we rely on argument promotion in the codeGen - do - (finalStack,passArgumentsCode,usedRegs) <- passArguments - (zip args argReps) - allArgRegs allFPArgRegs - initialStackOffset - (toOL []) [] - - (labelOrExpr, reduceToFF32) <- case target of - CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False) - CmmCallee expr conv -> return (Right expr, False) - CmmPrim mop -> outOfLineFloatOp mop - - let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode - codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32 - - case labelOrExpr of - Left lbl -> do - return ( codeBefore - `snocOL` BL lbl usedRegs - `appOL` codeAfter) - Right dyn -> do - (dynReg, dynCode) <- getSomeReg dyn - return ( dynCode - `snocOL` MTCTR dynReg - `appOL` codeBefore - `snocOL` BCTRL usedRegs - `appOL` codeAfter) - where -#if darwin_TARGET_OS - initialStackOffset = 24 - -- size of linkage area + size of arguments, in bytes - stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $ - map (widthInBytes . typeWidth) argReps -#elif linux_TARGET_OS - initialStackOffset = 8 - stackDelta finalStack = roundTo 16 finalStack -#endif - args = map hintlessCmm argsAndHints - argReps = map cmmExprType args - - roundTo a x | x `mod` a == 0 = x - | otherwise = x + a - (x `mod` a) - - move_sp_down finalStack - | delta > 64 = - toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))), - DELTA (-delta)] - | otherwise = nilOL - where delta = stackDelta finalStack - move_sp_up finalStack - | delta > 64 = - toOL [ADD sp sp (RIImm (ImmInt delta)), - DELTA 0] - | otherwise = nilOL - where delta = stackDelta finalStack - - - passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) - passArguments ((arg,arg_ty):args) gprs fprs stackOffset - accumCode accumUsed | isWord64 arg_ty = - do - ChildCode64 code vr_lo <- iselExpr64 arg - let vr_hi = getHiVRegFromLo vr_lo - -#if darwin_TARGET_OS - passArguments args - (drop 2 gprs) - fprs - (stackOffset+8) - (accumCode `appOL` code - `snocOL` storeWord vr_hi gprs stackOffset - `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4)) - ((take 2 gprs) ++ accumUsed) - where - storeWord vr (gpr:_) offset = MR gpr vr - storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset)) - -#elif linux_TARGET_OS - let stackOffset' = roundTo 8 stackOffset - stackCode = accumCode `appOL` code - `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) - `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) - regCode hireg loreg = - accumCode `appOL` code - `snocOL` MR hireg vr_hi - `snocOL` MR loreg vr_lo - - case gprs of - hireg : loreg : regs | even (length gprs) -> - passArguments args regs fprs stackOffset - (regCode hireg loreg) (hireg : loreg : accumUsed) - _skipped : hireg : loreg : regs -> - passArguments args regs fprs stackOffset - (regCode hireg loreg) (hireg : loreg : accumUsed) - _ -> -- only one or no regs left - passArguments args [] fprs (stackOffset'+8) - stackCode accumUsed -#endif - - passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed - | reg : _ <- regs = do - register <- getRegister arg - let code = case register of - Fixed _ freg fcode -> fcode `snocOL` MR reg freg - Any _ acode -> acode reg - passArguments args - (drop nGprs gprs) - (drop nFprs fprs) -#if darwin_TARGET_OS - -- The Darwin ABI requires that we reserve stack slots for register parameters - (stackOffset + stackBytes) -#elif linux_TARGET_OS - -- ... the SysV ABI doesn't. - stackOffset -#endif - (accumCode `appOL` code) - (reg : accumUsed) - | otherwise = do - (vr, code) <- getSomeReg arg - passArguments args - (drop nGprs gprs) - (drop nFprs fprs) - (stackOffset' + stackBytes) - (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot) - accumUsed - where -#if darwin_TARGET_OS - -- stackOffset is at least 4-byte aligned - -- The Darwin ABI is happy with that. - stackOffset' = stackOffset -#else - -- ... the SysV ABI requires 8-byte alignment for doubles. - stackOffset' | isFloatType rep && typeWidth rep == W64 = - roundTo 8 stackOffset - | otherwise = stackOffset -#endif - stackSlot = AddrRegImm sp (ImmInt stackOffset') - (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of - II32 -> (1, 0, 4, gprs) -#if darwin_TARGET_OS - -- The Darwin ABI requires that we skip a corresponding number of GPRs when - -- we use the FPRs. - FF32 -> (1, 1, 4, fprs) - FF64 -> (2, 1, 8, fprs) -#elif linux_TARGET_OS - -- ... the SysV ABI doesn't. - FF32 -> (0, 1, 4, fprs) - FF64 -> (0, 1, 8, fprs) -#endif - - moveResult reduceToFF32 = - case dest_regs of - [] -> nilOL - [CmmHinted dest _hint] - | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) - | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) - | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3, - MR r_dest r4] - | otherwise -> unitOL (MR r_dest r3) - where rep = cmmRegType (CmmLocal dest) - r_dest = getRegisterReg (CmmLocal dest) - - outOfLineFloatOp mop = - do - dflags <- getDynFlagsNat - mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $ - mkForeignLabel functionName Nothing True IsFunction - let mopLabelOrExpr = case mopExpr of - CmmLit (CmmLabel lbl) -> Left lbl - _ -> Right mopExpr - return (mopLabelOrExpr, reduce) - where - (functionName, reduce) = case mop of - MO_F32_Exp -> (fsLit "exp", True) - MO_F32_Log -> (fsLit "log", True) - MO_F32_Sqrt -> (fsLit "sqrt", True) - - MO_F32_Sin -> (fsLit "sin", True) - MO_F32_Cos -> (fsLit "cos", True) - MO_F32_Tan -> (fsLit "tan", True) - - MO_F32_Asin -> (fsLit "asin", True) - MO_F32_Acos -> (fsLit "acos", True) - MO_F32_Atan -> (fsLit "atan", True) - - MO_F32_Sinh -> (fsLit "sinh", True) - MO_F32_Cosh -> (fsLit "cosh", True) - MO_F32_Tanh -> (fsLit "tanh", True) - MO_F32_Pwr -> (fsLit "pow", True) - - MO_F64_Exp -> (fsLit "exp", False) - MO_F64_Log -> (fsLit "log", False) - MO_F64_Sqrt -> (fsLit "sqrt", False) - - MO_F64_Sin -> (fsLit "sin", False) - MO_F64_Cos -> (fsLit "cos", False) - MO_F64_Tan -> (fsLit "tan", False) - - MO_F64_Asin -> (fsLit "asin", False) - MO_F64_Acos -> (fsLit "acos", False) - MO_F64_Atan -> (fsLit "atan", False) - - MO_F64_Sinh -> (fsLit "sinh", False) - MO_F64_Cosh -> (fsLit "cosh", False) - MO_F64_Tanh -> (fsLit "tanh", False) - MO_F64_Pwr -> (fsLit "pow", False) - other -> pprPanic "genCCall(ppc): unknown callish op" - (pprCallishMachOp other) - -#endif /* darwin_TARGET_OS || linux_TARGET_OS */ - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Generating a table-branch - -genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH -genSwitch expr ids - | opt_PIC - = do - (reg,e_code) <- getSomeReg expr - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let - jumpTable = map jumpTableEntryRel ids - - jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 wordWidth) - jumpTableEntryRel (Just (BlockId id)) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) - where blockLabel = mkAsmTempLabel id - - op = OpAddr (AddrBaseIndex (EABaseReg tableReg) - (EAIndex reg wORD_SIZE) (ImmInt 0)) - -#if x86_64_TARGET_ARCH -#if darwin_TARGET_OS - -- on Mac OS X/x86_64, put the jump table in the text section - -- to work around a limitation of the linker. - -- ld64 is unable to handle the relocations for - -- .quad L1 - L0 - -- if L0 is not preceded by a non-anonymous label in its section. - - code = e_code `appOL` t_code `appOL` toOL [ - ADD (intSize wordWidth) op (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ], - LDATA Text (CmmDataLabel lbl : jumpTable) - ] -#else - -- HACK: On x86_64 binutils<2.17 is only able to generate PC32 - -- relocations, hence we only get 32-bit offsets in the jump - -- table. As these offsets are always negative we need to properly - -- sign extend them to 64-bit. This hack should be removed in - -- conjunction with the hack in PprMach.hs/pprDataItem once - -- binutils 2.17 is standard. - code = e_code `appOL` t_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - MOVSxL II32 - (OpAddr (AddrBaseIndex (EABaseReg tableReg) - (EAIndex reg wORD_SIZE) (ImmInt 0))) - (OpReg reg), - ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] - ] -#endif -#else - code = e_code `appOL` t_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - ADD (intSize wordWidth) op (OpReg tableReg), - JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] - ] -#endif - return code - | otherwise - = do - (reg,e_code) <- getSomeReg expr - lbl <- getNewLabelNat - let - jumpTable = map jumpTableEntry ids - op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl)) - code = e_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - JMP_TBL op [ id | Just id <- ids ] - ] - -- in - return code -#elif powerpc_TARGET_ARCH -genSwitch expr ids - | opt_PIC - = do - (reg,e_code) <- getSomeReg expr - tmp <- getNewRegNat II32 - lbl <- getNewLabelNat - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let - jumpTable = map jumpTableEntryRel ids - - jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 wordWidth) - jumpTableEntryRel (Just (BlockId id)) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) - where blockLabel = mkAsmTempLabel id - - code = e_code `appOL` t_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - SLW tmp reg (RIImm (ImmInt 2)), - LD II32 tmp (AddrRegReg tableReg tmp), - ADD tmp tmp (RIReg tableReg), - MTCTR tmp, - BCTR [ id | Just id <- ids ] - ] - return code - | otherwise - = do - (reg,e_code) <- getSomeReg expr - tmp <- getNewRegNat II32 - lbl <- getNewLabelNat - let - jumpTable = map jumpTableEntry ids - - code = e_code `appOL` toOL [ - LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - SLW tmp reg (RIImm (ImmInt 2)), - ADDIS tmp tmp (HA (ImmCLbl lbl)), - LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), - MTCTR tmp, - BCTR [ id | Just id <- ids ] - ] - return code -#elif sparc_TARGET_ARCH -genSwitch expr ids - | opt_PIC - = error "MachCodeGen: sparc genSwitch PIC not finished\n" - - | otherwise - = do (e_reg, e_code) <- getSomeReg expr - - base_reg <- getNewRegNat II32 - offset_reg <- getNewRegNat II32 - dst <- getNewRegNat II32 - - label <- getNewLabelNat - let jumpTable = map jumpTableEntry ids - - return $ e_code `appOL` - toOL - -- the jump table - [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable) - - -- load base of jump table - , SETHI (HI (ImmCLbl label)) base_reg - , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg - - -- the addrs in the table are 32 bits wide.. - , SLL e_reg (RIImm $ ImmInt 2) offset_reg - - -- load and jump to the destination - , LD II32 (AddrRegReg base_reg offset_reg) dst - , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids] - , NOP ] - -#else -#error "ToDo: genSwitch" -#endif - - --- | Convert a BlockId to some CmmStatic data -jumpTableEntry :: Maybe BlockId -> CmmStatic -jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) -jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = mkAsmTempLabel id - --- ----------------------------------------------------------------------------- --- Support bits --- ----------------------------------------------------------------------------- - - --- ----------------------------------------------------------------------------- --- 'condIntReg' and 'condFltReg': condition codes into registers - --- Turn those condition codes into integers now (when they appear on --- the right hand side of an assignment). --- --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. - -condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH -condIntReg = panic "MachCode.condIntReg (not on Alpha)" -condFltReg = panic "MachCode.condFltReg (not on Alpha)" -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -condIntReg cond x y = do - CondCode _ cond cond_code <- condIntCode cond x y - tmp <- getNewRegNat II8 - let - code dst = cond_code `appOL` toOL [ - SETCC cond (OpReg tmp), - MOVZxL II8 (OpReg tmp) (OpReg dst) - ] - -- in - return (Any II32 code) - -#endif - -#if i386_TARGET_ARCH - -condFltReg cond x y = do - CondCode _ cond cond_code <- condFltCode cond x y - tmp <- getNewRegNat II8 - let - code dst = cond_code `appOL` toOL [ - SETCC cond (OpReg tmp), - MOVZxL II8 (OpReg tmp) (OpReg dst) - ] - -- in - return (Any II32 code) - -#endif - -#if x86_64_TARGET_ARCH - -condFltReg cond x y = do - CondCode _ cond cond_code <- condFltCode cond x y - tmp1 <- getNewRegNat wordSize - tmp2 <- getNewRegNat wordSize - let - -- We have to worry about unordered operands (eg. comparisons - -- against NaN). If the operands are unordered, the comparison - -- sets the parity flag, carry flag and zero flag. - -- All comparisons are supposed to return false for unordered - -- operands except for !=, which returns true. - -- - -- Optimisation: we don't have to test the parity flag if we - -- know the test has already excluded the unordered case: eg > - -- and >= test for a zero carry flag, which can only occur for - -- ordered operands. - -- - -- ToDo: by reversing comparisons we could avoid testing the - -- parity flag in more cases. - - code dst = - cond_code `appOL` - (case cond of - NE -> or_unordered dst - GU -> plain_test dst - GEU -> plain_test dst - _ -> and_ordered dst) - - plain_test dst = toOL [ - SETCC cond (OpReg tmp1), - MOVZxL II8 (OpReg tmp1) (OpReg dst) - ] - or_unordered dst = toOL [ - SETCC cond (OpReg tmp1), - SETCC PARITY (OpReg tmp2), - OR II8 (OpReg tmp1) (OpReg tmp2), - MOVZxL II8 (OpReg tmp2) (OpReg dst) - ] - and_ordered dst = toOL [ - SETCC cond (OpReg tmp1), - SETCC NOTPARITY (OpReg tmp2), - AND II8 (OpReg tmp1) (OpReg tmp2), - MOVZxL II8 (OpReg tmp2) (OpReg dst) - ] - -- in - return (Any II32 code) - -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do - (src, code) <- getSomeReg x - tmp <- getNewRegNat II32 - let - code__2 dst = code `appOL` toOL [ - SUB False True g0 (RIReg src) g0, - SUB True False g0 (RIImm (ImmInt (-1))) dst] - return (Any II32 code__2) - -condIntReg EQQ x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - let - code__2 dst = code1 `appOL` code2 `appOL` toOL [ - XOR False src1 (RIReg src2) dst, - SUB False True g0 (RIReg dst) g0, - SUB True False g0 (RIImm (ImmInt (-1))) dst] - return (Any II32 code__2) - -condIntReg NE x (CmmLit (CmmInt 0 d)) = do - (src, code) <- getSomeReg x - tmp <- getNewRegNat II32 - let - code__2 dst = code `appOL` toOL [ - SUB False True g0 (RIReg src) g0, - ADD True False g0 (RIImm (ImmInt 0)) dst] - return (Any II32 code__2) - -condIntReg NE x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - let - code__2 dst = code1 `appOL` code2 `appOL` toOL [ - XOR False src1 (RIReg src2) dst, - SUB False True g0 (RIReg dst) g0, - ADD True False g0 (RIImm (ImmInt 0)) dst] - return (Any II32 code__2) - -condIntReg cond x y = do - bid1@(BlockId lbl1) <- getBlockIdNat - bid2@(BlockId lbl2) <- getBlockIdNat - CondCode _ cond cond_code <- condIntCode cond x y - let - code__2 dst = cond_code `appOL` toOL [ - BI cond False bid1, NOP, - OR False g0 (RIImm (ImmInt 0)) dst, - BI ALWAYS False bid2, NOP, - NEWBLOCK bid1, - OR False g0 (RIImm (ImmInt 1)) dst, - NEWBLOCK bid2] - return (Any II32 code__2) - -condFltReg cond x y = do - bid1@(BlockId lbl1) <- getBlockIdNat - bid2@(BlockId lbl2) <- getBlockIdNat - CondCode _ cond cond_code <- condFltCode cond x y - let - code__2 dst = cond_code `appOL` toOL [ - NOP, - BF cond False bid1, NOP, - OR False g0 (RIImm (ImmInt 0)) dst, - BI ALWAYS False bid2, NOP, - NEWBLOCK bid1, - OR False g0 (RIImm (ImmInt 1)) dst, - NEWBLOCK bid2] - return (Any II32 code__2) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH -condReg getCond = do - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat - CondCode _ cond cond_code <- getCond - let -{- code dst = cond_code `appOL` toOL [ - BCC cond lbl1, - LI dst (ImmInt 0), - BCC ALWAYS lbl2, - NEWBLOCK lbl1, - LI dst (ImmInt 1), - BCC ALWAYS lbl2, - NEWBLOCK lbl2 - ]-} - code dst = cond_code - `appOL` negate_code - `appOL` toOL [ - MFCR dst, - RLWINM dst dst (bit + 1) 31 31 - ] - - negate_code | do_negate = unitOL (CRNOR bit bit bit) - | otherwise = nilOL - - (bit, do_negate) = case cond of - LTT -> (0, False) - LE -> (1, True) - EQQ -> (2, False) - GE -> (0, True) - GTT -> (1, False) - - NE -> (2, True) - - LU -> (0, False) - LEU -> (1, True) - GEU -> (0, True) - GU -> (1, False) - - return (Any II32 code) - -condIntReg cond x y = condReg (condIntCode cond x y) -condFltReg cond x y = condReg (condFltCode cond x y) -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- 'trivial*Code': deal with trivial instructions - --- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', --- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. --- Only look for constants on the right hand side, because that's --- where the generic optimizer will have put them. - --- Similarly, for unary instructions, we don't have to worry about --- matching an StInt as the argument, because genericOpt will already --- have handled the constant-folding. - -trivialCode - :: Width -- Int only - -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr) - ,IF_ARCH_i386 ((Operand -> Operand -> Instr) - -> Maybe (Operand -> Operand -> Instr) - ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr) - -> Maybe (Operand -> Operand -> Instr) - ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr) - ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr) - ,))))) - -> CmmExpr -> CmmExpr -- the two arguments - -> NatM Register - -#ifndef powerpc_TARGET_ARCH -trivialFCode - :: Width -- Floating point only - -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr) - ,)))) - -> CmmExpr -> CmmExpr -- the two arguments - -> NatM Register -#endif - -trivialUCode - :: Size - -> IF_ARCH_alpha((RI -> Reg -> Instr) - ,IF_ARCH_i386 ((Operand -> Instr) - ,IF_ARCH_x86_64 ((Operand -> Instr) - ,IF_ARCH_sparc((RI -> Reg -> Instr) - ,IF_ARCH_powerpc((Reg -> Reg -> Instr) - ,))))) - -> CmmExpr -- the one argument - -> NatM Register - -#ifndef powerpc_TARGET_ARCH -trivialUFCode - :: Size - -> IF_ARCH_alpha((Reg -> Reg -> Instr) - ,IF_ARCH_i386 ((Reg -> Reg -> Instr) - ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr) - ,IF_ARCH_sparc((Reg -> Reg -> Instr) - ,)))) - -> CmmExpr -- the one argument - -> NatM Register -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -trivialCode instr x (StInt y) - | fits8Bits y - = getRegister x `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ tmp -> - let - code = registerCode register tmp - src1 = registerName register tmp - src2 = ImmInt (fromInteger y) - code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst) - in - return (Any IntRep code__2) - -trivialCode instr x y - = getRegister x `thenNat` \ register1 -> - getRegister y `thenNat` \ register2 -> - getNewRegNat IntRep `thenNat` \ tmp1 -> - getNewRegNat IntRep `thenNat` \ tmp2 -> - let - code1 = registerCode register1 tmp1 [] - src1 = registerName register1 tmp1 - code2 = registerCode register2 tmp2 [] - src2 = registerName register2 tmp2 - code__2 dst = asmSeqThen [code1, code2] . - mkSeqInstr (instr src1 (RIReg src2) dst) - in - return (Any IntRep code__2) - ------------- -trivialUCode instr x - = getRegister x `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - code__2 dst = code . mkSeqInstr (instr (RIReg src) dst) - in - return (Any IntRep code__2) - ------------- -trivialFCode _ instr x y - = getRegister x `thenNat` \ register1 -> - getRegister y `thenNat` \ register2 -> - getNewRegNat FF64 `thenNat` \ tmp1 -> - getNewRegNat FF64 `thenNat` \ tmp2 -> - let - code1 = registerCode register1 tmp1 - src1 = registerName register1 tmp1 - - code2 = registerCode register2 tmp2 - src2 = registerName register2 tmp2 - - code__2 dst = asmSeqThen [code1 [], code2 []] . - mkSeqInstr (instr src1 src2 dst) - in - return (Any FF64 code__2) - -trivialUFCode _ instr x - = getRegister x `thenNat` \ register -> - getNewRegNat FF64 `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - code__2 dst = code . mkSeqInstr (instr src dst) - in - return (Any FF64 code__2) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH || x86_64_TARGET_ARCH - -{- -The Rules of the Game are: - -* You cannot assume anything about the destination register dst; - it may be anything, including a fixed reg. - -* You may compute an operand into a fixed reg, but you may not - subsequently change the contents of that fixed reg. If you - want to do so, first copy the value either to a temporary - or into dst. You are free to modify dst even if it happens - to be a fixed reg -- that's not your problem. - -* You cannot assume that a fixed reg will stay live over an - arbitrary computation. The same applies to the dst reg. - -* Temporary regs obtained from getNewRegNat are distinct from - each other and from all other regs, and stay live over - arbitrary computations. - --------------------- - -SDM's version of The Rules: - -* If getRegister returns Any, that means it can generate correct - code which places the result in any register, period. Even if that - register happens to be read during the computation. - - Corollary #1: this means that if you are generating code for an - operation with two arbitrary operands, you cannot assign the result - of the first operand into the destination register before computing - the second operand. The second operand might require the old value - of the destination register. - - Corollary #2: A function might be able to generate more efficient - code if it knows the destination register is a new temporary (and - therefore not read by any of the sub-computations). - -* If getRegister returns Any, then the code it generates may modify only: - (a) fresh temporaries - (b) the destination register - (c) known registers (eg. %ecx is used by shifts) - In particular, it may *not* modify global registers, unless the global - register happens to be the destination register. --} - -trivialCode width instr (Just revinstr) (CmmLit lit_a) b - | is32BitLit lit_a = do - b_code <- getAnyReg b - let - code dst - = b_code dst `snocOL` - revinstr (OpImm (litToImm lit_a)) (OpReg dst) - -- in - return (Any (intSize width) code) - -trivialCode width instr maybe_revinstr a b - = genTrivialCode (intSize width) instr a b - --- This is re-used for floating pt instructions too. -genTrivialCode rep instr a b = do - (b_op, b_code) <- getNonClobberedOperand b - a_code <- getAnyReg a - tmp <- getNewRegNat rep - let - -- We want the value of b to stay alive across the computation of a. - -- But, we want to calculate a straight into the destination register, - -- because the instruction only has two operands (dst := dst `op` src). - -- The troublesome case is when the result of b is in the same register - -- as the destination reg. In this case, we have to save b in a - -- new temporary across the computation of a. - code dst - | dst `regClashesWithOp` b_op = - b_code `appOL` - unitOL (MOV rep b_op (OpReg tmp)) `appOL` - a_code dst `snocOL` - instr (OpReg tmp) (OpReg dst) - | otherwise = - b_code `appOL` - a_code dst `snocOL` - instr b_op (OpReg dst) - -- in - return (Any rep code) - -reg `regClashesWithOp` OpReg reg2 = reg == reg2 -reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode) -reg `regClashesWithOp` _ = False - ------------ - -trivialUCode rep instr x = do - x_code <- getAnyReg x - let - code dst = - x_code dst `snocOL` - instr (OpReg dst) - return (Any rep code) - ------------ - -#if i386_TARGET_ARCH - -trivialFCode width instr x y = do - (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too - (y_reg, y_code) <- getSomeReg y - let - size = floatSize width - code dst = - x_code `appOL` - y_code `snocOL` - instr size x_reg y_reg dst - return (Any size code) - -#endif - -#if x86_64_TARGET_ARCH -trivialFCode pk instr x y - = genTrivialCode size (instr size) x y - where size = floatSize pk -#endif - -------------- - -trivialUFCode size instr x = do - (x_reg, x_code) <- getSomeReg x - let - code dst = - x_code `snocOL` - instr x_reg dst - -- in - return (Any size code) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -trivialCode pk instr x (CmmLit (CmmInt y d)) - | fits13Bits y - = do - (src1, code) <- getSomeReg x - tmp <- getNewRegNat II32 - let - src2 = ImmInt (fromInteger y) - code__2 dst = code `snocOL` instr src1 (RIImm src2) dst - return (Any II32 code__2) - -trivialCode pk instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - let - code__2 dst = code1 `appOL` code2 `snocOL` - instr src1 (RIReg src2) dst - return (Any II32 code__2) - ------------- -trivialFCode pk instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x) - tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y) - tmp <- getNewRegNat FF64 - let - promote x = FxTOy FF32 FF64 x tmp - - pk1 = cmmExprType x - pk2 = cmmExprType y - - code__2 dst = - if pk1 `cmmEqType` pk2 then - code1 `appOL` code2 `snocOL` - instr (floatSize pk) src1 src2 dst - else if typeWidth pk1 == W32 then - code1 `snocOL` promote src1 `appOL` code2 `snocOL` - instr FF64 tmp src2 dst - else - code1 `appOL` code2 `snocOL` promote src2 `snocOL` - instr FF64 src1 tmp dst - return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) - code__2) - ------------- -trivialUCode size instr x = do - (src, code) <- getSomeReg x - tmp <- getNewRegNat size - let - code__2 dst = code `snocOL` instr (RIReg src) dst - return (Any size code__2) - -------------- -trivialUFCode pk instr x = do - (src, code) <- getSomeReg x - tmp <- getNewRegNat pk - let - code__2 dst = code `snocOL` instr src dst - return (Any pk code__2) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH - -{- -Wolfgang's PowerPC version of The Rules: - -A slightly modified version of The Rules to take advantage of the fact -that PowerPC instructions work on all registers and don't implicitly -clobber any fixed registers. - -* The only expression for which getRegister returns Fixed is (CmmReg reg). - -* If getRegister returns Any, then the code it generates may modify only: - (a) fresh temporaries - (b) the destination register - It may *not* modify global registers, unless the global - register happens to be the destination register. - It may not clobber any other registers. In fact, only ccalls clobber any - fixed registers. - Also, it may not modify the counter register (used by genCCall). - - Corollary: If a getRegister for a subexpression returns Fixed, you need - not move it to a fresh temporary before evaluating the next subexpression. - The Fixed register won't be modified. - Therefore, we don't need a counterpart for the x86's getStableReg on PPC. - -* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on - the value of the destination register. --} - -trivialCode rep signed instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate rep signed y - = do - (src1, code1) <- getSomeReg x - let code dst = code1 `snocOL` instr dst src1 (RIImm imm) - return (Any (intSize rep) code) - -trivialCode rep signed instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) - return (Any (intSize rep) code) - -trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm' size instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 - return (Any size code) - -trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y - -trivialUCode rep instr x = do - (src, code) <- getSomeReg x - let code' dst = code `snocOL` instr dst src - return (Any rep code') - --- There is no "remainder" instruction on the PPC, so we have to do --- it the hard way. --- The "div" parameter is the division instruction to use (DIVW or DIVWU) - -remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -remainderCode rep div x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `appOL` toOL [ - div dst src1 src2, - MULLW dst dst (RIReg src2), - SUBF dst dst src1 - ] - return (Any (intSize rep) code) - -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- Coercing to/from integer/floating-point... - --- When going to integer, we truncate (round towards 0). - --- @coerce(Int2FP|FP2Int)@ are more complicated integer/float --- conversions. We have to store temporaries in memory to move --- between the integer and the floating point register sets. - --- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we --- pretend, on sparc at least, that double and float regs are seperate --- kinds, so the value has to be computed into one kind before being --- explicitly "converted" to live in the other kind. - -coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register - -#if sparc_TARGET_ARCH -coerceDbl2Flt :: CmmExpr -> NatM Register -coerceFlt2Dbl :: CmmExpr -> NatM Register -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if alpha_TARGET_ARCH - -coerceInt2FP _ x - = getRegister x `thenNat` \ register -> - getNewRegNat IntRep `thenNat` \ reg -> - let - code = registerCode register reg - src = registerName register reg - - code__2 dst = code . mkSeqInstrs [ - ST Q src (spRel 0), - LD TF dst (spRel 0), - CVTxy Q TF dst dst] - in - return (Any FF64 code__2) - -------------- -coerceFP2Int x - = getRegister x `thenNat` \ register -> - getNewRegNat FF64 `thenNat` \ tmp -> - let - code = registerCode register tmp - src = registerName register tmp - - code__2 dst = code . mkSeqInstrs [ - CVTxy TF Q src tmp, - ST TF tmp (spRel 0), - LD Q dst (spRel 0)] - in - return (Any IntRep code__2) - -#endif /* alpha_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if i386_TARGET_ARCH - -coerceInt2FP from to x = do - (x_reg, x_code) <- getSomeReg x - let - opc = case to of W32 -> GITOF; W64 -> GITOD - code dst = x_code `snocOL` opc x_reg dst - -- ToDo: works for non-II32 reps? - return (Any (floatSize to) code) - ------------- - -coerceFP2Int from to x = do - (x_reg, x_code) <- getSomeReg x - let - opc = case from of W32 -> GFTOI; W64 -> GDTOI - code dst = x_code `snocOL` opc x_reg dst - -- ToDo: works for non-II32 reps? - -- in - return (Any (intSize to) code) - -#endif /* i386_TARGET_ARCH */ - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if x86_64_TARGET_ARCH - -coerceFP2Int from to x = do - (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand - let - opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ - code dst = x_code `snocOL` opc x_op dst - -- in - return (Any (intSize to) code) -- works even if the destination rep is CVTSI2SS; W64 -> CVTSI2SD - code dst = x_code `snocOL` opc x_op dst - -- in - return (Any (floatSize to) code) -- works even if the destination rep is CmmExpr -> NatM Register -coerceFP2FP to x = do - (x_reg, x_code) <- getSomeReg x - let - opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD - code dst = x_code `snocOL` opc x_reg dst - -- in - return (Any (floatSize to) code) -#endif - --- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if sparc_TARGET_ARCH - -coerceInt2FP width1 width2 x = do - (src, code) <- getSomeReg x - let - code__2 dst = code `appOL` toOL [ - ST (intSize width1) src (spRel (-2)), - LD (intSize width1) (spRel (-2)) dst, - FxTOy (intSize width1) (floatSize width2) dst dst] - return (Any (floatSize $ width2) code__2) - - --- | Coerce a floating point value to integer --- --- NOTE: On sparc v9 there are no instructions to move a value from an --- FP register directly to an int register, so we have to use a load/store. --- -coerceFP2Int width1 width2 x - = do let fsize1 = floatSize width1 - fsize2 = floatSize width2 - - isize2 = intSize width2 - - (fsrc, code) <- getSomeReg x - fdst <- getNewRegNat fsize2 - - let code2 dst - = code - `appOL` toOL - -- convert float to int format, leaving it in a float reg. - [ FxTOy fsize1 isize2 fsrc fdst - - -- store the int into mem, then load it back to move - -- it into an actual int reg. - , ST fsize2 fdst (spRel (-2)) - , LD isize2 (spRel (-2)) dst] - - return (Any isize2 code2) - ------------- -coerceDbl2Flt x = do - (src, code) <- getSomeReg x - return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) - ------------- -coerceFlt2Dbl x = do - (src, code) <- getSomeReg x - return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst)) - -#endif /* sparc_TARGET_ARCH */ - -#if powerpc_TARGET_ARCH -coerceInt2FP fromRep toRep x = do - (src, code) <- getSomeReg x - lbl <- getNewLabelNat - itmp <- getNewRegNat II32 - ftmp <- getNewRegNat FF64 - dflags <- getDynFlagsNat - dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl - Amode addr addr_code <- getAmode dynRef - let - code' dst = code `appOL` maybe_exts `appOL` toOL [ - LDATA ReadOnlyData - [CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x43300000 W32), - CmmStaticLit (CmmInt 0x80000000 W32)], - XORIS itmp src (ImmInt 0x8000), - ST II32 itmp (spRel 3), - LIS itmp (ImmInt 0x4330), - ST II32 itmp (spRel 2), - LD FF64 ftmp (spRel 2) - ] `appOL` addr_code `appOL` toOL [ - LD FF64 dst addr, - FSUB FF64 dst ftmp dst - ] `appOL` maybe_frsp dst - - maybe_exts = case fromRep of - W8 -> unitOL $ EXTS II8 src src - W16 -> unitOL $ EXTS II16 src src - W32 -> nilOL - maybe_frsp dst = case toRep of - W32 -> unitOL $ FRSP dst dst - W64 -> nilOL - return (Any (floatSize toRep) code') - -coerceFP2Int fromRep toRep x = do - -- the reps don't really matter: F*->FF64 and II32->I* are no-ops - (src, code) <- getSomeReg x - tmp <- getNewRegNat FF64 - let - code' dst = code `appOL` toOL [ - -- convert to int in FP reg - FCTIWZ tmp src, - -- store value (64bit) from FP to stack - ST FF64 tmp (spRel 2), - -- read low word of value (high word is undefined) - LD II32 dst (spRel 3)] - return (Any (intSize toRep) code') -#endif /* powerpc_TARGET_ARCH */ - - --- ----------------------------------------------------------------------------- --- eXTRA_STK_ARGS_HERE - --- We (allegedly) put the first six C-call arguments in registers; --- where do we start putting the rest of them? - --- Moved from Instrs (SDM): - -#if alpha_TARGET_ARCH || sparc_TARGET_ARCH -eXTRA_STK_ARGS_HERE :: Int -eXTRA_STK_ARGS_HERE - = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???)) -#endif