{-# OPTIONS -Wall -fno-warn-missing-signatures #-} module ParsecParser (parseCore) where import Core import ParseGlue import Check import PrimCoercions import Text.ParserCombinators.Parsec import qualified Text.ParserCombinators.Parsec.Token as P import Text.ParserCombinators.Parsec.Language import Data.Char import Data.Ratio parseCore :: FilePath -> IO (Either ParseError Module) parseCore = parseFromFile coreModule coreModule :: Parser Module coreModule = do whiteSpace reserved "module" mName <- coreModuleName whiteSpace tdefs <- option [] coreTdefs vdefGroups <- coreVdefGroups eof return $ Module mName tdefs vdefGroups coreModuleName :: Parser AnMname coreModuleName = do pkgName <- corePackageName char ':' (modHierarchy,baseName) <- coreHierModuleNames return $ M (pkgName, modHierarchy, baseName) corePackageName :: Parser Pname -- Package names can be lowercase or uppercase! -- TODO: update docs corePackageName = identifier <|> upperName coreHierModuleNames :: Parser ([Id], Id) coreHierModuleNames = do parentName <- upperName return $ splitModuleName parentName upperName :: Parser Id upperName = do firstChar <- upper rest <- many (identLetter extCoreDef) return $ firstChar:rest coreTdefs :: Parser [Tdef] coreTdefs = many coreTdef coreTdef :: Parser Tdef coreTdef = withSemi (try (try coreDataDecl <|> try coreNewtypeDecl)) withSemi p = try p `withTerminator` ";" withTerminator p term = do x <- try p try $ symbol term return x coreDataDecl :: Parser Tdef coreDataDecl = do reserved "data" tyCon <- coreQualifiedCon whiteSpace -- important tBinds <- coreTbinds whiteSpace symbol "=" whiteSpace cDefs <- braces coreCdefs return $ Data tyCon tBinds cDefs coreNewtypeDecl :: Parser Tdef coreNewtypeDecl = do reserved "newtype" tyCon <- coreQualifiedCon whiteSpace tBinds <- coreTbinds symbol "^" axiom <- coreAxiom tyRep <- try coreTRep return $ Newtype tyCon tBinds axiom tyRep coreQualifiedCon :: Parser (Mname, Id) coreQualifiedCon = coreQualifiedGen upperName coreQualifiedName = coreQualifiedGen identifier coreQualifiedGen :: Parser String -> Parser (Mname, Id) coreQualifiedGen p = (try (do packageIdOrVarName <- corePackageName maybeRest <- optionMaybe (char ':' >> coreHierModuleNames) case maybeRest of -- unqualified id, so backtrack Nothing -> pzero -- qualified name, so look for the id part Just (modHierarchy, baseName) -> do char '.' theId <- p return (Just $ M (packageIdOrVarName, modHierarchy, baseName), theId))) <|> -- unqualified name (p >>= (\ res -> return (Nothing, res))) coreAxiom :: Parser Axiom coreAxiom = parens (do coercionName <- coreQualifiedCon whiteSpace tbs <- coreTbinds whiteSpace symbol "::" whiteSpace coercionK <- try equalityKind <|> parens equalityKind return (coercionName, tbs, coercionK)) coreTbinds :: Parser [Tbind] coreTbinds = many coreTbind coreTbindsGen :: CharParser () String -> Parser [Tbind] -- The "try" here is important. Otherwise, when parsing: -- "Node (^base:DataziTuple.Z3T)" (a cdef), we commit to -- parsing (^base...) as a tbind rather than a type. coreTbindsGen separator = many (try $ coreTbindGen separator) coreTbind :: Parser Tbind coreTbind = coreTbindGen whiteSpace coreTbindGen :: CharParser () a -> Parser Tbind coreTbindGen sep = (parens (do sep tyVar <- identifier kind <- symbol "::" >> coreKind return (tyVar, kind))) <|> (sep >> identifier >>= (return . (\ tv -> (tv,Klifted)))) coreCdefs :: Parser [Cdef] coreCdefs = sepBy1 coreCdef (symbol ";") coreCdef :: Parser Cdef coreCdef = do dataConName <- coreQualifiedCon whiteSpace -- important! tBinds <- try $ coreTbindsGen (symbol "@") -- This should be equivalent to (many coreAty) -- But it isn't. WHY?? tys <- sepBy coreAtySaturated whiteSpace return $ Constr dataConName tBinds tys coreTRep :: Parser (Maybe Ty) -- note that the "=" is inside here since if there's -- no rhs for the newtype, there's no "=" coreTRep = optionMaybe (do symbol "=" try coreType) coreType :: Parser Ty coreType = coreForallTy <|> (do hd <- coreBty -- whiteSpace is important! whiteSpace -- This says: If there is at least one ("-> ty"..) thing, -- use it. If not, don't consume any input. maybeRest <- option [] (many1 (symbol "->" >> coreType)) return $ case maybeRest of [] -> hd stuff -> foldl Tapp (Tcon tcArrow) (hd:stuff)) coreBty :: Parser Ty coreBty = do hd <- coreAty -- The "try" is necessary: -- otherwise, parsing "T " fails rather -- than returning "T". maybeRest <- option [] (many1 (try (whiteSpace >> coreAtySaturated))) return $ (case hd of -- so I'm not sure I like this... it's basically doing -- typechecking (kind-checking?) in the parser. -- However, the type syntax as defined in Core.hs sort of -- forces it. ATy t -> foldl Tapp t maybeRest Trans k -> app k 2 maybeRest "trans" Sym k -> app k 1 maybeRest "sym" Unsafe k -> app k 2 maybeRest "unsafe" LeftCo k -> app k 1 maybeRest "left" RightCo k -> app k 1 maybeRest "right" InstCo k -> app k 2 maybeRest "inst") where app k arity args _ | length args == arity = k args app _ _ args err = primCoercionError (err ++ ("Args were: " ++ show args)) coreAtySaturated :: Parser Ty coreAtySaturated = do t <- coreAty case t of ATy ty -> return ty _ -> unexpected "coercion ty" coreAty :: Parser ATyOp coreAty = try coreTcon <|> ((try coreTvar <|> parens coreType) >>= return . ATy) coreTvar :: Parser Ty coreTvar = try identifier >>= (return . Tvar) coreTcon :: Parser ATyOp -- TODO: Change the grammar -- A Tcon can be an uppercase type constructor -- or a lowercase (always qualified) coercion variable coreTcon = -- Special case is first so that (CoUnsafe .. ..) gets parsed as -- a prim. coercion app and not a Tcon app. -- But the whole thing is so bogus. try (do -- the "try"s are crucial; they force -- backtracking maybeCoercion <- choice [try symCo, try transCo, try unsafeCo, try instCo, try leftCo, rightCo] return $ case maybeCoercion of TransC -> Trans (\ [x,y] -> TransCoercion x y) SymC -> Sym (\ [x] -> SymCoercion x) UnsafeC -> Unsafe (\ [x,y] -> UnsafeCoercion x y) LeftC -> LeftCo (\ [x] -> LeftCoercion x) RightC -> RightCo (\ [x] -> RightCoercion x) InstC -> InstCo (\ [x,y] -> InstCoercion x y)) <|> (coreQualifiedCon >>= (return . ATy . Tcon)) data CoercionTy = TransC | InstC | SymC | UnsafeC | LeftC | RightC symCo, transCo, unsafeCo, instCo, leftCo, rightCo :: Parser CoercionTy -- Would be better not to wire these in quite this way. Sigh symCo = string "ghczmprim:GHCziPrim.sym" >> return SymC transCo = string "ghczmprim:GHCziPrim.trans" >> return TransC unsafeCo = string "ghczmprim:GHCziPrim.CoUnsafe" >> return UnsafeC leftCo = string "ghczmprim:GHCziPrim.left" >> return LeftC rightCo = string "ghczmprim:GHCziPrim.right" >> return RightC instCo = string "ghczmprim:GHCziPrim.inst" >> return InstC coreForallTy :: Parser Ty coreForallTy = do reserved "forall" tBinds <- many1 coreTbind symbol "." bodyTy <- coreType return $ foldr Tforall bodyTy tBinds -- TODO: similar to coreType. should refactor coreKind :: Parser Kind coreKind = do hd <- coreAtomicKind maybeRest <- option [] (many1 (symbol "->" >> coreKind)) return $ foldl Karrow hd maybeRest coreAtomicKind = try liftedKind <|> try unliftedKind <|> try openKind <|> try (do (from,to) <- parens equalityKind return $ Keq from to) <|> try (parens coreKind) liftedKind = do symbol "*" return Klifted unliftedKind = do symbol "#" return Kunlifted openKind = do symbol "?" return Kopen equalityKind = do ty1 <- coreBty symbol ":=:" ty2 <- coreBty return (ty1, ty2) -- Only used internally within the parser: -- represents either a Tcon, or a continuation -- for a primitive coercion data ATyOp = ATy Ty | Trans ([Ty] -> Ty) | Sym ([Ty] -> Ty) | Unsafe ([Ty] -> Ty) | LeftCo ([Ty] -> Ty) | RightCo ([Ty] -> Ty) | InstCo ([Ty] -> Ty) coreVdefGroups :: Parser [Vdefg] coreVdefGroups = option [] (do theFirstVdef <- coreVdefg symbol ";" others <- coreVdefGroups return $ theFirstVdef:others) coreVdefg :: Parser Vdefg coreVdefg = coreRecVdef <|> coreNonrecVdef coreRecVdef = do reserved "rec" braces (sepBy1 coreVdef (symbol ";")) >>= (return . Rec) coreNonrecVdef = coreVdef >>= (return . Nonrec) coreVdef = do (vdefLhs, vdefTy) <- try topVbind <|> (do (v, ty) <- lambdaBind return (unqual v, ty)) whiteSpace symbol "=" whiteSpace vdefRhs <- coreFullExp return $ Vdef (vdefLhs, vdefTy, vdefRhs) coreAtomicExp :: Parser Exp coreAtomicExp = do -- For stupid reasons, the whiteSpace is necessary. -- Without it, (pt coreAppExp "w ^a:B.C ") doesn't work. whiteSpace res <- choice [try coreDconOrVar, try coreLit, parens coreFullExp ] whiteSpace return res coreFullExp = (choice [coreLam, coreLet, coreCase, coreCast, coreNote, coreExternal, coreLabel]) <|> (try coreAppExp) -- The "try" is necessary so that we backtrack -- when we see a var (that is not an app) <|> coreAtomicExp coreAppExp = do -- notes: -- it's important to have a separate coreAtomicExp (that any app exp -- begins with) and to define the args in terms of many1. -- previously, coreAppExp could parse either an atomic exp (an app with -- 0 arguments) or an app with >= 1 arguments, but that led to ambiguity. oper <- try coreAtomicExp whiteSpace args <- many1 (whiteSpace >> ((coreAtomicExp >>= (return . Left)) <|> -- note this MUST be coreAty, not coreType, because otherwise: -- "A @ B c" gets parsed as "A @ (B c)" ((symbol "@" >> coreAtySaturated) >>= (return . Right)))) return $ foldl (\ op -> either (App op) (Appt op)) oper args coreDconOrVar = do theThing <- coreQualifiedGen (try upperName <|> identifier) return $ case theThing of -- note that data constructors must be qualified (Just _, idItself) | isUpper (head idItself) -> Dcon theThing _ -> Var theThing coreLit :: Parser Exp coreLit = parens (coreLiteral >>= (return . Lit)) coreLiteral :: Parser Lit coreLiteral = do l <- try aLit symbol "::" t <- coreType return $ Literal l t coreLam = do symbol "\\" binds <- coreLambdaBinds symbol "->" body <- coreFullExp return $ foldr Lam body binds coreLet = do reserved "let" vdefg <- coreVdefg whiteSpace reserved "in" body <- coreFullExp return $ Let vdefg body coreCase = do reserved "case" ty <- coreAtySaturated scrut <- coreAtomicExp reserved "of" vBind <- parens lambdaBind alts <- coreAlts return $ Case scrut vBind ty alts coreCast = do reserved "cast" whiteSpace -- The parens are CRUCIAL, o/w it's ambiguous body <- try (parens coreFullExp) whiteSpace ty <- try coreAtySaturated return $ Cast body ty coreNote = do reserved "note" s <- stringLiteral e <- coreFullExp return $ Note s e coreExternal = (do reserved "external" -- TODO: This isn't in the grammar, but GHC -- always prints "external ccall". investigate... symbol "ccall" s <- stringLiteral t <- coreAtySaturated return $ External s t) <|> -- TODO: I don't really understand what this does (do reserved "dynexternal" symbol "ccall" t <- coreAtySaturated return $ External "[dynamic]" t) coreLabel = do -- TODO: Totally punting this, but it needs to go in the grammar -- or not at all reserved "label" s <- stringLiteral return $ External s tAddrzh coreLambdaBinds = many1 coreBind coreBind = coreTbinding <|> coreVbind coreTbinding = try coreAtTbind >>= (return . Tb) coreVbind = parens (lambdaBind >>= (return . Vb)) coreAtTbind = (symbol "@") >> coreTbind topVbind :: Parser (Qual Var, Ty) topVbind = aCoreVbind coreQualifiedName lambdaBind :: Parser (Var, Ty) lambdaBind = aCoreVbind identifier aCoreVbind idP = do nm <- idP symbol "::" t <- coreType return (nm, t) aLit :: Parser CoreLit aLit = intOrRatLit <|> charLit <|> stringLit intOrRatLit :: Parser CoreLit intOrRatLit = do -- Int and lit combined into one to avoid ambiguity. -- Argh.... lhs <- intLit maybeRhs <- optionMaybe (symbol "%" >> anIntLit) case maybeRhs of Nothing -> return $ Lint lhs Just rhs -> return $ Lrational (lhs % rhs) intLit :: Parser Integer intLit = anIntLit <|> parens anIntLit anIntLit :: Parser Integer anIntLit = do sign <- option 1 (symbol "-" >> return (-1)) n <- natural return (sign * n) charLit :: Parser CoreLit charLit = charLiteral >>= (return . Lchar) -- make sure this is right stringLit :: Parser CoreLit stringLit = stringLiteral >>= (return . Lstring) -- make sure this is right coreAlts :: Parser [Alt] coreAlts = braces $ sepBy1 coreAlt (symbol ";") coreAlt :: Parser Alt coreAlt = conAlt <|> litAlt <|> defaultAlt conAlt :: Parser Alt conAlt = do conName <- coreQualifiedCon tBinds <- many (parens coreAtTbind) whiteSpace -- necessary b/c otherwise we parse the next list as empty vBinds <- many (parens lambdaBind) whiteSpace try (symbol "->") rhs <- try coreFullExp return $ Acon conName tBinds vBinds rhs litAlt :: Parser Alt litAlt = do l <- parens coreLiteral symbol "->" rhs <- coreFullExp return $ Alit l rhs defaultAlt :: Parser Alt defaultAlt = do reserved "_" symbol "->" rhs <- coreFullExp return $ Adefault rhs ---------------- extCore = P.makeTokenParser extCoreDef parens = P.parens extCore braces = P.braces extCore -- newlines are allowed anywhere whiteSpace = P.whiteSpace extCore <|> (newline >> return ()) symbol = P.symbol extCore identifier = P.identifier extCore -- Keywords all begin with '%' reserved s = P.reserved extCore ('%':s) natural = P.natural extCore charLiteral = P.charLiteral extCore stringLiteral = P.stringLiteral extCore -- dodgy since Core doesn't really allow comments, -- but we'll pretend... extCoreDef = LanguageDef { commentStart = "{-" , commentEnd = "-}" , commentLine = "--" , nestedComments = True , identStart = lower , identLetter = lower <|> upper <|> digit <|> (char '\'') , opStart = opLetter extCoreDef , opLetter = oneOf ";=@:\\%_.*#?%" , reservedNames = map ('%' :) ["module", "data", "newtype", "rec", "let", "in", "case", "of", "cast", "note", "external", "forall"] , reservedOpNames = [";", "=", "@", "::", "\\", "%_", ".", "*", "#", "?"] , caseSensitive = True } {- -- Stuff to help with testing in ghci. pTest (Left a) = error (show a) pTest (Right t) = print t pTest1 :: Show a => CharParser () a -> String -> IO () pTest1 pr s = do let res = parse pr "" s pTest res pt :: Show a => CharParser () a -> String -> IO () pt pr s = do x <- parseTest pr s print x try_ = try many_ = many option_ = option many1_ = many1 il = identLetter andThenSym a b = do p <- a symbol b return p -}