1 {-# OPTIONS -Wall -fno-warn-missing-signatures #-}
3 module Language.Core.ParsecParser (parseCore) where
5 import Language.Core.Core
6 import Language.Core.Check
7 import Language.Core.Encoding
8 import Language.Core.PrimCoercions
10 import Text.ParserCombinators.Parsec
11 import qualified Text.ParserCombinators.Parsec.Token as P
12 import Text.ParserCombinators.Parsec.Language
17 parseCore :: FilePath -> IO (Either ParseError Module)
18 parseCore = parseFromFile coreModule
20 coreModule :: Parser Module
24 mName <- coreModuleName
26 tdefs <- option [] coreTdefs
27 vdefGroups <- coreVdefGroups
29 return $ Module mName tdefs vdefGroups
31 coreModuleName :: Parser AnMname
33 pkgName <- corePackageName
35 (modHierarchy,baseName) <- coreHierModuleNames
36 return $ M (pkgName, modHierarchy, baseName)
38 corePackageName :: Parser Pname
39 -- Package names can be lowercase or uppercase!
40 corePackageName = (identifier <|> upperName) >>= (return . P)
42 coreHierModuleNames :: Parser ([Id], Id)
43 coreHierModuleNames = do
44 parentName <- upperName
45 return $ splitModuleName parentName
47 upperName :: Parser Id
50 rest <- many (identLetter extCoreDef)
51 return $ firstChar:rest
53 coreTdefs :: Parser [Tdef]
54 coreTdefs = many coreTdef
56 coreTdef :: Parser Tdef
57 coreTdef = withSemi (try (try coreDataDecl <|> try coreNewtypeDecl))
60 withSemi p = try p `withTerminator` ";"
62 withTerminator p term = do
67 coreDataDecl :: Parser Tdef
70 tyCon <- coreQualifiedCon
71 whiteSpace -- important
76 cDefs <- braces coreCdefs
77 return $ Data tyCon tBinds cDefs
79 coreNewtypeDecl :: Parser Tdef
82 tyCon <- coreQualifiedCon
84 coercionName <- coreQualifiedCon
88 return $ Newtype tyCon coercionName tBinds tyRep
90 coreQualifiedCon :: Parser (Mname, Id)
91 coreQualifiedCon = coreQualifiedGen upperName
93 coreQualifiedName = coreQualifiedGen identifier
95 coreQualifiedGen :: Parser String -> Parser (Mname, Id)
96 coreQualifiedGen p = (try (do
97 packageIdOrVarName <- corePackageName
98 maybeRest <- optionMaybe (char ':' >> coreHierModuleNames)
100 -- unqualified id, so backtrack
102 -- qualified name, so look for the id part
103 Just (modHierarchy, baseName) -> do
107 (Just $ M (packageIdOrVarName, modHierarchy, baseName),
110 (p >>= (\ res -> return (Nothing, res)))
112 coreTbinds :: Parser [Tbind]
113 coreTbinds = many coreTbind
115 coreTbindsGen :: CharParser () String -> Parser [Tbind]
116 -- The "try" here is important. Otherwise, when parsing:
117 -- "Node (^base:DataziTuple.Z3T)" (a cdef), we commit to
118 -- parsing (^base...) as a tbind rather than a type.
119 coreTbindsGen separator = many (try $ coreTbindGen separator)
121 coreTbind :: Parser Tbind
122 coreTbind = coreTbindGen whiteSpace
124 coreTbindGen :: CharParser () a -> Parser Tbind
125 coreTbindGen sep = (parens (do
128 kind <- symbol "::" >> coreKind
129 return (tyVar, kind))) <|>
130 (sep >> identifier >>= (return . (\ tv -> (tv,Klifted))))
132 coreCdefs :: Parser [Cdef]
133 coreCdefs = sepBy1 coreCdef (symbol ";")
135 coreCdef :: Parser Cdef
137 dataConName <- coreQualifiedCon
138 whiteSpace -- important!
139 tBinds <- try $ coreTbindsGen (symbol "@")
140 -- This should be equivalent to (many coreAty)
141 -- But it isn't. WHY??
142 tys <- sepBy coreAtySaturated whiteSpace
143 return $ Constr dataConName tBinds tys
145 coreTRep :: Parser Ty
146 -- note that the "=" is inside here since if there's
147 -- no rhs for the newtype, there's no "="
148 coreTRep = symbol "=" >> try coreType
150 coreType :: Parser Ty
151 coreType = coreForallTy <|> (do
153 -- whiteSpace is important!
155 -- This says: If there is at least one ("-> ty"..) thing,
156 -- use it. If not, don't consume any input.
157 maybeRest <- option [] (many1 (symbol "->" >> coreType))
158 return $ case maybeRest of
160 stuff -> foldl Tapp (Tcon tcArrow) (hd:stuff))
165 -- The "try" is necessary:
166 -- otherwise, parsing "T " fails rather
167 -- than returning "T".
168 maybeRest <- option [] (many1 (try (whiteSpace >> coreAtySaturated)))
170 -- so I'm not sure I like this... it's basically doing
171 -- typechecking (kind-checking?) in the parser.
172 -- However, the type syntax as defined in Core.hs sort of
174 ATy t -> foldl Tapp t maybeRest
175 Trans k -> app k 2 maybeRest "trans"
176 Sym k -> app k 1 maybeRest "sym"
177 Unsafe k -> app k 2 maybeRest "unsafe"
178 LeftCo k -> app k 1 maybeRest "left"
179 RightCo k -> app k 1 maybeRest "right"
180 InstCo k -> app k 2 maybeRest "inst")
181 where app k arity args _ | length args == arity = k args
183 primCoercionError (err ++
184 ("Args were: " ++ show args))
186 coreAtySaturated :: Parser Ty
187 coreAtySaturated = do
191 _ -> unexpected "coercion ty"
193 coreAty :: Parser ATyOp
194 coreAty = try coreTcon <|> ((try coreTvar <|> parens coreType)
196 coreTvar :: Parser Ty
197 coreTvar = try identifier >>= (return . Tvar)
199 coreTcon :: Parser ATyOp
200 -- TODO: Change the grammar
201 -- A Tcon can be an uppercase type constructor
202 -- or a lowercase (always qualified) coercion variable
204 -- Special case is first so that (CoUnsafe .. ..) gets parsed as
205 -- a prim. coercion app and not a Tcon app.
206 -- But the whole thing is so bogus.
208 -- the "try"s are crucial; they force
210 maybeCoercion <- choice [try symCo, try transCo, try unsafeCo,
211 try instCo, try leftCo, rightCo]
212 return $ case maybeCoercion of
213 TransC -> Trans (\ [x,y] -> TransCoercion x y)
214 SymC -> Sym (\ [x] -> SymCoercion x)
215 UnsafeC -> Unsafe (\ [x,y] -> UnsafeCoercion x y)
216 LeftC -> LeftCo (\ [x] -> LeftCoercion x)
217 RightC -> RightCo (\ [x] -> RightCoercion x)
218 InstC -> InstCo (\ [x,y] -> InstCoercion x y))
219 <|> (coreQualifiedCon >>= (return . ATy . Tcon))
221 data CoercionTy = TransC | InstC | SymC | UnsafeC | LeftC | RightC
223 symCo, transCo, unsafeCo, instCo, leftCo, rightCo :: Parser CoercionTy
224 symCo = string "%sym" >> return SymC
225 transCo = string "%trans" >> return TransC
226 unsafeCo = string "%unsafe" >> return UnsafeC
227 leftCo = string "%left" >> return LeftC
228 rightCo = string "%right" >> return RightC
229 instCo = string "%inst" >> return InstC
231 coreForallTy :: Parser Ty
234 tBinds <- many1 coreTbind
237 return $ foldr Tforall bodyTy tBinds
239 -- TODO: similar to coreType. should refactor
240 coreKind :: Parser Kind
243 maybeRest <- option [] (many1 (symbol "->" >> coreKind))
244 return $ foldl Karrow hd maybeRest
246 coreAtomicKind = try liftedKind <|> try unliftedKind
247 <|> try openKind <|> try (do
248 (from,to) <- parens equalityKind
249 return $ Keq from to)
250 <|> try (parens coreKind)
270 -- Only used internally within the parser:
271 -- represents either a Tcon, or a continuation
272 -- for a primitive coercion
277 | Unsafe ([Ty] -> Ty)
278 | LeftCo ([Ty] -> Ty)
279 | RightCo ([Ty] -> Ty)
280 | InstCo ([Ty] -> Ty)
282 coreVdefGroups :: Parser [Vdefg]
283 coreVdefGroups = option [] (do
284 theFirstVdef <- coreVdefg
286 others <- coreVdefGroups
287 return $ theFirstVdef:others)
289 coreVdefg :: Parser Vdefg
290 coreVdefg = coreRecVdef <|> coreNonrecVdef
294 braces (sepBy1 coreVdef (symbol ";")) >>= (return . Rec)
296 coreNonrecVdef = coreVdef >>= (return . Nonrec)
299 (vdefLhs, vdefTy) <- try topVbind <|> (do
300 (v, ty) <- lambdaBind
301 return (unqual v, ty))
305 vdefRhs <- coreFullExp
306 return $ Vdef (vdefLhs, vdefTy, vdefRhs)
308 coreAtomicExp :: Parser Exp
310 -- For stupid reasons, the whiteSpace is necessary.
311 -- Without it, (pt coreAppExp "w a:B.C ") doesn't work.
313 res <- choice [try coreDconOrVar,
319 coreFullExp = (choice [coreLam, coreLet,
320 coreCase, coreCast, coreNote, coreExternal, coreLabel]) <|> (try coreAppExp)
321 -- The "try" is necessary so that we backtrack
322 -- when we see a var (that is not an app)
327 -- it's important to have a separate coreAtomicExp (that any app exp
328 -- begins with) and to define the args in terms of many1.
329 -- previously, coreAppExp could parse either an atomic exp (an app with
330 -- 0 arguments) or an app with >= 1 arguments, but that led to ambiguity.
331 oper <- try coreAtomicExp
333 args <- many1 (whiteSpace >> ((coreAtomicExp >>= (return . Left)) <|>
334 -- note this MUST be coreAty, not coreType, because otherwise:
335 -- "A @ B c" gets parsed as "A @ (B c)"
336 ((symbol "@" >> coreAtySaturated) >>= (return . Right))))
337 return $ foldl (\ op ->
338 either (App op) (Appt op)) oper args
341 theThing <- coreQualifiedGen (try upperName <|> identifier)
342 return $ case theThing of
343 -- note that data constructors must be qualified
344 (Just _, idItself) | isUpper (head idItself)
348 coreLit :: Parser Exp
349 coreLit = parens (coreLiteral >>= (return . Lit))
351 coreLiteral :: Parser Lit
360 binds <- coreLambdaBinds
363 return $ foldr Lam body binds
370 return $ Let vdefg body
373 ty <- coreAtySaturated
374 scrut <- coreAtomicExp
376 vBind <- parens lambdaBind
378 return $ Case scrut vBind ty alts
382 -- The parens are CRUCIAL, o/w it's ambiguous
383 body <- try (parens coreFullExp)
385 ty <- try coreAtySaturated
386 return $ Cast body ty
394 -- TODO: This isn't in the grammar, but GHC
395 -- always prints "external ccall". investigate...
398 t <- coreAtySaturated
399 return $ External s t) <|>
400 -- TODO: I don't really understand what this does
402 reserved "dynexternal"
404 t <- coreAtySaturated
405 return $ External "[dynamic]" t)
407 -- TODO: Totally punting this, but it needs to go in the grammar
411 return $ External s tAddrzh
413 coreLambdaBinds = many1 coreBind
415 coreBind = coreTbinding <|> coreVbind
417 coreTbinding = try coreAtTbind >>= (return . Tb)
418 coreVbind = parens (lambdaBind >>= (return . Vb))
420 coreAtTbind = (symbol "@") >> coreTbind
422 topVbind :: Parser (Qual Var, Ty)
423 topVbind = aCoreVbind coreQualifiedName
424 lambdaBind :: Parser (Var, Ty)
425 lambdaBind = aCoreVbind identifier
434 aLit :: Parser CoreLit
435 aLit = intOrRatLit <|> charLit <|> stringLit
437 intOrRatLit :: Parser CoreLit
439 -- Int and lit combined into one to avoid ambiguity.
442 maybeRhs <- optionMaybe (symbol "%" >> anIntLit)
444 Nothing -> return $ Lint lhs
445 Just rhs -> return $ Lrational (lhs % rhs)
447 intLit :: Parser Integer
448 intLit = anIntLit <|> parens anIntLit
450 anIntLit :: Parser Integer
452 sign <- option 1 (symbol "-" >> return (-1))
456 charLit :: Parser CoreLit
457 charLit = charLiteral >>= (return . Lchar)
458 -- make sure this is right
460 stringLit :: Parser CoreLit
461 stringLit = stringLiteral >>= (return . Lstring)
462 -- make sure this is right
464 coreAlts :: Parser [Alt]
465 coreAlts = braces $ sepBy1 coreAlt (symbol ";")
467 coreAlt :: Parser Alt
468 coreAlt = conAlt <|> litAlt <|> defaultAlt
472 conName <- coreQualifiedCon
473 tBinds <- many (parens coreAtTbind)
474 whiteSpace -- necessary b/c otherwise we parse the next list as empty
475 vBinds <- many (parens lambdaBind)
478 rhs <- try coreFullExp
479 return $ Acon conName tBinds vBinds rhs
483 l <- parens coreLiteral
488 defaultAlt :: Parser Alt
493 return $ Adefault rhs
497 let decoded = zDecodeString mn
499 -- We re-encode the individual parts so that:
500 -- main:Foo_Bar.Quux.baz
502 -- main:FoozuBarziQuux.baz
504 -- main:Foo_BarziQuux.baz
505 parts = map zEncodeString $ filter (notElem '.') $ groupBy
506 (\ c1 c2 -> c1 /= '.' && c2 /= '.')
508 (take (length parts - 1) parts, last parts)
510 extCore = P.makeTokenParser extCoreDef
512 parens = P.parens extCore
513 braces = P.braces extCore
514 -- newlines are allowed anywhere
515 whiteSpace = P.whiteSpace extCore <|> (newline >> return ())
516 symbol = P.symbol extCore
517 identifier = P.identifier extCore
518 -- Keywords all begin with '%'
519 reserved s = P.reserved extCore ('%':s)
520 natural = P.natural extCore
521 charLiteral = P.charLiteral extCore
522 stringLiteral = P.stringLiteral extCore
524 -- dodgy since Core doesn't really allow comments,
525 -- but we'll pretend...
526 extCoreDef = LanguageDef {
530 , nestedComments = True
532 , identLetter = lower <|> upper <|> digit <|> (char '\'')
533 , opStart = opLetter extCoreDef
534 , opLetter = oneOf ";=@:\\%_.*#?%"
535 , reservedNames = map ('%' :)
536 ["module", "data", "newtype", "rec",
537 "let", "in", "case", "of", "cast",
538 "note", "external", "forall"]
539 , reservedOpNames = [";", "=", "@", "::", "\\", "%_",
541 , caseSensitive = True
545 -- Stuff to help with testing in ghci.
546 pTest (Left a) = error (show a)
547 pTest (Right t) = print t
549 pTest1 :: Show a => CharParser () a -> String -> IO ()
551 let res = parse pr "" s
554 pt :: Show a => CharParser () a -> String -> IO ()