1 {-# OPTIONS -Wall -fno-warn-missing-signatures #-}
3 module ParsecParser (parseCore) where
10 import Text.ParserCombinators.Parsec
11 import qualified Text.ParserCombinators.Parsec.Token as P
12 import Text.ParserCombinators.Parsec.Language
16 parseCore :: FilePath -> IO (Either ParseError Module)
17 parseCore = parseFromFile coreModule
19 coreModule :: Parser Module
23 mName <- coreModuleName
25 tdefs <- option [] coreTdefs
26 vdefGroups <- coreVdefGroups
28 return $ Module mName tdefs vdefGroups
30 coreModuleName :: Parser AnMname
32 pkgName <- corePackageName
34 (modHierarchy,baseName) <- coreHierModuleNames
35 return $ M (pkgName, modHierarchy, baseName)
37 corePackageName :: Parser Pname
38 -- Package names can be lowercase or uppercase!
39 corePackageName = identifier <|> upperName
41 coreHierModuleNames :: Parser ([Id], Id)
42 coreHierModuleNames = do
43 parentName <- upperName
44 return $ splitModuleName parentName
46 upperName :: Parser Id
49 rest <- many (identLetter extCoreDef)
50 return $ firstChar:rest
52 coreTdefs :: Parser [Tdef]
53 coreTdefs = many coreTdef
55 coreTdef :: Parser Tdef
56 coreTdef = withSemi (try (try coreDataDecl <|> try coreNewtypeDecl))
59 withSemi p = try p `withTerminator` ";"
61 withTerminator p term = do
66 coreDataDecl :: Parser Tdef
69 tyCon <- coreQualifiedCon
70 whiteSpace -- important
75 cDefs <- braces coreCdefs
76 return $ Data tyCon tBinds cDefs
78 coreNewtypeDecl :: Parser Tdef
81 tyCon <- coreQualifiedCon
83 coercionName <- coreQualifiedCon
87 return $ Newtype tyCon coercionName tBinds tyRep
89 coreQualifiedCon :: Parser (Mname, Id)
90 coreQualifiedCon = coreQualifiedGen upperName
92 coreQualifiedName = coreQualifiedGen identifier
94 coreQualifiedGen :: Parser String -> Parser (Mname, Id)
95 coreQualifiedGen p = (try (do
96 packageIdOrVarName <- corePackageName
97 maybeRest <- optionMaybe (char ':' >> coreHierModuleNames)
99 -- unqualified id, so backtrack
101 -- qualified name, so look for the id part
102 Just (modHierarchy, baseName) -> do
106 (Just $ M (packageIdOrVarName, modHierarchy, baseName),
109 (p >>= (\ res -> return (Nothing, res)))
111 coreTbinds :: Parser [Tbind]
112 coreTbinds = many coreTbind
114 coreTbindsGen :: CharParser () String -> Parser [Tbind]
115 -- The "try" here is important. Otherwise, when parsing:
116 -- "Node (^base:DataziTuple.Z3T)" (a cdef), we commit to
117 -- parsing (^base...) as a tbind rather than a type.
118 coreTbindsGen separator = many (try $ coreTbindGen separator)
120 coreTbind :: Parser Tbind
121 coreTbind = coreTbindGen whiteSpace
123 coreTbindGen :: CharParser () a -> Parser Tbind
124 coreTbindGen sep = (parens (do
127 kind <- symbol "::" >> coreKind
128 return (tyVar, kind))) <|>
129 (sep >> identifier >>= (return . (\ tv -> (tv,Klifted))))
131 coreCdefs :: Parser [Cdef]
132 coreCdefs = sepBy1 coreCdef (symbol ";")
134 coreCdef :: Parser Cdef
136 dataConName <- coreQualifiedCon
137 whiteSpace -- important!
138 tBinds <- try $ coreTbindsGen (symbol "@")
139 -- This should be equivalent to (many coreAty)
140 -- But it isn't. WHY??
141 tys <- sepBy coreAtySaturated whiteSpace
142 return $ Constr dataConName tBinds tys
144 coreTRep :: Parser (Maybe Ty)
145 -- note that the "=" is inside here since if there's
146 -- no rhs for the newtype, there's no "="
147 coreTRep = optionMaybe (do
151 coreType :: Parser Ty
152 coreType = coreForallTy <|> (do
154 -- whiteSpace is important!
156 -- This says: If there is at least one ("-> ty"..) thing,
157 -- use it. If not, don't consume any input.
158 maybeRest <- option [] (many1 (symbol "->" >> coreType))
159 return $ case maybeRest of
161 stuff -> foldl Tapp (Tcon tcArrow) (hd:stuff))
166 -- The "try" is necessary:
167 -- otherwise, parsing "T " fails rather
168 -- than returning "T".
169 maybeRest <- option [] (many1 (try (whiteSpace >> coreAtySaturated)))
171 -- so I'm not sure I like this... it's basically doing
172 -- typechecking (kind-checking?) in the parser.
173 -- However, the type syntax as defined in Core.hs sort of
175 ATy t -> foldl Tapp t maybeRest
176 Trans k -> app k 2 maybeRest "trans"
177 Sym k -> app k 1 maybeRest "sym"
178 Unsafe k -> app k 2 maybeRest "unsafe"
179 LeftCo k -> app k 1 maybeRest "left"
180 RightCo k -> app k 1 maybeRest "right"
181 InstCo k -> app k 2 maybeRest "inst")
182 where app k arity args _ | length args == arity = k args
184 primCoercionError (err ++
185 ("Args were: " ++ show args))
187 coreAtySaturated :: Parser Ty
188 coreAtySaturated = do
192 _ -> unexpected "coercion ty"
194 coreAty :: Parser ATyOp
195 coreAty = try coreTcon <|> ((try coreTvar <|> parens coreType)
197 coreTvar :: Parser Ty
198 coreTvar = try identifier >>= (return . Tvar)
200 coreTcon :: Parser ATyOp
201 -- TODO: Change the grammar
202 -- A Tcon can be an uppercase type constructor
203 -- or a lowercase (always qualified) coercion variable
205 -- Special case is first so that (CoUnsafe .. ..) gets parsed as
206 -- a prim. coercion app and not a Tcon app.
207 -- But the whole thing is so bogus.
209 -- the "try"s are crucial; they force
211 maybeCoercion <- choice [try symCo, try transCo, try unsafeCo,
212 try instCo, try leftCo, rightCo]
213 return $ case maybeCoercion of
214 TransC -> Trans (\ [x,y] -> TransCoercion x y)
215 SymC -> Sym (\ [x] -> SymCoercion x)
216 UnsafeC -> Unsafe (\ [x,y] -> UnsafeCoercion x y)
217 LeftC -> LeftCo (\ [x] -> LeftCoercion x)
218 RightC -> RightCo (\ [x] -> RightCoercion x)
219 InstC -> InstCo (\ [x,y] -> InstCoercion x y))
220 <|> (coreQualifiedCon >>= (return . ATy . Tcon))
222 data CoercionTy = TransC | InstC | SymC | UnsafeC | LeftC | RightC
224 symCo, transCo, unsafeCo, instCo, leftCo, rightCo :: Parser CoercionTy
225 -- Would be better not to wire these in quite this way. Sigh
226 symCo = string "ghczmprim:GHCziPrim.sym" >> return SymC
227 transCo = string "ghczmprim:GHCziPrim.trans" >> return TransC
228 unsafeCo = string "ghczmprim:GHCziPrim.CoUnsafe" >> return UnsafeC
229 leftCo = string "ghczmprim:GHCziPrim.left" >> return LeftC
230 rightCo = string "ghczmprim:GHCziPrim.right" >> return RightC
231 instCo = string "ghczmprim:GHCziPrim.inst" >> return InstC
233 coreForallTy :: Parser Ty
236 tBinds <- many1 coreTbind
239 return $ foldr Tforall bodyTy tBinds
241 -- TODO: similar to coreType. should refactor
242 coreKind :: Parser Kind
245 maybeRest <- option [] (many1 (symbol "->" >> coreKind))
246 return $ foldl Karrow hd maybeRest
248 coreAtomicKind = try liftedKind <|> try unliftedKind
249 <|> try openKind <|> try (do
250 (from,to) <- parens equalityKind
251 return $ Keq from to)
252 <|> try (parens coreKind)
272 -- Only used internally within the parser:
273 -- represents either a Tcon, or a continuation
274 -- for a primitive coercion
279 | Unsafe ([Ty] -> Ty)
280 | LeftCo ([Ty] -> Ty)
281 | RightCo ([Ty] -> Ty)
282 | InstCo ([Ty] -> Ty)
284 coreVdefGroups :: Parser [Vdefg]
285 coreVdefGroups = option [] (do
286 theFirstVdef <- coreVdefg
288 others <- coreVdefGroups
289 return $ theFirstVdef:others)
291 coreVdefg :: Parser Vdefg
292 coreVdefg = coreRecVdef <|> coreNonrecVdef
296 braces (sepBy1 coreVdef (symbol ";")) >>= (return . Rec)
298 coreNonrecVdef = coreVdef >>= (return . Nonrec)
301 (vdefLhs, vdefTy) <- try topVbind <|> (do
302 (v, ty) <- lambdaBind
303 return (unqual v, ty))
307 vdefRhs <- coreFullExp
308 return $ Vdef (vdefLhs, vdefTy, vdefRhs)
310 coreAtomicExp :: Parser Exp
312 -- For stupid reasons, the whiteSpace is necessary.
313 -- Without it, (pt coreAppExp "w a:B.C ") doesn't work.
315 res <- choice [try coreDconOrVar,
321 coreFullExp = (choice [coreLam, coreLet,
322 coreCase, coreCast, coreNote, coreExternal, coreLabel]) <|> (try coreAppExp)
323 -- The "try" is necessary so that we backtrack
324 -- when we see a var (that is not an app)
329 -- it's important to have a separate coreAtomicExp (that any app exp
330 -- begins with) and to define the args in terms of many1.
331 -- previously, coreAppExp could parse either an atomic exp (an app with
332 -- 0 arguments) or an app with >= 1 arguments, but that led to ambiguity.
333 oper <- try coreAtomicExp
335 args <- many1 (whiteSpace >> ((coreAtomicExp >>= (return . Left)) <|>
336 -- note this MUST be coreAty, not coreType, because otherwise:
337 -- "A @ B c" gets parsed as "A @ (B c)"
338 ((symbol "@" >> coreAtySaturated) >>= (return . Right))))
339 return $ foldl (\ op ->
340 either (App op) (Appt op)) oper args
343 theThing <- coreQualifiedGen (try upperName <|> identifier)
344 return $ case theThing of
345 -- note that data constructors must be qualified
346 (Just _, idItself) | isUpper (head idItself)
350 coreLit :: Parser Exp
351 coreLit = parens (coreLiteral >>= (return . Lit))
353 coreLiteral :: Parser Lit
362 binds <- coreLambdaBinds
365 return $ foldr Lam body binds
372 return $ Let vdefg body
375 ty <- coreAtySaturated
376 scrut <- coreAtomicExp
378 vBind <- parens lambdaBind
380 return $ Case scrut vBind ty alts
384 -- The parens are CRUCIAL, o/w it's ambiguous
385 body <- try (parens coreFullExp)
387 ty <- try coreAtySaturated
388 return $ Cast body ty
396 -- TODO: This isn't in the grammar, but GHC
397 -- always prints "external ccall". investigate...
400 t <- coreAtySaturated
401 return $ External s t) <|>
402 -- TODO: I don't really understand what this does
404 reserved "dynexternal"
406 t <- coreAtySaturated
407 return $ External "[dynamic]" t)
409 -- TODO: Totally punting this, but it needs to go in the grammar
413 return $ External s tAddrzh
415 coreLambdaBinds = many1 coreBind
417 coreBind = coreTbinding <|> coreVbind
419 coreTbinding = try coreAtTbind >>= (return . Tb)
420 coreVbind = parens (lambdaBind >>= (return . Vb))
422 coreAtTbind = (symbol "@") >> coreTbind
424 topVbind :: Parser (Qual Var, Ty)
425 topVbind = aCoreVbind coreQualifiedName
426 lambdaBind :: Parser (Var, Ty)
427 lambdaBind = aCoreVbind identifier
436 aLit :: Parser CoreLit
437 aLit = intOrRatLit <|> charLit <|> stringLit
439 intOrRatLit :: Parser CoreLit
441 -- Int and lit combined into one to avoid ambiguity.
444 maybeRhs <- optionMaybe (symbol "%" >> anIntLit)
446 Nothing -> return $ Lint lhs
447 Just rhs -> return $ Lrational (lhs % rhs)
449 intLit :: Parser Integer
450 intLit = anIntLit <|> parens anIntLit
452 anIntLit :: Parser Integer
454 sign <- option 1 (symbol "-" >> return (-1))
458 charLit :: Parser CoreLit
459 charLit = charLiteral >>= (return . Lchar)
460 -- make sure this is right
462 stringLit :: Parser CoreLit
463 stringLit = stringLiteral >>= (return . Lstring)
464 -- make sure this is right
466 coreAlts :: Parser [Alt]
467 coreAlts = braces $ sepBy1 coreAlt (symbol ";")
469 coreAlt :: Parser Alt
470 coreAlt = conAlt <|> litAlt <|> defaultAlt
474 conName <- coreQualifiedCon
475 tBinds <- many (parens coreAtTbind)
476 whiteSpace -- necessary b/c otherwise we parse the next list as empty
477 vBinds <- many (parens lambdaBind)
480 rhs <- try coreFullExp
481 return $ Acon conName tBinds vBinds rhs
485 l <- parens coreLiteral
490 defaultAlt :: Parser Alt
495 return $ Adefault rhs
497 extCore = P.makeTokenParser extCoreDef
499 parens = P.parens extCore
500 braces = P.braces extCore
501 -- newlines are allowed anywhere
502 whiteSpace = P.whiteSpace extCore <|> (newline >> return ())
503 symbol = P.symbol extCore
504 identifier = P.identifier extCore
505 -- Keywords all begin with '%'
506 reserved s = P.reserved extCore ('%':s)
507 natural = P.natural extCore
508 charLiteral = P.charLiteral extCore
509 stringLiteral = P.stringLiteral extCore
511 -- dodgy since Core doesn't really allow comments,
512 -- but we'll pretend...
513 extCoreDef = LanguageDef {
517 , nestedComments = True
519 , identLetter = lower <|> upper <|> digit <|> (char '\'')
520 , opStart = opLetter extCoreDef
521 , opLetter = oneOf ";=@:\\%_.*#?%"
522 , reservedNames = map ('%' :)
523 ["module", "data", "newtype", "rec",
524 "let", "in", "case", "of", "cast",
525 "note", "external", "forall"]
526 , reservedOpNames = [";", "=", "@", "::", "\\", "%_",
528 , caseSensitive = True
532 -- Stuff to help with testing in ghci.
533 pTest (Left a) = error (show a)
534 pTest (Right t) = print t
536 pTest1 :: Show a => CharParser () a -> String -> IO ()
538 let res = parse pr "" s
541 pt :: Show a => CharParser () a -> String -> IO ()