ghc/tests/programs/andy_cherry/Parser.lhs

   1 > module Parser (pgnLexer,pgnParser) where
   2
   3 > import GenUtils
   4 > import DataTypes
   5 > import Char -- 1.3
   6
   7 This is a PGN lexer. Simple, and straightforward.
   8
   9 > pgnLexer :: String -> [Token]
  10 > pgnLexer ('.':r) = PeriodToken  : pgnLexer r
  11 > pgnLexer ('*':r) = AsterixToken : pgnLexer r
  12 > pgnLexer ('[':r) = LeftSBToken  : pgnLexer r
  13 > pgnLexer (']':r) = RightSBToken : pgnLexer r
  14 > pgnLexer ('(':r) = LeftRBToken  : pgnLexer r
  15 > pgnLexer (')':r) = RightRBToken : pgnLexer r
  16 > pgnLexer ('<':r) = LeftABToken  : pgnLexer r
  17 > pgnLexer ('>':r) = RightABToken : pgnLexer r
  18 > pgnLexer ('"':r) = readString r ""
  19 > pgnLexer ('{':r) = readComment r ""
  20 > pgnLexer ('$':r) = readNAG r ""
  21 > pgnLexer ('!':'?':r) = mkNAGToken 5 : pgnLexer r
  22 > pgnLexer ('!':'!':r) = mkNAGToken 3 : pgnLexer r
  23 > pgnLexer ('!':r)     = mkNAGToken 1 : pgnLexer r
  24 > pgnLexer ('?':'?':r) = mkNAGToken 4 : pgnLexer r
  25 > pgnLexer ('?':'!':r) = mkNAGToken 6 : pgnLexer r
  26 > pgnLexer ('?':r)     = mkNAGToken 2 : pgnLexer r
  27 > pgnLexer ('%':r) = pgnLexer (dropWhile (/= '\n') r)
  28 > pgnLexer (c:r)
  29 >       | isSpace c = pgnLexer r
  30 >       | isAlpha c || isDigit c = pgnSymbolLexer r [c]
  31 >       | otherwise = error ("Error lexing: " ++ takeWhile (/= '\n') (c:r))
  32 > pgnLexer [] = []
  33
  34 > pgnSymbolLexer (c:r) sym
  35 >       | isAlpha c
  36 >       || isDigit c
  37 >       || elem c "_+#=:-/" = pgnSymbolLexer r (c:sym)
  38 > pgnSymbolLexer r sym
  39 >       | all isDigit sym = IntToken (read (reverse sym)) : pgnLexer r
  40 > pgnSymbolLexer r sym   = SymbolToken (reverse sym) : pgnLexer r
  41
  42 > readString ('\\':'\\':r) str = readString r ('\\':str)
  43 > readString ('\\':'"':r) str = readString r ('"':str)
  44 > readString ('"':r) str     = StringToken (reverse str) : pgnLexer r
  45 > readString (c:r) str       = readString r (c:str)
  46
  47 > readComment ('}':r) str = CommentToken (revwords str []) : pgnLexer r
  48 > readComment (c:r) str = readComment r (c:str)
  49
  50 > revwords (c:r) wds
  51 >    | isSpace c = revwords r wds
  52 >    | otherwise = revwords' r [c] wds
  53 > revwords [] wds = wds
  54 > revwords' (c:r) wd wds
  55 >    | isSpace c = revwords r (wd:wds)
  56 >    | otherwise = revwords' r (c:wd) wds
  57 > revwords' [] wd wds = wd : wds
  58
  59 > readNAG (c:r) str
  60 >       | isDigit c = readNAG r (c:str)
  61 > readNAG r str = mkNAGToken (read (reverse str)) : pgnLexer r
  62
  63 > mkNAGToken 1 = NAGAnnToken 1 "!"
  64 > mkNAGToken 2 = NAGAnnToken 2 "?"
  65 > mkNAGToken 3 = NAGAnnToken 3 "!!"
  66 > mkNAGToken 4 = NAGAnnToken 4 "??"
  67 > mkNAGToken 5 = NAGAnnToken 5 "!?"
  68 > mkNAGToken 6 = NAGAnnToken 6 "?!"
  69 > mkNAGToken n = NAGToken n
  70
  71 And this is a parser for PGN. It takes a list of tokens,
  72 and splits up games, as well as nesting varations, etc.
  73
  74 > pgnParser :: (Int -> Bool) -> String -> [AbsGame]
  75 > pgnParser fn str =
  76 >       [ game | (no,game) <- zip [1..] (parseTags (pgnLexer str) id),
  77 >                fn no]
  78
  79 > type FL a = [a] -> [a]
  80
  81 > parseTags :: [Token] -> FL TagStr -> [AbsGame]
  82 > parseTags (LeftSBToken:SymbolToken sym:StringToken str:RightSBToken:rest)
  83 >           other_tags = parseTags rest (other_tags . ((:) (TagStr sym str)))
  84 > parseTags toks@(LeftSBToken:_) _
  85 >       = error ("BAD Token:" ++ unwords (map userFormat (take 10 toks)))
  86 > parseTags toks tags = parseToks toks id tags
  87
  88 > parseToks :: [Token]
  89 >       -> FL Token
  90 >       -> FL TagStr
  91 >       -> [AbsGame]
  92 > parseToks next@(LeftSBToken:_)     = \ toks tags ->
  93 >       Game (tags []) (toks []) : parseTags next id
  94 > parseToks (tk:r)                    = pushToken tk r
  95 > parseToks [] = \ toks tags -> [Game (tags []) (toks [])]
  96
  97 > pushToken tok r toks = parseToks r (toks . ((:) tok))