6 import qualified Numeric( readFloat, readDec )
-- | Characters allowed inside a name: letters, decimal digits, and the
-- punctuation characters underscore, prime, colon and dollar.
isNameChar :: Char -> Bool
isNameChar c = isAlpha c || isDigit c || c `elem` "_':$"
-- | Characters allowed inside a keyword: letters and underscore only.
isKeywordChar :: Char -> Bool
isKeywordChar c
  | c == '_'  = True
  | otherwise = isAlpha c
-- | Continuation-passing lexer: inspects the front of the input and hands the
-- next token, together with the remaining input, to @cont@.
--
-- Equations are tried top to bottom, so their order matters: the newline
-- case comes before the 'isSpace' guard (so the line number is bumped rather
-- than the newline being skipped as plain whitespace), @->@ is matched before
-- the @-@ that starts a number, and @::@ before the single-colon case.
--
-- NOTE(review): the four guard lines after the @->@ equation have no visible
-- equation head in this listing (presumably @lexer cont (c:cs)@) -- confirm
-- against the complete file before editing them.
12 lexer :: (Token -> P a) -> P a
-- End of input.
13 lexer cont [] = cont TKEOF []
-- Newline: continue lexing with the line number incremented.
14 lexer cont ('\n':cs) = \line -> lexer cont cs (line+1)
15 lexer cont ('-':'>':cs) = cont TKrarrow cs
-- Character-class dispatch: skip whitespace; lowercase or underscore starts a
-- TKname; uppercase starts a TKcname; a digit or minus sign goes to lexNum.
-- When no guard holds, matching falls through to the equations below.
18 | isSpace c = lexer cont cs
19 | isLower c || (c == '_') = lexName cont TKname (c:cs)
20 | isUpper c = lexName cont TKcname (c:cs)
21 | isDigit c || (c == '-') = lexNum cont (c:cs)
-- The introducing character (percent sign or quote) is dropped before the
-- specialised lexer is called.
23 lexer cont ('%':cs) = lexKeyword cont cs
24 lexer cont ('\'':cs) = lexChar cont cs
25 lexer cont ('\"':cs) = lexString [] cont cs
-- Single-character punctuation tokens (plus the two-character @::@).
26 lexer cont ('#':cs) = cont TKhash cs
27 lexer cont ('(':cs) = cont TKoparen cs
28 lexer cont (')':cs) = cont TKcparen cs
29 lexer cont ('{':cs) = cont TKobrace cs
30 lexer cont ('}':cs) = cont TKcbrace cs
31 lexer cont ('=':cs) = cont TKeq cs
32 lexer cont (':':':':cs) = cont TKcoloncolon cs
33 lexer cont ('*':cs) = cont TKstar cs
34 lexer cont ('.':cs) = cont TKdot cs
35 lexer cont ('\\':cs) = cont TKlambda cs
36 lexer cont ('@':cs) = cont TKat cs
37 lexer cont ('?':cs) = cont TKquestion cs
38 lexer cont (';':cs) = cont TKsemicolon cs
39 -- 20060420 GHC spits out constructors with colon in them nowadays. jds
40 lexer cont (':':cs) = lexName cont TKcname (':':cs)
41 -- 20060420 Likewise does it create identifiers starting with dollar. jds
42 lexer cont ('$':cs) = lexName cont TKname ('$':cs)
-- Anything not recognised above is reported as an error carrying the
-- offending character.
43 lexer cont (c:cs) = failP "invalid character" [c]
-- | Lex the remainder of a character literal; the opening quote has already
-- been consumed by 'lexer'.
--
-- Two shapes are accepted: a backslash, an @x@ and two lowercase hex digits
-- followed by the closing quote, or a single plain character followed by the
-- closing quote.  Any other backslash sequence, an immediately repeated
-- quote, and a double quote are rejected.
--
-- Input matching none of these shapes (end of input, or a character that is
-- not followed by a closing quote) is reported through 'failP' rather than
-- falling off the end of the equations with a pattern-match failure.
lexChar cont ('\\':'x':h1:h0:'\'':cs)
  | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs
-- A failed guard above falls through to here, as does every other escape.
lexChar _ ('\\':cs) = failP "invalid char character" ('\\' : take 10 cs)
lexChar _ ('\'':_) = failP "invalid char character" ['\'']
lexChar _ ('\"':_) = failP "invalid char character" ['\"']
lexChar cont (c:'\'':cs) = cont (TKchar c) cs
-- Unterminated or truncated literal: fail cleanly instead of crashing.
lexChar _ cs = failP "invalid char character" (take 10 cs)
-- | Lex the remainder of a string literal; the opening double quote has
-- already been consumed by 'lexer'.
--
-- @s@ is the text accumulated so far, in reading order (the lexer starts it
-- empty).  On the closing double quote the continuation receives a
-- 'TKstring' holding @s@ followed by the characters read here.
--
-- The only escape accepted is a backslash, an @x@ and two lowercase hex
-- digits; any other backslash, and a single quote, are rejected.  Reaching
-- the end of the input before the closing quote is reported through 'failP'
-- instead of crashing with a pattern-match failure.
lexString s cont cs0 = go (reverse s) cs0
  where
    -- The accumulator is kept reversed so each character is added in
    -- constant time; it is reversed once when the literal is closed.
    go acc ('\\':'x':h1:h0:cs)
      | isHexEscape [h1,h0] = go (hexToChar h1 h0 : acc) cs
    -- A failed guard above falls through to here, as does every other escape.
    go _   ('\\':_)  = failP "invalid string character" ['\\']
    go _   ('\'':_)  = failP "invalid string character" ['\'']
    go acc ('\"':cs) = cont (TKstring (reverse acc)) cs
    go acc (c:cs)    = go (c : acc) cs
    -- End of input inside the literal.
    go _   []        = failP "unterminated string" []
-- | True when every character is a decimal digit or one of the lowercase
-- letters @a@..@f@; uppercase hex digits are not accepted.
isHexEscape :: String -> Bool
isHexEscape = all lowerHexDigit
  where
    lowerHexDigit c = isDigit c || ('a' <= c && c <= 'f')
-- | Build the character whose code is the two-digit hexadecimal number with
-- high digit @h1@ and low digit @h0@.
hexToChar :: Char -> Char -> Char
hexToChar h1 h0 = chr (16 * high + low)
  where
    high = digitToInt h1
    low  = digitToInt h0
-- Numeric literal lexer, reached from 'lexer' for input starting with a digit
-- or a minus sign.
--
-- NOTE(review): the head of this definition and the first case pattern are
-- not visible in this listing; @sgn@, and the @digits@/@c@/@rest@ used by the
-- guarded branch, are presumably bound on the missing lines -- confirm
-- against the complete file.
72 case span isDigit cs of
-- Guarded branch: when @c@ is a digit, the text @digits ++ "." ++ c:rest@ is
-- handed to 'readFloat' and the result is scaled by the sign.
74 | isDigit c -> cont (TKrational (fromInteger sgn * r)) rest'
-- NOTE(review): this pattern binding is partial -- it relies on 'readFloat'
-- returning at least one parse for this input.
75 where ((r,rest'):_) = readFloat (digits ++ ('.':c:rest))
76 -- When reading a floating-point number, which is
77 -- a bit complicated, use the Haskell 98 library function
-- Otherwise the literal is an integer.
-- NOTE(review): @read digits@ fails at runtime when @digits@ is empty; a
-- lone minus sign may be able to reach this branch -- confirm.
78 (digits,rest) -> cont (TKinteger (sgn * (read digits))) rest
-- | Split the longest prefix of name characters off the input, wrap it with
-- the token constructor @cstr@, and pass the token and the remaining input
-- to @cont@.
lexName cont cstr cs = uncurry (cont . cstr) (span isNameChar cs)
-- Keyword recogniser, reached from 'lexer' after the leading percent sign
-- has been consumed.
--
-- The longest run of 'isKeywordChar' characters must be exactly one of the
-- keywords listed below; anything else is reported with the percent sign put
-- back in front of the offending input.
--
-- NOTE(review): the equation head is not visible in this listing (presumably
-- @lexKeyword cont cs =@) -- confirm against the complete file.
84 case span isKeywordChar cs of
85 ("module",rest) -> cont TKmodule rest
86 ("data",rest) -> cont TKdata rest
87 ("newtype",rest) -> cont TKnewtype rest
88 ("forall",rest) -> cont TKforall rest
89 ("rec",rest) -> cont TKrec rest
90 ("let",rest) -> cont TKlet rest
91 ("in",rest) -> cont TKin rest
92 ("case",rest) -> cont TKcase rest
93 ("of",rest) -> cont TKof rest
94 ("coerce",rest) -> cont TKcoerce rest
95 ("note",rest) -> cont TKnote rest
96 ("external",rest) -> cont TKexternal rest
-- A lone underscore after the percent sign is the wildcard token.
97 ("_",rest) -> cont TKwild rest
98 _ -> failP "invalid keyword" ('%':cs)
101 #if __GLASGOW_HASKELL__ >= 504
102 -- The readFloat in the Numeric library will do the job
-- | Read a fractional literal at any 'RealFrac' type by delegating directly
-- to the library's 'Numeric.readFloat'.
readFloat :: RealFrac a => ReadS a
readFloat input = Numeric.readFloat input
108 -- Haskell 98's Numeric.readFloat used to have a bogusly restricted signature
109 -- so it was incapable of reading a rational.
110 -- So for GHCs that have that old bogus library, here is the code, written out longhand.
-- Longhand floating-point reader.
--
-- The main comprehension combines the integer @n@ read from all mantissa
-- digits, the count @d@ of digits after the point, and the exponent @k@
-- into @n * 10^^(k-d)@.  The inputs "NaN" and "Infinity", as tokenised by
-- 'lex', are also accepted.
--
-- NOTE(review): several lines are not visible in this listing (presumably
-- the type signature, the @where@ keyword, and fallback equations for
-- @lexFrac@ and @readExp@ covering a missing fraction or exponent) --
-- confirm against the complete file.
112 readFloat r = [(fromRational ((n%1)*10^^(k-d)),t) | (n,d,s) <- readFix r,
113 (k,t) <- readExp s] ++
114 [ (0/0, t) | ("NaN",t) <- lex r] ++
115 [ (1/0, t) | ("Infinity",t) <- lex r]
-- Mantissa: integer digits @ds@ followed by fraction digits @ds'@, read as
-- one integer; the fraction length is returned so the caller can rescale.
117 readFix r = [(read (ds++ds'), length ds', t)
118 | (ds,d) <- lexDigits r,
119 (ds',t) <- lexFrac d ]
-- Fraction part: the digits following a decimal point.
121 lexFrac ('.':ds) = lexDigits ds
-- Exponent part, introduced by @e@ or @E@.
124 readExp (e:s) | e `elem` "eE" = readExp' s
-- Exponent value with an optional sign.
127 readExp' ('-':s) = [(-k,t) | (k,t) <- Numeric.readDec s]
128 readExp' ('+':s) = Numeric.readDec s
129 readExp' s = Numeric.readDec s
131 lexDigits :: ReadS String
132 lexDigits s = case span isDigit s of
133 (cs,s') | not (null cs) -> [(cs,s')]