1 -----------------------------------------------------------------------------
3 -- Module : Text.ParserCombinators.Parsec.Token
4 -- Copyright : (c) Daan Leijen 1999-2001
5 -- License : BSD-style (see the file libraries/base/LICENSE)
7 -- Maintainer : daan@cs.uu.nl
8 -- Stability : provisional
9 -- Portability : non-portable (uses existentially quantified data constructors)
11 -- A helper module to parse lexical elements (tokens).
13 -----------------------------------------------------------------------------
15 module Text.ParserCombinators.Parsec.Token
21 import Data.Char (isAlpha,toLower,toUpper,isSpace,digitToInt)
22 import Data.List (nub,sort)
23 import Text.ParserCombinators.Parsec
26 -----------------------------------------------------------
27 -- Language Definition
28 -----------------------------------------------------------
31 { commentStart :: String
32 , commentEnd :: String
33 , commentLine :: String
34 , nestedComments :: Bool
35 , identStart :: CharParser st Char
36 , identLetter :: CharParser st Char
37 , opStart :: CharParser st Char
38 , opLetter :: CharParser st Char
39 , reservedNames :: [String]
40 , reservedOpNames:: [String]
41 , caseSensitive :: Bool
44 -----------------------------------------------------------
45 -- A first class module: TokenParser
46 -----------------------------------------------------------
48 = TokenParser{ identifier :: CharParser st String
49 , reserved :: String -> CharParser st ()
50 , operator :: CharParser st String
51 , reservedOp :: String -> CharParser st ()
53 , charLiteral :: CharParser st Char
54 , stringLiteral :: CharParser st String
55 , natural :: CharParser st Integer
56 , integer :: CharParser st Integer
57 , float :: CharParser st Double
58 , naturalOrFloat :: CharParser st (Either Integer Double)
59 , decimal :: CharParser st Integer
60 , hexadecimal :: CharParser st Integer
61 , octal :: CharParser st Integer
63 , symbol :: String -> CharParser st String
64 , lexeme :: forall a. CharParser st a -> CharParser st a
65 , whiteSpace :: CharParser st ()
67 , parens :: forall a. CharParser st a -> CharParser st a
68 , braces :: forall a. CharParser st a -> CharParser st a
69 , angles :: forall a. CharParser st a -> CharParser st a
70 , brackets :: forall a. CharParser st a -> CharParser st a
71 -- "squares" is deprecated
72 , squares :: forall a. CharParser st a -> CharParser st a
74 , semi :: CharParser st String
75 , comma :: CharParser st String
76 , colon :: CharParser st String
77 , dot :: CharParser st String
78 , semiSep :: forall a . CharParser st a -> CharParser st [a]
79 , semiSep1 :: forall a . CharParser st a -> CharParser st [a]
80 , commaSep :: forall a . CharParser st a -> CharParser st [a]
81 , commaSep1 :: forall a . CharParser st a -> CharParser st [a]
84 -----------------------------------------------------------
85 -- Given a LanguageDef, create a token parser.
86 -----------------------------------------------------------
87 makeTokenParser :: LanguageDef st -> TokenParser st
88 makeTokenParser languageDef
89 = TokenParser{ identifier = identifier
92 , reservedOp = reservedOp
94 , charLiteral = charLiteral
95 , stringLiteral = stringLiteral
99 , naturalOrFloat = naturalOrFloat
101 , hexadecimal = hexadecimal
106 , whiteSpace = whiteSpace
111 , brackets = brackets
118 , semiSep1 = semiSep1
119 , commaSep = commaSep
120 , commaSep1 = commaSep1
124 -----------------------------------------------------------
126 -----------------------------------------------------------
127 parens p = between (symbol "(") (symbol ")") p
128 braces p = between (symbol "{") (symbol "}") p
129 angles p = between (symbol "<") (symbol ">") p
130 brackets p = between (symbol "[") (symbol "]") p
137 commaSep p = sepBy p comma
138 semiSep p = sepBy p semi
140 commaSep1 p = sepBy1 p comma
141 semiSep1 p = sepBy1 p semi
144 -----------------------------------------------------------
146 -----------------------------------------------------------
147 -- charLiteral :: CharParser st Char
148 charLiteral = lexeme (between (char '\'')
149 (char '\'' <?> "end of character")
153 characterChar = charLetter <|> charEscape
154 <?> "literal character"
156 charEscape = do{ char '\\'; escapeCode }
157 charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
161 -- stringLiteral :: CharParser st String
162 stringLiteral = lexeme (
163 do{ str <- between (char '"')
164 (char '"' <?> "end of string")
166 ; return (foldr (maybe id (:)) "" str)
168 <?> "literal string")
170 -- stringChar :: CharParser st (Maybe Char)
171 stringChar = do{ c <- stringLetter; return (Just c) }
173 <?> "string character"
175 stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
177 stringEscape = do{ char '\\'
178 ; do{ escapeGap ; return Nothing }
179 <|> do{ escapeEmpty; return Nothing }
180 <|> do{ esc <- escapeCode; return (Just esc) }
183 escapeEmpty = char '&'
184 escapeGap = do{ many1 space
185 ; char '\\' <?> "end of string gap"
191 escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
194 -- charControl :: CharParser st Char
195 charControl = do{ char '^'
197 ; return (toEnum (fromEnum code - fromEnum 'A'))
200 -- charNum :: CharParser st Char
201 charNum = do{ code <- decimal
202 <|> do{ char 'o'; number 8 octDigit }
203 <|> do{ char 'x'; number 16 hexDigit }
204 ; return (toEnum (fromInteger code))
207 charEsc = choice (map parseEsc escMap)
209 parseEsc (c,code) = do{ char c; return code }
211 charAscii = choice (map parseAscii asciiMap)
213 parseAscii (asc,code) = try (do{ string asc; return code })
216 -- escape code tables
217 escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
218 asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
220 ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
221 "FS","GS","RS","US","SP"]
222 ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
223 "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
224 "CAN","SUB","ESC","DEL"]
226 ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI',
227 '\EM','\FS','\GS','\RS','\US','\SP']
228 ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK',
229 '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK',
230 '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
233 -----------------------------------------------------------
235 -----------------------------------------------------------
236 -- naturalOrFloat :: CharParser st (Either Integer Double)
237 naturalOrFloat = lexeme (natFloat) <?> "number"
239 float = lexeme floating <?> "float"
240 integer = lexeme int <?> "integer"
241 natural = lexeme nat <?> "natural"
245 floating = do{ n <- decimal
250 natFloat = do{ char '0'
255 zeroNumFloat = do{ n <- hexadecimal <|> octal
262 decimalFloat = do{ n <- decimal
267 fractFloat n = do{ f <- fractExponent n
271 fractExponent n = do{ fract <- fraction
272 ; expo <- option 1.0 exponent'
273 ; return ((fromInteger n + fract)*expo)
276 do{ expo <- exponent'
277 ; return ((fromInteger n)*expo)
280 fraction = do{ char '.'
281 ; digits <- many1 digit <?> "fraction"
282 ; return (foldr op 0.0 digits)
286 op d f = (f + fromIntegral (digitToInt d))/10.0
288 exponent' = do{ oneOf "eE"
290 ; e <- decimal <?> "exponent"
291 ; return (power (f e))
295 power e | e < 0 = 1.0/power(-e)
296 | otherwise = fromInteger (10^e)
299 -- integers and naturals
300 int = do{ f <- lexeme sign
305 -- sign :: CharParser st (Integer -> Integer)
306 sign = (char '-' >> return negate)
307 <|> (char '+' >> return id)
310 nat = zeroNumber <|> decimal
312 zeroNumber = do{ char '0'
313 ; hexadecimal <|> octal <|> decimal <|> return 0
317 decimal = number 10 digit
318 hexadecimal = do{ oneOf "xX"; number 16 hexDigit }
319 octal = do{ oneOf "oO"; number 8 octDigit }
321 -- number :: Integer -> CharParser st Char -> CharParser st Integer
322 number base baseDigit
323 = do{ digits <- many1 baseDigit
324 ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
328 -----------------------------------------------------------
329 -- Operators & reserved ops
330 -----------------------------------------------------------
334 ; notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
340 ; if (isReservedOp name)
341 then unexpected ("reserved operator " ++ show name)
346 do{ c <- (opStart languageDef)
347 ; cs <- many (opLetter languageDef)
353 isReserved (sort (reservedOpNames languageDef)) name
356 -----------------------------------------------------------
357 -- Identifiers & Reserved words
358 -----------------------------------------------------------
362 ; notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
366 | caseSensitive languageDef = string name
367 | otherwise = do{ walk name; return name }
370 walk (c:cs) = do{ caseChar c <?> msg; walk cs }
372 caseChar c | isAlpha c = char (toLower c) <|> char (toUpper c)
381 ; if (isReservedName name)
382 then unexpected ("reserved word " ++ show name)
388 = do{ c <- identStart languageDef
389 ; cs <- many (identLetter languageDef)
395 = isReserved theReservedNames caseName
397 caseName | caseSensitive languageDef = name
398 | otherwise = map toLower name
401 isReserved names name
405 scan (r:rs) = case (compare r name) of
411 | caseSensitive languageDef = sortedNames
412 | otherwise = map (map toLower) sortedNames
414 sortedNames = sort (reservedNames languageDef)
418 -----------------------------------------------------------
419 -- White space & symbols
420 -----------------------------------------------------------
422 = lexeme (string name)
425 = do{ x <- p; whiteSpace; return x }
430 | noLine && noMulti = skipMany (simpleSpace <?> "")
431 | noLine = skipMany (simpleSpace <|> multiLineComment <?> "")
432 | noMulti = skipMany (simpleSpace <|> oneLineComment <?> "")
433 | otherwise = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment <?> "")
435 noLine = null (commentLine languageDef)
436 noMulti = null (commentStart languageDef)
440 skipMany1 (satisfy isSpace)
443 do{ try (string (commentLine languageDef))
444 ; skipMany (satisfy (/= '\n'))
449 do { try (string (commentStart languageDef))
454 | nestedComments languageDef = inCommentMulti
455 | otherwise = inCommentSingle
458 = do{ try (string (commentEnd languageDef)) ; return () }
459 <|> do{ multiLineComment ; inCommentMulti }
460 <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti }
461 <|> do{ oneOf startEnd ; inCommentMulti }
464 startEnd = nub (commentEnd languageDef ++ commentStart languageDef)
467 = do{ try (string (commentEnd languageDef)); return () }
468 <|> do{ skipMany1 (noneOf startEnd) ; inCommentSingle }
469 <|> do{ oneOf startEnd ; inCommentSingle }
472 startEnd = nub (commentEnd languageDef ++ commentStart languageDef)