1 Character classification
5 ( is_ident -- Char# -> Bool
6 , is_symbol -- Char# -> Bool
7 , is_any -- Char# -> Bool
8 , is_space -- Char# -> Bool
9 , is_lower -- Char# -> Bool
10 , is_upper -- Char# -> Bool
11 , is_digit -- Char# -> Bool
16 import Bits ( Bits((.&.)) )
18 import PrelBase ( Char#, Char(..) )
24 cIdent, cSymbol, cAny, cSpace, cLower, cUpper, cDigit :: Int
The predicates below look costly, but they aren't: GHC+GCC do a great job
on the big case expression below.
-- | Test whether the class word of character @c@ shares at least one set
-- bit with @mask@.
--
-- Both the class word returned by 'charType' and the mask are converted to
-- 'Int32' before being and-ed together; the result is 'True' when the
-- intersection is non-zero.
is_ctype :: Int -> Char# -> Bool
is_ctype mask c = (classBits .&. maskBits) /= 0
  where
    classBits, maskBits :: Int32
    classBits = fromIntegral (charType (C# c))  -- box the raw char for the lookup
    maskBits  = fromIntegral mask
-- Each predicate asks 'is_ctype' whether the character's class word
-- contains the corresponding class constant.

-- | Does the character carry the 'cIdent' class?
is_ident :: Char# -> Bool
is_ident c = is_ctype cIdent c

-- | Does the character carry the 'cSymbol' class?
is_symbol :: Char# -> Bool
is_symbol c = is_ctype cSymbol c

-- | Does the character carry the 'cAny' class?
is_any :: Char# -> Bool
is_any c = is_ctype cAny c

-- | Does the character carry the 'cSpace' class?
is_space :: Char# -> Bool
is_space c = is_ctype cSpace c

-- | Does the character carry the 'cLower' class?
is_lower :: Char# -> Bool
is_lower c = is_ctype cLower c

-- | Does the character carry the 'cUpper' class?
is_upper :: Char# -> Bool
is_upper c = is_ctype cUpper c

-- | Does the character carry the 'cDigit' class?
is_digit :: Char# -> Bool
is_digit c = is_ctype cDigit c
51 We really mean .|. instead of + below, but GHC currently doesn't do
52 any constant folding with bitops. *sigh*
-- | Look up the class word of a character: the sum of the class constants
-- ('cAny', 'cSpace', 'cSymbol', 'cIdent', 'cDigit', 'cUpper', 'cLower')
-- that apply to it.
--
-- The constants are combined with @+@ where a bitwise or is meant.
-- NOTE(review): that is only equivalent if every class constant is a
-- distinct single bit; their definitions are not shown here -- confirm.
--
-- Only the characters listed below have an alternative; no catch-all is
-- present in this table.
--
-- Latin-1 letters follow the same scheme as the ASCII ones: capital letters
-- are @cIdent + cUpper@, small letters (including ß and ø) are
-- @cIdent + cLower@, and the operators × and ÷ are plain symbols.
charType :: Char -> Int
charType c = case c of
  '\9'   -> cAny + cSpace             -- \t
  '\10'  -> cAny + cSpace             -- \n
  '\11'  -> cAny + cSpace             -- \v
  '\12'  -> cAny + cSpace             -- \f
  '\13'  -> cAny + cSpace             -- \r (^M)
  '\32'  -> cAny + cSpace             -- space
  '\33'  -> cAny + cSymbol            -- !
  '\35'  -> cAny + cSymbol            -- #
  '\36'  -> cAny + cSymbol            -- $
  '\37'  -> cAny + cSymbol            -- %
  '\38'  -> cAny + cSymbol            -- &
  '\39'  -> cAny + cIdent             -- '
  '\42'  -> cAny + cSymbol            -- *
  '\43'  -> cAny + cSymbol            -- +
  '\45'  -> cAny + cSymbol            -- -
  '\46'  -> cAny + cSymbol            -- .
  '\47'  -> cAny + cSymbol            -- /
  '\48'  -> cAny + cIdent + cDigit    -- 0
  '\49'  -> cAny + cIdent + cDigit    -- 1
  '\50'  -> cAny + cIdent + cDigit    -- 2
  '\51'  -> cAny + cIdent + cDigit    -- 3
  '\52'  -> cAny + cIdent + cDigit    -- 4
  '\53'  -> cAny + cIdent + cDigit    -- 5
  '\54'  -> cAny + cIdent + cDigit    -- 6
  '\55'  -> cAny + cIdent + cDigit    -- 7
  '\56'  -> cAny + cIdent + cDigit    -- 8
  '\57'  -> cAny + cIdent + cDigit    -- 9
  '\58'  -> cAny + cSymbol            -- :
  '\60'  -> cAny + cSymbol            -- <
  '\61'  -> cAny + cSymbol            -- =
  '\62'  -> cAny + cSymbol            -- >
  '\63'  -> cAny + cSymbol            -- ?
  '\64'  -> cAny + cSymbol            -- @
  '\65'  -> cAny + cIdent + cUpper    -- A
  '\66'  -> cAny + cIdent + cUpper    -- B
  '\67'  -> cAny + cIdent + cUpper    -- C
  '\68'  -> cAny + cIdent + cUpper    -- D
  '\69'  -> cAny + cIdent + cUpper    -- E
  '\70'  -> cAny + cIdent + cUpper    -- F
  '\71'  -> cAny + cIdent + cUpper    -- G
  '\72'  -> cAny + cIdent + cUpper    -- H
  '\73'  -> cAny + cIdent + cUpper    -- I
  '\74'  -> cAny + cIdent + cUpper    -- J
  '\75'  -> cAny + cIdent + cUpper    -- K
  '\76'  -> cAny + cIdent + cUpper    -- L
  '\77'  -> cAny + cIdent + cUpper    -- M
  '\78'  -> cAny + cIdent + cUpper    -- N
  '\79'  -> cAny + cIdent + cUpper    -- O
  '\80'  -> cAny + cIdent + cUpper    -- P
  '\81'  -> cAny + cIdent + cUpper    -- Q
  '\82'  -> cAny + cIdent + cUpper    -- R
  '\83'  -> cAny + cIdent + cUpper    -- S
  '\84'  -> cAny + cIdent + cUpper    -- T
  '\85'  -> cAny + cIdent + cUpper    -- U
  '\86'  -> cAny + cIdent + cUpper    -- V
  '\87'  -> cAny + cIdent + cUpper    -- W
  '\88'  -> cAny + cIdent + cUpper    -- X
  '\89'  -> cAny + cIdent + cUpper    -- Y
  '\90'  -> cAny + cIdent + cUpper    -- Z
  '\92'  -> cAny + cSymbol            -- backslash
  '\94'  -> cAny + cSymbol            -- ^
  '\95'  -> cAny + cIdent + cLower    -- _
  '\97'  -> cAny + cIdent + cLower    -- a
  '\98'  -> cAny + cIdent + cLower    -- b
  '\99'  -> cAny + cIdent + cLower    -- c
  '\100' -> cAny + cIdent + cLower    -- d
  '\101' -> cAny + cIdent + cLower    -- e
  '\102' -> cAny + cIdent + cLower    -- f
  '\103' -> cAny + cIdent + cLower    -- g
  '\104' -> cAny + cIdent + cLower    -- h
  '\105' -> cAny + cIdent + cLower    -- i
  '\106' -> cAny + cIdent + cLower    -- j
  '\107' -> cAny + cIdent + cLower    -- k
  '\108' -> cAny + cIdent + cLower    -- l
  '\109' -> cAny + cIdent + cLower    -- m
  '\110' -> cAny + cIdent + cLower    -- n
  '\111' -> cAny + cIdent + cLower    -- o
  '\112' -> cAny + cIdent + cLower    -- p
  '\113' -> cAny + cIdent + cLower    -- q
  '\114' -> cAny + cIdent + cLower    -- r
  '\115' -> cAny + cIdent + cLower    -- s
  '\116' -> cAny + cIdent + cLower    -- t
  '\117' -> cAny + cIdent + cLower    -- u
  '\118' -> cAny + cIdent + cLower    -- v
  '\119' -> cAny + cIdent + cLower    -- w
  '\120' -> cAny + cIdent + cLower    -- x
  '\121' -> cAny + cIdent + cLower    -- y
  '\122' -> cAny + cIdent + cLower    -- z
  '\124' -> cAny + cSymbol            -- |
  '\126' -> cAny + cSymbol            -- ~
  '\161' -> cAny + cSymbol            -- ¡
  '\162' -> cAny + cSymbol            -- ¢
  '\163' -> cAny + cSymbol            -- £
  '\164' -> cAny + cSymbol            -- ¤
  '\165' -> cAny + cSymbol            -- ¥
  '\166' -> cAny + cSymbol            -- ¦
  '\167' -> cAny + cSymbol            -- §
  '\168' -> cAny + cSymbol            -- ¨
  '\169' -> cAny + cSymbol            -- ©
  '\170' -> cAny + cSymbol            -- ª
  '\171' -> cAny + cSymbol            -- «
  '\172' -> cAny + cSymbol            -- ¬
  '\173' -> cAny + cSymbol            -- soft hyphen
  '\174' -> cAny + cSymbol            -- ®
  '\175' -> cAny + cSymbol            -- ¯
  '\176' -> cAny + cSymbol            -- °
  '\177' -> cAny + cSymbol            -- ±
  '\178' -> cAny + cSymbol            -- ²
  '\179' -> cAny + cSymbol            -- ³
  '\180' -> cAny + cSymbol            -- ´
  '\181' -> cAny + cSymbol            -- µ
  '\182' -> cAny + cSymbol            -- ¶
  '\183' -> cAny + cSymbol            -- ·
  '\184' -> cAny + cSymbol            -- ¸
  '\185' -> cAny + cSymbol            -- ¹
  '\186' -> cAny + cSymbol            -- º
  '\187' -> cAny + cSymbol            -- »
  '\188' -> cAny + cSymbol            -- ¼
  '\189' -> cAny + cSymbol            -- ½
  '\190' -> cAny + cSymbol            -- ¾
  '\191' -> cAny + cSymbol            -- ¿
  '\192' -> cAny + cIdent + cUpper    -- À
  '\193' -> cAny + cIdent + cUpper    -- Á
  '\194' -> cAny + cIdent + cUpper    -- Â
  '\195' -> cAny + cIdent + cUpper    -- Ã
  '\196' -> cAny + cIdent + cUpper    -- Ä
  '\197' -> cAny + cIdent + cUpper    -- Å
  '\198' -> cAny + cIdent + cUpper    -- Æ
  '\199' -> cAny + cIdent + cUpper    -- Ç
  '\200' -> cAny + cIdent + cUpper    -- È
  '\201' -> cAny + cIdent + cUpper    -- É
  '\202' -> cAny + cIdent + cUpper    -- Ê
  '\203' -> cAny + cIdent + cUpper    -- Ë
  '\204' -> cAny + cIdent + cUpper    -- Ì
  '\205' -> cAny + cIdent + cUpper    -- Í
  '\206' -> cAny + cIdent + cUpper    -- Î
  '\207' -> cAny + cIdent + cUpper    -- Ï
  '\208' -> cAny + cIdent + cUpper    -- Ð
  '\209' -> cAny + cIdent + cUpper    -- Ñ
  '\210' -> cAny + cIdent + cUpper    -- Ò
  '\211' -> cAny + cIdent + cUpper    -- Ó
  '\212' -> cAny + cIdent + cUpper    -- Ô
  '\213' -> cAny + cIdent + cUpper    -- Õ
  '\214' -> cAny + cIdent + cUpper    -- Ö
  '\215' -> cAny + cSymbol            -- × (an operator, not a lowercase letter)
  '\216' -> cAny + cIdent + cUpper    -- Ø
  '\217' -> cAny + cIdent + cUpper    -- Ù
  '\218' -> cAny + cIdent + cUpper    -- Ú
  '\219' -> cAny + cIdent + cUpper    -- Û
  '\220' -> cAny + cIdent + cUpper    -- Ü
  '\221' -> cAny + cIdent + cUpper    -- Ý
  '\222' -> cAny + cIdent + cUpper    -- Þ
  '\223' -> cAny + cIdent + cLower    -- ß (lowercase letter)
  '\224' -> cAny + cIdent + cLower    -- à
  '\225' -> cAny + cIdent + cLower    -- á
  '\226' -> cAny + cIdent + cLower    -- â
  '\227' -> cAny + cIdent + cLower    -- ã
  '\228' -> cAny + cIdent + cLower    -- ä
  '\229' -> cAny + cIdent + cLower    -- å
  '\230' -> cAny + cIdent + cLower    -- æ
  '\231' -> cAny + cIdent + cLower    -- ç
  '\232' -> cAny + cIdent + cLower    -- è
  '\233' -> cAny + cIdent + cLower    -- é
  '\234' -> cAny + cIdent + cLower    -- ê
  '\235' -> cAny + cIdent + cLower    -- ë
  '\236' -> cAny + cIdent + cLower    -- ì
  '\237' -> cAny + cIdent + cLower    -- í
  '\238' -> cAny + cIdent + cLower    -- î
  '\239' -> cAny + cIdent + cLower    -- ï
  '\240' -> cAny + cIdent + cLower    -- ð
  '\241' -> cAny + cIdent + cLower    -- ñ
  '\242' -> cAny + cIdent + cLower    -- ò
  '\243' -> cAny + cIdent + cLower    -- ó
  '\244' -> cAny + cIdent + cLower    -- ô
  '\245' -> cAny + cIdent + cLower    -- õ
  '\246' -> cAny + cIdent + cLower    -- ö
  '\247' -> cAny + cSymbol            -- ÷
  '\248' -> cAny + cIdent + cLower    -- ø (lowercase letter)
  '\249' -> cAny + cIdent + cLower    -- ù
  '\250' -> cAny + cIdent + cLower    -- ú
  '\251' -> cAny + cIdent + cLower    -- û
  '\252' -> cAny + cIdent + cLower    -- ü
  '\253' -> cAny + cIdent + cLower    -- ý
  '\254' -> cAny + cIdent + cLower    -- þ
  '\255' -> cAny + cIdent + cLower    -- ÿ