1 /* --------------------------------------------------------------------------
2 * Character set handling:
4 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
5 * character set. The following code provides methods for classifying
6 * input characters according to the lexical structure specified by the
7 * report. Hugs should still accept older programs because ASCII is
8 * essentially just a subset of the ISO character set.
10 * Notes: If you want to port Hugs to a machine that uses something
11 * substantially different from the ISO character set, then you will need
12 * to insert additional code to map between character sets.
14 * Relies on the assumption that NUM_CHARS=256, stated only in this comment.
15 * ------------------------------------------------------------------------*/
22 unsigned char ctable[NUM_CHARS]; /* Class flag bits per char code; set by initCharTab() */
24 Void initCharTab() { /* Initialize char decode table */
25 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
26 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
27 #define setCopy(x,c) {Int i; \
28 for (i=0; i<NUM_CHARS; ++i) \
33 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
35 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
36 setRange(SMALL, 223,246); /* ISO lower case letters */
37 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
39 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
40 setRange(LARGE, 192,214); /* ISO upper case letters */
41 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
43 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
44 setRange(SYMBOL, 215,215);
45 setRange(SYMBOL, 247,247);
46 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
48 setChars(IDAFTER, "'_"); /* Characters in identifier */
49 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
51 setRange(SPACE, ' ',' '); /* ASCII space character */
52 setRange(SPACE, 160,160); /* ISO non breaking space */
53 setRange(SPACE, 9,13); /* special whitespace: \t\n\v\f\r */
55 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
56 setChars(PRINT, " '\""); /* Space and quotes */
57 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));