2 /**********************************************************************
8 **********************************************************************/
10 #include "../../includes/config.h"
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 /* memory.h and strings.h conflict on some systems. */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
31 #include "hsparser.tab.h"
32 #include "constants.h"
35 /* Our substitute for <ctype.h> */
44 #define _isconstr(s) (CharTable[*s]&(_C))
45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
47 static unsigned char CharTable[NCHARS] = {
48 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
50 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
51 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
52 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
53 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
54 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
55 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
56 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
57 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
58 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
59 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
60 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
61 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
62 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
79 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
80 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
83 /**********************************************************************
89 **********************************************************************/
91 char *input_filename = NULL; /* Always points to a dynamically allocated string */
94 * For my own sanity, things that are not part of the flex skeleton
95 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
98 static int hslineno = 0; /* Line number at end of token */
99 int hsplineno = 0; /* Line number at end of previous token */
101 static int hscolno = 0; /* Column number at end of token */
102 int hspcolno = 0; /* Column number at end of previous token */
103 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
105 int modulelineno = -1; /* The line number where the module starts */
106 int startlineno = 0; /* The line number where something starts */
107 int endlineno = 0; /* The line number where something ends */
109 static BOOLEAN noGap = TRUE; /* For checking string gaps */
110 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
112 static int nested_comments; /* For counting comment nesting depth */
114 /* Hacky definition of yywrap: see flex doc.
116 If we don't do this, then we'll have to get the default
117 yywrap from the flex library, which is often something
118 we are not good at locating. This avoids that difficulty.
119 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124 /* Essential forward declarations */
126 static void hsnewid PROTO((char *, int));
127 static void layout_input PROTO((char *, int));
128 static void cleartext (NO_ARGS);
129 static void addtext PROTO((char *, unsigned));
130 static void addchar PROTO((char));
131 static char *fetchtext PROTO((unsigned *));
132 static void new_filename PROTO((char *));
133 static int Return PROTO((int));
134 static void hsentercontext PROTO((int));
136 /* Special file handling for IMPORTS */
137 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
139 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
140 static char *filename_save; /* File Name */
141 static int hslineno_save = 0, /* Line Number */
142 hsplineno_save = 0, /* Line Number of Prev. token */
143 hscolno_save = 0, /* Indentation */
144 hspcolno_save = 0; /* Left Indentation */
145 static short icontexts_save = 0; /* Indent Context Level */
147 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
148 extern BOOLEAN etags; /* that which is saved */
150 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
152 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
153 * inserted before token +ve -- "}" inserted before
156 short icontexts = 0; /* Which context we're in */
159 Table of indentations: right bit indicates whether to use
160 indentation rules (1 = use rules; 0 = ignore)
163 push one of these "contexts" at every "case" or "where"; the right bit says
164 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
166 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
167 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
168 push is shown just below.
173 static short indenttab[MAX_CONTEXTS] = {-1};
175 #define INDENTPT (indenttab[icontexts]>>1)
176 #define INDENTON (indenttab[icontexts]&1)
178 #define RETURN(tok) return(Return(tok))
181 #define YY_DECL int yylex1()
183 /* We should not peek at yy_act, but flex calls us even for the internal action
184 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
185 to support older versions of flex, we'll continue to peek for now.
187 #define YY_USER_ACTION \
188 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
192 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
195 /* Each time we enter a new start state, we push it onto the state stack.
196 Note that the rules do not allow us to underflow or overflow the stack.
197 (At least, they shouldn't.) The maximum expected depth is 4:
198 0: Code -> 1: String -> 2: StringEsc -> 3: Comment
200 static int StateStack[5];
201 static int StateDepth = -1;
204 #define PUSH_STATE(n) do {\
205 fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
206 StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
207 #define POP_STATE do {--StateDepth;\
208 fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
209 BEGIN(StateStack[StateDepth]);} while(0)
211 #define PUSH_STATE(n) do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
212 #define POP_STATE do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
217 /* The start states are:
218 Code -- normal Haskell code (principal lexer)
219 GlaExt -- Haskell code with Glasgow extensions
220 Comment -- Nested comment processing
221 String -- Inside a string literal with backslashes
222 StringEsc -- Immediately following a backslash in a string literal
223 Char -- Inside a character literal with backslashes
224 CharEsc -- Immediately following a backslash in a character literal
226 Note that the INITIAL state is unused. Also note that these states
227 are _exclusive_. All rules should be prefixed with an appropriate
228 list of start states.
231 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
233 isoS [\xa1-\xbf\xd7\xf7]
234 isoL [\xc0-\xd6\xd8-\xde]
235 isol [\xdf-\xf6\xf8-\xff]
242 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
243 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
245 L [A-Z\xc0-\xd6\xd8-\xde]
246 l [a-z\xdf-\xf6\xf8-\xff]
251 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
260 * Special GHC pragma rules. Do we need a start state for interface files,
261 * so these won't be matched in source files? --JSM
265 <Code,GlaExt>^"# ".*{NL} {
266 char tempf[FILENAME_SIZE];
267 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
269 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
272 <Code,GlaExt>^"#line ".*{NL} {
273 char tempf[FILENAME_SIZE];
274 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
276 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
279 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
280 /* partain: pragma-style line directive */
281 char tempf[FILENAME_SIZE];
282 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
284 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
287 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
288 PUSH_STATE(UserPragma);
289 RETURN(INTERFACE_UPRAGMA);
291 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
292 PUSH_STATE(UserPragma);
293 RETURN(SPECIALISE_UPRAGMA);
295 <Code,GlaExt>"{-#"{WS}*"INLINE" {
296 PUSH_STATE(UserPragma);
297 RETURN(INLINE_UPRAGMA);
299 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
300 PUSH_STATE(UserPragma);
301 RETURN(MAGIC_UNFOLDING_UPRAGMA);
303 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
304 PUSH_STATE(UserPragma);
305 RETURN(DEFOREST_UPRAGMA);
307 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
308 /* these are handled by hscpp */
312 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
313 fprintf(stderr, "\"%s\", line %d: Warning: Unrecognised pragma '",
314 input_filename, hsplineno);
315 format_string(stderr, (unsigned char *) yytext, yyleng);
316 fputs("'\n", stderr);
320 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
324 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
325 * intentionally accepted as a keyword even for normal <Code>.
329 <Code,GlaExt>"case" { RETURN(CASE); }
330 <Code,GlaExt>"class" { RETURN(CLASS); }
331 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
332 <Code,GlaExt>"default" { RETURN(DEFAULT); }
333 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
334 <Code,GlaExt>"do" { RETURN(DO); }
335 <Code,GlaExt>"else" { RETURN(ELSE); }
336 <Code,GlaExt>"if" { RETURN(IF); }
337 <Code,GlaExt>"import" { RETURN(IMPORT); }
338 <Code,GlaExt>"in" { RETURN(IN); }
339 <Code,GlaExt>"infix" { RETURN(INFIX); }
340 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
341 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
342 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
343 <Code,GlaExt>"let" { RETURN(LET); }
344 <Code,GlaExt>"module" { RETURN(MODULE); }
345 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
346 <Code,GlaExt>"of" { RETURN(OF); }
347 <Code,GlaExt>"then" { RETURN(THEN); }
348 <Code,GlaExt>"type" { RETURN(TYPE); }
349 <Code,GlaExt>"where" { RETURN(WHERE); }
351 <Code,GlaExt>"as" { RETURN(AS); }
352 <Code,GlaExt>"hiding" { RETURN(HIDING); }
353 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
355 <Code,GlaExt>"_scc_" { RETURN(SCC); }
356 <GlaExt>"_ccall_" { RETURN(CCALL); }
357 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
358 <GlaExt>"_casm_" { RETURN(CASM); }
359 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
363 * Haskell operators: special, reservedops and useful varsyms
367 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
368 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
369 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
370 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
371 <Code,GlaExt>"{" { RETURN(OCURLY); }
372 <Code,GlaExt>"}" { RETURN(CCURLY); }
373 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
374 <Code,GlaExt>";" { RETURN(SEMI); }
375 <Code,GlaExt>"`" { RETURN(BQUOTE); }
376 <Code,GlaExt>"_" { RETURN(WILDCARD); }
378 <Code,GlaExt>".." { RETURN(DOTDOT); }
379 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
380 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
381 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
382 <Code,GlaExt>"|" { RETURN(VBAR); }
383 <Code,GlaExt>"<-" { RETURN(LARROW); }
384 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
385 <Code,GlaExt>"-" { RETURN(MINUS); }
387 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
388 <Code,GlaExt>"@" { RETURN(AT); }
389 <Code,GlaExt>"!" { RETURN(BANG); }
390 <Code,GlaExt>"~" { RETURN(LAZY); }
394 * Integers and (for Glasgow extensions) primitive integers. Note that
395 * we pass all of the text on to the parser, because flex/C can't handle
396 * arbitrary precision numbers.
400 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
401 yylval.uid = xstrndup(yytext, yyleng - 1);
404 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
405 yylval.uid = xstrndup(yytext, yyleng);
408 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
409 yylval.uid = xstrndup(yytext, yyleng - 1);
412 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
413 yylval.uid = xstrndup(yytext, yyleng);
416 <GlaExt>("-")?{N}"#" {
417 yylval.uid = xstrndup(yytext, yyleng - 1);
420 <Code,GlaExt,UserPragma>{N} {
421 yylval.uid = xstrndup(yytext, yyleng);
427 * Floats and (for Glasgow extensions) primitive floats/doubles.
431 <GlaExt>("-")?{F}"##" {
432 yylval.uid = xstrndup(yytext, yyleng - 2);
435 <GlaExt>("-")?{F}"#" {
436 yylval.uid = xstrndup(yytext, yyleng - 1);
440 yylval.uid = xstrndup(yytext, yyleng);
446 * Funky ``foo'' style C literals for Glasgow extensions
450 <GlaExt>"``"[^']+"''" {
451 hsnewid(yytext + 2, yyleng - 4);
457 * Identifiers, both variables and operators. The trailing hash is allowed
458 * for Glasgow extensions.
464 /* These SHOULDNAE work in "Code" (sigh) */
466 <Code,GlaExt,UserPragma>{Id}"#" {
467 if (! nonstandardFlag) {
468 char errbuf[ERR_BUF_SIZE];
469 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
472 hsnewid(yytext, yyleng);
473 RETURN(_isconstr(yytext) ? CONID : VARID);
475 <Code,GlaExt,UserPragma>_+{Id} {
476 if (! nonstandardFlag) {
477 char errbuf[ERR_BUF_SIZE];
478 sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
481 hsnewid(yytext, yyleng);
482 RETURN(isconstr(yytext) ? CONID : VARID);
483 /* NB: ^^^^^^^^ : not the macro! */
485 <Code,GlaExt,UserPragma>{Id} {
486 hsnewid(yytext, yyleng);
487 RETURN(_isconstr(yytext) ? CONID : VARID);
489 <Code,GlaExt,UserPragma>{SId} {
490 hsnewid(yytext, yyleng);
491 RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
493 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
494 BOOLEAN isconstr = hsnewqid(yytext, yyleng);
495 RETURN(isconstr ? QCONID : QVARID);
497 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
498 BOOLEAN isconstr = hsnewqid(yytext, yyleng);
499 RETURN(isconstr ? QCONSYM : QVARSYM);
503 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
505 /* Because we can make the former well-behaved (we defined them).
507 Sadly, the latter is defined by Haskell, which allows such
508 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
512 <GlaExt,UserPragma>"`"{Id}"#`" {
513 hsnewid(yytext + 1, yyleng - 2);
514 RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
519 * Character literals. The first form is the quick form, for character
520 * literals that don't contain backslashes. Literals with backslashes are
521 * lexed through multiple rules. First, we match the open ' and as many
522 * normal characters as possible. This puts us into the <Char> state, where
523 * a backslash is legal. Then, we match the backslash and move into the
524 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
525 * characters and the close '. We may end up with too many characters, but
526 * this allows us to easily share the lex rules with strings. Excess characters
527 * are ignored with a warning.
531 <GlaExt>'({CHAR}|"\"")"'#" {
532 yylval.uhstring = installHstring(1, yytext+1);
535 <Code,GlaExt>'({CHAR}|"\"")' {
536 yylval.uhstring = installHstring(1, yytext+1);
539 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
540 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
543 <Code,GlaExt>'({CHAR}|"\"")* {
544 hsmlcolno = hspcolno;
546 addtext(yytext+1, yyleng-1);
549 <Char>({CHAR}|"\"")*'# {
553 addtext(yytext, yyleng - 2);
554 text = fetchtext(&length);
556 if (! nonstandardFlag) {
557 char errbuf[ERR_BUF_SIZE];
558 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
563 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
564 input_filename, hsplineno, hspcolno + 1);
565 format_string(stderr, (unsigned char *) text, length);
566 fputs("' too long\n", stderr);
569 yylval.uhstring = installHstring(1, text);
570 hspcolno = hsmlcolno;
574 <Char>({CHAR}|"\"")*' {
578 addtext(yytext, yyleng - 1);
579 text = fetchtext(&length);
582 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
583 input_filename, hsplineno, hspcolno + 1);
584 format_string(stderr, (unsigned char *) text, length);
585 fputs("' too long\n", stderr);
588 yylval.uhstring = installHstring(1, text);
589 hspcolno = hsmlcolno;
593 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
598 * String literals. The first form is the quick form, for string literals
599 * that don't contain backslashes. Literals with backslashes are lexed
600 * through multiple rules. First, we match the open " and as many normal
601 * characters as possible. This puts us into the <String> state, where
602 * a backslash is legal. Then, we match the backslash and move into the
603 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
604 * characters, moving back and forth between <String> and <StringEsc> as more
605 * backslashes are encountered. (We may even digress into <Comment> mode if we
606 * find a comment in a gap between backslashes.) Finally, we read the last chunk
607 * of normal characters and the close ".
611 <GlaExt>"\""({CHAR}|"'")*"\""# {
612 yylval.uhstring = installHstring(yyleng-3, yytext+1);
613 /* the -3 accounts for the " on front, "# on the end */
616 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
617 yylval.uhstring = installHstring(yyleng-2, yytext+1);
620 <Code,GlaExt>"\""({CHAR}|"'")* {
621 hsmlcolno = hspcolno;
623 addtext(yytext+1, yyleng-1);
626 <String>({CHAR}|"'")*"\"#" {
630 addtext(yytext, yyleng-2);
631 text = fetchtext(&length);
633 if (! nonstandardFlag) {
634 char errbuf[ERR_BUF_SIZE];
635 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
639 yylval.uhstring = installHstring(length, text);
640 hspcolno = hsmlcolno;
644 <String>({CHAR}|"'")*"\"" {
648 addtext(yytext, yyleng-1);
649 text = fetchtext(&length);
651 yylval.uhstring = installHstring(length, text);
652 hspcolno = hsmlcolno;
656 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
660 * Character and string escapes are roughly the same, but strings have the
661 * extra `\&' sequence which is not allowed for characters. Also, comments
662 * are allowed in the <StringEsc> state. (See the comment section much
665 * NB: Backslashes and tabs are stored in strings as themselves.
666 * But if we print them (in printtree.c), they must go out as
667 * "\\\\" and "\\t" respectively. (This is because of the bogus
668 * intermediate format that the parser produces. It uses '\t' fpr end of
669 * string, so it needs to be able to escape tabs, which means that it
670 * also needs to be able to escape the escape character ('\\'). Sigh.
674 <Char>\\ { PUSH_STATE(CharEsc); }
675 <String>\\& /* Ignore */ ;
676 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
678 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
679 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
681 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
682 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
683 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
684 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
685 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
686 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
687 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
688 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
689 <CharEsc,StringEsc>BEL |
690 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
691 <CharEsc,StringEsc>BS |
692 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
693 <CharEsc,StringEsc>HT |
694 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
695 <CharEsc,StringEsc>LF |
696 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
697 <CharEsc,StringEsc>VT |
698 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
699 <CharEsc,StringEsc>FF |
700 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
701 <CharEsc,StringEsc>CR |
702 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
703 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
704 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
705 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
706 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
707 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
708 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
709 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
710 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
711 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
712 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
713 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
714 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
715 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
716 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
717 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
718 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
719 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
720 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
721 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
722 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
723 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
724 <CharEsc,StringEsc>{D}+ {
725 int i = strtol(yytext, NULL, 10);
729 char errbuf[ERR_BUF_SIZE];
730 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
736 <CharEsc,StringEsc>o{O}+ {
737 int i = strtol(yytext + 1, NULL, 8);
741 char errbuf[ERR_BUF_SIZE];
742 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
748 <CharEsc,StringEsc>x{H}+ {
749 int i = strtol(yytext + 1, NULL, 16);
753 char errbuf[ERR_BUF_SIZE];
754 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
763 * Simple comments and whitespace. Normally, we would just ignore these, but
764 * in case we're processing a string escape, we need to note that we've seen
767 * Note that we cater for a comment line that *doesn't* end in a newline.
768 * This is incorrect, strictly speaking, but seems like the right thing
769 * to do. Reported by Rajiv Mirani. (WDP 95/08)
773 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
774 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
778 * Nested comments. The major complication here is in trying to match the
779 * longest lexemes possible, for better performance. (See the flex document.)
780 * That's why the rules look so bizarre.
784 <Code,GlaExt,UserPragma,StringEsc>"{-" {
785 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
789 <Comment>"-"+[^-{}]+ |
790 <Comment>"{"+[^-{}]+ ;
791 <Comment>"{-" { nested_comments++; }
792 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
797 * Illegal characters. This used to be a single rule, but we might as well
798 * pass on as much information as we have, so now we indicate our state in
803 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
804 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
805 input_filename, hsplineno, hspcolno + 1);
806 format_string(stderr, (unsigned char *) yytext, 1);
807 fputs("'\n", stderr);
811 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
812 input_filename, hsplineno, hspcolno + 1);
813 format_string(stderr, (unsigned char *) yytext, 1);
814 fputs("' in a character literal\n", stderr);
818 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
819 input_filename, hsplineno, hspcolno + 1);
820 format_string(stderr, (unsigned char *) yytext, 1);
821 fputs("'\n", stderr);
824 <String>(.|\n) { if (nonstandardFlag) {
825 addtext(yytext, yyleng);
827 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
828 input_filename, hsplineno, hspcolno + 1);
829 format_string(stderr, (unsigned char *) yytext, 1);
830 fputs("' in a string literal\n", stderr);
836 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
837 input_filename, hsplineno, hspcolno + 1);
838 format_string(stderr, (unsigned char *) yytext, 1);
839 fputs("'\n", stderr);
842 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
843 input_filename, hsplineno, hspcolno + 1);
844 format_string(stderr, (unsigned char *) yytext, 1);
845 fputs("' in a string gap\n", stderr);
852 * End of file. In any sub-state, this is an error. However, for the primary
853 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
854 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
855 * adding virtual close curlies, or closing an interface and returning to the
856 * primary source file.
858 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
859 * line/column advancement has to be done by hand.
863 <Char,CharEsc><<EOF>> {
864 hsplineno = hslineno; hspcolno = hscolno;
865 hsperror("unterminated character literal");
868 hsplineno = hslineno; hspcolno = hscolno;
869 hsperror("unterminated comment");
871 <String,StringEsc><<EOF>> {
872 hsplineno = hslineno; hspcolno = hscolno;
873 hsperror("unterminated string literal");
875 <UserPragma><<EOF>> {
876 hsplineno = hslineno; hspcolno = hscolno;
877 hsperror("unterminated user-specified pragma");
879 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
883 /**********************************************************************
886 * YACC/LEX Initialisation etc. *
889 **********************************************************************/
892 We initialise input_filename to "<stdin>".
893 This allows unnamed sources to be piped into the parser.
899 input_filename = xstrdup("<stdin>");
901 /* We must initialize the input buffer _now_, because we call
902 setyyin _before_ calling yylex for the first time! */
903 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
912 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
914 if (input_filename != NULL)
915 free(input_filename);
916 input_filename = xstrdup(f);
919 /**********************************************************************
922 * Layout Processing *
925 **********************************************************************/
928 The following section deals with Haskell Layout conventions
929 forcing insertion of ; or } as appropriate
935 return (!forgetindent && INDENTON);
939 /* Enter new context and set new indentation level */
944 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
948 * partain: first chk that new indent won't be less than current one; this code
949 * doesn't make sense to me; hscolno tells the position of the _end_ of the
950 * current token; what that has to do with indenting, I don't know.
954 if (hscolno - 1 <= INDENTPT) {
956 return; /* Empty input OK for Haskell 1.1 */
958 char errbuf[ERR_BUF_SIZE];
960 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
964 hsentercontext((hspcolno << 1) | 1);
968 /* Enter a new context without changing the indentation level */
973 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
975 hsentercontext(indenttab[icontexts] & ~1);
979 /* Turn off indentation processing, usually because an explicit "{" has been seen */
987 /* Enter a new layout context. */
989 hsentercontext(int indent)
991 /* Enter new context and set indentation as specified */
992 if (++icontexts >= MAX_CONTEXTS) {
993 char errbuf[ERR_BUF_SIZE];
995 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
998 forgetindent = FALSE;
999 indenttab[icontexts] = indent;
1001 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1006 /* Exit a layout context */
1012 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1017 * Return checks the indentation level and returns ;, } or the specified token.
1027 if (hsshouldindent()) {
1028 if (hspcolno < INDENTPT) {
1030 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1034 } else if (hspcolno == INDENTPT) {
1036 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1044 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1051 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1057 static BOOLEAN eof = FALSE;
1060 if (hssttok != -1) {
1068 endlineno = hslineno;
1069 if ((tok = yylex1()) != EOF)
1075 if (icontexts > icontexts_save) {
1078 indenttab[icontexts] = 0;
1081 hsperror("missing '}' at end of file");
1082 } else if (hsbuf_save != NULL) {
1084 yy_delete_buffer(YY_CURRENT_BUFFER);
1085 yy_switch_to_buffer(hsbuf_save);
1087 new_filename(filename_save);
1088 free(filename_save);
1089 hslineno = hslineno_save;
1090 hsplineno = hsplineno_save;
1091 hscolno = hscolno_save;
1092 hspcolno = hspcolno_save;
1094 icontexts = icontexts_save - 1;
1097 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1102 hsperror("No longer using yacc to parse interface files");
1107 abort(); /* should never get here! */
1111 /**********************************************************************
1114 * Input Processing for Interfaces -- Not currently used !!! *
1117 **********************************************************************/
1119 /* setyyin(file) open file as new lex input buffer */
1125 hsbuf_save = YY_CURRENT_BUFFER;
1126 if ((yyin = fopen(file, "r")) == NULL) {
1127 char errbuf[ERR_BUF_SIZE];
1129 sprintf(errbuf, "can't read \"%-.50s\"", file);
1132 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1134 hslineno_save = hslineno;
1135 hsplineno_save = hsplineno;
1136 hslineno = hsplineno = 1;
1138 filename_save = input_filename;
1139 input_filename = NULL;
1141 hscolno_save = hscolno;
1142 hspcolno_save = hspcolno;
1143 hscolno = hspcolno = 0;
1144 etags_save = etags; /* do not do "etags" stuff in interfaces */
1145 etags = 0; /* We remember whether we are doing it in
1146 the module, so we can restore it later [WDP 94/09] */
1147 hsentercontext(-1); /* partain: changed this from 0 */
1148 icontexts_save = icontexts;
1150 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1155 layout_input(char *text, int len)
1158 fprintf(stderr, "Scanning \"%s\"\n", text);
1161 hsplineno = hslineno;
1173 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1185 setstartlineno(void)
1187 startlineno = hsplineno;
1189 if (modulelineno == 0) {
1190 modulelineno = startlineno;
1196 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1200 /**********************************************************************
1206 **********************************************************************/
1208 #define CACHE_SIZE YY_BUF_SIZE
1214 } textcache = { 0, 0, NULL };
1219 /* fprintf(stderr, "cleartext\n"); */
1221 if (textcache.allocated == 0) {
1222 textcache.allocated = CACHE_SIZE;
1223 textcache.text = xmalloc(CACHE_SIZE);
1228 addtext(char *text, unsigned length)
1230 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1235 if (textcache.next + length + 1 >= textcache.allocated) {
1236 textcache.allocated += length + CACHE_SIZE;
1237 textcache.text = xrealloc(textcache.text, textcache.allocated);
1239 bcopy(text, textcache.text + textcache.next, length);
1240 textcache.next += length;
1246 /* fprintf(stderr, "addchar: %c\n", c); */
1248 if (textcache.next + 2 >= textcache.allocated) {
1249 textcache.allocated += CACHE_SIZE;
1250 textcache.text = xrealloc(textcache.text, textcache.allocated);
1252 textcache.text[textcache.next++] = c;
1256 fetchtext(unsigned *length)
1258 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1260 *length = textcache.next;
1261 textcache.text[textcache.next] = '\0';
1262 return textcache.text;
1265 /**********************************************************************
1268 * Identifier Processing *
1271 **********************************************************************/
1274 hsnewid Enters an id of length n into the symbol table.
1278 hsnewid(char *name, int length)
1280 char save = name[length];
1282 name[length] = '\0';
1283 yylval.uid = installid(name);
1284 name[length] = save;
1288 hsnewqid(char *name, int length)
1291 char save = name[length];
1292 name[length] = '\0';
1294 dot = strchr(name, '.');
1296 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1298 name[length] = save;
1300 return _isconstr(dot+1);
1304 isconstr(char *s) /* walks past leading underscores before using the macro */
1308 for ( ; temp != NULL && *temp == '_' ; temp++ );
1310 return _isconstr(temp);