2 /**********************************************************************
8 **********************************************************************/
10 /* The includes/config.h one */
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
17 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
28 /* memory.h and strings.h conflict on some systems. */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
32 #include "hsparser.tab.h"
33 #include "constants.h"
36 /* Our substitute for <ctype.h> */
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
47 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
48 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
50 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
51 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
52 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
54 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
55 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
57 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
58 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
59 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
61 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
84 return(CharTable[*s]&(_C));
87 /**********************************************************************
93 **********************************************************************/
95 char *input_filename = NULL; /* Always points to a dynamically allocated string */
98 * For my own sanity, things that are not part of the flex skeleton
99 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
102 static int hslineno = 0; /* Line number at end of token */
103 int hsplineno = 0; /* Line number at end of previous token */
105 static int hscolno = 0; /* Column number at end of token */
106 int hspcolno = 0; /* Column number at end of previous token */
107 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
109 int modulelineno = -1; /* The line number where the module starts */
110 int startlineno = 0; /* The line number where something starts */
111 int endlineno = 0; /* The line number where something ends */
113 static BOOLEAN noGap = TRUE; /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
116 static int nested_comments; /* For counting comment nesting depth */
118 /* OLD: Hacky definition of yywrap: see flex doc.
120 If we don't do this, then we'll have to get the default
121 yywrap from the flex library, which is often something
122 we are not good at locating. This avoids that difficulty.
123 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
128 /* Essential forward declarations */
130 static void hsnewid PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext (NO_ARGS);
133 static void addtext PROTO((char *, unsigned));
134 static void addchar PROTO((char));
135 static char *fetchtext PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int Return PROTO((int));
138 static void hsentercontext PROTO((int));
140 /* Special file handling for IMPORTS */
141 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
143 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
144 static char *filename_save; /* File Name */
145 static int hslineno_save = 0, /* Line Number */
146 hsplineno_save = 0, /* Line Number of Prev. token */
147 hscolno_save = 0, /* Indentation */
148 hspcolno_save = 0; /* Left Indentation */
149 static short icontexts_save = 0; /* Indent Context Level */
151 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags; /* that which is saved */
154 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
156 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
157 * inserted before token +ve -- "}" inserted before
160 short icontexts = 0; /* Which context we're in */
163 Table of indentations: right bit indicates whether to use
164 indentation rules (1 = use rules; 0 = ignore)
167 push one of these "contexts" at every "case" or "where"; the right bit says
168 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
170 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
171 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
172 push is shown just below.
177 static short indenttab[MAX_CONTEXTS] = {-1};
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
182 #define RETURN(tok) return(Return(tok))
185 #define YY_DECL int yylex1()
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189 to support older versions of flex, we'll continue to peek for now.
191 #define YY_USER_ACTION \
192 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
199 /* Each time we enter a new start state, we push it onto the state stack.
201 #define PUSH_STATE(n) yy_push_state(n)
202 #define POP_STATE yy_pop_state()
207 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208 never-interactive (to go a bit faster)
209 stack (use a start-condition stack)
213 %option never-interactive
216 /* The start states are:
217 Code -- normal Haskell code (principal lexer)
218 GlaExt -- Haskell code with Glasgow extensions
219 Comment -- Nested comment processing
220 String -- Inside a string literal with backslashes
221 StringEsc -- Immediately following a backslash in a string literal
222 Char -- Inside a character literal with backslashes
223 CharEsc -- Immediately following a backslash in a character literal
225 Note that the INITIAL state is unused. Also note that these states
226 are _exclusive_. All rules should be prefixed with an appropriate
227 list of start states.
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
232 isoS [\xa1-\xbf\xd7\xf7]
233 isoL [\xc0-\xd6\xd8-\xde]
234 isol [\xdf-\xf6\xf8-\xff]
241 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
244 L [A-Z\xc0-\xd6\xd8-\xde]
245 l [a-z\xdf-\xf6\xf8-\xff]
250 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
259 * Simple comments and whitespace. Normally, we would just ignore these, but
260 * in case we're processing a string escape, we need to note that we've seen
263 * Note that we cater for a comment line that *doesn't* end in a newline.
264 * This is incorrect, strictly speaking, but seems like the right thing
265 * to do. Reported by Rajiv Mirani. (WDP 95/08)
267 * Hackily moved up here so that --<<EOF>> will match -- SOF 5/97
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
276 * Special GHC pragma rules. Do we need a start state for interface files,
277 * so these won't be matched in source files? --JSM
281 <Code,GlaExt>^"# ".*{NL} {
282 char tempf[FILENAME_SIZE];
283 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
285 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
288 <Code,GlaExt>^"#line ".*{NL} {
289 char tempf[FILENAME_SIZE];
290 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
292 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
295 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
296 /* partain: pragma-style line directive */
297 char tempf[FILENAME_SIZE];
298 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
300 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
303 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
304 PUSH_STATE(UserPragma);
305 RETURN(INTERFACE_UPRAGMA);
307 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
308 PUSH_STATE(UserPragma);
309 RETURN(SPECIALISE_UPRAGMA);
311 <Code,GlaExt>"{-#"{WS}*"INLINE" {
312 PUSH_STATE(UserPragma);
313 RETURN(INLINE_UPRAGMA);
315 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
316 PUSH_STATE(UserPragma);
317 RETURN(MAGIC_UNFOLDING_UPRAGMA);
319 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
320 PUSH_STATE(UserPragma);
321 RETURN(DEFOREST_UPRAGMA);
323 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
324 /* these are handled by hscpp */
328 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
329 /* these are for the driver! */
333 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
334 /* these are used by `make depend' and the
335 compiler to indicate that a module should
336 be imported from source */
338 RETURN(SOURCE_UPRAGMA);
341 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
342 fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
343 input_filename, hsplineno);
344 format_string(stderr, (unsigned char *) yytext, yyleng);
345 fputs("'\n", stderr);
349 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
353 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
354 * intentionally accepted as a keyword even for normal <Code>.
358 <Code,GlaExt>"case" { RETURN(CASE); }
359 <Code,GlaExt>"class" { RETURN(CLASS); }
360 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
361 <Code,GlaExt>"default" { RETURN(DEFAULT); }
362 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
363 <Code,GlaExt>"do" { RETURN(DO); }
364 <Code,GlaExt>"else" { RETURN(ELSE); }
365 <Code,GlaExt>"if" { RETURN(IF); }
366 <Code,GlaExt>"import" { RETURN(IMPORT); }
367 <Code,GlaExt>"in" { RETURN(IN); }
368 <Code,GlaExt>"infix" { RETURN(INFIX); }
369 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
370 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
371 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
372 <Code,GlaExt>"let" { RETURN(LET); }
373 <Code,GlaExt>"module" { RETURN(MODULE); }
374 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
375 <Code,GlaExt>"of" { RETURN(OF); }
376 <Code,GlaExt>"then" { RETURN(THEN); }
377 <Code,GlaExt>"type" { RETURN(TYPE); }
378 <Code,GlaExt>"where" { RETURN(WHERE); }
380 <Code,GlaExt>"as" { RETURN(AS); }
381 <Code,GlaExt>"hiding" { RETURN(HIDING); }
382 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
384 <Code,GlaExt>"_scc_" { RETURN(SCC); }
385 <GlaExt>"_ccall_" { RETURN(CCALL); }
386 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
387 <GlaExt>"_casm_" { RETURN(CASM); }
388 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
392 * Haskell operators: special, reservedops and useful varsyms
396 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
397 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
398 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
399 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
400 <Code,GlaExt>"{" { RETURN(OCURLY); }
401 <Code,GlaExt>"}" { RETURN(CCURLY); }
402 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
403 <Code,GlaExt>";" { RETURN(SEMI); }
404 <Code,GlaExt>"`" { RETURN(BQUOTE); }
405 <Code,GlaExt>"_" { RETURN(WILDCARD); }
407 <Code,GlaExt>".." { RETURN(DOTDOT); }
408 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
409 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
410 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
411 <Code,GlaExt>"|" { RETURN(VBAR); }
412 <Code,GlaExt>"<-" { RETURN(LARROW); }
413 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
414 <Code,GlaExt>"-" { RETURN(MINUS); }
415 <Code,GlaExt>"+" { RETURN(PLUS); }
417 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
418 <Code,GlaExt>"@" { RETURN(AT); }
419 <Code,GlaExt>"!" { RETURN(BANG); }
420 <Code,GlaExt>"~" { RETURN(LAZY); }
424 * Integers and (for Glasgow extensions) primitive integers. Note that
425 * we pass all of the text on to the parser, because flex/C can't handle
426 * arbitrary precision numbers.
430 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
431 yylval.uid = xstrndup(yytext, yyleng - 1);
434 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
435 yylval.uid = xstrndup(yytext, yyleng);
438 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
439 yylval.uid = xstrndup(yytext, yyleng - 1);
442 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
443 yylval.uid = xstrndup(yytext, yyleng);
446 <GlaExt>("-")?{N}"#" {
447 yylval.uid = xstrndup(yytext, yyleng - 1);
450 <Code,GlaExt,UserPragma>{N} {
451 yylval.uid = xstrndup(yytext, yyleng);
457 * Floats and (for Glasgow extensions) primitive floats/doubles.
461 <GlaExt>("-")?{F}"##" {
462 yylval.uid = xstrndup(yytext, yyleng - 2);
465 <GlaExt>("-")?{F}"#" {
466 yylval.uid = xstrndup(yytext, yyleng - 1);
470 yylval.uid = xstrndup(yytext, yyleng);
476 * Funky ``foo'' style C literals for Glasgow extensions
480 <GlaExt>"``"[^']+"''" {
481 hsnewid(yytext + 2, yyleng - 4);
487 * Identifiers, both variables and operators. The trailing hash is allowed
488 * for Glasgow extensions.
494 /* These SHOULDNAE work in "Code" (sigh) */
496 <GlaExt,UserPragma>{Id}"#" {
497 if (! nonstandardFlag) {
498 char errbuf[ERR_BUF_SIZE];
499 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
502 hsnewid(yytext, yyleng);
503 RETURN(isconstr(yytext) ? CONID : VARID);
505 <Code,GlaExt,UserPragma>{Id} {
506 hsnewid(yytext, yyleng);
507 RETURN(isconstr(yytext) ? CONID : VARID);
509 <Code,GlaExt,UserPragma>{SId} {
510 hsnewid(yytext, yyleng);
511 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
513 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
515 if (! nonstandardFlag) {
516 char errbuf[ERR_BUF_SIZE];
517 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
520 is_constr = hsnewqid(yytext, yyleng);
521 RETURN(is_constr ? QCONID : QVARID);
523 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
524 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
525 RETURN(is_constr ? QCONID : QVARID);
527 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
528 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
529 RETURN(is_constr ? QCONSYM : QVARSYM);
533 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
535 /* Because we can make the former well-behaved (we defined them).
537 Sadly, the latter is defined by Haskell, which allows such
538 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
542 <GlaExt,UserPragma>"`"{Id}"#`" {
543 hsnewid(yytext + 1, yyleng - 2);
544 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
549 * Character literals. The first form is the quick form, for character
550 * literals that don't contain backslashes. Literals with backslashes are
551 * lexed through multiple rules. First, we match the open ' and as many
552 * normal characters as possible. This puts us into the <Char> state, where
553 * a backslash is legal. Then, we match the backslash and move into the
554 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
555 * characters and the close '. We may end up with too many characters, but
556 * this allows us to easily share the lex rules with strings. Excess characters
557 * are ignored with a warning.
561 <GlaExt>'({CHAR}|"\"")"'#" {
562 yylval.uhstring = installHstring(1, yytext+1);
565 <Code,GlaExt>'({CHAR}|"\"")' {
566 yylval.uhstring = installHstring(1, yytext+1);
569 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
570 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
573 <Code,GlaExt>'({CHAR}|"\"")* {
574 hsmlcolno = hspcolno;
576 addtext(yytext+1, yyleng-1);
579 <Char>({CHAR}|"\"")*'# {
583 addtext(yytext, yyleng - 2);
584 text = fetchtext(&length);
586 if (! nonstandardFlag) {
587 char errbuf[ERR_BUF_SIZE];
588 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
593 fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
594 input_filename, hsplineno, hspcolno + 1);
595 format_string(stderr, (unsigned char *) text, length);
596 fputs("' too long\n", stderr);
599 yylval.uhstring = installHstring(1, text);
600 hspcolno = hsmlcolno;
604 <Char>({CHAR}|"\"")*' {
608 addtext(yytext, yyleng - 1);
609 text = fetchtext(&length);
612 fprintf(stderr, "%s:%d:%d: Character literal '",
613 input_filename, hsplineno, hspcolno + 1);
614 format_string(stderr, (unsigned char *) text, length);
615 fputs("' too long\n", stderr);
618 yylval.uhstring = installHstring(1, text);
619 hspcolno = hsmlcolno;
623 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
628 * String literals. The first form is the quick form, for string literals
629 * that don't contain backslashes. Literals with backslashes are lexed
630 * through multiple rules. First, we match the open " and as many normal
631 * characters as possible. This puts us into the <String> state, where
632 * a backslash is legal. Then, we match the backslash and move into the
633 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
634 * characters, moving back and forth between <String> and <StringEsc> as more
635 * backslashes are encountered. (We may even digress into <Comment> mode if we
636 * find a comment in a gap between backslashes.) Finally, we read the last chunk
637 * of normal characters and the close ".
641 <GlaExt>"\""({CHAR}|"'")*"\""# {
642 yylval.uhstring = installHstring(yyleng-3, yytext+1);
643 /* the -3 accounts for the " on front, "# on the end */
646 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
647 yylval.uhstring = installHstring(yyleng-2, yytext+1);
650 <Code,GlaExt>"\""({CHAR}|"'")* {
651 hsmlcolno = hspcolno;
653 addtext(yytext+1, yyleng-1);
656 <String>({CHAR}|"'")*"\"#" {
660 addtext(yytext, yyleng-2);
661 text = fetchtext(&length);
663 if (! nonstandardFlag) {
664 char errbuf[ERR_BUF_SIZE];
665 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
669 yylval.uhstring = installHstring(length, text);
670 hspcolno = hsmlcolno;
674 <String>({CHAR}|"'")*"\"" {
678 addtext(yytext, yyleng-1);
679 text = fetchtext(&length);
681 yylval.uhstring = installHstring(length, text);
682 hspcolno = hsmlcolno;
686 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
690 * Character and string escapes are roughly the same, but strings have the
691 * extra `\&' sequence which is not allowed for characters. Also, comments
692 * are allowed in the <StringEsc> state. (See the comment section much
695 * NB: Backslashes and tabs are stored in strings as themselves.
696 * But if we print them (in printtree.c), they must go out as
697 * "\\\\" and "\\t" respectively. (This is because of the bogus
698 * intermediate format that the parser produces. It uses '\t' fpr end of
699 * string, so it needs to be able to escape tabs, which means that it
700 * also needs to be able to escape the escape character ('\\'). Sigh.
704 <Char>\\ { PUSH_STATE(CharEsc); }
705 <String>\\& /* Ignore */ ;
706 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
708 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
709 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
711 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
712 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
713 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
714 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
715 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
716 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
717 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
718 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
719 <CharEsc,StringEsc>BEL |
720 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
721 <CharEsc,StringEsc>BS |
722 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
723 <CharEsc,StringEsc>HT |
724 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
725 <CharEsc,StringEsc>LF |
726 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
727 <CharEsc,StringEsc>VT |
728 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
729 <CharEsc,StringEsc>FF |
730 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
731 <CharEsc,StringEsc>CR |
732 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
733 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
734 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
735 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
736 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
737 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
738 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
739 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
740 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
741 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
742 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
743 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
744 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
745 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
746 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
747 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
748 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
749 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
750 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
751 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
752 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
753 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
754 <CharEsc,StringEsc>{D}+ {
755 int i = strtol(yytext, NULL, 10);
759 char errbuf[ERR_BUF_SIZE];
760 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
766 <CharEsc,StringEsc>o{O}+ {
767 int i = strtol(yytext + 1, NULL, 8);
771 char errbuf[ERR_BUF_SIZE];
772 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
778 <CharEsc,StringEsc>x{H}+ {
779 int i = strtol(yytext + 1, NULL, 16);
783 char errbuf[ERR_BUF_SIZE];
784 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
794 * Nested comments. The major complication here is in trying to match the
795 * longest lexemes possible, for better performance. (See the flex document.)
796 * That's why the rules look so bizarre.
800 <Code,GlaExt,UserPragma,StringEsc>"{-" {
801 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
805 <Comment>"-"+[^-{}]+ |
806 <Comment>"{"+[^-{}]+ ;
807 <Comment>"{-" { nested_comments++; }
808 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
813 * Illegal characters. This used to be a single rule, but we might as well
814 * pass on as much information as we have, so now we indicate our state in
819 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
820 fprintf(stderr, "%s:%d:%d: Illegal character: `",
821 input_filename, hsplineno, hspcolno + 1);
822 format_string(stderr, (unsigned char *) yytext, 1);
823 fputs("'\n", stderr);
827 fprintf(stderr, "%s:%d:%d: Illegal character: `",
828 input_filename, hsplineno, hspcolno + 1);
829 format_string(stderr, (unsigned char *) yytext, 1);
830 fputs("' in a character literal\n", stderr);
834 fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
835 input_filename, hsplineno, hspcolno + 1);
836 format_string(stderr, (unsigned char *) yytext, 1);
837 fputs("'\n", stderr);
840 <String>(.|\n) { if (nonstandardFlag) {
841 addtext(yytext, yyleng);
843 fprintf(stderr, "%s:%d:%d: Illegal character: `",
844 input_filename, hsplineno, hspcolno + 1);
845 format_string(stderr, (unsigned char *) yytext, 1);
846 fputs("' in a string literal\n", stderr);
852 fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
853 input_filename, hsplineno, hspcolno + 1);
854 format_string(stderr, (unsigned char *) yytext, 1);
855 fputs("'\n", stderr);
858 fprintf(stderr, "%s:%d:%d: Illegal character: `",
859 input_filename, hsplineno, hspcolno + 1);
860 format_string(stderr, (unsigned char *) yytext, 1);
861 fputs("' in a string gap\n", stderr);
868 * End of file. In any sub-state, this is an error. However, for the primary
869 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
870 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
871 * adding virtual close curlies, or closing an interface and returning to the
872 * primary source file.
874 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
875 * line/column advancement has to be done by hand.
879 <Char,CharEsc><<EOF>> {
880 hsplineno = hslineno; hspcolno = hscolno;
881 hsperror("unterminated character literal");
884 hsplineno = hslineno; hspcolno = hscolno;
885 hsperror("unterminated comment");
887 <String,StringEsc><<EOF>> {
888 hsplineno = hslineno; hspcolno = hscolno;
889 hsperror("unterminated string literal");
891 <UserPragma><<EOF>> {
892 hsplineno = hslineno; hspcolno = hscolno;
893 hsperror("unterminated user-specified pragma");
895 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
899 /**********************************************************************
902 * YACC/LEX Initialisation etc. *
905 **********************************************************************/
908 We initialise input_filename to "<stdin>".
909 This allows unnamed sources to be piped into the parser.
915 input_filename = xstrdup("<stdin>");
917 /* We must initialize the input buffer _now_, because we call
918 setyyin _before_ calling yylex for the first time! */
919 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
928 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
930 if (input_filename != NULL)
931 free(input_filename);
932 input_filename = xstrdup(f);
935 /**********************************************************************
938 * Layout Processing *
941 **********************************************************************/
944 The following section deals with Haskell Layout conventions
945 forcing insertion of ; or } as appropriate
951 return (!forgetindent && INDENTON);
955 /* Enter new context and set new indentation level */
960 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
964 * partain: first chk that new indent won't be less than current one; this code
965 * doesn't make sense to me; hscolno tells the position of the _end_ of the
966 * current token; what that has to do with indenting, I don't know.
970 if (hscolno - 1 <= INDENTPT) {
972 return; /* Empty input OK for Haskell 1.1 */
974 char errbuf[ERR_BUF_SIZE];
976 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
980 hsentercontext((hspcolno << 1) | 1);
984 /* Enter a new context without changing the indentation level */
989 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
991 hsentercontext(indenttab[icontexts] & ~1);
995 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1003 /* Enter a new layout context. */
1005 hsentercontext(int indent)
1007 /* Enter new context and set indentation as specified */
1008 if (++icontexts >= MAX_CONTEXTS) {
1009 char errbuf[ERR_BUF_SIZE];
1011 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1014 forgetindent = FALSE;
1015 indenttab[icontexts] = indent;
1017 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1022 /* Exit a layout context */
1028 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1033 * Return checks the indentation level and returns ;, } or the specified token.
1043 if (hsshouldindent()) {
1044 if (hspcolno < INDENTPT) {
1046 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1050 } else if (hspcolno == INDENTPT) {
1052 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1060 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1067 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1073 static BOOLEAN eof = FALSE;
1076 if (hssttok != -1) {
1084 endlineno = hslineno;
1085 if ((tok = yylex1()) != EOF)
1091 if (icontexts > icontexts_save) {
1094 indenttab[icontexts] = 0;
1097 hsperror("missing '}' at end of file");
1098 } else if (hsbuf_save != NULL) {
1100 yy_delete_buffer(YY_CURRENT_BUFFER);
1101 yy_switch_to_buffer(hsbuf_save);
1103 new_filename(filename_save);
1104 free(filename_save);
1105 hslineno = hslineno_save;
1106 hsplineno = hsplineno_save;
1107 hscolno = hscolno_save;
1108 hspcolno = hspcolno_save;
1110 icontexts = icontexts_save - 1;
1113 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1118 hsperror("No longer using yacc to parse interface files");
1123 abort(); /* should never get here! */
1127 /**********************************************************************
1130 * Input Processing for Interfaces -- Not currently used !!! *
1133 **********************************************************************/
1135 /* setyyin(file) open file as new lex input buffer */
1141 hsbuf_save = YY_CURRENT_BUFFER;
1142 if ((yyin = fopen(file, "r")) == NULL) {
1143 char errbuf[ERR_BUF_SIZE];
1145 sprintf(errbuf, "can't read \"%-.50s\"", file);
1148 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1150 hslineno_save = hslineno;
1151 hsplineno_save = hsplineno;
1152 hslineno = hsplineno = 1;
1154 filename_save = input_filename;
1155 input_filename = NULL;
1157 hscolno_save = hscolno;
1158 hspcolno_save = hspcolno;
1159 hscolno = hspcolno = 0;
1160 etags_save = etags; /* do not do "etags" stuff in interfaces */
1161 etags = 0; /* We remember whether we are doing it in
1162 the module, so we can restore it later [WDP 94/09] */
1163 hsentercontext(-1); /* partain: changed this from 0 */
1164 icontexts_save = icontexts;
1166 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1171 layout_input(char *text, int len)
1174 fprintf(stderr, "Scanning \"%s\"\n", text);
1177 hsplineno = hslineno;
1189 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1201 setstartlineno(void)
1203 startlineno = hsplineno;
1205 if (modulelineno == 0) {
1206 modulelineno = startlineno;
1212 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1216 /**********************************************************************
1222 **********************************************************************/
1224 #define CACHE_SIZE YY_BUF_SIZE
1230 } textcache = { 0, 0, NULL };
1235 /* fprintf(stderr, "cleartext\n"); */
1237 if (textcache.allocated == 0) {
1238 textcache.allocated = CACHE_SIZE;
1239 textcache.text = xmalloc(CACHE_SIZE);
1244 addtext(char *text, unsigned length)
1246 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1251 if (textcache.next + length + 1 >= textcache.allocated) {
1252 textcache.allocated += length + CACHE_SIZE;
1253 textcache.text = xrealloc(textcache.text, textcache.allocated);
1255 bcopy(text, textcache.text + textcache.next, length);
1256 textcache.next += length;
1262 /* fprintf(stderr, "addchar: %c\n", c); */
1264 if (textcache.next + 2 >= textcache.allocated) {
1265 textcache.allocated += CACHE_SIZE;
1266 textcache.text = xrealloc(textcache.text, textcache.allocated);
1268 textcache.text[textcache.next++] = c;
1272 fetchtext(unsigned *length)
1274 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1276 *length = textcache.next;
1277 textcache.text[textcache.next] = '\0';
1278 return textcache.text;
1281 /**********************************************************************
1284 * Identifier Processing *
1287 **********************************************************************/
1290 hsnewid Enters an id of length n into the symbol table.
1294 hsnewid(char *name, int length)
1296 char save = name[length];
1298 name[length] = '\0';
1299 yylval.uid = installid(name);
1300 name[length] = save;
1304 hsnewqid(char *name, int length)
1307 char save = name[length];
1308 name[length] = '\0';
1310 dot = strchr(name, '.');
1312 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1314 name[length] = save;
1316 return isconstr(dot+1);