2 /**********************************************************************
8 **********************************************************************/
10 #include "../../includes/config.h"
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 /* memory.h and strings.h conflict on some systems. */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
31 #include "hsparser.tab.h"
32 #include "constants.h"
35 /* Our substitute for <ctype.h> */
44 static unsigned char CharTable[NCHARS] = {
45 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
46 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
47 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
48 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
50 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
51 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
52 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
53 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
54 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
55 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
57 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
58 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
59 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
62 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
83 return(CharTable[*s]&(_C));
86 /**********************************************************************
92 **********************************************************************/
94 char *input_filename = NULL; /* Always points to a dynamically allocated string */
97 * For my own sanity, things that are not part of the flex skeleton
98 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
101 static int hslineno = 0; /* Line number at end of token */
102 int hsplineno = 0; /* Line number at end of previous token */
104 static int hscolno = 0; /* Column number at end of token */
105 int hspcolno = 0; /* Column number at end of previous token */
106 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
108 int modulelineno = -1; /* The line number where the module starts */
109 int startlineno = 0; /* The line number where something starts */
110 int endlineno = 0; /* The line number where something ends */
112 static BOOLEAN noGap = TRUE; /* For checking string gaps */
113 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
115 static int nested_comments; /* For counting comment nesting depth */
117 /* OLD: Hacky definition of yywrap: see flex doc.
119 If we don't do this, then we'll have to get the default
120 yywrap from the flex library, which is often something
121 we are not good at locating. This avoids that difficulty.
122 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
127 /* Essential forward declarations */
129 static void hsnewid PROTO((char *, int));
130 static void layout_input PROTO((char *, int));
131 static void cleartext (NO_ARGS);
132 static void addtext PROTO((char *, unsigned));
133 static void addchar PROTO((char));
134 static char *fetchtext PROTO((unsigned *));
135 static void new_filename PROTO((char *));
136 static int Return PROTO((int));
137 static void hsentercontext PROTO((int));
139 /* Special file handling for IMPORTS */
140 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
143 static char *filename_save; /* File Name */
144 static int hslineno_save = 0, /* Line Number */
145 hsplineno_save = 0, /* Line Number of Prev. token */
146 hscolno_save = 0, /* Indentation */
147 hspcolno_save = 0; /* Left Indentation */
148 static short icontexts_save = 0; /* Indent Context Level */
150 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
151 extern BOOLEAN etags; /* that which is saved */
153 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
155 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
156 * inserted before token +ve -- "}" inserted before
159 short icontexts = 0; /* Which context we're in */
162 Table of indentations: right bit indicates whether to use
163 indentation rules (1 = use rules; 0 = ignore)
166 push one of these "contexts" at every "case" or "where"; the right bit says
167 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
169 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
170 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
171 push is shown just below.
176 static short indenttab[MAX_CONTEXTS] = {-1};
178 #define INDENTPT (indenttab[icontexts]>>1)
179 #define INDENTON (indenttab[icontexts]&1)
181 #define RETURN(tok) return(Return(tok))
184 #define YY_DECL int yylex1()
186 /* We should not peek at yy_act, but flex calls us even for the internal action
187 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
188 to support older versions of flex, we'll continue to peek for now.
190 #define YY_USER_ACTION \
191 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
195 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
198 /* Each time we enter a new start state, we push it onto the state stack.
200 #define PUSH_STATE(n) yy_push_state(n)
201 #define POP_STATE yy_pop_state()
206 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
207 never-interactive (to go a bit faster)
208 stack (use a start-condition stack)
212 %option never-interactive
215 /* The start states are:
216 Code -- normal Haskell code (principal lexer)
217 GlaExt -- Haskell code with Glasgow extensions
218 Comment -- Nested comment processing
219 String -- Inside a string literal with backslashes
220 StringEsc -- Immediately following a backslash in a string literal
221 Char -- Inside a character literal with backslashes
222 CharEsc -- Immediately following a backslash in a character literal
224 Note that the INITIAL state is unused. Also note that these states
225 are _exclusive_. All rules should be prefixed with an appropriate
226 list of start states.
229 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231 isoS [\xa1-\xbf\xd7\xf7]
232 isoL [\xc0-\xd6\xd8-\xde]
233 isol [\xdf-\xf6\xf8-\xff]
240 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
241 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 L [A-Z\xc0-\xd6\xd8-\xde]
244 l [a-z\xdf-\xf6\xf8-\xff]
249 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
258 * Special GHC pragma rules. Do we need a start state for interface files,
259 * so these won't be matched in source files? --JSM
263 <Code,GlaExt>^"# ".*{NL} {
264 char tempf[FILENAME_SIZE];
265 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
267 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
270 <Code,GlaExt>^"#line ".*{NL} {
271 char tempf[FILENAME_SIZE];
272 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
274 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
277 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
278 /* partain: pragma-style line directive */
279 char tempf[FILENAME_SIZE];
280 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
282 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
285 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
286 PUSH_STATE(UserPragma);
287 RETURN(INTERFACE_UPRAGMA);
289 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
290 PUSH_STATE(UserPragma);
291 RETURN(SPECIALISE_UPRAGMA);
293 <Code,GlaExt>"{-#"{WS}*"INLINE" {
294 PUSH_STATE(UserPragma);
295 RETURN(INLINE_UPRAGMA);
297 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
298 PUSH_STATE(UserPragma);
299 RETURN(MAGIC_UNFOLDING_UPRAGMA);
301 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
302 PUSH_STATE(UserPragma);
303 RETURN(DEFOREST_UPRAGMA);
305 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
306 /* these are handled by hscpp */
310 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
311 fprintf(stderr, "\"%s\", line %d: Warning: Unrecognised pragma '",
312 input_filename, hsplineno);
313 format_string(stderr, (unsigned char *) yytext, yyleng);
314 fputs("'\n", stderr);
318 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
322 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
323 * intentionally accepted as a keyword even for normal <Code>.
327 <Code,GlaExt>"case" { RETURN(CASE); }
328 <Code,GlaExt>"class" { RETURN(CLASS); }
329 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
330 <Code,GlaExt>"default" { RETURN(DEFAULT); }
331 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
332 <Code,GlaExt>"do" { RETURN(DO); }
333 <Code,GlaExt>"else" { RETURN(ELSE); }
334 <Code,GlaExt>"if" { RETURN(IF); }
335 <Code,GlaExt>"import" { RETURN(IMPORT); }
336 <Code,GlaExt>"in" { RETURN(IN); }
337 <Code,GlaExt>"infix" { RETURN(INFIX); }
338 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
339 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
340 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
341 <Code,GlaExt>"let" { RETURN(LET); }
342 <Code,GlaExt>"module" { RETURN(MODULE); }
343 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
344 <Code,GlaExt>"of" { RETURN(OF); }
345 <Code,GlaExt>"then" { RETURN(THEN); }
346 <Code,GlaExt>"type" { RETURN(TYPE); }
347 <Code,GlaExt>"where" { RETURN(WHERE); }
349 <Code,GlaExt>"as" { RETURN(AS); }
350 <Code,GlaExt>"hiding" { RETURN(HIDING); }
351 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
353 <Code,GlaExt>"_scc_" { RETURN(SCC); }
354 <GlaExt>"_ccall_" { RETURN(CCALL); }
355 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
356 <GlaExt>"_casm_" { RETURN(CASM); }
357 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
361 * Haskell operators: special, reservedops and useful varsyms
365 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
366 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
367 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
368 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
369 <Code,GlaExt>"{" { RETURN(OCURLY); }
370 <Code,GlaExt>"}" { RETURN(CCURLY); }
371 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
372 <Code,GlaExt>";" { RETURN(SEMI); }
373 <Code,GlaExt>"`" { RETURN(BQUOTE); }
374 <Code,GlaExt>"_" { RETURN(WILDCARD); }
376 <Code,GlaExt>".." { RETURN(DOTDOT); }
377 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
378 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
379 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
380 <Code,GlaExt>"|" { RETURN(VBAR); }
381 <Code,GlaExt>"<-" { RETURN(LARROW); }
382 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
383 <Code,GlaExt>"-" { RETURN(MINUS); }
385 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
386 <Code,GlaExt>"@" { RETURN(AT); }
387 <Code,GlaExt>"!" { RETURN(BANG); }
388 <Code,GlaExt>"~" { RETURN(LAZY); }
392 * Integers and (for Glasgow extensions) primitive integers. Note that
393 * we pass all of the text on to the parser, because flex/C can't handle
394 * arbitrary precision numbers.
398 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
399 yylval.uid = xstrndup(yytext, yyleng - 1);
402 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
403 yylval.uid = xstrndup(yytext, yyleng);
406 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
407 yylval.uid = xstrndup(yytext, yyleng - 1);
410 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
411 yylval.uid = xstrndup(yytext, yyleng);
414 <GlaExt>("-")?{N}"#" {
415 yylval.uid = xstrndup(yytext, yyleng - 1);
418 <Code,GlaExt,UserPragma>{N} {
419 yylval.uid = xstrndup(yytext, yyleng);
425 * Floats and (for Glasgow extensions) primitive floats/doubles.
429 <GlaExt>("-")?{F}"##" {
430 yylval.uid = xstrndup(yytext, yyleng - 2);
433 <GlaExt>("-")?{F}"#" {
434 yylval.uid = xstrndup(yytext, yyleng - 1);
438 yylval.uid = xstrndup(yytext, yyleng);
444 * Funky ``foo'' style C literals for Glasgow extensions
448 <GlaExt>"``"[^']+"''" {
449 hsnewid(yytext + 2, yyleng - 4);
455 * Identifiers, both variables and operators. The trailing hash is allowed
456 * for Glasgow extensions.
462 /* These SHOULDNAE work in "Code" (sigh) */
464 <Code,GlaExt,UserPragma>{Id}"#" {
465 if (! nonstandardFlag) {
466 char errbuf[ERR_BUF_SIZE];
467 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
470 hsnewid(yytext, yyleng);
471 RETURN(isconstr(yytext) ? CONID : VARID);
473 <Code,GlaExt,UserPragma>{Id} {
474 hsnewid(yytext, yyleng);
475 RETURN(isconstr(yytext) ? CONID : VARID);
477 <Code,GlaExt,UserPragma>{SId} {
478 hsnewid(yytext, yyleng);
479 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
481 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
482 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
483 RETURN(is_constr ? QCONID : QVARID);
485 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
486 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
487 RETURN(is_constr ? QCONSYM : QVARSYM);
491 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
493 /* Because we can make the former well-behaved (we defined them).
495 Sadly, the latter is defined by Haskell, which allows such
496 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
500 <GlaExt,UserPragma>"`"{Id}"#`" {
501 hsnewid(yytext + 1, yyleng - 2);
502 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
507 * Character literals. The first form is the quick form, for character
508 * literals that don't contain backslashes. Literals with backslashes are
509 * lexed through multiple rules. First, we match the open ' and as many
510 * normal characters as possible. This puts us into the <Char> state, where
511 * a backslash is legal. Then, we match the backslash and move into the
512 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
513 * characters and the close '. We may end up with too many characters, but
514 * this allows us to easily share the lex rules with strings. Excess characters
515 * are ignored with a warning.
519 <GlaExt>'({CHAR}|"\"")"'#" {
520 yylval.uhstring = installHstring(1, yytext+1);
523 <Code,GlaExt>'({CHAR}|"\"")' {
524 yylval.uhstring = installHstring(1, yytext+1);
527 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
528 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
531 <Code,GlaExt>'({CHAR}|"\"")* {
532 hsmlcolno = hspcolno;
534 addtext(yytext+1, yyleng-1);
537 <Char>({CHAR}|"\"")*'# {
541 addtext(yytext, yyleng - 2);
542 text = fetchtext(&length);
544 if (! nonstandardFlag) {
545 char errbuf[ERR_BUF_SIZE];
546 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
551 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
552 input_filename, hsplineno, hspcolno + 1);
553 format_string(stderr, (unsigned char *) text, length);
554 fputs("' too long\n", stderr);
557 yylval.uhstring = installHstring(1, text);
558 hspcolno = hsmlcolno;
562 <Char>({CHAR}|"\"")*' {
566 addtext(yytext, yyleng - 1);
567 text = fetchtext(&length);
570 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
571 input_filename, hsplineno, hspcolno + 1);
572 format_string(stderr, (unsigned char *) text, length);
573 fputs("' too long\n", stderr);
576 yylval.uhstring = installHstring(1, text);
577 hspcolno = hsmlcolno;
581 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
586 * String literals. The first form is the quick form, for string literals
587 * that don't contain backslashes. Literals with backslashes are lexed
588 * through multiple rules. First, we match the open " and as many normal
589 * characters as possible. This puts us into the <String> state, where
590 * a backslash is legal. Then, we match the backslash and move into the
591 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
592 * characters, moving back and forth between <String> and <StringEsc> as more
593 * backslashes are encountered. (We may even digress into <Comment> mode if we
594 * find a comment in a gap between backslashes.) Finally, we read the last chunk
595 * of normal characters and the close ".
599 <GlaExt>"\""({CHAR}|"'")*"\""# {
600 yylval.uhstring = installHstring(yyleng-3, yytext+1);
601 /* the -3 accounts for the " on front, "# on the end */
604 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
605 yylval.uhstring = installHstring(yyleng-2, yytext+1);
608 <Code,GlaExt>"\""({CHAR}|"'")* {
609 hsmlcolno = hspcolno;
611 addtext(yytext+1, yyleng-1);
614 <String>({CHAR}|"'")*"\"#" {
618 addtext(yytext, yyleng-2);
619 text = fetchtext(&length);
621 if (! nonstandardFlag) {
622 char errbuf[ERR_BUF_SIZE];
623 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
627 yylval.uhstring = installHstring(length, text);
628 hspcolno = hsmlcolno;
632 <String>({CHAR}|"'")*"\"" {
636 addtext(yytext, yyleng-1);
637 text = fetchtext(&length);
639 yylval.uhstring = installHstring(length, text);
640 hspcolno = hsmlcolno;
644 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
648 * Character and string escapes are roughly the same, but strings have the
649 * extra `\&' sequence which is not allowed for characters. Also, comments
650 * are allowed in the <StringEsc> state. (See the comment section much
653 * NB: Backslashes and tabs are stored in strings as themselves.
654 * But if we print them (in printtree.c), they must go out as
655 * "\\\\" and "\\t" respectively. (This is because of the bogus
656 * intermediate format that the parser produces. It uses '\t' fpr end of
657 * string, so it needs to be able to escape tabs, which means that it
658 * also needs to be able to escape the escape character ('\\'). Sigh.
662 <Char>\\ { PUSH_STATE(CharEsc); }
663 <String>\\& /* Ignore */ ;
664 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
666 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
667 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
669 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
670 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
671 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
672 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
673 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
674 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
675 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
676 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
677 <CharEsc,StringEsc>BEL |
678 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
679 <CharEsc,StringEsc>BS |
680 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
681 <CharEsc,StringEsc>HT |
682 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
683 <CharEsc,StringEsc>LF |
684 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
685 <CharEsc,StringEsc>VT |
686 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
687 <CharEsc,StringEsc>FF |
688 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
689 <CharEsc,StringEsc>CR |
690 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
691 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
692 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
693 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
694 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
695 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
696 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
697 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
698 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
699 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
700 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
701 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
702 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
703 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
704 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
705 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
706 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
707 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
708 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
709 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
710 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
711 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
712 <CharEsc,StringEsc>{D}+ {
713 int i = strtol(yytext, NULL, 10);
717 char errbuf[ERR_BUF_SIZE];
718 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
724 <CharEsc,StringEsc>o{O}+ {
725 int i = strtol(yytext + 1, NULL, 8);
729 char errbuf[ERR_BUF_SIZE];
730 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
736 <CharEsc,StringEsc>x{H}+ {
737 int i = strtol(yytext + 1, NULL, 16);
741 char errbuf[ERR_BUF_SIZE];
742 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
751 * Simple comments and whitespace. Normally, we would just ignore these, but
752 * in case we're processing a string escape, we need to note that we've seen
755 * Note that we cater for a comment line that *doesn't* end in a newline.
756 * This is incorrect, strictly speaking, but seems like the right thing
757 * to do. Reported by Rajiv Mirani. (WDP 95/08)
761 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
762 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
766 * Nested comments. The major complication here is in trying to match the
767 * longest lexemes possible, for better performance. (See the flex document.)
768 * That's why the rules look so bizarre.
772 <Code,GlaExt,UserPragma,StringEsc>"{-" {
773 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
777 <Comment>"-"+[^-{}]+ |
778 <Comment>"{"+[^-{}]+ ;
779 <Comment>"{-" { nested_comments++; }
780 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
785 * Illegal characters. This used to be a single rule, but we might as well
786 * pass on as much information as we have, so now we indicate our state in
791 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
792 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
793 input_filename, hsplineno, hspcolno + 1);
794 format_string(stderr, (unsigned char *) yytext, 1);
795 fputs("'\n", stderr);
799 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
800 input_filename, hsplineno, hspcolno + 1);
801 format_string(stderr, (unsigned char *) yytext, 1);
802 fputs("' in a character literal\n", stderr);
806 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
807 input_filename, hsplineno, hspcolno + 1);
808 format_string(stderr, (unsigned char *) yytext, 1);
809 fputs("'\n", stderr);
812 <String>(.|\n) { if (nonstandardFlag) {
813 addtext(yytext, yyleng);
815 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
816 input_filename, hsplineno, hspcolno + 1);
817 format_string(stderr, (unsigned char *) yytext, 1);
818 fputs("' in a string literal\n", stderr);
824 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
825 input_filename, hsplineno, hspcolno + 1);
826 format_string(stderr, (unsigned char *) yytext, 1);
827 fputs("'\n", stderr);
830 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
831 input_filename, hsplineno, hspcolno + 1);
832 format_string(stderr, (unsigned char *) yytext, 1);
833 fputs("' in a string gap\n", stderr);
840 * End of file. In any sub-state, this is an error. However, for the primary
841 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
842 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
843 * adding virtual close curlies, or closing an interface and returning to the
844 * primary source file.
846 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
847 * line/column advancement has to be done by hand.
851 <Char,CharEsc><<EOF>> {
852 hsplineno = hslineno; hspcolno = hscolno;
853 hsperror("unterminated character literal");
856 hsplineno = hslineno; hspcolno = hscolno;
857 hsperror("unterminated comment");
859 <String,StringEsc><<EOF>> {
860 hsplineno = hslineno; hspcolno = hscolno;
861 hsperror("unterminated string literal");
863 <UserPragma><<EOF>> {
864 hsplineno = hslineno; hspcolno = hscolno;
865 hsperror("unterminated user-specified pragma");
867 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
871 /**********************************************************************
874 * YACC/LEX Initialisation etc. *
877 **********************************************************************/
880 We initialise input_filename to "<stdin>".
881 This allows unnamed sources to be piped into the parser.
887 input_filename = xstrdup("<stdin>");
889 /* We must initialize the input buffer _now_, because we call
890 setyyin _before_ calling yylex for the first time! */
891 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
900 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
902 if (input_filename != NULL)
903 free(input_filename);
904 input_filename = xstrdup(f);
907 /**********************************************************************
910 * Layout Processing *
913 **********************************************************************/
916 The following section deals with Haskell Layout conventions
917 forcing insertion of ; or } as appropriate
923 return (!forgetindent && INDENTON);
927 /* Enter new context and set new indentation level */
932 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
936 * partain: first chk that new indent won't be less than current one; this code
937 * doesn't make sense to me; hscolno tells the position of the _end_ of the
938 * current token; what that has to do with indenting, I don't know.
942 if (hscolno - 1 <= INDENTPT) {
944 return; /* Empty input OK for Haskell 1.1 */
946 char errbuf[ERR_BUF_SIZE];
948 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
952 hsentercontext((hspcolno << 1) | 1);
956 /* Enter a new context without changing the indentation level */
961 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
963 hsentercontext(indenttab[icontexts] & ~1);
967 /* Turn off indentation processing, usually because an explicit "{" has been seen */
975 /* Enter a new layout context. */
977 hsentercontext(int indent)
979 /* Enter new context and set indentation as specified */
980 if (++icontexts >= MAX_CONTEXTS) {
981 char errbuf[ERR_BUF_SIZE];
983 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
986 forgetindent = FALSE;
987 indenttab[icontexts] = indent;
989 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
994 /* Exit a layout context */
1000 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1005 * Return checks the indentation level and returns ;, } or the specified token.
1015 if (hsshouldindent()) {
1016 if (hspcolno < INDENTPT) {
1018 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1022 } else if (hspcolno == INDENTPT) {
1024 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1032 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1039 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1045 static BOOLEAN eof = FALSE;
1048 if (hssttok != -1) {
1056 endlineno = hslineno;
1057 if ((tok = yylex1()) != EOF)
1063 if (icontexts > icontexts_save) {
1066 indenttab[icontexts] = 0;
1069 hsperror("missing '}' at end of file");
1070 } else if (hsbuf_save != NULL) {
1072 yy_delete_buffer(YY_CURRENT_BUFFER);
1073 yy_switch_to_buffer(hsbuf_save);
1075 new_filename(filename_save);
1076 free(filename_save);
1077 hslineno = hslineno_save;
1078 hsplineno = hsplineno_save;
1079 hscolno = hscolno_save;
1080 hspcolno = hspcolno_save;
1082 icontexts = icontexts_save - 1;
1085 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1090 hsperror("No longer using yacc to parse interface files");
1095 abort(); /* should never get here! */
1099 /**********************************************************************
1102 * Input Processing for Interfaces -- Not currently used !!! *
1105 **********************************************************************/
1107 /* setyyin(file) open file as new lex input buffer */
1113 hsbuf_save = YY_CURRENT_BUFFER;
1114 if ((yyin = fopen(file, "r")) == NULL) {
1115 char errbuf[ERR_BUF_SIZE];
1117 sprintf(errbuf, "can't read \"%-.50s\"", file);
1120 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1122 hslineno_save = hslineno;
1123 hsplineno_save = hsplineno;
1124 hslineno = hsplineno = 1;
1126 filename_save = input_filename;
1127 input_filename = NULL;
1129 hscolno_save = hscolno;
1130 hspcolno_save = hspcolno;
1131 hscolno = hspcolno = 0;
1132 etags_save = etags; /* do not do "etags" stuff in interfaces */
1133 etags = 0; /* We remember whether we are doing it in
1134 the module, so we can restore it later [WDP 94/09] */
1135 hsentercontext(-1); /* partain: changed this from 0 */
1136 icontexts_save = icontexts;
1138 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1143 layout_input(char *text, int len)
1146 fprintf(stderr, "Scanning \"%s\"\n", text);
1149 hsplineno = hslineno;
1161 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1173 setstartlineno(void)
1175 startlineno = hsplineno;
1177 if (modulelineno == 0) {
1178 modulelineno = startlineno;
1184 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1188 /**********************************************************************
1194 **********************************************************************/
1196 #define CACHE_SIZE YY_BUF_SIZE
1202 } textcache = { 0, 0, NULL };
1207 /* fprintf(stderr, "cleartext\n"); */
1209 if (textcache.allocated == 0) {
1210 textcache.allocated = CACHE_SIZE;
1211 textcache.text = xmalloc(CACHE_SIZE);
1216 addtext(char *text, unsigned length)
1218 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1223 if (textcache.next + length + 1 >= textcache.allocated) {
1224 textcache.allocated += length + CACHE_SIZE;
1225 textcache.text = xrealloc(textcache.text, textcache.allocated);
1227 bcopy(text, textcache.text + textcache.next, length);
1228 textcache.next += length;
1234 /* fprintf(stderr, "addchar: %c\n", c); */
1236 if (textcache.next + 2 >= textcache.allocated) {
1237 textcache.allocated += CACHE_SIZE;
1238 textcache.text = xrealloc(textcache.text, textcache.allocated);
1240 textcache.text[textcache.next++] = c;
1244 fetchtext(unsigned *length)
1246 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1248 *length = textcache.next;
1249 textcache.text[textcache.next] = '\0';
1250 return textcache.text;
1253 /**********************************************************************
1256 * Identifier Processing *
1259 **********************************************************************/
1262 hsnewid Enters an id of length n into the symbol table.
1266 hsnewid(char *name, int length)
1268 char save = name[length];
1270 name[length] = '\0';
1271 yylval.uid = installid(name);
1272 name[length] = save;
1276 hsnewqid(char *name, int length)
1279 char save = name[length];
1280 name[length] = '\0';
1282 dot = strchr(name, '.');
1284 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1286 name[length] = save;
1288 return isconstr(dot+1);