2 /**********************************************************************
8 **********************************************************************/
10 #include "../../includes/config.h"
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 /* memory.h and strings.h conflict on some systems. */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
31 #include "hsparser.tab.h"
32 #include "constants.h"
35 /* Our substitute for <ctype.h> */
44 static unsigned char CharTable[NCHARS] = {
45 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
46 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
47 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
48 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
50 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
51 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
52 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
53 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
54 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
55 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
57 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
58 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
59 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
62 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
83 return(CharTable[*s]&(_C));
86 /**********************************************************************
92 **********************************************************************/
94 char *input_filename = NULL; /* Always points to a dynamically allocated string */
97 * For my own sanity, things that are not part of the flex skeleton
98 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
101 static int hslineno = 0; /* Line number at end of token */
102 int hsplineno = 0; /* Line number at end of previous token */
104 static int hscolno = 0; /* Column number at end of token */
105 int hspcolno = 0; /* Column number at end of previous token */
106 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
108 int modulelineno = -1; /* The line number where the module starts */
109 int startlineno = 0; /* The line number where something starts */
110 int endlineno = 0; /* The line number where something ends */
112 static BOOLEAN noGap = TRUE; /* For checking string gaps */
113 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
115 static int nested_comments; /* For counting comment nesting depth */
117 /* OLD: Hacky definition of yywrap: see flex doc.
119 If we don't do this, then we'll have to get the default
120 yywrap from the flex library, which is often something
121 we are not good at locating. This avoids that difficulty.
122 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
127 /* Essential forward declarations */
129 static void hsnewid PROTO((char *, int));
130 static void layout_input PROTO((char *, int));
131 static void cleartext (NO_ARGS);
132 static void addtext PROTO((char *, unsigned));
133 static void addchar PROTO((char));
134 static char *fetchtext PROTO((unsigned *));
135 static void new_filename PROTO((char *));
136 static int Return PROTO((int));
137 static void hsentercontext PROTO((int));
139 /* Special file handling for IMPORTS */
140 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
143 static char *filename_save; /* File Name */
144 static int hslineno_save = 0, /* Line Number */
145 hsplineno_save = 0, /* Line Number of Prev. token */
146 hscolno_save = 0, /* Indentation */
147 hspcolno_save = 0; /* Left Indentation */
148 static short icontexts_save = 0; /* Indent Context Level */
150 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
151 extern BOOLEAN etags; /* that which is saved */
153 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
155 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
156 * inserted before token +ve -- "}" inserted before
159 short icontexts = 0; /* Which context we're in */
162 Table of indentations: right bit indicates whether to use
163 indentation rules (1 = use rules; 0 = ignore)
166 push one of these "contexts" at every "case" or "where"; the right bit says
167 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
169 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
170 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
171 push is shown just below.
176 static short indenttab[MAX_CONTEXTS] = {-1};
178 #define INDENTPT (indenttab[icontexts]>>1)
179 #define INDENTON (indenttab[icontexts]&1)
181 #define RETURN(tok) return(Return(tok))
184 #define YY_DECL int yylex1()
186 /* We should not peek at yy_act, but flex calls us even for the internal action
187 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
188 to support older versions of flex, we'll continue to peek for now.
190 #define YY_USER_ACTION \
191 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
195 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
198 /* Each time we enter a new start state, we push it onto the state stack.
200 #define PUSH_STATE(n) yy_push_state(n)
201 #define POP_STATE yy_pop_state()
206 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
207 never-interactive (to go a bit faster)
208 stack (use a start-condition stack)
212 %option never-interactive
215 /* The start states are:
216 Code -- normal Haskell code (principal lexer)
217 GlaExt -- Haskell code with Glasgow extensions
218 Comment -- Nested comment processing
219 String -- Inside a string literal with backslashes
220 StringEsc -- Immediately following a backslash in a string literal
221 Char -- Inside a character literal with backslashes
222 CharEsc -- Immediately following a backslash in a character literal
224 Note that the INITIAL state is unused. Also note that these states
225 are _exclusive_. All rules should be prefixed with an appropriate
226 list of start states.
229 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231 isoS [\xa1-\xbf\xd7\xf7]
232 isoL [\xc0-\xd6\xd8-\xde]
233 isol [\xdf-\xf6\xf8-\xff]
240 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
241 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 L [A-Z\xc0-\xd6\xd8-\xde]
244 l [a-z\xdf-\xf6\xf8-\xff]
249 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
258 * Special GHC pragma rules. Do we need a start state for interface files,
259 * so these won't be matched in source files? --JSM
263 <Code,GlaExt>^"# ".*{NL} {
264 char tempf[FILENAME_SIZE];
265 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
267 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
270 <Code,GlaExt>^"#line ".*{NL} {
271 char tempf[FILENAME_SIZE];
272 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
274 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
277 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
278 /* partain: pragma-style line directive */
279 char tempf[FILENAME_SIZE];
280 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
282 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
285 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
286 PUSH_STATE(UserPragma);
287 RETURN(INTERFACE_UPRAGMA);
289 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
290 PUSH_STATE(UserPragma);
291 RETURN(SPECIALISE_UPRAGMA);
293 <Code,GlaExt>"{-#"{WS}*"INLINE" {
294 PUSH_STATE(UserPragma);
295 RETURN(INLINE_UPRAGMA);
297 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
298 PUSH_STATE(UserPragma);
299 RETURN(MAGIC_UNFOLDING_UPRAGMA);
301 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
302 PUSH_STATE(UserPragma);
303 RETURN(DEFOREST_UPRAGMA);
305 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
306 /* these are handled by hscpp */
310 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
311 /* these are by the driver! */
315 <Code,GlaExt>"{-#"{WS}*"SOURCE" {
316 /* these are used by `make depend' (temp) */
320 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
321 fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
322 input_filename, hsplineno);
323 format_string(stderr, (unsigned char *) yytext, yyleng);
324 fputs("'\n", stderr);
328 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
332 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
333 * intentionally accepted as a keyword even for normal <Code>.
337 <Code,GlaExt>"case" { RETURN(CASE); }
338 <Code,GlaExt>"class" { RETURN(CLASS); }
339 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
340 <Code,GlaExt>"default" { RETURN(DEFAULT); }
341 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
342 <Code,GlaExt>"do" { RETURN(DO); }
343 <Code,GlaExt>"else" { RETURN(ELSE); }
344 <Code,GlaExt>"if" { RETURN(IF); }
345 <Code,GlaExt>"import" { RETURN(IMPORT); }
346 <Code,GlaExt>"in" { RETURN(IN); }
347 <Code,GlaExt>"infix" { RETURN(INFIX); }
348 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
349 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
350 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
351 <Code,GlaExt>"let" { RETURN(LET); }
352 <Code,GlaExt>"module" { RETURN(MODULE); }
353 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
354 <Code,GlaExt>"of" { RETURN(OF); }
355 <Code,GlaExt>"then" { RETURN(THEN); }
356 <Code,GlaExt>"type" { RETURN(TYPE); }
357 <Code,GlaExt>"where" { RETURN(WHERE); }
359 <Code,GlaExt>"as" { RETURN(AS); }
360 <Code,GlaExt>"hiding" { RETURN(HIDING); }
361 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
363 <Code,GlaExt>"_scc_" { RETURN(SCC); }
364 <GlaExt>"_ccall_" { RETURN(CCALL); }
365 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
366 <GlaExt>"_casm_" { RETURN(CASM); }
367 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
371 * Haskell operators: special, reservedops and useful varsyms
375 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
376 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
377 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
378 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
379 <Code,GlaExt>"{" { RETURN(OCURLY); }
380 <Code,GlaExt>"}" { RETURN(CCURLY); }
381 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
382 <Code,GlaExt>";" { RETURN(SEMI); }
383 <Code,GlaExt>"`" { RETURN(BQUOTE); }
384 <Code,GlaExt>"_" { RETURN(WILDCARD); }
386 <Code,GlaExt>".." { RETURN(DOTDOT); }
387 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
388 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
389 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
390 <Code,GlaExt>"|" { RETURN(VBAR); }
391 <Code,GlaExt>"<-" { RETURN(LARROW); }
392 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
393 <Code,GlaExt>"-" { RETURN(MINUS); }
394 <Code,GlaExt>"+" { RETURN(PLUS); }
396 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
397 <Code,GlaExt>"@" { RETURN(AT); }
398 <Code,GlaExt>"!" { RETURN(BANG); }
399 <Code,GlaExt>"~" { RETURN(LAZY); }
403 * Integers and (for Glasgow extensions) primitive integers. Note that
404 * we pass all of the text on to the parser, because flex/C can't handle
405 * arbitrary precision numbers.
409 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
410 yylval.uid = xstrndup(yytext, yyleng - 1);
413 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
414 yylval.uid = xstrndup(yytext, yyleng);
417 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
418 yylval.uid = xstrndup(yytext, yyleng - 1);
421 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
422 yylval.uid = xstrndup(yytext, yyleng);
425 <GlaExt>("-")?{N}"#" {
426 yylval.uid = xstrndup(yytext, yyleng - 1);
429 <Code,GlaExt,UserPragma>{N} {
430 yylval.uid = xstrndup(yytext, yyleng);
436 * Floats and (for Glasgow extensions) primitive floats/doubles.
440 <GlaExt>("-")?{F}"##" {
441 yylval.uid = xstrndup(yytext, yyleng - 2);
444 <GlaExt>("-")?{F}"#" {
445 yylval.uid = xstrndup(yytext, yyleng - 1);
449 yylval.uid = xstrndup(yytext, yyleng);
455 * Funky ``foo'' style C literals for Glasgow extensions
459 <GlaExt>"``"[^']+"''" {
460 hsnewid(yytext + 2, yyleng - 4);
466 * Identifiers, both variables and operators. The trailing hash is allowed
467 * for Glasgow extensions.
473 /* These SHOULDNAE work in "Code" (sigh) */
475 <GlaExt,UserPragma>{Id}"#" {
476 if (! nonstandardFlag) {
477 char errbuf[ERR_BUF_SIZE];
478 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
481 hsnewid(yytext, yyleng);
482 RETURN(isconstr(yytext) ? CONID : VARID);
484 <Code,GlaExt,UserPragma>{Id} {
485 hsnewid(yytext, yyleng);
486 RETURN(isconstr(yytext) ? CONID : VARID);
488 <Code,GlaExt,UserPragma>{SId} {
489 hsnewid(yytext, yyleng);
490 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
492 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
494 if (! nonstandardFlag) {
495 char errbuf[ERR_BUF_SIZE];
496 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
499 is_constr = hsnewqid(yytext, yyleng);
500 RETURN(is_constr ? QCONID : QVARID);
502 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
503 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
504 RETURN(is_constr ? QCONID : QVARID);
506 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
507 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
508 RETURN(is_constr ? QCONSYM : QVARSYM);
512 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
514 /* Because we can make the former well-behaved (we defined them).
516 Sadly, the latter is defined by Haskell, which allows such
517 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
521 <GlaExt,UserPragma>"`"{Id}"#`" {
522 hsnewid(yytext + 1, yyleng - 2);
523 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
528 * Character literals. The first form is the quick form, for character
529 * literals that don't contain backslashes. Literals with backslashes are
530 * lexed through multiple rules. First, we match the open ' and as many
531 * normal characters as possible. This puts us into the <Char> state, where
532 * a backslash is legal. Then, we match the backslash and move into the
533 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
534 * characters and the close '. We may end up with too many characters, but
535 * this allows us to easily share the lex rules with strings. Excess characters
536 * are ignored with a warning.
540 <GlaExt>'({CHAR}|"\"")"'#" {
541 yylval.uhstring = installHstring(1, yytext+1);
544 <Code,GlaExt>'({CHAR}|"\"")' {
545 yylval.uhstring = installHstring(1, yytext+1);
548 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
549 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
552 <Code,GlaExt>'({CHAR}|"\"")* {
553 hsmlcolno = hspcolno;
555 addtext(yytext+1, yyleng-1);
558 <Char>({CHAR}|"\"")*'# {
562 addtext(yytext, yyleng - 2);
563 text = fetchtext(&length);
565 if (! nonstandardFlag) {
566 char errbuf[ERR_BUF_SIZE];
567 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
572 fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
573 input_filename, hsplineno, hspcolno + 1);
574 format_string(stderr, (unsigned char *) text, length);
575 fputs("' too long\n", stderr);
578 yylval.uhstring = installHstring(1, text);
579 hspcolno = hsmlcolno;
583 <Char>({CHAR}|"\"")*' {
587 addtext(yytext, yyleng - 1);
588 text = fetchtext(&length);
591 fprintf(stderr, "%s:%d:%d: Character literal '",
592 input_filename, hsplineno, hspcolno + 1);
593 format_string(stderr, (unsigned char *) text, length);
594 fputs("' too long\n", stderr);
597 yylval.uhstring = installHstring(1, text);
598 hspcolno = hsmlcolno;
602 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
607 * String literals. The first form is the quick form, for string literals
608 * that don't contain backslashes. Literals with backslashes are lexed
609 * through multiple rules. First, we match the open " and as many normal
610 * characters as possible. This puts us into the <String> state, where
611 * a backslash is legal. Then, we match the backslash and move into the
612 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
613 * characters, moving back and forth between <String> and <StringEsc> as more
614 * backslashes are encountered. (We may even digress into <Comment> mode if we
615 * find a comment in a gap between backslashes.) Finally, we read the last chunk
616 * of normal characters and the close ".
620 <GlaExt>"\""({CHAR}|"'")*"\""# {
621 yylval.uhstring = installHstring(yyleng-3, yytext+1);
622 /* the -3 accounts for the " on front, "# on the end */
625 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
626 yylval.uhstring = installHstring(yyleng-2, yytext+1);
629 <Code,GlaExt>"\""({CHAR}|"'")* {
630 hsmlcolno = hspcolno;
632 addtext(yytext+1, yyleng-1);
635 <String>({CHAR}|"'")*"\"#" {
639 addtext(yytext, yyleng-2);
640 text = fetchtext(&length);
642 if (! nonstandardFlag) {
643 char errbuf[ERR_BUF_SIZE];
644 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
648 yylval.uhstring = installHstring(length, text);
649 hspcolno = hsmlcolno;
653 <String>({CHAR}|"'")*"\"" {
657 addtext(yytext, yyleng-1);
658 text = fetchtext(&length);
660 yylval.uhstring = installHstring(length, text);
661 hspcolno = hsmlcolno;
665 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
669 * Character and string escapes are roughly the same, but strings have the
670 * extra `\&' sequence which is not allowed for characters. Also, comments
671 * are allowed in the <StringEsc> state. (See the comment section much
674 * NB: Backslashes and tabs are stored in strings as themselves.
675 * But if we print them (in printtree.c), they must go out as
676 * "\\\\" and "\\t" respectively. (This is because of the bogus
677 * intermediate format that the parser produces. It uses '\t' fpr end of
678 * string, so it needs to be able to escape tabs, which means that it
679 * also needs to be able to escape the escape character ('\\'). Sigh.
683 <Char>\\ { PUSH_STATE(CharEsc); }
684 <String>\\& /* Ignore */ ;
685 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
687 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
688 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
690 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
691 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
692 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
693 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
694 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
695 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
696 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
697 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
698 <CharEsc,StringEsc>BEL |
699 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
700 <CharEsc,StringEsc>BS |
701 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
702 <CharEsc,StringEsc>HT |
703 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
704 <CharEsc,StringEsc>LF |
705 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
706 <CharEsc,StringEsc>VT |
707 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
708 <CharEsc,StringEsc>FF |
709 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
710 <CharEsc,StringEsc>CR |
711 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
712 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
713 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
714 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
715 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
716 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
717 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
718 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
719 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
720 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
721 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
722 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
723 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
724 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
725 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
726 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
727 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
728 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
729 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
730 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
731 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
732 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
733 <CharEsc,StringEsc>{D}+ {
734 int i = strtol(yytext, NULL, 10);
738 char errbuf[ERR_BUF_SIZE];
739 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
745 <CharEsc,StringEsc>o{O}+ {
746 int i = strtol(yytext + 1, NULL, 8);
750 char errbuf[ERR_BUF_SIZE];
751 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
757 <CharEsc,StringEsc>x{H}+ {
758 int i = strtol(yytext + 1, NULL, 16);
762 char errbuf[ERR_BUF_SIZE];
763 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
772 * Simple comments and whitespace. Normally, we would just ignore these, but
773 * in case we're processing a string escape, we need to note that we've seen
776 * Note that we cater for a comment line that *doesn't* end in a newline.
777 * This is incorrect, strictly speaking, but seems like the right thing
778 * to do. Reported by Rajiv Mirani. (WDP 95/08)
782 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
783 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
787 * Nested comments. The major complication here is in trying to match the
788 * longest lexemes possible, for better performance. (See the flex document.)
789 * That's why the rules look so bizarre.
793 <Code,GlaExt,UserPragma,StringEsc>"{-" {
794 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
798 <Comment>"-"+[^-{}]+ |
799 <Comment>"{"+[^-{}]+ ;
800 <Comment>"{-" { nested_comments++; }
801 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
806 * Illegal characters. This used to be a single rule, but we might as well
807 * pass on as much information as we have, so now we indicate our state in
812 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
813 fprintf(stderr, "%s:%d:%d: Illegal character: `",
814 input_filename, hsplineno, hspcolno + 1);
815 format_string(stderr, (unsigned char *) yytext, 1);
816 fputs("'\n", stderr);
820 fprintf(stderr, "%s:%d:%d: Illegal character: `",
821 input_filename, hsplineno, hspcolno + 1);
822 format_string(stderr, (unsigned char *) yytext, 1);
823 fputs("' in a character literal\n", stderr);
827 fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
828 input_filename, hsplineno, hspcolno + 1);
829 format_string(stderr, (unsigned char *) yytext, 1);
830 fputs("'\n", stderr);
833 <String>(.|\n) { if (nonstandardFlag) {
834 addtext(yytext, yyleng);
836 fprintf(stderr, "%s:%d:%d: Illegal character: `",
837 input_filename, hsplineno, hspcolno + 1);
838 format_string(stderr, (unsigned char *) yytext, 1);
839 fputs("' in a string literal\n", stderr);
845 fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
846 input_filename, hsplineno, hspcolno + 1);
847 format_string(stderr, (unsigned char *) yytext, 1);
848 fputs("'\n", stderr);
851 fprintf(stderr, "%s:%d:%d: Illegal character: `",
852 input_filename, hsplineno, hspcolno + 1);
853 format_string(stderr, (unsigned char *) yytext, 1);
854 fputs("' in a string gap\n", stderr);
861 * End of file. In any sub-state, this is an error. However, for the primary
862 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
863 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
864 * adding virtual close curlies, or closing an interface and returning to the
865 * primary source file.
867 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
868 * line/column advancement has to be done by hand.
872 <Char,CharEsc><<EOF>> {
873 hsplineno = hslineno; hspcolno = hscolno;
874 hsperror("unterminated character literal");
877 hsplineno = hslineno; hspcolno = hscolno;
878 hsperror("unterminated comment");
880 <String,StringEsc><<EOF>> {
881 hsplineno = hslineno; hspcolno = hscolno;
882 hsperror("unterminated string literal");
884 <UserPragma><<EOF>> {
885 hsplineno = hslineno; hspcolno = hscolno;
886 hsperror("unterminated user-specified pragma");
888 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
892 /**********************************************************************
895 * YACC/LEX Initialisation etc. *
898 **********************************************************************/
901 We initialise input_filename to "<stdin>".
902 This allows unnamed sources to be piped into the parser.
908 input_filename = xstrdup("<stdin>");
910 /* We must initialize the input buffer _now_, because we call
911 setyyin _before_ calling yylex for the first time! */
912 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
921 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
923 if (input_filename != NULL)
924 free(input_filename);
925 input_filename = xstrdup(f);
928 /**********************************************************************
931 * Layout Processing *
934 **********************************************************************/
937 The following section deals with Haskell Layout conventions
938 forcing insertion of ; or } as appropriate
944 return (!forgetindent && INDENTON);
948 /* Enter new context and set new indentation level */
953 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
957 * partain: first chk that new indent won't be less than current one; this code
958 * doesn't make sense to me; hscolno tells the position of the _end_ of the
959 * current token; what that has to do with indenting, I don't know.
963 if (hscolno - 1 <= INDENTPT) {
965 return; /* Empty input OK for Haskell 1.1 */
967 char errbuf[ERR_BUF_SIZE];
969 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
973 hsentercontext((hspcolno << 1) | 1);
977 /* Enter a new context without changing the indentation level */
982 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
984 hsentercontext(indenttab[icontexts] & ~1);
988 /* Turn off indentation processing, usually because an explicit "{" has been seen */
996 /* Enter a new layout context. */
998 hsentercontext(int indent)
1000 /* Enter new context and set indentation as specified */
1001 if (++icontexts >= MAX_CONTEXTS) {
1002 char errbuf[ERR_BUF_SIZE];
1004 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1007 forgetindent = FALSE;
1008 indenttab[icontexts] = indent;
1010 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1015 /* Exit a layout context */
1021 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1026 * Return checks the indentation level and returns ;, } or the specified token.
1036 if (hsshouldindent()) {
1037 if (hspcolno < INDENTPT) {
1039 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1043 } else if (hspcolno == INDENTPT) {
1045 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1053 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1060 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1066 static BOOLEAN eof = FALSE;
1069 if (hssttok != -1) {
1077 endlineno = hslineno;
1078 if ((tok = yylex1()) != EOF)
1084 if (icontexts > icontexts_save) {
1087 indenttab[icontexts] = 0;
1090 hsperror("missing '}' at end of file");
1091 } else if (hsbuf_save != NULL) {
1093 yy_delete_buffer(YY_CURRENT_BUFFER);
1094 yy_switch_to_buffer(hsbuf_save);
1096 new_filename(filename_save);
1097 free(filename_save);
1098 hslineno = hslineno_save;
1099 hsplineno = hsplineno_save;
1100 hscolno = hscolno_save;
1101 hspcolno = hspcolno_save;
1103 icontexts = icontexts_save - 1;
1106 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1111 hsperror("No longer using yacc to parse interface files");
1116 abort(); /* should never get here! */
1120 /**********************************************************************
1123 * Input Processing for Interfaces -- Not currently used !!! *
1126 **********************************************************************/
1128 /* setyyin(file) open file as new lex input buffer */
1134 hsbuf_save = YY_CURRENT_BUFFER;
1135 if ((yyin = fopen(file, "r")) == NULL) {
1136 char errbuf[ERR_BUF_SIZE];
1138 sprintf(errbuf, "can't read \"%-.50s\"", file);
1141 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1143 hslineno_save = hslineno;
1144 hsplineno_save = hsplineno;
1145 hslineno = hsplineno = 1;
1147 filename_save = input_filename;
1148 input_filename = NULL;
1150 hscolno_save = hscolno;
1151 hspcolno_save = hspcolno;
1152 hscolno = hspcolno = 0;
1153 etags_save = etags; /* do not do "etags" stuff in interfaces */
1154 etags = 0; /* We remember whether we are doing it in
1155 the module, so we can restore it later [WDP 94/09] */
1156 hsentercontext(-1); /* partain: changed this from 0 */
1157 icontexts_save = icontexts;
1159 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1164 layout_input(char *text, int len)
1167 fprintf(stderr, "Scanning \"%s\"\n", text);
1170 hsplineno = hslineno;
1182 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1194 setstartlineno(void)
1196 startlineno = hsplineno;
1198 if (modulelineno == 0) {
1199 modulelineno = startlineno;
1205 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1209 /**********************************************************************
1215 **********************************************************************/
1217 #define CACHE_SIZE YY_BUF_SIZE
1223 } textcache = { 0, 0, NULL };
1228 /* fprintf(stderr, "cleartext\n"); */
1230 if (textcache.allocated == 0) {
1231 textcache.allocated = CACHE_SIZE;
1232 textcache.text = xmalloc(CACHE_SIZE);
1237 addtext(char *text, unsigned length)
1239 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1244 if (textcache.next + length + 1 >= textcache.allocated) {
1245 textcache.allocated += length + CACHE_SIZE;
1246 textcache.text = xrealloc(textcache.text, textcache.allocated);
1248 bcopy(text, textcache.text + textcache.next, length);
1249 textcache.next += length;
1255 /* fprintf(stderr, "addchar: %c\n", c); */
1257 if (textcache.next + 2 >= textcache.allocated) {
1258 textcache.allocated += CACHE_SIZE;
1259 textcache.text = xrealloc(textcache.text, textcache.allocated);
1261 textcache.text[textcache.next++] = c;
1265 fetchtext(unsigned *length)
1267 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1269 *length = textcache.next;
1270 textcache.text[textcache.next] = '\0';
1271 return textcache.text;
1274 /**********************************************************************
1277 * Identifier Processing *
1280 **********************************************************************/
1283 hsnewid Enters an id of length n into the symbol table.
1287 hsnewid(char *name, int length)
1289 char save = name[length];
1291 name[length] = '\0';
1292 yylval.uid = installid(name);
1293 name[length] = save;
1297 hsnewqid(char *name, int length)
1300 char save = name[length];
1301 name[length] = '\0';
1303 dot = strchr(name, '.');
1305 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1307 name[length] = save;
1309 return isconstr(dot+1);