2 /**********************************************************************
8 **********************************************************************/
10 /* The includes/config.h one */
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
17 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
28 /* memory.h and strings.h conflict on some systems. */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
32 #include "hsparser.tab.h"
33 #include "constants.h"
36 /* Our substitute for <ctype.h> */
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
47 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
48 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
50 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
51 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
52 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
54 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
55 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
57 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
58 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
59 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
61 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
84 return(CharTable[*s]&(_C));
87 /**********************************************************************
93 **********************************************************************/
95 char *input_filename = NULL; /* Always points to a dynamically allocated string */
98 * For my own sanity, things that are not part of the flex skeleton
99 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
102 static int hslineno = 0; /* Line number at end of token */
103 int hsplineno = 0; /* Line number at end of previous token */
105 static int hscolno = 0; /* Column number at end of token */
106 int hspcolno = 0; /* Column number at end of previous token */
107 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
109 int modulelineno = -1; /* The line number where the module starts */
110 int startlineno = 0; /* The line number where something starts */
111 int endlineno = 0; /* The line number where something ends */
113 static BOOLEAN noGap = TRUE; /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
116 static int nested_comments; /* For counting comment nesting depth */
118 /* OLD: Hacky definition of yywrap: see flex doc.
120 If we don't do this, then we'll have to get the default
121 yywrap from the flex library, which is often something
122 we are not good at locating. This avoids that difficulty.
123 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
128 /* Essential forward declarations */
130 static void hsnewid PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext (NO_ARGS);
133 static void addtext PROTO((char *, unsigned));
134 static void addchar PROTO((char));
135 static char *fetchtext PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int Return PROTO((int));
138 static void hsentercontext PROTO((int));
140 static BOOLEAN is_commment PROTO((char*, int));
142 /* Special file handling for IMPORTS */
143 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
145 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
146 static char *filename_save; /* File Name */
147 static int hslineno_save = 0, /* Line Number */
148 hsplineno_save = 0, /* Line Number of Prev. token */
149 hscolno_save = 0, /* Indentation */
150 hspcolno_save = 0; /* Left Indentation */
151 static short icontexts_save = 0; /* Indent Context Level */
153 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
154 extern BOOLEAN etags; /* that which is saved */
156 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
158 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
159 * inserted before token +ve -- "}" inserted before
162 short icontexts = 0; /* Which context we're in */
165 Table of indentations: right bit indicates whether to use
166 indentation rules (1 = use rules; 0 = ignore)
169 push one of these "contexts" at every "case" or "where"; the right bit says
170 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
172 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
173 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
174 push is shown just below.
179 static short indenttab[MAX_CONTEXTS] = {-1};
181 #define INDENTPT (indenttab[icontexts]>>1)
182 #define INDENTON (indenttab[icontexts]&1)
184 #define RETURN(tok) return(Return(tok))
187 #define YY_DECL int yylex1()
189 /* We should not peek at yy_act, but flex calls us even for the internal action
190 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
191 to support older versions of flex, we'll continue to peek for now.
193 #define YY_USER_ACTION \
194 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
198 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
201 /* Each time we enter a new start state, we push it onto the state stack.
203 #define PUSH_STATE(n) yy_push_state(n)
204 #define POP_STATE yy_pop_state()
209 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
210 never-interactive (to go a bit faster)
211 stack (use a start-condition stack)
215 %option never-interactive
218 /* The start states are:
219 Code -- normal Haskell code (principal lexer)
220 GlaExt -- Haskell code with Glasgow extensions
221 Comment -- Nested comment processing
222 String -- Inside a string literal with backslashes
223 StringEsc -- Immediately following a backslash in a string literal
224 Char -- Inside a character literal with backslashes
225 CharEsc -- Immediately following a backslash in a character literal
227 Note that the INITIAL state is unused. Also note that these states
228 are _exclusive_. All rules should be prefixed with an appropriate
229 list of start states.
232 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
234 isoS [\xa1-\xbf\xd7\xf7]
235 isoL [\xc0-\xd6\xd8-\xde]
236 isol [\xdf-\xf6\xf8-\xff]
243 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
244 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
246 L [A-Z\xc0-\xd6\xd8-\xde]
247 l [a-z_\xdf-\xf6\xf8-\xff]
252 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
261 * Simple comments and whitespace. Normally, we would just ignore these, but
262 * in case we're processing a string escape, we need to note that we've seen
265 * Note that we cater for a comment line that *doesn't* end in a newline.
266 * This is incorrect, strictly speaking, but seems like the right thing
267 * to do. Reported by Rajiv Mirani. (WDP 95/08)
269 * Hackily moved up here so that --<<EOF>> will match -- SOF 5/97
273 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
277 * Special GHC pragma rules. Do we need a start state for interface files,
278 * so these won't be matched in source files? --JSM
284 /* I believe the next rule is not ever matched.
286 The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
287 Driver has now been modified to output `standard' {-# LINE ..-} pragmas
288 where possible, so the lexer should now never see cpp directives
289 like '# ' and '#line'.
293 <Code,GlaExt>^"# ".*{NL} {
294 char tempf[FILENAME_SIZE];
295 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
297 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
300 <Code,GlaExt>^"#line ".*{NL} {
301 char tempf[FILENAME_SIZE];
302 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
304 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
309 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
310 /* partain: pragma-style line directive */
311 char tempf[FILENAME_SIZE];
312 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
314 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
317 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
318 PUSH_STATE(UserPragma);
319 RETURN(INTERFACE_UPRAGMA);
321 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
322 PUSH_STATE(UserPragma);
323 RETURN(SPECIALISE_UPRAGMA);
325 <Code,GlaExt>"{-#"{WS}*"INLINE" {
326 PUSH_STATE(UserPragma);
327 RETURN(INLINE_UPRAGMA);
329 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
330 PUSH_STATE(UserPragma);
331 RETURN(NOINLINE_UPRAGMA);
333 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
334 PUSH_STATE(UserPragma);
335 RETURN(MAGIC_UNFOLDING_UPRAGMA);
337 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
338 /* these are handled by hscpp */
342 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
343 /* these are for the driver! */
347 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
348 /* these are used by `make depend' and the
349 compiler to indicate that a module should
350 be imported from source */
352 RETURN(SOURCE_UPRAGMA);
355 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
356 fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
357 input_filename, hsplineno);
358 format_string(stderr, (unsigned char *) yytext, yyleng);
359 fputs("'\n", stderr);
363 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
367 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
368 * intentionally accepted as a keyword even for normal <Code>.
372 <Code,GlaExt>"case" { RETURN(CASE); }
373 <Code,GlaExt>"class" { RETURN(CLASS); }
374 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
375 <Code,GlaExt>"default" { RETURN(DEFAULT); }
376 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
377 <Code,GlaExt>"do" { RETURN(DO); }
378 <Code,GlaExt>"else" { RETURN(ELSE); }
379 <Code,GlaExt>"if" { RETURN(IF); }
380 <Code,GlaExt>"import" { RETURN(IMPORT); }
381 <Code,GlaExt>"in" { RETURN(IN); }
382 <Code,GlaExt>"infix" { RETURN(INFIX); }
383 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
384 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
385 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
386 <Code,GlaExt>"let" { RETURN(LET); }
387 <Code,GlaExt>"module" { RETURN(MODULE); }
388 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
389 <Code,GlaExt>"of" { RETURN(OF); }
390 <Code,GlaExt>"then" { RETURN(THEN); }
391 <Code,GlaExt>"type" { RETURN(TYPE); }
392 <Code,GlaExt>"where" { RETURN(WHERE); }
394 <Code,GlaExt>"as" { RETURN(AS); }
395 <Code,GlaExt>"hiding" { RETURN(HIDING); }
396 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
398 <Code,GlaExt>"forall" { RETURN(FORALL); }
400 <Code,GlaExt>"_scc_" { RETURN(SCC); }
401 <GlaExt>"_ccall_" { RETURN(CCALL); }
402 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
403 <GlaExt>"_casm_" { RETURN(CASM); }
404 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
405 <GlaExt>"(#" { RETURN(OUNBOXPAREN); }
406 <GlaExt>"#)" { RETURN(CUNBOXPAREN); }
407 <GlaExt>"foreign" { RETURN(FOREIGN); }
408 <GlaExt>"export" { RETURN(EXPORT); }
409 <GlaExt>"label" { RETURN(LABEL); }
410 <GlaExt>"unsafe" { RETURN(UNSAFE); }
411 <GlaExt>"_stdcall" { RETURN(STDCALL); }
412 <GlaExt>"_ccall" { RETURN(C_CALL); }
413 <GlaExt>"_pascal" { RETURN(PASCAL); }
414 <GlaExt>"stdcall" { RETURN(STDCALL); }
415 <GlaExt>"ccall" { RETURN(C_CALL); }
416 <GlaExt>"pascal" { RETURN(PASCAL); }
417 <GlaExt>"dynamic" { RETURN(DYNAMIC); }
421 * Haskell operators: special, reservedops and useful varsyms
425 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
426 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
427 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
428 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
429 <Code,GlaExt>"{" { RETURN(OCURLY); }
430 <Code,GlaExt>"}" { RETURN(CCURLY); }
431 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
432 <Code,GlaExt>";" { RETURN(SEMI); }
433 <Code,GlaExt>"`" { RETURN(BQUOTE); }
435 <Code,GlaExt>"." { RETURN(DOT); }
436 <Code,GlaExt>".." { RETURN(DOTDOT); }
437 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
438 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
439 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
440 <Code,GlaExt>"|" { RETURN(VBAR); }
441 <Code,GlaExt>"<-" { RETURN(LARROW); }
442 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
443 <Code,GlaExt>"-" { RETURN(MINUS); }
444 <Code,GlaExt>"+" { RETURN(PLUS); }
446 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
447 <Code,GlaExt>"@" { RETURN(AT); }
448 <Code,GlaExt>"!" { RETURN(BANG); }
449 <Code,GlaExt>"~" { RETURN(LAZY); }
453 * Integers and (for Glasgow extensions) primitive integers. Note that
454 * we pass all of the text on to the parser, because flex/C can't handle
455 * arbitrary precision numbers.
459 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
460 yylval.uid = xstrndup(yytext, yyleng - 1);
463 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
464 yylval.uid = xstrndup(yytext, yyleng);
467 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
468 yylval.uid = xstrndup(yytext, yyleng - 1);
471 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
472 yylval.uid = xstrndup(yytext, yyleng);
475 <GlaExt>("-")?{N}"#" {
476 yylval.uid = xstrndup(yytext, yyleng - 1);
479 <Code,GlaExt,UserPragma>{N} {
480 yylval.uid = xstrndup(yytext, yyleng);
486 * Floats and (for Glasgow extensions) primitive floats/doubles.
490 <GlaExt>("-")?{F}"##" {
491 yylval.uid = xstrndup(yytext, yyleng - 2);
494 <GlaExt>("-")?{F}"#" {
495 yylval.uid = xstrndup(yytext, yyleng - 1);
499 yylval.uid = xstrndup(yytext, yyleng);
505 * Funky ``foo'' style C literals for Glasgow extensions
509 <GlaExt>"``"[^']+"''" {
510 hsnewid(yytext + 2, yyleng - 4);
516 * Identifiers, both variables and operators. The trailing hash is allowed
517 * for Glasgow extensions.
523 /* These SHOULDNAE work in "Code" (sigh) */
525 <GlaExt,UserPragma>{Id}"#" {
526 if (! nonstandardFlag) {
527 char errbuf[ERR_BUF_SIZE];
528 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
531 hsnewid(yytext, yyleng);
532 RETURN(isconstr(yytext) ? CONID : VARID);
534 <Code,GlaExt,UserPragma>{Id} {
535 hsnewid(yytext, yyleng);
536 RETURN(isconstr(yytext) ? CONID : VARID);
538 <Code,GlaExt,UserPragma>{SId} {
539 if (is_commment(yytext,yyleng)) {
541 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
546 hsnewid(yytext, yyleng);
547 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
550 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
552 if (! nonstandardFlag) {
553 char errbuf[ERR_BUF_SIZE];
554 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
557 is_constr = hsnewqid(yytext, yyleng);
558 RETURN(is_constr ? QCONID : QVARID);
560 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
561 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
562 RETURN(is_constr ? QCONID : QVARID);
564 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
565 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
566 RETURN(is_constr ? QCONSYM : QVARSYM);
570 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
572 /* Because we can make the former well-behaved (we defined them).
574 Sadly, the latter is defined by Haskell, which allows such
575 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
579 <GlaExt,UserPragma>"`"{Id}"#`" {
580 hsnewid(yytext + 1, yyleng - 2);
581 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
586 * Character literals. The first form is the quick form, for character
587 * literals that don't contain backslashes. Literals with backslashes are
588 * lexed through multiple rules. First, we match the open ' and as many
589 * normal characters as possible. This puts us into the <Char> state, where
590 * a backslash is legal. Then, we match the backslash and move into the
591 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
592 * characters and the close '. We may end up with too many characters, but
593 * this allows us to easily share the lex rules with strings. Excess characters
594 * are ignored with a warning.
598 <GlaExt>'({CHAR}|"\"")"'#" {
599 yylval.uhstring = installHstring(1, yytext+1);
602 <Code,GlaExt>'({CHAR}|"\"")' {
603 yylval.uhstring = installHstring(1, yytext+1);
606 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
607 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
610 <Code,GlaExt>'({CHAR}|"\"")* {
611 hsmlcolno = hspcolno;
613 addtext(yytext+1, yyleng-1);
616 <Char>({CHAR}|"\"")*'# {
620 addtext(yytext, yyleng - 2);
621 text = fetchtext(&length);
623 if (! nonstandardFlag) {
624 char errbuf[ERR_BUF_SIZE];
625 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
630 fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
631 input_filename, hsplineno, hspcolno + 1);
632 format_string(stderr, (unsigned char *) text, length);
633 fputs("' too long\n", stderr);
636 yylval.uhstring = installHstring(1, text);
637 hspcolno = hsmlcolno;
641 <Char>({CHAR}|"\"")*' {
645 addtext(yytext, yyleng - 1);
646 text = fetchtext(&length);
649 fprintf(stderr, "%s:%d:%d: Character literal '",
650 input_filename, hsplineno, hspcolno + 1);
651 format_string(stderr, (unsigned char *) text, length);
652 fputs("' too long\n", stderr);
655 yylval.uhstring = installHstring(1, text);
656 hspcolno = hsmlcolno;
660 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
665 * String literals. The first form is the quick form, for string literals
666 * that don't contain backslashes. Literals with backslashes are lexed
667 * through multiple rules. First, we match the open " and as many normal
668 * characters as possible. This puts us into the <String> state, where
669 * a backslash is legal. Then, we match the backslash and move into the
670 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
671 * characters, moving back and forth between <String> and <StringEsc> as more
672 * backslashes are encountered. (We may even digress into <Comment> mode if we
673 * find a comment in a gap between backslashes.) Finally, we read the last chunk
674 * of normal characters and the close ".
678 <GlaExt>"\""({CHAR}|"'")*"\""# {
679 yylval.uhstring = installHstring(yyleng-3, yytext+1);
680 /* the -3 accounts for the " on front, "# on the end */
683 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
684 yylval.uhstring = installHstring(yyleng-2, yytext+1);
687 <Code,GlaExt>"\""({CHAR}|"'")* {
688 hsmlcolno = hspcolno;
690 addtext(yytext+1, yyleng-1);
693 <String>({CHAR}|"'")*"\"#" {
697 addtext(yytext, yyleng-2);
698 text = fetchtext(&length);
700 if (! nonstandardFlag) {
701 char errbuf[ERR_BUF_SIZE];
702 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
706 yylval.uhstring = installHstring(length, text);
707 hspcolno = hsmlcolno;
711 <String>({CHAR}|"'")*"\"" {
715 addtext(yytext, yyleng-1);
716 text = fetchtext(&length);
718 yylval.uhstring = installHstring(length, text);
719 hspcolno = hsmlcolno;
723 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
727 * Character and string escapes are roughly the same, but strings have the
728 * extra `\&' sequence which is not allowed for characters. Also, comments
729 * are allowed in the <StringEsc> state. (See the comment section much
732 * NB: Backslashes and tabs are stored in strings as themselves.
733 * But if we print them (in printtree.c), they must go out as
734 * "\\\\" and "\\t" respectively. (This is because of the bogus
735 * intermediate format that the parser produces. It uses '\t' fpr end of
736 * string, so it needs to be able to escape tabs, which means that it
737 * also needs to be able to escape the escape character ('\\'). Sigh.
741 <Char>\\ { PUSH_STATE(CharEsc); }
742 <String>\\& /* Ignore */ ;
743 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
745 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
746 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
750 Not 100% correct, tokenizes "foo \ --<>--
753 as "foo bar", but this is not correct as per Haskell 98 report and its
754 maximal munch rule for "--"-style comments.
756 For the moment, not deemed worthy to fix.
759 <StringEsc>"--"[^\n\r]*{NL}?{WS}* { noGap=FALSE; }
761 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
762 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
763 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
764 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
765 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
766 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
767 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
768 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
769 <CharEsc,StringEsc>BEL |
770 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
771 <CharEsc,StringEsc>BS |
772 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
773 <CharEsc,StringEsc>HT |
774 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
775 <CharEsc,StringEsc>LF |
776 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
777 <CharEsc,StringEsc>VT |
778 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
779 <CharEsc,StringEsc>FF |
780 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
781 <CharEsc,StringEsc>CR |
782 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
783 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
784 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
785 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
786 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
787 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
788 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
789 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
790 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
791 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
792 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
793 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
794 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
795 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
796 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
797 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
798 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
799 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
800 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
801 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
802 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
803 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
804 <CharEsc,StringEsc>{D}+ {
805 int i = strtol(yytext, NULL, 10);
809 char errbuf[ERR_BUF_SIZE];
810 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
816 <CharEsc,StringEsc>o{O}+ {
817 int i = strtol(yytext + 1, NULL, 8);
821 char errbuf[ERR_BUF_SIZE];
822 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
828 <CharEsc,StringEsc>x{H}+ {
829 int i = strtol(yytext + 1, NULL, 16);
833 char errbuf[ERR_BUF_SIZE];
834 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
844 * Nested comments. The major complication here is in trying to match the
845 * longest lexemes possible, for better performance. (See the flex document.)
846 * That's why the rules look so bizarre.
850 <Code,GlaExt,UserPragma,StringEsc>"{-" {
851 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
855 <Comment>"-"+[^-{}]+ |
856 <Comment>"{"+[^-{}]+ ;
857 <Comment>"{-" { nested_comments++; }
858 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
864 * Illegal characters. This used to be a single rule, but we might as well
865 * pass on as much information as we have, so now we indicate our state in
870 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
871 fprintf(stderr, "%s:%d:%d: Illegal character: `",
872 input_filename, hsplineno, hspcolno + 1);
873 format_string(stderr, (unsigned char *) yytext, 1);
874 fputs("'\n", stderr);
878 fprintf(stderr, "%s:%d:%d: Illegal character: `",
879 input_filename, hsplineno, hspcolno + 1);
880 format_string(stderr, (unsigned char *) yytext, 1);
881 fputs("' in a character literal\n", stderr);
885 fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
886 input_filename, hsplineno, hspcolno + 1);
887 format_string(stderr, (unsigned char *) yytext, 1);
888 fputs("'\n", stderr);
891 <String>(.|\n) { if (nonstandardFlag) {
892 addtext(yytext, yyleng);
894 fprintf(stderr, "%s:%d:%d: Illegal character: `",
895 input_filename, hsplineno, hspcolno + 1);
896 format_string(stderr, (unsigned char *) yytext, 1);
897 fputs("' in a string literal\n", stderr);
903 fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
904 input_filename, hsplineno, hspcolno + 1);
905 format_string(stderr, (unsigned char *) yytext, 1);
906 fputs("'\n", stderr);
909 fprintf(stderr, "%s:%d:%d: Illegal character: `",
910 input_filename, hsplineno, hspcolno + 1);
911 format_string(stderr, (unsigned char *) yytext, 1);
912 fputs("' in a string gap\n", stderr);
919 * End of file. In any sub-state, this is an error. However, for the primary
920 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
921 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
922 * adding virtual close curlies, or closing an interface and returning to the
923 * primary source file.
925 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
926 * line/column advancement has to be done by hand.
930 <Char,CharEsc><<EOF>> {
931 hsplineno = hslineno; hspcolno = hscolno;
932 hsperror("unterminated character literal");
935 hsplineno = hslineno; hspcolno = hscolno;
936 hsperror("unterminated comment");
938 <String,StringEsc><<EOF>> {
939 hsplineno = hslineno; hspcolno = hscolno;
940 hsperror("unterminated string literal");
942 <UserPragma><<EOF>> {
943 hsplineno = hslineno; hspcolno = hscolno;
944 hsperror("unterminated user-specified pragma");
946 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
950 /**********************************************************************
953 * YACC/LEX Initialisation etc. *
956 **********************************************************************/
959 We initialise input_filename to "<stdin>".
960 This allows unnamed sources to be piped into the parser.
966 input_filename = xstrdup("<stdin>");
968 /* We must initialize the input buffer _now_, because we call
969 setyyin _before_ calling yylex for the first time! */
970 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
979 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
981 if (input_filename != NULL)
982 free(input_filename);
983 input_filename = xstrdup(f);
986 /**********************************************************************
989 * Layout Processing *
992 **********************************************************************/
995 The following section deals with Haskell Layout conventions
996 forcing insertion of ; or } as appropriate
1000 #define LAYOUT_DEBUG
1005 hsshouldindent(void)
1007 return (!forgetindent && INDENTON);
1011 /* Enter new context and set new indentation level */
1016 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1020 * partain: first chk that new indent won't be less than current one; this code
1021 * doesn't make sense to me; hscolno tells the position of the _end_ of the
1022 * current token; what that has to do with indenting, I don't know.
1026 if (hscolno - 1 <= INDENTPT) {
1028 return; /* Empty input OK for Haskell 1.1 */
1030 char errbuf[ERR_BUF_SIZE];
1032 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1036 hsentercontext((hspcolno << 1) | 1);
1040 /* Enter a new context without changing the indentation level */
1045 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1047 hsentercontext(indenttab[icontexts] & ~1);
1051 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1055 forgetindent = TRUE;
1059 /* Enter a new layout context. */
1061 hsentercontext(int indent)
1063 /* Enter new context and set indentation as specified */
1064 if (++icontexts >= MAX_CONTEXTS) {
1065 char errbuf[ERR_BUF_SIZE];
1067 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1070 forgetindent = FALSE;
1071 indenttab[icontexts] = indent;
1073 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1078 /* Exit a layout context */
1084 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1089 * Return checks the indentation level and returns ;, } or the specified token.
1097 if (hsshouldindent()) {
1098 if (hspcolno < INDENTPT) {
1100 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1104 } else if (hspcolno == INDENTPT) {
1106 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1115 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1122 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1128 static BOOLEAN eof = FALSE;
1131 if (hssttok != -1) {
1139 endlineno = hslineno;
1140 if ((tok = yylex1()) != EOF)
1146 if (icontexts > icontexts_save) {
1149 indenttab[icontexts] = 0;
1152 hsperror("missing '}' at end of file");
1153 } else if (hsbuf_save != NULL) {
1155 yy_delete_buffer(YY_CURRENT_BUFFER);
1156 yy_switch_to_buffer(hsbuf_save);
1158 new_filename(filename_save);
1159 free(filename_save);
1160 hslineno = hslineno_save;
1161 hsplineno = hsplineno_save;
1162 hscolno = hscolno_save;
1163 hspcolno = hspcolno_save;
1165 icontexts = icontexts_save - 1;
1168 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1173 hsperror("No longer using yacc to parse interface files");
1178 abort(); /* should never get here! */
1182 /**********************************************************************
1185 * Input Processing for Interfaces -- Not currently used !!! *
1188 **********************************************************************/
1190 /* setyyin(file) open file as new lex input buffer */
1196 hsbuf_save = YY_CURRENT_BUFFER;
1197 if ((yyin = fopen(file, "r")) == NULL) {
1198 char errbuf[ERR_BUF_SIZE];
1200 sprintf(errbuf, "can't read \"%-.50s\"", file);
1203 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1205 hslineno_save = hslineno;
1206 hsplineno_save = hsplineno;
1207 hslineno = hsplineno = 1;
1209 filename_save = input_filename;
1210 input_filename = NULL;
1212 hscolno_save = hscolno;
1213 hspcolno_save = hspcolno;
1214 hscolno = hspcolno = 0;
1215 etags_save = etags; /* do not do "etags" stuff in interfaces */
1216 etags = 0; /* We remember whether we are doing it in
1217 the module, so we can restore it later [WDP 94/09] */
1218 hsentercontext(-1); /* partain: changed this from 0 */
1219 icontexts_save = icontexts;
1221 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1226 layout_input(char *text, int len)
1229 fprintf(stderr, "Scanning \"%s\"\n", text);
1232 hsplineno = hslineno;
1244 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1256 setstartlineno(void)
1258 startlineno = hsplineno;
1260 if (modulelineno == 0) {
1261 modulelineno = startlineno;
1267 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1271 /**********************************************************************
1277 **********************************************************************/
1279 #define CACHE_SIZE YY_BUF_SIZE
1285 } textcache = { 0, 0, NULL };
1290 /* fprintf(stderr, "cleartext\n"); */
1292 if (textcache.allocated == 0) {
1293 textcache.allocated = CACHE_SIZE;
1294 textcache.text = xmalloc(CACHE_SIZE);
1299 addtext(char *text, unsigned length)
1301 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1306 if (textcache.next + length + 1 >= textcache.allocated) {
1307 textcache.allocated += length + CACHE_SIZE;
1308 textcache.text = xrealloc(textcache.text, textcache.allocated);
1310 bcopy(text, textcache.text + textcache.next, length);
1311 textcache.next += length;
1317 /* fprintf(stderr, "addchar: %c\n", c); */
1319 if (textcache.next + 2 >= textcache.allocated) {
1320 textcache.allocated += CACHE_SIZE;
1321 textcache.text = xrealloc(textcache.text, textcache.allocated);
1323 textcache.text[textcache.next++] = c;
1327 fetchtext(unsigned *length)
1329 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1331 *length = textcache.next;
1332 textcache.text[textcache.next] = '\0';
1333 return textcache.text;
1336 /**********************************************************************
1339 * Identifier Processing *
1342 **********************************************************************/
1345 hsnewid Enters an id of length n into the symbol table.
1349 hsnewid(char *name, int length)
1351 char save = name[length];
1353 name[length] = '\0';
1354 yylval.uid = installid(name);
1355 name[length] = save;
1359 hsnewqid(char *name, int length)
1362 char save = name[length];
1363 name[length] = '\0';
1365 dot = strchr(name, '.');
1367 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1369 name[length] = save;
1371 return isconstr(dot+1);
1376 is_commment(char* lexeme, int len)
1385 for(i=0;i<len;i++) {
1386 if (lexeme[i] != '-') return FALSE;