2 /**********************************************************************
8 **********************************************************************/
10 /* The includes/config.h one */
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
17 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
28 /* memory.h and strings.h conflict on some systems. */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
32 #include "hsparser.tab.h"
33 #include "constants.h"
36 /* Our substitute for <ctype.h> */
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
47 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
48 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
50 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
51 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
52 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
54 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
55 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
57 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
58 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
59 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
61 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
84 return(CharTable[*s]&(_C));
87 /**********************************************************************
93 **********************************************************************/
95 char *input_filename = NULL; /* Always points to a dynamically allocated string */
98 * For my own sanity, things that are not part of the flex skeleton
99 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
102 static int hslineno = 0; /* Line number at end of token */
103 int hsplineno = 0; /* Line number at end of previous token */
105 static int hscolno = 0; /* Column number at end of token */
106 int hspcolno = 0; /* Column number at end of previous token */
107 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
109 int modulelineno = -1; /* The line number where the module starts */
110 int startlineno = 0; /* The line number where something starts */
111 int endlineno = 0; /* The line number where something ends */
113 static BOOLEAN noGap = TRUE; /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
116 static int nested_comments; /* For counting comment nesting depth */
118 /* OLD: Hacky definition of yywrap: see flex doc.
120 If we don't do this, then we'll have to get the default
121 yywrap from the flex library, which is often something
122 we are not good at locating. This avoids that difficulty.
123 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
128 /* Essential forward declarations */
130 static void hsnewid PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext (NO_ARGS);
133 static void addtext PROTO((char *, unsigned));
134 static void addchar PROTO((char));
135 static char *fetchtext PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int Return PROTO((int));
138 static void hsentercontext PROTO((int));
140 /* Special file handling for IMPORTS */
141 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
143 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
144 static char *filename_save; /* File Name */
145 static int hslineno_save = 0, /* Line Number */
146 hsplineno_save = 0, /* Line Number of Prev. token */
147 hscolno_save = 0, /* Indentation */
148 hspcolno_save = 0; /* Left Indentation */
149 static short icontexts_save = 0; /* Indent Context Level */
151 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags; /* that which is saved */
154 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
156 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
157 * inserted before token +ve -- "}" inserted before
160 short icontexts = 0; /* Which context we're in */
163 Table of indentations: right bit indicates whether to use
164 indentation rules (1 = use rules; 0 = ignore)
167 push one of these "contexts" at every "case" or "where"; the right bit says
168 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
170 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
171 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
172 push is shown just below.
177 static short indenttab[MAX_CONTEXTS] = {-1};
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
182 #define RETURN(tok) return(Return(tok))
185 #define YY_DECL int yylex1()
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189 to support older versions of flex, we'll continue to peek for now.
191 #define YY_USER_ACTION \
192 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
199 /* Each time we enter a new start state, we push it onto the state stack.
201 #define PUSH_STATE(n) yy_push_state(n)
202 #define POP_STATE yy_pop_state()
207 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208 never-interactive (to go a bit faster)
209 stack (use a start-condition stack)
213 %option never-interactive
216 /* The start states are:
217 Code -- normal Haskell code (principal lexer)
218 GlaExt -- Haskell code with Glasgow extensions
219 Comment -- Nested comment processing
220 String -- Inside a string literal with backslashes
221 StringEsc -- Immediately following a backslash in a string literal
222 Char -- Inside a character literal with backslashes
223 CharEsc -- Immediately following a backslash in a character literal
225 Note that the INITIAL state is unused. Also note that these states
226 are _exclusive_. All rules should be prefixed with an appropriate
227 list of start states.
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
232 isoS [\xa1-\xbf\xd7\xf7]
233 isoL [\xc0-\xd6\xd8-\xde]
234 isol [\xdf-\xf6\xf8-\xff]
241 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
244 L [A-Z\xc0-\xd6\xd8-\xde]
245 l [a-z\xdf-\xf6\xf8-\xff]
250 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
259 * Simple comments and whitespace. Normally, we would just ignore these, but
260 * in case we're processing a string escape, we need to note that we've seen
263 * Note that we cater for a comment line that *doesn't* end in a newline.
264 * This is incorrect, strictly speaking, but seems like the right thing
265 * to do. Reported by Rajiv Mirani. (WDP 95/08)
267 * Hackily moved up here so that --<<EOF>> will match -- SOF 5/97
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
276 * Special GHC pragma rules. Do we need a start state for interface files,
277 * so these won't be matched in source files? --JSM
283 /* I believe the next rule is not ever matched.
285 The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
286 Driver has now been modified to output `standard' {-# LINE ..-} pragmas
287 where possible, so the lexer should now never see cpp directives
288 like '# ' and '#line'.
292 <Code,GlaExt>^"# ".*{NL} {
293 char tempf[FILENAME_SIZE];
294 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
296 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
299 <Code,GlaExt>^"#line ".*{NL} {
300 char tempf[FILENAME_SIZE];
301 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
303 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
308 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
309 /* partain: pragma-style line directive */
310 char tempf[FILENAME_SIZE];
311 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
313 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
316 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
317 PUSH_STATE(UserPragma);
319 RETURN(INTERFACE_UPRAGMA);
321 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
322 PUSH_STATE(UserPragma);
324 RETURN(SPECIALISE_UPRAGMA);
326 <Code,GlaExt>"{-#"{WS}*"INLINE" {
327 PUSH_STATE(UserPragma);
329 RETURN(INLINE_UPRAGMA);
331 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
332 PUSH_STATE(UserPragma);
334 RETURN(MAGIC_UNFOLDING_UPRAGMA);
336 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
337 /* these are handled by hscpp */
342 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
343 /* these are for the driver! */
348 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
349 /* these are used by `make depend' and the
350 compiler to indicate that a module should
351 be imported from source */
353 RETURN(SOURCE_UPRAGMA);
356 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
357 fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
358 input_filename, hsplineno);
359 format_string(stderr, (unsigned char *) yytext, yyleng);
360 fputs("'\n", stderr);
364 <UserPragma>"#-}" { POP_STATE;
366 /* don't want any layout processing here,
367 * so just use 'return' instead of 'RETURN',
368 * remembering to set hssttok.
376 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
377 * intentionally accepted as a keyword even for normal <Code>.
381 <Code,GlaExt>"case" { RETURN(CASE); }
382 <Code,GlaExt>"class" { RETURN(CLASS); }
383 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
384 <Code,GlaExt>"default" { RETURN(DEFAULT); }
385 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
386 <Code,GlaExt>"do" { RETURN(DO); }
387 <Code,GlaExt>"else" { RETURN(ELSE); }
388 <Code,GlaExt>"if" { RETURN(IF); }
389 <Code,GlaExt>"import" { RETURN(IMPORT); }
390 <Code,GlaExt>"in" { RETURN(IN); }
391 <Code,GlaExt>"infix" { RETURN(INFIX); }
392 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
393 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
394 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
395 <Code,GlaExt>"let" { RETURN(LET); }
396 <Code,GlaExt>"module" { RETURN(MODULE); }
397 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
398 <Code,GlaExt>"of" { RETURN(OF); }
399 <Code,GlaExt>"then" { RETURN(THEN); }
400 <Code,GlaExt>"type" { RETURN(TYPE); }
401 <Code,GlaExt>"where" { RETURN(WHERE); }
403 <Code,GlaExt>"as" { RETURN(AS); }
404 <Code,GlaExt>"hiding" { RETURN(HIDING); }
405 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
407 <Code,GlaExt>"_scc_" { RETURN(SCC); }
408 <GlaExt>"_ccall_" { RETURN(CCALL); }
409 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
410 <GlaExt>"_casm_" { RETURN(CASM); }
411 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
415 * Haskell operators: special, reservedops and useful varsyms
419 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
420 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
421 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
422 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
423 <Code,GlaExt>"{" { RETURN(OCURLY); }
424 <Code,GlaExt>"}" { RETURN(CCURLY); }
425 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
426 <Code,GlaExt>";" { RETURN(SEMI); }
427 <Code,GlaExt>"`" { RETURN(BQUOTE); }
428 <Code,GlaExt>"_" { RETURN(WILDCARD); }
430 <Code,GlaExt>".." { RETURN(DOTDOT); }
431 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
432 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
433 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
434 <Code,GlaExt>"|" { RETURN(VBAR); }
435 <Code,GlaExt>"<-" { RETURN(LARROW); }
436 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
437 <Code,GlaExt>"-" { RETURN(MINUS); }
438 <Code,GlaExt>"+" { RETURN(PLUS); }
440 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
441 <Code,GlaExt>"@" { RETURN(AT); }
442 <Code,GlaExt>"!" { RETURN(BANG); }
443 <Code,GlaExt>"~" { RETURN(LAZY); }
447 * Integers and (for Glasgow extensions) primitive integers. Note that
448 * we pass all of the text on to the parser, because flex/C can't handle
449 * arbitrary precision numbers.
453 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
454 yylval.uid = xstrndup(yytext, yyleng - 1);
457 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
458 yylval.uid = xstrndup(yytext, yyleng);
461 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
462 yylval.uid = xstrndup(yytext, yyleng - 1);
465 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
466 yylval.uid = xstrndup(yytext, yyleng);
469 <GlaExt>("-")?{N}"#" {
470 yylval.uid = xstrndup(yytext, yyleng - 1);
473 <Code,GlaExt,UserPragma>{N} {
474 yylval.uid = xstrndup(yytext, yyleng);
480 * Floats and (for Glasgow extensions) primitive floats/doubles.
484 <GlaExt>("-")?{F}"##" {
485 yylval.uid = xstrndup(yytext, yyleng - 2);
488 <GlaExt>("-")?{F}"#" {
489 yylval.uid = xstrndup(yytext, yyleng - 1);
493 yylval.uid = xstrndup(yytext, yyleng);
499 * Funky ``foo'' style C literals for Glasgow extensions
503 <GlaExt>"``"[^']+"''" {
504 hsnewid(yytext + 2, yyleng - 4);
510 * Identifiers, both variables and operators. The trailing hash is allowed
511 * for Glasgow extensions.
517 /* These SHOULDNAE work in "Code" (sigh) */
519 <GlaExt,UserPragma>{Id}"#" {
520 if (! nonstandardFlag) {
521 char errbuf[ERR_BUF_SIZE];
522 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
525 hsnewid(yytext, yyleng);
526 RETURN(isconstr(yytext) ? CONID : VARID);
528 <Code,GlaExt,UserPragma>{Id} {
529 hsnewid(yytext, yyleng);
530 RETURN(isconstr(yytext) ? CONID : VARID);
532 <Code,GlaExt,UserPragma>{SId} {
533 hsnewid(yytext, yyleng);
534 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
536 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
538 if (! nonstandardFlag) {
539 char errbuf[ERR_BUF_SIZE];
540 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
543 is_constr = hsnewqid(yytext, yyleng);
544 RETURN(is_constr ? QCONID : QVARID);
546 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
547 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
548 RETURN(is_constr ? QCONID : QVARID);
550 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
551 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
552 RETURN(is_constr ? QCONSYM : QVARSYM);
556 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
558 /* Because we can make the former well-behaved (we defined them).
560 Sadly, the latter is defined by Haskell, which allows such
561 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
565 <GlaExt,UserPragma>"`"{Id}"#`" {
566 hsnewid(yytext + 1, yyleng - 2);
567 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
572 * Character literals. The first form is the quick form, for character
573 * literals that don't contain backslashes. Literals with backslashes are
574 * lexed through multiple rules. First, we match the open ' and as many
575 * normal characters as possible. This puts us into the <Char> state, where
576 * a backslash is legal. Then, we match the backslash and move into the
577 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
578 * characters and the close '. We may end up with too many characters, but
579 * this allows us to easily share the lex rules with strings. Excess characters
580 * are ignored with a warning.
584 <GlaExt>'({CHAR}|"\"")"'#" {
585 yylval.uhstring = installHstring(1, yytext+1);
588 <Code,GlaExt>'({CHAR}|"\"")' {
589 yylval.uhstring = installHstring(1, yytext+1);
592 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
593 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
596 <Code,GlaExt>'({CHAR}|"\"")* {
597 hsmlcolno = hspcolno;
599 addtext(yytext+1, yyleng-1);
602 <Char>({CHAR}|"\"")*'# {
606 addtext(yytext, yyleng - 2);
607 text = fetchtext(&length);
609 if (! nonstandardFlag) {
610 char errbuf[ERR_BUF_SIZE];
611 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
616 fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
617 input_filename, hsplineno, hspcolno + 1);
618 format_string(stderr, (unsigned char *) text, length);
619 fputs("' too long\n", stderr);
622 yylval.uhstring = installHstring(1, text);
623 hspcolno = hsmlcolno;
627 <Char>({CHAR}|"\"")*' {
631 addtext(yytext, yyleng - 1);
632 text = fetchtext(&length);
635 fprintf(stderr, "%s:%d:%d: Character literal '",
636 input_filename, hsplineno, hspcolno + 1);
637 format_string(stderr, (unsigned char *) text, length);
638 fputs("' too long\n", stderr);
641 yylval.uhstring = installHstring(1, text);
642 hspcolno = hsmlcolno;
646 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
651 * String literals. The first form is the quick form, for string literals
652 * that don't contain backslashes. Literals with backslashes are lexed
653 * through multiple rules. First, we match the open " and as many normal
654 * characters as possible. This puts us into the <String> state, where
655 * a backslash is legal. Then, we match the backslash and move into the
656 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
657 * characters, moving back and forth between <String> and <StringEsc> as more
658 * backslashes are encountered. (We may even digress into <Comment> mode if we
659 * find a comment in a gap between backslashes.) Finally, we read the last chunk
660 * of normal characters and the close ".
664 <GlaExt>"\""({CHAR}|"'")*"\""# {
665 yylval.uhstring = installHstring(yyleng-3, yytext+1);
666 /* the -3 accounts for the " on front, "# on the end */
669 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
670 yylval.uhstring = installHstring(yyleng-2, yytext+1);
673 <Code,GlaExt>"\""({CHAR}|"'")* {
674 hsmlcolno = hspcolno;
676 addtext(yytext+1, yyleng-1);
679 <String>({CHAR}|"'")*"\"#" {
683 addtext(yytext, yyleng-2);
684 text = fetchtext(&length);
686 if (! nonstandardFlag) {
687 char errbuf[ERR_BUF_SIZE];
688 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
692 yylval.uhstring = installHstring(length, text);
693 hspcolno = hsmlcolno;
697 <String>({CHAR}|"'")*"\"" {
701 addtext(yytext, yyleng-1);
702 text = fetchtext(&length);
704 yylval.uhstring = installHstring(length, text);
705 hspcolno = hsmlcolno;
709 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
713 * Character and string escapes are roughly the same, but strings have the
714 * extra `\&' sequence which is not allowed for characters. Also, comments
715 * are allowed in the <StringEsc> state. (See the comment section much
718 * NB: Backslashes and tabs are stored in strings as themselves.
719 * But if we print them (in printtree.c), they must go out as
720 * "\\\\" and "\\t" respectively. (This is because of the bogus
721 * intermediate format that the parser produces. It uses '\t' fpr end of
722 * string, so it needs to be able to escape tabs, which means that it
723 * also needs to be able to escape the escape character ('\\'). Sigh.
727 <Char>\\ { PUSH_STATE(CharEsc); }
728 <String>\\& /* Ignore */ ;
729 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
731 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
732 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
734 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
735 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
736 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
737 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
738 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
739 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
740 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
741 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
742 <CharEsc,StringEsc>BEL |
743 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
744 <CharEsc,StringEsc>BS |
745 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
746 <CharEsc,StringEsc>HT |
747 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
748 <CharEsc,StringEsc>LF |
749 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
750 <CharEsc,StringEsc>VT |
751 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
752 <CharEsc,StringEsc>FF |
753 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
754 <CharEsc,StringEsc>CR |
755 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
756 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
757 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
758 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
759 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
760 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
761 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
762 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
763 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
764 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
765 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
766 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
767 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
768 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
769 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
770 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
771 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
772 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
773 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
774 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
775 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
776 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
777 <CharEsc,StringEsc>{D}+ {
778 int i = strtol(yytext, NULL, 10);
782 char errbuf[ERR_BUF_SIZE];
783 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
789 <CharEsc,StringEsc>o{O}+ {
790 int i = strtol(yytext + 1, NULL, 8);
794 char errbuf[ERR_BUF_SIZE];
795 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
801 <CharEsc,StringEsc>x{H}+ {
802 int i = strtol(yytext + 1, NULL, 16);
806 char errbuf[ERR_BUF_SIZE];
807 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
817 * Nested comments. The major complication here is in trying to match the
818 * longest lexemes possible, for better performance. (See the flex document.)
819 * That's why the rules look so bizarre.
823 <Code,GlaExt,UserPragma,StringEsc>"{-" {
824 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
828 <Comment>"-"+[^-{}]+ |
829 <Comment>"{"+[^-{}]+ ;
830 <Comment>"{-" { nested_comments++; }
831 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
836 * Illegal characters. This used to be a single rule, but we might as well
837 * pass on as much information as we have, so now we indicate our state in
842 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
843 fprintf(stderr, "%s:%d:%d: Illegal character: `",
844 input_filename, hsplineno, hspcolno + 1);
845 format_string(stderr, (unsigned char *) yytext, 1);
846 fputs("'\n", stderr);
850 fprintf(stderr, "%s:%d:%d: Illegal character: `",
851 input_filename, hsplineno, hspcolno + 1);
852 format_string(stderr, (unsigned char *) yytext, 1);
853 fputs("' in a character literal\n", stderr);
857 fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
858 input_filename, hsplineno, hspcolno + 1);
859 format_string(stderr, (unsigned char *) yytext, 1);
860 fputs("'\n", stderr);
863 <String>(.|\n) { if (nonstandardFlag) {
864 addtext(yytext, yyleng);
866 fprintf(stderr, "%s:%d:%d: Illegal character: `",
867 input_filename, hsplineno, hspcolno + 1);
868 format_string(stderr, (unsigned char *) yytext, 1);
869 fputs("' in a string literal\n", stderr);
875 fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
876 input_filename, hsplineno, hspcolno + 1);
877 format_string(stderr, (unsigned char *) yytext, 1);
878 fputs("'\n", stderr);
881 fprintf(stderr, "%s:%d:%d: Illegal character: `",
882 input_filename, hsplineno, hspcolno + 1);
883 format_string(stderr, (unsigned char *) yytext, 1);
884 fputs("' in a string gap\n", stderr);
891 * End of file. In any sub-state, this is an error. However, for the primary
892 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
893 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
894 * adding virtual close curlies, or closing an interface and returning to the
895 * primary source file.
897 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
898 * line/column advancement has to be done by hand.
902 <Char,CharEsc><<EOF>> {
903 hsplineno = hslineno; hspcolno = hscolno;
904 hsperror("unterminated character literal");
907 hsplineno = hslineno; hspcolno = hscolno;
908 hsperror("unterminated comment");
910 <String,StringEsc><<EOF>> {
911 hsplineno = hslineno; hspcolno = hscolno;
912 hsperror("unterminated string literal");
914 <UserPragma><<EOF>> {
915 hsplineno = hslineno; hspcolno = hscolno;
916 hsperror("unterminated user-specified pragma");
918 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
922 /**********************************************************************
925 * YACC/LEX Initialisation etc. *
928 **********************************************************************/
931 We initialise input_filename to "<stdin>".
932 This allows unnamed sources to be piped into the parser.
938 input_filename = xstrdup("<stdin>");
940 /* We must initialize the input buffer _now_, because we call
941 setyyin _before_ calling yylex for the first time! */
942 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
951 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
953 if (input_filename != NULL)
954 free(input_filename);
955 input_filename = xstrdup(f);
958 /**********************************************************************
961 * Layout Processing *
964 **********************************************************************/
967 The following section deals with Haskell Layout conventions
968 forcing insertion of ; or } as appropriate
974 return (!forgetindent && INDENTON);
978 /* Enter new context and set new indentation level */
983 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
987 * partain: first chk that new indent won't be less than current one; this code
988 * doesn't make sense to me; hscolno tells the position of the _end_ of the
989 * current token; what that has to do with indenting, I don't know.
993 if (hscolno - 1 <= INDENTPT) {
995 return; /* Empty input OK for Haskell 1.1 */
997 char errbuf[ERR_BUF_SIZE];
999 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1003 hsentercontext((hspcolno << 1) | 1);
1007 /* Enter a new context without changing the indentation level */
1012 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1014 hsentercontext(indenttab[icontexts] & ~1);
1018 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1022 forgetindent = TRUE;
1026 /* Enter a new layout context. */
1028 hsentercontext(int indent)
1030 /* Enter new context and set indentation as specified */
1031 if (++icontexts >= MAX_CONTEXTS) {
1032 char errbuf[ERR_BUF_SIZE];
1034 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1037 forgetindent = FALSE;
1038 indenttab[icontexts] = indent;
1040 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1045 /* Exit a layout context */
1051 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1056 * Return checks the indentation level and returns ;, } or the specified token.
1066 if (hsshouldindent()) {
1067 if (hspcolno < INDENTPT) {
1069 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1073 } else if (hspcolno == INDENTPT) {
1075 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1083 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1090 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1096 static BOOLEAN eof = FALSE;
1099 if (hssttok != -1) {
1107 endlineno = hslineno;
1108 if ((tok = yylex1()) != EOF)
1114 if (icontexts > icontexts_save) {
1117 indenttab[icontexts] = 0;
1120 hsperror("missing '}' at end of file");
1121 } else if (hsbuf_save != NULL) {
1123 yy_delete_buffer(YY_CURRENT_BUFFER);
1124 yy_switch_to_buffer(hsbuf_save);
1126 new_filename(filename_save);
1127 free(filename_save);
1128 hslineno = hslineno_save;
1129 hsplineno = hsplineno_save;
1130 hscolno = hscolno_save;
1131 hspcolno = hspcolno_save;
1133 icontexts = icontexts_save - 1;
1136 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1141 hsperror("No longer using yacc to parse interface files");
1146 abort(); /* should never get here! */
1150 /**********************************************************************
1153 * Input Processing for Interfaces -- Not currently used !!! *
1156 **********************************************************************/
1158 /* setyyin(file) open file as new lex input buffer */
1164 hsbuf_save = YY_CURRENT_BUFFER;
1165 if ((yyin = fopen(file, "r")) == NULL) {
1166 char errbuf[ERR_BUF_SIZE];
1168 sprintf(errbuf, "can't read \"%-.50s\"", file);
1171 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1173 hslineno_save = hslineno;
1174 hsplineno_save = hsplineno;
1175 hslineno = hsplineno = 1;
1177 filename_save = input_filename;
1178 input_filename = NULL;
1180 hscolno_save = hscolno;
1181 hspcolno_save = hspcolno;
1182 hscolno = hspcolno = 0;
1183 etags_save = etags; /* do not do "etags" stuff in interfaces */
1184 etags = 0; /* We remember whether we are doing it in
1185 the module, so we can restore it later [WDP 94/09] */
1186 hsentercontext(-1); /* partain: changed this from 0 */
1187 icontexts_save = icontexts;
1189 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1194 layout_input(char *text, int len)
1197 fprintf(stderr, "Scanning \"%s\"\n", text);
1200 hsplineno = hslineno;
1212 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1224 setstartlineno(void)
1226 startlineno = hsplineno;
1228 if (modulelineno == 0) {
1229 modulelineno = startlineno;
1235 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1239 /**********************************************************************
1245 **********************************************************************/
1247 #define CACHE_SIZE YY_BUF_SIZE
1253 } textcache = { 0, 0, NULL };
1258 /* fprintf(stderr, "cleartext\n"); */
1260 if (textcache.allocated == 0) {
1261 textcache.allocated = CACHE_SIZE;
1262 textcache.text = xmalloc(CACHE_SIZE);
1267 addtext(char *text, unsigned length)
1269 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1274 if (textcache.next + length + 1 >= textcache.allocated) {
1275 textcache.allocated += length + CACHE_SIZE;
1276 textcache.text = xrealloc(textcache.text, textcache.allocated);
1278 bcopy(text, textcache.text + textcache.next, length);
1279 textcache.next += length;
1285 /* fprintf(stderr, "addchar: %c\n", c); */
1287 if (textcache.next + 2 >= textcache.allocated) {
1288 textcache.allocated += CACHE_SIZE;
1289 textcache.text = xrealloc(textcache.text, textcache.allocated);
1291 textcache.text[textcache.next++] = c;
1295 fetchtext(unsigned *length)
1297 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1299 *length = textcache.next;
1300 textcache.text[textcache.next] = '\0';
1301 return textcache.text;
1304 /**********************************************************************
1307 * Identifier Processing *
1310 **********************************************************************/
1313 hsnewid Enters an id of length n into the symbol table.
1317 hsnewid(char *name, int length)
1319 char save = name[length];
1321 name[length] = '\0';
1322 yylval.uid = installid(name);
1323 name[length] = save;
1327 hsnewqid(char *name, int length)
1330 char save = name[length];
1331 name[length] = '\0';
1333 dot = strchr(name, '.');
1335 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1337 name[length] = save;
1339 return isconstr(dot+1);