2 /**********************************************************************
8 **********************************************************************/
10 /* The includes/config.h one */
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
17 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
28 /* memory.h and strings.h conflict on some systems. */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
32 #include "hsparser.tab.h"
33 #include "constants.h"
36 /* Our substitute for <ctype.h> */
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
47 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
48 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
50 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
51 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
52 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
54 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
55 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
57 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
58 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
59 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
61 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
84 return(CharTable[*s]&(_C));
87 /**********************************************************************
93 **********************************************************************/
95 char *input_filename = NULL; /* Always points to a dynamically allocated string */
98 * For my own sanity, things that are not part of the flex skeleton
99 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
102 static int hslineno = 0; /* Line number at end of token */
103 int hsplineno = 0; /* Line number at end of previous token */
105 static int hscolno = 0; /* Column number at end of token */
106 int hspcolno = 0; /* Column number at end of previous token */
107 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
109 int modulelineno = -1; /* The line number where the module starts */
110 int startlineno = 0; /* The line number where something starts */
111 int endlineno = 0; /* The line number where something ends */
113 static BOOLEAN noGap = TRUE; /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
116 static int nested_comments; /* For counting comment nesting depth */
118 /* OLD: Hacky definition of yywrap: see flex doc.
120 If we don't do this, then we'll have to get the default
121 yywrap from the flex library, which is often something
122 we are not good at locating. This avoids that difficulty.
123 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
128 /* Essential forward declarations */
130 static void hsnewid PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext (NO_ARGS);
133 static void addtext PROTO((char *, unsigned));
134 static void addchar PROTO((char));
135 static char *fetchtext PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int Return PROTO((int));
138 static void hsentercontext PROTO((int));
140 /* Special file handling for IMPORTS */
141 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
143 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
144 static char *filename_save; /* File Name */
145 static int hslineno_save = 0, /* Line Number */
146 hsplineno_save = 0, /* Line Number of Prev. token */
147 hscolno_save = 0, /* Indentation */
148 hspcolno_save = 0; /* Left Indentation */
149 static short icontexts_save = 0; /* Indent Context Level */
151 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags; /* that which is saved */
154 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
156 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
157 * inserted before token +ve -- "}" inserted before
160 short icontexts = 0; /* Which context we're in */
163 Table of indentations: right bit indicates whether to use
164 indentation rules (1 = use rules; 0 = ignore)
167 push one of these "contexts" at every "case" or "where"; the right bit says
168 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
170 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
171 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
172 push is shown just below.
177 static short indenttab[MAX_CONTEXTS] = {-1};
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
182 #define RETURN(tok) return(Return(tok))
185 #define YY_DECL int yylex1()
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189 to support older versions of flex, we'll continue to peek for now.
191 #define YY_USER_ACTION \
192 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
199 /* Each time we enter a new start state, we push it onto the state stack.
201 #define PUSH_STATE(n) yy_push_state(n)
202 #define POP_STATE yy_pop_state()
207 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208 never-interactive (to go a bit faster)
209 stack (use a start-condition stack)
213 %option never-interactive
216 /* The start states are:
217 Code -- normal Haskell code (principal lexer)
218 GlaExt -- Haskell code with Glasgow extensions
219 Comment -- Nested comment processing
220 String -- Inside a string literal with backslashes
221 StringEsc -- Immediately following a backslash in a string literal
222 Char -- Inside a character literal with backslashes
223 CharEsc -- Immediately following a backslash in a character literal
225 Note that the INITIAL state is unused. Also note that these states
226 are _exclusive_. All rules should be prefixed with an appropriate
227 list of start states.
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
232 isoS [\xa1-\xbf\xd7\xf7]
233 isoL [\xc0-\xd6\xd8-\xde]
234 isol [\xdf-\xf6\xf8-\xff]
241 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
244 L [A-Z\xc0-\xd6\xd8-\xde]
245 l [a-z\xdf-\xf6\xf8-\xff]
250 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
259 * Simple comments and whitespace. Normally, we would just ignore these, but
260 * in case we're processing a string escape, we need to note that we've seen
263 * Note that we cater for a comment line that *doesn't* end in a newline.
264 * This is incorrect, strictly speaking, but seems like the right thing
265 * to do. Reported by Rajiv Mirani. (WDP 95/08)
267 * Hackily moved up here so that --<<EOF>> will match -- SOF 5/97
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
276 * Special GHC pragma rules. Do we need a start state for interface files,
277 * so these won't be matched in source files? --JSM
283 /* I believe the next rule is not ever matched.
285 The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
286 Driver has now been modified to output `standard' {-# LINE ..-} pragmas
287 where possible, so the lexer should now never see cpp directives
288 like '# ' and '#line'.
292 <Code,GlaExt>^"# ".*{NL} {
293 char tempf[FILENAME_SIZE];
294 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
296 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
299 <Code,GlaExt>^"#line ".*{NL} {
300 char tempf[FILENAME_SIZE];
301 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
303 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
308 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
309 /* partain: pragma-style line directive */
310 char tempf[FILENAME_SIZE];
311 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
313 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
316 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
317 PUSH_STATE(UserPragma);
318 RETURN(INTERFACE_UPRAGMA);
320 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
321 PUSH_STATE(UserPragma);
322 RETURN(SPECIALISE_UPRAGMA);
324 <Code,GlaExt>"{-#"{WS}*"INLINE" {
325 PUSH_STATE(UserPragma);
326 RETURN(INLINE_UPRAGMA);
328 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
329 PUSH_STATE(UserPragma);
330 RETURN(NOINLINE_UPRAGMA);
332 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
333 PUSH_STATE(UserPragma);
334 RETURN(MAGIC_UNFOLDING_UPRAGMA);
336 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
337 /* these are handled by hscpp */
341 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
342 /* these are for the driver! */
346 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
347 /* these are used by `make depend' and the
348 compiler to indicate that a module should
349 be imported from source */
351 RETURN(SOURCE_UPRAGMA);
354 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
355 fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
356 input_filename, hsplineno);
357 format_string(stderr, (unsigned char *) yytext, yyleng);
358 fputs("'\n", stderr);
362 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
366 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
367 * intentionally accepted as a keyword even for normal <Code>.
371 <Code,GlaExt>"case" { RETURN(CASE); }
372 <Code,GlaExt>"class" { RETURN(CLASS); }
373 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
374 <Code,GlaExt>"default" { RETURN(DEFAULT); }
375 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
376 <Code,GlaExt>"do" { RETURN(DO); }
377 <Code,GlaExt>"else" { RETURN(ELSE); }
378 <Code,GlaExt>"if" { RETURN(IF); }
379 <Code,GlaExt>"import" { RETURN(IMPORT); }
380 <Code,GlaExt>"in" { RETURN(IN); }
381 <Code,GlaExt>"infix" { RETURN(INFIX); }
382 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
383 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
384 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
385 <Code,GlaExt>"let" { RETURN(LET); }
386 <Code,GlaExt>"module" { RETURN(MODULE); }
387 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
388 <Code,GlaExt>"of" { RETURN(OF); }
389 <Code,GlaExt>"then" { RETURN(THEN); }
390 <Code,GlaExt>"type" { RETURN(TYPE); }
391 <Code,GlaExt>"where" { RETURN(WHERE); }
393 <Code,GlaExt>"as" { RETURN(AS); }
394 <Code,GlaExt>"hiding" { RETURN(HIDING); }
395 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
397 <Code,GlaExt>"forall" { RETURN(FORALL); }
399 <Code,GlaExt>"_scc_" { RETURN(SCC); }
400 <GlaExt>"_ccall_" { RETURN(CCALL); }
401 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
402 <GlaExt>"_casm_" { RETURN(CASM); }
403 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
404 <GlaExt>"(#" { RETURN(OUNBOXPAREN); }
405 <GlaExt>"#)" { RETURN(CUNBOXPAREN); }
406 <GlaExt>"foreign" { RETURN(FOREIGN); }
407 <GlaExt>"export" { RETURN(EXPORT); }
408 <GlaExt>"label" { RETURN(LABEL); }
409 <GlaExt>"unsafe" { RETURN(UNSAFE); }
410 <GlaExt>"_stdcall" { RETURN(STDCALL); }
411 <GlaExt>"_ccall" { RETURN(C_CALL); }
412 <GlaExt>"_pascal" { RETURN(PASCAL); }
413 <GlaExt>"stdcall" { RETURN(STDCALL); }
414 <GlaExt>"ccall" { RETURN(C_CALL); }
415 <GlaExt>"pascal" { RETURN(PASCAL); }
416 <GlaExt>"dynamic" { RETURN(DYNAMIC); }
420 * Haskell operators: special, reservedops and useful varsyms
424 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
425 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
426 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
427 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
428 <Code,GlaExt>"{" { RETURN(OCURLY); }
429 <Code,GlaExt>"}" { RETURN(CCURLY); }
430 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
431 <Code,GlaExt>";" { RETURN(SEMI); }
432 <Code,GlaExt>"`" { RETURN(BQUOTE); }
433 <Code,GlaExt>"_" { RETURN(WILDCARD); }
435 <Code,GlaExt>"." { RETURN(DOT); }
436 <Code,GlaExt>".." { RETURN(DOTDOT); }
437 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
438 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
439 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
440 <Code,GlaExt>"|" { RETURN(VBAR); }
441 <Code,GlaExt>"<-" { RETURN(LARROW); }
442 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
443 <Code,GlaExt>"-" { RETURN(MINUS); }
444 <Code,GlaExt>"+" { RETURN(PLUS); }
446 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
447 <Code,GlaExt>"@" { RETURN(AT); }
448 <Code,GlaExt>"!" { RETURN(BANG); }
449 <Code,GlaExt>"~" { RETURN(LAZY); }
453 * Integers and (for Glasgow extensions) primitive integers. Note that
454 * we pass all of the text on to the parser, because flex/C can't handle
455 * arbitrary precision numbers.
459 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
460 yylval.uid = xstrndup(yytext, yyleng - 1);
463 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
464 yylval.uid = xstrndup(yytext, yyleng);
467 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
468 yylval.uid = xstrndup(yytext, yyleng - 1);
471 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
472 yylval.uid = xstrndup(yytext, yyleng);
475 <GlaExt>("-")?{N}"#" {
476 yylval.uid = xstrndup(yytext, yyleng - 1);
479 <Code,GlaExt,UserPragma>{N} {
480 yylval.uid = xstrndup(yytext, yyleng);
486 * Floats and (for Glasgow extensions) primitive floats/doubles.
490 <GlaExt>("-")?{F}"##" {
491 yylval.uid = xstrndup(yytext, yyleng - 2);
494 <GlaExt>("-")?{F}"#" {
495 yylval.uid = xstrndup(yytext, yyleng - 1);
499 yylval.uid = xstrndup(yytext, yyleng);
505 * Funky ``foo'' style C literals for Glasgow extensions
509 <GlaExt>"``"[^']+"''" {
510 hsnewid(yytext + 2, yyleng - 4);
516 * Identifiers, both variables and operators. The trailing hash is allowed
517 * for Glasgow extensions.
523 /* These SHOULDNAE work in "Code" (sigh) */
525 <GlaExt,UserPragma>{Id}"#" {
526 if (! nonstandardFlag) {
527 char errbuf[ERR_BUF_SIZE];
528 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
531 hsnewid(yytext, yyleng);
532 RETURN(isconstr(yytext) ? CONID : VARID);
534 <Code,GlaExt,UserPragma>{Id} {
535 hsnewid(yytext, yyleng);
536 RETURN(isconstr(yytext) ? CONID : VARID);
538 <Code,GlaExt,UserPragma>{SId} {
539 hsnewid(yytext, yyleng);
540 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
542 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
544 if (! nonstandardFlag) {
545 char errbuf[ERR_BUF_SIZE];
546 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
549 is_constr = hsnewqid(yytext, yyleng);
550 RETURN(is_constr ? QCONID : QVARID);
552 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
553 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
554 RETURN(is_constr ? QCONID : QVARID);
556 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
557 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
558 RETURN(is_constr ? QCONSYM : QVARSYM);
562 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
564 /* Because we can make the former well-behaved (we defined them).
566 Sadly, the latter is defined by Haskell, which allows such
567 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
571 <GlaExt,UserPragma>"`"{Id}"#`" {
572 hsnewid(yytext + 1, yyleng - 2);
573 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
578 * Character literals. The first form is the quick form, for character
579 * literals that don't contain backslashes. Literals with backslashes are
580 * lexed through multiple rules. First, we match the open ' and as many
581 * normal characters as possible. This puts us into the <Char> state, where
582 * a backslash is legal. Then, we match the backslash and move into the
583 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
584 * characters and the close '. We may end up with too many characters, but
585 * this allows us to easily share the lex rules with strings. Excess characters
586 * are ignored with a warning.
590 <GlaExt>'({CHAR}|"\"")"'#" {
591 yylval.uhstring = installHstring(1, yytext+1);
594 <Code,GlaExt>'({CHAR}|"\"")' {
595 yylval.uhstring = installHstring(1, yytext+1);
598 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
599 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
602 <Code,GlaExt>'({CHAR}|"\"")* {
603 hsmlcolno = hspcolno;
605 addtext(yytext+1, yyleng-1);
608 <Char>({CHAR}|"\"")*'# {
612 addtext(yytext, yyleng - 2);
613 text = fetchtext(&length);
615 if (! nonstandardFlag) {
616 char errbuf[ERR_BUF_SIZE];
617 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
622 fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
623 input_filename, hsplineno, hspcolno + 1);
624 format_string(stderr, (unsigned char *) text, length);
625 fputs("' too long\n", stderr);
628 yylval.uhstring = installHstring(1, text);
629 hspcolno = hsmlcolno;
633 <Char>({CHAR}|"\"")*' {
637 addtext(yytext, yyleng - 1);
638 text = fetchtext(&length);
641 fprintf(stderr, "%s:%d:%d: Character literal '",
642 input_filename, hsplineno, hspcolno + 1);
643 format_string(stderr, (unsigned char *) text, length);
644 fputs("' too long\n", stderr);
647 yylval.uhstring = installHstring(1, text);
648 hspcolno = hsmlcolno;
652 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
657 * String literals. The first form is the quick form, for string literals
658 * that don't contain backslashes. Literals with backslashes are lexed
659 * through multiple rules. First, we match the open " and as many normal
660 * characters as possible. This puts us into the <String> state, where
661 * a backslash is legal. Then, we match the backslash and move into the
662 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
663 * characters, moving back and forth between <String> and <StringEsc> as more
664 * backslashes are encountered. (We may even digress into <Comment> mode if we
665 * find a comment in a gap between backslashes.) Finally, we read the last chunk
666 * of normal characters and the close ".
670 <GlaExt>"\""({CHAR}|"'")*"\""# {
671 yylval.uhstring = installHstring(yyleng-3, yytext+1);
672 /* the -3 accounts for the " on front, "# on the end */
675 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
676 yylval.uhstring = installHstring(yyleng-2, yytext+1);
679 <Code,GlaExt>"\""({CHAR}|"'")* {
680 hsmlcolno = hspcolno;
682 addtext(yytext+1, yyleng-1);
685 <String>({CHAR}|"'")*"\"#" {
689 addtext(yytext, yyleng-2);
690 text = fetchtext(&length);
692 if (! nonstandardFlag) {
693 char errbuf[ERR_BUF_SIZE];
694 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
698 yylval.uhstring = installHstring(length, text);
699 hspcolno = hsmlcolno;
703 <String>({CHAR}|"'")*"\"" {
707 addtext(yytext, yyleng-1);
708 text = fetchtext(&length);
710 yylval.uhstring = installHstring(length, text);
711 hspcolno = hsmlcolno;
715 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
719 * Character and string escapes are roughly the same, but strings have the
720 * extra `\&' sequence which is not allowed for characters. Also, comments
721 * are allowed in the <StringEsc> state. (See the comment section much
724 * NB: Backslashes and tabs are stored in strings as themselves.
725 * But if we print them (in printtree.c), they must go out as
726 * "\\\\" and "\\t" respectively. (This is because of the bogus
727 * intermediate format that the parser produces. It uses '\t' fpr end of
728 * string, so it needs to be able to escape tabs, which means that it
729 * also needs to be able to escape the escape character ('\\'). Sigh.
733 <Char>\\ { PUSH_STATE(CharEsc); }
734 <String>\\& /* Ignore */ ;
735 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
737 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
738 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
740 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
741 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
742 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
743 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
744 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
745 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
746 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
747 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
748 <CharEsc,StringEsc>BEL |
749 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
750 <CharEsc,StringEsc>BS |
751 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
752 <CharEsc,StringEsc>HT |
753 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
754 <CharEsc,StringEsc>LF |
755 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
756 <CharEsc,StringEsc>VT |
757 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
758 <CharEsc,StringEsc>FF |
759 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
760 <CharEsc,StringEsc>CR |
761 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
762 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
763 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
764 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
765 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
766 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
767 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
768 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
769 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
770 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
771 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
772 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
773 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
774 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
775 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
776 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
777 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
778 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
779 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
780 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
781 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
782 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
783 <CharEsc,StringEsc>{D}+ {
784 int i = strtol(yytext, NULL, 10);
788 char errbuf[ERR_BUF_SIZE];
789 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
795 <CharEsc,StringEsc>o{O}+ {
796 int i = strtol(yytext + 1, NULL, 8);
800 char errbuf[ERR_BUF_SIZE];
801 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
807 <CharEsc,StringEsc>x{H}+ {
808 int i = strtol(yytext + 1, NULL, 16);
812 char errbuf[ERR_BUF_SIZE];
813 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
823 * Nested comments. The major complication here is in trying to match the
824 * longest lexemes possible, for better performance. (See the flex document.)
825 * That's why the rules look so bizarre.
829 <Code,GlaExt,UserPragma,StringEsc>"{-" {
830 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
834 <Comment>"-"+[^-{}]+ |
835 <Comment>"{"+[^-{}]+ ;
836 <Comment>"{-" { nested_comments++; }
837 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
842 * Illegal characters. This used to be a single rule, but we might as well
843 * pass on as much information as we have, so now we indicate our state in
848 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
849 fprintf(stderr, "%s:%d:%d: Illegal character: `",
850 input_filename, hsplineno, hspcolno + 1);
851 format_string(stderr, (unsigned char *) yytext, 1);
852 fputs("'\n", stderr);
856 fprintf(stderr, "%s:%d:%d: Illegal character: `",
857 input_filename, hsplineno, hspcolno + 1);
858 format_string(stderr, (unsigned char *) yytext, 1);
859 fputs("' in a character literal\n", stderr);
863 fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
864 input_filename, hsplineno, hspcolno + 1);
865 format_string(stderr, (unsigned char *) yytext, 1);
866 fputs("'\n", stderr);
869 <String>(.|\n) { if (nonstandardFlag) {
870 addtext(yytext, yyleng);
872 fprintf(stderr, "%s:%d:%d: Illegal character: `",
873 input_filename, hsplineno, hspcolno + 1);
874 format_string(stderr, (unsigned char *) yytext, 1);
875 fputs("' in a string literal\n", stderr);
881 fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
882 input_filename, hsplineno, hspcolno + 1);
883 format_string(stderr, (unsigned char *) yytext, 1);
884 fputs("'\n", stderr);
887 fprintf(stderr, "%s:%d:%d: Illegal character: `",
888 input_filename, hsplineno, hspcolno + 1);
889 format_string(stderr, (unsigned char *) yytext, 1);
890 fputs("' in a string gap\n", stderr);
897 * End of file. In any sub-state, this is an error. However, for the primary
898 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
899 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
900 * adding virtual close curlies, or closing an interface and returning to the
901 * primary source file.
903 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
904 * line/column advancement has to be done by hand.
908 <Char,CharEsc><<EOF>> {
909 hsplineno = hslineno; hspcolno = hscolno;
910 hsperror("unterminated character literal");
913 hsplineno = hslineno; hspcolno = hscolno;
914 hsperror("unterminated comment");
916 <String,StringEsc><<EOF>> {
917 hsplineno = hslineno; hspcolno = hscolno;
918 hsperror("unterminated string literal");
920 <UserPragma><<EOF>> {
921 hsplineno = hslineno; hspcolno = hscolno;
922 hsperror("unterminated user-specified pragma");
924 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
928 /**********************************************************************
931 * YACC/LEX Initialisation etc. *
934 **********************************************************************/
937 We initialise input_filename to "<stdin>".
938 This allows unnamed sources to be piped into the parser.
944 input_filename = xstrdup("<stdin>");
946 /* We must initialize the input buffer _now_, because we call
947 setyyin _before_ calling yylex for the first time! */
948 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
957 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
959 if (input_filename != NULL)
960 free(input_filename);
961 input_filename = xstrdup(f);
964 /**********************************************************************
967 * Layout Processing *
970 **********************************************************************/
973 The following section deals with Haskell Layout conventions
974 forcing insertion of ; or } as appropriate
980 return (!forgetindent && INDENTON);
984 /* Enter new context and set new indentation level */
989 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
993 * partain: first chk that new indent won't be less than current one; this code
994 * doesn't make sense to me; hscolno tells the position of the _end_ of the
995 * current token; what that has to do with indenting, I don't know.
999 if (hscolno - 1 <= INDENTPT) {
1001 return; /* Empty input OK for Haskell 1.1 */
1003 char errbuf[ERR_BUF_SIZE];
1005 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1009 hsentercontext((hspcolno << 1) | 1);
1013 /* Enter a new context without changing the indentation level */
1018 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1020 hsentercontext(indenttab[icontexts] & ~1);
1024 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1028 forgetindent = TRUE;
1032 /* Enter a new layout context. */
1034 hsentercontext(int indent)
1036 /* Enter new context and set indentation as specified */
1037 if (++icontexts >= MAX_CONTEXTS) {
1038 char errbuf[ERR_BUF_SIZE];
1040 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1043 forgetindent = FALSE;
1044 indenttab[icontexts] = indent;
1046 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1051 /* Exit a layout context */
1057 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1062 * Return checks the indentation level and returns ;, } or the specified token.
1072 if (hsshouldindent()) {
1073 if (hspcolno < INDENTPT) {
1075 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1079 } else if (hspcolno == INDENTPT) {
1081 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1089 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1096 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1102 static BOOLEAN eof = FALSE;
1105 if (hssttok != -1) {
1113 endlineno = hslineno;
1114 if ((tok = yylex1()) != EOF)
1120 if (icontexts > icontexts_save) {
1123 indenttab[icontexts] = 0;
1126 hsperror("missing '}' at end of file");
1127 } else if (hsbuf_save != NULL) {
1129 yy_delete_buffer(YY_CURRENT_BUFFER);
1130 yy_switch_to_buffer(hsbuf_save);
1132 new_filename(filename_save);
1133 free(filename_save);
1134 hslineno = hslineno_save;
1135 hsplineno = hsplineno_save;
1136 hscolno = hscolno_save;
1137 hspcolno = hspcolno_save;
1139 icontexts = icontexts_save - 1;
1142 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1147 hsperror("No longer using yacc to parse interface files");
1152 abort(); /* should never get here! */
1156 /**********************************************************************
1159 * Input Processing for Interfaces -- Not currently used !!! *
1162 **********************************************************************/
1164 /* setyyin(file) open file as new lex input buffer */
1170 hsbuf_save = YY_CURRENT_BUFFER;
1171 if ((yyin = fopen(file, "r")) == NULL) {
1172 char errbuf[ERR_BUF_SIZE];
1174 sprintf(errbuf, "can't read \"%-.50s\"", file);
1177 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1179 hslineno_save = hslineno;
1180 hsplineno_save = hsplineno;
1181 hslineno = hsplineno = 1;
1183 filename_save = input_filename;
1184 input_filename = NULL;
1186 hscolno_save = hscolno;
1187 hspcolno_save = hspcolno;
1188 hscolno = hspcolno = 0;
1189 etags_save = etags; /* do not do "etags" stuff in interfaces */
1190 etags = 0; /* We remember whether we are doing it in
1191 the module, so we can restore it later [WDP 94/09] */
1192 hsentercontext(-1); /* partain: changed this from 0 */
1193 icontexts_save = icontexts;
1195 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1200 layout_input(char *text, int len)
1203 fprintf(stderr, "Scanning \"%s\"\n", text);
1206 hsplineno = hslineno;
1218 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1230 setstartlineno(void)
1232 startlineno = hsplineno;
1234 if (modulelineno == 0) {
1235 modulelineno = startlineno;
1241 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1245 /**********************************************************************
1251 **********************************************************************/
1253 #define CACHE_SIZE YY_BUF_SIZE
1259 } textcache = { 0, 0, NULL };
1264 /* fprintf(stderr, "cleartext\n"); */
1266 if (textcache.allocated == 0) {
1267 textcache.allocated = CACHE_SIZE;
1268 textcache.text = xmalloc(CACHE_SIZE);
1273 addtext(char *text, unsigned length)
1275 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1280 if (textcache.next + length + 1 >= textcache.allocated) {
1281 textcache.allocated += length + CACHE_SIZE;
1282 textcache.text = xrealloc(textcache.text, textcache.allocated);
1284 bcopy(text, textcache.text + textcache.next, length);
1285 textcache.next += length;
1291 /* fprintf(stderr, "addchar: %c\n", c); */
1293 if (textcache.next + 2 >= textcache.allocated) {
1294 textcache.allocated += CACHE_SIZE;
1295 textcache.text = xrealloc(textcache.text, textcache.allocated);
1297 textcache.text[textcache.next++] = c;
1301 fetchtext(unsigned *length)
1303 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1305 *length = textcache.next;
1306 textcache.text[textcache.next] = '\0';
1307 return textcache.text;
1310 /**********************************************************************
1313 * Identifier Processing *
1316 **********************************************************************/
1319 hsnewid Enters an id of length n into the symbol table.
1323 hsnewid(char *name, int length)
1325 char save = name[length];
1327 name[length] = '\0';
1328 yylval.uid = installid(name);
1329 name[length] = save;
1333 hsnewqid(char *name, int length)
1336 char save = name[length];
1337 name[length] = '\0';
1339 dot = strchr(name, '.');
1341 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1343 name[length] = save;
1345 return isconstr(dot+1);