2 /**********************************************************************
8 **********************************************************************/
10 #include "../../includes/config.h"
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 /* memory.h and strings.h conflict on some systems. */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
31 #include "hsparser.tab.h"
32 #include "constants.h"
35 /* Our substitute for <ctype.h> */
44 static unsigned char CharTable[NCHARS] = {
45 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
46 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
47 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
48 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
50 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
51 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
52 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
53 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
54 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
55 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
57 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
58 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
59 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
62 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
83 return(CharTable[*s]&(_C));
86 /**********************************************************************
92 **********************************************************************/
94 char *input_filename = NULL; /* Always points to a dynamically allocated string */
97 * For my own sanity, things that are not part of the flex skeleton
98 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
101 static int hslineno = 0; /* Line number at end of token */
102 int hsplineno = 0; /* Line number at end of previous token */
104 static int hscolno = 0; /* Column number at end of token */
105 int hspcolno = 0; /* Column number at end of previous token */
106 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
108 int modulelineno = -1; /* The line number where the module starts */
109 int startlineno = 0; /* The line number where something starts */
110 int endlineno = 0; /* The line number where something ends */
112 static BOOLEAN noGap = TRUE; /* For checking string gaps */
113 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
115 static int nested_comments; /* For counting comment nesting depth */
117 /* OLD: Hacky definition of yywrap: see flex doc.
119 If we don't do this, then we'll have to get the default
120 yywrap from the flex library, which is often something
121 we are not good at locating. This avoids that difficulty.
122 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
127 /* Essential forward declarations */
129 static void hsnewid PROTO((char *, int));
130 static void layout_input PROTO((char *, int));
131 static void cleartext (NO_ARGS);
132 static void addtext PROTO((char *, unsigned));
133 static void addchar PROTO((char));
134 static char *fetchtext PROTO((unsigned *));
135 static void new_filename PROTO((char *));
136 static int Return PROTO((int));
137 static void hsentercontext PROTO((int));
139 /* Special file handling for IMPORTS */
140 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
143 static char *filename_save; /* File Name */
144 static int hslineno_save = 0, /* Line Number */
145 hsplineno_save = 0, /* Line Number of Prev. token */
146 hscolno_save = 0, /* Indentation */
147 hspcolno_save = 0; /* Left Indentation */
148 static short icontexts_save = 0; /* Indent Context Level */
150 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
151 extern BOOLEAN etags; /* that which is saved */
153 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
155 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
156 * inserted before token +ve -- "}" inserted before
159 short icontexts = 0; /* Which context we're in */
162 Table of indentations: right bit indicates whether to use
163 indentation rules (1 = use rules; 0 = ignore)
166 push one of these "contexts" at every "case" or "where"; the right bit says
167 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
169 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
170 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
171 push is shown just below.
176 static short indenttab[MAX_CONTEXTS] = {-1};
178 #define INDENTPT (indenttab[icontexts]>>1)
179 #define INDENTON (indenttab[icontexts]&1)
181 #define RETURN(tok) return(Return(tok))
184 #define YY_DECL int yylex1()
186 /* We should not peek at yy_act, but flex calls us even for the internal action
187 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
188 to support older versions of flex, we'll continue to peek for now.
190 #define YY_USER_ACTION \
191 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
195 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
198 /* Each time we enter a new start state, we push it onto the state stack.
200 #define PUSH_STATE(n) yy_push_state(n)
201 #define POP_STATE yy_pop_state()
206 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
207 never-interactive (to go a bit faster)
208 stack (use a start-condition stack)
212 %option never-interactive
215 /* The start states are:
216 Code -- normal Haskell code (principal lexer)
217 GlaExt -- Haskell code with Glasgow extensions
218 Comment -- Nested comment processing
219 String -- Inside a string literal with backslashes
220 StringEsc -- Immediately following a backslash in a string literal
221 Char -- Inside a character literal with backslashes
222 CharEsc -- Immediately following a backslash in a character literal
224 Note that the INITIAL state is unused. Also note that these states
225 are _exclusive_. All rules should be prefixed with an appropriate
226 list of start states.
229 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231 isoS [\xa1-\xbf\xd7\xf7]
232 isoL [\xc0-\xd6\xd8-\xde]
233 isol [\xdf-\xf6\xf8-\xff]
240 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
241 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 L [A-Z\xc0-\xd6\xd8-\xde]
244 l [a-z\xdf-\xf6\xf8-\xff]
249 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
258 * Special GHC pragma rules. Do we need a start state for interface files,
259 * so these won't be matched in source files? --JSM
263 <Code,GlaExt>^"# ".*{NL} {
264 char tempf[FILENAME_SIZE];
265 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
267 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
270 <Code,GlaExt>^"#line ".*{NL} {
271 char tempf[FILENAME_SIZE];
272 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
274 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
277 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
278 /* partain: pragma-style line directive */
279 char tempf[FILENAME_SIZE];
280 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
282 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
285 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
286 PUSH_STATE(UserPragma);
287 RETURN(INTERFACE_UPRAGMA);
289 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
290 PUSH_STATE(UserPragma);
291 RETURN(SPECIALISE_UPRAGMA);
293 <Code,GlaExt>"{-#"{WS}*"INLINE" {
294 PUSH_STATE(UserPragma);
295 RETURN(INLINE_UPRAGMA);
297 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
298 PUSH_STATE(UserPragma);
299 RETURN(MAGIC_UNFOLDING_UPRAGMA);
301 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
302 PUSH_STATE(UserPragma);
303 RETURN(DEFOREST_UPRAGMA);
305 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
306 /* these are handled by hscpp */
310 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
311 fprintf(stderr, "\"%s\", line %d: Warning: Unrecognised pragma '",
312 input_filename, hsplineno);
313 format_string(stderr, (unsigned char *) yytext, yyleng);
314 fputs("'\n", stderr);
318 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
322 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
323 * intentionally accepted as a keyword even for normal <Code>.
327 <Code,GlaExt>"case" { RETURN(CASE); }
328 <Code,GlaExt>"class" { RETURN(CLASS); }
329 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
330 <Code,GlaExt>"default" { RETURN(DEFAULT); }
331 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
332 <Code,GlaExt>"do" { RETURN(DO); }
333 <Code,GlaExt>"else" { RETURN(ELSE); }
334 <Code,GlaExt>"if" { RETURN(IF); }
335 <Code,GlaExt>"import" { RETURN(IMPORT); }
336 <Code,GlaExt>"in" { RETURN(IN); }
337 <Code,GlaExt>"infix" { RETURN(INFIX); }
338 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
339 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
340 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
341 <Code,GlaExt>"let" { RETURN(LET); }
342 <Code,GlaExt>"module" { RETURN(MODULE); }
343 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
344 <Code,GlaExt>"of" { RETURN(OF); }
345 <Code,GlaExt>"then" { RETURN(THEN); }
346 <Code,GlaExt>"type" { RETURN(TYPE); }
347 <Code,GlaExt>"where" { RETURN(WHERE); }
349 <Code,GlaExt>"as" { RETURN(AS); }
350 <Code,GlaExt>"hiding" { RETURN(HIDING); }
351 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
353 <Code,GlaExt>"_scc_" { RETURN(SCC); }
354 <GlaExt>"_ccall_" { RETURN(CCALL); }
355 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
356 <GlaExt>"_casm_" { RETURN(CASM); }
357 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
361 * Haskell operators: special, reservedops and useful varsyms
365 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
366 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
367 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
368 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
369 <Code,GlaExt>"{" { RETURN(OCURLY); }
370 <Code,GlaExt>"}" { RETURN(CCURLY); }
371 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
372 <Code,GlaExt>";" { RETURN(SEMI); }
373 <Code,GlaExt>"`" { RETURN(BQUOTE); }
374 <Code,GlaExt>"_" { RETURN(WILDCARD); }
376 <Code,GlaExt>".." { RETURN(DOTDOT); }
377 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
378 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
379 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
380 <Code,GlaExt>"|" { RETURN(VBAR); }
381 <Code,GlaExt>"<-" { RETURN(LARROW); }
382 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
383 <Code,GlaExt>"-" { RETURN(MINUS); }
385 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
386 <Code,GlaExt>"@" { RETURN(AT); }
387 <Code,GlaExt>"!" { RETURN(BANG); }
388 <Code,GlaExt>"~" { RETURN(LAZY); }
392 * Integers and (for Glasgow extensions) primitive integers. Note that
393 * we pass all of the text on to the parser, because flex/C can't handle
394 * arbitrary precision numbers.
398 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
399 yylval.uid = xstrndup(yytext, yyleng - 1);
402 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
403 yylval.uid = xstrndup(yytext, yyleng);
406 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
407 yylval.uid = xstrndup(yytext, yyleng - 1);
410 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
411 yylval.uid = xstrndup(yytext, yyleng);
414 <GlaExt>("-")?{N}"#" {
415 yylval.uid = xstrndup(yytext, yyleng - 1);
418 <Code,GlaExt,UserPragma>{N} {
419 yylval.uid = xstrndup(yytext, yyleng);
425 * Floats and (for Glasgow extensions) primitive floats/doubles.
429 <GlaExt>("-")?{F}"##" {
430 yylval.uid = xstrndup(yytext, yyleng - 2);
433 <GlaExt>("-")?{F}"#" {
434 yylval.uid = xstrndup(yytext, yyleng - 1);
438 yylval.uid = xstrndup(yytext, yyleng);
444 * Funky ``foo'' style C literals for Glasgow extensions
448 <GlaExt>"``"[^']+"''" {
449 hsnewid(yytext + 2, yyleng - 4);
455 * Identifiers, both variables and operators. The trailing hash is allowed
456 * for Glasgow extensions.
462 /* These SHOULDNAE work in "Code" (sigh) */
464 <GlaExt,UserPragma>{Id}"#" {
465 if (! nonstandardFlag) {
466 char errbuf[ERR_BUF_SIZE];
467 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
470 hsnewid(yytext, yyleng);
471 RETURN(isconstr(yytext) ? CONID : VARID);
473 <Code,GlaExt,UserPragma>{Id} {
474 hsnewid(yytext, yyleng);
475 RETURN(isconstr(yytext) ? CONID : VARID);
477 <Code,GlaExt,UserPragma>{SId} {
478 hsnewid(yytext, yyleng);
479 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
481 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
483 if (! nonstandardFlag) {
484 char errbuf[ERR_BUF_SIZE];
485 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
488 is_constr = hsnewqid(yytext, yyleng);
489 RETURN(is_constr ? QCONID : QVARID);
491 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
492 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
493 RETURN(is_constr ? QCONID : QVARID);
495 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
496 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
497 RETURN(is_constr ? QCONSYM : QVARSYM);
501 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
503 /* Because we can make the former well-behaved (we defined them).
505 Sadly, the latter is defined by Haskell, which allows such
506 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
510 <GlaExt,UserPragma>"`"{Id}"#`" {
511 hsnewid(yytext + 1, yyleng - 2);
512 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
517 * Character literals. The first form is the quick form, for character
518 * literals that don't contain backslashes. Literals with backslashes are
519 * lexed through multiple rules. First, we match the open ' and as many
520 * normal characters as possible. This puts us into the <Char> state, where
521 * a backslash is legal. Then, we match the backslash and move into the
522 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
523 * characters and the close '. We may end up with too many characters, but
524 * this allows us to easily share the lex rules with strings. Excess characters
525 * are ignored with a warning.
529 <GlaExt>'({CHAR}|"\"")"'#" {
530 yylval.uhstring = installHstring(1, yytext+1);
533 <Code,GlaExt>'({CHAR}|"\"")' {
534 yylval.uhstring = installHstring(1, yytext+1);
537 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
538 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
541 <Code,GlaExt>'({CHAR}|"\"")* {
542 hsmlcolno = hspcolno;
544 addtext(yytext+1, yyleng-1);
547 <Char>({CHAR}|"\"")*'# {
551 addtext(yytext, yyleng - 2);
552 text = fetchtext(&length);
554 if (! nonstandardFlag) {
555 char errbuf[ERR_BUF_SIZE];
556 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
561 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
562 input_filename, hsplineno, hspcolno + 1);
563 format_string(stderr, (unsigned char *) text, length);
564 fputs("' too long\n", stderr);
567 yylval.uhstring = installHstring(1, text);
568 hspcolno = hsmlcolno;
572 <Char>({CHAR}|"\"")*' {
576 addtext(yytext, yyleng - 1);
577 text = fetchtext(&length);
580 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
581 input_filename, hsplineno, hspcolno + 1);
582 format_string(stderr, (unsigned char *) text, length);
583 fputs("' too long\n", stderr);
586 yylval.uhstring = installHstring(1, text);
587 hspcolno = hsmlcolno;
591 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
596 * String literals. The first form is the quick form, for string literals
597 * that don't contain backslashes. Literals with backslashes are lexed
598 * through multiple rules. First, we match the open " and as many normal
599 * characters as possible. This puts us into the <String> state, where
600 * a backslash is legal. Then, we match the backslash and move into the
601 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
602 * characters, moving back and forth between <String> and <StringEsc> as more
603 * backslashes are encountered. (We may even digress into <Comment> mode if we
604 * find a comment in a gap between backslashes.) Finally, we read the last chunk
605 * of normal characters and the close ".
609 <GlaExt>"\""({CHAR}|"'")*"\""# {
610 yylval.uhstring = installHstring(yyleng-3, yytext+1);
611 /* the -3 accounts for the " on front, "# on the end */
614 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
615 yylval.uhstring = installHstring(yyleng-2, yytext+1);
618 <Code,GlaExt>"\""({CHAR}|"'")* {
619 hsmlcolno = hspcolno;
621 addtext(yytext+1, yyleng-1);
624 <String>({CHAR}|"'")*"\"#" {
628 addtext(yytext, yyleng-2);
629 text = fetchtext(&length);
631 if (! nonstandardFlag) {
632 char errbuf[ERR_BUF_SIZE];
633 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
637 yylval.uhstring = installHstring(length, text);
638 hspcolno = hsmlcolno;
642 <String>({CHAR}|"'")*"\"" {
646 addtext(yytext, yyleng-1);
647 text = fetchtext(&length);
649 yylval.uhstring = installHstring(length, text);
650 hspcolno = hsmlcolno;
654 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
658 * Character and string escapes are roughly the same, but strings have the
659 * extra `\&' sequence which is not allowed for characters. Also, comments
660 * are allowed in the <StringEsc> state. (See the comment section much
663 * NB: Backslashes and tabs are stored in strings as themselves.
664 * But if we print them (in printtree.c), they must go out as
665 * "\\\\" and "\\t" respectively. (This is because of the bogus
666 * intermediate format that the parser produces. It uses '\t' fpr end of
667 * string, so it needs to be able to escape tabs, which means that it
668 * also needs to be able to escape the escape character ('\\'). Sigh.
672 <Char>\\ { PUSH_STATE(CharEsc); }
673 <String>\\& /* Ignore */ ;
674 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
676 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
677 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
679 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
680 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
681 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
682 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
683 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
684 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
685 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
686 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
687 <CharEsc,StringEsc>BEL |
688 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
689 <CharEsc,StringEsc>BS |
690 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
691 <CharEsc,StringEsc>HT |
692 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
693 <CharEsc,StringEsc>LF |
694 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
695 <CharEsc,StringEsc>VT |
696 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
697 <CharEsc,StringEsc>FF |
698 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
699 <CharEsc,StringEsc>CR |
700 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
701 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
702 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
703 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
704 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
705 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
706 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
707 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
708 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
709 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
710 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
711 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
712 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
713 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
714 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
715 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
716 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
717 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
718 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
719 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
720 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
721 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
722 <CharEsc,StringEsc>{D}+ {
723 int i = strtol(yytext, NULL, 10);
727 char errbuf[ERR_BUF_SIZE];
728 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
734 <CharEsc,StringEsc>o{O}+ {
735 int i = strtol(yytext + 1, NULL, 8);
739 char errbuf[ERR_BUF_SIZE];
740 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
746 <CharEsc,StringEsc>x{H}+ {
747 int i = strtol(yytext + 1, NULL, 16);
751 char errbuf[ERR_BUF_SIZE];
752 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
761 * Simple comments and whitespace. Normally, we would just ignore these, but
762 * in case we're processing a string escape, we need to note that we've seen
765 * Note that we cater for a comment line that *doesn't* end in a newline.
766 * This is incorrect, strictly speaking, but seems like the right thing
767 * to do. Reported by Rajiv Mirani. (WDP 95/08)
771 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
772 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
776 * Nested comments. The major complication here is in trying to match the
777 * longest lexemes possible, for better performance. (See the flex document.)
778 * That's why the rules look so bizarre.
782 <Code,GlaExt,UserPragma,StringEsc>"{-" {
783 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
787 <Comment>"-"+[^-{}]+ |
788 <Comment>"{"+[^-{}]+ ;
789 <Comment>"{-" { nested_comments++; }
790 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
795 * Illegal characters. This used to be a single rule, but we might as well
796 * pass on as much information as we have, so now we indicate our state in
801 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
802 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
803 input_filename, hsplineno, hspcolno + 1);
804 format_string(stderr, (unsigned char *) yytext, 1);
805 fputs("'\n", stderr);
809 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
810 input_filename, hsplineno, hspcolno + 1);
811 format_string(stderr, (unsigned char *) yytext, 1);
812 fputs("' in a character literal\n", stderr);
816 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
817 input_filename, hsplineno, hspcolno + 1);
818 format_string(stderr, (unsigned char *) yytext, 1);
819 fputs("'\n", stderr);
822 <String>(.|\n) { if (nonstandardFlag) {
823 addtext(yytext, yyleng);
825 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
826 input_filename, hsplineno, hspcolno + 1);
827 format_string(stderr, (unsigned char *) yytext, 1);
828 fputs("' in a string literal\n", stderr);
834 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
835 input_filename, hsplineno, hspcolno + 1);
836 format_string(stderr, (unsigned char *) yytext, 1);
837 fputs("'\n", stderr);
840 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
841 input_filename, hsplineno, hspcolno + 1);
842 format_string(stderr, (unsigned char *) yytext, 1);
843 fputs("' in a string gap\n", stderr);
850 * End of file. In any sub-state, this is an error. However, for the primary
851 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
852 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
853 * adding virtual close curlies, or closing an interface and returning to the
854 * primary source file.
856 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
857 * line/column advancement has to be done by hand.
861 <Char,CharEsc><<EOF>> {
862 hsplineno = hslineno; hspcolno = hscolno;
863 hsperror("unterminated character literal");
866 hsplineno = hslineno; hspcolno = hscolno;
867 hsperror("unterminated comment");
869 <String,StringEsc><<EOF>> {
870 hsplineno = hslineno; hspcolno = hscolno;
871 hsperror("unterminated string literal");
873 <UserPragma><<EOF>> {
874 hsplineno = hslineno; hspcolno = hscolno;
875 hsperror("unterminated user-specified pragma");
877 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
881 /**********************************************************************
884 * YACC/LEX Initialisation etc. *
887 **********************************************************************/
890 We initialise input_filename to "<stdin>".
891 This allows unnamed sources to be piped into the parser.
897 input_filename = xstrdup("<stdin>");
899 /* We must initialize the input buffer _now_, because we call
900 setyyin _before_ calling yylex for the first time! */
901 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
910 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
912 if (input_filename != NULL)
913 free(input_filename);
914 input_filename = xstrdup(f);
917 /**********************************************************************
920 * Layout Processing *
923 **********************************************************************/
926 The following section deals with Haskell Layout conventions
927 forcing insertion of ; or } as appropriate
933 return (!forgetindent && INDENTON);
937 /* Enter new context and set new indentation level */
942 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
946 * partain: first chk that new indent won't be less than current one; this code
947 * doesn't make sense to me; hscolno tells the position of the _end_ of the
948 * current token; what that has to do with indenting, I don't know.
952 if (hscolno - 1 <= INDENTPT) {
954 return; /* Empty input OK for Haskell 1.1 */
956 char errbuf[ERR_BUF_SIZE];
958 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
962 hsentercontext((hspcolno << 1) | 1);
966 /* Enter a new context without changing the indentation level */
971 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
973 hsentercontext(indenttab[icontexts] & ~1);
977 /* Turn off indentation processing, usually because an explicit "{" has been seen */
985 /* Enter a new layout context. */
987 hsentercontext(int indent)
989 /* Enter new context and set indentation as specified */
990 if (++icontexts >= MAX_CONTEXTS) {
991 char errbuf[ERR_BUF_SIZE];
993 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
996 forgetindent = FALSE;
997 indenttab[icontexts] = indent;
999 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1004 /* Exit a layout context */
1010 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1015 * Return checks the indentation level and returns ;, } or the specified token.
1025 if (hsshouldindent()) {
1026 if (hspcolno < INDENTPT) {
1028 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1032 } else if (hspcolno == INDENTPT) {
1034 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1042 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1049 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1055 static BOOLEAN eof = FALSE;
1058 if (hssttok != -1) {
1066 endlineno = hslineno;
1067 if ((tok = yylex1()) != EOF)
1073 if (icontexts > icontexts_save) {
1076 indenttab[icontexts] = 0;
1079 hsperror("missing '}' at end of file");
1080 } else if (hsbuf_save != NULL) {
1082 yy_delete_buffer(YY_CURRENT_BUFFER);
1083 yy_switch_to_buffer(hsbuf_save);
1085 new_filename(filename_save);
1086 free(filename_save);
1087 hslineno = hslineno_save;
1088 hsplineno = hsplineno_save;
1089 hscolno = hscolno_save;
1090 hspcolno = hspcolno_save;
1092 icontexts = icontexts_save - 1;
1095 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1100 hsperror("No longer using yacc to parse interface files");
1105 abort(); /* should never get here! */
1109 /**********************************************************************
1112 * Input Processing for Interfaces -- Not currently used !!! *
1115 **********************************************************************/
1117 /* setyyin(file) open file as new lex input buffer */
1123 hsbuf_save = YY_CURRENT_BUFFER;
1124 if ((yyin = fopen(file, "r")) == NULL) {
1125 char errbuf[ERR_BUF_SIZE];
1127 sprintf(errbuf, "can't read \"%-.50s\"", file);
1130 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1132 hslineno_save = hslineno;
1133 hsplineno_save = hsplineno;
1134 hslineno = hsplineno = 1;
1136 filename_save = input_filename;
1137 input_filename = NULL;
1139 hscolno_save = hscolno;
1140 hspcolno_save = hspcolno;
1141 hscolno = hspcolno = 0;
1142 etags_save = etags; /* do not do "etags" stuff in interfaces */
1143 etags = 0; /* We remember whether we are doing it in
1144 the module, so we can restore it later [WDP 94/09] */
1145 hsentercontext(-1); /* partain: changed this from 0 */
1146 icontexts_save = icontexts;
1148 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1153 layout_input(char *text, int len)
1156 fprintf(stderr, "Scanning \"%s\"\n", text);
1159 hsplineno = hslineno;
1171 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1183 setstartlineno(void)
1185 startlineno = hsplineno;
1187 if (modulelineno == 0) {
1188 modulelineno = startlineno;
1194 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1198 /**********************************************************************
1204 **********************************************************************/
1206 #define CACHE_SIZE YY_BUF_SIZE
1212 } textcache = { 0, 0, NULL };
1217 /* fprintf(stderr, "cleartext\n"); */
1219 if (textcache.allocated == 0) {
1220 textcache.allocated = CACHE_SIZE;
1221 textcache.text = xmalloc(CACHE_SIZE);
1226 addtext(char *text, unsigned length)
1228 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1233 if (textcache.next + length + 1 >= textcache.allocated) {
1234 textcache.allocated += length + CACHE_SIZE;
1235 textcache.text = xrealloc(textcache.text, textcache.allocated);
1237 bcopy(text, textcache.text + textcache.next, length);
1238 textcache.next += length;
1244 /* fprintf(stderr, "addchar: %c\n", c); */
1246 if (textcache.next + 2 >= textcache.allocated) {
1247 textcache.allocated += CACHE_SIZE;
1248 textcache.text = xrealloc(textcache.text, textcache.allocated);
1250 textcache.text[textcache.next++] = c;
1254 fetchtext(unsigned *length)
1256 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1258 *length = textcache.next;
1259 textcache.text[textcache.next] = '\0';
1260 return textcache.text;
1263 /**********************************************************************
1266 * Identifier Processing *
1269 **********************************************************************/
1272 hsnewid Enters an id of length n into the symbol table.
1276 hsnewid(char *name, int length)
1278 char save = name[length];
1280 name[length] = '\0';
1281 yylval.uid = installid(name);
1282 name[length] = save;
1286 hsnewqid(char *name, int length)
1289 char save = name[length];
1290 name[length] = '\0';
1292 dot = strchr(name, '.');
1294 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1296 name[length] = save;
1298 return isconstr(dot+1);