2 /**********************************************************************
8 **********************************************************************/
10 #include "../../includes/config.h"
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 /* memory.h and strings.h conflict on some systems. */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
31 #include "hsparser.tab.h"
32 #include "constants.h"
35 /* Our substitute for <ctype.h> */
44 #define _isconstr(s) (CharTable[*s]&(_C))
45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
47 static unsigned char CharTable[NCHARS] = {
48 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
50 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
51 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
52 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
53 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
54 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
55 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
56 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
57 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
58 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
59 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
60 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
61 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
62 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
79 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
80 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
83 /**********************************************************************
89 **********************************************************************/
91 char *input_filename = NULL; /* Always points to a dynamically allocated string */
94 * For my own sanity, things that are not part of the flex skeleton
95 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
98 static int hslineno = 0; /* Line number at end of token */
99 int hsplineno = 0; /* Line number at end of previous token */
101 static int hscolno = 0; /* Column number at end of token */
102 int hspcolno = 0; /* Column number at end of previous token */
103 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
105 int modulelineno = -1; /* The line number where the module starts */
106 int startlineno = 0; /* The line number where something starts */
107 int endlineno = 0; /* The line number where something ends */
109 static BOOLEAN noGap = TRUE; /* For checking string gaps */
110 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
112 static int nested_comments; /* For counting comment nesting depth */
114 /* Hacky definition of yywrap: see flex doc.
116 If we don't do this, then we'll have to get the default
117 yywrap from the flex library, which is often something
118 we are not good at locating. This avoids that difficulty.
119 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124 /* Essential forward declarations */
126 static void hsnewid PROTO((char *, int));
127 static void layout_input PROTO((char *, int));
128 static void cleartext (NO_ARGS);
129 static void addtext PROTO((char *, unsigned));
130 static void addchar PROTO((char));
131 static char *fetchtext PROTO((unsigned *));
132 static void new_filename PROTO((char *));
133 static int Return PROTO((int));
134 static void hsentercontext PROTO((int));
136 /* Special file handling for IMPORTS */
137 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
139 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
140 static char *filename_save; /* File Name */
141 static int hslineno_save = 0, /* Line Number */
142 hsplineno_save = 0, /* Line Number of Prev. token */
143 hscolno_save = 0, /* Indentation */
144 hspcolno_save = 0; /* Left Indentation */
145 static short icontexts_save = 0; /* Indent Context Level */
147 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
148 extern BOOLEAN etags; /* that which is saved */
150 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
152 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
154 extern BOOLEAN ignorePragmas; /* True when we should ignore pragmas */
155 extern int minAcceptablePragmaVersion; /* see documentation in main.c */
156 extern int maxAcceptablePragmaVersion;
157 extern int thisIfacePragmaVersion;
159 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
160 * inserted before token +ve -- "}" inserted before
163 short icontexts = 0; /* Which context we're in */
168 Table of indentations: right bit indicates whether to use
169 indentation rules (1 = use rules; 0 = ignore)
172 push one of these "contexts" at every "case" or "where"; the right bit says
173 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
175 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
176 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
177 push is shown just below.
182 static short indenttab[MAX_CONTEXTS] = {-1};
184 #define INDENTPT (indenttab[icontexts]>>1)
185 #define INDENTON (indenttab[icontexts]&1)
187 #define RETURN(tok) return(Return(tok))
190 #define YY_DECL int yylex1()
192 /* We should not peek at yy_act, but flex calls us even for the internal action
193 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
194 to support older versions of flex, we'll continue to peek for now.
196 #define YY_USER_ACTION \
197 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
201 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
204 /* Each time we enter a new start state, we push it onto the state stack.
205 Note that the rules do not allow us to underflow or overflow the stack.
206 (At least, they shouldn't.) The maximum expected depth is 4:
207 0: Code -> 1: String -> 2: StringEsc -> 3: Comment
209 static int StateStack[5];
210 static int StateDepth = -1;
213 #define PUSH_STATE(n) do {\
214 fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
215 StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
216 #define POP_STATE do {--StateDepth;\
217 fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
218 BEGIN(StateStack[StateDepth]);} while(0)
220 #define PUSH_STATE(n) do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
221 #define POP_STATE do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
226 /* The start states are:
227 Code -- normal Haskell code (principal lexer)
228 GlaExt -- Haskell code with Glasgow extensions
229 Comment -- Nested comment processing
230 String -- Inside a string literal with backslashes
231 StringEsc -- Immediately following a backslash in a string literal
232 Char -- Inside a character literal with backslashes
233 CharEsc -- Immediately following a backslash in a character literal
235 Note that the INITIAL state is unused. Also note that these states
236 are _exclusive_. All rules should be prefixed with an appropriate
237 list of start states.
240 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
242 isoS [\xa1-\xbf\xd7\xf7]
243 isoL [\xc0-\xd6\xd8-\xde]
244 isol [\xdf-\xf6\xf8-\xff]
251 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
252 S [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
254 L [A-Z\xc0-\xd6\xd8-\xde]
255 l [a-z\xdf-\xf6\xf8-\xff]
260 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
269 * Special GHC pragma rules. Do we need a start state for interface files,
270 * so these won't be matched in source files? --JSM
274 <Code,GlaExt>^"# ".*{NL} {
275 char tempf[FILENAME_SIZE];
276 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
278 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
281 <Code,GlaExt>^"#line ".*{NL} {
282 char tempf[FILENAME_SIZE];
283 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
285 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
288 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
289 /* partain: pragma-style line directive */
290 char tempf[FILENAME_SIZE];
291 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
293 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
296 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
297 PUSH_STATE(UserPragma);
298 RETURN(INTERFACE_UPRAGMA);
300 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
301 PUSH_STATE(UserPragma);
302 RETURN(SPECIALISE_UPRAGMA);
304 <Code,GlaExt>"{-#"{WS}*"INLINE" {
305 PUSH_STATE(UserPragma);
306 RETURN(INLINE_UPRAGMA);
308 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
309 PUSH_STATE(UserPragma);
310 RETURN(MAGIC_UNFOLDING_UPRAGMA);
312 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
313 PUSH_STATE(UserPragma);
314 RETURN(DEFOREST_UPRAGMA);
316 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
317 fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
318 input_filename, hsplineno);
319 format_string(stderr, (unsigned char *) yytext, yyleng);
320 fputs("'\n", stderr);
324 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
328 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
329 * intentionally accepted as a keyword even for normal <Code>.
333 <Code,GlaExt>"case" { RETURN(CASE); }
334 <Code,GlaExt>"class" { RETURN(CLASS); }
335 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
336 <Code,GlaExt>"default" { RETURN(DEFAULT); }
337 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
338 <Code,GlaExt>"do" { RETURN(DO); }
339 <Code,GlaExt>"else" { RETURN(ELSE); }
340 <Code,GlaExt>"if" { RETURN(IF); }
341 <Code,GlaExt>"import" { RETURN(IMPORT); }
342 <Code,GlaExt>"in" { RETURN(IN); }
343 <Code,GlaExt>"infix" { RETURN(INFIX); }
344 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
345 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
346 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
347 <Code,GlaExt>"let" { RETURN(LET); }
348 <Code,GlaExt>"module" { RETURN(MODULE); }
349 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
350 <Code,GlaExt>"of" { RETURN(OF); }
351 <Code,GlaExt>"then" { RETURN(THEN); }
352 <Code,GlaExt>"type" { RETURN(TYPE); }
353 <Code,GlaExt>"where" { RETURN(WHERE); }
355 <Code,GlaExt>"as" { RETURN(AS); }
356 <Code,GlaExt>"hiding" { RETURN(HIDING); }
357 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
359 <Code,GlaExt>"_scc_" { RETURN(SCC); }
360 <GlaExt>"_ccall_" { RETURN(CCALL); }
361 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
362 <GlaExt>"_casm_" { RETURN(CASM); }
363 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
367 * Haskell operators: special, reservedops and useful varsyms
371 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
372 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
373 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
374 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
375 <Code,GlaExt>"{" { RETURN(OCURLY); }
376 <Code,GlaExt>"}" { RETURN(CCURLY); }
377 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
378 <Code,GlaExt>";" { RETURN(SEMI); }
379 <Code,GlaExt>"`" { RETURN(BQUOTE); }
380 <Code,GlaExt>"_" { RETURN(WILDCARD); }
382 <Code,GlaExt>".." { RETURN(DOTDOT); }
383 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
384 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
385 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
386 <Code,GlaExt>"|" { RETURN(VBAR); }
387 <Code,GlaExt>"<-" { RETURN(LARROW); }
388 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
389 <Code,GlaExt>"-" { RETURN(MINUS); }
391 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
392 <Code,GlaExt>"@" { RETURN(AT); }
393 <Code,GlaExt>"!" { RETURN(BANG); }
394 <Code,GlaExt>"~" { RETURN(LAZY); }
398 * Integers and (for Glasgow extensions) primitive integers. Note that
399 * we pass all of the text on to the parser, because flex/C can't handle
400 * arbitrary precision numbers.
404 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
405 yylval.uid = xstrndup(yytext, yyleng - 1);
408 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
409 yylval.uid = xstrndup(yytext, yyleng);
412 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
413 yylval.uid = xstrndup(yytext, yyleng - 1);
416 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
417 yylval.uid = xstrndup(yytext, yyleng);
420 <GlaExt>("-")?{N}"#" {
421 yylval.uid = xstrndup(yytext, yyleng - 1);
424 <Code,GlaExt,UserPragma>{N} {
425 yylval.uid = xstrndup(yytext, yyleng);
431 * Floats and (for Glasgow extensions) primitive floats/doubles.
435 <GlaExt>("-")?{F}"##" {
436 yylval.uid = xstrndup(yytext, yyleng - 2);
439 <GlaExt>("-")?{F}"#" {
440 yylval.uid = xstrndup(yytext, yyleng - 1);
444 yylval.uid = xstrndup(yytext, yyleng);
450 * Funky ``foo'' style C literals for Glasgow extensions
454 <GlaExt>"``"[^']+"''" {
455 hsnewid(yytext + 2, yyleng - 4);
461 * Identifiers, both variables and operators. The trailing hash is allowed
462 * for Glasgow extensions.
468 /* These SHOULDNAE work in "Code" (sigh) */
470 <Code,GlaExt,UserPragma>{Id}"#" {
471 if (! (nonstandardFlag || in_interface)) {
472 char errbuf[ERR_BUF_SIZE];
473 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
476 hsnewid(yytext, yyleng);
477 RETURN(_isconstr(yytext) ? CONID : VARID);
479 <Code,GlaExt,UserPragma>_+{Id} {
480 if (! (nonstandardFlag || in_interface)) {
481 char errbuf[ERR_BUF_SIZE];
482 sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
485 hsnewid(yytext, yyleng);
486 RETURN(isconstr(yytext) ? CONID : VARID);
487 /* NB: ^^^^^^^^ : not the macro! */
489 <Code,GlaExt,UserPragma>{Id} {
490 hsnewid(yytext, yyleng);
491 RETURN(_isconstr(yytext) ? CONID : VARID);
493 <Code,GlaExt,UserPragma>{SId} {
494 hsnewid(yytext, yyleng);
495 RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
497 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
498 BOOLEAN isconstr = hsnewqid(yytext, yyleng);
499 RETURN(isconstr ? QCONID : QVARID);
501 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
502 BOOLEAN isconstr = hsnewqid(yytext, yyleng);
503 RETURN(isconstr ? QCONSYM : QVARSYM);
507 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
509 /* Because we can make the former well-behaved (we defined them).
511 Sadly, the latter is defined by Haskell, which allows such
512 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
516 <GlaExt,UserPragma>"`"{Id}"#`" {
517 hsnewid(yytext + 1, yyleng - 2);
518 RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
523 * Character literals. The first form is the quick form, for character
524 * literals that don't contain backslashes. Literals with backslashes are
525 * lexed through multiple rules. First, we match the open ' and as many
526 * normal characters as possible. This puts us into the <Char> state, where
527 * a backslash is legal. Then, we match the backslash and move into the
528 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
529 * characters and the close '. We may end up with too many characters, but
530 * this allows us to easily share the lex rules with strings. Excess characters
531 * are ignored with a warning.
535 <GlaExt>'({CHAR}|"\"")"'#" {
536 yylval.uhstring = installHstring(1, yytext+1);
539 <Code,GlaExt>'({CHAR}|"\"")' {
540 yylval.uhstring = installHstring(1, yytext+1);
543 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
544 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
547 <Code,GlaExt>'({CHAR}|"\"")* {
548 hsmlcolno = hspcolno;
550 addtext(yytext+1, yyleng-1);
553 <Char>({CHAR}|"\"")*'# {
557 addtext(yytext, yyleng - 2);
558 text = fetchtext(&length);
560 if (! (nonstandardFlag || in_interface)) {
561 char errbuf[ERR_BUF_SIZE];
562 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
567 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
568 input_filename, hsplineno, hspcolno + 1);
569 format_string(stderr, (unsigned char *) text, length);
570 fputs("' too long\n", stderr);
573 yylval.uhstring = installHstring(1, text);
574 hspcolno = hsmlcolno;
578 <Char>({CHAR}|"\"")*' {
582 addtext(yytext, yyleng - 1);
583 text = fetchtext(&length);
586 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
587 input_filename, hsplineno, hspcolno + 1);
588 format_string(stderr, (unsigned char *) text, length);
589 fputs("' too long\n", stderr);
592 yylval.uhstring = installHstring(1, text);
593 hspcolno = hsmlcolno;
597 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
602 * String literals. The first form is the quick form, for string literals
603 * that don't contain backslashes. Literals with backslashes are lexed
604 * through multiple rules. First, we match the open " and as many normal
605 * characters as possible. This puts us into the <String> state, where
606 * a backslash is legal. Then, we match the backslash and move into the
607 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
608 * characters, moving back and forth between <String> and <StringEsc> as more
609 * backslashes are encountered. (We may even digress into <Comment> mode if we
610 * find a comment in a gap between backslashes.) Finally, we read the last chunk
611 * of normal characters and the close ".
615 <GlaExt>"\""({CHAR}|"'")*"\""# {
616 yylval.uhstring = installHstring(yyleng-3, yytext+1);
617 /* the -3 accounts for the " on front, "# on the end */
620 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
621 yylval.uhstring = installHstring(yyleng-2, yytext+1);
624 <Code,GlaExt>"\""({CHAR}|"'")* {
625 hsmlcolno = hspcolno;
627 addtext(yytext+1, yyleng-1);
630 <String>({CHAR}|"'")*"\"#" {
634 addtext(yytext, yyleng-2);
635 text = fetchtext(&length);
637 if (! (nonstandardFlag || in_interface)) {
638 char errbuf[ERR_BUF_SIZE];
639 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
643 yylval.uhstring = installHstring(length, text);
644 hspcolno = hsmlcolno;
648 <String>({CHAR}|"'")*"\"" {
652 addtext(yytext, yyleng-1);
653 text = fetchtext(&length);
655 yylval.uhstring = installHstring(length, text);
656 hspcolno = hsmlcolno;
660 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
664 * Character and string escapes are roughly the same, but strings have the
665 * extra `\&' sequence which is not allowed for characters. Also, comments
666 * are allowed in the <StringEsc> state. (See the comment section much
669 * NB: Backslashes and tabs are stored in strings as themselves.
670 * But if we print them (in printtree.c), they must go out as
671 * "\\\\" and "\\t" respectively. (This is because of the bogus
672 * intermediate format that the parser produces. It uses '\t' fpr end of
673 * string, so it needs to be able to escape tabs, which means that it
674 * also needs to be able to escape the escape character ('\\'). Sigh.
678 <Char>\\ { PUSH_STATE(CharEsc); }
679 <String>\\& /* Ignore */ ;
680 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
682 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
683 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
685 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
686 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
687 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
688 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
689 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
690 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
691 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
692 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
693 <CharEsc,StringEsc>BEL |
694 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
695 <CharEsc,StringEsc>BS |
696 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
697 <CharEsc,StringEsc>HT |
698 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
699 <CharEsc,StringEsc>LF |
700 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
701 <CharEsc,StringEsc>VT |
702 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
703 <CharEsc,StringEsc>FF |
704 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
705 <CharEsc,StringEsc>CR |
706 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
707 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
708 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
709 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
710 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
711 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
712 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
713 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
714 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
715 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
716 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
717 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
718 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
719 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
720 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
721 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
722 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
723 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
724 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
725 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
726 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
727 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
728 <CharEsc,StringEsc>{D}+ {
729 int i = strtol(yytext, NULL, 10);
733 char errbuf[ERR_BUF_SIZE];
734 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
740 <CharEsc,StringEsc>o{O}+ {
741 int i = strtol(yytext + 1, NULL, 8);
745 char errbuf[ERR_BUF_SIZE];
746 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
752 <CharEsc,StringEsc>x{H}+ {
753 int i = strtol(yytext + 1, NULL, 16);
757 char errbuf[ERR_BUF_SIZE];
758 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
767 * Simple comments and whitespace. Normally, we would just ignore these, but
768 * in case we're processing a string escape, we need to note that we've seen
771 * Note that we cater for a comment line that *doesn't* end in a newline.
772 * This is incorrect, strictly speaking, but seems like the right thing
773 * to do. Reported by Rajiv Mirani. (WDP 95/08)
777 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
778 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
782 * Nested comments. The major complication here is in trying to match the
783 * longest lexemes possible, for better performance. (See the flex document.)
784 * That's why the rules look so bizarre.
788 <Code,GlaExt,UserPragma,StringEsc>"{-" {
789 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
793 <Comment>"-"+[^-{}]+ |
794 <Comment>"{"+[^-{}]+ ;
795 <Comment>"{-" { nested_comments++; }
796 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
801 * Illegal characters. This used to be a single rule, but we might as well
802 * pass on as much information as we have, so now we indicate our state in
807 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
808 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
809 input_filename, hsplineno, hspcolno + 1);
810 format_string(stderr, (unsigned char *) yytext, 1);
811 fputs("'\n", stderr);
815 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
816 input_filename, hsplineno, hspcolno + 1);
817 format_string(stderr, (unsigned char *) yytext, 1);
818 fputs("' in a character literal\n", stderr);
822 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
823 input_filename, hsplineno, hspcolno + 1);
824 format_string(stderr, (unsigned char *) yytext, 1);
825 fputs("'\n", stderr);
828 <String>(.|\n) { if (nonstandardFlag) {
829 addtext(yytext, yyleng);
831 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
832 input_filename, hsplineno, hspcolno + 1);
833 format_string(stderr, (unsigned char *) yytext, 1);
834 fputs("' in a string literal\n", stderr);
840 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
841 input_filename, hsplineno, hspcolno + 1);
842 format_string(stderr, (unsigned char *) yytext, 1);
843 fputs("'\n", stderr);
846 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
847 input_filename, hsplineno, hspcolno + 1);
848 format_string(stderr, (unsigned char *) yytext, 1);
849 fputs("' in a string gap\n", stderr);
856 * End of file. In any sub-state, this is an error. However, for the primary
857 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
858 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
859 * adding virtual close curlies, or closing an interface and returning to the
860 * primary source file.
862 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
863 * line/column advancement has to be done by hand.
867 <Char,CharEsc><<EOF>> {
868 hsplineno = hslineno; hspcolno = hscolno;
869 hsperror("unterminated character literal");
872 hsplineno = hslineno; hspcolno = hscolno;
873 hsperror("unterminated comment");
875 <String,StringEsc><<EOF>> {
876 hsplineno = hslineno; hspcolno = hscolno;
877 hsperror("unterminated string literal");
879 <UserPragma><<EOF>> {
880 hsplineno = hslineno; hspcolno = hscolno;
881 hsperror("unterminated user-specified pragma");
883 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
887 /**********************************************************************
890 * YACC/LEX Initialisation etc. *
893 **********************************************************************/
896 We initialise input_filename to "<stdin>".
897 This allows unnamed sources to be piped into the parser.
900 extern BOOLEAN acceptPrim;
905 input_filename = xstrdup("<stdin>");
907 /* We must initialize the input buffer _now_, because we call
908 setyyin _before_ calling yylex for the first time! */
909 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
918 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
920 if (input_filename != NULL)
921 free(input_filename);
922 input_filename = xstrdup(f);
925 /**********************************************************************
928 * Layout Processing *
931 **********************************************************************/
934 The following section deals with Haskell Layout conventions
935 forcing insertion of ; or } as appropriate
941 return (!forgetindent && INDENTON);
945 /* Enter new context and set new indentation level */
950 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
954 * partain: first chk that new indent won't be less than current one; this code
955 * doesn't make sense to me; hscolno tells the position of the _end_ of the
956 * current token; what that has to do with indenting, I don't know.
960 if (hscolno - 1 <= INDENTPT) {
962 return; /* Empty input OK for Haskell 1.1 */
964 char errbuf[ERR_BUF_SIZE];
966 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
970 hsentercontext((hspcolno << 1) | 1);
974 /* Enter a new context without changing the indentation level */
979 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
981 hsentercontext(indenttab[icontexts] & ~1);
985 /* Turn off indentation processing, usually because an explicit "{" has been seen */
993 /* Enter a new layout context. */
995 hsentercontext(int indent)
997 /* Enter new context and set indentation as specified */
998 if (++icontexts >= MAX_CONTEXTS) {
999 char errbuf[ERR_BUF_SIZE];
1001 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1004 forgetindent = FALSE;
1005 indenttab[icontexts] = indent;
1007 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1012 /* Exit a layout context */
1018 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1023 * Return checks the indentation level and returns ;, } or the specified token.
1033 if (hsshouldindent()) {
1034 if (hspcolno < INDENTPT) {
1036 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1040 } else if (hspcolno == INDENTPT) {
1042 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1050 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1057 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1063 static BOOLEAN eof = FALSE;
1066 if (hssttok != -1) {
1074 endlineno = hslineno;
1075 if ((tok = yylex1()) != EOF)
1081 if (icontexts > icontexts_save) {
1084 indenttab[icontexts] = 0;
1087 hsperror("missing '}' at end of file");
1088 } else if (hsbuf_save != NULL) {
1090 yy_delete_buffer(YY_CURRENT_BUFFER);
1091 yy_switch_to_buffer(hsbuf_save);
1093 new_filename(filename_save);
1094 free(filename_save);
1095 hslineno = hslineno_save;
1096 hsplineno = hsplineno_save;
1097 hscolno = hscolno_save;
1098 hspcolno = hspcolno_save;
1100 in_interface = FALSE;
1101 icontexts = icontexts_save - 1;
1104 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1109 hsperror("No longer using yacc to parse interface files");
1114 abort(); /* should never get here! */
1118 /**********************************************************************
1121 * Input Processing for Interfaces -- Not currently used !!! *
1124 **********************************************************************/
1126 /* setyyin(file) open file as new lex input buffer */
1132 hsbuf_save = YY_CURRENT_BUFFER;
1133 if ((yyin = fopen(file, "r")) == NULL) {
1134 char errbuf[ERR_BUF_SIZE];
1136 sprintf(errbuf, "can't read \"%-.50s\"", file);
1139 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1141 hslineno_save = hslineno;
1142 hsplineno_save = hsplineno;
1143 hslineno = hsplineno = 1;
1145 filename_save = input_filename;
1146 input_filename = NULL;
1148 hscolno_save = hscolno;
1149 hspcolno_save = hspcolno;
1150 hscolno = hspcolno = 0;
1151 in_interface = TRUE;
1152 etags_save = etags; /* do not do "etags" stuff in interfaces */
1153 etags = 0; /* We remember whether we are doing it in
1154 the module, so we can restore it later [WDP 94/09] */
1155 hsentercontext(-1); /* partain: changed this from 0 */
1156 icontexts_save = icontexts;
1158 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1163 layout_input(char *text, int len)
1166 fprintf(stderr, "Scanning \"%s\"\n", text);
1169 hsplineno = hslineno;
1181 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1193 setstartlineno(void)
1195 startlineno = hsplineno;
1197 if (modulelineno == 0) {
1198 modulelineno = startlineno;
1204 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1208 /**********************************************************************
1214 **********************************************************************/
1216 #define CACHE_SIZE YY_BUF_SIZE
1222 } textcache = { 0, 0, NULL };
1227 /* fprintf(stderr, "cleartext\n"); */
1229 if (textcache.allocated == 0) {
1230 textcache.allocated = CACHE_SIZE;
1231 textcache.text = xmalloc(CACHE_SIZE);
1236 addtext(char *text, unsigned length)
1238 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1243 if (textcache.next + length + 1 >= textcache.allocated) {
1244 textcache.allocated += length + CACHE_SIZE;
1245 textcache.text = xrealloc(textcache.text, textcache.allocated);
1247 bcopy(text, textcache.text + textcache.next, length);
1248 textcache.next += length;
1254 /* fprintf(stderr, "addchar: %c\n", c); */
1256 if (textcache.next + 2 >= textcache.allocated) {
1257 textcache.allocated += CACHE_SIZE;
1258 textcache.text = xrealloc(textcache.text, textcache.allocated);
1260 textcache.text[textcache.next++] = c;
1264 fetchtext(unsigned *length)
1266 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1268 *length = textcache.next;
1269 textcache.text[textcache.next] = '\0';
1270 return textcache.text;
1273 /**********************************************************************
1276 * Identifier Processing *
1279 **********************************************************************/
1282 hsnewid Enters an id of length n into the symbol table.
1286 hsnewid(char *name, int length)
1288 char save = name[length];
1290 name[length] = '\0';
1291 yylval.uid = installid(name);
1292 name[length] = save;
1296 hsnewqid(char *name, int length)
1299 char save = name[length];
1300 name[length] = '\0';
1302 dot = strchr(name, '.');
1304 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1306 name[length] = save;
1308 return _isconstr(dot+1);
1312 isconstr(char *s) /* walks past leading underscores before using the macro */
1316 for ( ; temp != NULL && *temp == '_' ; temp++ );
1318 return _isconstr(temp);