2 /**********************************************************************
8 **********************************************************************/
10 #include "../../includes/config.h"
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 /* memory.h and strings.h conflict on some systems. */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
31 #include "hsparser.tab.h"
32 #include "constants.h"
35 /* Our substitute for <ctype.h> */
44 #define _isconstr(s) (CharTable[*s]&(_C))
45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
47 static unsigned char CharTable[NCHARS] = {
48 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
50 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
51 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
52 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
53 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
54 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
55 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
56 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
57 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
58 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
59 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
60 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
61 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
62 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
79 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
80 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
83 /**********************************************************************
89 **********************************************************************/
91 char *input_filename = NULL; /* Always points to a dynamically allocated string */
94 * For my own sanity, things that are not part of the flex skeleton
95 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
98 static int hslineno = 0; /* Line number at end of token */
99 int hsplineno = 0; /* Line number at end of previous token */
101 static int hscolno = 0; /* Column number at end of token */
102 int hspcolno = 0; /* Column number at end of previous token */
103 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
105 int modulelineno = -1; /* The line number where the module starts */
106 int startlineno = 0; /* The line number where something starts */
107 int endlineno = 0; /* The line number where something ends */
109 static BOOLEAN noGap = TRUE; /* For checking string gaps */
110 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
112 static int nested_comments; /* For counting comment nesting depth */
114 /* Hacky definition of yywrap: see flex doc.
116 If we don't do this, then we'll have to get the default
117 yywrap from the flex library, which is often something
118 we are not good at locating. This avoids that difficulty.
119 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124 /* Essential forward declarations */
126 static void hsnewid PROTO((char *, int));
127 static void layout_input PROTO((char *, int));
128 static void cleartext (NO_ARGS);
129 static void addtext PROTO((char *, unsigned));
130 static void addchar PROTO((char));
131 static char *fetchtext PROTO((unsigned *));
132 static void new_filename PROTO((char *));
133 static int Return PROTO((int));
134 static void hsentercontext PROTO((int));
136 /* Special file handling for IMPORTS */
137 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
139 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
140 static char *filename_save; /* File Name */
141 static int hslineno_save = 0, /* Line Number */
142 hsplineno_save = 0, /* Line Number of Prev. token */
143 hscolno_save = 0, /* Indentation */
144 hspcolno_save = 0; /* Left Indentation */
145 static short icontexts_save = 0; /* Indent Context Level */
147 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
148 extern BOOLEAN etags; /* that which is saved */
150 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
152 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
153 * inserted before token +ve -- "}" inserted before
156 short icontexts = 0; /* Which context we're in */
159 Table of indentations: right bit indicates whether to use
160 indentation rules (1 = use rules; 0 = ignore)
163 push one of these "contexts" at every "case" or "where"; the right bit says
164 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
166 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
167 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
168 push is shown just below.
173 static short indenttab[MAX_CONTEXTS] = {-1};
175 #define INDENTPT (indenttab[icontexts]>>1)
176 #define INDENTON (indenttab[icontexts]&1)
178 #define RETURN(tok) return(Return(tok))
181 #define YY_DECL int yylex1()
183 /* We should not peek at yy_act, but flex calls us even for the internal action
184 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
185 to support older versions of flex, we'll continue to peek for now.
187 #define YY_USER_ACTION \
188 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
192 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
195 /* Each time we enter a new start state, we push it onto the state stack.
196 Note that the rules do not allow us to underflow or overflow the stack.
197 (At least, they shouldn't.) The maximum expected depth is 4:
198 0: Code -> 1: String -> 2: StringEsc -> 3: Comment
200 static int StateStack[5];
201 static int StateDepth = -1;
204 #define PUSH_STATE(n) do {\
205 fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
206 StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
207 #define POP_STATE do {--StateDepth;\
208 fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
209 BEGIN(StateStack[StateDepth]);} while(0)
211 #define PUSH_STATE(n) do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
212 #define POP_STATE do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
217 /* The start states are:
218 Code -- normal Haskell code (principal lexer)
219 GlaExt -- Haskell code with Glasgow extensions
220 Comment -- Nested comment processing
221 String -- Inside a string literal with backslashes
222 StringEsc -- Immediately following a backslash in a string literal
223 Char -- Inside a character literal with backslashes
224 CharEsc -- Immediately following a backslash in a character literal
226 Note that the INITIAL state is unused. Also note that these states
227 are _exclusive_. All rules should be prefixed with an appropriate
228 list of start states.
231 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
233 isoS [\xa1-\xbf\xd7\xf7]
234 isoL [\xc0-\xd6\xd8-\xde]
235 isol [\xdf-\xf6\xf8-\xff]
242 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
243 S [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
245 L [A-Z\xc0-\xd6\xd8-\xde]
246 l [a-z\xdf-\xf6\xf8-\xff]
251 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
260 * Special GHC pragma rules. Do we need a start state for interface files,
261 * so these won't be matched in source files? --JSM
265 <Code,GlaExt>^"# ".*{NL} {
266 char tempf[FILENAME_SIZE];
267 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
269 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
272 <Code,GlaExt>^"#line ".*{NL} {
273 char tempf[FILENAME_SIZE];
274 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
276 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
279 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
280 /* partain: pragma-style line directive */
281 char tempf[FILENAME_SIZE];
282 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
284 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
287 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
288 PUSH_STATE(UserPragma);
289 RETURN(INTERFACE_UPRAGMA);
291 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
292 PUSH_STATE(UserPragma);
293 RETURN(SPECIALISE_UPRAGMA);
295 <Code,GlaExt>"{-#"{WS}*"INLINE" {
296 PUSH_STATE(UserPragma);
297 RETURN(INLINE_UPRAGMA);
299 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
300 PUSH_STATE(UserPragma);
301 RETURN(MAGIC_UNFOLDING_UPRAGMA);
303 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
304 PUSH_STATE(UserPragma);
305 RETURN(DEFOREST_UPRAGMA);
307 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
308 fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
309 input_filename, hsplineno);
310 format_string(stderr, (unsigned char *) yytext, yyleng);
311 fputs("'\n", stderr);
315 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
319 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
320 * intentionally accepted as a keyword even for normal <Code>.
324 <Code,GlaExt>"case" { RETURN(CASE); }
325 <Code,GlaExt>"class" { RETURN(CLASS); }
326 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
327 <Code,GlaExt>"default" { RETURN(DEFAULT); }
328 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
329 <Code,GlaExt>"do" { RETURN(DO); }
330 <Code,GlaExt>"else" { RETURN(ELSE); }
331 <Code,GlaExt>"if" { RETURN(IF); }
332 <Code,GlaExt>"import" { RETURN(IMPORT); }
333 <Code,GlaExt>"in" { RETURN(IN); }
334 <Code,GlaExt>"infix" { RETURN(INFIX); }
335 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
336 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
337 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
338 <Code,GlaExt>"let" { RETURN(LET); }
339 <Code,GlaExt>"module" { RETURN(MODULE); }
340 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
341 <Code,GlaExt>"of" { RETURN(OF); }
342 <Code,GlaExt>"then" { RETURN(THEN); }
343 <Code,GlaExt>"type" { RETURN(TYPE); }
344 <Code,GlaExt>"where" { RETURN(WHERE); }
346 <Code,GlaExt>"as" { RETURN(AS); }
347 <Code,GlaExt>"hiding" { RETURN(HIDING); }
348 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
350 <Code,GlaExt>"_scc_" { RETURN(SCC); }
351 <GlaExt>"_ccall_" { RETURN(CCALL); }
352 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
353 <GlaExt>"_casm_" { RETURN(CASM); }
354 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
358 * Haskell operators: special, reservedops and useful varsyms
362 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
363 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
364 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
365 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
366 <Code,GlaExt>"{" { RETURN(OCURLY); }
367 <Code,GlaExt>"}" { RETURN(CCURLY); }
368 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
369 <Code,GlaExt>";" { RETURN(SEMI); }
370 <Code,GlaExt>"`" { RETURN(BQUOTE); }
371 <Code,GlaExt>"_" { RETURN(WILDCARD); }
373 <Code,GlaExt>".." { RETURN(DOTDOT); }
374 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
375 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
376 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
377 <Code,GlaExt>"|" { RETURN(VBAR); }
378 <Code,GlaExt>"<-" { RETURN(LARROW); }
379 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
380 <Code,GlaExt>"-" { RETURN(MINUS); }
382 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
383 <Code,GlaExt>"@" { RETURN(AT); }
384 <Code,GlaExt>"!" { RETURN(BANG); }
385 <Code,GlaExt>"~" { RETURN(LAZY); }
389 * Integers and (for Glasgow extensions) primitive integers. Note that
390 * we pass all of the text on to the parser, because flex/C can't handle
391 * arbitrary precision numbers.
395 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
396 yylval.uid = xstrndup(yytext, yyleng - 1);
399 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
400 yylval.uid = xstrndup(yytext, yyleng);
403 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
404 yylval.uid = xstrndup(yytext, yyleng - 1);
407 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
408 yylval.uid = xstrndup(yytext, yyleng);
411 <GlaExt>("-")?{N}"#" {
412 yylval.uid = xstrndup(yytext, yyleng - 1);
415 <Code,GlaExt,UserPragma>{N} {
416 yylval.uid = xstrndup(yytext, yyleng);
422 * Floats and (for Glasgow extensions) primitive floats/doubles.
426 <GlaExt>("-")?{F}"##" {
427 yylval.uid = xstrndup(yytext, yyleng - 2);
430 <GlaExt>("-")?{F}"#" {
431 yylval.uid = xstrndup(yytext, yyleng - 1);
435 yylval.uid = xstrndup(yytext, yyleng);
441 * Funky ``foo'' style C literals for Glasgow extensions
445 <GlaExt>"``"[^']+"''" {
446 hsnewid(yytext + 2, yyleng - 4);
452 * Identifiers, both variables and operators. The trailing hash is allowed
453 * for Glasgow extensions.
459 /* These SHOULDNAE work in "Code" (sigh) */
461 <Code,GlaExt,UserPragma>{Id}"#" {
462 if (! nonstandardFlag) {
463 char errbuf[ERR_BUF_SIZE];
464 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
467 hsnewid(yytext, yyleng);
468 RETURN(_isconstr(yytext) ? CONID : VARID);
470 <Code,GlaExt,UserPragma>_+{Id} {
471 if (! nonstandardFlag) {
472 char errbuf[ERR_BUF_SIZE];
473 sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
476 hsnewid(yytext, yyleng);
477 RETURN(isconstr(yytext) ? CONID : VARID);
478 /* NB: ^^^^^^^^ : not the macro! */
480 <Code,GlaExt,UserPragma>{Id} {
481 hsnewid(yytext, yyleng);
482 RETURN(_isconstr(yytext) ? CONID : VARID);
484 <Code,GlaExt,UserPragma>{SId} {
485 hsnewid(yytext, yyleng);
486 RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
488 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
489 BOOLEAN isconstr = hsnewqid(yytext, yyleng);
490 RETURN(isconstr ? QCONID : QVARID);
492 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
493 BOOLEAN isconstr = hsnewqid(yytext, yyleng);
494 RETURN(isconstr ? QCONSYM : QVARSYM);
498 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
500 /* Because we can make the former well-behaved (we defined them).
502 Sadly, the latter is defined by Haskell, which allows such
503 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
507 <GlaExt,UserPragma>"`"{Id}"#`" {
508 hsnewid(yytext + 1, yyleng - 2);
509 RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
514 * Character literals. The first form is the quick form, for character
515 * literals that don't contain backslashes. Literals with backslashes are
516 * lexed through multiple rules. First, we match the open ' and as many
517 * normal characters as possible. This puts us into the <Char> state, where
518 * a backslash is legal. Then, we match the backslash and move into the
519 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
520 * characters and the close '. We may end up with too many characters, but
521 * this allows us to easily share the lex rules with strings. Excess characters
522 * are ignored with a warning.
526 <GlaExt>'({CHAR}|"\"")"'#" {
527 yylval.uhstring = installHstring(1, yytext+1);
530 <Code,GlaExt>'({CHAR}|"\"")' {
531 yylval.uhstring = installHstring(1, yytext+1);
534 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
535 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
538 <Code,GlaExt>'({CHAR}|"\"")* {
539 hsmlcolno = hspcolno;
541 addtext(yytext+1, yyleng-1);
544 <Char>({CHAR}|"\"")*'# {
548 addtext(yytext, yyleng - 2);
549 text = fetchtext(&length);
551 if (! nonstandardFlag) {
552 char errbuf[ERR_BUF_SIZE];
553 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
558 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
559 input_filename, hsplineno, hspcolno + 1);
560 format_string(stderr, (unsigned char *) text, length);
561 fputs("' too long\n", stderr);
564 yylval.uhstring = installHstring(1, text);
565 hspcolno = hsmlcolno;
569 <Char>({CHAR}|"\"")*' {
573 addtext(yytext, yyleng - 1);
574 text = fetchtext(&length);
577 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
578 input_filename, hsplineno, hspcolno + 1);
579 format_string(stderr, (unsigned char *) text, length);
580 fputs("' too long\n", stderr);
583 yylval.uhstring = installHstring(1, text);
584 hspcolno = hsmlcolno;
588 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
593 * String literals. The first form is the quick form, for string literals
594 * that don't contain backslashes. Literals with backslashes are lexed
595 * through multiple rules. First, we match the open " and as many normal
596 * characters as possible. This puts us into the <String> state, where
597 * a backslash is legal. Then, we match the backslash and move into the
598 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
599 * characters, moving back and forth between <String> and <StringEsc> as more
600 * backslashes are encountered. (We may even digress into <Comment> mode if we
601 * find a comment in a gap between backslashes.) Finally, we read the last chunk
602 * of normal characters and the close ".
606 <GlaExt>"\""({CHAR}|"'")*"\""# {
607 yylval.uhstring = installHstring(yyleng-3, yytext+1);
608 /* the -3 accounts for the " on front, "# on the end */
611 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
612 yylval.uhstring = installHstring(yyleng-2, yytext+1);
615 <Code,GlaExt>"\""({CHAR}|"'")* {
616 hsmlcolno = hspcolno;
618 addtext(yytext+1, yyleng-1);
621 <String>({CHAR}|"'")*"\"#" {
625 addtext(yytext, yyleng-2);
626 text = fetchtext(&length);
628 if (! nonstandardFlag) {
629 char errbuf[ERR_BUF_SIZE];
630 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
634 yylval.uhstring = installHstring(length, text);
635 hspcolno = hsmlcolno;
639 <String>({CHAR}|"'")*"\"" {
643 addtext(yytext, yyleng-1);
644 text = fetchtext(&length);
646 yylval.uhstring = installHstring(length, text);
647 hspcolno = hsmlcolno;
651 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
655 * Character and string escapes are roughly the same, but strings have the
656 * extra `\&' sequence which is not allowed for characters. Also, comments
657 * are allowed in the <StringEsc> state. (See the comment section much
660 * NB: Backslashes and tabs are stored in strings as themselves.
661 * But if we print them (in printtree.c), they must go out as
662 * "\\\\" and "\\t" respectively. (This is because of the bogus
663 * intermediate format that the parser produces. It uses '\t' fpr end of
664 * string, so it needs to be able to escape tabs, which means that it
665 * also needs to be able to escape the escape character ('\\'). Sigh.
669 <Char>\\ { PUSH_STATE(CharEsc); }
670 <String>\\& /* Ignore */ ;
671 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
673 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
674 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
676 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
677 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
678 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
679 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
680 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
681 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
682 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
683 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
684 <CharEsc,StringEsc>BEL |
685 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
686 <CharEsc,StringEsc>BS |
687 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
688 <CharEsc,StringEsc>HT |
689 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
690 <CharEsc,StringEsc>LF |
691 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
692 <CharEsc,StringEsc>VT |
693 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
694 <CharEsc,StringEsc>FF |
695 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
696 <CharEsc,StringEsc>CR |
697 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
698 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
699 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
700 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
701 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
702 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
703 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
704 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
705 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
706 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
707 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
708 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
709 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
710 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
711 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
712 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
713 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
714 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
715 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
716 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
717 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
718 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
719 <CharEsc,StringEsc>{D}+ {
720 int i = strtol(yytext, NULL, 10);
724 char errbuf[ERR_BUF_SIZE];
725 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
731 <CharEsc,StringEsc>o{O}+ {
732 int i = strtol(yytext + 1, NULL, 8);
736 char errbuf[ERR_BUF_SIZE];
737 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
743 <CharEsc,StringEsc>x{H}+ {
744 int i = strtol(yytext + 1, NULL, 16);
748 char errbuf[ERR_BUF_SIZE];
749 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
758 * Simple comments and whitespace. Normally, we would just ignore these, but
759 * in case we're processing a string escape, we need to note that we've seen
762 * Note that we cater for a comment line that *doesn't* end in a newline.
763 * This is incorrect, strictly speaking, but seems like the right thing
764 * to do. Reported by Rajiv Mirani. (WDP 95/08)
768 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
769 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
773 * Nested comments. The major complication here is in trying to match the
774 * longest lexemes possible, for better performance. (See the flex document.)
775 * That's why the rules look so bizarre.
779 <Code,GlaExt,UserPragma,StringEsc>"{-" {
780 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
784 <Comment>"-"+[^-{}]+ |
785 <Comment>"{"+[^-{}]+ ;
786 <Comment>"{-" { nested_comments++; }
787 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
792 * Illegal characters. This used to be a single rule, but we might as well
793 * pass on as much information as we have, so now we indicate our state in
798 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
799 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
800 input_filename, hsplineno, hspcolno + 1);
801 format_string(stderr, (unsigned char *) yytext, 1);
802 fputs("'\n", stderr);
806 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
807 input_filename, hsplineno, hspcolno + 1);
808 format_string(stderr, (unsigned char *) yytext, 1);
809 fputs("' in a character literal\n", stderr);
813 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
814 input_filename, hsplineno, hspcolno + 1);
815 format_string(stderr, (unsigned char *) yytext, 1);
816 fputs("'\n", stderr);
819 <String>(.|\n) { if (nonstandardFlag) {
820 addtext(yytext, yyleng);
822 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
823 input_filename, hsplineno, hspcolno + 1);
824 format_string(stderr, (unsigned char *) yytext, 1);
825 fputs("' in a string literal\n", stderr);
831 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
832 input_filename, hsplineno, hspcolno + 1);
833 format_string(stderr, (unsigned char *) yytext, 1);
834 fputs("'\n", stderr);
837 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
838 input_filename, hsplineno, hspcolno + 1);
839 format_string(stderr, (unsigned char *) yytext, 1);
840 fputs("' in a string gap\n", stderr);
847 * End of file. In any sub-state, this is an error. However, for the primary
848 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
849 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
850 * adding virtual close curlies, or closing an interface and returning to the
851 * primary source file.
853 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
854 * line/column advancement has to be done by hand.
858 <Char,CharEsc><<EOF>> {
859 hsplineno = hslineno; hspcolno = hscolno;
860 hsperror("unterminated character literal");
863 hsplineno = hslineno; hspcolno = hscolno;
864 hsperror("unterminated comment");
866 <String,StringEsc><<EOF>> {
867 hsplineno = hslineno; hspcolno = hscolno;
868 hsperror("unterminated string literal");
870 <UserPragma><<EOF>> {
871 hsplineno = hslineno; hspcolno = hscolno;
872 hsperror("unterminated user-specified pragma");
874 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
878 /**********************************************************************
881 * YACC/LEX Initialisation etc. *
884 **********************************************************************/
887 We initialise input_filename to "<stdin>".
888 This allows unnamed sources to be piped into the parser.
891 extern BOOLEAN acceptPrim;
896 input_filename = xstrdup("<stdin>");
898 /* We must initialize the input buffer _now_, because we call
899 setyyin _before_ calling yylex for the first time! */
900 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
909 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
911 if (input_filename != NULL)
912 free(input_filename);
913 input_filename = xstrdup(f);
916 /**********************************************************************
919 * Layout Processing *
922 **********************************************************************/
925 The following section deals with Haskell Layout conventions
926 forcing insertion of ; or } as appropriate
932 return (!forgetindent && INDENTON);
936 /* Enter new context and set new indentation level */
941 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
945 * partain: first chk that new indent won't be less than current one; this code
946 * doesn't make sense to me; hscolno tells the position of the _end_ of the
947 * current token; what that has to do with indenting, I don't know.
951 if (hscolno - 1 <= INDENTPT) {
953 return; /* Empty input OK for Haskell 1.1 */
955 char errbuf[ERR_BUF_SIZE];
957 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
961 hsentercontext((hspcolno << 1) | 1);
965 /* Enter a new context without changing the indentation level */
970 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
972 hsentercontext(indenttab[icontexts] & ~1);
976 /* Turn off indentation processing, usually because an explicit "{" has been seen */
984 /* Enter a new layout context. */
986 hsentercontext(int indent)
988 /* Enter new context and set indentation as specified */
989 if (++icontexts >= MAX_CONTEXTS) {
990 char errbuf[ERR_BUF_SIZE];
992 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
995 forgetindent = FALSE;
996 indenttab[icontexts] = indent;
998 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1003 /* Exit a layout context */
1009 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1014 * Return checks the indentation level and returns ;, } or the specified token.
1024 if (hsshouldindent()) {
1025 if (hspcolno < INDENTPT) {
1027 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1031 } else if (hspcolno == INDENTPT) {
1033 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1041 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1048 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1054 static BOOLEAN eof = FALSE;
1057 if (hssttok != -1) {
1065 endlineno = hslineno;
1066 if ((tok = yylex1()) != EOF)
1072 if (icontexts > icontexts_save) {
1075 indenttab[icontexts] = 0;
1078 hsperror("missing '}' at end of file");
1079 } else if (hsbuf_save != NULL) {
1081 yy_delete_buffer(YY_CURRENT_BUFFER);
1082 yy_switch_to_buffer(hsbuf_save);
1084 new_filename(filename_save);
1085 free(filename_save);
1086 hslineno = hslineno_save;
1087 hsplineno = hsplineno_save;
1088 hscolno = hscolno_save;
1089 hspcolno = hspcolno_save;
1091 icontexts = icontexts_save - 1;
1094 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1099 hsperror("No longer using yacc to parse interface files");
1104 abort(); /* should never get here! */
1108 /**********************************************************************
1111 * Input Processing for Interfaces -- Not currently used !!! *
1114 **********************************************************************/
1116 /* setyyin(file) open file as new lex input buffer */
1122 hsbuf_save = YY_CURRENT_BUFFER;
1123 if ((yyin = fopen(file, "r")) == NULL) {
1124 char errbuf[ERR_BUF_SIZE];
1126 sprintf(errbuf, "can't read \"%-.50s\"", file);
1129 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1131 hslineno_save = hslineno;
1132 hsplineno_save = hsplineno;
1133 hslineno = hsplineno = 1;
1135 filename_save = input_filename;
1136 input_filename = NULL;
1138 hscolno_save = hscolno;
1139 hspcolno_save = hspcolno;
1140 hscolno = hspcolno = 0;
1141 etags_save = etags; /* do not do "etags" stuff in interfaces */
1142 etags = 0; /* We remember whether we are doing it in
1143 the module, so we can restore it later [WDP 94/09] */
1144 hsentercontext(-1); /* partain: changed this from 0 */
1145 icontexts_save = icontexts;
1147 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1152 layout_input(char *text, int len)
1155 fprintf(stderr, "Scanning \"%s\"\n", text);
1158 hsplineno = hslineno;
1170 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1182 setstartlineno(void)
1184 startlineno = hsplineno;
1186 if (modulelineno == 0) {
1187 modulelineno = startlineno;
1193 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1197 /**********************************************************************
1203 **********************************************************************/
1205 #define CACHE_SIZE YY_BUF_SIZE
1211 } textcache = { 0, 0, NULL };
1216 /* fprintf(stderr, "cleartext\n"); */
1218 if (textcache.allocated == 0) {
1219 textcache.allocated = CACHE_SIZE;
1220 textcache.text = xmalloc(CACHE_SIZE);
1225 addtext(char *text, unsigned length)
1227 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1232 if (textcache.next + length + 1 >= textcache.allocated) {
1233 textcache.allocated += length + CACHE_SIZE;
1234 textcache.text = xrealloc(textcache.text, textcache.allocated);
1236 bcopy(text, textcache.text + textcache.next, length);
1237 textcache.next += length;
1243 /* fprintf(stderr, "addchar: %c\n", c); */
1245 if (textcache.next + 2 >= textcache.allocated) {
1246 textcache.allocated += CACHE_SIZE;
1247 textcache.text = xrealloc(textcache.text, textcache.allocated);
1249 textcache.text[textcache.next++] = c;
1253 fetchtext(unsigned *length)
1255 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1257 *length = textcache.next;
1258 textcache.text[textcache.next] = '\0';
1259 return textcache.text;
1262 /**********************************************************************
1265 * Identifier Processing *
1268 **********************************************************************/
1271 hsnewid Enters an id of length n into the symbol table.
1275 hsnewid(char *name, int length)
1277 char save = name[length];
1279 name[length] = '\0';
1280 yylval.uid = installid(name);
1281 name[length] = save;
1285 hsnewqid(char *name, int length)
1288 char save = name[length];
1289 name[length] = '\0';
1291 dot = strchr(name, '.');
1293 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1295 name[length] = save;
1297 return _isconstr(dot+1);
1301 isconstr(char *s) /* walks past leading underscores before using the macro */
1305 for ( ; temp != NULL && *temp == '_' ; temp++ );
1307 return _isconstr(temp);