2 /**********************************************************************
8 **********************************************************************/
10 /* The includes/config.h one */
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
17 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
28 /* memory.h and strings.h conflict on some systems. */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
32 #include "hsparser.tab.h"
33 #include "constants.h"
36 /* Our substitute for <ctype.h> */
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
47 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
48 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
50 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
51 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
52 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
54 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
55 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
57 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
58 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
59 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
61 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
84 return(CharTable[*s]&(_C));
87 /**********************************************************************
93 **********************************************************************/
95 char *input_filename = NULL; /* Always points to a dynamically allocated string */
98 * For my own sanity, things that are not part of the flex skeleton
99 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
102 static int hslineno = 0; /* Line number at end of token */
103 int hsplineno = 0; /* Line number at end of previous token */
105 static int hscolno = 0; /* Column number at end of token */
106 int hspcolno = 0; /* Column number at end of previous token */
107 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
109 int modulelineno = -1; /* The line number where the module starts */
110 int startlineno = 0; /* The line number where something starts */
111 int endlineno = 0; /* The line number where something ends */
113 static BOOLEAN noGap = TRUE; /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
116 static int nested_comments; /* For counting comment nesting depth */
117 static int comment_start;
119 /* OLD: Hacky definition of yywrap: see flex doc.
121 If we don't do this, then we'll have to get the default
122 yywrap from the flex library, which is often something
123 we are not good at locating. This avoids that difficulty.
124 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
129 /* Essential forward declarations */
131 static void hsnewid PROTO((char *, int));
132 static void layout_input PROTO((char *, int));
133 static void cleartext (NO_ARGS);
134 static void addtext PROTO((char *, unsigned));
135 static void addchar PROTO((char));
136 static char *fetchtext PROTO((unsigned *));
137 static void new_filename PROTO((char *));
138 static int Return PROTO((int));
139 static void hsentercontext PROTO((int));
141 static BOOLEAN is_commment PROTO((char*, int));
143 /* Special file handling for IMPORTS */
144 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
146 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
147 static char *filename_save; /* File Name */
148 static int hslineno_save = 0, /* Line Number */
149 hsplineno_save = 0, /* Line Number of Prev. token */
150 hscolno_save = 0, /* Indentation */
151 hspcolno_save = 0; /* Left Indentation */
152 static short icontexts_save = 0; /* Indent Context Level */
154 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
155 extern BOOLEAN etags; /* that which is saved */
157 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
159 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
160 * inserted before token +ve -- "}" inserted before
163 short icontexts = 0; /* Which context we're in */
166 Table of indentations: right bit indicates whether to use
167 indentation rules (1 = use rules; 0 = ignore)
170 push one of these "contexts" at every "case" or "where"; the right bit says
171 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
173 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
174 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
175 push is shown just below.
180 static short indenttab[MAX_CONTEXTS] = {-1};
182 #define INDENTPT (indenttab[icontexts]>>1)
183 #define INDENTON (indenttab[icontexts]&1)
185 #define RETURN(tok) return(Return(tok))
188 #define YY_DECL int yylex1()
190 /* We should not peek at yy_act, but flex calls us even for the internal action
191 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
192 to support older versions of flex, we'll continue to peek for now.
194 #define YY_USER_ACTION \
195 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
199 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
202 /* Each time we enter a new start state, we push it onto the state stack.
204 #define PUSH_STATE(n) yy_push_state(n)
205 #define POP_STATE yy_pop_state()
210 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
211 never-interactive (to go a bit faster)
212 stack (use a start-condition stack)
216 %option never-interactive
219 /* The start states are:
220 Code -- normal Haskell code (principal lexer)
221 GlaExt -- Haskell code with Glasgow extensions
222 Comment -- Nested comment processing
223 String -- Inside a string literal with backslashes
224 StringEsc -- Immediately following a backslash in a string literal
225 Char -- Inside a character literal with backslashes
226 CharEsc -- Immediately following a backslash in a character literal
228 Note that the INITIAL state is unused. Also note that these states
229 are _exclusive_. All rules should be prefixed with an appropriate
230 list of start states.
233 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
235 isoS [\xa1-\xbf\xd7\xf7]
236 isoL [\xc0-\xd6\xd8-\xde]
237 isol [\xdf-\xf6\xf8-\xff]
244 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
245 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
247 L [A-Z\xc0-\xd6\xd8-\xde]
248 l [a-z_\xdf-\xf6\xf8-\xff]
253 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
262 * Simple comments and whitespace. Normally, we would just ignore these, but
263 * in case we're processing a string escape, we need to note that we've seen
266 * Note that we cater for a comment line that *doesn't* end in a newline.
267 * This is incorrect, strictly speaking, but seems like the right thing
268 * to do. Reported by Rajiv Mirani. (WDP 95/08)
270 * Hackily moved up here so that --<<EOF>> will match -- SOF 5/97
274 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
278 * Special GHC pragma rules. Do we need a start state for interface files,
279 * so these won't be matched in source files? --JSM
285 /* I believe the next rule is not ever matched.
287 The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
288 Driver has now been modified to output `standard' {-# LINE ..-} pragmas
289 where possible, so the lexer should now never see cpp directives
290 like '# ' and '#line'.
294 <Code,GlaExt>^"# ".*{NL} {
295 char tempf[FILENAME_SIZE];
296 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
298 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
301 <Code,GlaExt>^"#line ".*{NL} {
302 char tempf[FILENAME_SIZE];
303 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
305 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
310 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
311 /* partain: pragma-style line directive */
312 char tempf[FILENAME_SIZE];
313 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
315 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
318 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
319 PUSH_STATE(UserPragma);
320 RETURN(INTERFACE_UPRAGMA);
322 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
323 PUSH_STATE(UserPragma);
324 RETURN(SPECIALISE_UPRAGMA);
326 <Code,GlaExt>"{-#"{WS}*"INLINE" {
327 PUSH_STATE(UserPragma);
328 RETURN(INLINE_UPRAGMA);
330 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
331 PUSH_STATE(UserPragma);
332 RETURN(NOINLINE_UPRAGMA);
334 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
335 PUSH_STATE(UserPragma);
336 RETURN(MAGIC_UNFOLDING_UPRAGMA);
338 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
339 /* these are handled by hscpp */
340 nested_comments =1; comment_start = hsplineno;
343 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
344 /* these are for the driver! */
345 nested_comments =1; comment_start = hsplineno;
348 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
349 /* these are used by `make depend' and the
350 compiler to indicate that a module should
351 be imported from source */
352 nested_comments =1; comment_start = hsplineno;
353 RETURN(SOURCE_UPRAGMA);
356 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
357 fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
358 input_filename, hsplineno);
359 format_string(stderr, (unsigned char *) yytext, yyleng);
360 fputs("'\n", stderr);
361 nested_comments = 1; comment_start = hsplineno;
364 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
368 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
369 * intentionally accepted as a keyword even for normal <Code>.
373 <Code,GlaExt>"case" { RETURN(CASE); }
374 <Code,GlaExt>"class" { RETURN(CLASS); }
375 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
376 <Code,GlaExt>"default" { RETURN(DEFAULT); }
377 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
378 <Code,GlaExt>"do" { RETURN(DO); }
379 <Code,GlaExt>"else" { RETURN(ELSE); }
380 <Code,GlaExt>"if" { RETURN(IF); }
381 <Code,GlaExt>"import" { RETURN(IMPORT); }
382 <Code,GlaExt>"in" { RETURN(IN); }
383 <Code,GlaExt>"infix" { RETURN(INFIX); }
384 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
385 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
386 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
387 <Code,GlaExt>"let" { RETURN(LET); }
388 <Code,GlaExt>"module" { RETURN(MODULE); }
389 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
390 <Code,GlaExt>"of" { RETURN(OF); }
391 <Code,GlaExt>"then" { RETURN(THEN); }
392 <Code,GlaExt>"type" { RETURN(TYPE); }
393 <Code,GlaExt>"where" { RETURN(WHERE); }
395 <Code,GlaExt>"as" { RETURN(AS); }
396 <Code,GlaExt>"hiding" { RETURN(HIDING); }
397 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
399 <Code,GlaExt>"forall" { RETURN(FORALL); }
401 <Code,GlaExt>"_scc_" { RETURN(SCC); }
402 <GlaExt>"_ccall_" { RETURN(CCALL); }
403 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
404 <GlaExt>"_casm_" { RETURN(CASM); }
405 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
406 <GlaExt>"(#" { RETURN(OUNBOXPAREN); }
407 <GlaExt>"#)" { RETURN(CUNBOXPAREN); }
408 <GlaExt>"foreign" { RETURN(FOREIGN); }
409 <GlaExt>"export" { RETURN(EXPORT); }
410 <GlaExt>"label" { RETURN(LABEL); }
411 <GlaExt>"unsafe" { RETURN(UNSAFE); }
412 <GlaExt>"_stdcall" { RETURN(STDCALL); }
413 <GlaExt>"_ccall" { RETURN(C_CALL); }
414 <GlaExt>"_pascal" { RETURN(PASCAL); }
415 <GlaExt>"stdcall" { RETURN(STDCALL); }
416 <GlaExt>"ccall" { RETURN(C_CALL); }
417 <GlaExt>"pascal" { RETURN(PASCAL); }
418 <GlaExt>"dynamic" { RETURN(DYNAMIC); }
422 * Haskell operators: special, reservedops and useful varsyms
426 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
427 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
428 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
429 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
430 <Code,GlaExt>"{" { RETURN(OCURLY); }
431 <Code,GlaExt>"}" { RETURN(CCURLY); }
432 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
433 <Code,GlaExt>";" { RETURN(SEMI); }
434 <Code,GlaExt>"`" { RETURN(BQUOTE); }
436 <Code,GlaExt>"." { RETURN(DOT); }
437 <Code,GlaExt>".." { RETURN(DOTDOT); }
438 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
439 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
440 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
441 <Code,GlaExt>"|" { RETURN(VBAR); }
442 <Code,GlaExt>"<-" { RETURN(LARROW); }
443 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
444 <Code,GlaExt>"-" { RETURN(MINUS); }
445 <Code,GlaExt>"+" { RETURN(PLUS); }
447 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
448 <Code,GlaExt>"@" { RETURN(AT); }
449 <Code,GlaExt>"!" { RETURN(BANG); }
450 <Code,GlaExt>"~" { RETURN(LAZY); }
454 * Integers and (for Glasgow extensions) primitive integers. Note that
455 * we pass all of the text on to the parser, because flex/C can't handle
456 * arbitrary precision numbers.
460 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
461 yylval.uid = xstrndup(yytext, yyleng - 1);
464 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
465 yylval.uid = xstrndup(yytext, yyleng);
468 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
469 yylval.uid = xstrndup(yytext, yyleng - 1);
472 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
473 yylval.uid = xstrndup(yytext, yyleng);
476 <GlaExt>("-")?{N}"#" {
477 yylval.uid = xstrndup(yytext, yyleng - 1);
480 <Code,GlaExt,UserPragma>{N} {
481 yylval.uid = xstrndup(yytext, yyleng);
487 * Floats and (for Glasgow extensions) primitive floats/doubles.
491 <GlaExt>("-")?{F}"##" {
492 yylval.uid = xstrndup(yytext, yyleng - 2);
495 <GlaExt>("-")?{F}"#" {
496 yylval.uid = xstrndup(yytext, yyleng - 1);
500 yylval.uid = xstrndup(yytext, yyleng);
506 * Funky ``foo'' style C literals for Glasgow extensions
510 <GlaExt>"``"[^']+"''" {
511 hsnewid(yytext + 2, yyleng - 4);
517 * Identifiers, both variables and operators. The trailing hash is allowed
518 * for Glasgow extensions.
524 /* These SHOULDNAE work in "Code" (sigh) */
526 <GlaExt,UserPragma>{Id}"#" {
527 if (! nonstandardFlag) {
528 char errbuf[ERR_BUF_SIZE];
529 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
532 hsnewid(yytext, yyleng);
533 RETURN(isconstr(yytext) ? CONID : VARID);
535 <Code,GlaExt,UserPragma>{Id} {
536 hsnewid(yytext, yyleng);
537 RETURN(isconstr(yytext) ? CONID : VARID);
539 <Code,GlaExt,UserPragma>{SId} {
540 if (is_commment(yytext,yyleng)) {
542 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
547 hsnewid(yytext, yyleng);
548 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
551 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
553 if (! nonstandardFlag) {
554 char errbuf[ERR_BUF_SIZE];
555 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
558 is_constr = hsnewqid(yytext, yyleng);
559 RETURN(is_constr ? QCONID : QVARID);
561 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
562 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
563 RETURN(is_constr ? QCONID : QVARID);
565 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
566 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
567 RETURN(is_constr ? QCONSYM : QVARSYM);
571 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
573 /* Because we can make the former well-behaved (we defined them).
575 Sadly, the latter is defined by Haskell, which allows such
576 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
580 <GlaExt,UserPragma>"`"{Id}"#`" {
581 hsnewid(yytext + 1, yyleng - 2);
582 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
587 * Character literals. The first form is the quick form, for character
588 * literals that don't contain backslashes. Literals with backslashes are
589 * lexed through multiple rules. First, we match the open ' and as many
590 * normal characters as possible. This puts us into the <Char> state, where
591 * a backslash is legal. Then, we match the backslash and move into the
592 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
593 * characters and the close '. We may end up with too many characters, but
594 * this allows us to easily share the lex rules with strings. Excess characters
595 * are ignored with a warning.
599 <GlaExt>'({CHAR}|"\"")"'#" {
600 yylval.uhstring = installHstring(1, yytext+1);
603 <Code,GlaExt>'({CHAR}|"\"")' {
604 yylval.uhstring = installHstring(1, yytext+1);
607 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
608 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
611 <Code,GlaExt>'({CHAR}|"\"")* {
612 hsmlcolno = hspcolno;
614 addtext(yytext+1, yyleng-1);
617 <Char>({CHAR}|"\"")*'# {
621 addtext(yytext, yyleng - 2);
622 text = fetchtext(&length);
624 if (! nonstandardFlag) {
625 char errbuf[ERR_BUF_SIZE];
626 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
631 fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
632 input_filename, hsplineno, hspcolno + 1);
633 format_string(stderr, (unsigned char *) text, length);
634 fputs("' too long\n", stderr);
637 yylval.uhstring = installHstring(1, text);
638 hspcolno = hsmlcolno;
642 <Char>({CHAR}|"\"")*' {
646 addtext(yytext, yyleng - 1);
647 text = fetchtext(&length);
650 fprintf(stderr, "%s:%d:%d: Character literal '",
651 input_filename, hsplineno, hspcolno + 1);
652 format_string(stderr, (unsigned char *) text, length);
653 fputs("' too long\n", stderr);
656 yylval.uhstring = installHstring(1, text);
657 hspcolno = hsmlcolno;
661 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
666 * String literals. The first form is the quick form, for string literals
667 * that don't contain backslashes. Literals with backslashes are lexed
668 * through multiple rules. First, we match the open " and as many normal
669 * characters as possible. This puts us into the <String> state, where
670 * a backslash is legal. Then, we match the backslash and move into the
671 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
672 * characters, moving back and forth between <String> and <StringEsc> as more
673 * backslashes are encountered. (We may even digress into <Comment> mode if we
674 * find a comment in a gap between backslashes.) Finally, we read the last chunk
675 * of normal characters and the close ".
679 <GlaExt>"\""({CHAR}|"'")*"\""# {
680 yylval.uhstring = installHstring(yyleng-3, yytext+1);
681 /* the -3 accounts for the " on front, "# on the end */
684 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
685 yylval.uhstring = installHstring(yyleng-2, yytext+1);
688 <Code,GlaExt>"\""({CHAR}|"'")* {
689 hsmlcolno = hspcolno;
691 addtext(yytext+1, yyleng-1);
694 <String>({CHAR}|"'")*"\"#" {
698 addtext(yytext, yyleng-2);
699 text = fetchtext(&length);
701 if (! nonstandardFlag) {
702 char errbuf[ERR_BUF_SIZE];
703 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
707 yylval.uhstring = installHstring(length, text);
708 hspcolno = hsmlcolno;
712 <String>({CHAR}|"'")*"\"" {
716 addtext(yytext, yyleng-1);
717 text = fetchtext(&length);
719 yylval.uhstring = installHstring(length, text);
720 hspcolno = hsmlcolno;
724 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
728 * Character and string escapes are roughly the same, but strings have the
729 * extra `\&' sequence which is not allowed for characters. Also, comments
730 * are allowed in the <StringEsc> state. (See the comment section much
733 * NB: Backslashes and tabs are stored in strings as themselves.
734 * But if we print them (in printtree.c), they must go out as
735 * "\\\\" and "\\t" respectively. (This is because of the bogus
736 * intermediate format that the parser produces. It uses '\t' fpr end of
737 * string, so it needs to be able to escape tabs, which means that it
738 * also needs to be able to escape the escape character ('\\'). Sigh.
742 <Char>\\ { PUSH_STATE(CharEsc); }
743 <String>\\& /* Ignore */ ;
744 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
746 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
747 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
751 Not 100% correct, tokenizes "foo \ --<>--
754 as "foo bar", but this is not correct as per Haskell 98 report and its
755 maximal munch rule for "--"-style comments.
757 For the moment, not deemed worthy to fix.
760 <StringEsc>"--"[^\n\r]*{NL}?{WS}* { noGap=FALSE; }
762 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
763 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
764 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
765 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
766 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
767 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
768 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
769 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
770 <CharEsc,StringEsc>BEL |
771 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
772 <CharEsc,StringEsc>BS |
773 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
774 <CharEsc,StringEsc>HT |
775 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
776 <CharEsc,StringEsc>LF |
777 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
778 <CharEsc,StringEsc>VT |
779 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
780 <CharEsc,StringEsc>FF |
781 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
782 <CharEsc,StringEsc>CR |
783 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
784 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
785 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
786 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
787 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
788 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
789 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
790 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
791 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
792 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
793 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
794 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
795 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
796 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
797 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
798 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
799 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
800 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
801 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
802 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
803 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
804 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
805 <CharEsc,StringEsc>{D}+ {
806 int i = strtol(yytext, NULL, 10);
810 char errbuf[ERR_BUF_SIZE];
811 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
817 <CharEsc,StringEsc>o{O}+ {
818 int i = strtol(yytext + 1, NULL, 8);
822 char errbuf[ERR_BUF_SIZE];
823 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
829 <CharEsc,StringEsc>x{H}+ {
830 int i = strtol(yytext + 1, NULL, 16);
834 char errbuf[ERR_BUF_SIZE];
835 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
845 * Nested comments. The major complication here is in trying to match the
846 * longest lexemes possible, for better performance. (See the flex document.)
847 * That's why the rules look so bizarre.
851 <Code,GlaExt,UserPragma,StringEsc>"{-" {
852 noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment);
856 <Comment>"-"+[^-{}]+ |
857 <Comment>"{"+[^-{}]+ ;
858 <Comment>"{-" { nested_comments++; }
859 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
865 * Illegal characters. This used to be a single rule, but we might as well
866 * pass on as much information as we have, so now we indicate our state in
871 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
872 fprintf(stderr, "%s:%d:%d: Illegal character: `",
873 input_filename, hsplineno, hspcolno + 1);
874 format_string(stderr, (unsigned char *) yytext, 1);
875 fputs("'\n", stderr);
879 fprintf(stderr, "%s:%d:%d: Illegal character: `",
880 input_filename, hsplineno, hspcolno + 1);
881 format_string(stderr, (unsigned char *) yytext, 1);
882 fputs("' in a character literal\n", stderr);
886 fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
887 input_filename, hsplineno, hspcolno + 1);
888 format_string(stderr, (unsigned char *) yytext, 1);
889 fputs("'\n", stderr);
892 <String>(.|\n) { if (nonstandardFlag) {
893 addtext(yytext, yyleng);
895 fprintf(stderr, "%s:%d:%d: Illegal character: `",
896 input_filename, hsplineno, hspcolno + 1);
897 format_string(stderr, (unsigned char *) yytext, 1);
898 fputs("' in a string literal\n", stderr);
904 fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
905 input_filename, hsplineno, hspcolno + 1);
906 format_string(stderr, (unsigned char *) yytext, 1);
907 fputs("'\n", stderr);
910 fprintf(stderr, "%s:%d:%d: Illegal character: `",
911 input_filename, hsplineno, hspcolno + 1);
912 format_string(stderr, (unsigned char *) yytext, 1);
913 fputs("' in a string gap\n", stderr);
920 * End of file. In any sub-state, this is an error. However, for the primary
921 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
922 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
923 * adding virtual close curlies, or closing an interface and returning to the
924 * primary source file.
926 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
927 * line/column advancement has to be done by hand.
931 <Char,CharEsc><<EOF>> {
932 hsplineno = hslineno; hspcolno = hscolno;
933 hsperror("unterminated character literal");
936 char errbuf[ERR_BUF_SIZE];
937 hsplineno = hslineno; hspcolno = hscolno;
938 sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
941 <String,StringEsc><<EOF>> {
942 hsplineno = hslineno; hspcolno = hscolno;
943 hsperror("unterminated string literal");
945 <UserPragma><<EOF>> {
946 hsplineno = hslineno; hspcolno = hscolno;
947 hsperror("unterminated user-specified pragma");
949 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
953 /**********************************************************************
956 * YACC/LEX Initialisation etc. *
959 **********************************************************************/
962 We initialise input_filename to "<stdin>".
963 This allows unnamed sources to be piped into the parser.
969 input_filename = xstrdup("<stdin>");
971 /* We must initialize the input buffer _now_, because we call
972 setyyin _before_ calling yylex for the first time! */
973 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
982 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
984 if (input_filename != NULL)
985 free(input_filename);
986 input_filename = xstrdup(f);
989 /**********************************************************************
992 * Layout Processing *
995 **********************************************************************/
998 The following section deals with Haskell Layout conventions
999 forcing insertion of ; or } as appropriate
1003 #define LAYOUT_DEBUG
1008 hsshouldindent(void)
1010 return (!forgetindent && INDENTON);
1014 /* Enter new context and set new indentation level */
1019 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1023 * partain: first chk that new indent won't be less than current one; this code
1024 * doesn't make sense to me; hscolno tells the position of the _end_ of the
1025 * current token; what that has to do with indenting, I don't know.
1029 if (hscolno - 1 <= INDENTPT) {
1031 return; /* Empty input OK for Haskell 1.1 */
1033 char errbuf[ERR_BUF_SIZE];
1035 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1039 hsentercontext((hspcolno << 1) | 1);
1043 /* Enter a new context without changing the indentation level */
1048 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1050 hsentercontext(indenttab[icontexts] & ~1);
1054 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1058 forgetindent = TRUE;
1062 /* Enter a new layout context. */
1064 hsentercontext(int indent)
1066 /* Enter new context and set indentation as specified */
1067 if (++icontexts >= MAX_CONTEXTS) {
1068 char errbuf[ERR_BUF_SIZE];
1070 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1073 forgetindent = FALSE;
1074 indenttab[icontexts] = indent;
1076 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1081 /* Exit a layout context */
1087 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1092 * Return checks the indentation level and returns ;, } or the specified token.
1100 if (hsshouldindent()) {
1101 if (hspcolno < INDENTPT) {
1103 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1107 } else if (hspcolno == INDENTPT) {
1109 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1118 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1125 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1131 static BOOLEAN eof = FALSE;
1134 if (hssttok != -1) {
1142 endlineno = hslineno;
1143 if ((tok = yylex1()) != EOF)
1149 if (icontexts > icontexts_save) {
1152 indenttab[icontexts] = 0;
1155 hsperror("missing '}' at end of file");
1156 } else if (hsbuf_save != NULL) {
1158 yy_delete_buffer(YY_CURRENT_BUFFER);
1159 yy_switch_to_buffer(hsbuf_save);
1161 new_filename(filename_save);
1162 free(filename_save);
1163 hslineno = hslineno_save;
1164 hsplineno = hsplineno_save;
1165 hscolno = hscolno_save;
1166 hspcolno = hspcolno_save;
1168 icontexts = icontexts_save - 1;
1171 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1176 hsperror("No longer using yacc to parse interface files");
1181 abort(); /* should never get here! */
1185 /**********************************************************************
1188 * Input Processing for Interfaces -- Not currently used !!! *
1191 **********************************************************************/
1193 /* setyyin(file) open file as new lex input buffer */
1199 hsbuf_save = YY_CURRENT_BUFFER;
1200 if ((yyin = fopen(file, "r")) == NULL) {
1201 char errbuf[ERR_BUF_SIZE];
1203 sprintf(errbuf, "can't read \"%-.50s\"", file);
1206 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1208 hslineno_save = hslineno;
1209 hsplineno_save = hsplineno;
1210 hslineno = hsplineno = 1;
1212 filename_save = input_filename;
1213 input_filename = NULL;
1215 hscolno_save = hscolno;
1216 hspcolno_save = hspcolno;
1217 hscolno = hspcolno = 0;
1218 etags_save = etags; /* do not do "etags" stuff in interfaces */
1219 etags = 0; /* We remember whether we are doing it in
1220 the module, so we can restore it later [WDP 94/09] */
1221 hsentercontext(-1); /* partain: changed this from 0 */
1222 icontexts_save = icontexts;
1224 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1229 layout_input(char *text, int len)
1232 fprintf(stderr, "Scanning \"%s\"\n", text);
1235 hsplineno = hslineno;
1247 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1259 setstartlineno(void)
1261 startlineno = hsplineno;
1263 if (modulelineno == 0) {
1264 modulelineno = startlineno;
1270 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1274 /**********************************************************************
1280 **********************************************************************/
1282 #define CACHE_SIZE YY_BUF_SIZE
1288 } textcache = { 0, 0, NULL };
1293 /* fprintf(stderr, "cleartext\n"); */
1295 if (textcache.allocated == 0) {
1296 textcache.allocated = CACHE_SIZE;
1297 textcache.text = xmalloc(CACHE_SIZE);
1302 addtext(char *text, unsigned length)
1304 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1309 if (textcache.next + length + 1 >= textcache.allocated) {
1310 textcache.allocated += length + CACHE_SIZE;
1311 textcache.text = xrealloc(textcache.text, textcache.allocated);
1313 bcopy(text, textcache.text + textcache.next, length);
1314 textcache.next += length;
1320 /* fprintf(stderr, "addchar: %c\n", c); */
1322 if (textcache.next + 2 >= textcache.allocated) {
1323 textcache.allocated += CACHE_SIZE;
1324 textcache.text = xrealloc(textcache.text, textcache.allocated);
1326 textcache.text[textcache.next++] = c;
1330 fetchtext(unsigned *length)
1332 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1334 *length = textcache.next;
1335 textcache.text[textcache.next] = '\0';
1336 return textcache.text;
1339 /**********************************************************************
1342 * Identifier Processing *
1345 **********************************************************************/
1348 hsnewid Enters an id of length n into the symbol table.
1352 hsnewid(char *name, int length)
1354 char save = name[length];
1356 name[length] = '\0';
1357 yylval.uid = installid(name);
1358 name[length] = save;
1362 hsnewqid(char *name, int length)
1365 char save = name[length];
1366 name[length] = '\0';
1368 dot = strchr(name, '.');
1370 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1372 name[length] = save;
1374 return isconstr(dot+1);
1379 is_commment(char* lexeme, int len)
1388 for(i=0;i<len;i++) {
1389 if (lexeme[i] != '-') return FALSE;