2 /**********************************************************************
8 **********************************************************************/
10 /* The includes/config.h one */
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
17 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
28 /* memory.h and strings.h conflict on some systems. */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
32 #include "hsparser.tab.h"
33 #include "constants.h"
36 /* Our substitute for <ctype.h> */
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
47 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
48 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
50 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
51 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
52 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
54 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
55 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
57 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
58 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
59 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
61 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
84 return(CharTable[*s]&(_C));
87 /**********************************************************************
93 **********************************************************************/
95 char *input_filename = NULL; /* Always points to a dynamically allocated string */
98 * For my own sanity, things that are not part of the flex skeleton
99 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
102 static int hslineno = 0; /* Line number at end of token */
103 int hsplineno = 0; /* Line number at end of previous token */
105 static int hscolno = 0; /* Column number at end of token */
106 int hspcolno = 0; /* Column number at end of previous token */
107 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
109 int modulelineno = -1; /* The line number where the module starts */
110 int startlineno = 0; /* The line number where something starts */
111 int endlineno = 0; /* The line number where something ends */
113 static BOOLEAN noGap = TRUE; /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
116 static int nested_comments; /* For counting comment nesting depth */
117 static int comment_start;
119 /* OLD: Hacky definition of yywrap: see flex doc.
121 If we don't do this, then we'll have to get the default
122 yywrap from the flex library, which is often something
123 we are not good at locating. This avoids that difficulty.
124 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
129 /* Essential forward declarations */
131 static void hsnewid PROTO((char *, int));
132 static void layout_input PROTO((char *, int));
133 static void cleartext (NO_ARGS);
134 static void addtext PROTO((char *, unsigned));
135 static void addchar PROTO((char));
136 static char *fetchtext PROTO((unsigned *));
137 static void new_filename PROTO((char *));
138 static int Return PROTO((int));
139 static void hsentercontext PROTO((int));
141 static BOOLEAN is_commment PROTO((char*, int));
143 /* Special file handling for IMPORTS */
144 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
146 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
147 static char *filename_save; /* File Name */
148 static int hslineno_save = 0, /* Line Number */
149 hsplineno_save = 0, /* Line Number of Prev. token */
150 hscolno_save = 0, /* Indentation */
151 hspcolno_save = 0; /* Left Indentation */
152 static short icontexts_save = 0; /* Indent Context Level */
154 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
155 extern BOOLEAN etags; /* that which is saved */
157 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
159 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
160 * inserted before token +ve -- "}" inserted before
163 short icontexts = 0; /* Which context we're in */
166 Table of indentations: right bit indicates whether to use
167 indentation rules (1 = use rules; 0 = ignore)
170 push one of these "contexts" at every "case" or "where"; the right bit says
171 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
173 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
174 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
175 push is shown just below.
180 static short indenttab[MAX_CONTEXTS] = {-1};
182 #define INDENTPT (indenttab[icontexts]>>1)
183 #define INDENTON (indenttab[icontexts]&1)
185 #define RETURN(tok) return(Return(tok))
188 #define YY_DECL int yylex1()
190 /* We should not peek at yy_act, but flex calls us even for the internal action
191 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
192 to support older versions of flex, we'll continue to peek for now.
194 #define YY_USER_ACTION \
195 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
199 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
202 /* Each time we enter a new start state, we push it onto the state stack.
204 #define PUSH_STATE(n) yy_push_state(n)
205 #define POP_STATE yy_pop_state()
210 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
211 never-interactive (to go a bit faster)
212 stack (use a start-condition stack)
216 %option never-interactive
219 /* The start states are:
220 Code -- normal Haskell code (principal lexer)
221 GlaExt -- Haskell code with Glasgow extensions
222 Comment -- Nested comment processing
223 String -- Inside a string literal with backslashes
224 StringEsc -- Immediately following a backslash in a string literal
225 Char -- Inside a character literal with backslashes
226 CharEsc -- Immediately following a backslash in a character literal
228 Note that the INITIAL state is unused. Also note that these states
229 are _exclusive_. All rules should be prefixed with an appropriate
230 list of start states.
233 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
235 isoS [\xa1-\xbf\xd7\xf7]
236 isoL [\xc0-\xd6\xd8-\xde]
237 isol [\xdf-\xf6\xf8-\xff]
244 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
245 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
247 L [A-Z\xc0-\xd6\xd8-\xde]
248 l [a-z_\xdf-\xf6\xf8-\xff]
253 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
262 * Simple comments and whitespace. Normally, we would just ignore these, but
263 * in case we're processing a string escape, we need to note that we've seen
266 * Note that we cater for a comment line that *doesn't* end in a newline.
267 * This is incorrect, strictly speaking, but seems like the right thing
268 * to do. Reported by Rajiv Mirani. (WDP 95/08)
270 * Hackily moved up here so that --<<EOF>> will match -- SOF 5/97
274 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
278 * Special GHC pragma rules. Do we need a start state for interface files,
279 * so these won't be matched in source files? --JSM
285 /* I believe the next rule is not ever matched.
287 The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
288 Driver has now been modified to output `standard' {-# LINE ..-} pragmas
289 where possible, so the lexer should now never see cpp directives
290 like '# ' and '#line'.
294 <Code,GlaExt>^"# ".*{NL} {
295 char tempf[FILENAME_SIZE];
296 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
298 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
301 <Code,GlaExt>^"#line ".*{NL} {
302 char tempf[FILENAME_SIZE];
303 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
305 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
310 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
311 /* partain: pragma-style line directive */
312 char tempf[FILENAME_SIZE];
313 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
315 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
318 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
319 PUSH_STATE(UserPragma);
320 RETURN(INTERFACE_UPRAGMA);
322 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
323 PUSH_STATE(UserPragma);
324 RETURN(SPECIALISE_UPRAGMA);
326 <Code,GlaExt>"{-#"{WS}*"speciali"[sz]e {
327 PUSH_STATE(UserPragma);
328 RETURN(SPECIALISE_UPRAGMA);
330 <Code,GlaExt>"{-#"{WS}*"INLINE" {
331 PUSH_STATE(UserPragma);
332 RETURN(INLINE_UPRAGMA);
334 <Code,GlaExt>"{-#"{WS}*"inline" {
335 PUSH_STATE(UserPragma);
336 RETURN(INLINE_UPRAGMA);
338 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
339 PUSH_STATE(UserPragma);
340 RETURN(NOINLINE_UPRAGMA);
342 <Code,GlaExt>"{-#"{WS}*"notInline" {
343 PUSH_STATE(UserPragma);
344 RETURN(NOINLINE_UPRAGMA);
346 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
347 PUSH_STATE(UserPragma);
348 RETURN(MAGIC_UNFOLDING_UPRAGMA);
350 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
351 /* these are handled by hscpp */
352 nested_comments =1; comment_start = hsplineno;
355 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
356 /* these are for the driver! */
357 nested_comments =1; comment_start = hsplineno;
360 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
361 /* these are used by `make depend' and the
362 compiler to indicate that a module should
363 be imported from source */
364 nested_comments =1; comment_start = hsplineno;
365 RETURN(SOURCE_UPRAGMA);
368 <Code,GlaExt>"{-#"{WS}*[a-zA-Z_]+ {
369 fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
370 input_filename, hsplineno);
371 format_string(stderr, (unsigned char *) yytext, yyleng);
372 fputs("'\n", stderr);
373 nested_comments = 1; comment_start = hsplineno;
376 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
380 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
381 * intentionally accepted as a keyword even for normal <Code>.
385 <Code,GlaExt>"case" { RETURN(CASE); }
386 <Code,GlaExt>"class" { RETURN(CLASS); }
387 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
388 <Code,GlaExt>"default" { RETURN(DEFAULT); }
389 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
390 <Code,GlaExt>"do" { RETURN(DO); }
391 <Code,GlaExt>"else" { RETURN(ELSE); }
392 <Code,GlaExt>"if" { RETURN(IF); }
393 <Code,GlaExt>"import" { RETURN(IMPORT); }
394 <Code,GlaExt>"in" { RETURN(IN); }
395 <Code,GlaExt>"infix" { RETURN(INFIX); }
396 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
397 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
398 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
399 <Code,GlaExt>"let" { RETURN(LET); }
400 <Code,GlaExt>"module" { RETURN(MODULE); }
401 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
402 <Code,GlaExt>"of" { RETURN(OF); }
403 <Code,GlaExt>"then" { RETURN(THEN); }
404 <Code,GlaExt>"type" { RETURN(TYPE); }
405 <Code,GlaExt>"where" { RETURN(WHERE); }
407 <Code,GlaExt>"as" { RETURN(AS); }
408 <Code,GlaExt>"hiding" { RETURN(HIDING); }
409 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
411 <Code,GlaExt>"forall" { RETURN(FORALL); }
413 <Code,GlaExt>"_scc_" { RETURN(SCC); }
414 <GlaExt>"_ccall_" { RETURN(CCALL); }
415 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
416 <GlaExt>"_casm_" { RETURN(CASM); }
417 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
418 <GlaExt>"(#" { RETURN(OUNBOXPAREN); }
419 <GlaExt>"#)" { RETURN(CUNBOXPAREN); }
420 <GlaExt>"foreign" { RETURN(FOREIGN); }
421 <GlaExt>"export" { RETURN(EXPORT); }
422 <GlaExt>"label" { RETURN(LABEL); }
423 <GlaExt>"unsafe" { RETURN(UNSAFE); }
424 <GlaExt>"_stdcall" { RETURN(STDCALL); }
425 <GlaExt>"_ccall" { RETURN(C_CALL); }
426 <GlaExt>"_pascal" { RETURN(PASCAL); }
427 <GlaExt>"stdcall" { RETURN(STDCALL); }
428 <GlaExt>"ccall" { RETURN(C_CALL); }
429 <GlaExt>"pascal" { RETURN(PASCAL); }
430 <GlaExt>"dynamic" { RETURN(DYNAMIC); }
434 * Haskell operators: special, reservedops and useful varsyms
438 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
439 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
440 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
441 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
442 <Code,GlaExt>"{" { RETURN(OCURLY); }
443 <Code,GlaExt>"}" { RETURN(CCURLY); }
444 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
445 <Code,GlaExt>";" { RETURN(SEMI); }
446 <Code,GlaExt>"`" { RETURN(BQUOTE); }
448 <Code,GlaExt>"." { RETURN(DOT); }
449 <Code,GlaExt>".." { RETURN(DOTDOT); }
450 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
451 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
452 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
453 <Code,GlaExt>"|" { RETURN(VBAR); }
454 <Code,GlaExt>"<-" { RETURN(LARROW); }
455 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
456 <Code,GlaExt>"-" { RETURN(MINUS); }
457 <Code,GlaExt>"+" { RETURN(PLUS); }
459 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
460 <Code,GlaExt>"@" { RETURN(AT); }
461 <Code,GlaExt>"!" { RETURN(BANG); }
462 <Code,GlaExt>"~" { RETURN(LAZY); }
466 * Integers and (for Glasgow extensions) primitive integers. Note that
467 * we pass all of the text on to the parser, because flex/C can't handle
468 * arbitrary precision numbers.
472 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
473 yylval.uid = xstrndup(yytext, yyleng - 1);
476 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
477 yylval.uid = xstrndup(yytext, yyleng);
480 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
481 yylval.uid = xstrndup(yytext, yyleng - 1);
484 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
485 yylval.uid = xstrndup(yytext, yyleng);
488 <GlaExt>("-")?{N}"#" {
489 yylval.uid = xstrndup(yytext, yyleng - 1);
492 <Code,GlaExt,UserPragma>{N} {
493 yylval.uid = xstrndup(yytext, yyleng);
499 * Floats and (for Glasgow extensions) primitive floats/doubles.
503 <GlaExt>("-")?{F}"##" {
504 yylval.uid = xstrndup(yytext, yyleng - 2);
507 <GlaExt>("-")?{F}"#" {
508 yylval.uid = xstrndup(yytext, yyleng - 1);
512 yylval.uid = xstrndup(yytext, yyleng);
518 * Funky ``foo'' style C literals for Glasgow extensions
522 <GlaExt>"``"[^']+"''" {
523 hsnewid(yytext + 2, yyleng - 4);
529 * Identifiers, both variables and operators. The trailing hash is allowed
530 * for Glasgow extensions.
536 /* These SHOULDNAE work in "Code" (sigh) */
538 <GlaExt,UserPragma>{Id}"#" {
539 if (! nonstandardFlag) {
540 char errbuf[ERR_BUF_SIZE];
541 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
544 hsnewid(yytext, yyleng);
545 RETURN(isconstr(yytext) ? CONID : VARID);
547 <Code,GlaExt,UserPragma>{Id} {
548 hsnewid(yytext, yyleng);
549 RETURN(isconstr(yytext) ? CONID : VARID);
551 <Code,GlaExt,UserPragma>{SId} {
552 if (is_commment(yytext,yyleng)) {
554 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
559 hsnewid(yytext, yyleng);
560 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
563 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
565 if (! nonstandardFlag) {
566 char errbuf[ERR_BUF_SIZE];
567 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
570 is_constr = hsnewqid(yytext, yyleng);
571 RETURN(is_constr ? QCONID : QVARID);
573 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
574 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
575 RETURN(is_constr ? QCONID : QVARID);
577 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
578 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
579 RETURN(is_constr ? QCONSYM : QVARSYM);
583 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
585 /* Because we can make the former well-behaved (we defined them).
587 Sadly, the latter is defined by Haskell, which allows such
588 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
592 <GlaExt,UserPragma>"`"{Id}"#`" {
593 hsnewid(yytext + 1, yyleng - 2);
594 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
599 * Character literals. The first form is the quick form, for character
600 * literals that don't contain backslashes. Literals with backslashes are
601 * lexed through multiple rules. First, we match the open ' and as many
602 * normal characters as possible. This puts us into the <Char> state, where
603 * a backslash is legal. Then, we match the backslash and move into the
604 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
605 * characters and the close '. We may end up with too many characters, but
606 * this allows us to easily share the lex rules with strings. Excess characters
607 * are ignored with a warning.
611 <GlaExt>'({CHAR}|"\"")"'#" {
612 yylval.uhstring = installHstring(1, yytext+1);
615 <Code,GlaExt>'({CHAR}|"\"")' {
616 yylval.uhstring = installHstring(1, yytext+1);
619 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
620 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
623 <Code,GlaExt>'({CHAR}|"\"")* {
624 hsmlcolno = hspcolno;
626 addtext(yytext+1, yyleng-1);
629 <Char>({CHAR}|"\"")*'# {
633 addtext(yytext, yyleng - 2);
634 text = fetchtext(&length);
636 if (! nonstandardFlag) {
637 char errbuf[ERR_BUF_SIZE];
638 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
643 fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
644 input_filename, hsplineno, hspcolno + 1);
645 format_string(stderr, (unsigned char *) text, length);
646 fputs("' too long\n", stderr);
649 yylval.uhstring = installHstring(1, text);
650 hspcolno = hsmlcolno;
654 <Char>({CHAR}|"\"")*' {
658 addtext(yytext, yyleng - 1);
659 text = fetchtext(&length);
662 fprintf(stderr, "%s:%d:%d: Character literal '",
663 input_filename, hsplineno, hspcolno + 1);
664 format_string(stderr, (unsigned char *) text, length);
665 fputs("' too long\n", stderr);
668 yylval.uhstring = installHstring(1, text);
669 hspcolno = hsmlcolno;
673 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
678 * String literals. The first form is the quick form, for string literals
679 * that don't contain backslashes. Literals with backslashes are lexed
680 * through multiple rules. First, we match the open " and as many normal
681 * characters as possible. This puts us into the <String> state, where
682 * a backslash is legal. Then, we match the backslash and move into the
683 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
684 * characters, moving back and forth between <String> and <StringEsc> as more
685 * backslashes are encountered. (We may even digress into <Comment> mode if we
686 * find a comment in a gap between backslashes.) Finally, we read the last chunk
687 * of normal characters and the close ".
691 <GlaExt>"\""({CHAR}|"'")*"\""# {
692 yylval.uhstring = installHstring(yyleng-3, yytext+1);
693 /* the -3 accounts for the " on front, "# on the end */
696 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
697 yylval.uhstring = installHstring(yyleng-2, yytext+1);
700 <Code,GlaExt>"\""({CHAR}|"'")* {
701 hsmlcolno = hspcolno;
703 addtext(yytext+1, yyleng-1);
706 <String>({CHAR}|"'")*"\"#" {
710 addtext(yytext, yyleng-2);
711 text = fetchtext(&length);
713 if (! nonstandardFlag) {
714 char errbuf[ERR_BUF_SIZE];
715 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
719 yylval.uhstring = installHstring(length, text);
720 hspcolno = hsmlcolno;
724 <String>({CHAR}|"'")*"\"" {
728 addtext(yytext, yyleng-1);
729 text = fetchtext(&length);
731 yylval.uhstring = installHstring(length, text);
732 hspcolno = hsmlcolno;
736 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
740 * Character and string escapes are roughly the same, but strings have the
741 * extra `\&' sequence which is not allowed for characters. Also, comments
742 * are allowed in the <StringEsc> state. (See the comment section much
745 * NB: Backslashes and tabs are stored in strings as themselves.
746 * But if we print them (in printtree.c), they must go out as
747 * "\\\\" and "\\t" respectively. (This is because of the bogus
748 * intermediate format that the parser produces. It uses '\t' fpr end of
749 * string, so it needs to be able to escape tabs, which means that it
750 * also needs to be able to escape the escape character ('\\'). Sigh.
754 <Char>\\ { PUSH_STATE(CharEsc); }
755 <String>\\& /* Ignore */ ;
756 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
758 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
759 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
763 Not 100% correct, tokenizes "foo \ --<>--
766 as "foo bar", but this is not correct as per Haskell 98 report and its
767 maximal munch rule for "--"-style comments.
769 For the moment, not deemed worthy to fix.
772 <StringEsc>"--"[^\n\r]*{NL}?{WS}* { noGap=FALSE; }
774 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
775 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
776 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
777 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
778 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
779 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
780 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
781 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
782 <CharEsc,StringEsc>BEL |
783 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
784 <CharEsc,StringEsc>BS |
785 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
786 <CharEsc,StringEsc>HT |
787 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
788 <CharEsc,StringEsc>LF |
789 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
790 <CharEsc,StringEsc>VT |
791 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
792 <CharEsc,StringEsc>FF |
793 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
794 <CharEsc,StringEsc>CR |
795 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
796 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
797 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
798 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
799 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
800 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
801 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
802 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
803 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
804 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
805 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
806 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
807 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
808 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
809 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
810 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
811 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
812 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
813 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
814 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
815 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
816 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
817 <CharEsc,StringEsc>{D}+ {
818 int i = strtol(yytext, NULL, 10);
822 char errbuf[ERR_BUF_SIZE];
823 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
829 <CharEsc,StringEsc>o{O}+ {
830 int i = strtol(yytext + 1, NULL, 8);
834 char errbuf[ERR_BUF_SIZE];
835 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
841 <CharEsc,StringEsc>x{H}+ {
842 int i = strtol(yytext + 1, NULL, 16);
846 char errbuf[ERR_BUF_SIZE];
847 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
857 * Nested comments. The major complication here is in trying to match the
858 * longest lexemes possible, for better performance. (See the flex document.)
859 * That's why the rules look so bizarre.
863 <Code,GlaExt,UserPragma,StringEsc>"{-" {
864 noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment);
868 <Comment>"-"+[^-{}]+ |
869 <Comment>"{"+[^-{}]+ ;
870 <Comment>"{-" { nested_comments++; }
871 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
877 * Illegal characters. This used to be a single rule, but we might as well
878 * pass on as much information as we have, so now we indicate our state in
883 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
884 fprintf(stderr, "%s:%d:%d: Illegal character: `",
885 input_filename, hsplineno, hspcolno + 1);
886 format_string(stderr, (unsigned char *) yytext, 1);
887 fputs("'\n", stderr);
891 fprintf(stderr, "%s:%d:%d: Illegal character: `",
892 input_filename, hsplineno, hspcolno + 1);
893 format_string(stderr, (unsigned char *) yytext, 1);
894 fputs("' in a character literal\n", stderr);
898 fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
899 input_filename, hsplineno, hspcolno + 1);
900 format_string(stderr, (unsigned char *) yytext, 1);
901 fputs("'\n", stderr);
904 <String>(.|\n) { if (nonstandardFlag) {
905 addtext(yytext, yyleng);
907 fprintf(stderr, "%s:%d:%d: Illegal character: `",
908 input_filename, hsplineno, hspcolno + 1);
909 format_string(stderr, (unsigned char *) yytext, 1);
910 fputs("' in a string literal\n", stderr);
916 fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
917 input_filename, hsplineno, hspcolno + 1);
918 format_string(stderr, (unsigned char *) yytext, 1);
919 fputs("'\n", stderr);
922 fprintf(stderr, "%s:%d:%d: Illegal character: `",
923 input_filename, hsplineno, hspcolno + 1);
924 format_string(stderr, (unsigned char *) yytext, 1);
925 fputs("' in a string gap\n", stderr);
932 * End of file. In any sub-state, this is an error. However, for the primary
933 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
934 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
935 * adding virtual close curlies, or closing an interface and returning to the
936 * primary source file.
938 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
939 * line/column advancement has to be done by hand.
943 <Char,CharEsc><<EOF>> {
944 hsplineno = hslineno; hspcolno = hscolno;
945 hsperror("unterminated character literal");
948 char errbuf[ERR_BUF_SIZE];
949 hsplineno = hslineno; hspcolno = hscolno;
950 sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
953 <String,StringEsc><<EOF>> {
954 hsplineno = hslineno; hspcolno = hscolno;
955 hsperror("unterminated string literal");
957 <UserPragma><<EOF>> {
958 hsplineno = hslineno; hspcolno = hscolno;
959 hsperror("unterminated user-specified pragma");
961 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
965 /**********************************************************************
968 * YACC/LEX Initialisation etc. *
971 **********************************************************************/
974 We initialise input_filename to "<stdin>".
975 This allows unnamed sources to be piped into the parser.
981 input_filename = xstrdup("<stdin>");
983 /* We must initialize the input buffer _now_, because we call
984 setyyin _before_ calling yylex for the first time! */
985 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
994 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
996 if (input_filename != NULL)
997 free(input_filename);
998 input_filename = xstrdup(f);
1001 /**********************************************************************
1004 * Layout Processing *
1007 **********************************************************************/
1010 The following section deals with Haskell Layout conventions
1011 forcing insertion of ; or } as appropriate
1015 #define LAYOUT_DEBUG
1020 hsshouldindent(void)
1022 return (!forgetindent && INDENTON);
1026 /* Enter new context and set new indentation level */
1031 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1035 * partain: first chk that new indent won't be less than current one; this code
1036 * doesn't make sense to me; hscolno tells the position of the _end_ of the
1037 * current token; what that has to do with indenting, I don't know.
1041 if (hscolno - 1 <= INDENTPT) {
1043 return; /* Empty input OK for Haskell 1.1 */
1045 char errbuf[ERR_BUF_SIZE];
1047 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1051 hsentercontext((hspcolno << 1) | 1);
1055 /* Enter a new context without changing the indentation level */
1060 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1062 hsentercontext(indenttab[icontexts] & ~1);
1066 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1070 forgetindent = TRUE;
1074 /* Enter a new layout context. */
1076 hsentercontext(int indent)
1078 /* Enter new context and set indentation as specified */
1079 if (++icontexts >= MAX_CONTEXTS) {
1080 char errbuf[ERR_BUF_SIZE];
1082 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1085 forgetindent = FALSE;
1086 indenttab[icontexts] = indent;
1088 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1093 /* Exit a layout context */
1099 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1104 * Return checks the indentation level and returns ;, } or the specified token.
1112 if (hsshouldindent()) {
1113 if (hspcolno < INDENTPT) {
1115 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1119 } else if (hspcolno == INDENTPT) {
1121 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1130 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1137 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1143 static BOOLEAN eof = FALSE;
1146 if (hssttok != -1) {
1154 endlineno = hslineno;
1155 if ((tok = yylex1()) != EOF)
1161 if (icontexts > icontexts_save) {
1164 indenttab[icontexts] = 0;
1167 hsperror("missing '}' at end of file");
1168 } else if (hsbuf_save != NULL) {
1170 yy_delete_buffer(YY_CURRENT_BUFFER);
1171 yy_switch_to_buffer(hsbuf_save);
1173 new_filename(filename_save);
1174 free(filename_save);
1175 hslineno = hslineno_save;
1176 hsplineno = hsplineno_save;
1177 hscolno = hscolno_save;
1178 hspcolno = hspcolno_save;
1180 icontexts = icontexts_save - 1;
1183 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1188 hsperror("No longer using yacc to parse interface files");
1193 abort(); /* should never get here! */
1197 /**********************************************************************
1200 * Input Processing for Interfaces -- Not currently used !!! *
1203 **********************************************************************/
1205 /* setyyin(file) open file as new lex input buffer */
1211 hsbuf_save = YY_CURRENT_BUFFER;
1212 if ((yyin = fopen(file, "r")) == NULL) {
1213 char errbuf[ERR_BUF_SIZE];
1215 sprintf(errbuf, "can't read \"%-.50s\"", file);
1218 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1220 hslineno_save = hslineno;
1221 hsplineno_save = hsplineno;
1222 hslineno = hsplineno = 1;
1224 filename_save = input_filename;
1225 input_filename = NULL;
1227 hscolno_save = hscolno;
1228 hspcolno_save = hspcolno;
1229 hscolno = hspcolno = 0;
1230 etags_save = etags; /* do not do "etags" stuff in interfaces */
1231 etags = 0; /* We remember whether we are doing it in
1232 the module, so we can restore it later [WDP 94/09] */
1233 hsentercontext(-1); /* partain: changed this from 0 */
1234 icontexts_save = icontexts;
1236 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1241 layout_input(char *text, int len)
1244 fprintf(stderr, "Scanning \"%s\"\n", text);
1247 hsplineno = hslineno;
1259 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1271 setstartlineno(void)
1273 startlineno = hsplineno;
1275 if (modulelineno == 0) {
1276 modulelineno = startlineno;
1282 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1286 /**********************************************************************
1292 **********************************************************************/
1294 #define CACHE_SIZE YY_BUF_SIZE
1300 } textcache = { 0, 0, NULL };
1305 /* fprintf(stderr, "cleartext\n"); */
1307 if (textcache.allocated == 0) {
1308 textcache.allocated = CACHE_SIZE;
1309 textcache.text = xmalloc(CACHE_SIZE);
1314 addtext(char *text, unsigned length)
1316 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1321 if (textcache.next + length + 1 >= textcache.allocated) {
1322 textcache.allocated += length + CACHE_SIZE;
1323 textcache.text = xrealloc(textcache.text, textcache.allocated);
1325 bcopy(text, textcache.text + textcache.next, length);
1326 textcache.next += length;
1332 /* fprintf(stderr, "addchar: %c\n", c); */
1334 if (textcache.next + 2 >= textcache.allocated) {
1335 textcache.allocated += CACHE_SIZE;
1336 textcache.text = xrealloc(textcache.text, textcache.allocated);
1338 textcache.text[textcache.next++] = c;
1342 fetchtext(unsigned *length)
1344 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1346 *length = textcache.next;
1347 textcache.text[textcache.next] = '\0';
1348 return textcache.text;
1351 /**********************************************************************
1354 * Identifier Processing *
1357 **********************************************************************/
1360 hsnewid Enters an id of length n into the symbol table.
1364 hsnewid(char *name, int length)
1366 char save = name[length];
1368 name[length] = '\0';
1369 yylval.uid = installid(name);
1370 name[length] = save;
1374 hsnewqid(char *name, int length)
1377 char save = name[length];
1378 name[length] = '\0';
1380 dot = strchr(name, '.');
1382 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1384 name[length] = save;
1386 return isconstr(dot+1);
1391 is_commment(char* lexeme, int len)
1400 for(i=0;i<len;i++) {
1401 if (lexeme[i] != '-') return FALSE;