2 /**********************************************************************
8 **********************************************************************/
10 /* The includes/config.h one */
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
17 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
28 /* memory.h and strings.h conflict on some systems. */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
32 #include "hsparser.tab.h"
33 #include "constants.h"
36 /* Our substitute for <ctype.h> */
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
47 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
48 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
50 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
51 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0,
52 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
54 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
55 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
56 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
57 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0,
58 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
59 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
60 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
61 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
64 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
84 return(CharTable[*s]&(_C));
87 /**********************************************************************
93 **********************************************************************/
95 char *input_filename = NULL; /* Always points to a dynamically allocated string */
98 * For my own sanity, things that are not part of the flex skeleton
99 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
102 static int hslineno = 0; /* Line number at end of token */
103 int hsplineno = 0; /* Line number at end of previous token */
105 static int hscolno = 0; /* Column number at end of token */
106 int hspcolno = 0; /* Column number at end of previous token */
107 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
109 int modulelineno = -1; /* The line number where the module starts */
110 int startlineno = 0; /* The line number where something starts */
111 int endlineno = 0; /* The line number where something ends */
113 static BOOLEAN noGap = TRUE; /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
116 static int nested_comments; /* For counting comment nesting depth */
117 static int comment_start;
119 /* OLD: Hacky definition of yywrap: see flex doc.
121 If we don't do this, then we'll have to get the default
122 yywrap from the flex library, which is often something
123 we are not good at locating. This avoids that difficulty.
124 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
129 /* Essential forward declarations */
131 static void hsnewid PROTO((char *, int));
132 static void layout_input PROTO((char *, int));
133 static void cleartext (NO_ARGS);
134 static void addtext PROTO((char *, unsigned));
135 static void addchar PROTO((char));
136 static char *fetchtext PROTO((unsigned *));
137 static void new_filename PROTO((char *));
138 static int Return PROTO((int));
139 static void hsentercontext PROTO((int));
141 static BOOLEAN is_commment PROTO((char*, int));
143 /* Special file handling for IMPORTS */
144 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
146 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
147 static char *filename_save; /* File Name */
148 static int hslineno_save = 0, /* Line Number */
149 hsplineno_save = 0, /* Line Number of Prev. token */
150 hscolno_save = 0, /* Indentation */
151 hspcolno_save = 0; /* Left Indentation */
152 static short icontexts_save = 0; /* Indent Context Level */
154 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
155 extern BOOLEAN etags; /* that which is saved */
157 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
159 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
160 * inserted before token +ve -- "}" inserted before
163 short icontexts = 0; /* Which context we're in */
166 Table of indentations: right bit indicates whether to use
167 indentation rules (1 = use rules; 0 = ignore)
170 push one of these "contexts" at every "case" or "where"; the right bit says
171 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
173 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
174 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
175 push is shown just below.
180 static short indenttab[MAX_CONTEXTS] = {-1};
182 #define INDENTPT (indenttab[icontexts]>>1)
183 #define INDENTON (indenttab[icontexts]&1)
185 #define RETURN(tok) return(Return(tok))
188 #define YY_DECL int yylex1()
190 /* We should not peek at yy_act, but flex calls us even for the internal action
191 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
192 to support older versions of flex, we'll continue to peek for now.
194 #define YY_USER_ACTION \
195 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
199 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
202 /* Each time we enter a new start state, we push it onto the state stack.
204 #define PUSH_STATE(n) yy_push_state(n)
205 #define POP_STATE yy_pop_state()
210 noyywrap (do not call yywrap on end of file; avoid use of -lfl)
211 never-interactive (to go a bit faster)
212 stack (use a start-condition stack)
216 %option never-interactive
219 /* The start states are:
220 Code -- normal Haskell code (principal lexer)
221 GlaExt -- Haskell code with Glasgow extensions
222 Comment -- Nested comment processing
223 String -- Inside a string literal with backslashes
224 StringEsc -- Immediately following a backslash in a string literal
225 Char -- Inside a character literal with backslashes
226 CharEsc -- Immediately following a backslash in a character literal
228 Note that the INITIAL state is unused. Also note that these states
229 are _exclusive_. All rules should be prefixed with an appropriate
230 list of start states.
233 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
235 isoS [\xa1-\xbf\xd7\xf7]
236 isoL [\xc0-\xd6\xd8-\xde]
237 isol [\xdf-\xf6\xf8-\xff]
244 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
245 S [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
247 L [A-Z\xc0-\xd6\xd8-\xde]
248 l [a-z_\xdf-\xf6\xf8-\xff]
253 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
262 * Simple comments and whitespace. Normally, we would just ignore these, but
263 * in case we're processing a string escape, we need to note that we've seen
266 * Note that we cater for a comment line that *doesn't* end in a newline.
267 * This is incorrect, strictly speaking, but seems like the right thing
268 * to do. Reported by Rajiv Mirani. (WDP 95/08)
270 * Hackily moved up here so that --<<EOF>> will match -- SOF 5/97
274 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
278 * Special GHC pragma rules. Do we need a start state for interface files,
279 * so these won't be matched in source files? --JSM
285 /* I believe the next rule is not ever matched.
287 The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
288 Driver has now been modified to output `standard' {-# LINE ..-} pragmas
289 where possible, so the lexer should now never see cpp directives
290 like '# ' and '#line'.
294 <Code,GlaExt>^"# ".*{NL} {
295 char tempf[FILENAME_SIZE];
296 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
298 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
301 <Code,GlaExt>^"#line ".*{NL} {
302 char tempf[FILENAME_SIZE];
303 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
305 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
310 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
311 /* partain: pragma-style line directive */
312 char tempf[FILENAME_SIZE];
313 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
315 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
318 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
319 PUSH_STATE(UserPragma);
320 RETURN(INTERFACE_UPRAGMA);
322 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
323 PUSH_STATE(UserPragma);
324 RETURN(SPECIALISE_UPRAGMA);
326 <Code,GlaExt>"{-#"{WS}*"speciali"[sz]e {
327 PUSH_STATE(UserPragma);
328 RETURN(SPECIALISE_UPRAGMA);
330 <Code,GlaExt>"{-#"{WS}*"INLINE" {
331 PUSH_STATE(UserPragma);
332 RETURN(INLINE_UPRAGMA);
334 <Code,GlaExt>"{-#"{WS}*"RULES" {
335 PUSH_STATE(Code); /* I'm not sure about this */
336 RETURN(RULES_UPRAGMA);
338 <Code,GlaExt>"{-#"{WS}*"inline" {
339 PUSH_STATE(UserPragma);
340 RETURN(INLINE_UPRAGMA);
342 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
343 PUSH_STATE(UserPragma);
344 RETURN(NOINLINE_UPRAGMA);
346 <Code,GlaExt>"{-#"{WS}*"notInline" {
347 PUSH_STATE(UserPragma);
348 RETURN(NOINLINE_UPRAGMA);
350 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
351 PUSH_STATE(UserPragma);
352 RETURN(MAGIC_UNFOLDING_UPRAGMA);
354 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
355 /* these are handled by hscpp */
356 nested_comments =1; comment_start = hsplineno;
359 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
360 /* these are for the driver! */
361 nested_comments =1; comment_start = hsplineno;
364 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
365 /* these are used by `make depend' and the
366 compiler to indicate that a module should
367 be imported from source */
368 nested_comments =1; comment_start = hsplineno;
369 RETURN(SOURCE_UPRAGMA);
372 <Code,GlaExt>"{-#"{WS}*[a-zA-Z_]+ {
373 fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
374 input_filename, hsplineno);
375 format_string(stderr, (unsigned char *) yytext, yyleng);
376 fputs("'\n", stderr);
377 nested_comments = 1; comment_start = hsplineno;
380 <Code,GlaExt,UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
384 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
385 * intentionally accepted as a keyword even for normal <Code>.
389 <Code,GlaExt>"case" { RETURN(CASE); }
390 <Code,GlaExt>"class" { RETURN(CLASS); }
391 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
392 <Code,GlaExt>"default" { RETURN(DEFAULT); }
393 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
394 <Code,GlaExt>"do" { RETURN(DO); }
395 <Code,GlaExt>"else" { RETURN(ELSE); }
396 <Code,GlaExt>"if" { RETURN(IF); }
397 <Code,GlaExt>"import" { RETURN(IMPORT); }
398 <Code,GlaExt>"in" { RETURN(IN); }
399 <Code,GlaExt>"infix" { RETURN(INFIX); }
400 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
401 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
402 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
403 <Code,GlaExt>"let" { RETURN(LET); }
404 <Code,GlaExt>"module" { RETURN(MODULE); }
405 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
406 <Code,GlaExt>"of" { RETURN(OF); }
407 <Code,GlaExt>"then" { RETURN(THEN); }
408 <Code,GlaExt>"type" { RETURN(TYPE); }
409 <Code,GlaExt>"where" { RETURN(WHERE); }
411 <Code,GlaExt>"as" { RETURN(AS); }
412 <Code,GlaExt>"hiding" { RETURN(HIDING); }
413 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
415 <Code,GlaExt>"forall" { RETURN(FORALL); }
417 <Code,GlaExt>"_scc_" { RETURN(SCC); }
418 <GlaExt>"_ccall_" { RETURN(CCALL); }
419 <GlaExt>"_ccall_GC_" { RETURN(CCALL_GC); }
420 <GlaExt>"_casm_" { RETURN(CASM); }
421 <GlaExt>"_casm_GC_" { RETURN(CASM_GC); }
422 <GlaExt>"(#" { RETURN(OUNBOXPAREN); }
423 <GlaExt>"#)" { RETURN(CUNBOXPAREN); }
424 <GlaExt>"foreign" { RETURN(FOREIGN); }
425 <GlaExt>"export" { RETURN(EXPORT); }
426 <GlaExt>"label" { RETURN(LABEL); }
427 <GlaExt>"unsafe" { RETURN(UNSAFE); }
428 <GlaExt>"_stdcall" { RETURN(STDCALL); }
429 <GlaExt>"_ccall" { RETURN(C_CALL); }
430 <GlaExt>"_pascal" { RETURN(PASCAL); }
431 <GlaExt>"stdcall" { RETURN(STDCALL); }
432 <GlaExt>"ccall" { RETURN(C_CALL); }
433 <GlaExt>"pascal" { RETURN(PASCAL); }
434 <GlaExt>"dynamic" { RETURN(DYNAMIC); }
438 * Haskell operators: special, reservedops and useful varsyms
442 <Code,GlaExt,UserPragma>"(" { RETURN(OPAREN); }
443 <Code,GlaExt,UserPragma>")" { RETURN(CPAREN); }
444 <Code,GlaExt,UserPragma>"[" { RETURN(OBRACK); }
445 <Code,GlaExt,UserPragma>"]" { RETURN(CBRACK); }
446 <Code,GlaExt>"{" { RETURN(OCURLY); }
447 <Code,GlaExt>"}" { RETURN(CCURLY); }
448 <Code,GlaExt,UserPragma>"," { RETURN(COMMA); }
449 <Code,GlaExt>";" { RETURN(SEMI); }
450 <Code,GlaExt>"`" { RETURN(BQUOTE); }
452 <Code,GlaExt>"." { RETURN(DOT); }
453 <Code,GlaExt>".." { RETURN(DOTDOT); }
454 <Code,GlaExt,UserPragma>"::" { RETURN(DCOLON); }
455 <Code,GlaExt,UserPragma>"=" { RETURN(EQUAL); }
456 <Code,GlaExt>"\\" { RETURN(LAMBDA); }
457 <Code,GlaExt>"|" { RETURN(VBAR); }
458 <Code,GlaExt>"<-" { RETURN(LARROW); }
459 <Code,GlaExt,UserPragma>"->" { RETURN(RARROW); }
460 <Code,GlaExt>"-" { RETURN(MINUS); }
461 <Code,GlaExt>"+" { RETURN(PLUS); }
463 <Code,GlaExt,UserPragma>"=>" { RETURN(DARROW); }
464 <Code,GlaExt>"@" { RETURN(AT); }
465 <Code,GlaExt>"!" { RETURN(BANG); }
466 <Code,GlaExt>"~" { RETURN(LAZY); }
470 * Integers and (for Glasgow extensions) primitive integers. Note that
471 * we pass all of the text on to the parser, because flex/C can't handle
472 * arbitrary precision numbers.
476 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
477 yylval.uid = xstrndup(yytext, yyleng - 1);
480 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
481 yylval.uid = xstrndup(yytext, yyleng);
484 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
485 yylval.uid = xstrndup(yytext, yyleng - 1);
488 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
489 yylval.uid = xstrndup(yytext, yyleng);
492 <GlaExt>("-")?{N}"#" {
493 yylval.uid = xstrndup(yytext, yyleng - 1);
496 <Code,GlaExt,UserPragma>{N} {
497 yylval.uid = xstrndup(yytext, yyleng);
503 * Floats and (for Glasgow extensions) primitive floats/doubles.
507 <GlaExt>("-")?{F}"##" {
508 yylval.uid = xstrndup(yytext, yyleng - 2);
511 <GlaExt>("-")?{F}"#" {
512 yylval.uid = xstrndup(yytext, yyleng - 1);
516 yylval.uid = xstrndup(yytext, yyleng);
522 * Funky ``foo'' style C literals for Glasgow extensions
526 <GlaExt>"``"[^']+"''" {
527 hsnewid(yytext + 2, yyleng - 4);
533 * Identifiers, both variables and operators. The trailing hash is allowed
534 * for Glasgow extensions.
540 /* These SHOULDNAE work in "Code" (sigh) */
542 <GlaExt,UserPragma>{Id}"#" {
543 if (! nonstandardFlag) {
544 char errbuf[ERR_BUF_SIZE];
545 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
548 hsnewid(yytext, yyleng);
549 RETURN(isconstr(yytext) ? CONID : VARID);
551 <Code,GlaExt,UserPragma>{Id} {
552 hsnewid(yytext, yyleng);
553 RETURN(isconstr(yytext) ? CONID : VARID);
555 <Code,GlaExt,UserPragma>{SId} {
556 if (is_commment(yytext,yyleng)) {
558 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
563 hsnewid(yytext, yyleng);
564 RETURN(isconstr(yytext) ? CONSYM : VARSYM);
567 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
569 if (! nonstandardFlag) {
570 char errbuf[ERR_BUF_SIZE];
571 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
574 is_constr = hsnewqid(yytext, yyleng);
575 RETURN(is_constr ? QCONID : QVARID);
577 <Code,GlaExt,UserPragma>{Mod}"."{Id} {
578 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
579 RETURN(is_constr ? QCONID : QVARID);
581 <Code,GlaExt,UserPragma>{Mod}"."{SId} {
582 BOOLEAN is_constr = hsnewqid(yytext, yyleng);
583 RETURN(is_constr ? QCONSYM : QVARSYM);
587 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
589 /* Because we can make the former well-behaved (we defined them).
591 Sadly, the latter is defined by Haskell, which allows such
592 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
596 <GlaExt,UserPragma>"`"{Id}"#`" {
597 hsnewid(yytext + 1, yyleng - 2);
598 RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
603 * Character literals. The first form is the quick form, for character
604 * literals that don't contain backslashes. Literals with backslashes are
605 * lexed through multiple rules. First, we match the open ' and as many
606 * normal characters as possible. This puts us into the <Char> state, where
607 * a backslash is legal. Then, we match the backslash and move into the
608 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
609 * characters and the close '. We may end up with too many characters, but
610 * this allows us to easily share the lex rules with strings. Excess characters
611 * are ignored with a warning.
615 <GlaExt>'({CHAR}|"\"")"'#" {
616 yylval.uhstring = installHstring(1, yytext+1);
619 <Code,GlaExt>'({CHAR}|"\"")' {
620 yylval.uhstring = installHstring(1, yytext+1);
623 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
624 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
627 <Code,GlaExt>'({CHAR}|"\"")* {
628 hsmlcolno = hspcolno;
630 addtext(yytext+1, yyleng-1);
633 <Char>({CHAR}|"\"")*'# {
637 addtext(yytext, yyleng - 2);
638 text = fetchtext(&length);
640 if (! nonstandardFlag) {
641 char errbuf[ERR_BUF_SIZE];
642 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
647 fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
648 input_filename, hsplineno, hspcolno + 1);
649 format_string(stderr, (unsigned char *) text, length);
650 fputs("' too long\n", stderr);
653 yylval.uhstring = installHstring(1, text);
654 hspcolno = hsmlcolno;
658 <Char>({CHAR}|"\"")*' {
662 addtext(yytext, yyleng - 1);
663 text = fetchtext(&length);
666 fprintf(stderr, "%s:%d:%d: Character literal '",
667 input_filename, hsplineno, hspcolno + 1);
668 format_string(stderr, (unsigned char *) text, length);
669 fputs("' too long\n", stderr);
672 yylval.uhstring = installHstring(1, text);
673 hspcolno = hsmlcolno;
677 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
682 * String literals. The first form is the quick form, for string literals
683 * that don't contain backslashes. Literals with backslashes are lexed
684 * through multiple rules. First, we match the open " and as many normal
685 * characters as possible. This puts us into the <String> state, where
686 * a backslash is legal. Then, we match the backslash and move into the
687 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
688 * characters, moving back and forth between <String> and <StringEsc> as more
689 * backslashes are encountered. (We may even digress into <Comment> mode if we
690 * find a comment in a gap between backslashes.) Finally, we read the last chunk
691 * of normal characters and the close ".
695 <GlaExt>"\""({CHAR}|"'")*"\""# {
696 yylval.uhstring = installHstring(yyleng-3, yytext+1);
697 /* the -3 accounts for the " on front, "# on the end */
700 <Code,GlaExt>"\""({CHAR}|"'")*"\"" {
701 yylval.uhstring = installHstring(yyleng-2, yytext+1);
704 <Code,GlaExt>"\""({CHAR}|"'")* {
705 hsmlcolno = hspcolno;
707 addtext(yytext+1, yyleng-1);
710 <String>({CHAR}|"'")*"\"#" {
714 addtext(yytext, yyleng-2);
715 text = fetchtext(&length);
717 if (! nonstandardFlag) {
718 char errbuf[ERR_BUF_SIZE];
719 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
723 yylval.uhstring = installHstring(length, text);
724 hspcolno = hsmlcolno;
728 <String>({CHAR}|"'")*"\"" {
732 addtext(yytext, yyleng-1);
733 text = fetchtext(&length);
735 yylval.uhstring = installHstring(length, text);
736 hspcolno = hsmlcolno;
740 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
744 * Character and string escapes are roughly the same, but strings have the
745 * extra `\&' sequence which is not allowed for characters. Also, comments
746 * are allowed in the <StringEsc> state. (See the comment section much
749 * NB: Backslashes and tabs are stored in strings as themselves.
750 * But if we print them (in printtree.c), they must go out as
751 * "\\\\" and "\\t" respectively. (This is because of the bogus
752 * intermediate format that the parser produces. It uses '\t' fpr end of
753 * string, so it needs to be able to escape tabs, which means that it
754 * also needs to be able to escape the escape character ('\\'). Sigh.
758 <Char>\\ { PUSH_STATE(CharEsc); }
759 <String>\\& /* Ignore */ ;
760 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
762 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
763 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
767 Not 100% correct, tokenizes "foo \ --<>--
770 as "foo bar", but this is not correct as per Haskell 98 report and its
771 maximal munch rule for "--"-style comments.
773 For the moment, not deemed worthy to fix.
776 <StringEsc>"--"[^\n\r]*{NL}?{WS}* { noGap=FALSE; }
778 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
779 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
780 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
781 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
782 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
783 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
784 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
785 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
786 <CharEsc,StringEsc>BEL |
787 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
788 <CharEsc,StringEsc>BS |
789 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
790 <CharEsc,StringEsc>HT |
791 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
792 <CharEsc,StringEsc>LF |
793 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
794 <CharEsc,StringEsc>VT |
795 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
796 <CharEsc,StringEsc>FF |
797 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
798 <CharEsc,StringEsc>CR |
799 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
800 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
801 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
802 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
803 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
804 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
805 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
806 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
807 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
808 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
809 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
810 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
811 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
812 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
813 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
814 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
815 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
816 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
817 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
818 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
819 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
820 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
821 <CharEsc,StringEsc>{D}+ {
822 int i = strtol(yytext, NULL, 10);
826 char errbuf[ERR_BUF_SIZE];
827 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
833 <CharEsc,StringEsc>o{O}+ {
834 int i = strtol(yytext + 1, NULL, 8);
838 char errbuf[ERR_BUF_SIZE];
839 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
845 <CharEsc,StringEsc>x{H}+ {
846 int i = strtol(yytext + 1, NULL, 16);
850 char errbuf[ERR_BUF_SIZE];
851 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
861 * Nested comments. The major complication here is in trying to match the
862 * longest lexemes possible, for better performance. (See the flex document.)
863 * That's why the rules look so bizarre.
867 <Code,GlaExt,UserPragma,StringEsc>"{-" {
868 noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment);
872 <Comment>"-"+[^-{}]+ |
873 <Comment>"{"+[^-{}]+ ;
874 <Comment>"{-" { nested_comments++; }
875 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
881 * Illegal characters. This used to be a single rule, but we might as well
882 * pass on as much information as we have, so now we indicate our state in
887 <INITIAL,Code,GlaExt,UserPragma>(.|\n) {
888 fprintf(stderr, "%s:%d:%d: Illegal character: `",
889 input_filename, hsplineno, hspcolno + 1);
890 format_string(stderr, (unsigned char *) yytext, 1);
891 fputs("'\n", stderr);
895 fprintf(stderr, "%s:%d:%d: Illegal character: `",
896 input_filename, hsplineno, hspcolno + 1);
897 format_string(stderr, (unsigned char *) yytext, 1);
898 fputs("' in a character literal\n", stderr);
902 fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
903 input_filename, hsplineno, hspcolno + 1);
904 format_string(stderr, (unsigned char *) yytext, 1);
905 fputs("'\n", stderr);
908 <String>(.|\n) { if (nonstandardFlag) {
909 addtext(yytext, yyleng);
911 fprintf(stderr, "%s:%d:%d: Illegal character: `",
912 input_filename, hsplineno, hspcolno + 1);
913 format_string(stderr, (unsigned char *) yytext, 1);
914 fputs("' in a string literal\n", stderr);
920 fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
921 input_filename, hsplineno, hspcolno + 1);
922 format_string(stderr, (unsigned char *) yytext, 1);
923 fputs("'\n", stderr);
926 fprintf(stderr, "%s:%d:%d: Illegal character: `",
927 input_filename, hsplineno, hspcolno + 1);
928 format_string(stderr, (unsigned char *) yytext, 1);
929 fputs("' in a string gap\n", stderr);
936 * End of file. In any sub-state, this is an error. However, for the primary
937 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
938 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
939 * adding virtual close curlies, or closing an interface and returning to the
940 * primary source file.
942 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
943 * line/column advancement has to be done by hand.
947 <Char,CharEsc><<EOF>> {
948 hsplineno = hslineno; hspcolno = hscolno;
949 hsperror("unterminated character literal");
952 char errbuf[ERR_BUF_SIZE];
953 hsplineno = hslineno; hspcolno = hscolno;
954 sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
957 <String,StringEsc><<EOF>> {
958 hsplineno = hslineno; hspcolno = hscolno;
959 hsperror("unterminated string literal");
961 <UserPragma><<EOF>> {
962 hsplineno = hslineno; hspcolno = hscolno;
963 hsperror("unterminated user-specified pragma");
965 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
969 /**********************************************************************
972 * YACC/LEX Initialisation etc. *
975 **********************************************************************/
978 We initialise input_filename to "<stdin>".
979 This allows unnamed sources to be piped into the parser.
985 input_filename = xstrdup("<stdin>");
987 /* We must initialize the input buffer _now_, because we call
988 setyyin _before_ calling yylex for the first time! */
989 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
998 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
1000 if (input_filename != NULL)
1001 free(input_filename);
1002 input_filename = xstrdup(f);
1005 /**********************************************************************
1008 * Layout Processing *
1011 **********************************************************************/
1014 The following section deals with Haskell Layout conventions
1015 forcing insertion of ; or } as appropriate
1019 #define LAYOUT_DEBUG
1024 hsshouldindent(void)
1026 return (!forgetindent && INDENTON);
1030 /* Enter new context and set new indentation level */
1035 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1039 * partain: first chk that new indent won't be less than current one; this code
1040 * doesn't make sense to me; hscolno tells the position of the _end_ of the
1041 * current token; what that has to do with indenting, I don't know.
1045 if (hscolno - 1 <= INDENTPT) {
1047 return; /* Empty input OK for Haskell 1.1 */
1049 char errbuf[ERR_BUF_SIZE];
1051 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1055 hsentercontext((hspcolno << 1) | 1);
1059 /* Enter a new context without changing the indentation level */
1064 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1066 hsentercontext(indenttab[icontexts] & ~1);
1070 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1074 forgetindent = TRUE;
1078 /* Enter a new layout context. */
1080 hsentercontext(int indent)
1082 /* Enter new context and set indentation as specified */
1083 if (++icontexts >= MAX_CONTEXTS) {
1084 char errbuf[ERR_BUF_SIZE];
1086 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1089 forgetindent = FALSE;
1090 indenttab[icontexts] = indent;
1092 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1097 /* Exit a layout context */
1103 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1108 * Return checks the indentation level and returns ;, } or the specified token.
1116 if (hsshouldindent()) {
1117 if (hspcolno < INDENTPT) {
1119 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1123 } else if (hspcolno == INDENTPT) {
1125 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1134 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1141 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1147 static BOOLEAN eof = FALSE;
1150 if (hssttok != -1) {
1158 endlineno = hslineno;
1159 if ((tok = yylex1()) != EOF)
1165 if (icontexts > icontexts_save) {
1168 indenttab[icontexts] = 0;
1171 hsperror("missing '}' at end of file");
1172 } else if (hsbuf_save != NULL) {
1174 yy_delete_buffer(YY_CURRENT_BUFFER);
1175 yy_switch_to_buffer(hsbuf_save);
1177 new_filename(filename_save);
1178 free(filename_save);
1179 hslineno = hslineno_save;
1180 hsplineno = hsplineno_save;
1181 hscolno = hscolno_save;
1182 hspcolno = hspcolno_save;
1184 icontexts = icontexts_save - 1;
1187 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1192 hsperror("No longer using yacc to parse interface files");
1197 abort(); /* should never get here! */
1201 /**********************************************************************
1204 * Input Processing for Interfaces -- Not currently used !!! *
1207 **********************************************************************/
1209 /* setyyin(file) open file as new lex input buffer */
1215 hsbuf_save = YY_CURRENT_BUFFER;
1216 if ((yyin = fopen(file, "r")) == NULL) {
1217 char errbuf[ERR_BUF_SIZE];
1219 sprintf(errbuf, "can't read \"%-.50s\"", file);
1222 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1224 hslineno_save = hslineno;
1225 hsplineno_save = hsplineno;
1226 hslineno = hsplineno = 1;
1228 filename_save = input_filename;
1229 input_filename = NULL;
1231 hscolno_save = hscolno;
1232 hspcolno_save = hspcolno;
1233 hscolno = hspcolno = 0;
1234 etags_save = etags; /* do not do "etags" stuff in interfaces */
1235 etags = 0; /* We remember whether we are doing it in
1236 the module, so we can restore it later [WDP 94/09] */
1237 hsentercontext(-1); /* partain: changed this from 0 */
1238 icontexts_save = icontexts;
1240 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1245 layout_input(char *text, int len)
1248 fprintf(stderr, "Scanning \"%s\"\n", text);
1251 hsplineno = hslineno;
1263 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1275 setstartlineno(void)
1277 startlineno = hsplineno;
1279 if (modulelineno == 0) {
1280 modulelineno = startlineno;
1286 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1290 /**********************************************************************
1296 **********************************************************************/
1298 #define CACHE_SIZE YY_BUF_SIZE
1304 } textcache = { 0, 0, NULL };
1309 /* fprintf(stderr, "cleartext\n"); */
1311 if (textcache.allocated == 0) {
1312 textcache.allocated = CACHE_SIZE;
1313 textcache.text = xmalloc(CACHE_SIZE);
1318 addtext(char *text, unsigned length)
1320 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1325 if (textcache.next + length + 1 >= textcache.allocated) {
1326 textcache.allocated += length + CACHE_SIZE;
1327 textcache.text = xrealloc(textcache.text, textcache.allocated);
1329 bcopy(text, textcache.text + textcache.next, length);
1330 textcache.next += length;
1336 /* fprintf(stderr, "addchar: %c\n", c); */
1338 if (textcache.next + 2 >= textcache.allocated) {
1339 textcache.allocated += CACHE_SIZE;
1340 textcache.text = xrealloc(textcache.text, textcache.allocated);
1342 textcache.text[textcache.next++] = c;
1346 fetchtext(unsigned *length)
1348 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1350 *length = textcache.next;
1351 textcache.text[textcache.next] = '\0';
1352 return textcache.text;
1355 /**********************************************************************
1358 * Identifier Processing *
1361 **********************************************************************/
1364 hsnewid Enters an id of length n into the symbol table.
1368 hsnewid(char *name, int length)
1370 char save = name[length];
1372 name[length] = '\0';
1373 yylval.uid = installid(name);
1374 name[length] = save;
1378 hsnewqid(char *name, int length)
1381 char save = name[length];
1382 name[length] = '\0';
1384 dot = strchr(name, '.');
1386 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1388 name[length] = save;
1390 return isconstr(dot+1);
1395 is_commment(char* lexeme, int len)
1404 for(i=0;i<len;i++) {
1405 if (lexeme[i] != '-') return FALSE;