2 /**********************************************************************
5 * LEX grammar for Haskell. *
6 * ------------------------ *
8 * (c) Copyright K. Hammond, University of Glasgow, *
9 * 10th. February 1989 *
11 * Modification History *
12 * -------------------- *
14 * 22/08/91 kh Initial Haskell 1.1 version. *
15 * 18/10/91 kh Added 'ccall'. *
16 * 19/11/91 kh Tidied generally. *
17 * 04/12/91 kh Added Int#. *
18 * 31/01/92 kh Haskell 1.2 version. *
19 * 24/04/92 ps Added 'scc'. *
20 * 03/06/92 kh Changed Infix/Prelude Handling. *
21 * 23/08/93 jsm Changed to support flex *
28 **********************************************************************/
30 #include "../../includes/config.h"
34 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
36 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
37 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
39 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
41 #define rindex strrchr
42 #define bcopy(s, d, n) memcpy ((d), (s), (n))
43 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
44 #define bzero(s, n) memset ((s), 0, (n))
45 #else /* not STDC_HEADERS and not HAVE_STRING_H */
47 /* memory.h and strings.h conflict on some systems. */
48 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
51 #include "hsparser.tab.h"
52 #include "constants.h"
55 /* Our substitute for <ctype.h> */
64 #define _isconstr(s) (CharTable[*s]&(_C))
65 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
67 static unsigned char CharTable[NCHARS] = {
68 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
70 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
73 /* '(' */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
75 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
76 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
77 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
78 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
79 /* 'X' */ _C, _C, _C, 0, 0, 0, 0, 0,
80 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
81 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
82 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
83 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
85 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
86 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
87 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
88 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
89 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
90 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
91 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
92 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
93 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
94 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
95 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
96 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
97 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
98 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
99 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
100 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
103 /**********************************************************************
109 **********************************************************************/
111 char *input_filename = NULL; /* Always points to a dynamically allocated string */
114 * For my own sanity, things that are not part of the flex skeleton
115 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
118 static int hslineno = 0; /* Line number at end of token */
119 int hsplineno = 0; /* Line number at end of previous token */
121 static int hscolno = 0; /* Column number at end of token */
122 int hspcolno = 0; /* Column number at end of previous token */
123 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
125 int startlineno = 0; /* The line number where something starts */
126 int endlineno = 0; /* The line number where something ends */
128 static BOOLEAN noGap = TRUE; /* For checking string gaps */
129 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
131 static int nested_comments; /* For counting comment nesting depth */
133 /* Hacky definition of yywrap: see flex doc.
135 If we don't do this, then we'll have to get the default
136 yywrap from the flex library, which is often something
137 we are not good at locating. This avoids that difficulty.
138 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
143 /* Essential forward declarations */
145 static void hsnewid PROTO((char *, int));
146 static void layout_input PROTO((char *, int));
147 static void cleartext (NO_ARGS);
148 static void addtext PROTO((char *, unsigned));
149 static void addchar PROTO((char));
150 static char *fetchtext PROTO((unsigned *));
151 static void new_filename PROTO((char *));
152 static int Return PROTO((int));
153 static void hsentercontext PROTO((int));
155 /* Special file handling for IMPORTS */
156 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
158 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
159 static char *filename_save; /* File Name */
160 static int hslineno_save = 0, /* Line Number */
161 hsplineno_save = 0, /* Line Number of Prev. token */
162 hscolno_save = 0, /* Indentation */
163 hspcolno_save = 0; /* Left Indentation */
164 static short icontexts_save = 0; /* Indent Context Level */
166 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
167 extern BOOLEAN etags; /* that which is saved */
169 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
171 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
173 extern BOOLEAN ignorePragmas; /* True when we should ignore pragmas */
174 extern int minAcceptablePragmaVersion; /* see documentation in main.c */
175 extern int maxAcceptablePragmaVersion;
176 extern int thisIfacePragmaVersion;
178 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
179 * inserted before token +ve -- "}" inserted before
182 short icontexts = 0; /* Which context we're in */
187 Table of indentations: right bit indicates whether to use
188 indentation rules (1 = use rules; 0 = ignore)
191 push one of these "contexts" at every "case" or "where"; the right bit says
192 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
194 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
195 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
196 push is shown just below.
201 static short indenttab[MAX_CONTEXTS] = {-1};
203 #define INDENTPT (indenttab[icontexts]>>1)
204 #define INDENTON (indenttab[icontexts]&1)
206 #define RETURN(tok) return(Return(tok))
209 #define YY_DECL int yylex1()
211 /* We should not peek at yy_act, but flex calls us even for the internal action
212 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
213 to support older versions of flex, we'll continue to peek for now.
215 #define YY_USER_ACTION \
216 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
220 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
223 /* Each time we enter a new start state, we push it onto the state stack.
224 Note that the rules do not allow us to underflow or overflow the stack.
225 (At least, they shouldn't.) The maximum expected depth is 4:
226 0: Code -> 1: String -> 2: StringEsc -> 3: Comment
228 static int StateStack[5];
229 static int StateDepth = -1;
232 #define PUSH_STATE(n) do {\
233 fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
234 StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
235 #define POP_STATE do {--StateDepth;\
236 fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
237 BEGIN(StateStack[StateDepth]);} while(0)
239 #define PUSH_STATE(n) do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
240 #define POP_STATE do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
245 /* The start states are:
246 Code -- normal Haskell code (principal lexer)
247 GlaExt -- Haskell code with Glasgow extensions
248 Comment -- Nested comment processing
249 String -- Inside a string literal with backslashes
250 StringEsc -- Immediately following a backslash in a string literal
251 Char -- Inside a character literal with backslashes
252 CharEsc -- Immediately following a backslash in a character literal
254 Note that the INITIAL state is unused. Also note that these states
255 are _exclusive_. All rules should be prefixed with an appropriate
256 list of start states.
259 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
265 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
266 S [!#$%&*+./<=>?@\\^|~:]
268 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
281 * Special GHC pragma rules. Do we need a start state for interface files,
282 * so these won't be matched in source files? --JSM
286 <Code,GlaExt>^"# ".*{NL} {
287 char tempf[FILENAME_SIZE];
288 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
290 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
293 <Code,GlaExt>^"#line ".*{NL} {
294 char tempf[FILENAME_SIZE];
295 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
297 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
300 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
301 /* partain: pragma-style line directive */
302 char tempf[FILENAME_SIZE];
303 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
305 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
307 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}" {
308 sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
310 <Code,GlaExt>"{-# GHC_PRAGMA " {
311 if ( ignorePragmas ||
312 thisIfacePragmaVersion < minAcceptablePragmaVersion ||
313 thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
317 PUSH_STATE(GhcPragma);
321 <GhcPragma>"_N_" { RETURN(NO_PRAGMA); }
322 <GhcPragma>"_NI_" { RETURN(NOINFO_PRAGMA); }
323 <GhcPragma>"_ABSTRACT_" { RETURN(ABSTRACT_PRAGMA); }
324 <GhcPragma>"_DEFOREST_" { RETURN(DEFOREST_PRAGMA); }
325 <GhcPragma>"_SPECIALISE_" { RETURN(SPECIALISE_PRAGMA); }
326 <GhcPragma>"_M_" { RETURN(MODNAME_PRAGMA); }
327 <GhcPragma>"_A_" { RETURN(ARITY_PRAGMA); }
328 <GhcPragma>"_U_" { RETURN(UPDATE_PRAGMA); }
329 <GhcPragma>"_S_" { RETURN(STRICTNESS_PRAGMA); }
330 <GhcPragma>"_K_" { RETURN(KIND_PRAGMA); }
331 <GhcPragma>"_MF_" { RETURN(MAGIC_UNFOLDING_PRAGMA); }
332 <GhcPragma>"_F_" { RETURN(UNFOLDING_PRAGMA); }
334 <GhcPragma>"_!_" { RETURN(COCON); }
335 <GhcPragma>"_#_" { RETURN(COPRIM); }
336 <GhcPragma>"_APP_" { RETURN(COAPP); }
337 <GhcPragma>"_TYAPP_" { RETURN(COTYAPP); }
338 <GhcPragma>"_ALG_" { RETURN(CO_ALG_ALTS); }
339 <GhcPragma>"_PRIM_" { RETURN(CO_PRIM_ALTS); }
340 <GhcPragma>"_NO_DEFLT_" { RETURN(CO_NO_DEFAULT); }
341 <GhcPragma>"_LETREC_" { RETURN(CO_LETREC); }
343 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
344 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
345 <GhcPragma>"_USER_CC_" { RETURN(CO_USER_CC); }
346 <GhcPragma>"_AUTO_CC_" { RETURN(CO_AUTO_CC); }
347 <GhcPragma>"_DICT_CC_" { RETURN(CO_DICT_CC); }
349 <GhcPragma>"_DUPD_CC_" { RETURN(CO_DUPD_CC); }
350 <GhcPragma>"_CAF_CC_" { RETURN(CO_CAF_CC); }
352 <GhcPragma>"_SDSEL_" { RETURN(CO_SDSEL_ID); }
353 <GhcPragma>"_METH_" { RETURN(CO_METH_ID); }
354 <GhcPragma>"_DEFM_" { RETURN(CO_DEFM_ID); }
355 <GhcPragma>"_DFUN_" { RETURN(CO_DFUN_ID); }
356 <GhcPragma>"_CONSTM_" { RETURN(CO_CONSTM_ID); }
357 <GhcPragma>"_SPEC_" { RETURN(CO_SPEC_ID); }
358 <GhcPragma>"_WRKR_" { RETURN(CO_WRKR_ID); }
359 <GhcPragma>"_ORIG_" { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
361 <GhcPragma>"_ALWAYS_" { RETURN(UNFOLD_ALWAYS); }
362 <GhcPragma>"_IF_ARGS_" { RETURN(UNFOLD_IF_ARGS); }
364 <GhcPragma>"_NOREP_I_" { RETURN(NOREP_INTEGER); }
365 <GhcPragma>"_NOREP_R_" { RETURN(NOREP_RATIONAL); }
366 <GhcPragma>"_NOREP_S_" { RETURN(NOREP_STRING); }
368 <GhcPragma>" #-}" { POP_STATE; RETURN(END_PRAGMA); }
370 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
371 PUSH_STATE(UserPragma);
372 RETURN(SPECIALISE_UPRAGMA);
374 <Code,GlaExt>"{-#"{WS}*"INLINE" {
375 PUSH_STATE(UserPragma);
376 RETURN(INLINE_UPRAGMA);
378 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
379 PUSH_STATE(UserPragma);
380 RETURN(MAGIC_UNFOLDING_UPRAGMA);
382 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
383 PUSH_STATE(UserPragma);
384 RETURN(DEFOREST_UPRAGMA);
386 <Code,GlaExt>"{-#"{WS}*"ABSTRACT" {
387 PUSH_STATE(UserPragma);
388 RETURN(ABSTRACT_UPRAGMA);
390 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
391 fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
392 input_filename, hsplineno);
393 format_string(stderr, (unsigned char *) yytext, yyleng);
394 fputs("'\n", stderr);
398 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
402 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
403 * intentionally accepted as a keyword even for normal <Code>.
407 <Code,GlaExt,GhcPragma>"case" { RETURN(CASE); }
408 <Code,GlaExt>"class" { RETURN(CLASS); }
409 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
410 <Code,GlaExt>"default" { RETURN(DEFAULT); }
411 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
412 <Code,GlaExt>"else" { RETURN(ELSE); }
413 <Code,GlaExt>"hiding" { RETURN(HIDING); }
414 <Code,GlaExt>"if" { RETURN(IF); }
415 <Code,GlaExt>"import" { RETURN(IMPORT); }
416 <Code,GlaExt>"infix" { RETURN(INFIX); }
417 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
418 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
419 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
420 <Code,GlaExt>"interface" { RETURN(INTERFACE); }
421 <Code,GlaExt>"module" { RETURN(MODULE); }
422 <Code,GlaExt,GhcPragma>"of" { RETURN(OF); }
423 <Code,GlaExt>"renaming" { RETURN(RENAMING); }
424 <Code,GlaExt>"then" { RETURN(THEN); }
425 <Code,GlaExt>"to" { RETURN(TO); }
426 <Code,GlaExt>"type" { RETURN(TYPE); }
427 <Code,GlaExt>"where" { RETURN(WHERE); }
428 <Code,GlaExt,GhcPragma>"in" { RETURN(IN); }
429 <Code,GlaExt,GhcPragma>"let" { RETURN(LET); }
430 <GlaExt,GhcPragma>"_ccall_" { RETURN(CCALL); }
431 <GlaExt,GhcPragma>"_ccall_GC_" { RETURN(CCALL_GC); }
432 <GlaExt,GhcPragma>"_casm_" { RETURN(CASM); }
433 <GlaExt,GhcPragma>"_casm_GC_" { RETURN(CASM_GC); }
434 <Code,GlaExt,GhcPragma>"_scc_" { RETURN(SCC); }
435 <GhcPragma>"_forall_" { RETURN(FORALL); }
439 * Haskell operators. Nothing special about these.
443 <Code,GlaExt>".." { RETURN(DOTDOT); }
444 <Code,GlaExt,GhcPragma>";" { RETURN(SEMI); }
445 <Code,GlaExt,GhcPragma,UserPragma>"," { RETURN(COMMA); }
446 <Code,GlaExt,GhcPragma>"|" { RETURN(VBAR); }
447 <Code,GlaExt,GhcPragma,UserPragma>"=" { RETURN(EQUAL); }
448 <Code,GlaExt>"<-" { RETURN(LARROW); }
449 <Code,GlaExt,GhcPragma,UserPragma>"->" { RETURN(RARROW); }
450 <Code,GlaExt,GhcPragma,UserPragma>"=>" { RETURN(DARROW); }
451 <Code,GlaExt,GhcPragma,UserPragma>"::" { RETURN(DCOLON); }
452 <Code,GlaExt,GhcPragma,UserPragma>"(" { RETURN(OPAREN); }
453 <Code,GlaExt,GhcPragma,UserPragma>")" { RETURN(CPAREN); }
454 <Code,GlaExt,GhcPragma,UserPragma>"[" { RETURN(OBRACK); }
455 <Code,GlaExt,GhcPragma,UserPragma>"]" { RETURN(CBRACK); }
456 <Code,GlaExt,GhcPragma>"{" { RETURN(OCURLY); }
457 <Code,GlaExt,GhcPragma>"}" { RETURN(CCURLY); }
458 <Code,GlaExt>"+" { RETURN(PLUS); }
459 <Code,GlaExt>"@" { RETURN(AT); }
460 <Code,GlaExt,GhcPragma>"\\" { RETURN(LAMBDA); }
461 <GhcPragma>"_/\\_" { RETURN(TYLAMBDA); }
462 <Code,GlaExt>"_" { RETURN(WILDCARD); }
463 <Code,GlaExt,GhcPragma>"`" { RETURN(BQUOTE); }
464 <Code,GlaExt>"~" { RETURN(LAZY); }
465 <Code,GlaExt>"-" { RETURN(MINUS); }
469 * Integers and (for Glasgow extensions) primitive integers. Note that
470 * we pass all of the text on to the parser, because flex/C can't handle
471 * arbitrary precision numbers.
475 <GlaExt>("-")?"0o"{O}+"#" { /* octal */
476 yylval.uid = xstrndup(yytext, yyleng - 1);
479 <Code,GlaExt>"0o"{O}+ { /* octal */
480 yylval.uid = xstrndup(yytext, yyleng);
483 <GlaExt>("-")?"0x"{H}+"#" { /* hexadecimal */
484 yylval.uid = xstrndup(yytext, yyleng - 1);
487 <Code,GlaExt>"0x"{H}+ { /* hexadecimal */
488 yylval.uid = xstrndup(yytext, yyleng);
491 <GlaExt,GhcPragma>("-")?{N}"#" {
492 yylval.uid = xstrndup(yytext, yyleng - 1);
495 <Code,GlaExt,GhcPragma>{N} {
496 yylval.uid = xstrndup(yytext, yyleng);
502 * Floats and (for Glasgow extensions) primitive floats/doubles.
506 <GlaExt,GhcPragma>("-")?{F}"##" {
507 yylval.uid = xstrndup(yytext, yyleng - 2);
510 <GlaExt,GhcPragma>("-")?{F}"#" {
511 yylval.uid = xstrndup(yytext, yyleng - 1);
515 yylval.uid = xstrndup(yytext, yyleng);
521 * Funky ``foo'' style C literals for Glasgow extensions
525 <GlaExt,GhcPragma>"``"[^']+"''" {
526 hsnewid(yytext + 2, yyleng - 4);
532 * Identifiers, both variables and operators. The trailing hash is allowed
533 * for Glasgow extensions.
537 <GhcPragma>"_NIL_" { hsnewid(yytext, yyleng); RETURN(CONID); }
538 <GhcPragma>"_TUP_"{D}+ { hsnewid(yytext, yyleng); RETURN(CONID); }
539 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
542 /* These SHOULDNAE work in "Code" (sigh) */
544 <Code,GlaExt,GhcPragma,UserPragma>{Id}"#" {
545 if (! (nonstandardFlag || in_interface)) {
546 char errbuf[ERR_BUF_SIZE];
547 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
550 hsnewid(yytext, yyleng);
551 RETURN(_isconstr(yytext) ? CONID : VARID);
553 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} {
554 if (! (nonstandardFlag || in_interface)) {
555 char errbuf[ERR_BUF_SIZE];
556 sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
559 hsnewid(yytext, yyleng);
560 RETURN(isconstr(yytext) ? CONID : VARID);
561 /* NB: ^^^^^^^^ : not the macro! */
563 <Code,GlaExt,GhcPragma,UserPragma>{Id} {
564 hsnewid(yytext, yyleng);
565 RETURN(_isconstr(yytext) ? CONID : VARID);
567 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
568 hsnewid(yytext, yyleng);
569 RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
573 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
575 /* Because we can make the former well-behaved (we defined them).
577 Sadly, the latter is defined by Haskell, which allows such
578 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
582 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`" {
583 hsnewid(yytext + 1, yyleng - 2);
584 RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
589 * Character literals. The first form is the quick form, for character
590 * literals that don't contain backslashes. Literals with backslashes are
591 * lexed through multiple rules. First, we match the open ' and as many
592 * normal characters as possible. This puts us into the <Char> state, where
593 * a backslash is legal. Then, we match the backslash and move into the
594 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
595 * characters and the close '. We may end up with too many characters, but
596 * this allows us to easily share the lex rules with strings. Excess characters
597 * are ignored with a warning.
601 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
602 yylval.uhstring = installHstring(1, yytext+1);
605 <Code,GlaExt>'({CHAR}|"\"")' {
606 yylval.uhstring = installHstring(1, yytext+1);
609 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
610 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
613 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
614 hsmlcolno = hspcolno;
616 addtext(yytext+1, yyleng-1);
619 <Char>({CHAR}|"\"")*'# {
623 addtext(yytext, yyleng - 2);
624 text = fetchtext(&length);
626 if (! (nonstandardFlag || in_interface)) {
627 char errbuf[ERR_BUF_SIZE];
628 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
633 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
634 input_filename, hsplineno, hspcolno + 1);
635 format_string(stderr, (unsigned char *) text, length);
636 fputs("' too long\n", stderr);
639 yylval.uhstring = installHstring(1, text);
640 hspcolno = hsmlcolno;
644 <Char>({CHAR}|"\"")*' {
648 addtext(yytext, yyleng - 1);
649 text = fetchtext(&length);
652 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
653 input_filename, hsplineno, hspcolno + 1);
654 format_string(stderr, (unsigned char *) text, length);
655 fputs("' too long\n", stderr);
658 yylval.uhstring = installHstring(1, text);
659 hspcolno = hsmlcolno;
663 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
668 * String literals. The first form is the quick form, for string literals
669 * that don't contain backslashes. Literals with backslashes are lexed
670 * through multiple rules. First, we match the open " and as many normal
671 * characters as possible. This puts us into the <String> state, where
672 * a backslash is legal. Then, we match the backslash and move into the
673 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
674 * characters, moving back and forth between <String> and <StringEsc> as more
675 * backslashes are encountered. (We may even digress into <Comment> mode if we
676 * find a comment in a gap between backslashes.) Finally, we read the last chunk
677 * of normal characters and the close ".
681 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""# {
682 yylval.uhstring = installHstring(yyleng-3, yytext+1);
683 /* the -3 accounts for the " on front, "# on the end */
686 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\"" {
687 yylval.uhstring = installHstring(yyleng-2, yytext+1);
690 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
691 hsmlcolno = hspcolno;
693 addtext(yytext+1, yyleng-1);
696 <String>({CHAR}|"'")*"\"#" {
700 addtext(yytext, yyleng-2);
701 text = fetchtext(&length);
703 if (! (nonstandardFlag || in_interface)) {
704 char errbuf[ERR_BUF_SIZE];
705 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
709 yylval.uhstring = installHstring(length, text);
710 hspcolno = hsmlcolno;
714 <String>({CHAR}|"'")*"\"" {
718 addtext(yytext, yyleng-1);
719 text = fetchtext(&length);
721 yylval.uhstring = installHstring(length, text);
722 hspcolno = hsmlcolno;
726 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
730 * Character and string escapes are roughly the same, but strings have the
731 * extra `\&' sequence which is not allowed for characters. Also, comments
732 * are allowed in the <StringEsc> state. (See the comment section much
735 * NB: Backslashes and tabs are stored in strings as themselves.
736 * But if we print them (in printtree.c), they must go out as
737 * "\\\\" and "\\t" respectively. (This is because of the bogus
738 * intermediate format that the parser produces. It uses '\t' fpr end of
739 * string, so it needs to be able to escape tabs, which means that it
740 * also needs to be able to escape the escape character ('\\'). Sigh.
744 <Char>\\ { PUSH_STATE(CharEsc); }
745 <String>\\& /* Ignore */ ;
746 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
748 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
749 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
751 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
752 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
753 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
754 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
755 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
756 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
757 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
758 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
759 <CharEsc,StringEsc>BEL |
760 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
761 <CharEsc,StringEsc>BS |
762 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
763 <CharEsc,StringEsc>HT |
764 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
765 <CharEsc,StringEsc>LF |
766 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
767 <CharEsc,StringEsc>VT |
768 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
769 <CharEsc,StringEsc>FF |
770 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
771 <CharEsc,StringEsc>CR |
772 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
773 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
774 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
775 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
776 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
777 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
778 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
779 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
780 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
781 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
782 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
783 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
784 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
785 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
786 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
787 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
788 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
789 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
790 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
791 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
792 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
793 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
794 <CharEsc,StringEsc>{D}+ {
795 int i = strtol(yytext, NULL, 10);
799 char errbuf[ERR_BUF_SIZE];
800 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
806 <CharEsc,StringEsc>o{O}+ {
807 int i = strtol(yytext + 1, NULL, 8);
811 char errbuf[ERR_BUF_SIZE];
812 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
818 <CharEsc,StringEsc>x{H}+ {
819 int i = strtol(yytext + 1, NULL, 16);
823 char errbuf[ERR_BUF_SIZE];
824 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
833 * Simple comments and whitespace. Normally, we would just ignore these, but
834 * in case we're processing a string escape, we need to note that we've seen
837 * Note that we cater for a comment line that *doesn't* end in a newline.
838 * This is incorrect, strictly speaking, but seems like the right thing
839 * to do. Reported by Rajiv Mirani. (WDP 95/08)
843 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
844 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
848 * Nested comments. The major complication here is in trying to match the
849 * longest lexemes possible, for better performance. (See the flex document.)
850 * That's why the rules look so bizarre.
854 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-" {
855 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
859 <Comment>"-"+[^-{}]+ |
860 <Comment>"{"+[^-{}]+ ;
861 <Comment>"{-" { nested_comments++; }
862 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
867 * Illegal characters. This used to be a single rule, but we might as well
868 * pass on as much information as we have, so now we indicate our state in
873 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n) {
874 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
875 input_filename, hsplineno, hspcolno + 1);
876 format_string(stderr, (unsigned char *) yytext, 1);
877 fputs("'\n", stderr);
881 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
882 input_filename, hsplineno, hspcolno + 1);
883 format_string(stderr, (unsigned char *) yytext, 1);
884 fputs("' in a character literal\n", stderr);
888 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
889 input_filename, hsplineno, hspcolno + 1);
890 format_string(stderr, (unsigned char *) yytext, 1);
891 fputs("'\n", stderr);
894 <String>(.|\n) { if (nonstandardFlag) {
895 addtext(yytext, yyleng);
897 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
898 input_filename, hsplineno, hspcolno + 1);
899 format_string(stderr, (unsigned char *) yytext, 1);
900 fputs("' in a string literal\n", stderr);
906 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
907 input_filename, hsplineno, hspcolno + 1);
908 format_string(stderr, (unsigned char *) yytext, 1);
909 fputs("'\n", stderr);
912 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
913 input_filename, hsplineno, hspcolno + 1);
914 format_string(stderr, (unsigned char *) yytext, 1);
915 fputs("' in a string gap\n", stderr);
922 * End of file. In any sub-state, this is an error. However, for the primary
923 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
924 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
925 * adding virtual close curlies, or closing an interface and returning to the
926 * primary source file.
928 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
929 * line/column advancement has to be done by hand.
933 <Char,CharEsc><<EOF>> {
934 hsplineno = hslineno; hspcolno = hscolno;
935 hsperror("unterminated character literal");
938 hsplineno = hslineno; hspcolno = hscolno;
939 hsperror("unterminated comment");
941 <String,StringEsc><<EOF>> {
942 hsplineno = hslineno; hspcolno = hscolno;
943 hsperror("unterminated string literal");
946 hsplineno = hslineno; hspcolno = hscolno;
947 hsperror("unterminated interface pragma");
949 <UserPragma><<EOF>> {
950 hsplineno = hslineno; hspcolno = hscolno;
951 hsperror("unterminated user-specified pragma");
953 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
957 /**********************************************************************
960 * YACC/LEX Initialisation etc. *
963 **********************************************************************/
966 We initialise input_filename to "<stdin>".
967 This allows unnamed sources to be piped into the parser.
970 extern BOOLEAN acceptPrim;
975 input_filename = xstrdup("<stdin>");
977 /* We must initialize the input buffer _now_, because we call
978 setyyin _before_ calling yylex for the first time! */
979 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
988 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
990 if (input_filename != NULL)
991 free(input_filename);
992 input_filename = xstrdup(f);
995 /**********************************************************************
998 * Layout Processing *
1001 **********************************************************************/
1004 The following section deals with Haskell Layout conventions
1005 forcing insertion of ; or } as appropriate
1009 hsshouldindent(void)
1011 return (!forgetindent && INDENTON);
1015 /* Enter new context and set new indentation level */
1020 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1024 * partain: first chk that new indent won't be less than current one; this code
1025 * doesn't make sense to me; hscolno tells the position of the _end_ of the
1026 * current token; what that has to do with indenting, I don't know.
1030 if (hscolno - 1 <= INDENTPT) {
1032 return; /* Empty input OK for Haskell 1.1 */
1034 char errbuf[ERR_BUF_SIZE];
1036 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1040 hsentercontext((hspcolno << 1) | 1);
1044 /* Enter a new context without changing the indentation level */
1049 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1051 hsentercontext(indenttab[icontexts] & ~1);
1055 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1059 forgetindent = TRUE;
1063 /* Enter a new layout context. */
1065 hsentercontext(int indent)
1067 /* Enter new context and set indentation as specified */
1068 if (++icontexts >= MAX_CONTEXTS) {
1069 char errbuf[ERR_BUF_SIZE];
1071 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1074 forgetindent = FALSE;
1075 indenttab[icontexts] = indent;
1077 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1082 /* Exit a layout context */
1088 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1093 * Return checks the indentation level and returns ;, } or the specified token.
1103 if (hsshouldindent()) {
1104 if (hspcolno < INDENTPT) {
1106 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1110 } else if (hspcolno == INDENTPT) {
1112 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1120 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1127 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1133 static BOOLEAN eof = FALSE;
1136 if (hssttok != -1) {
1144 endlineno = hslineno;
1145 if ((tok = yylex1()) != EOF)
1151 if (icontexts > icontexts_save) {
1154 indenttab[icontexts] = 0;
1157 hsperror("missing '}' at end of file");
1158 } else if (hsbuf_save != NULL) {
1160 yy_delete_buffer(YY_CURRENT_BUFFER);
1161 yy_switch_to_buffer(hsbuf_save);
1163 new_filename(filename_save);
1164 free(filename_save);
1165 hslineno = hslineno_save;
1166 hsplineno = hsplineno_save;
1167 hscolno = hscolno_save;
1168 hspcolno = hspcolno_save;
1170 in_interface = FALSE;
1171 icontexts = icontexts_save - 1;
1174 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1181 abort(); /* should never get here! */
1185 /**********************************************************************
1188 * Input Processing for Interfaces *
1191 **********************************************************************/
1193 /* setyyin(file) open file as new lex input buffer */
1199 hsbuf_save = YY_CURRENT_BUFFER;
1200 if ((yyin = fopen(file, "r")) == NULL) {
1201 char errbuf[ERR_BUF_SIZE];
1203 sprintf(errbuf, "can't read \"%-.50s\"", file);
1206 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1208 hslineno_save = hslineno;
1209 hsplineno_save = hsplineno;
1210 hslineno = hsplineno = 1;
1212 filename_save = input_filename;
1213 input_filename = NULL;
1215 hscolno_save = hscolno;
1216 hspcolno_save = hspcolno;
1217 hscolno = hspcolno = 0;
1218 in_interface = TRUE;
1219 etags_save = etags; /* do not do "etags" stuff in interfaces */
1220 etags = 0; /* We remember whether we are doing it in
1221 the module, so we can restore it later [WDP 94/09] */
1222 hsentercontext(-1); /* partain: changed this from 0 */
1223 icontexts_save = icontexts;
1225 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1230 layout_input(char *text, int len)
1233 fprintf(stderr, "Scanning \"%s\"\n", text);
1236 hsplineno = hslineno;
1248 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1260 setstartlineno(void)
1262 startlineno = hsplineno;
1266 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1270 /**********************************************************************
1276 **********************************************************************/
1278 #define CACHE_SIZE YY_BUF_SIZE
1284 } textcache = { 0, 0, NULL };
1289 /* fprintf(stderr, "cleartext\n"); */
1291 if (textcache.allocated == 0) {
1292 textcache.allocated = CACHE_SIZE;
1293 textcache.text = xmalloc(CACHE_SIZE);
1298 addtext(char *text, unsigned length)
1300 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1305 if (textcache.next + length + 1 >= textcache.allocated) {
1306 textcache.allocated += length + CACHE_SIZE;
1307 textcache.text = xrealloc(textcache.text, textcache.allocated);
1309 bcopy(text, textcache.text + textcache.next, length);
1310 textcache.next += length;
1316 /* fprintf(stderr, "addchar: %c\n", c); */
1318 if (textcache.next + 2 >= textcache.allocated) {
1319 textcache.allocated += CACHE_SIZE;
1320 textcache.text = xrealloc(textcache.text, textcache.allocated);
1322 textcache.text[textcache.next++] = c;
1326 fetchtext(unsigned *length)
1328 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1330 *length = textcache.next;
1331 textcache.text[textcache.next] = '\0';
1332 return textcache.text;
1335 /**********************************************************************
1338 * Identifier Processing *
1341 **********************************************************************/
1344 hsnewid Enters an id of length n into the symbol table.
1348 hsnewid(char *name, int length)
1350 char save = name[length];
1352 name[length] = '\0';
1353 yylval.uid = installid(name);
1354 name[length] = save;
1358 isconstr(char *s) /* walks past leading underscores before using the macro */
1362 for ( ; temp != NULL && *temp == '_' ; temp++ );
1364 return _isconstr(temp);