2 /**********************************************************************
5 * LEX grammar for Haskell. *
6 * ------------------------ *
8 * (c) Copyright K. Hammond, University of Glasgow, *
9 * 10th. February 1989 *
11 * Modification History *
12 * -------------------- *
14 * 22/08/91 kh Initial Haskell 1.1 version. *
15 * 18/10/91 kh Added 'ccall'. *
16 * 19/11/91 kh Tidied generally. *
17 * 04/12/91 kh Added Int#. *
18 * 31/01/92 kh Haskell 1.2 version. *
19 * 24/04/92 ps Added 'scc'. *
20 * 03/06/92 kh Changed Infix/Prelude Handling. *
21 * 23/08/93 jsm Changed to support flex *
28 **********************************************************************/
30 #include "../../includes/config.h"
34 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
36 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
37 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
39 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
41 #define rindex strrchr
42 #define bcopy(s, d, n) memcpy ((d), (s), (n))
43 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
44 #define bzero(s, n) memset ((s), 0, (n))
45 #else /* not STDC_HEADERS and not HAVE_STRING_H */
47 /* memory.h and strings.h conflict on some systems. */
48 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
51 #include "hsparser.tab.h"
52 #include "constants.h"
55 /* Our substitute for <ctype.h> */
64 #define _isconstr(s) (CharTable[*s]&(_C))
65 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
67 static unsigned char CharTable[NCHARS] = {
68 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
70 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
73 /* '(' */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
75 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
76 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
77 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
78 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
79 /* 'X' */ _C, _C, _C, 0, 0, 0, 0, 0,
80 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
81 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
82 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
83 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
85 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
86 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
87 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
88 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
89 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
90 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
91 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
92 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
93 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
94 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
95 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
96 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
97 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
98 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
99 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
100 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
103 /**********************************************************************
109 **********************************************************************/
111 char *input_filename = NULL; /* Always points to a dynamically allocated string */
114 * For my own sanity, things that are not part of the flex skeleton
115 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
118 static int hslineno = 0; /* Line number at end of token */
119 int hsplineno = 0; /* Line number at end of previous token */
121 static int hscolno = 0; /* Column number at end of token */
122 int hspcolno = 0; /* Column number at end of previous token */
123 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
125 int startlineno = 0; /* The line number where something starts */
126 int endlineno = 0; /* The line number where something ends */
128 static BOOLEAN noGap = TRUE; /* For checking string gaps */
129 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
131 static int nested_comments; /* For counting comment nesting depth */
133 /* Hacky definition of yywrap: see flex doc.
135 If we don't do this, then we'll have to get the default
136 yywrap from the flex library, which is often something
137 we are not good at locating. This avoids that difficulty.
138 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
143 /* Essential forward declarations */
145 static void hsnewid PROTO((char *, int));
146 static void layout_input PROTO((char *, int));
147 static void cleartext (NO_ARGS);
148 static void addtext PROTO((char *, unsigned));
149 static void addchar PROTO((char));
150 static char *fetchtext PROTO((unsigned *));
151 static void new_filename PROTO((char *));
152 static int Return PROTO((int));
153 static void hsentercontext PROTO((int));
155 /* Special file handling for IMPORTS */
156 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
158 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
159 static char *filename_save; /* File Name */
160 static int hslineno_save = 0, /* Line Number */
161 hsplineno_save = 0, /* Line Number of Prev. token */
162 hscolno_save = 0, /* Indentation */
163 hspcolno_save = 0; /* Left Indentation */
164 static short icontexts_save = 0; /* Indent Context Level */
166 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
167 extern BOOLEAN etags; /* that which is saved */
169 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
171 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
173 extern BOOLEAN ignorePragmas; /* True when we should ignore pragmas */
174 extern int minAcceptablePragmaVersion; /* see documentation in main.c */
175 extern int maxAcceptablePragmaVersion;
176 extern int thisIfacePragmaVersion;
178 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
179 * inserted before token +ve -- "}" inserted before
182 short icontexts = 0; /* Which context we're in */
187 Table of indentations: right bit indicates whether to use
188 indentation rules (1 = use rules; 0 = ignore)
191 push one of these "contexts" at every "case" or "where"; the right bit says
192 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
194 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
195 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
196 push is shown just below.
201 static short indenttab[MAX_CONTEXTS] = {-1};
203 #define INDENTPT (indenttab[icontexts]>>1)
204 #define INDENTON (indenttab[icontexts]&1)
206 #define RETURN(tok) return(Return(tok))
209 #define YY_DECL int yylex1()
211 /* We should not peek at yy_act, but flex calls us even for the internal action
212 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
213 to support older versions of flex, we'll continue to peek for now.
215 #define YY_USER_ACTION \
216 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
220 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
223 /* Each time we enter a new start state, we push it onto the state stack.
224 Note that the rules do not allow us to underflow or overflow the stack.
225 (At least, they shouldn't.) The maximum expected depth is 4:
226 0: Code -> 1: String -> 2: StringEsc -> 3: Comment
228 static int StateStack[5];
229 static int StateDepth = -1;
232 #define PUSH_STATE(n) do {\
233 fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
234 StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
235 #define POP_STATE do {--StateDepth;\
236 fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
237 BEGIN(StateStack[StateDepth]);} while(0)
239 #define PUSH_STATE(n) do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
240 #define POP_STATE do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
245 /* The start states are:
246 Code -- normal Haskell code (principal lexer)
247 GlaExt -- Haskell code with Glasgow extensions
248 Comment -- Nested comment processing
249 String -- Inside a string literal with backslashes
250 StringEsc -- Immediately following a backslash in a string literal
251 Char -- Inside a character literal with backslashes
252 CharEsc -- Immediately following a backslash in a character literal
254 Note that the INITIAL state is unused. Also note that these states
255 are _exclusive_. All rules should be prefixed with an appropriate
256 list of start states.
259 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
265 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
266 S [!#$%&*+./<=>?@\\^|~:]
268 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
281 * Special GHC pragma rules. Do we need a start state for interface files,
282 * so these won't be matched in source files? --JSM
286 <Code,GlaExt>^"# ".*{NL} {
287 char tempf[FILENAME_SIZE];
288 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
290 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
293 <Code,GlaExt>^"#line ".*{NL} {
294 char tempf[FILENAME_SIZE];
295 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
297 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
300 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
301 /* partain: pragma-style line directive */
302 char tempf[FILENAME_SIZE];
303 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
305 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
307 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}" {
308 sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
310 <Code,GlaExt>"{-# GHC_PRAGMA " {
311 if ( ignorePragmas ||
312 thisIfacePragmaVersion < minAcceptablePragmaVersion ||
313 thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
317 PUSH_STATE(GhcPragma);
321 <GhcPragma>"_N_" { RETURN(NO_PRAGMA); }
322 <GhcPragma>"_NI_" { RETURN(NOINFO_PRAGMA); }
323 <GhcPragma>"_ABSTRACT_" { RETURN(ABSTRACT_PRAGMA); }
324 <GhcPragma>"_DEFOREST_" { RETURN(DEFOREST_PRAGMA); }
325 <GhcPragma>"_SPECIALISE_" { RETURN(SPECIALISE_PRAGMA); }
326 <GhcPragma>"_M_" { RETURN(MODNAME_PRAGMA); }
327 <GhcPragma>"_A_" { RETURN(ARITY_PRAGMA); }
328 <GhcPragma>"_U_" { RETURN(UPDATE_PRAGMA); }
329 <GhcPragma>"_S_" { RETURN(STRICTNESS_PRAGMA); }
330 <GhcPragma>"_K_" { RETURN(KIND_PRAGMA); }
331 <GhcPragma>"_MF_" { RETURN(MAGIC_UNFOLDING_PRAGMA); }
332 <GhcPragma>"_F_" { RETURN(UNFOLDING_PRAGMA); }
334 <GhcPragma>"_!_" { RETURN(COCON); }
335 <GhcPragma>"_#_" { RETURN(COPRIM); }
336 <GhcPragma>"_APP_" { RETURN(COAPP); }
337 <GhcPragma>"_TYAPP_" { RETURN(COTYAPP); }
338 <GhcPragma>"_ALG_" { RETURN(CO_ALG_ALTS); }
339 <GhcPragma>"_PRIM_" { RETURN(CO_PRIM_ALTS); }
340 <GhcPragma>"_NO_DEFLT_" { RETURN(CO_NO_DEFAULT); }
341 <GhcPragma>"_LETREC_" { RETURN(CO_LETREC); }
343 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
344 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
345 <GhcPragma>"_USER_CC_" { RETURN(CO_USER_CC); }
346 <GhcPragma>"_AUTO_CC_" { RETURN(CO_AUTO_CC); }
347 <GhcPragma>"_DICT_CC_" { RETURN(CO_DICT_CC); }
349 <GhcPragma>"_DUPD_CC_" { RETURN(CO_DUPD_CC); }
350 <GhcPragma>"_CAF_CC_" { RETURN(CO_CAF_CC); }
352 <GhcPragma>"_SDSEL_" { RETURN(CO_SDSEL_ID); }
353 <GhcPragma>"_METH_" { RETURN(CO_METH_ID); }
354 <GhcPragma>"_DEFM_" { RETURN(CO_DEFM_ID); }
355 <GhcPragma>"_DFUN_" { RETURN(CO_DFUN_ID); }
356 <GhcPragma>"_CONSTM_" { RETURN(CO_CONSTM_ID); }
357 <GhcPragma>"_SPEC_" { RETURN(CO_SPEC_ID); }
358 <GhcPragma>"_WRKR_" { RETURN(CO_WRKR_ID); }
359 <GhcPragma>"_ORIG_" { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
361 <GhcPragma>"_ALWAYS_" { RETURN(UNFOLD_ALWAYS); }
362 <GhcPragma>"_IF_ARGS_" { RETURN(UNFOLD_IF_ARGS); }
364 <GhcPragma>"_NOREP_I_" { RETURN(NOREP_INTEGER); }
365 <GhcPragma>"_NOREP_R_" { RETURN(NOREP_RATIONAL); }
366 <GhcPragma>"_NOREP_S_" { RETURN(NOREP_STRING); }
368 <GhcPragma>" #-}" { POP_STATE; RETURN(END_PRAGMA); }
370 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
371 PUSH_STATE(UserPragma);
372 RETURN(SPECIALISE_UPRAGMA);
374 <Code,GlaExt>"{-#"{WS}*"INLINE" {
375 PUSH_STATE(UserPragma);
376 RETURN(INLINE_UPRAGMA);
378 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
379 PUSH_STATE(UserPragma);
380 RETURN(MAGIC_UNFOLDING_UPRAGMA);
382 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
383 PUSH_STATE(UserPragma);
384 RETURN(DEFOREST_UPRAGMA);
386 <Code,GlaExt>"{-#"{WS}*"ABSTRACT" {
387 PUSH_STATE(UserPragma);
388 RETURN(ABSTRACT_UPRAGMA);
390 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
391 fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
392 input_filename, hsplineno);
393 format_string(stderr, (unsigned char *) yytext, yyleng);
394 fputs("'\n", stderr);
398 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
402 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
403 * intentionally accepted as a keyword even for normal <Code>.
407 <Code,GlaExt,GhcPragma>"case" { RETURN(CASE); }
408 <Code,GlaExt>"class" { RETURN(CLASS); }
409 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
410 <Code,GlaExt>"default" { RETURN(DEFAULT); }
411 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
412 <Code,GlaExt>"else" { RETURN(ELSE); }
413 <Code,GlaExt>"hiding" { RETURN(HIDING); }
414 <Code,GlaExt>"if" { RETURN(IF); }
415 <Code,GlaExt>"import" { RETURN(IMPORT); }
416 <Code,GlaExt>"infix" { RETURN(INFIX); }
417 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
418 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
419 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
420 <Code,GlaExt>"interface" { RETURN(INTERFACE); }
421 <Code,GlaExt>"module" { RETURN(MODULE); }
422 <Code,GlaExt,GhcPragma>"of" { RETURN(OF); }
423 <Code,GlaExt>"renaming" { RETURN(RENAMING); }
424 <Code,GlaExt>"then" { RETURN(THEN); }
425 <Code,GlaExt>"to" { RETURN(TO); }
426 <Code,GlaExt>"type" { RETURN(TYPE); }
427 <Code,GlaExt>"where" { RETURN(WHERE); }
428 <Code,GlaExt,GhcPragma>"in" { RETURN(IN); }
429 <Code,GlaExt,GhcPragma>"let" { RETURN(LET); }
430 <GlaExt,GhcPragma>"_ccall_" { RETURN(CCALL); }
431 <GlaExt,GhcPragma>"_ccall_GC_" { RETURN(CCALL_GC); }
432 <GlaExt,GhcPragma>"_casm_" { RETURN(CASM); }
433 <GlaExt,GhcPragma>"_casm_GC_" { RETURN(CASM_GC); }
434 <Code,GlaExt,GhcPragma>"_scc_" { RETURN(SCC); }
435 <GhcPragma>"_forall_" { RETURN(FORALL); }
439 * Haskell operators. Nothing special about these.
443 <Code,GlaExt>".." { RETURN(DOTDOT); }
444 <Code,GlaExt,GhcPragma>";" { RETURN(SEMI); }
445 <Code,GlaExt,GhcPragma,UserPragma>"," { RETURN(COMMA); }
446 <Code,GlaExt,GhcPragma>"|" { RETURN(VBAR); }
447 <Code,GlaExt,GhcPragma,UserPragma>"=" { RETURN(EQUAL); }
448 <Code,GlaExt>"<-" { RETURN(LARROW); }
449 <Code,GlaExt,GhcPragma,UserPragma>"->" { RETURN(RARROW); }
450 <Code,GlaExt,GhcPragma,UserPragma>"=>" { RETURN(DARROW); }
451 <Code,GlaExt,GhcPragma,UserPragma>"::" { RETURN(DCOLON); }
452 <Code,GlaExt,GhcPragma,UserPragma>"(" { RETURN(OPAREN); }
453 <Code,GlaExt,GhcPragma,UserPragma>")" { RETURN(CPAREN); }
454 <Code,GlaExt,GhcPragma,UserPragma>"[" { RETURN(OBRACK); }
455 <Code,GlaExt,GhcPragma,UserPragma>"]" { RETURN(CBRACK); }
456 <Code,GlaExt,GhcPragma>"{" { RETURN(OCURLY); }
457 <Code,GlaExt,GhcPragma>"}" { RETURN(CCURLY); }
458 <Code,GlaExt>"+" { RETURN(PLUS); }
459 <Code,GlaExt>"@" { RETURN(AT); }
460 <Code,GlaExt,GhcPragma>"\\" { RETURN(LAMBDA); }
461 <GhcPragma>"_/\\_" { RETURN(TYLAMBDA); }
462 <Code,GlaExt>"_" { RETURN(WILDCARD); }
463 <Code,GlaExt,GhcPragma>"`" { RETURN(BQUOTE); }
464 <Code,GlaExt>"~" { RETURN(LAZY); }
465 <Code,GlaExt>"-" { RETURN(MINUS); }
469 * Integers and (for Glasgow extensions) primitive integers. Note that
470 * we pass all of the text on to the parser, because flex/C can't handle
471 * arbitrary precision numbers.
475 <GlaExt>("-")?"0o"{O}+"#" { /* octal */
476 yylval.uid = xstrndup(yytext, yyleng - 1);
479 <Code,GlaExt>"0o"{O}+ { /* octal */
480 yylval.uid = xstrndup(yytext, yyleng);
483 <GlaExt>("-")?"0x"{H}+"#" { /* hexadecimal */
484 yylval.uid = xstrndup(yytext, yyleng - 1);
487 <Code,GlaExt>"0x"{H}+ { /* hexadecimal */
488 yylval.uid = xstrndup(yytext, yyleng);
491 <GlaExt,GhcPragma>("-")?{N}"#" {
492 yylval.uid = xstrndup(yytext, yyleng - 1);
495 <Code,GlaExt,GhcPragma>{N} {
496 yylval.uid = xstrndup(yytext, yyleng);
502 * Floats and (for Glasgow extensions) primitive floats/doubles.
506 <GlaExt,GhcPragma>("-")?{F}"##" {
507 yylval.uid = xstrndup(yytext, yyleng - 2);
510 <GlaExt,GhcPragma>("-")?{F}"#" {
511 yylval.uid = xstrndup(yytext, yyleng - 1);
515 yylval.uid = xstrndup(yytext, yyleng);
521 * Funky ``foo'' style C literals for Glasgow extensions
525 <GlaExt,GhcPragma>"``"[^']+"''" {
526 hsnewid(yytext + 2, yyleng - 4);
532 * Identifiers, both variables and operators. The trailing hash is allowed
533 * for Glasgow extensions.
537 <GhcPragma>"_NIL_" { hsnewid(yytext, yyleng); RETURN(CONID); }
538 <GhcPragma>"_TUP_"{D}+ { hsnewid(yytext, yyleng); RETURN(CONID); }
539 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
541 <GlaExt,GhcPragma,UserPragma>{Id}"#" {
542 hsnewid(yytext, yyleng);
543 RETURN(_isconstr(yytext) ? CONID : VARID);
546 /* This SHOULDNAE work in "Code" (sigh) */
548 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} {
549 if (! (nonstandardFlag || in_interface)) {
550 char errbuf[ERR_BUF_SIZE];
551 sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
554 hsnewid(yytext, yyleng);
555 RETURN(isconstr(yytext) ? CONID : VARID);
556 /* NB: ^^^^^^^^ : not the macro! */
558 <Code,GlaExt,GhcPragma,UserPragma>{Id} {
559 hsnewid(yytext, yyleng);
560 RETURN(_isconstr(yytext) ? CONID : VARID);
562 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
563 hsnewid(yytext, yyleng);
564 RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
568 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
570 /* Because we can make the former well-behaved (we defined them).
572 Sadly, the latter is defined by Haskell, which allows such
573 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
577 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`" {
578 hsnewid(yytext + 1, yyleng - 2);
579 RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
584 * Character literals. The first form is the quick form, for character
585 * literals that don't contain backslashes. Literals with backslashes are
586 * lexed through multiple rules. First, we match the open ' and as many
587 * normal characters as possible. This puts us into the <Char> state, where
588 * a backslash is legal. Then, we match the backslash and move into the
589 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
590 * characters and the close '. We may end up with too many characters, but
591 * this allows us to easily share the lex rules with strings. Excess characters
592 * are ignored with a warning.
596 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
597 yylval.uhstring = installHstring(1, yytext+1);
600 <Code,GlaExt>'({CHAR}|"\"")' {
601 yylval.uhstring = installHstring(1, yytext+1);
604 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
605 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
608 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
609 hsmlcolno = hspcolno;
611 addtext(yytext+1, yyleng-1);
614 <Char>({CHAR}|"\"")*'# {
618 addtext(yytext, yyleng - 2);
619 text = fetchtext(&length);
621 if (! (nonstandardFlag || in_interface)) {
622 char errbuf[ERR_BUF_SIZE];
623 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
628 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
629 input_filename, hsplineno, hspcolno + 1);
630 format_string(stderr, (unsigned char *) text, length);
631 fputs("' too long\n", stderr);
634 yylval.uhstring = installHstring(1, text);
635 hspcolno = hsmlcolno;
639 <Char>({CHAR}|"\"")*' {
643 addtext(yytext, yyleng - 1);
644 text = fetchtext(&length);
647 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
648 input_filename, hsplineno, hspcolno + 1);
649 format_string(stderr, (unsigned char *) text, length);
650 fputs("' too long\n", stderr);
653 yylval.uhstring = installHstring(1, text);
654 hspcolno = hsmlcolno;
658 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
663 * String literals. The first form is the quick form, for string literals
664 * that don't contain backslashes. Literals with backslashes are lexed
665 * through multiple rules. First, we match the open " and as many normal
666 * characters as possible. This puts us into the <String> state, where
667 * a backslash is legal. Then, we match the backslash and move into the
668 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
669 * characters, moving back and forth between <String> and <StringEsc> as more
670 * backslashes are encountered. (We may even digress into <Comment> mode if we
671 * find a comment in a gap between backslashes.) Finally, we read the last chunk
672 * of normal characters and the close ".
676 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""# {
677 yylval.uhstring = installHstring(yyleng-3, yytext+1);
678 /* the -3 accounts for the " on front, "# on the end */
681 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\"" {
682 yylval.uhstring = installHstring(yyleng-2, yytext+1);
685 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
686 hsmlcolno = hspcolno;
688 addtext(yytext+1, yyleng-1);
691 <String>({CHAR}|"'")*"\"#" {
695 addtext(yytext, yyleng-2);
696 text = fetchtext(&length);
698 if (! (nonstandardFlag || in_interface)) {
699 char errbuf[ERR_BUF_SIZE];
700 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
704 yylval.uhstring = installHstring(length, text);
705 hspcolno = hsmlcolno;
709 <String>({CHAR}|"'")*"\"" {
713 addtext(yytext, yyleng-1);
714 text = fetchtext(&length);
716 yylval.uhstring = installHstring(length, text);
717 hspcolno = hsmlcolno;
721 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
725 * Character and string escapes are roughly the same, but strings have the
726 * extra `\&' sequence which is not allowed for characters. Also, comments
727 * are allowed in the <StringEsc> state. (See the comment section much
730 * NB: Backslashes and tabs are stored in strings as themselves.
731 * But if we print them (in printtree.c), they must go out as
732 * "\\\\" and "\\t" respectively. (This is because of the bogus
733 * intermediate format that the parser produces. It uses '\t' fpr end of
734 * string, so it needs to be able to escape tabs, which means that it
735 * also needs to be able to escape the escape character ('\\'). Sigh.
739 <Char>\\ { PUSH_STATE(CharEsc); }
740 <String>\\& /* Ignore */ ;
741 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
743 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
744 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
746 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
747 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
748 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
749 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
750 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
751 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
752 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
753 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
754 <CharEsc,StringEsc>BEL |
755 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
756 <CharEsc,StringEsc>BS |
757 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
758 <CharEsc,StringEsc>HT |
759 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
760 <CharEsc,StringEsc>LF |
761 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
762 <CharEsc,StringEsc>VT |
763 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
764 <CharEsc,StringEsc>FF |
765 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
766 <CharEsc,StringEsc>CR |
767 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
768 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
769 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
770 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
771 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
772 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
773 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
774 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
775 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
776 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
777 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
778 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
779 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
780 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
781 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
782 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
783 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
784 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
785 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
786 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
787 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
788 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
789 <CharEsc,StringEsc>{D}+ {
790 int i = strtol(yytext, NULL, 10);
794 char errbuf[ERR_BUF_SIZE];
795 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
801 <CharEsc,StringEsc>o{O}+ {
802 int i = strtol(yytext + 1, NULL, 8);
806 char errbuf[ERR_BUF_SIZE];
807 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
813 <CharEsc,StringEsc>x{H}+ {
814 int i = strtol(yytext + 1, NULL, 16);
818 char errbuf[ERR_BUF_SIZE];
819 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
828 * Simple comments and whitespace. Normally, we would just ignore these, but
829 * in case we're processing a string escape, we need to note that we've seen
832 * Note that we cater for a comment line that *doesn't* end in a newline.
833 * This is incorrect, strictly speaking, but seems like the right thing
834 * to do. Reported by Rajiv Mirani. (WDP 95/08)
838 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
839 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
843 * Nested comments. The major complication here is in trying to match the
844 * longest lexemes possible, for better performance. (See the flex document.)
845 * That's why the rules look so bizarre.
849 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-" {
850 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
854 <Comment>"-"+[^-{}]+ |
855 <Comment>"{"+[^-{}]+ ;
856 <Comment>"{-" { nested_comments++; }
857 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
862 * Illegal characters. This used to be a single rule, but we might as well
863 * pass on as much information as we have, so now we indicate our state in
868 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n) {
869 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
870 input_filename, hsplineno, hspcolno + 1);
871 format_string(stderr, (unsigned char *) yytext, 1);
872 fputs("'\n", stderr);
876 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
877 input_filename, hsplineno, hspcolno + 1);
878 format_string(stderr, (unsigned char *) yytext, 1);
879 fputs("' in a character literal\n", stderr);
883 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
884 input_filename, hsplineno, hspcolno + 1);
885 format_string(stderr, (unsigned char *) yytext, 1);
886 fputs("'\n", stderr);
889 <String>(.|\n) { if (nonstandardFlag) {
890 addtext(yytext, yyleng);
892 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
893 input_filename, hsplineno, hspcolno + 1);
894 format_string(stderr, (unsigned char *) yytext, 1);
895 fputs("' in a string literal\n", stderr);
901 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
902 input_filename, hsplineno, hspcolno + 1);
903 format_string(stderr, (unsigned char *) yytext, 1);
904 fputs("'\n", stderr);
907 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
908 input_filename, hsplineno, hspcolno + 1);
909 format_string(stderr, (unsigned char *) yytext, 1);
910 fputs("' in a string gap\n", stderr);
917 * End of file. In any sub-state, this is an error. However, for the primary
918 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
919 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
920 * adding virtual close curlies, or closing an interface and returning to the
921 * primary source file.
923 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
924 * line/column advancement has to be done by hand.
928 <Char,CharEsc><<EOF>> {
929 hsplineno = hslineno; hspcolno = hscolno;
930 hsperror("unterminated character literal");
933 hsplineno = hslineno; hspcolno = hscolno;
934 hsperror("unterminated comment");
936 <String,StringEsc><<EOF>> {
937 hsplineno = hslineno; hspcolno = hscolno;
938 hsperror("unterminated string literal");
941 hsplineno = hslineno; hspcolno = hscolno;
942 hsperror("unterminated interface pragma");
944 <UserPragma><<EOF>> {
945 hsplineno = hslineno; hspcolno = hscolno;
946 hsperror("unterminated user-specified pragma");
948 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
952 /**********************************************************************
955 * YACC/LEX Initialisation etc. *
958 **********************************************************************/
961 We initialise input_filename to "<stdin>".
962 This allows unnamed sources to be piped into the parser.
965 extern BOOLEAN acceptPrim;
970 input_filename = xstrdup("<stdin>");
972 /* We must initialize the input buffer _now_, because we call
973 setyyin _before_ calling yylex for the first time! */
974 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
983 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
985 if (input_filename != NULL)
986 free(input_filename);
987 input_filename = xstrdup(f);
990 /**********************************************************************
993 * Layout Processing *
996 **********************************************************************/
999 The following section deals with Haskell Layout conventions
1000 forcing insertion of ; or } as appropriate
1004 hsshouldindent(void)
1006 return (!forgetindent && INDENTON);
1010 /* Enter new context and set new indentation level */
1015 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1019 * partain: first chk that new indent won't be less than current one; this code
1020 * doesn't make sense to me; hscolno tells the position of the _end_ of the
1021 * current token; what that has to do with indenting, I don't know.
1025 if (hscolno - 1 <= INDENTPT) {
1027 return; /* Empty input OK for Haskell 1.1 */
1029 char errbuf[ERR_BUF_SIZE];
1031 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1035 hsentercontext((hspcolno << 1) | 1);
1039 /* Enter a new context without changing the indentation level */
1044 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1046 hsentercontext(indenttab[icontexts] & ~1);
1050 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1054 forgetindent = TRUE;
1058 /* Enter a new layout context. */
1060 hsentercontext(int indent)
1062 /* Enter new context and set indentation as specified */
1063 if (++icontexts >= MAX_CONTEXTS) {
1064 char errbuf[ERR_BUF_SIZE];
1066 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1069 forgetindent = FALSE;
1070 indenttab[icontexts] = indent;
1072 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1077 /* Exit a layout context */
1083 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1088 * Return checks the indentation level and returns ;, } or the specified token.
1098 if (hsshouldindent()) {
1099 if (hspcolno < INDENTPT) {
1101 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1105 } else if (hspcolno == INDENTPT) {
1107 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1115 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1122 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1128 static BOOLEAN eof = FALSE;
1131 if (hssttok != -1) {
1139 endlineno = hslineno;
1140 if ((tok = yylex1()) != EOF)
1146 if (icontexts > icontexts_save) {
1149 indenttab[icontexts] = 0;
1152 hsperror("missing '}' at end of file");
1153 } else if (hsbuf_save != NULL) {
1155 yy_delete_buffer(YY_CURRENT_BUFFER);
1156 yy_switch_to_buffer(hsbuf_save);
1158 new_filename(filename_save);
1159 free(filename_save);
1160 hslineno = hslineno_save;
1161 hsplineno = hsplineno_save;
1162 hscolno = hscolno_save;
1163 hspcolno = hspcolno_save;
1165 in_interface = FALSE;
1166 icontexts = icontexts_save - 1;
1169 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1176 abort(); /* should never get here! */
1180 /**********************************************************************
1183 * Input Processing for Interfaces *
1186 **********************************************************************/
1188 /* setyyin(file) open file as new lex input buffer */
1194 hsbuf_save = YY_CURRENT_BUFFER;
1195 if ((yyin = fopen(file, "r")) == NULL) {
1196 char errbuf[ERR_BUF_SIZE];
1198 sprintf(errbuf, "can't read \"%-.50s\"", file);
1201 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1203 hslineno_save = hslineno;
1204 hsplineno_save = hsplineno;
1205 hslineno = hsplineno = 1;
1207 filename_save = input_filename;
1208 input_filename = NULL;
1210 hscolno_save = hscolno;
1211 hspcolno_save = hspcolno;
1212 hscolno = hspcolno = 0;
1213 in_interface = TRUE;
1214 etags_save = etags; /* do not do "etags" stuff in interfaces */
1215 etags = 0; /* We remember whether we are doing it in
1216 the module, so we can restore it later [WDP 94/09] */
1217 hsentercontext(-1); /* partain: changed this from 0 */
1218 icontexts_save = icontexts;
1220 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1225 layout_input(char *text, int len)
1228 fprintf(stderr, "Scanning \"%s\"\n", text);
1231 hsplineno = hslineno;
1243 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1255 setstartlineno(void)
1257 startlineno = hsplineno;
1261 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1265 /**********************************************************************
1271 **********************************************************************/
1273 #define CACHE_SIZE YY_BUF_SIZE
1279 } textcache = { 0, 0, NULL };
1284 /* fprintf(stderr, "cleartext\n"); */
1286 if (textcache.allocated == 0) {
1287 textcache.allocated = CACHE_SIZE;
1288 textcache.text = xmalloc(CACHE_SIZE);
1293 addtext(char *text, unsigned length)
1295 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1300 if (textcache.next + length + 1 >= textcache.allocated) {
1301 textcache.allocated += length + CACHE_SIZE;
1302 textcache.text = xrealloc(textcache.text, textcache.allocated);
1304 bcopy(text, textcache.text + textcache.next, length);
1305 textcache.next += length;
1311 /* fprintf(stderr, "addchar: %c\n", c); */
1313 if (textcache.next + 2 >= textcache.allocated) {
1314 textcache.allocated += CACHE_SIZE;
1315 textcache.text = xrealloc(textcache.text, textcache.allocated);
1317 textcache.text[textcache.next++] = c;
1321 fetchtext(unsigned *length)
1323 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1325 *length = textcache.next;
1326 textcache.text[textcache.next] = '\0';
1327 return textcache.text;
1330 /**********************************************************************
1333 * Identifier Processing *
1336 **********************************************************************/
1339 hsnewid Enters an id of length n into the symbol table.
1343 hsnewid(char *name, int length)
1345 char save = name[length];
1347 name[length] = '\0';
1348 yylval.uid = installid(name);
1349 name[length] = save;
1353 isconstr(char *s) /* walks past leading underscores before using the macro */
1357 for ( ; temp != NULL && *temp == '_' ; temp++ );
1359 return _isconstr(temp);