2 /**********************************************************************
5 * LEX grammar for Haskell. *
6 * ------------------------ *
8 * (c) Copyright K. Hammond, University of Glasgow, *
9 * 10th. February 1989 *
11 * Modification History *
12 * -------------------- *
14 * 22/08/91 kh Initial Haskell 1.1 version. *
15 * 18/10/91 kh Added 'ccall'. *
16 * 19/11/91 kh Tidied generally. *
17 * 04/12/91 kh Added Int#. *
18 * 31/01/92 kh Haskell 1.2 version. *
19 * 24/04/92 ps Added 'scc'. *
20 * 03/06/92 kh Changed Infix/Prelude Handling. *
21 * 23/08/93 jsm Changed to support flex *
28 **********************************************************************/
30 #include "../../includes/config.h"
34 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
36 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
37 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
39 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
41 #define rindex strrchr
42 #define bcopy(s, d, n) memcpy ((d), (s), (n))
43 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
44 #define bzero(s, n) memset ((s), 0, (n))
45 #else /* not STDC_HEADERS and not HAVE_STRING_H */
47 /* memory.h and strings.h conflict on some systems. */
48 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
51 #include "hsparser.tab.h"
52 #include "constants.h"
55 /* Our substitute for <ctype.h> */
64 #define _isconstr(s) (CharTable[*s]&(_C))
65 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
67 unsigned char CharTable[NCHARS] = {
68 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
70 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
73 /* '(' */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
75 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
76 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
77 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
78 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
79 /* 'X' */ _C, _C, _C, 0, 0, 0, 0, 0,
80 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
81 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
82 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
83 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
85 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
86 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
87 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
88 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
89 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
90 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
91 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
92 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
93 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
94 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
95 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
96 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
97 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
98 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
99 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
100 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
103 /**********************************************************************
109 **********************************************************************/
111 char *input_filename = NULL; /* Always points to a dynamically allocated string */
114 * For my own sanity, things that are not part of the flex skeleton
115 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
118 int hslineno = 0; /* Line number at end of token */
119 int hsplineno = 0; /* Line number at end of previous token */
121 int hscolno = 0; /* Column number at end of token */
122 int hspcolno = 0; /* Column number at end of previous token */
123 int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
125 int startlineno = 0; /* The line number where something starts */
126 int endlineno = 0; /* The line number where something ends */
128 static BOOLEAN noGap = TRUE; /* For checking string gaps */
129 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
131 static int nested_comments; /* For counting comment nesting depth */
133 /* Hacky definition of yywrap: see flex doc.
135 If we don't do this, then we'll have to get the default
136 yywrap from the flex library, which is often something
137 we are not good at locating. This avoids that difficulty.
138 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
143 /* Essential forward declarations */
145 static VOID hsnewid PROTO((char *, int));
146 static VOID layout_input PROTO((char *, int));
147 static VOID cleartext (NO_ARGS);
148 static VOID addtext PROTO((char *, unsigned));
149 static VOID addchar PROTO((char));
150 static char *fetchtext PROTO((unsigned *));
152 /* Special file handling for IMPORTS */
153 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
155 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
156 static char *filename_save; /* File Name */
157 static int hslineno_save = 0, /* Line Number */
158 hsplineno_save = 0, /* Line Number of Prev. token */
159 hscolno_save = 0, /* Indentation */
160 hspcolno_save = 0; /* Left Indentation */
161 static short icontexts_save = 0; /* Indent Context Level */
163 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
164 extern BOOLEAN etags; /* that which is saved */
166 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
168 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
170 extern BOOLEAN ignorePragmas; /* True when we should ignore pragmas */
171 extern int minAcceptablePragmaVersion; /* see documentation in main.c */
172 extern int maxAcceptablePragmaVersion;
173 extern int thisIfacePragmaVersion;
175 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
176 * inserted before token +ve -- "}" inserted before
179 short icontexts = 0; /* Which context we're in */
184 Table of indentations: right bit indicates whether to use
185 indentation rules (1 = use rules; 0 = ignore)
188 push one of these "contexts" at every "case" or "where"; the right bit says
189 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
191 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
192 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
193 push is shown just below.
198 static short indenttab[MAX_CONTEXTS] = {-1};
200 #define INDENTPT (indenttab[icontexts]>>1)
201 #define INDENTON (indenttab[icontexts]&1)
203 #define RETURN(tok) return(Return(tok))
206 #define YY_DECL int yylex1()
208 /* We should not peek at yy_act, but flex calls us even for the internal action
209 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
210 to support older versions of flex, we'll continue to peek for now.
212 #define YY_USER_ACTION \
213 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
217 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
220 /* Each time we enter a new start state, we push it onto the state stack.
221 Note that the rules do not allow us to underflow or overflow the stack.
222 (At least, they shouldn't.) The maximum expected depth is 4:
223 0: Code -> 1: String -> 2: StringEsc -> 3: Comment
225 static int StateStack[5];
226 static int StateDepth = -1;
229 #define PUSH_STATE(n) do {\
230 fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
231 StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
232 #define POP_STATE do {--StateDepth;\
233 fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
234 BEGIN(StateStack[StateDepth]);} while(0)
236 #define PUSH_STATE(n) do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
237 #define POP_STATE do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
242 /* The start states are:
243 Code -- normal Haskell code (principal lexer)
244 GlaExt -- Haskell code with Glasgow extensions
245 Comment -- Nested comment processing
246 String -- Inside a string literal with backslashes
247 StringEsc -- Immediately following a backslash in a string literal
248 Char -- Inside a character literal with backslashes
249 CharEsc -- Immediately following a backslash in a character literal
251 Note that the INITIAL state is unused. Also note that these states
252 are _exclusive_. All rules should be prefixed with an appropriate
253 list of start states.
256 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
262 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
263 S [!#$%&*+./<=>?@\\^|~:]
265 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
278 * Special GHC pragma rules. Do we need a start state for interface files,
279 * so these won't be matched in source files? --JSM
283 <Code,GlaExt>^"# ".*{NL} {
284 char tempf[FILENAME_SIZE];
285 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
287 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
290 <Code,GlaExt>^"#line ".*{NL} {
291 char tempf[FILENAME_SIZE];
292 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
294 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
297 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
298 /* partain: pragma-style line directive */
299 char tempf[FILENAME_SIZE];
300 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
302 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
304 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}" {
305 sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
307 <Code,GlaExt>"{-# GHC_PRAGMA " {
308 if ( ignorePragmas ||
309 thisIfacePragmaVersion < minAcceptablePragmaVersion ||
310 thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
314 PUSH_STATE(GhcPragma);
318 <GhcPragma>"_N_" { RETURN(NO_PRAGMA); }
319 <GhcPragma>"_NI_" { RETURN(NOINFO_PRAGMA); }
320 <GhcPragma>"_ABSTRACT_" { RETURN(ABSTRACT_PRAGMA); }
321 <GhcPragma>"_DEFOREST_" { RETURN(DEFOREST_PRAGMA); }
322 <GhcPragma>"_SPECIALISE_" { RETURN(SPECIALISE_PRAGMA); }
323 <GhcPragma>"_M_" { RETURN(MODNAME_PRAGMA); }
324 <GhcPragma>"_A_" { RETURN(ARITY_PRAGMA); }
325 <GhcPragma>"_U_" { RETURN(UPDATE_PRAGMA); }
326 <GhcPragma>"_S_" { RETURN(STRICTNESS_PRAGMA); }
327 <GhcPragma>"_K_" { RETURN(KIND_PRAGMA); }
328 <GhcPragma>"_MF_" { RETURN(MAGIC_UNFOLDING_PRAGMA); }
329 <GhcPragma>"_F_" { RETURN(UNFOLDING_PRAGMA); }
331 <GhcPragma>"_!_" { RETURN(COCON); }
332 <GhcPragma>"_#_" { RETURN(COPRIM); }
333 <GhcPragma>"_APP_" { RETURN(COAPP); }
334 <GhcPragma>"_TYAPP_" { RETURN(COTYAPP); }
335 <GhcPragma>"_ALG_" { RETURN(CO_ALG_ALTS); }
336 <GhcPragma>"_PRIM_" { RETURN(CO_PRIM_ALTS); }
337 <GhcPragma>"_NO_DEFLT_" { RETURN(CO_NO_DEFAULT); }
338 <GhcPragma>"_LETREC_" { RETURN(CO_LETREC); }
340 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
341 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
342 <GhcPragma>"_USER_CC_" { RETURN(CO_USER_CC); }
343 <GhcPragma>"_AUTO_CC_" { RETURN(CO_AUTO_CC); }
344 <GhcPragma>"_DICT_CC_" { RETURN(CO_DICT_CC); }
346 <GhcPragma>"_DUPD_CC_" { RETURN(CO_DUPD_CC); }
347 <GhcPragma>"_CAF_CC_" { RETURN(CO_CAF_CC); }
349 <GhcPragma>"_SDSEL_" { RETURN(CO_SDSEL_ID); }
350 <GhcPragma>"_METH_" { RETURN(CO_METH_ID); }
351 <GhcPragma>"_DEFM_" { RETURN(CO_DEFM_ID); }
352 <GhcPragma>"_DFUN_" { RETURN(CO_DFUN_ID); }
353 <GhcPragma>"_CONSTM_" { RETURN(CO_CONSTM_ID); }
354 <GhcPragma>"_SPEC_" { RETURN(CO_SPEC_ID); }
355 <GhcPragma>"_WRKR_" { RETURN(CO_WRKR_ID); }
356 <GhcPragma>"_ORIG_" { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
358 <GhcPragma>"_ALWAYS_" { RETURN(UNFOLD_ALWAYS); }
359 <GhcPragma>"_IF_ARGS_" { RETURN(UNFOLD_IF_ARGS); }
361 <GhcPragma>"_NOREP_I_" { RETURN(NOREP_INTEGER); }
362 <GhcPragma>"_NOREP_R_" { RETURN(NOREP_RATIONAL); }
363 <GhcPragma>"_NOREP_S_" { RETURN(NOREP_STRING); }
365 <GhcPragma>" #-}" { POP_STATE; RETURN(END_PRAGMA); }
367 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
368 PUSH_STATE(UserPragma);
369 RETURN(SPECIALISE_UPRAGMA);
371 <Code,GlaExt>"{-#"{WS}*"INLINE" {
372 PUSH_STATE(UserPragma);
373 RETURN(INLINE_UPRAGMA);
375 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
376 PUSH_STATE(UserPragma);
377 RETURN(MAGIC_UNFOLDING_UPRAGMA);
379 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
380 PUSH_STATE(UserPragma);
381 RETURN(DEFOREST_UPRAGMA);
383 <Code,GlaExt>"{-#"{WS}*"ABSTRACT" {
384 PUSH_STATE(UserPragma);
385 RETURN(ABSTRACT_UPRAGMA);
387 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
391 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
392 * intentionally accepted as a keyword even for normal <Code>.
396 <Code,GlaExt,GhcPragma>"case" { RETURN(CASE); }
397 <Code,GlaExt>"class" { RETURN(CLASS); }
398 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
399 <Code,GlaExt>"default" { RETURN(DEFAULT); }
400 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
401 <Code,GlaExt>"else" { RETURN(ELSE); }
402 <Code,GlaExt>"hiding" { RETURN(HIDING); }
403 <Code,GlaExt>"if" { RETURN(IF); }
404 <Code,GlaExt>"import" { RETURN(IMPORT); }
405 <Code,GlaExt>"infix" { RETURN(INFIX); }
406 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
407 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
408 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
409 <Code,GlaExt>"interface" { RETURN(INTERFACE); }
410 <Code,GlaExt>"module" { RETURN(MODULE); }
411 <Code,GlaExt,GhcPragma>"of" { RETURN(OF); }
412 <Code,GlaExt>"renaming" { RETURN(RENAMING); }
413 <Code,GlaExt>"then" { RETURN(THEN); }
414 <Code,GlaExt>"to" { RETURN(TO); }
415 <Code,GlaExt>"type" { RETURN(TYPE); }
416 <Code,GlaExt>"where" { RETURN(WHERE); }
417 <Code,GlaExt,GhcPragma>"in" { RETURN(IN); }
418 <Code,GlaExt,GhcPragma>"let" { RETURN(LET); }
419 <GlaExt,GhcPragma>"_ccall_" { RETURN(CCALL); }
420 <GlaExt,GhcPragma>"_ccall_GC_" { RETURN(CCALL_GC); }
421 <GlaExt,GhcPragma>"_casm_" { RETURN(CASM); }
422 <GlaExt,GhcPragma>"_casm_GC_" { RETURN(CASM_GC); }
423 <Code,GlaExt,GhcPragma>"_scc_" { RETURN(SCC); }
424 <GhcPragma>"_forall_" { RETURN(FORALL); }
428 * Haskell operators. Nothing special about these.
432 <Code,GlaExt>".." { RETURN(DOTDOT); }
433 <Code,GlaExt,GhcPragma>";" { RETURN(SEMI); }
434 <Code,GlaExt,GhcPragma,UserPragma>"," { RETURN(COMMA); }
435 <Code,GlaExt,GhcPragma>"|" { RETURN(VBAR); }
436 <Code,GlaExt,GhcPragma,UserPragma>"=" { RETURN(EQUAL); }
437 <Code,GlaExt>"<-" { RETURN(LARROW); }
438 <Code,GlaExt,GhcPragma,UserPragma>"->" { RETURN(RARROW); }
439 <Code,GlaExt,GhcPragma,UserPragma>"=>" { RETURN(DARROW); }
440 <Code,GlaExt,GhcPragma,UserPragma>"::" { RETURN(DCOLON); }
441 <Code,GlaExt,GhcPragma,UserPragma>"(" { RETURN(OPAREN); }
442 <Code,GlaExt,GhcPragma,UserPragma>")" { RETURN(CPAREN); }
443 <Code,GlaExt,GhcPragma,UserPragma>"[" { RETURN(OBRACK); }
444 <Code,GlaExt,GhcPragma,UserPragma>"]" { RETURN(CBRACK); }
445 <Code,GlaExt,GhcPragma>"{" { RETURN(OCURLY); }
446 <Code,GlaExt,GhcPragma>"}" { RETURN(CCURLY); }
447 <Code,GlaExt>"+" { RETURN(PLUS); }
448 <Code,GlaExt>"@" { RETURN(AT); }
449 <Code,GlaExt,GhcPragma>"\\" { RETURN(LAMBDA); }
450 <GhcPragma>"_/\\_" { RETURN(TYLAMBDA); }
451 <Code,GlaExt>"_" { RETURN(WILDCARD); }
452 <Code,GlaExt,GhcPragma>"`" { RETURN(BQUOTE); }
453 <Code,GlaExt>"~" { RETURN(LAZY); }
454 <Code,GlaExt>"-" { RETURN(MINUS); }
458 * Integers and (for Glasgow extensions) primitive integers. Note that
459 * we pass all of the text on to the parser, because flex/C can't handle
460 * arbitrary precision numbers.
464 <GlaExt>("-")?"0o"{O}+"#" { /* octal */
465 yylval.uid = xstrndup(yytext, yyleng - 1);
468 <Code,GlaExt>"0o"{O}+ { /* octal */
469 yylval.uid = xstrndup(yytext, yyleng);
472 <GlaExt>("-")?"0x"{H}+"#" { /* hexadecimal */
473 yylval.uid = xstrndup(yytext, yyleng - 1);
476 <Code,GlaExt>"0x"{H}+ { /* hexadecimal */
477 yylval.uid = xstrndup(yytext, yyleng);
480 <GlaExt,GhcPragma>("-")?{N}"#" {
481 yylval.uid = xstrndup(yytext, yyleng - 1);
484 <Code,GlaExt,GhcPragma>{N} {
485 yylval.uid = xstrndup(yytext, yyleng);
491 * Floats and (for Glasgow extensions) primitive floats/doubles.
495 <GlaExt,GhcPragma>("-")?{F}"##" {
496 yylval.uid = xstrndup(yytext, yyleng - 2);
499 <GlaExt,GhcPragma>("-")?{F}"#" {
500 yylval.uid = xstrndup(yytext, yyleng - 1);
504 yylval.uid = xstrndup(yytext, yyleng);
510 * Funky ``foo'' style C literals for Glasgow extensions
514 <GlaExt,GhcPragma>"``"[^']+"''" {
515 hsnewid(yytext + 2, yyleng - 4);
521 * Identifiers, both variables and operators. The trailing hash is allowed
522 * for Glasgow extensions.
526 <GhcPragma>"_NIL_" { hsnewid(yytext, yyleng); RETURN(CONID); }
527 <GhcPragma>"_TUP_"{D}+ { hsnewid(yytext, yyleng); RETURN(CONID); }
528 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
530 <GlaExt,GhcPragma,UserPragma>{Id}"#" {
531 hsnewid(yytext, yyleng);
532 RETURN(_isconstr(yytext) ? CONID : VARID);
535 /* This SHOULDNAE work in "Code" (sigh) */
537 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} {
538 if (! (nonstandardFlag || in_interface)) {
539 char errbuf[ERR_BUF_SIZE];
540 sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
543 hsnewid(yytext, yyleng);
544 RETURN(isconstr(yytext) ? CONID : VARID);
545 /* NB: ^^^^^^^^ : not the macro! */
547 <Code,GlaExt,GhcPragma,UserPragma>{Id} {
548 hsnewid(yytext, yyleng);
549 RETURN(_isconstr(yytext) ? CONID : VARID);
551 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
552 hsnewid(yytext, yyleng);
553 RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
557 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
559 /* Because we can make the former well-behaved (we defined them).
561 Sadly, the latter is defined by Haskell, which allows such
562 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
566 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`" {
567 hsnewid(yytext + 1, yyleng - 2);
568 RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
573 * Character literals. The first form is the quick form, for character
574 * literals that don't contain backslashes. Literals with backslashes are
575 * lexed through multiple rules. First, we match the open ' and as many
576 * normal characters as possible. This puts us into the <Char> state, where
577 * a backslash is legal. Then, we match the backslash and move into the
578 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
579 * characters and the close '. We may end up with too many characters, but
580 * this allows us to easily share the lex rules with strings. Excess characters
581 * are ignored with a warning.
585 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
586 yylval.uhstring = installHstring(1, yytext+1);
589 <Code,GlaExt>'({CHAR}|"\"")' {
590 yylval.uhstring = installHstring(1, yytext+1);
593 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
594 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
597 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
598 hsmlcolno = hspcolno;
600 addtext(yytext+1, yyleng-1);
603 <Char>({CHAR}|"\"")*'# {
607 addtext(yytext, yyleng - 2);
608 text = fetchtext(&length);
610 if (! (nonstandardFlag || in_interface)) {
611 char errbuf[ERR_BUF_SIZE];
612 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
617 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
618 input_filename, hsplineno, hspcolno + 1);
619 format_string(stderr, (unsigned char *) text, length);
620 fputs("' too long\n", stderr);
623 yylval.uhstring = installHstring(1, text);
624 hspcolno = hsmlcolno;
628 <Char>({CHAR}|"\"")*' {
632 addtext(yytext, yyleng - 1);
633 text = fetchtext(&length);
636 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
637 input_filename, hsplineno, hspcolno + 1);
638 format_string(stderr, (unsigned char *) text, length);
639 fputs("' too long\n", stderr);
642 yylval.uhstring = installHstring(1, text);
643 hspcolno = hsmlcolno;
647 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
652 * String literals. The first form is the quick form, for string literals
653 * that don't contain backslashes. Literals with backslashes are lexed
654 * through multiple rules. First, we match the open " and as many normal
655 * characters as possible. This puts us into the <String> state, where
656 * a backslash is legal. Then, we match the backslash and move into the
657 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
658 * characters, moving back and forth between <String> and <StringEsc> as more
659 * backslashes are encountered. (We may even digress into <Comment> mode if we
660 * find a comment in a gap between backslashes.) Finally, we read the last chunk
661 * of normal characters and the close ".
665 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""# {
666 yylval.uhstring = installHstring(yyleng-3, yytext+1);
667 /* the -3 accounts for the " on front, "# on the end */
670 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\"" {
671 yylval.uhstring = installHstring(yyleng-2, yytext+1);
674 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
675 hsmlcolno = hspcolno;
677 addtext(yytext+1, yyleng-1);
680 <String>({CHAR}|"'")*"\"#" {
684 addtext(yytext, yyleng-2);
685 text = fetchtext(&length);
687 if (! (nonstandardFlag || in_interface)) {
688 char errbuf[ERR_BUF_SIZE];
689 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
693 yylval.uhstring = installHstring(length, text);
694 hspcolno = hsmlcolno;
698 <String>({CHAR}|"'")*"\"" {
702 addtext(yytext, yyleng-1);
703 text = fetchtext(&length);
705 yylval.uhstring = installHstring(length, text);
706 hspcolno = hsmlcolno;
710 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
714 * Character and string escapes are roughly the same, but strings have the
715 * extra `\&' sequence which is not allowed for characters. Also, comments
716 * are allowed in the <StringEsc> state. (See the comment section much
719 * NB: Backslashes and tabs are stored in strings as themselves.
720 * But if we print them (in printtree.c), they must go out as
721 * "\\\\" and "\\t" respectively. (This is because of the bogus
722 * intermediate format that the parser produces. It uses '\t' fpr end of
723 * string, so it needs to be able to escape tabs, which means that it
724 * also needs to be able to escape the escape character ('\\'). Sigh.
728 <Char>\\ { PUSH_STATE(CharEsc); }
729 <String>\\& /* Ignore */ ;
730 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
732 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
733 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
735 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
736 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
737 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
738 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
739 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
740 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
741 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
742 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
743 <CharEsc,StringEsc>BEL |
744 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
745 <CharEsc,StringEsc>BS |
746 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
747 <CharEsc,StringEsc>HT |
748 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
749 <CharEsc,StringEsc>LF |
750 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
751 <CharEsc,StringEsc>VT |
752 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
753 <CharEsc,StringEsc>FF |
754 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
755 <CharEsc,StringEsc>CR |
756 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
757 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
758 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
759 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
760 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
761 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
762 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
763 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
764 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
765 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
766 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
767 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
768 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
769 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
770 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
771 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
772 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
773 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
774 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
775 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
776 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
777 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
778 <CharEsc,StringEsc>{D}+ {
779 int i = strtol(yytext, NULL, 10);
783 char errbuf[ERR_BUF_SIZE];
784 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
790 <CharEsc,StringEsc>o{O}+ {
791 int i = strtol(yytext + 1, NULL, 8);
795 char errbuf[ERR_BUF_SIZE];
796 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
802 <CharEsc,StringEsc>x{H}+ {
803 int i = strtol(yytext + 1, NULL, 16);
807 char errbuf[ERR_BUF_SIZE];
808 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
817 * Simple comments and whitespace. Normally, we would just ignore these, but
818 * in case we're processing a string escape, we need to note that we've seen
823 <Code,GlaExt,StringEsc>"--".*{NL}{WS}* |
824 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
828 * Nested comments. The major complication here is in trying to match the
829 * longest lexemes possible, for better performance. (See the flex document.)
830 * That's why the rules look so bizarre.
834 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-" {
835 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
839 <Comment>"-"+[^-{}]+ |
840 <Comment>"{"+[^-{}]+ ;
841 <Comment>"{-" { nested_comments++; }
842 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
847 * Illegal characters. This used to be a single rule, but we might as well
848 * pass on as much information as we have, so now we indicate our state in
853 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n) {
854 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
855 input_filename, hsplineno, hspcolno + 1);
856 format_string(stderr, (unsigned char *) yytext, 1);
857 fputs("'\n", stderr);
861 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
862 input_filename, hsplineno, hspcolno + 1);
863 format_string(stderr, (unsigned char *) yytext, 1);
864 fputs("' in a character literal\n", stderr);
868 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
869 input_filename, hsplineno, hspcolno + 1);
870 format_string(stderr, (unsigned char *) yytext, 1);
871 fputs("'\n", stderr);
874 <String>(.|\n) { if (nonstandardFlag) {
875 addtext(yytext, yyleng);
877 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
878 input_filename, hsplineno, hspcolno + 1);
879 format_string(stderr, (unsigned char *) yytext, 1);
880 fputs("' in a string literal\n", stderr);
886 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
887 input_filename, hsplineno, hspcolno + 1);
888 format_string(stderr, (unsigned char *) yytext, 1);
889 fputs("'\n", stderr);
892 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
893 input_filename, hsplineno, hspcolno + 1);
894 format_string(stderr, (unsigned char *) yytext, 1);
895 fputs("' in a string gap\n", stderr);
902 * End of file. In any sub-state, this is an error. However, for the primary
903 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
904 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
905 * adding virtual close curlies, or closing an interface and returning to the
906 * primary source file.
908 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
909 * line/column advancement has to be done by hand.
913 <Char,CharEsc><<EOF>> {
914 hsplineno = hslineno; hspcolno = hscolno;
915 hsperror("unterminated character literal");
918 hsplineno = hslineno; hspcolno = hscolno;
919 hsperror("unterminated comment");
921 <String,StringEsc><<EOF>> {
922 hsplineno = hslineno; hspcolno = hscolno;
923 hsperror("unterminated string literal");
926 hsplineno = hslineno; hspcolno = hscolno;
927 hsperror("unterminated interface pragma");
929 <UserPragma><<EOF>> {
930 hsplineno = hslineno; hspcolno = hscolno;
931 hsperror("unterminated user-specified pragma");
933 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
937 /**********************************************************************
940 * YACC/LEX Initialisation etc. *
943 **********************************************************************/
946 We initialise input_filename to "<stdin>".
947 This allows unnamed sources to be piped into the parser.
953 extern BOOLEAN acceptPrim;
955 input_filename = xstrdup("<stdin>");
957 /* We must initialize the input buffer _now_, because we call
958 setyyin _before_ calling yylex for the first time! */
959 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
968 new_filename(f) /* This looks pretty dodgy to me (WDP) */
971 if (input_filename != NULL)
972 free(input_filename);
973 input_filename = xstrdup(f);
976 /**********************************************************************
979 * Layout Processing *
982 **********************************************************************/
985 The following section deals with Haskell Layout conventions
986 forcing insertion of ; or } as appropriate
992 return (!forgetindent && INDENTON);
996 /* Enter new context and set new indentation level */
1001 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1005 * partain: first chk that new indent won't be less than current one; this code
1006 * doesn't make sense to me; hscolno tells the position of the _end_ of the
1007 * current token; what that has to do with indenting, I don't know.
1011 if (hscolno - 1 <= INDENTPT) {
1013 return; /* Empty input OK for Haskell 1.1 */
1015 char errbuf[ERR_BUF_SIZE];
1017 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1021 hsentercontext((hspcolno << 1) | 1);
1025 /* Enter a new context without changing the indentation level */
1030 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1032 hsentercontext(indenttab[icontexts] & ~1);
1036 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1040 forgetindent = TRUE;
1044 /* Enter a new layout context. */
1046 hsentercontext(indent)
1049 /* Enter new context and set indentation as specified */
1050 if (++icontexts >= MAX_CONTEXTS) {
1051 char errbuf[ERR_BUF_SIZE];
1053 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1056 forgetindent = FALSE;
1057 indenttab[icontexts] = indent;
1059 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1064 /* Exit a layout context */
1070 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1075 * Return checks the indentation level and returns ;, } or the specified token.
1086 if (hsshouldindent()) {
1087 if (hspcolno < INDENTPT) {
1089 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1093 } else if (hspcolno == INDENTPT) {
1095 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1103 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1110 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1116 static BOOLEAN eof = FALSE;
1119 if (hssttok != -1) {
1127 endlineno = hslineno;
1128 if ((tok = yylex1()) != EOF)
1134 if (icontexts > icontexts_save) {
1137 indenttab[icontexts] = 0;
1140 hsperror("missing '}' at end of file");
1141 } else if (hsbuf_save != NULL) {
1143 yy_delete_buffer(YY_CURRENT_BUFFER);
1144 yy_switch_to_buffer(hsbuf_save);
1146 new_filename(filename_save);
1147 free(filename_save);
1148 hslineno = hslineno_save;
1149 hsplineno = hsplineno_save;
1150 hscolno = hscolno_save;
1151 hspcolno = hspcolno_save;
1153 in_interface = FALSE;
1154 icontexts = icontexts_save - 1;
1157 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1164 abort(); /* should never get here! */
1168 /**********************************************************************
1171 * Input Processing for Interfaces *
1174 **********************************************************************/
1176 /* setyyin(file) open file as new lex input buffer */
1183 hsbuf_save = YY_CURRENT_BUFFER;
1184 if ((yyin = fopen(file, "r")) == NULL) {
1185 char errbuf[ERR_BUF_SIZE];
1187 sprintf(errbuf, "can't read \"%-.50s\"", file);
1190 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1192 hslineno_save = hslineno;
1193 hsplineno_save = hsplineno;
1194 hslineno = hsplineno = 1;
1196 filename_save = input_filename;
1197 input_filename = NULL;
1199 hscolno_save = hscolno;
1200 hspcolno_save = hspcolno;
1201 hscolno = hspcolno = 0;
1202 in_interface = TRUE;
1203 etags_save = etags; /* do not do "etags" stuff in interfaces */
1204 etags = 0; /* We remember whether we are doing it in
1205 the module, so we can restore it later [WDP 94/09] */
1206 hsentercontext(-1); /* partain: changed this from 0 */
1207 icontexts_save = icontexts;
1209 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1214 layout_input(text, len)
1219 fprintf(stderr, "Scanning \"%s\"\n", text);
1222 hsplineno = hslineno;
1234 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1248 startlineno = hsplineno;
1252 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1256 /**********************************************************************
1262 **********************************************************************/
1264 #define CACHE_SIZE YY_BUF_SIZE
1270 } textcache = { 0, 0, NULL };
1275 /* fprintf(stderr, "cleartext\n"); */
1277 if (textcache.allocated == 0) {
1278 textcache.allocated = CACHE_SIZE;
1279 textcache.text = xmalloc(CACHE_SIZE);
1284 addtext(text, length)
1288 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1293 if (textcache.next + length + 1 >= textcache.allocated) {
1294 textcache.allocated += length + CACHE_SIZE;
1295 textcache.text = xrealloc(textcache.text, textcache.allocated);
1297 bcopy(text, textcache.text + textcache.next, length);
1298 textcache.next += length;
1309 /* fprintf(stderr, "addchar: %c\n", c); */
1311 if (textcache.next + 2 >= textcache.allocated) {
1312 textcache.allocated += CACHE_SIZE;
1313 textcache.text = xrealloc(textcache.text, textcache.allocated);
1315 textcache.text[textcache.next++] = c;
1322 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1324 *length = textcache.next;
1325 textcache.text[textcache.next] = '\0';
1326 return textcache.text;
1329 /**********************************************************************
1332 * Identifier Processing *
1335 **********************************************************************/
1338 hsnewid Enters an id of length n into the symbol table.
1342 hsnewid(name, length)
1346 char save = name[length];
1348 name[length] = '\0';
1349 yylval.uid = installid(name);
1350 name[length] = save;
1354 isconstr(s) /* walks past leading underscores before using the macro */
1359 for ( ; temp != NULL && *temp == '_' ; temp++ );
1361 return _isconstr(temp);