2 /**********************************************************************
8 **********************************************************************/
10 #include "../../includes/config.h"
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 /* An ANSI string.h and pre-ANSI memory.h might conflict. */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 /* memory.h and strings.h conflict on some systems. */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
31 #include "hsparser.tab.h"
32 #include "constants.h"
35 /* Our substitute for <ctype.h> */
44 #define _isconstr(s) (CharTable[*s]&(_C))
45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
47 static unsigned char CharTable[NCHARS] = {
48 /* nul */ 0, 0, 0, 0, 0, 0, 0, 0,
49 /* bs */ 0, _S, _S, _S, _S, 0, 0, 0,
50 /* dle */ 0, 0, 0, 0, 0, 0, 0, 0,
51 /* can */ 0, 0, 0, 0, 0, 0, 0, 0,
52 /* sp */ _S, 0, 0, 0, 0, 0, 0, 0,
53 /* '(' */ _C, 0, 0, 0, 0, 0, 0, 0, /* ( */
54 /* '0' */ _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
55 /* '8' */ _D|_H, _D|_H, _C, 0, 0, 0, 0, 0,
56 /* '@' */ 0, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _H|_C, _C,
57 /* 'H' */ _C, _C, _C, _C, _C, _C, _C, _C,
58 /* 'P' */ _C, _C, _C, _C, _C, _C, _C, _C,
59 /* 'X' */ _C, _C, _C, _C, 0, 0, 0, 0, /* [ */
60 /* '`' */ 0, _H, _H, _H, _H, _H, _H, 0,
61 /* 'h' */ 0, 0, 0, 0, 0, 0, 0, 0,
62 /* 'p' */ 0, 0, 0, 0, 0, 0, 0, 0,
63 /* 'x' */ 0, 0, 0, 0, 0, 0, 0, 0,
65 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
66 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
67 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
68 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
69 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
70 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
71 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
72 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
73 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
74 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
75 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
76 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
77 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
78 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
79 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
80 /* */ 0, 0, 0, 0, 0, 0, 0, 0,
83 /**********************************************************************
89 **********************************************************************/
91 char *input_filename = NULL; /* Always points to a dynamically allocated string */
94 * For my own sanity, things that are not part of the flex skeleton
95 * have been renamed as hsXXXXX rather than yyXXXXX. --JSM
98 static int hslineno = 0; /* Line number at end of token */
99 int hsplineno = 0; /* Line number at end of previous token */
101 static int hscolno = 0; /* Column number at end of token */
102 int hspcolno = 0; /* Column number at end of previous token */
103 static int hsmlcolno = 0; /* Column number for multiple-rule lexemes */
105 int modulelineno = -1; /* The line number where the module starts */
106 int startlineno = 0; /* The line number where something starts */
107 int endlineno = 0; /* The line number where something ends */
109 static BOOLEAN noGap = TRUE; /* For checking string gaps */
110 static BOOLEAN forgetindent = FALSE; /* Don't bother applying indentation rules */
112 static int nested_comments; /* For counting comment nesting depth */
114 /* Hacky definition of yywrap: see flex doc.
116 If we don't do this, then we'll have to get the default
117 yywrap from the flex library, which is often something
118 we are not good at locating. This avoids that difficulty.
119 (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124 /* Essential forward declarations */
126 static void hsnewid PROTO((char *, int));
127 static void layout_input PROTO((char *, int));
128 static void cleartext (NO_ARGS);
129 static void addtext PROTO((char *, unsigned));
130 static void addchar PROTO((char));
131 static char *fetchtext PROTO((unsigned *));
132 static void new_filename PROTO((char *));
133 static int Return PROTO((int));
134 static void hsentercontext PROTO((int));
136 /* Special file handling for IMPORTS */
137 /* Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
139 static YY_BUFFER_STATE hsbuf_save = NULL; /* Saved input buffer */
140 static char *filename_save; /* File Name */
141 static int hslineno_save = 0, /* Line Number */
142 hsplineno_save = 0, /* Line Number of Prev. token */
143 hscolno_save = 0, /* Indentation */
144 hspcolno_save = 0; /* Left Indentation */
145 static short icontexts_save = 0; /* Indent Context Level */
147 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
148 extern BOOLEAN etags; /* that which is saved */
150 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
152 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
154 extern BOOLEAN ignorePragmas; /* True when we should ignore pragmas */
155 extern int minAcceptablePragmaVersion; /* see documentation in main.c */
156 extern int maxAcceptablePragmaVersion;
157 extern int thisIfacePragmaVersion;
159 static int hssttok = -1; /* Stacked Token: -1 -- no token; -ve -- ";"
160 * inserted before token +ve -- "}" inserted before
163 short icontexts = 0; /* Which context we're in */
168 Table of indentations: right bit indicates whether to use
169 indentation rules (1 = use rules; 0 = ignore)
172 push one of these "contexts" at every "case" or "where"; the right bit says
173 whether user supplied braces, etc., or not. pop appropriately (hsendindent).
175 ALSO, a push/pop when enter/exit a new file (e.g., on importing). A -1 is
176 pushed (the "column" for "module", "interface" and EOF). The -1 from the initial
177 push is shown just below.
182 static short indenttab[MAX_CONTEXTS] = {-1};
184 #define INDENTPT (indenttab[icontexts]>>1)
185 #define INDENTON (indenttab[icontexts]&1)
187 #define RETURN(tok) return(Return(tok))
190 #define YY_DECL int yylex1()
192 /* We should not peek at yy_act, but flex calls us even for the internal action
193 triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
194 to support older versions of flex, we'll continue to peek for now.
196 #define YY_USER_ACTION \
197 if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
201 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
204 /* Each time we enter a new start state, we push it onto the state stack.
205 Note that the rules do not allow us to underflow or overflow the stack.
206 (At least, they shouldn't.) The maximum expected depth is 4:
207 0: Code -> 1: String -> 2: StringEsc -> 3: Comment
209 static int StateStack[5];
210 static int StateDepth = -1;
213 #define PUSH_STATE(n) do {\
214 fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
215 StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
216 #define POP_STATE do {--StateDepth;\
217 fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
218 BEGIN(StateStack[StateDepth]);} while(0)
220 #define PUSH_STATE(n) do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
221 #define POP_STATE do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
226 /* The start states are:
227 Code -- normal Haskell code (principal lexer)
228 GlaExt -- Haskell code with Glasgow extensions
229 Comment -- Nested comment processing
230 String -- Inside a string literal with backslashes
231 StringEsc -- Immediately following a backslash in a string literal
232 Char -- Inside a character literal with backslashes
233 CharEsc -- Immediately following a backslash in a character literal
235 Note that the INITIAL state is unused. Also note that these states
236 are _exclusive_. All rules should be prefixed with an appropriate
237 list of start states.
240 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
242 isoS [\xa1-\xbf\xd7\xf7]
243 isoL [\xc0-\xd6\xd8-\xde]
244 isol [\xdf-\xf6\xf8-\xff]
251 F {N}"."{N}(("e"|"E")("+"|"-")?{N})?
252 S [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
254 L [A-Z\xc0-\xd6\xd8-\xde]
255 l [a-z\xdf-\xf6\xf8-\xff]
260 CHAR [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
269 * Special GHC pragma rules. Do we need a start state for interface files,
270 * so these won't be matched in source files? --JSM
274 <Code,GlaExt>^"# ".*{NL} {
275 char tempf[FILENAME_SIZE];
276 sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
278 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
281 <Code,GlaExt>^"#line ".*{NL} {
282 char tempf[FILENAME_SIZE];
283 sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
285 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
288 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
289 /* partain: pragma-style line directive */
290 char tempf[FILENAME_SIZE];
291 sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
293 hsplineno = hslineno; hscolno = 0; hspcolno = 0;
295 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}" {
296 sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
298 <Code,GlaExt>"{-# GHC_PRAGMA " {
299 if ( ignorePragmas ||
300 thisIfacePragmaVersion < minAcceptablePragmaVersion ||
301 thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
305 PUSH_STATE(GhcPragma);
309 <GhcPragma>"_N_" { RETURN(NO_PRAGMA); }
310 <GhcPragma>"_NI_" { RETURN(NOINFO_PRAGMA); }
311 <GhcPragma>"_DEFOREST_" { RETURN(DEFOREST_PRAGMA); }
312 <GhcPragma>"_SPECIALISE_" { RETURN(SPECIALISE_PRAGMA); }
313 <GhcPragma>"_A_" { RETURN(ARITY_PRAGMA); }
314 <GhcPragma>"_U_" { RETURN(UPDATE_PRAGMA); }
315 <GhcPragma>"_S_" { RETURN(STRICTNESS_PRAGMA); }
316 <GhcPragma>"_K_" { RETURN(KIND_PRAGMA); }
317 <GhcPragma>"_MF_" { RETURN(MAGIC_UNFOLDING_PRAGMA); }
318 <GhcPragma>"_F_" { RETURN(UNFOLDING_PRAGMA); }
320 <GhcPragma>"_!_" { RETURN(COCON); }
321 <GhcPragma>"_#_" { RETURN(COPRIM); }
322 <GhcPragma>"_APP_" { RETURN(COAPP); }
323 <GhcPragma>"_TYAPP_" { RETURN(COTYAPP); }
324 <GhcPragma>"_ALG_" { RETURN(CO_ALG_ALTS); }
325 <GhcPragma>"_PRIM_" { RETURN(CO_PRIM_ALTS); }
326 <GhcPragma>"_NO_DEFLT_" { RETURN(CO_NO_DEFAULT); }
327 <GhcPragma>"_LETREC_" { RETURN(CO_LETREC); }
329 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
330 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
331 <GhcPragma>"_USER_CC_" { RETURN(CO_USER_CC); }
332 <GhcPragma>"_AUTO_CC_" { RETURN(CO_AUTO_CC); }
333 <GhcPragma>"_DICT_CC_" { RETURN(CO_DICT_CC); }
335 <GhcPragma>"_DUPD_CC_" { RETURN(CO_DUPD_CC); }
336 <GhcPragma>"_CAF_CC_" { RETURN(CO_CAF_CC); }
338 <GhcPragma>"_SDSEL_" { RETURN(CO_SDSEL_ID); }
339 <GhcPragma>"_METH_" { RETURN(CO_METH_ID); }
340 <GhcPragma>"_DEFM_" { RETURN(CO_DEFM_ID); }
341 <GhcPragma>"_DFUN_" { RETURN(CO_DFUN_ID); }
342 <GhcPragma>"_CONSTM_" { RETURN(CO_CONSTM_ID); }
343 <GhcPragma>"_SPEC_" { RETURN(CO_SPEC_ID); }
344 <GhcPragma>"_WRKR_" { RETURN(CO_WRKR_ID); }
345 <GhcPragma>"_ORIG_" { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
347 <GhcPragma>"_ALWAYS_" { RETURN(UNFOLD_ALWAYS); }
348 <GhcPragma>"_IF_ARGS_" { RETURN(UNFOLD_IF_ARGS); }
350 <GhcPragma>"_NOREP_I_" { RETURN(NOREP_INTEGER); }
351 <GhcPragma>"_NOREP_R_" { RETURN(NOREP_RATIONAL); }
352 <GhcPragma>"_NOREP_S_" { RETURN(NOREP_STRING); }
354 <GhcPragma>" #-}" { POP_STATE; RETURN(END_PRAGMA); }
356 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
357 PUSH_STATE(UserPragma);
358 RETURN(SPECIALISE_UPRAGMA);
360 <Code,GlaExt>"{-#"{WS}*"INLINE" {
361 PUSH_STATE(UserPragma);
362 RETURN(INLINE_UPRAGMA);
364 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
365 PUSH_STATE(UserPragma);
366 RETURN(MAGIC_UNFOLDING_UPRAGMA);
368 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
369 PUSH_STATE(UserPragma);
370 RETURN(DEFOREST_UPRAGMA);
372 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
373 fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
374 input_filename, hsplineno);
375 format_string(stderr, (unsigned char *) yytext, yyleng);
376 fputs("'\n", stderr);
380 <UserPragma>"#-}" { POP_STATE; RETURN(END_UPRAGMA); }
384 * Haskell keywords. `scc' is actually a Glasgow extension, but it is
385 * intentionally accepted as a keyword even for normal <Code>.
389 <Code,GlaExt,GhcPragma>"case" { RETURN(CASE); }
390 <Code,GlaExt>"class" { RETURN(CLASS); }
391 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
392 <Code,GlaExt>"default" { RETURN(DEFAULT); }
393 <Code,GlaExt>"deriving" { RETURN(DERIVING); }
394 <Code,GlaExt>"do" { RETURN(DO); }
395 <Code,GlaExt>"else" { RETURN(ELSE); }
396 <Code,GlaExt>"if" { RETURN(IF); }
397 <Code,GlaExt>"import" { RETURN(IMPORT); }
398 <Code,GlaExt,GhcPragma>"in" { RETURN(IN); }
399 <Code,GlaExt>"infix" { RETURN(INFIX); }
400 <Code,GlaExt>"infixl" { RETURN(INFIXL); }
401 <Code,GlaExt>"infixr" { RETURN(INFIXR); }
402 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
403 <Code,GlaExt,GhcPragma>"let" { RETURN(LET); }
404 <Code,GlaExt>"module" { RETURN(MODULE); }
405 <Code,GlaExt>"newtype" { RETURN(NEWTYPE); }
406 <Code,GlaExt,GhcPragma>"of" { RETURN(OF); }
407 <Code,GlaExt>"then" { RETURN(THEN); }
408 <Code,GlaExt>"type" { RETURN(TYPE); }
409 <Code,GlaExt>"where" { RETURN(WHERE); }
411 <Code,GlaExt>"as" { RETURN(AS); }
412 <Code,GlaExt>"hiding" { RETURN(HIDING); }
413 <Code,GlaExt>"qualified" { RETURN(QUALIFIED); }
414 <Code,GlaExt>"interface" { RETURN(INTERFACE); }
416 <Code,GlaExt,GhcPragma>"_scc_" { RETURN(SCC); }
417 <GlaExt,GhcPragma>"_ccall_" { RETURN(CCALL); }
418 <GlaExt,GhcPragma>"_ccall_GC_" { RETURN(CCALL_GC); }
419 <GlaExt,GhcPragma>"_casm_" { RETURN(CASM); }
420 <GlaExt,GhcPragma>"_casm_GC_" { RETURN(CASM_GC); }
421 <GhcPragma>"_forall_" { RETURN(FORALL); }
425 * Haskell operators: special, reservedops and useful varsyms
429 <Code,GlaExt,GhcPragma,UserPragma>"(" { RETURN(OPAREN); }
430 <Code,GlaExt,GhcPragma,UserPragma>")" { RETURN(CPAREN); }
431 <Code,GlaExt,GhcPragma,UserPragma>"[" { RETURN(OBRACK); }
432 <Code,GlaExt,GhcPragma,UserPragma>"]" { RETURN(CBRACK); }
433 <Code,GlaExt,GhcPragma>"{" { RETURN(OCURLY); }
434 <Code,GlaExt,GhcPragma>"}" { RETURN(CCURLY); }
435 <Code,GlaExt,GhcPragma,UserPragma>"," { RETURN(COMMA); }
436 <Code,GlaExt,GhcPragma>";" { RETURN(SEMI); }
437 <Code,GlaExt,GhcPragma>"`" { RETURN(BQUOTE); }
438 <Code,GlaExt>"_" { RETURN(WILDCARD); }
440 <Code,GlaExt>".." { RETURN(DOTDOT); }
441 <Code,GlaExt,GhcPragma,UserPragma>"::" { RETURN(DCOLON); }
442 <Code,GlaExt,GhcPragma,UserPragma>"=" { RETURN(EQUAL); }
443 <Code,GlaExt,GhcPragma>"\\" { RETURN(LAMBDA); }
444 <Code,GlaExt,GhcPragma>"|" { RETURN(VBAR); }
445 <Code,GlaExt>"<-" { RETURN(LARROW); }
446 <Code,GlaExt,GhcPragma,UserPragma>"->" { RETURN(RARROW); }
447 <Code,GlaExt>"-" { RETURN(MINUS); }
449 <Code,GlaExt,GhcPragma,UserPragma>"=>" { RETURN(DARROW); }
450 <Code,GlaExt>"@" { RETURN(AT); }
451 <Code,GlaExt>"!" { RETURN(BANG); }
452 <Code,GlaExt>"~" { RETURN(LAZY); }
454 <GhcPragma>"_/\\_" { RETURN(TYLAMBDA); }
458 * Integers and (for Glasgow extensions) primitive integers. Note that
459 * we pass all of the text on to the parser, because flex/C can't handle
460 * arbitrary precision numbers.
464 <GlaExt>("-")?"0"[Oo]{O}+"#" { /* octal */
465 yylval.uid = xstrndup(yytext, yyleng - 1);
468 <Code,GlaExt>"0"[Oo]{O}+ { /* octal */
469 yylval.uid = xstrndup(yytext, yyleng);
472 <GlaExt>("-")?"0"[Xx]{H}+"#" { /* hexadecimal */
473 yylval.uid = xstrndup(yytext, yyleng - 1);
476 <Code,GlaExt>"0"[Xx]{H}+ { /* hexadecimal */
477 yylval.uid = xstrndup(yytext, yyleng);
480 <GlaExt,GhcPragma>("-")?{N}"#" {
481 yylval.uid = xstrndup(yytext, yyleng - 1);
484 <Code,GlaExt,GhcPragma>{N} {
485 yylval.uid = xstrndup(yytext, yyleng);
491 * Floats and (for Glasgow extensions) primitive floats/doubles.
495 <GlaExt,GhcPragma>("-")?{F}"##" {
496 yylval.uid = xstrndup(yytext, yyleng - 2);
499 <GlaExt,GhcPragma>("-")?{F}"#" {
500 yylval.uid = xstrndup(yytext, yyleng - 1);
504 yylval.uid = xstrndup(yytext, yyleng);
510 * Funky ``foo'' style C literals for Glasgow extensions
514 <GlaExt,GhcPragma>"``"[^']+"''" {
515 hsnewid(yytext + 2, yyleng - 4);
521 * Identifiers, both variables and operators. The trailing hash is allowed
522 * for Glasgow extensions.
526 <GhcPragma>"_NIL_" { hsnewid(yytext, yyleng); RETURN(CONID); }
527 <GhcPragma>"_TUP_"{D}+ { hsnewid(yytext, yyleng); RETURN(CONID); }
528 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
531 /* These SHOULDNAE work in "Code" (sigh) */
533 <Code,GlaExt,GhcPragma,UserPragma>{Id}"#" {
534 if (! (nonstandardFlag || in_interface)) {
535 char errbuf[ERR_BUF_SIZE];
536 sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
539 hsnewid(yytext, yyleng);
540 RETURN(_isconstr(yytext) ? CONID : VARID);
542 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} {
543 if (! (nonstandardFlag || in_interface)) {
544 char errbuf[ERR_BUF_SIZE];
545 sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
548 hsnewid(yytext, yyleng);
549 RETURN(isconstr(yytext) ? CONID : VARID);
550 /* NB: ^^^^^^^^ : not the macro! */
552 <Code,GlaExt,GhcPragma,UserPragma>{Id} {
553 hsnewid(yytext, yyleng);
554 RETURN(_isconstr(yytext) ? CONID : VARID);
556 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
557 hsnewid(yytext, yyleng);
558 RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
560 <Code,GlaExt,GhcPragma,UserPragma>{Mod}"."{Id} {
561 BOOLEAN isconstr = hsnewqid(yytext, yyleng);
562 RETURN(isconstr ? QCONID : QVARID);
564 <Code,GlaExt,GhcPragma,UserPragma>{Mod}"."{SId} {
565 BOOLEAN isconstr = hsnewqid(yytext, yyleng);
566 RETURN(isconstr ? QCONSYM : QVARSYM);
570 /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
572 /* Because we can make the former well-behaved (we defined them).
574 Sadly, the latter is defined by Haskell, which allows such
575 la-la land constructs as `{-a 900-line comment-} foo`. (WDP 94/12)
579 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`" {
580 hsnewid(yytext + 1, yyleng - 2);
581 RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
586 * Character literals. The first form is the quick form, for character
587 * literals that don't contain backslashes. Literals with backslashes are
588 * lexed through multiple rules. First, we match the open ' and as many
589 * normal characters as possible. This puts us into the <Char> state, where
590 * a backslash is legal. Then, we match the backslash and move into the
591 * <CharEsc> state. When we drop out of <CharEsc>, we collect more normal
592 * characters and the close '. We may end up with too many characters, but
593 * this allows us to easily share the lex rules with strings. Excess characters
594 * are ignored with a warning.
598 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
599 yylval.uhstring = installHstring(1, yytext+1);
602 <Code,GlaExt>'({CHAR}|"\"")' {
603 yylval.uhstring = installHstring(1, yytext+1);
606 <Code,GlaExt>'' {char errbuf[ERR_BUF_SIZE];
607 sprintf(errbuf, "'' is not a valid character (or string) literal\n");
610 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
611 hsmlcolno = hspcolno;
613 addtext(yytext+1, yyleng-1);
616 <Char>({CHAR}|"\"")*'# {
620 addtext(yytext, yyleng - 2);
621 text = fetchtext(&length);
623 if (! (nonstandardFlag || in_interface)) {
624 char errbuf[ERR_BUF_SIZE];
625 sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
630 fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
631 input_filename, hsplineno, hspcolno + 1);
632 format_string(stderr, (unsigned char *) text, length);
633 fputs("' too long\n", stderr);
636 yylval.uhstring = installHstring(1, text);
637 hspcolno = hsmlcolno;
641 <Char>({CHAR}|"\"")*' {
645 addtext(yytext, yyleng - 1);
646 text = fetchtext(&length);
649 fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
650 input_filename, hsplineno, hspcolno + 1);
651 format_string(stderr, (unsigned char *) text, length);
652 fputs("' too long\n", stderr);
655 yylval.uhstring = installHstring(1, text);
656 hspcolno = hsmlcolno;
660 <Char>({CHAR}|"\"")+ { addtext(yytext, yyleng); }
665 * String literals. The first form is the quick form, for string literals
666 * that don't contain backslashes. Literals with backslashes are lexed
667 * through multiple rules. First, we match the open " and as many normal
668 * characters as possible. This puts us into the <String> state, where
669 * a backslash is legal. Then, we match the backslash and move into the
670 * <StringEsc> state. When we drop out of <StringEsc>, we collect more normal
671 * characters, moving back and forth between <String> and <StringEsc> as more
672 * backslashes are encountered. (We may even digress into <Comment> mode if we
673 * find a comment in a gap between backslashes.) Finally, we read the last chunk
674 * of normal characters and the close ".
678 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""# {
679 yylval.uhstring = installHstring(yyleng-3, yytext+1);
680 /* the -3 accounts for the " on front, "# on the end */
683 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\"" {
684 yylval.uhstring = installHstring(yyleng-2, yytext+1);
687 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
688 hsmlcolno = hspcolno;
690 addtext(yytext+1, yyleng-1);
693 <String>({CHAR}|"'")*"\"#" {
697 addtext(yytext, yyleng-2);
698 text = fetchtext(&length);
700 if (! (nonstandardFlag || in_interface)) {
701 char errbuf[ERR_BUF_SIZE];
702 sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
706 yylval.uhstring = installHstring(length, text);
707 hspcolno = hsmlcolno;
711 <String>({CHAR}|"'")*"\"" {
715 addtext(yytext, yyleng-1);
716 text = fetchtext(&length);
718 yylval.uhstring = installHstring(length, text);
719 hspcolno = hsmlcolno;
723 <String>({CHAR}|"'")+ { addtext(yytext, yyleng); }
727 * Character and string escapes are roughly the same, but strings have the
728 * extra `\&' sequence which is not allowed for characters. Also, comments
729 * are allowed in the <StringEsc> state. (See the comment section much
732 * NB: Backslashes and tabs are stored in strings as themselves.
733 * But if we print them (in printtree.c), they must go out as
734 * "\\\\" and "\\t" respectively. (This is because of the bogus
735 * intermediate format that the parser produces. It uses '\t' fpr end of
736 * string, so it needs to be able to escape tabs, which means that it
737 * also needs to be able to escape the escape character ('\\'). Sigh.
741 <Char>\\ { PUSH_STATE(CharEsc); }
742 <String>\\& /* Ignore */ ;
743 <String>\\ { PUSH_STATE(StringEsc); noGap = TRUE; }
745 <CharEsc>\\ { addchar(*yytext); POP_STATE; }
746 <StringEsc>\\ { if (noGap) { addchar(*yytext); } POP_STATE; }
748 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
749 <CharEsc,StringEsc>NUL { addchar('\000'); POP_STATE; }
750 <CharEsc,StringEsc>SOH { addchar('\001'); POP_STATE; }
751 <CharEsc,StringEsc>STX { addchar('\002'); POP_STATE; }
752 <CharEsc,StringEsc>ETX { addchar('\003'); POP_STATE; }
753 <CharEsc,StringEsc>EOT { addchar('\004'); POP_STATE; }
754 <CharEsc,StringEsc>ENQ { addchar('\005'); POP_STATE; }
755 <CharEsc,StringEsc>ACK { addchar('\006'); POP_STATE; }
756 <CharEsc,StringEsc>BEL |
757 <CharEsc,StringEsc>a { addchar('\007'); POP_STATE; }
758 <CharEsc,StringEsc>BS |
759 <CharEsc,StringEsc>b { addchar('\010'); POP_STATE; }
760 <CharEsc,StringEsc>HT |
761 <CharEsc,StringEsc>t { addchar('\011'); POP_STATE; }
762 <CharEsc,StringEsc>LF |
763 <CharEsc,StringEsc>n { addchar('\012'); POP_STATE; }
764 <CharEsc,StringEsc>VT |
765 <CharEsc,StringEsc>v { addchar('\013'); POP_STATE; }
766 <CharEsc,StringEsc>FF |
767 <CharEsc,StringEsc>f { addchar('\014'); POP_STATE; }
768 <CharEsc,StringEsc>CR |
769 <CharEsc,StringEsc>r { addchar('\015'); POP_STATE; }
770 <CharEsc,StringEsc>SO { addchar('\016'); POP_STATE; }
771 <CharEsc,StringEsc>SI { addchar('\017'); POP_STATE; }
772 <CharEsc,StringEsc>DLE { addchar('\020'); POP_STATE; }
773 <CharEsc,StringEsc>DC1 { addchar('\021'); POP_STATE; }
774 <CharEsc,StringEsc>DC2 { addchar('\022'); POP_STATE; }
775 <CharEsc,StringEsc>DC3 { addchar('\023'); POP_STATE; }
776 <CharEsc,StringEsc>DC4 { addchar('\024'); POP_STATE; }
777 <CharEsc,StringEsc>NAK { addchar('\025'); POP_STATE; }
778 <CharEsc,StringEsc>SYN { addchar('\026'); POP_STATE; }
779 <CharEsc,StringEsc>ETB { addchar('\027'); POP_STATE; }
780 <CharEsc,StringEsc>CAN { addchar('\030'); POP_STATE; }
781 <CharEsc,StringEsc>EM { addchar('\031'); POP_STATE; }
782 <CharEsc,StringEsc>SUB { addchar('\032'); POP_STATE; }
783 <CharEsc,StringEsc>ESC { addchar('\033'); POP_STATE; }
784 <CharEsc,StringEsc>FS { addchar('\034'); POP_STATE; }
785 <CharEsc,StringEsc>GS { addchar('\035'); POP_STATE; }
786 <CharEsc,StringEsc>RS { addchar('\036'); POP_STATE; }
787 <CharEsc,StringEsc>US { addchar('\037'); POP_STATE; }
788 <CharEsc,StringEsc>SP { addchar('\040'); POP_STATE; }
789 <CharEsc,StringEsc>DEL { addchar('\177'); POP_STATE; }
790 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
791 <CharEsc,StringEsc>{D}+ {
792 int i = strtol(yytext, NULL, 10);
796 char errbuf[ERR_BUF_SIZE];
797 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
803 <CharEsc,StringEsc>o{O}+ {
804 int i = strtol(yytext + 1, NULL, 8);
808 char errbuf[ERR_BUF_SIZE];
809 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
815 <CharEsc,StringEsc>x{H}+ {
816 int i = strtol(yytext + 1, NULL, 16);
820 char errbuf[ERR_BUF_SIZE];
821 sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
830 * Simple comments and whitespace. Normally, we would just ignore these, but
831 * in case we're processing a string escape, we need to note that we've seen
834 * Note that we cater for a comment line that *doesn't* end in a newline.
835 * This is incorrect, strictly speaking, but seems like the right thing
836 * to do. Reported by Rajiv Mirani. (WDP 95/08)
840 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
841 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
845 * Nested comments. The major complication here is in trying to match the
846 * longest lexemes possible, for better performance. (See the flex document.)
847 * That's why the rules look so bizarre.
851 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-" {
852 noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
856 <Comment>"-"+[^-{}]+ |
857 <Comment>"{"+[^-{}]+ ;
858 <Comment>"{-" { nested_comments++; }
859 <Comment>"-}" { if (--nested_comments == 0) POP_STATE; }
864 * Illegal characters. This used to be a single rule, but we might as well
865 * pass on as much information as we have, so now we indicate our state in
870 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n) {
871 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
872 input_filename, hsplineno, hspcolno + 1);
873 format_string(stderr, (unsigned char *) yytext, 1);
874 fputs("'\n", stderr);
878 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
879 input_filename, hsplineno, hspcolno + 1);
880 format_string(stderr, (unsigned char *) yytext, 1);
881 fputs("' in a character literal\n", stderr);
885 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
886 input_filename, hsplineno, hspcolno + 1);
887 format_string(stderr, (unsigned char *) yytext, 1);
888 fputs("'\n", stderr);
891 <String>(.|\n) { if (nonstandardFlag) {
892 addtext(yytext, yyleng);
894 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
895 input_filename, hsplineno, hspcolno + 1);
896 format_string(stderr, (unsigned char *) yytext, 1);
897 fputs("' in a string literal\n", stderr);
903 fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
904 input_filename, hsplineno, hspcolno + 1);
905 format_string(stderr, (unsigned char *) yytext, 1);
906 fputs("'\n", stderr);
909 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
910 input_filename, hsplineno, hspcolno + 1);
911 format_string(stderr, (unsigned char *) yytext, 1);
912 fputs("' in a string gap\n", stderr);
919 * End of file. In any sub-state, this is an error. However, for the primary
920 * <Code> and <GlaExt> states, this is perfectly normal. We just return an EOF
921 * and let the yylex() wrapper deal with whatever has to be done next (e.g.
922 * adding virtual close curlies, or closing an interface and returning to the
923 * primary source file.
925 * Note that flex does not call YY_USER_ACTION for <<EOF>> rules. Hence the
926 * line/column advancement has to be done by hand.
930 <Char,CharEsc><<EOF>> {
931 hsplineno = hslineno; hspcolno = hscolno;
932 hsperror("unterminated character literal");
935 hsplineno = hslineno; hspcolno = hscolno;
936 hsperror("unterminated comment");
938 <String,StringEsc><<EOF>> {
939 hsplineno = hslineno; hspcolno = hscolno;
940 hsperror("unterminated string literal");
943 hsplineno = hslineno; hspcolno = hscolno;
944 hsperror("unterminated interface pragma");
946 <UserPragma><<EOF>> {
947 hsplineno = hslineno; hspcolno = hscolno;
948 hsperror("unterminated user-specified pragma");
950 <Code,GlaExt><<EOF>> { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
954 /**********************************************************************
957 * YACC/LEX Initialisation etc. *
960 **********************************************************************/
963 We initialise input_filename to "<stdin>".
964 This allows unnamed sources to be piped into the parser.
967 extern BOOLEAN acceptPrim;
972 input_filename = xstrdup("<stdin>");
974 /* We must initialize the input buffer _now_, because we call
975 setyyin _before_ calling yylex for the first time! */
976 yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
985 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
987 if (input_filename != NULL)
988 free(input_filename);
989 input_filename = xstrdup(f);
992 /**********************************************************************
995 * Layout Processing *
998 **********************************************************************/
1001 The following section deals with Haskell Layout conventions
1002 forcing insertion of ; or } as appropriate
1006 hsshouldindent(void)
1008 return (!forgetindent && INDENTON);
1012 /* Enter new context and set new indentation level */
1017 fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1021 * partain: first chk that new indent won't be less than current one; this code
1022 * doesn't make sense to me; hscolno tells the position of the _end_ of the
1023 * current token; what that has to do with indenting, I don't know.
1027 if (hscolno - 1 <= INDENTPT) {
1029 return; /* Empty input OK for Haskell 1.1 */
1031 char errbuf[ERR_BUF_SIZE];
1033 sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1037 hsentercontext((hspcolno << 1) | 1);
1041 /* Enter a new context without changing the indentation level */
1046 fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1048 hsentercontext(indenttab[icontexts] & ~1);
1052 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1056 forgetindent = TRUE;
1060 /* Enter a new layout context. */
1062 hsentercontext(int indent)
1064 /* Enter new context and set indentation as specified */
1065 if (++icontexts >= MAX_CONTEXTS) {
1066 char errbuf[ERR_BUF_SIZE];
1068 sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1071 forgetindent = FALSE;
1072 indenttab[icontexts] = indent;
1074 fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1079 /* Exit a layout context */
1085 fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1090 * Return checks the indentation level and returns ;, } or the specified token.
1100 if (hsshouldindent()) {
1101 if (hspcolno < INDENTPT) {
1103 fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1107 } else if (hspcolno == INDENTPT) {
1109 fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1117 fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1124 * Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1130 static BOOLEAN eof = FALSE;
1133 if (hssttok != -1) {
1141 endlineno = hslineno;
1142 if ((tok = yylex1()) != EOF)
1148 if (icontexts > icontexts_save) {
1151 indenttab[icontexts] = 0;
1154 hsperror("missing '}' at end of file");
1155 } else if (hsbuf_save != NULL) {
1157 yy_delete_buffer(YY_CURRENT_BUFFER);
1158 yy_switch_to_buffer(hsbuf_save);
1160 new_filename(filename_save);
1161 free(filename_save);
1162 hslineno = hslineno_save;
1163 hsplineno = hsplineno_save;
1164 hscolno = hscolno_save;
1165 hspcolno = hspcolno_save;
1167 in_interface = FALSE;
1168 icontexts = icontexts_save - 1;
1171 fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1178 abort(); /* should never get here! */
1182 /**********************************************************************
1185 * Input Processing for Interfaces *
1188 **********************************************************************/
1190 /* setyyin(file) open file as new lex input buffer */
1196 hsbuf_save = YY_CURRENT_BUFFER;
1197 if ((yyin = fopen(file, "r")) == NULL) {
1198 char errbuf[ERR_BUF_SIZE];
1200 sprintf(errbuf, "can't read \"%-.50s\"", file);
1203 yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1205 hslineno_save = hslineno;
1206 hsplineno_save = hsplineno;
1207 hslineno = hsplineno = 1;
1209 filename_save = input_filename;
1210 input_filename = NULL;
1212 hscolno_save = hscolno;
1213 hspcolno_save = hspcolno;
1214 hscolno = hspcolno = 0;
1215 in_interface = TRUE;
1216 etags_save = etags; /* do not do "etags" stuff in interfaces */
1217 etags = 0; /* We remember whether we are doing it in
1218 the module, so we can restore it later [WDP 94/09] */
1219 hsentercontext(-1); /* partain: changed this from 0 */
1220 icontexts_save = icontexts;
1222 fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1227 layout_input(char *text, int len)
1230 fprintf(stderr, "Scanning \"%s\"\n", text);
1233 hsplineno = hslineno;
1245 hscolno += 8 - (hscolno % 8); /* Tabs stops are 8 columns apart */
1257 setstartlineno(void)
1259 startlineno = hsplineno;
1261 if (modulelineno == 0) {
1262 modulelineno = startlineno;
1268 fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1272 /**********************************************************************
1278 **********************************************************************/
1280 #define CACHE_SIZE YY_BUF_SIZE
1286 } textcache = { 0, 0, NULL };
1291 /* fprintf(stderr, "cleartext\n"); */
1293 if (textcache.allocated == 0) {
1294 textcache.allocated = CACHE_SIZE;
1295 textcache.text = xmalloc(CACHE_SIZE);
1300 addtext(char *text, unsigned length)
1302 /* fprintf(stderr, "addtext: %d %s\n", length, text); */
1307 if (textcache.next + length + 1 >= textcache.allocated) {
1308 textcache.allocated += length + CACHE_SIZE;
1309 textcache.text = xrealloc(textcache.text, textcache.allocated);
1311 bcopy(text, textcache.text + textcache.next, length);
1312 textcache.next += length;
1318 /* fprintf(stderr, "addchar: %c\n", c); */
1320 if (textcache.next + 2 >= textcache.allocated) {
1321 textcache.allocated += CACHE_SIZE;
1322 textcache.text = xrealloc(textcache.text, textcache.allocated);
1324 textcache.text[textcache.next++] = c;
1328 fetchtext(unsigned *length)
1330 /* fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1332 *length = textcache.next;
1333 textcache.text[textcache.next] = '\0';
1334 return textcache.text;
1337 /**********************************************************************
1340 * Identifier Processing *
1343 **********************************************************************/
1346 hsnewid Enters an id of length n into the symbol table.
1350 hsnewid(char *name, int length)
1352 char save = name[length];
1354 name[length] = '\0';
1355 yylval.uid = installid(name);
1356 name[length] = save;
1360 hsnewqid(char *name, int length)
1363 char save = name[length];
1364 name[length] = '\0';
1366 dot = strchr(name, '.');
1368 yylval.uqid = mkaqual(installid(name),installid(dot+1));
1370 name[length] = save;
1372 return _isconstr(dot+1);
1376 isconstr(char *s) /* walks past leading underscores before using the macro */
1380 for ( ; temp != NULL && *temp == '_' ; temp++ );
1382 return _isconstr(temp);