[project @ 1999-01-14 17:58:41 by sof]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 /* The includes/config.h one */
11 #include "config.h"
12
13 #include <stdio.h>
14
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 #include <string.h>
17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #include <memory.h>
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define index strchr
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 #include <strings.h>
28 /* memory.h and strings.h conflict on some systems.  */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
30
31 #include "hspincl.h"
32 #include "hsparser.tab.h"
33 #include "constants.h"
34 #include "utils.h"
35
36 /* Our substitute for <ctype.h> */
37
38 #define NCHARS  256
39 #define _S      0x1
40 #define _D      0x2
41 #define _H      0x4
42 #define _O      0x8
43 #define _C      0x10
44
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
62
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 };
80
81 BOOLEAN
82 isconstr (char *s)
83 {
84     return(CharTable[*s]&(_C));
85 }
86
87 /**********************************************************************
88 *                                                                     *
89 *                                                                     *
90 *      Declarations                                                   *
91 *                                                                     *
92 *                                                                     *
93 **********************************************************************/
94
95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
96
97 /*
98  * For my own sanity, things that are not part of the flex skeleton
99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
100  */
101
102 static int hslineno = 0;        /* Line number at end of token */
103 int hsplineno = 0;              /* Line number at end of previous token */
104
105 static int hscolno = 0;         /* Column number at end of token */
106 int hspcolno = 0;               /* Column number at end of previous token */
107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
108
109 int modulelineno = -1;          /* The line number where the module starts */
110 int startlineno = 0;            /* The line number where something starts */
111 int endlineno = 0;              /* The line number where something ends */
112
113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
115
116 static int nested_comments;     /* For counting comment nesting depth */
117
118 /* OLD: Hacky definition of yywrap: see flex doc.
119
120    If we don't do this, then we'll have to get the default
121    yywrap from the flex library, which is often something
122    we are not good at locating.  This avoids that difficulty.
123    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124    WDP 94/09/05
125 #define yywrap() 1
126 */
127
128 /* Essential forward declarations */
129
130 static void hsnewid      PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext    (NO_ARGS);
133 static void addtext      PROTO((char *, unsigned));
134 static void addchar      PROTO((char));
135 static char *fetchtext   PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int  Return       PROTO((int));
138 static void hsentercontext PROTO((int));
139
140 static BOOLEAN is_commment PROTO((char*, int));
141
142 /* Special file handling for IMPORTS */
143 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
144
145 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
146 static char *filename_save;             /* File Name                     */
147 static int hslineno_save = 0,           /* Line Number                   */
148  hsplineno_save = 0,                    /* Line Number of Prev. token    */
149  hscolno_save = 0,                      /* Indentation                   */
150  hspcolno_save = 0;                     /* Left Indentation              */
151 static short icontexts_save = 0;        /* Indent Context Level          */
152
153 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
154 extern BOOLEAN etags;                   /* that which is saved */
155
156 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
157
158 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
159                                  * inserted before token +ve  -- "}" inserted before
160                                  * token */
161
162 short icontexts = 0;            /* Which context we're in */
163
164 /*
165         Table of indentations:  right bit indicates whether to use
166           indentation rules (1 = use rules; 0 = ignore)
167
168     partain:
169     push one of these "contexts" at every "case" or "where"; the right bit says
170     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
171
172     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
173     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
174     push is shown just below.
175
176 */
177
178
179 static short indenttab[MAX_CONTEXTS] = {-1};
180
181 #define INDENTPT (indenttab[icontexts]>>1)
182 #define INDENTON (indenttab[icontexts]&1)
183
184 #define RETURN(tok) return(Return(tok))
185
186 #undef YY_DECL
187 #define YY_DECL int yylex1()
188
189 /* We should not peek at yy_act, but flex calls us even for the internal action
190    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
191    to support older versions of flex, we'll continue to peek for now.
192  */
193 #define YY_USER_ACTION \
194     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
195
196 #if 0/*debug*/
197 #undef YY_BREAK
198 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
199 #endif
200
201 /* Each time we enter a new start state, we push it onto the state stack.
202 */
203 #define PUSH_STATE(n)   yy_push_state(n)
204 #define POP_STATE       yy_pop_state()
205
206 %}
207 /* Options:
208     8bit (8-bit input)
209     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
210     never-interactive (to go a bit faster)
211     stack (use a start-condition stack)
212 */
213 %option 8bit
214 %option noyywrap
215 %option never-interactive
216 %option stack
217
218 /* The start states are:
219    Code -- normal Haskell code (principal lexer)
220    GlaExt -- Haskell code with Glasgow extensions
221    Comment -- Nested comment processing
222    String -- Inside a string literal with backslashes
223    StringEsc -- Immediately following a backslash in a string literal
224    Char -- Inside a character literal with backslashes
225    CharEsc -- Immediately following a backslash in a character literal 
226
227    Note that the INITIAL state is unused.  Also note that these states
228    are _exclusive_.  All rules should be prefixed with an appropriate
229    list of start states.
230  */
231
232 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
233
234 isoS                    [\xa1-\xbf\xd7\xf7]
235 isoL                    [\xc0-\xd6\xd8-\xde]
236 isol                    [\xdf-\xf6\xf8-\xff]
237 isoA                    [\xa1-\xff]
238
239 D                       [0-9]
240 O                       [0-7]
241 H                       [0-9A-Fa-f]
242 N                       {D}+
243 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
244 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
245 SId                     {S}{S}*
246 L                       [A-Z\xc0-\xd6\xd8-\xde]
247 l                       [a-z_\xdf-\xf6\xf8-\xff]
248 I                       {L}|{l}
249 i                       {L}|{l}|[0-9'_]
250 Id                      {I}{i}*
251 Mod                     {L}{i}*
252 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
253 CNTRL                   [@A-Z\[\\\]^_]
254 WS                      [ \t\n\r\f\v]
255 NL                      [\n\r]
256
257 %%
258
259 %{
260     /*
261      * Simple comments and whitespace.  Normally, we would just ignore these, but
262      * in case we're processing a string escape, we need to note that we've seen
263      * a gap.
264      *
265      * Note that we cater for a comment line that *doesn't* end in a newline.
266      * This is incorrect, strictly speaking, but seems like the right thing
267      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
268      *
269      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
270      */
271 %}
272
273 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
274
275 %{
276     /* 
277      * Special GHC pragma rules.  Do we need a start state for interface files,
278      * so these won't be matched in source files? --JSM
279      */
280
281 %}
282
283 %{
284 /* I believe the next rule is not ever matched.
285
286    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
287    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
288    where possible, so the lexer should now never see cpp directives
289    like '# ' and '#line'.
290
291    -- SOF
292
293 <Code,GlaExt>^"# ".*{NL}    {
294                           char tempf[FILENAME_SIZE];
295                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
296                           new_filename(tempf);
297                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
298                         }
299
300 <Code,GlaExt>^"#line ".*{NL}    {
301                           char tempf[FILENAME_SIZE];
302                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
303                           new_filename(tempf); 
304                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
305                         }
306 */
307 %}
308
309 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
310                           /* partain: pragma-style line directive */
311                           char tempf[FILENAME_SIZE];
312                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
313                           new_filename(tempf);
314                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
315                         }
316
317 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
318                               PUSH_STATE(UserPragma);
319                               RETURN(INTERFACE_UPRAGMA);
320                             }
321 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
322                               PUSH_STATE(UserPragma);
323                               RETURN(SPECIALISE_UPRAGMA);
324                             }
325 <Code,GlaExt>"{-#"{WS}*"INLINE" {
326                               PUSH_STATE(UserPragma);
327                               RETURN(INLINE_UPRAGMA);
328                             }
329 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
330                               PUSH_STATE(UserPragma);
331                               RETURN(NOINLINE_UPRAGMA);
332                             }
333 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
334                               PUSH_STATE(UserPragma);
335                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
336                             }
337 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
338                               /* these are handled by hscpp */
339                               nested_comments =1;
340                               PUSH_STATE(Comment);
341                             }
342 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
343                               /* these are for the driver! */
344                               nested_comments =1;
345                               PUSH_STATE(Comment);
346                             }
347 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
348                               /* these are used by `make depend' and the
349                                  compiler to indicate that a module should
350                                  be imported from source */
351                               nested_comments =1;
352                               RETURN(SOURCE_UPRAGMA); 
353                             }
354
355 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
356                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
357                                 input_filename, hsplineno);
358                               format_string(stderr, (unsigned char *) yytext, yyleng);
359                               fputs("'\n", stderr);
360                               nested_comments = 1;
361                               PUSH_STATE(Comment);
362                             }
363 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
364
365 %{
366     /*
367      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
368      * intentionally accepted as a keyword even for normal <Code>.
369      */
370 %}
371
372 <Code,GlaExt>"case"             { RETURN(CASE); }
373 <Code,GlaExt>"class"            { RETURN(CLASS); }
374 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
375 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
376 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
377 <Code,GlaExt>"do"               { RETURN(DO); }
378 <Code,GlaExt>"else"             { RETURN(ELSE); }
379 <Code,GlaExt>"if"               { RETURN(IF); }
380 <Code,GlaExt>"import"           { RETURN(IMPORT); }
381 <Code,GlaExt>"in"               { RETURN(IN); }
382 <Code,GlaExt>"infix"            { RETURN(INFIX); }
383 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
384 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
385 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
386 <Code,GlaExt>"let"              { RETURN(LET); }
387 <Code,GlaExt>"module"           { RETURN(MODULE); }
388 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
389 <Code,GlaExt>"of"               { RETURN(OF); }
390 <Code,GlaExt>"then"             { RETURN(THEN); }
391 <Code,GlaExt>"type"             { RETURN(TYPE); }
392 <Code,GlaExt>"where"            { RETURN(WHERE); }
393
394 <Code,GlaExt>"as"               { RETURN(AS); }
395 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
396 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
397
398 <Code,GlaExt>"forall"           { RETURN(FORALL); }
399
400 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
401 <GlaExt>"_ccall_"               { RETURN(CCALL); }
402 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
403 <GlaExt>"_casm_"                { RETURN(CASM); }
404 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
405 <GlaExt>"(#"                    { RETURN(OUNBOXPAREN); }
406 <GlaExt>"#)"                    { RETURN(CUNBOXPAREN); }
407 <GlaExt>"foreign"               { RETURN(FOREIGN); }
408 <GlaExt>"export"                { RETURN(EXPORT); }
409 <GlaExt>"label"                 { RETURN(LABEL); }
410 <GlaExt>"unsafe"                { RETURN(UNSAFE); }
411 <GlaExt>"_stdcall"              { RETURN(STDCALL); }
412 <GlaExt>"_ccall"                { RETURN(C_CALL); }
413 <GlaExt>"_pascal"               { RETURN(PASCAL); }
414 <GlaExt>"stdcall"               { RETURN(STDCALL); }
415 <GlaExt>"ccall"                 { RETURN(C_CALL); }
416 <GlaExt>"pascal"                { RETURN(PASCAL); }
417 <GlaExt>"dynamic"               { RETURN(DYNAMIC); }
418
419 %{
420     /* 
421      * Haskell operators: special, reservedops and useful varsyms
422      */
423 %}
424
425 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
426 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
427 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
428 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
429 <Code,GlaExt>"{"                { RETURN(OCURLY); }
430 <Code,GlaExt>"}"                { RETURN(CCURLY); }
431 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
432 <Code,GlaExt>";"                { RETURN(SEMI); }
433 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
434
435 <Code,GlaExt>"."                { RETURN(DOT); }
436 <Code,GlaExt>".."               { RETURN(DOTDOT); }
437 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
438 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
439 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
440 <Code,GlaExt>"|"                { RETURN(VBAR); }
441 <Code,GlaExt>"<-"               { RETURN(LARROW); }
442 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
443 <Code,GlaExt>"-"                { RETURN(MINUS); }
444 <Code,GlaExt>"+"                { RETURN(PLUS); }
445
446 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
447 <Code,GlaExt>"@"                { RETURN(AT); }
448 <Code,GlaExt>"!"                { RETURN(BANG); }
449 <Code,GlaExt>"~"                { RETURN(LAZY); }
450
451 %{
452     /*
453      * Integers and (for Glasgow extensions) primitive integers.  Note that
454      * we pass all of the text on to the parser, because flex/C can't handle
455      * arbitrary precision numbers.
456      */
457 %}
458
459 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
460                          yylval.uid = xstrndup(yytext, yyleng - 1);
461                          RETURN(INTPRIM);
462                         }
463 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
464                          yylval.uid = xstrndup(yytext, yyleng);
465                          RETURN(INTEGER);
466                         }
467 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
468                          yylval.uid = xstrndup(yytext, yyleng - 1);
469                          RETURN(INTPRIM);
470                         }
471 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
472                          yylval.uid = xstrndup(yytext, yyleng);
473                          RETURN(INTEGER);
474                         }
475 <GlaExt>("-")?{N}"#"    {
476                          yylval.uid = xstrndup(yytext, yyleng - 1);
477                          RETURN(INTPRIM);
478                         }
479 <Code,GlaExt,UserPragma>{N} {
480                          yylval.uid = xstrndup(yytext, yyleng);
481                          RETURN(INTEGER);
482                         }
483
484 %{
485     /*
486      * Floats and (for Glasgow extensions) primitive floats/doubles.
487      */
488 %}
489
490 <GlaExt>("-")?{F}"##"   {
491                          yylval.uid = xstrndup(yytext, yyleng - 2);
492                          RETURN(DOUBLEPRIM);
493                         }
494 <GlaExt>("-")?{F}"#"    {
495                          yylval.uid = xstrndup(yytext, yyleng - 1);
496                          RETURN(FLOATPRIM);
497                         }
498 <Code,GlaExt>{F}        {
499                          yylval.uid = xstrndup(yytext, yyleng);
500                          RETURN(FLOAT);
501                         }
502
503 %{
504     /*
505      * Funky ``foo'' style C literals for Glasgow extensions
506      */
507 %}
508
509 <GlaExt>"``"[^']+"''"   {
510                          hsnewid(yytext + 2, yyleng - 4);
511                          RETURN(CLITLIT);
512                         }
513
514 %{
515     /*
516      * Identifiers, both variables and operators.  The trailing hash is allowed
517      * for Glasgow extensions.
518      */
519 %}
520
521
522 %{
523 /* These SHOULDNAE work in "Code" (sigh) */
524 %}
525 <GlaExt,UserPragma>{Id}"#" { 
526                         if (! nonstandardFlag) {
527                             char errbuf[ERR_BUF_SIZE];
528                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
529                             hsperror(errbuf);
530                          }
531                          hsnewid(yytext, yyleng);
532                          RETURN(isconstr(yytext) ? CONID : VARID);
533                         }
534 <Code,GlaExt,UserPragma>{Id}    {
535                          hsnewid(yytext, yyleng);
536                          RETURN(isconstr(yytext) ? CONID : VARID);
537                         }
538 <Code,GlaExt,UserPragma>{SId}   {
539                          if (is_commment(yytext,yyleng)) {
540                                 int c;
541                                 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
542                                         ;
543                                 if (c != EOF)
544                                    unput(c);
545                          } else {
546                             hsnewid(yytext, yyleng);
547                             RETURN(isconstr(yytext) ? CONSYM : VARSYM);
548                          }
549                         }
550 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
551                          BOOLEAN is_constr;
552                          if (! nonstandardFlag) {
553                             char errbuf[ERR_BUF_SIZE];
554                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
555                             hsperror(errbuf);
556                          }
557                          is_constr = hsnewqid(yytext, yyleng);
558                          RETURN(is_constr ? QCONID : QVARID);
559                         }
560 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
561                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
562                          RETURN(is_constr ? QCONID : QVARID);
563                         }
564 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
565                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
566                          RETURN(is_constr ? QCONSYM : QVARSYM);
567                         }
568
569 %{
570     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
571
572     /* Because we can make the former well-behaved (we defined them).
573
574        Sadly, the latter is defined by Haskell, which allows such
575        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
576     */
577 %}
578
579 <GlaExt,UserPragma>"`"{Id}"#`"  {       
580                          hsnewid(yytext + 1, yyleng - 2);
581                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
582                         }
583
584 %{
585     /*
586      * Character literals.  The first form is the quick form, for character
587      * literals that don't contain backslashes.  Literals with backslashes are
588      * lexed through multiple rules.  First, we match the open ' and as many
589      * normal characters as possible.  This puts us into the <Char> state, where
590      * a backslash is legal.  Then, we match the backslash and move into the 
591      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
592      * characters and the close '.  We may end up with too many characters, but
593      * this allows us to easily share the lex rules with strings.  Excess characters
594      * are ignored with a warning.
595      */
596 %}
597
598 <GlaExt>'({CHAR}|"\"")"'#" {
599                          yylval.uhstring = installHstring(1, yytext+1);
600                          RETURN(CHARPRIM);
601                         }
602 <Code,GlaExt>'({CHAR}|"\"")'    {
603                          yylval.uhstring = installHstring(1, yytext+1);
604                          RETURN(CHAR);
605                         }
606 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
607                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
608                          hsperror(errbuf);
609                         }
610 <Code,GlaExt>'({CHAR}|"\"")* {
611                          hsmlcolno = hspcolno;
612                          cleartext();
613                          addtext(yytext+1, yyleng-1);
614                          PUSH_STATE(Char);
615                         }
616 <Char>({CHAR}|"\"")*'#  {
617                          unsigned length;
618                          char *text;
619
620                          addtext(yytext, yyleng - 2);
621                          text = fetchtext(&length);
622
623                          if (! nonstandardFlag) {
624                             char errbuf[ERR_BUF_SIZE];
625                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
626                             hsperror(errbuf);
627                          }
628
629                          if (length > 1) {
630                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
631                               input_filename, hsplineno, hspcolno + 1);
632                             format_string(stderr, (unsigned char *) text, length);
633                             fputs("' too long\n", stderr);
634                             hsperror("");
635                          }
636                          yylval.uhstring = installHstring(1, text);
637                          hspcolno = hsmlcolno;
638                          POP_STATE;
639                          RETURN(CHARPRIM); 
640                         }
641 <Char>({CHAR}|"\"")*'   {
642                          unsigned length;
643                          char *text;
644
645                          addtext(yytext, yyleng - 1);
646                          text = fetchtext(&length);
647
648                          if (length > 1) {
649                             fprintf(stderr, "%s:%d:%d: Character literal '",
650                               input_filename, hsplineno, hspcolno + 1);
651                             format_string(stderr, (unsigned char *) text, length);
652                             fputs("' too long\n", stderr);
653                             hsperror("");
654                          }
655                          yylval.uhstring = installHstring(1, text);
656                          hspcolno = hsmlcolno;
657                          POP_STATE;
658                          RETURN(CHAR); 
659                         }
660 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
661
662
663 %{
664     /*
665      * String literals.  The first form is the quick form, for string literals
666      * that don't contain backslashes.  Literals with backslashes are lexed
667      * through multiple rules.  First, we match the open " and as many normal
668      * characters as possible.  This puts us into the <String> state, where
669      * a backslash is legal.  Then, we match the backslash and move into the 
670      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
671      * characters, moving back and forth between <String> and <StringEsc> as more
672      * backslashes are encountered.  (We may even digress into <Comment> mode if we
673      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
674      * of normal characters and the close ".
675      */
676 %}
677
678 <GlaExt>"\""({CHAR}|"'")*"\""#  {
679                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
680                             /* the -3 accounts for the " on front, "# on the end */
681                          RETURN(STRINGPRIM); 
682                         }
683 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
684                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
685                          RETURN(STRING); 
686                         }
687 <Code,GlaExt>"\""({CHAR}|"'")* {
688                          hsmlcolno = hspcolno;
689                          cleartext();
690                          addtext(yytext+1, yyleng-1);
691                          PUSH_STATE(String);
692                         }
693 <String>({CHAR}|"'")*"\"#"   {
694                          unsigned length;
695                          char *text;
696
697                          addtext(yytext, yyleng-2);
698                          text = fetchtext(&length);
699
700                          if (! nonstandardFlag) {
701                             char errbuf[ERR_BUF_SIZE];
702                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
703                             hsperror(errbuf);
704                          }
705
706                          yylval.uhstring = installHstring(length, text);
707                          hspcolno = hsmlcolno;
708                          POP_STATE;
709                          RETURN(STRINGPRIM);
710                         }
711 <String>({CHAR}|"'")*"\""   {
712                          unsigned length;
713                          char *text;
714
715                          addtext(yytext, yyleng-1);
716                          text = fetchtext(&length);
717
718                          yylval.uhstring = installHstring(length, text);
719                          hspcolno = hsmlcolno;
720                          POP_STATE;
721                          RETURN(STRING); 
722                         }
723 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
724
725 %{
726     /*
727      * Character and string escapes are roughly the same, but strings have the
728      * extra `\&' sequence which is not allowed for characters.  Also, comments
729      * are allowed in the <StringEsc> state.  (See the comment section much
730      * further down.)
731      *
732      * NB: Backslashes and tabs are stored in strings as themselves.
733      * But if we print them (in printtree.c), they must go out as
734      * "\\\\" and "\\t" respectively.  (This is because of the bogus
735      * intermediate format that the parser produces.  It uses '\t' fpr end of
736      * string, so it needs to be able to escape tabs, which means that it
737      * also needs to be able to escape the escape character ('\\').  Sigh.
738      */
739 %}
740
741 <Char>\\                { PUSH_STATE(CharEsc); }
742 <String>\\&             /* Ignore */ ;
743 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
744
745 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
746 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
747
748 %{
749 /*
750  Not 100% correct, tokenizes "foo \  --<>--
751                                  \ bar"
752
753  as "foo  bar", but this is not correct as per Haskell 98 report and its
754  maximal munch rule for "--"-style comments.
755
756  For the moment, not deemed worthy to fix.
757 */
758 %}
759 <StringEsc>"--"[^\n\r]*{NL}?{WS}*  { noGap=FALSE; }
760
761 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
762 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
763 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
764 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
765 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
766 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
767 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
768 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
769 <CharEsc,StringEsc>BEL  |
770 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
771 <CharEsc,StringEsc>BS   |
772 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
773 <CharEsc,StringEsc>HT   |
774 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
775 <CharEsc,StringEsc>LF   |
776 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
777 <CharEsc,StringEsc>VT   |
778 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
779 <CharEsc,StringEsc>FF   |
780 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
781 <CharEsc,StringEsc>CR   |
782 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
783 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
784 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
785 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
786 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
787 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
788 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
789 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
790 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
791 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
792 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
793 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
794 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
795 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
796 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
797 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
798 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
799 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
800 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
801 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
802 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
803 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
804 <CharEsc,StringEsc>{D}+  {
805                           int i = strtol(yytext, NULL, 10);
806                           if (i < NCHARS) {
807                              addchar((char) i);
808                           } else {
809                              char errbuf[ERR_BUF_SIZE];
810                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
811                                 yytext);
812                              hsperror(errbuf);
813                           }
814                           POP_STATE;
815                         }
816 <CharEsc,StringEsc>o{O}+ {
817                           int i = strtol(yytext + 1, NULL, 8);
818                           if (i < NCHARS) {
819                              addchar((char) i);
820                           } else {
821                              char errbuf[ERR_BUF_SIZE];
822                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
823                                 yytext);
824                              hsperror(errbuf);
825                           }
826                           POP_STATE;
827                         }
828 <CharEsc,StringEsc>x{H}+ {
829                           int i = strtol(yytext + 1, NULL, 16);
830                           if (i < NCHARS) {
831                              addchar((char) i);
832                           } else {
833                              char errbuf[ERR_BUF_SIZE];
834                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
835                                 yytext);
836                              hsperror(errbuf);
837                           }
838                           POP_STATE;
839                         }
840
841
842 %{
843     /*
844      * Nested comments.  The major complication here is in trying to match the
845      * longest lexemes possible, for better performance.  (See the flex document.)
846      * That's why the rules look so bizarre.
847      */
848 %}
849
850 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
851                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
852                         }
853
854 <Comment>[^-{]*         |
855 <Comment>"-"+[^-{}]+    |
856 <Comment>"{"+[^-{}]+    ;
857 <Comment>"{-"           { nested_comments++; }
858 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
859 <Comment>(.|\n)         ;
860
861
862 %{
863     /*
864      * Illegal characters.  This used to be a single rule, but we might as well
865      * pass on as much information as we have, so now we indicate our state in
866      * the error message.
867      */
868 %}
869
870 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
871                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
872                             input_filename, hsplineno, hspcolno + 1); 
873                          format_string(stderr, (unsigned char *) yytext, 1);
874                          fputs("'\n", stderr);
875                          hsperror("");
876                         }
877 <Char>(.|\n)            { 
878                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
879                             input_filename, hsplineno, hspcolno + 1); 
880                          format_string(stderr, (unsigned char *) yytext, 1);
881                          fputs("' in a character literal\n", stderr);
882                          hsperror("");
883                         }
884 <CharEsc>(.|\n)         {
885                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
886                             input_filename, hsplineno, hspcolno + 1); 
887                          format_string(stderr, (unsigned char *) yytext, 1);
888                          fputs("'\n", stderr);
889                          hsperror("");
890                         }
891 <String>(.|\n)          { if (nonstandardFlag) {
892                              addtext(yytext, yyleng);
893                           } else { 
894                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
895                                 input_filename, hsplineno, hspcolno + 1); 
896                                 format_string(stderr, (unsigned char *) yytext, 1);
897                                 fputs("' in a string literal\n", stderr);
898                                 hsperror("");
899                           }
900                         }
901 <StringEsc>(.|\n)       {
902                          if (noGap) {
903                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
904                                 input_filename, hsplineno, hspcolno + 1); 
905                              format_string(stderr, (unsigned char *) yytext, 1);
906                              fputs("'\n", stderr);
907                              hsperror("");
908                          } else {
909                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
910                                 input_filename, hsplineno, hspcolno + 1);
911                              format_string(stderr, (unsigned char *) yytext, 1);
912                              fputs("' in a string gap\n", stderr);
913                              hsperror("");
914                          }
915                         }
916
917 %{
918     /*
919      * End of file.  In any sub-state, this is an error.  However, for the primary
920      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
921      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
922      * adding virtual close curlies, or closing an interface and returning to the
923      * primary source file.
924      *
925      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
926      * line/column advancement has to be done by hand.
927      */
928 %}
929
930 <Char,CharEsc><<EOF>>   { 
931                           hsplineno = hslineno; hspcolno = hscolno;
932                           hsperror("unterminated character literal");
933                         }
934 <Comment><<EOF>>        { 
935                           hsplineno = hslineno; hspcolno = hscolno;
936                           hsperror("unterminated comment"); 
937                         }
938 <String,StringEsc><<EOF>>   { 
939                           hsplineno = hslineno; hspcolno = hscolno;
940                           hsperror("unterminated string literal"); 
941                         }
942 <UserPragma><<EOF>>     {
943                           hsplineno = hslineno; hspcolno = hscolno;
944                           hsperror("unterminated user-specified pragma"); 
945                         }
946 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
947
948 %%
949
950 /**********************************************************************
951 *                                                                     *
952 *                                                                     *
953 *     YACC/LEX Initialisation etc.                                    *
954 *                                                                     *
955 *                                                                     *
956 **********************************************************************/
957
958 /*
959    We initialise input_filename to "<stdin>".
960    This allows unnamed sources to be piped into the parser.
961 */
962
963 void
964 yyinit(void)
965 {
966     input_filename = xstrdup("<stdin>");
967
968     /* We must initialize the input buffer _now_, because we call
969        setyyin _before_ calling yylex for the first time! */
970     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
971
972     if (nonstandardFlag)
973         PUSH_STATE(GlaExt);
974     else
975         PUSH_STATE(Code);
976 }
977
978 static void
979 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
980 {
981     if (input_filename != NULL)
982         free(input_filename);
983     input_filename = xstrdup(f);
984 }
985
986 /**********************************************************************
987 *                                                                     *
988 *                                                                     *
989 *     Layout Processing                                               *
990 *                                                                     *
991 *                                                                     *
992 **********************************************************************/
993
994 /*
995         The following section deals with Haskell Layout conventions
996         forcing insertion of ; or } as appropriate
997 */
998
999 #ifdef HSP_DEBUG
1000 #define LAYOUT_DEBUG
1001 #endif
1002
1003
1004 static BOOLEAN
1005 hsshouldindent(void)
1006 {
1007     return (!forgetindent && INDENTON);
1008 }
1009
1010
1011 /* Enter new context and set new indentation level */
1012 void
1013 hssetindent(void)
1014 {
1015 #ifdef LAYOUT_DEBUG
1016     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1017 #endif
1018
1019     /*
1020      * partain: first chk that new indent won't be less than current one; this code
1021      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1022      * current token; what that has to do with indenting, I don't know.
1023      */
1024
1025
1026     if (hscolno - 1 <= INDENTPT) {
1027         if (INDENTPT == -1)
1028             return;             /* Empty input OK for Haskell 1.1 */
1029         else {
1030             char errbuf[ERR_BUF_SIZE];
1031
1032             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1033             hsperror(errbuf);
1034         }
1035     }
1036     hsentercontext((hspcolno << 1) | 1);
1037 }
1038
1039
1040 /* Enter a new context without changing the indentation level */
1041 void
1042 hsincindent(void)
1043 {
1044 #ifdef LAYOUT_DEBUG
1045     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1046 #endif
1047     hsentercontext(indenttab[icontexts] & ~1);
1048 }
1049
1050
1051 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1052 void
1053 hsindentoff(void)
1054 {
1055     forgetindent = TRUE;
1056 }
1057
1058
1059 /* Enter a new layout context. */
1060 static void
1061 hsentercontext(int indent)
1062 {
1063     /* Enter new context and set indentation as specified */
1064     if (++icontexts >= MAX_CONTEXTS) {
1065         char errbuf[ERR_BUF_SIZE];
1066
1067         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1068         hsperror(errbuf);
1069     }
1070     forgetindent = FALSE;
1071     indenttab[icontexts] = indent;
1072 #ifdef LAYOUT_DEBUG
1073     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1074 #endif
1075 }
1076
1077
1078 /* Exit a layout context */
1079 void
1080 hsendindent(void)
1081 {
1082     --icontexts;
1083 #ifdef LAYOUT_DEBUG
1084     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1085 #endif
1086 }
1087
1088 /*
1089  *      Return checks the indentation level and returns ;, } or the specified token.
1090  */
1091 static int
1092 Return(int tok)
1093 {
1094 #ifdef HSP_DEBUG
1095     extern int yyleng;
1096 #endif
1097     if (hsshouldindent()) {
1098         if (hspcolno < INDENTPT) {
1099 #ifdef HSP_DEBUG
1100             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1101 #endif
1102             hssttok = tok;
1103             return (VCCURLY);
1104         } else if (hspcolno == INDENTPT) {
1105 #ifdef HSP_DEBUG
1106             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1107 #endif
1108             hssttok = -tok;
1109             return (SEMI);
1110         }
1111     }
1112
1113     hssttok = -1;
1114 #ifdef HSP_DEBUG
1115     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1116 #endif
1117     return (tok);
1118 }
1119
1120
1121 /*
1122  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1123  */
1124 int
1125 yylex()
1126 {
1127     int tok;
1128     static BOOLEAN eof = FALSE;
1129
1130     if (!eof) {
1131         if (hssttok != -1) {
1132             if (hssttok < 0) {
1133                 tok = -hssttok;
1134                 hssttok = -1;
1135                 return tok;
1136             }
1137             RETURN(hssttok);
1138         } else {
1139             endlineno = hslineno;
1140             if ((tok = yylex1()) != EOF)
1141                 return tok;
1142             else
1143                 eof = TRUE;
1144         }
1145     }
1146     if (icontexts > icontexts_save) {
1147         if (INDENTON) {
1148             eof = TRUE;
1149             indenttab[icontexts] = 0;
1150             return (VCCURLY);
1151         } else
1152             hsperror("missing '}' at end of file");
1153     } else if (hsbuf_save != NULL) {
1154         fclose(yyin);
1155         yy_delete_buffer(YY_CURRENT_BUFFER);
1156         yy_switch_to_buffer(hsbuf_save);
1157         hsbuf_save = NULL;
1158         new_filename(filename_save);
1159         free(filename_save);
1160         hslineno = hslineno_save;
1161         hsplineno = hsplineno_save;
1162         hscolno = hscolno_save;
1163         hspcolno = hspcolno_save;
1164         etags = etags_save;
1165         icontexts = icontexts_save - 1;
1166         icontexts_save = 0;
1167 #ifdef HSP_DEBUG
1168         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1169 #endif
1170         eof = FALSE;
1171
1172         /* RETURN(LEOF); */
1173         hsperror("No longer using yacc to parse interface files");
1174
1175     } else {
1176         yyterminate();
1177     }
1178     abort(); /* should never get here! */
1179     return(0);
1180 }
1181
1182 /**********************************************************************
1183 *                                                                     *
1184 *                                                                     *
1185 *     Input Processing for Interfaces -- Not currently used !!!       *
1186 *                                                                     *
1187 *                                                                     *
1188 **********************************************************************/
1189
1190 /* setyyin(file)        open file as new lex input buffer */
1191 extern FILE *yyin;
1192
1193 void
1194 setyyin(char *file)
1195 {
1196     hsbuf_save = YY_CURRENT_BUFFER;
1197     if ((yyin = fopen(file, "r")) == NULL) {
1198         char errbuf[ERR_BUF_SIZE];
1199
1200         sprintf(errbuf, "can't read \"%-.50s\"", file);
1201         hsperror(errbuf);
1202     }
1203     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1204
1205     hslineno_save = hslineno;
1206     hsplineno_save = hsplineno;
1207     hslineno = hsplineno = 1;
1208
1209     filename_save = input_filename;
1210     input_filename = NULL;
1211     new_filename(file);
1212     hscolno_save = hscolno;
1213     hspcolno_save = hspcolno;
1214     hscolno = hspcolno = 0;
1215     etags_save = etags; /* do not do "etags" stuff in interfaces */
1216     etags = 0;          /* We remember whether we are doing it in
1217                            the module, so we can restore it later [WDP 94/09] */
1218     hsentercontext(-1);         /* partain: changed this from 0 */
1219     icontexts_save = icontexts;
1220 #ifdef HSP_DEBUG
1221     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1222 #endif
1223 }
1224
1225 static void
1226 layout_input(char *text, int len)
1227 {
1228 #ifdef HSP_DEBUG
1229     fprintf(stderr, "Scanning \"%s\"\n", text);
1230 #endif
1231
1232     hsplineno = hslineno;
1233     hspcolno = hscolno;
1234
1235     while (len-- > 0) {
1236         switch (*text++) {
1237         case '\n':
1238         case '\r':
1239         case '\f':
1240             hslineno++;
1241             hscolno = 0;
1242             break;
1243         case '\t':
1244             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1245             break;
1246         case '\v':
1247             break;
1248         default:
1249             ++hscolno;
1250             break;
1251         }
1252     }
1253 }
1254
1255 void
1256 setstartlineno(void)
1257 {
1258     startlineno = hsplineno;
1259
1260     if (modulelineno == 0) {
1261         modulelineno = startlineno;
1262     }
1263
1264 #if 1/*etags*/
1265 #else
1266     if (etags)
1267         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1268 #endif
1269 }
1270
1271 /**********************************************************************
1272 *                                                                     *
1273 *                                                                     *
1274 *                      Text Caching                                   *
1275 *                                                                     *
1276 *                                                                     *
1277 **********************************************************************/
1278
1279 #define CACHE_SIZE YY_BUF_SIZE
1280
1281 static struct {
1282     unsigned allocated;
1283     unsigned next;
1284     char *text;
1285 } textcache = { 0, 0, NULL };
1286
1287 static void
1288 cleartext(void)
1289 {
1290 /*  fprintf(stderr, "cleartext\n"); */
1291     textcache.next = 0;
1292     if (textcache.allocated == 0) {
1293         textcache.allocated = CACHE_SIZE;
1294         textcache.text = xmalloc(CACHE_SIZE);
1295     }
1296 }
1297
1298 static void
1299 addtext(char *text, unsigned length)
1300 {
1301 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1302
1303     if (length == 0)
1304         return;
1305
1306     if (textcache.next + length + 1 >= textcache.allocated) {
1307         textcache.allocated += length + CACHE_SIZE;
1308         textcache.text = xrealloc(textcache.text, textcache.allocated);
1309     }
1310     bcopy(text, textcache.text + textcache.next, length);
1311     textcache.next += length;
1312 }
1313
1314 static void
1315 addchar(char c)
1316 {
1317 /*  fprintf(stderr, "addchar: %c\n", c); */
1318
1319     if (textcache.next + 2 >= textcache.allocated) {
1320         textcache.allocated += CACHE_SIZE;
1321         textcache.text = xrealloc(textcache.text, textcache.allocated);
1322     }
1323     textcache.text[textcache.next++] = c;
1324 }
1325
1326 static char *
1327 fetchtext(unsigned *length)
1328 {
1329 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1330
1331     *length = textcache.next;
1332     textcache.text[textcache.next] = '\0';
1333     return textcache.text;
1334 }
1335
1336 /**********************************************************************
1337 *                                                                     *
1338 *                                                                     *
1339 *    Identifier Processing                                             *
1340 *                                                                     *
1341 *                                                                     *
1342 **********************************************************************/
1343
1344 /*
1345         hsnewid         Enters an id of length n into the symbol table.
1346 */
1347
1348 static void
1349 hsnewid(char *name, int length)
1350 {
1351     char save = name[length];
1352
1353     name[length] = '\0';
1354     yylval.uid = installid(name);
1355     name[length] = save;
1356 }
1357
1358 BOOLEAN
1359 hsnewqid(char *name, int length)
1360 {
1361     char* dot;
1362     char save = name[length];
1363     name[length] = '\0';
1364
1365     dot = strchr(name, '.');
1366     *dot = '\0';
1367     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1368     *dot = '.';
1369     name[length] = save;
1370
1371     return isconstr(dot+1);
1372 }
1373
1374 static
1375 BOOLEAN
1376 is_commment(char* lexeme, int len)
1377 {
1378    char* ptr;
1379    int i;
1380         
1381    if (len < 2) {
1382       return FALSE;
1383    }
1384
1385    for(i=0;i<len;i++) {
1386      if (lexeme[i] != '-') return FALSE;
1387    }        
1388    return TRUE;
1389 }
1390