a3bb0350ed0d12b059a4f59312c8ca1638cca5ce
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 /* The includes/config.h one */
11 #include "config.h"
12
13 #include <stdio.h>
14
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 #include <string.h>
17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #include <memory.h>
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define index strchr
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 #include <strings.h>
28 /* memory.h and strings.h conflict on some systems.  */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
30
31 #include "hspincl.h"
32 #include "hsparser.tab.h"
33 #include "constants.h"
34 #include "utils.h"
35
36 /* Our substitute for <ctype.h> */
37
38 #define NCHARS  256
39 #define _S      0x1
40 #define _D      0x2
41 #define _H      0x4
42 #define _O      0x8
43 #define _C      0x10
44
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
62
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 };
80
81 BOOLEAN
82 isconstr (char *s)
83 {
84     return(CharTable[*s]&(_C));
85 }
86
87 /**********************************************************************
88 *                                                                     *
89 *                                                                     *
90 *      Declarations                                                   *
91 *                                                                     *
92 *                                                                     *
93 **********************************************************************/
94
95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
96
97 /*
98  * For my own sanity, things that are not part of the flex skeleton
99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
100  */
101
102 static int hslineno = 0;        /* Line number at end of token */
103 int hsplineno = 0;              /* Line number at end of previous token */
104
105 static int hscolno = 0;         /* Column number at end of token */
106 int hspcolno = 0;               /* Column number at end of previous token */
107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
108
109 int modulelineno = -1;          /* The line number where the module starts */
110 int startlineno = 0;            /* The line number where something starts */
111 int endlineno = 0;              /* The line number where something ends */
112
113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
115
116 static int nested_comments;     /* For counting comment nesting depth */
117 static int comment_start;
118
119 /* OLD: Hacky definition of yywrap: see flex doc.
120
121    If we don't do this, then we'll have to get the default
122    yywrap from the flex library, which is often something
123    we are not good at locating.  This avoids that difficulty.
124    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
125    WDP 94/09/05
126 #define yywrap() 1
127 */
128
129 /* Essential forward declarations */
130
131 static void hsnewid      PROTO((char *, int));
132 static void layout_input PROTO((char *, int));
133 static void cleartext    (NO_ARGS);
134 static void addtext      PROTO((char *, unsigned));
135 static void addchar      PROTO((char));
136 static char *fetchtext   PROTO((unsigned *));
137 static void new_filename PROTO((char *));
138 static int  Return       PROTO((int));
139 static void hsentercontext PROTO((int));
140
141 static BOOLEAN is_commment PROTO((char*, int));
142
143 /* Special file handling for IMPORTS */
144 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
145
146 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
147 static char *filename_save;             /* File Name                     */
148 static int hslineno_save = 0,           /* Line Number                   */
149  hsplineno_save = 0,                    /* Line Number of Prev. token    */
150  hscolno_save = 0,                      /* Indentation                   */
151  hspcolno_save = 0;                     /* Left Indentation              */
152 static short icontexts_save = 0;        /* Indent Context Level          */
153
154 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
155 extern BOOLEAN etags;                   /* that which is saved */
156
157 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
158
159 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
160                                  * inserted before token +ve  -- "}" inserted before
161                                  * token */
162
163 short icontexts = 0;            /* Which context we're in */
164
165 /*
166         Table of indentations:  right bit indicates whether to use
167           indentation rules (1 = use rules; 0 = ignore)
168
169     partain:
170     push one of these "contexts" at every "case" or "where"; the right bit says
171     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
172
173     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
174     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
175     push is shown just below.
176
177 */
178
179
180 static short indenttab[MAX_CONTEXTS] = {-1};
181
182 #define INDENTPT (indenttab[icontexts]>>1)
183 #define INDENTON (indenttab[icontexts]&1)
184
185 #define RETURN(tok) return(Return(tok))
186
187 #undef YY_DECL
188 #define YY_DECL int yylex1()
189
190 /* We should not peek at yy_act, but flex calls us even for the internal action
191    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
192    to support older versions of flex, we'll continue to peek for now.
193  */
194 #define YY_USER_ACTION \
195     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
196
197 #if 0/*debug*/
198 #undef YY_BREAK
199 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
200 #endif
201
202 /* Each time we enter a new start state, we push it onto the state stack.
203 */
204 #define PUSH_STATE(n)   yy_push_state(n)
205 #define POP_STATE       yy_pop_state()
206
207 %}
208 /* Options:
209     8bit (8-bit input)
210     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
211     never-interactive (to go a bit faster)
212     stack (use a start-condition stack)
213 */
214 %option 8bit
215 %option noyywrap
216 %option never-interactive
217 %option stack
218
219 /* The start states are:
220    Code -- normal Haskell code (principal lexer)
221    GlaExt -- Haskell code with Glasgow extensions
222    Comment -- Nested comment processing
223    String -- Inside a string literal with backslashes
224    StringEsc -- Immediately following a backslash in a string literal
225    Char -- Inside a character literal with backslashes
226    CharEsc -- Immediately following a backslash in a character literal 
227
228    Note that the INITIAL state is unused.  Also note that these states
229    are _exclusive_.  All rules should be prefixed with an appropriate
230    list of start states.
231  */
232
233 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
234
235 isoS                    [\xa1-\xbf\xd7\xf7]
236 isoL                    [\xc0-\xd6\xd8-\xde]
237 isol                    [\xdf-\xf6\xf8-\xff]
238 isoA                    [\xa1-\xff]
239
240 D                       [0-9]
241 O                       [0-7]
242 H                       [0-9A-Fa-f]
243 N                       {D}+
244 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
245 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
246 SId                     {S}{S}*
247 L                       [A-Z\xc0-\xd6\xd8-\xde]
248 l                       [a-z_\xdf-\xf6\xf8-\xff]
249 I                       {L}|{l}
250 i                       {L}|{l}|[0-9'_]
251 Id                      {I}{i}*
252 Mod                     {L}{i}*
253 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
254 CNTRL                   [@A-Z\[\\\]^_]
255 WS                      [ \t\n\r\f\v]
256 NL                      [\n\r]
257
258 %%
259
260 %{
261     /*
262      * Simple comments and whitespace.  Normally, we would just ignore these, but
263      * in case we're processing a string escape, we need to note that we've seen
264      * a gap.
265      *
266      * Note that we cater for a comment line that *doesn't* end in a newline.
267      * This is incorrect, strictly speaking, but seems like the right thing
268      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
269      *
270      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
271      */
272 %}
273
274 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
275
276 %{
277     /* 
278      * Special GHC pragma rules.  Do we need a start state for interface files,
279      * so these won't be matched in source files? --JSM
280      */
281
282 %}
283
284 %{
285 /* I believe the next rule is not ever matched.
286
287    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
288    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
289    where possible, so the lexer should now never see cpp directives
290    like '# ' and '#line'.
291
292    -- SOF
293
294 <Code,GlaExt>^"# ".*{NL}    {
295                           char tempf[FILENAME_SIZE];
296                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
297                           new_filename(tempf);
298                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
299                         }
300
301 <Code,GlaExt>^"#line ".*{NL}    {
302                           char tempf[FILENAME_SIZE];
303                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
304                           new_filename(tempf); 
305                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
306                         }
307 */
308 %}
309
310 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
311                           /* partain: pragma-style line directive */
312                           char tempf[FILENAME_SIZE];
313                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
314                           new_filename(tempf);
315                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
316                         }
317
318 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
319                               PUSH_STATE(UserPragma);
320                               RETURN(INTERFACE_UPRAGMA);
321                             }
322 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
323                               PUSH_STATE(UserPragma);
324                               RETURN(SPECIALISE_UPRAGMA);
325                             }
326 <Code,GlaExt>"{-#"{WS}*"speciali"[sz]e {
327                               PUSH_STATE(UserPragma);
328                               RETURN(SPECIALISE_UPRAGMA);
329                             }
330 <Code,GlaExt>"{-#"{WS}*"INLINE" {
331                               PUSH_STATE(UserPragma);
332                               RETURN(INLINE_UPRAGMA);
333                             }
334 <Code,GlaExt>"{-#"{WS}*"RULES" {
335                               PUSH_STATE(Code); /* I'm not sure about this */
336                               RETURN(RULES_UPRAGMA);
337                             }
338 <Code,GlaExt>"{-#"{WS}*"inline" {
339                               PUSH_STATE(UserPragma);
340                               RETURN(INLINE_UPRAGMA);
341                             }
342 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
343                               PUSH_STATE(UserPragma);
344                               RETURN(NOINLINE_UPRAGMA);
345                             }
346 <Code,GlaExt>"{-#"{WS}*"notInline" {
347                               PUSH_STATE(UserPragma);
348                               RETURN(NOINLINE_UPRAGMA);
349                             }
350 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
351                               PUSH_STATE(UserPragma);
352                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
353                             }
354 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
355                               /* these are handled by hscpp */
356                               nested_comments =1; comment_start = hsplineno;
357                               PUSH_STATE(Comment);
358                             }
359 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
360                               /* these are for the driver! */
361                               nested_comments =1; comment_start = hsplineno;
362                               PUSH_STATE(Comment);
363                             }
364 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
365                               /* these are used by `make depend' and the
366                                  compiler to indicate that a module should
367                                  be imported from source */
368                               nested_comments =1; comment_start = hsplineno; 
369                               RETURN(SOURCE_UPRAGMA); 
370                             }
371
372 <Code,GlaExt>"{-#"{WS}*[a-zA-Z_]+ {
373                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
374                                 input_filename, hsplineno);
375                               format_string(stderr, (unsigned char *) yytext, yyleng);
376                               fputs("'\n", stderr);
377                               nested_comments = 1; comment_start = hsplineno;
378                               PUSH_STATE(Comment);
379                             }
380 <Code,GlaExt,UserPragma>"#-}"       { POP_STATE; RETURN(END_UPRAGMA); }
381
382 %{
383     /*
384      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
385      * intentionally accepted as a keyword even for normal <Code>.
386      */
387 %}
388
389 <Code,GlaExt>"case"             { RETURN(CASE); }
390 <Code,GlaExt>"class"            { RETURN(CLASS); }
391 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
392 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
393 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
394 <Code,GlaExt>"do"               { RETURN(DO); }
395 <Code,GlaExt>"else"             { RETURN(ELSE); }
396 <Code,GlaExt>"if"               { RETURN(IF); }
397 <Code,GlaExt>"import"           { RETURN(IMPORT); }
398 <Code,GlaExt>"in"               { RETURN(IN); }
399 <Code,GlaExt>"infix"            { RETURN(INFIX); }
400 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
401 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
402 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
403 <Code,GlaExt>"let"              { RETURN(LET); }
404 <Code,GlaExt>"module"           { RETURN(MODULE); }
405 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
406 <Code,GlaExt>"of"               { RETURN(OF); }
407 <Code,GlaExt>"then"             { RETURN(THEN); }
408 <Code,GlaExt>"type"             { RETURN(TYPE); }
409 <Code,GlaExt>"where"            { RETURN(WHERE); }
410
411 <Code,GlaExt>"as"               { RETURN(AS); }
412 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
413 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
414
415 <Code,GlaExt>"forall"           { RETURN(FORALL); }
416
417 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
418 <GlaExt>"_ccall_"               { RETURN(CCALL); }
419 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
420 <GlaExt>"_casm_"                { RETURN(CASM); }
421 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
422 <GlaExt>"(#"                    { RETURN(OUNBOXPAREN); }
423 <GlaExt>"#)"                    { RETURN(CUNBOXPAREN); }
424 <GlaExt>"foreign"               { RETURN(FOREIGN); }
425 <GlaExt>"export"                { RETURN(EXPORT); }
426 <GlaExt>"label"                 { RETURN(LABEL); }
427 <GlaExt>"unsafe"                { RETURN(UNSAFE); }
428 <GlaExt>"_stdcall"              { RETURN(STDCALL); }
429 <GlaExt>"_ccall"                { RETURN(C_CALL); }
430 <GlaExt>"_pascal"               { RETURN(PASCAL); }
431 <GlaExt>"stdcall"               { RETURN(STDCALL); }
432 <GlaExt>"ccall"                 { RETURN(C_CALL); }
433 <GlaExt>"pascal"                { RETURN(PASCAL); }
434 <GlaExt>"dynamic"               { RETURN(DYNAMIC); }
435
436 %{
437     /* 
438      * Haskell operators: special, reservedops and useful varsyms
439      */
440 %}
441
442 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
443 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
444 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
445 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
446 <Code,GlaExt>"{"                { RETURN(OCURLY); }
447 <Code,GlaExt>"}"                { RETURN(CCURLY); }
448 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
449 <Code,GlaExt>";"                { RETURN(SEMI); }
450 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
451
452 <Code,GlaExt>"."                { RETURN(DOT); }
453 <Code,GlaExt>".."               { RETURN(DOTDOT); }
454 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
455 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
456 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
457 <Code,GlaExt>"|"                { RETURN(VBAR); }
458 <Code,GlaExt>"<-"               { RETURN(LARROW); }
459 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
460 <Code,GlaExt>"-"                { RETURN(MINUS); }
461 <Code,GlaExt>"+"                { RETURN(PLUS); }
462
463 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
464 <Code,GlaExt>"@"                { RETURN(AT); }
465 <Code,GlaExt>"!"                { RETURN(BANG); }
466 <Code,GlaExt>"~"                { RETURN(LAZY); }
467
468 %{
469     /*
470      * Integers and (for Glasgow extensions) primitive integers.  Note that
471      * we pass all of the text on to the parser, because flex/C can't handle
472      * arbitrary precision numbers.
473      */
474 %}
475
476 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
477                          yylval.uid = xstrndup(yytext, yyleng - 1);
478                          RETURN(INTPRIM);
479                         }
480 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
481                          yylval.uid = xstrndup(yytext, yyleng);
482                          RETURN(INTEGER);
483                         }
484 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
485                          yylval.uid = xstrndup(yytext, yyleng - 1);
486                          RETURN(INTPRIM);
487                         }
488 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
489                          yylval.uid = xstrndup(yytext, yyleng);
490                          RETURN(INTEGER);
491                         }
492 <GlaExt>("-")?{N}"#"    {
493                          yylval.uid = xstrndup(yytext, yyleng - 1);
494                          RETURN(INTPRIM);
495                         }
496 <Code,GlaExt,UserPragma>{N} {
497                          yylval.uid = xstrndup(yytext, yyleng);
498                          RETURN(INTEGER);
499                         }
500
501 %{
502     /*
503      * Floats and (for Glasgow extensions) primitive floats/doubles.
504      */
505 %}
506
507 <GlaExt>("-")?{F}"##"   {
508                          yylval.uid = xstrndup(yytext, yyleng - 2);
509                          RETURN(DOUBLEPRIM);
510                         }
511 <GlaExt>("-")?{F}"#"    {
512                          yylval.uid = xstrndup(yytext, yyleng - 1);
513                          RETURN(FLOATPRIM);
514                         }
515 <Code,GlaExt>{F}        {
516                          yylval.uid = xstrndup(yytext, yyleng);
517                          RETURN(FLOAT);
518                         }
519
520 %{
521     /*
522      * Funky ``foo'' style C literals for Glasgow extensions
523      */
524 %}
525
526 <GlaExt>"``"[^']+"''"   {
527                          hsnewid(yytext + 2, yyleng - 4);
528                          RETURN(CLITLIT);
529                         }
530
531 %{
532     /*
533      * Identifiers, both variables and operators.  The trailing hash is allowed
534      * for Glasgow extensions.
535      */
536 %}
537
538
539 %{
540 /* These SHOULDNAE work in "Code" (sigh) */
541 %}
542 <GlaExt,UserPragma>{Id}"#" { 
543                         if (! nonstandardFlag) {
544                             char errbuf[ERR_BUF_SIZE];
545                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
546                             hsperror(errbuf);
547                          }
548                          hsnewid(yytext, yyleng);
549                          RETURN(isconstr(yytext) ? CONID : VARID);
550                         }
551 <Code,GlaExt,UserPragma>{Id}    {
552                          hsnewid(yytext, yyleng);
553                          RETURN(isconstr(yytext) ? CONID : VARID);
554                         }
555 <Code,GlaExt,UserPragma>{SId}   {
556                          if (is_commment(yytext,yyleng)) {
557                                 int c;
558                                 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
559                                         ;
560                                 if (c != EOF)
561                                    unput(c);
562                          } else {
563                             hsnewid(yytext, yyleng);
564                             RETURN(isconstr(yytext) ? CONSYM : VARSYM);
565                          }
566                         }
567 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
568                          BOOLEAN is_constr;
569                          if (! nonstandardFlag) {
570                             char errbuf[ERR_BUF_SIZE];
571                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
572                             hsperror(errbuf);
573                          }
574                          is_constr = hsnewqid(yytext, yyleng);
575                          RETURN(is_constr ? QCONID : QVARID);
576                         }
577 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
578                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
579                          RETURN(is_constr ? QCONID : QVARID);
580                         }
581 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
582                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
583                          RETURN(is_constr ? QCONSYM : QVARSYM);
584                         }
585
586 %{
587     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
588
589     /* Because we can make the former well-behaved (we defined them).
590
591        Sadly, the latter is defined by Haskell, which allows such
592        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
593     */
594 %}
595
596 <GlaExt,UserPragma>"`"{Id}"#`"  {       
597                          hsnewid(yytext + 1, yyleng - 2);
598                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
599                         }
600
601 %{
602     /*
603      * Character literals.  The first form is the quick form, for character
604      * literals that don't contain backslashes.  Literals with backslashes are
605      * lexed through multiple rules.  First, we match the open ' and as many
606      * normal characters as possible.  This puts us into the <Char> state, where
607      * a backslash is legal.  Then, we match the backslash and move into the 
608      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
609      * characters and the close '.  We may end up with too many characters, but
610      * this allows us to easily share the lex rules with strings.  Excess characters
611      * are ignored with a warning.
612      */
613 %}
614
615 <GlaExt>'({CHAR}|"\"")"'#" {
616                          yylval.uhstring = installHstring(1, yytext+1);
617                          RETURN(CHARPRIM);
618                         }
619 <Code,GlaExt>'({CHAR}|"\"")'    {
620                          yylval.uhstring = installHstring(1, yytext+1);
621                          RETURN(CHAR);
622                         }
623 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
624                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
625                          hsperror(errbuf);
626                         }
627 <Code,GlaExt>'({CHAR}|"\"")* {
628                          hsmlcolno = hspcolno;
629                          cleartext();
630                          addtext(yytext+1, yyleng-1);
631                          PUSH_STATE(Char);
632                         }
633 <Char>({CHAR}|"\"")*'#  {
634                          unsigned length;
635                          char *text;
636
637                          addtext(yytext, yyleng - 2);
638                          text = fetchtext(&length);
639
640                          if (! nonstandardFlag) {
641                             char errbuf[ERR_BUF_SIZE];
642                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
643                             hsperror(errbuf);
644                          }
645
646                          if (length > 1) {
647                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
648                               input_filename, hsplineno, hspcolno + 1);
649                             format_string(stderr, (unsigned char *) text, length);
650                             fputs("' too long\n", stderr);
651                             hsperror("");
652                          }
653                          yylval.uhstring = installHstring(1, text);
654                          hspcolno = hsmlcolno;
655                          POP_STATE;
656                          RETURN(CHARPRIM); 
657                         }
658 <Char>({CHAR}|"\"")*'   {
659                          unsigned length;
660                          char *text;
661
662                          addtext(yytext, yyleng - 1);
663                          text = fetchtext(&length);
664
665                          if (length > 1) {
666                             fprintf(stderr, "%s:%d:%d: Character literal '",
667                               input_filename, hsplineno, hspcolno + 1);
668                             format_string(stderr, (unsigned char *) text, length);
669                             fputs("' too long\n", stderr);
670                             hsperror("");
671                          }
672                          yylval.uhstring = installHstring(1, text);
673                          hspcolno = hsmlcolno;
674                          POP_STATE;
675                          RETURN(CHAR); 
676                         }
677 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
678
679
680 %{
681     /*
682      * String literals.  The first form is the quick form, for string literals
683      * that don't contain backslashes.  Literals with backslashes are lexed
684      * through multiple rules.  First, we match the open " and as many normal
685      * characters as possible.  This puts us into the <String> state, where
686      * a backslash is legal.  Then, we match the backslash and move into the 
687      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
688      * characters, moving back and forth between <String> and <StringEsc> as more
689      * backslashes are encountered.  (We may even digress into <Comment> mode if we
690      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
691      * of normal characters and the close ".
692      */
693 %}
694
695 <GlaExt>"\""({CHAR}|"'")*"\""#  {
696                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
697                             /* the -3 accounts for the " on front, "# on the end */
698                          RETURN(STRINGPRIM); 
699                         }
700 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
701                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
702                          RETURN(STRING); 
703                         }
704 <Code,GlaExt>"\""({CHAR}|"'")* {
705                          hsmlcolno = hspcolno;
706                          cleartext();
707                          addtext(yytext+1, yyleng-1);
708                          PUSH_STATE(String);
709                         }
710 <String>({CHAR}|"'")*"\"#"   {
711                          unsigned length;
712                          char *text;
713
714                          addtext(yytext, yyleng-2);
715                          text = fetchtext(&length);
716
717                          if (! nonstandardFlag) {
718                             char errbuf[ERR_BUF_SIZE];
719                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
720                             hsperror(errbuf);
721                          }
722
723                          yylval.uhstring = installHstring(length, text);
724                          hspcolno = hsmlcolno;
725                          POP_STATE;
726                          RETURN(STRINGPRIM);
727                         }
728 <String>({CHAR}|"'")*"\""   {
729                          unsigned length;
730                          char *text;
731
732                          addtext(yytext, yyleng-1);
733                          text = fetchtext(&length);
734
735                          yylval.uhstring = installHstring(length, text);
736                          hspcolno = hsmlcolno;
737                          POP_STATE;
738                          RETURN(STRING); 
739                         }
740 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
741
742 %{
743     /*
744      * Character and string escapes are roughly the same, but strings have the
745      * extra `\&' sequence which is not allowed for characters.  Also, comments
746      * are allowed in the <StringEsc> state.  (See the comment section much
747      * further down.)
748      *
749      * NB: Backslashes and tabs are stored in strings as themselves.
750      * But if we print them (in printtree.c), they must go out as
751      * "\\\\" and "\\t" respectively.  (This is because of the bogus
752      * intermediate format that the parser produces.  It uses '\t' fpr end of
753      * string, so it needs to be able to escape tabs, which means that it
754      * also needs to be able to escape the escape character ('\\').  Sigh.
755      */
756 %}
757
758 <Char>\\                { PUSH_STATE(CharEsc); }
759 <String>\\&             /* Ignore */ ;
760 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
761
762 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
763 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
764
765 %{
766 /*
767  Not 100% correct, tokenizes "foo \  --<>--
768                                  \ bar"
769
770  as "foo  bar", but this is not correct as per Haskell 98 report and its
771  maximal munch rule for "--"-style comments.
772
773  For the moment, not deemed worthy to fix.
774 */
775 %}
776 <StringEsc>"--"[^\n\r]*{NL}?{WS}*  { noGap=FALSE; }
777
778 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
779 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
780 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
781 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
782 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
783 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
784 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
785 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
786 <CharEsc,StringEsc>BEL  |
787 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
788 <CharEsc,StringEsc>BS   |
789 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
790 <CharEsc,StringEsc>HT   |
791 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
792 <CharEsc,StringEsc>LF   |
793 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
794 <CharEsc,StringEsc>VT   |
795 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
796 <CharEsc,StringEsc>FF   |
797 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
798 <CharEsc,StringEsc>CR   |
799 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
800 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
801 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
802 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
803 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
804 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
805 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
806 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
807 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
808 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
809 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
810 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
811 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
812 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
813 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
814 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
815 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
816 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
817 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
818 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
819 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
820 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
821 <CharEsc,StringEsc>{D}+  {
822                           int i = strtol(yytext, NULL, 10);
823                           if (i < NCHARS) {
824                              addchar((char) i);
825                           } else {
826                              char errbuf[ERR_BUF_SIZE];
827                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
828                                 yytext);
829                              hsperror(errbuf);
830                           }
831                           POP_STATE;
832                         }
833 <CharEsc,StringEsc>o{O}+ {
834                           int i = strtol(yytext + 1, NULL, 8);
835                           if (i < NCHARS) {
836                              addchar((char) i);
837                           } else {
838                              char errbuf[ERR_BUF_SIZE];
839                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
840                                 yytext);
841                              hsperror(errbuf);
842                           }
843                           POP_STATE;
844                         }
845 <CharEsc,StringEsc>x{H}+ {
846                           int i = strtol(yytext + 1, NULL, 16);
847                           if (i < NCHARS) {
848                              addchar((char) i);
849                           } else {
850                              char errbuf[ERR_BUF_SIZE];
851                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
852                                 yytext);
853                              hsperror(errbuf);
854                           }
855                           POP_STATE;
856                         }
857
858
859 %{
860     /*
861      * Nested comments.  The major complication here is in trying to match the
862      * longest lexemes possible, for better performance.  (See the flex document.)
863      * That's why the rules look so bizarre.
864      */
865 %}
866
867 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
868                           noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment); 
869                         }
870
871 <Comment>[^-{]*         |
872 <Comment>"-"+[^-{}]+    |
873 <Comment>"{"+[^-{}]+    ;
874 <Comment>"{-"           { nested_comments++; }
875 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
876 <Comment>(.|\n)         ;
877
878
879 %{
880     /*
881      * Illegal characters.  This used to be a single rule, but we might as well
882      * pass on as much information as we have, so now we indicate our state in
883      * the error message.
884      */
885 %}
886
887 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
888                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
889                             input_filename, hsplineno, hspcolno + 1); 
890                          format_string(stderr, (unsigned char *) yytext, 1);
891                          fputs("'\n", stderr);
892                          hsperror("");
893                         }
894 <Char>(.|\n)            { 
895                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
896                             input_filename, hsplineno, hspcolno + 1); 
897                          format_string(stderr, (unsigned char *) yytext, 1);
898                          fputs("' in a character literal\n", stderr);
899                          hsperror("");
900                         }
901 <CharEsc>(.|\n)         {
902                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
903                             input_filename, hsplineno, hspcolno + 1); 
904                          format_string(stderr, (unsigned char *) yytext, 1);
905                          fputs("'\n", stderr);
906                          hsperror("");
907                         }
908 <String>(.|\n)          { if (nonstandardFlag) {
909                              addtext(yytext, yyleng);
910                           } else { 
911                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
912                                 input_filename, hsplineno, hspcolno + 1); 
913                                 format_string(stderr, (unsigned char *) yytext, 1);
914                                 fputs("' in a string literal\n", stderr);
915                                 hsperror("");
916                           }
917                         }
918 <StringEsc>(.|\n)       {
919                          if (noGap) {
920                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
921                                 input_filename, hsplineno, hspcolno + 1); 
922                              format_string(stderr, (unsigned char *) yytext, 1);
923                              fputs("'\n", stderr);
924                              hsperror("");
925                          } else {
926                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
927                                 input_filename, hsplineno, hspcolno + 1);
928                              format_string(stderr, (unsigned char *) yytext, 1);
929                              fputs("' in a string gap\n", stderr);
930                              hsperror("");
931                          }
932                         }
933
934 %{
935     /*
936      * End of file.  In any sub-state, this is an error.  However, for the primary
937      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
938      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
939      * adding virtual close curlies, or closing an interface and returning to the
940      * primary source file.
941      *
942      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
943      * line/column advancement has to be done by hand.
944      */
945 %}
946
947 <Char,CharEsc><<EOF>>   { 
948                           hsplineno = hslineno; hspcolno = hscolno;
949                           hsperror("unterminated character literal");
950                         }
951 <Comment><<EOF>>        { 
952                           char errbuf[ERR_BUF_SIZE];
953                           hsplineno = hslineno; hspcolno = hscolno;
954                           sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
955                           hsperror(errbuf); 
956                         }
957 <String,StringEsc><<EOF>>   { 
958                           hsplineno = hslineno; hspcolno = hscolno;
959                           hsperror("unterminated string literal"); 
960                         }
961 <UserPragma><<EOF>>     {
962                           hsplineno = hslineno; hspcolno = hscolno;
963                           hsperror("unterminated user-specified pragma"); 
964                         }
965 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
966
967 %%
968
969 /**********************************************************************
970 *                                                                     *
971 *                                                                     *
972 *     YACC/LEX Initialisation etc.                                    *
973 *                                                                     *
974 *                                                                     *
975 **********************************************************************/
976
977 /*
978    We initialise input_filename to "<stdin>".
979    This allows unnamed sources to be piped into the parser.
980 */
981
982 void
983 yyinit(void)
984 {
985     input_filename = xstrdup("<stdin>");
986
987     /* We must initialize the input buffer _now_, because we call
988        setyyin _before_ calling yylex for the first time! */
989     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
990
991     if (nonstandardFlag)
992         PUSH_STATE(GlaExt);
993     else
994         PUSH_STATE(Code);
995 }
996
997 static void
998 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
999 {
1000     if (input_filename != NULL)
1001         free(input_filename);
1002     input_filename = xstrdup(f);
1003 }
1004
1005 /**********************************************************************
1006 *                                                                     *
1007 *                                                                     *
1008 *     Layout Processing                                               *
1009 *                                                                     *
1010 *                                                                     *
1011 **********************************************************************/
1012
1013 /*
1014         The following section deals with Haskell Layout conventions
1015         forcing insertion of ; or } as appropriate
1016 */
1017
1018 #ifdef HSP_DEBUG
1019 #define LAYOUT_DEBUG
1020 #endif
1021
1022
1023 static BOOLEAN
1024 hsshouldindent(void)
1025 {
1026     return (!forgetindent && INDENTON);
1027 }
1028
1029
1030 /* Enter new context and set new indentation level */
1031 void
1032 hssetindent(void)
1033 {
1034 #ifdef LAYOUT_DEBUG
1035     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1036 #endif
1037
1038     /*
1039      * partain: first chk that new indent won't be less than current one; this code
1040      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1041      * current token; what that has to do with indenting, I don't know.
1042      */
1043
1044
1045     if (hscolno - 1 <= INDENTPT) {
1046         if (INDENTPT == -1)
1047             return;             /* Empty input OK for Haskell 1.1 */
1048         else {
1049             char errbuf[ERR_BUF_SIZE];
1050
1051             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1052             hsperror(errbuf);
1053         }
1054     }
1055     hsentercontext((hspcolno << 1) | 1);
1056 }
1057
1058
1059 /* Enter a new context without changing the indentation level */
1060 void
1061 hsincindent(void)
1062 {
1063 #ifdef LAYOUT_DEBUG
1064     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1065 #endif
1066     hsentercontext(indenttab[icontexts] & ~1);
1067 }
1068
1069
1070 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1071 void
1072 hsindentoff(void)
1073 {
1074     forgetindent = TRUE;
1075 }
1076
1077
1078 /* Enter a new layout context. */
1079 static void
1080 hsentercontext(int indent)
1081 {
1082     /* Enter new context and set indentation as specified */
1083     if (++icontexts >= MAX_CONTEXTS) {
1084         char errbuf[ERR_BUF_SIZE];
1085
1086         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1087         hsperror(errbuf);
1088     }
1089     forgetindent = FALSE;
1090     indenttab[icontexts] = indent;
1091 #ifdef LAYOUT_DEBUG
1092     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1093 #endif
1094 }
1095
1096
1097 /* Exit a layout context */
1098 void
1099 hsendindent(void)
1100 {
1101     --icontexts;
1102 #ifdef LAYOUT_DEBUG
1103     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1104 #endif
1105 }
1106
1107 /*
1108  *      Return checks the indentation level and returns ;, } or the specified token.
1109  */
1110 static int
1111 Return(int tok)
1112 {
1113 #ifdef HSP_DEBUG
1114     extern int yyleng;
1115 #endif
1116     if (hsshouldindent()) {
1117         if (hspcolno < INDENTPT) {
1118 #ifdef HSP_DEBUG
1119             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1120 #endif
1121             hssttok = tok;
1122             return (VCCURLY);
1123         } else if (hspcolno == INDENTPT) {
1124 #ifdef HSP_DEBUG
1125             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1126 #endif
1127             hssttok = -tok;
1128             return (SEMI);
1129         }
1130     }
1131
1132     hssttok = -1;
1133 #ifdef HSP_DEBUG
1134     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1135 #endif
1136     return (tok);
1137 }
1138
1139
1140 /*
1141  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1142  */
1143 int
1144 yylex()
1145 {
1146     int tok;
1147     static BOOLEAN eof = FALSE;
1148
1149     if (!eof) {
1150         if (hssttok != -1) {
1151             if (hssttok < 0) {
1152                 tok = -hssttok;
1153                 hssttok = -1;
1154                 return tok;
1155             }
1156             RETURN(hssttok);
1157         } else {
1158             endlineno = hslineno;
1159             if ((tok = yylex1()) != EOF)
1160                 return tok;
1161             else
1162                 eof = TRUE;
1163         }
1164     }
1165     if (icontexts > icontexts_save) {
1166         if (INDENTON) {
1167             eof = TRUE;
1168             indenttab[icontexts] = 0;
1169             return (VCCURLY);
1170         } else
1171             hsperror("missing '}' at end of file");
1172     } else if (hsbuf_save != NULL) {
1173         fclose(yyin);
1174         yy_delete_buffer(YY_CURRENT_BUFFER);
1175         yy_switch_to_buffer(hsbuf_save);
1176         hsbuf_save = NULL;
1177         new_filename(filename_save);
1178         free(filename_save);
1179         hslineno = hslineno_save;
1180         hsplineno = hsplineno_save;
1181         hscolno = hscolno_save;
1182         hspcolno = hspcolno_save;
1183         etags = etags_save;
1184         icontexts = icontexts_save - 1;
1185         icontexts_save = 0;
1186 #ifdef HSP_DEBUG
1187         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1188 #endif
1189         eof = FALSE;
1190
1191         /* RETURN(LEOF); */
1192         hsperror("No longer using yacc to parse interface files");
1193
1194     } else {
1195         yyterminate();
1196     }
1197     abort(); /* should never get here! */
1198     return(0);
1199 }
1200
1201 /**********************************************************************
1202 *                                                                     *
1203 *                                                                     *
1204 *     Input Processing for Interfaces -- Not currently used !!!       *
1205 *                                                                     *
1206 *                                                                     *
1207 **********************************************************************/
1208
1209 /* setyyin(file)        open file as new lex input buffer */
1210 extern FILE *yyin;
1211
1212 void
1213 setyyin(char *file)
1214 {
1215     hsbuf_save = YY_CURRENT_BUFFER;
1216     if ((yyin = fopen(file, "r")) == NULL) {
1217         char errbuf[ERR_BUF_SIZE];
1218
1219         sprintf(errbuf, "can't read \"%-.50s\"", file);
1220         hsperror(errbuf);
1221     }
1222     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1223
1224     hslineno_save = hslineno;
1225     hsplineno_save = hsplineno;
1226     hslineno = hsplineno = 1;
1227
1228     filename_save = input_filename;
1229     input_filename = NULL;
1230     new_filename(file);
1231     hscolno_save = hscolno;
1232     hspcolno_save = hspcolno;
1233     hscolno = hspcolno = 0;
1234     etags_save = etags; /* do not do "etags" stuff in interfaces */
1235     etags = 0;          /* We remember whether we are doing it in
1236                            the module, so we can restore it later [WDP 94/09] */
1237     hsentercontext(-1);         /* partain: changed this from 0 */
1238     icontexts_save = icontexts;
1239 #ifdef HSP_DEBUG
1240     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1241 #endif
1242 }
1243
1244 static void
1245 layout_input(char *text, int len)
1246 {
1247 #ifdef HSP_DEBUG
1248     fprintf(stderr, "Scanning \"%s\"\n", text);
1249 #endif
1250
1251     hsplineno = hslineno;
1252     hspcolno = hscolno;
1253
1254     while (len-- > 0) {
1255         switch (*text++) {
1256         case '\n':
1257         case '\r':
1258         case '\f':
1259             hslineno++;
1260             hscolno = 0;
1261             break;
1262         case '\t':
1263             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1264             break;
1265         case '\v':
1266             break;
1267         default:
1268             ++hscolno;
1269             break;
1270         }
1271     }
1272 }
1273
1274 void
1275 setstartlineno(void)
1276 {
1277     startlineno = hsplineno;
1278
1279     if (modulelineno == 0) {
1280         modulelineno = startlineno;
1281     }
1282
1283 #if 1/*etags*/
1284 #else
1285     if (etags)
1286         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1287 #endif
1288 }
1289
1290 /**********************************************************************
1291 *                                                                     *
1292 *                                                                     *
1293 *                      Text Caching                                   *
1294 *                                                                     *
1295 *                                                                     *
1296 **********************************************************************/
1297
1298 #define CACHE_SIZE YY_BUF_SIZE
1299
1300 static struct {
1301     unsigned allocated;
1302     unsigned next;
1303     char *text;
1304 } textcache = { 0, 0, NULL };
1305
1306 static void
1307 cleartext(void)
1308 {
1309 /*  fprintf(stderr, "cleartext\n"); */
1310     textcache.next = 0;
1311     if (textcache.allocated == 0) {
1312         textcache.allocated = CACHE_SIZE;
1313         textcache.text = xmalloc(CACHE_SIZE);
1314     }
1315 }
1316
1317 static void
1318 addtext(char *text, unsigned length)
1319 {
1320 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1321
1322     if (length == 0)
1323         return;
1324
1325     if (textcache.next + length + 1 >= textcache.allocated) {
1326         textcache.allocated += length + CACHE_SIZE;
1327         textcache.text = xrealloc(textcache.text, textcache.allocated);
1328     }
1329     bcopy(text, textcache.text + textcache.next, length);
1330     textcache.next += length;
1331 }
1332
1333 static void
1334 addchar(char c)
1335 {
1336 /*  fprintf(stderr, "addchar: %c\n", c); */
1337
1338     if (textcache.next + 2 >= textcache.allocated) {
1339         textcache.allocated += CACHE_SIZE;
1340         textcache.text = xrealloc(textcache.text, textcache.allocated);
1341     }
1342     textcache.text[textcache.next++] = c;
1343 }
1344
1345 static char *
1346 fetchtext(unsigned *length)
1347 {
1348 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1349
1350     *length = textcache.next;
1351     textcache.text[textcache.next] = '\0';
1352     return textcache.text;
1353 }
1354
1355 /**********************************************************************
1356 *                                                                     *
1357 *                                                                     *
1358 *    Identifier Processing                                             *
1359 *                                                                     *
1360 *                                                                     *
1361 **********************************************************************/
1362
1363 /*
1364         hsnewid         Enters an id of length n into the symbol table.
1365 */
1366
1367 static void
1368 hsnewid(char *name, int length)
1369 {
1370     char save = name[length];
1371
1372     name[length] = '\0';
1373     yylval.uid = installid(name);
1374     name[length] = save;
1375 }
1376
1377 BOOLEAN
1378 hsnewqid(char *name, int length)
1379 {
1380     char* dot;
1381     char save = name[length];
1382     name[length] = '\0';
1383
1384     dot = strchr(name, '.');
1385     *dot = '\0';
1386     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1387     *dot = '.';
1388     name[length] = save;
1389
1390     return isconstr(dot+1);
1391 }
1392
1393 static
1394 BOOLEAN
1395 is_commment(char* lexeme, int len)
1396 {
1397    char* ptr;
1398    int i;
1399         
1400    if (len < 2) {
1401       return FALSE;
1402    }
1403
1404    for(i=0;i<len;i++) {
1405      if (lexeme[i] != '-') return FALSE;
1406    }        
1407    return TRUE;
1408 }
1409