[project @ 1997-03-14 07:52:06 by simonpj]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 #include "../../includes/config.h"
11
12 #include <stdio.h>
13
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
15 #include <string.h>
16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
18 #include <memory.h>
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
20 #define index strchr
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
26 #include <strings.h>
27 /* memory.h and strings.h conflict on some systems.  */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
29
30 #include "hspincl.h"
31 #include "hsparser.tab.h"
32 #include "constants.h"
33 #include "utils.h"
34
35 /* Our substitute for <ctype.h> */
36
37 #define NCHARS  256
38 #define _S      0x1
39 #define _D      0x2
40 #define _H      0x4
41 #define _O      0x8
42 #define _C      0x10
43
44 static unsigned char CharTable[NCHARS] = {
45 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
46 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
47 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
48 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
50 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
51 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
52 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
53 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
54 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
55 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
57 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
58 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
59 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
61
62 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 };
79
80 BOOLEAN
81 isconstr (char *s)
82 {
83     return(CharTable[*s]&(_C));
84 }
85
86 /**********************************************************************
87 *                                                                     *
88 *                                                                     *
89 *      Declarations                                                   *
90 *                                                                     *
91 *                                                                     *
92 **********************************************************************/
93
94 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
95
96 /*
97  * For my own sanity, things that are not part of the flex skeleton
98  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
99  */
100
101 static int hslineno = 0;        /* Line number at end of token */
102 int hsplineno = 0;              /* Line number at end of previous token */
103
104 static int hscolno = 0;         /* Column number at end of token */
105 int hspcolno = 0;               /* Column number at end of previous token */
106 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
107
108 int modulelineno = -1;          /* The line number where the module starts */
109 int startlineno = 0;            /* The line number where something starts */
110 int endlineno = 0;              /* The line number where something ends */
111
112 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
113 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
114
115 static int nested_comments;     /* For counting comment nesting depth */
116
117 /* OLD: Hacky definition of yywrap: see flex doc.
118
119    If we don't do this, then we'll have to get the default
120    yywrap from the flex library, which is often something
121    we are not good at locating.  This avoids that difficulty.
122    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
123    WDP 94/09/05
124 #define yywrap() 1
125 */
126
127 /* Essential forward declarations */
128
129 static void hsnewid      PROTO((char *, int));
130 static void layout_input PROTO((char *, int));
131 static void cleartext    (NO_ARGS);
132 static void addtext      PROTO((char *, unsigned));
133 static void addchar      PROTO((char));
134 static char *fetchtext   PROTO((unsigned *));
135 static void new_filename PROTO((char *));
136 static int  Return       PROTO((int));
137 static void hsentercontext PROTO((int));
138
139 /* Special file handling for IMPORTS */
140 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
141
142 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
143 static char *filename_save;             /* File Name                     */
144 static int hslineno_save = 0,           /* Line Number                   */
145  hsplineno_save = 0,                    /* Line Number of Prev. token    */
146  hscolno_save = 0,                      /* Indentation                   */
147  hspcolno_save = 0;                     /* Left Indentation              */
148 static short icontexts_save = 0;        /* Indent Context Level          */
149
150 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
151 extern BOOLEAN etags;                   /* that which is saved */
152
153 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
154
155 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
156                                  * inserted before token +ve  -- "}" inserted before
157                                  * token */
158
159 short icontexts = 0;            /* Which context we're in */
160
161 /*
162         Table of indentations:  right bit indicates whether to use
163           indentation rules (1 = use rules; 0 = ignore)
164
165     partain:
166     push one of these "contexts" at every "case" or "where"; the right bit says
167     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
168
169     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
170     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
171     push is shown just below.
172
173 */
174
175
176 static short indenttab[MAX_CONTEXTS] = {-1};
177
178 #define INDENTPT (indenttab[icontexts]>>1)
179 #define INDENTON (indenttab[icontexts]&1)
180
181 #define RETURN(tok) return(Return(tok))
182
183 #undef YY_DECL
184 #define YY_DECL int yylex1()
185
186 /* We should not peek at yy_act, but flex calls us even for the internal action
187    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
188    to support older versions of flex, we'll continue to peek for now.
189  */
190 #define YY_USER_ACTION \
191     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
192
193 #if 0/*debug*/
194 #undef YY_BREAK
195 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
196 #endif
197
198 /* Each time we enter a new start state, we push it onto the state stack.
199 */
200 #define PUSH_STATE(n)   yy_push_state(n)
201 #define POP_STATE       yy_pop_state()
202
203 %}
204 /* Options:
205     8bit (8-bit input)
206     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
207     never-interactive (to go a bit faster)
208     stack (use a start-condition stack)
209 */
210 %option 8bit
211 %option noyywrap
212 %option never-interactive
213 %option stack
214
215 /* The start states are:
216    Code -- normal Haskell code (principal lexer)
217    GlaExt -- Haskell code with Glasgow extensions
218    Comment -- Nested comment processing
219    String -- Inside a string literal with backslashes
220    StringEsc -- Immediately following a backslash in a string literal
221    Char -- Inside a character literal with backslashes
222    CharEsc -- Immediately following a backslash in a character literal 
223
224    Note that the INITIAL state is unused.  Also note that these states
225    are _exclusive_.  All rules should be prefixed with an appropriate
226    list of start states.
227  */
228
229 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
230
231 isoS                    [\xa1-\xbf\xd7\xf7]
232 isoL                    [\xc0-\xd6\xd8-\xde]
233 isol                    [\xdf-\xf6\xf8-\xff]
234 isoA                    [\xa1-\xff]
235
236 D                       [0-9]
237 O                       [0-7]
238 H                       [0-9A-Fa-f]
239 N                       {D}+
240 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
241 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
242 SId                     {S}{S}*
243 L                       [A-Z\xc0-\xd6\xd8-\xde]
244 l                       [a-z\xdf-\xf6\xf8-\xff]
245 I                       {L}|{l}
246 i                       {L}|{l}|[0-9'_]
247 Id                      {I}{i}*
248 Mod                     {L}{i}*
249 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
250 CNTRL                   [@A-Z\[\\\]^_]
251 WS                      [ \t\n\r\f\v]
252 NL                      [\n\r]
253
254 %%
255
256 %{
257     /* 
258      * Special GHC pragma rules.  Do we need a start state for interface files,
259      * so these won't be matched in source files? --JSM
260      */
261 %}
262
263 <Code,GlaExt>^"# ".*{NL}    {
264                           char tempf[FILENAME_SIZE];
265                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
266                           new_filename(tempf);
267                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
268                         }
269
270 <Code,GlaExt>^"#line ".*{NL}    {
271                           char tempf[FILENAME_SIZE];
272                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
273                           new_filename(tempf); 
274                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
275                         }
276
277 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
278                           /* partain: pragma-style line directive */
279                           char tempf[FILENAME_SIZE];
280                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
281                           new_filename(tempf);
282                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
283                         }
284
285 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
286                               PUSH_STATE(UserPragma);
287                               RETURN(INTERFACE_UPRAGMA);
288                             }
289 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
290                               PUSH_STATE(UserPragma);
291                               RETURN(SPECIALISE_UPRAGMA);
292                             }
293 <Code,GlaExt>"{-#"{WS}*"INLINE" {
294                               PUSH_STATE(UserPragma);
295                               RETURN(INLINE_UPRAGMA);
296                             }
297 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
298                               PUSH_STATE(UserPragma);
299                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
300                             }
301 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
302                               PUSH_STATE(UserPragma);
303                               RETURN(DEFOREST_UPRAGMA);
304                             }
305 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
306                               /* these are handled by hscpp */
307                               nested_comments =1;
308                               PUSH_STATE(Comment);
309                             }
310 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
311                               /* these are by the driver! */
312                               nested_comments =1;
313                               PUSH_STATE(Comment);
314                             }
315 <Code,GlaExt>"{-#"{WS}*"SOURCE" {
316                               /* these are used by `make depend' (temp) */
317                               nested_comments =1;
318                               PUSH_STATE(Comment);
319                             }
320 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
321                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
322                                 input_filename, hsplineno);
323                               format_string(stderr, (unsigned char *) yytext, yyleng);
324                               fputs("'\n", stderr);
325                               nested_comments = 1;
326                               PUSH_STATE(Comment);
327                             }
328 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
329
330 %{
331     /*
332      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
333      * intentionally accepted as a keyword even for normal <Code>.
334      */
335 %}
336
337 <Code,GlaExt>"case"             { RETURN(CASE); }
338 <Code,GlaExt>"class"            { RETURN(CLASS); }
339 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
340 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
341 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
342 <Code,GlaExt>"do"               { RETURN(DO); }
343 <Code,GlaExt>"else"             { RETURN(ELSE); }
344 <Code,GlaExt>"if"               { RETURN(IF); }
345 <Code,GlaExt>"import"           { RETURN(IMPORT); }
346 <Code,GlaExt>"in"               { RETURN(IN); }
347 <Code,GlaExt>"infix"            { RETURN(INFIX); }
348 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
349 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
350 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
351 <Code,GlaExt>"let"              { RETURN(LET); }
352 <Code,GlaExt>"module"           { RETURN(MODULE); }
353 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
354 <Code,GlaExt>"of"               { RETURN(OF); }
355 <Code,GlaExt>"then"             { RETURN(THEN); }
356 <Code,GlaExt>"type"             { RETURN(TYPE); }
357 <Code,GlaExt>"where"            { RETURN(WHERE); }
358
359 <Code,GlaExt>"as"               { RETURN(AS); }
360 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
361 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
362
363 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
364 <GlaExt>"_ccall_"               { RETURN(CCALL); }
365 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
366 <GlaExt>"_casm_"                { RETURN(CASM); }
367 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
368
369 %{
370     /* 
371      * Haskell operators: special, reservedops and useful varsyms
372      */
373 %}
374
375 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
376 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
377 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
378 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
379 <Code,GlaExt>"{"                { RETURN(OCURLY); }
380 <Code,GlaExt>"}"                { RETURN(CCURLY); }
381 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
382 <Code,GlaExt>";"                { RETURN(SEMI); }
383 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
384 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
385
386 <Code,GlaExt>".."               { RETURN(DOTDOT); }
387 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
388 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
389 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
390 <Code,GlaExt>"|"                { RETURN(VBAR); }
391 <Code,GlaExt>"<-"               { RETURN(LARROW); }
392 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
393 <Code,GlaExt>"-"                { RETURN(MINUS); }
394 <Code,GlaExt>"+"                { RETURN(PLUS); }
395
396 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
397 <Code,GlaExt>"@"                { RETURN(AT); }
398 <Code,GlaExt>"!"                { RETURN(BANG); }
399 <Code,GlaExt>"~"                { RETURN(LAZY); }
400
401 %{
402     /*
403      * Integers and (for Glasgow extensions) primitive integers.  Note that
404      * we pass all of the text on to the parser, because flex/C can't handle
405      * arbitrary precision numbers.
406      */
407 %}
408
409 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
410                          yylval.uid = xstrndup(yytext, yyleng - 1);
411                          RETURN(INTPRIM);
412                         }
413 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
414                          yylval.uid = xstrndup(yytext, yyleng);
415                          RETURN(INTEGER);
416                         }
417 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
418                          yylval.uid = xstrndup(yytext, yyleng - 1);
419                          RETURN(INTPRIM);
420                         }
421 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
422                          yylval.uid = xstrndup(yytext, yyleng);
423                          RETURN(INTEGER);
424                         }
425 <GlaExt>("-")?{N}"#"    {
426                          yylval.uid = xstrndup(yytext, yyleng - 1);
427                          RETURN(INTPRIM);
428                         }
429 <Code,GlaExt,UserPragma>{N} {
430                          yylval.uid = xstrndup(yytext, yyleng);
431                          RETURN(INTEGER);
432                         }
433
434 %{
435     /*
436      * Floats and (for Glasgow extensions) primitive floats/doubles.
437      */
438 %}
439
440 <GlaExt>("-")?{F}"##"   {
441                          yylval.uid = xstrndup(yytext, yyleng - 2);
442                          RETURN(DOUBLEPRIM);
443                         }
444 <GlaExt>("-")?{F}"#"    {
445                          yylval.uid = xstrndup(yytext, yyleng - 1);
446                          RETURN(FLOATPRIM);
447                         }
448 <Code,GlaExt>{F}        {
449                          yylval.uid = xstrndup(yytext, yyleng);
450                          RETURN(FLOAT);
451                         }
452
453 %{
454     /*
455      * Funky ``foo'' style C literals for Glasgow extensions
456      */
457 %}
458
459 <GlaExt>"``"[^']+"''"   {
460                          hsnewid(yytext + 2, yyleng - 4);
461                          RETURN(CLITLIT);
462                         }
463
464 %{
465     /*
466      * Identifiers, both variables and operators.  The trailing hash is allowed
467      * for Glasgow extensions.
468      */
469 %}
470
471
472 %{
473 /* These SHOULDNAE work in "Code" (sigh) */
474 %}
475 <GlaExt,UserPragma>{Id}"#" { 
476                         if (! nonstandardFlag) {
477                             char errbuf[ERR_BUF_SIZE];
478                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
479                             hsperror(errbuf);
480                          }
481                          hsnewid(yytext, yyleng);
482                          RETURN(isconstr(yytext) ? CONID : VARID);
483                         }
484 <Code,GlaExt,UserPragma>{Id}    {
485                          hsnewid(yytext, yyleng);
486                          RETURN(isconstr(yytext) ? CONID : VARID);
487                         }
488 <Code,GlaExt,UserPragma>{SId}   {
489                          hsnewid(yytext, yyleng);
490                          RETURN(isconstr(yytext) ? CONSYM : VARSYM);
491                         }
492 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
493                          BOOLEAN is_constr;
494                          if (! nonstandardFlag) {
495                             char errbuf[ERR_BUF_SIZE];
496                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
497                             hsperror(errbuf);
498                          }
499                          is_constr = hsnewqid(yytext, yyleng);
500                          RETURN(is_constr ? QCONID : QVARID);
501                         }
502 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
503                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
504                          RETURN(is_constr ? QCONID : QVARID);
505                         }
506 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
507                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
508                          RETURN(is_constr ? QCONSYM : QVARSYM);
509                         }
510
511 %{
512     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
513
514     /* Because we can make the former well-behaved (we defined them).
515
516        Sadly, the latter is defined by Haskell, which allows such
517        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
518     */
519 %}
520
521 <GlaExt,UserPragma>"`"{Id}"#`"  {       
522                          hsnewid(yytext + 1, yyleng - 2);
523                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
524                         }
525
526 %{
527     /*
528      * Character literals.  The first form is the quick form, for character
529      * literals that don't contain backslashes.  Literals with backslashes are
530      * lexed through multiple rules.  First, we match the open ' and as many
531      * normal characters as possible.  This puts us into the <Char> state, where
532      * a backslash is legal.  Then, we match the backslash and move into the 
533      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
534      * characters and the close '.  We may end up with too many characters, but
535      * this allows us to easily share the lex rules with strings.  Excess characters
536      * are ignored with a warning.
537      */
538 %}
539
540 <GlaExt>'({CHAR}|"\"")"'#" {
541                          yylval.uhstring = installHstring(1, yytext+1);
542                          RETURN(CHARPRIM);
543                         }
544 <Code,GlaExt>'({CHAR}|"\"")'    {
545                          yylval.uhstring = installHstring(1, yytext+1);
546                          RETURN(CHAR);
547                         }
548 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
549                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
550                          hsperror(errbuf);
551                         }
552 <Code,GlaExt>'({CHAR}|"\"")* {
553                          hsmlcolno = hspcolno;
554                          cleartext();
555                          addtext(yytext+1, yyleng-1);
556                          PUSH_STATE(Char);
557                         }
558 <Char>({CHAR}|"\"")*'#  {
559                          unsigned length;
560                          char *text;
561
562                          addtext(yytext, yyleng - 2);
563                          text = fetchtext(&length);
564
565                          if (! nonstandardFlag) {
566                             char errbuf[ERR_BUF_SIZE];
567                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
568                             hsperror(errbuf);
569                          }
570
571                          if (length > 1) {
572                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
573                               input_filename, hsplineno, hspcolno + 1);
574                             format_string(stderr, (unsigned char *) text, length);
575                             fputs("' too long\n", stderr);
576                             hsperror("");
577                          }
578                          yylval.uhstring = installHstring(1, text);
579                          hspcolno = hsmlcolno;
580                          POP_STATE;
581                          RETURN(CHARPRIM); 
582                         }
583 <Char>({CHAR}|"\"")*'   {
584                          unsigned length;
585                          char *text;
586
587                          addtext(yytext, yyleng - 1);
588                          text = fetchtext(&length);
589
590                          if (length > 1) {
591                             fprintf(stderr, "%s:%d:%d: Character literal '",
592                               input_filename, hsplineno, hspcolno + 1);
593                             format_string(stderr, (unsigned char *) text, length);
594                             fputs("' too long\n", stderr);
595                             hsperror("");
596                          }
597                          yylval.uhstring = installHstring(1, text);
598                          hspcolno = hsmlcolno;
599                          POP_STATE;
600                          RETURN(CHAR); 
601                         }
602 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
603
604
605 %{
606     /*
607      * String literals.  The first form is the quick form, for string literals
608      * that don't contain backslashes.  Literals with backslashes are lexed
609      * through multiple rules.  First, we match the open " and as many normal
610      * characters as possible.  This puts us into the <String> state, where
611      * a backslash is legal.  Then, we match the backslash and move into the 
612      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
613      * characters, moving back and forth between <String> and <StringEsc> as more
614      * backslashes are encountered.  (We may even digress into <Comment> mode if we
615      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
616      * of normal characters and the close ".
617      */
618 %}
619
620 <GlaExt>"\""({CHAR}|"'")*"\""#  {
621                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
622                             /* the -3 accounts for the " on front, "# on the end */
623                          RETURN(STRINGPRIM); 
624                         }
625 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
626                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
627                          RETURN(STRING); 
628                         }
629 <Code,GlaExt>"\""({CHAR}|"'")* {
630                          hsmlcolno = hspcolno;
631                          cleartext();
632                          addtext(yytext+1, yyleng-1);
633                          PUSH_STATE(String);
634                         }
635 <String>({CHAR}|"'")*"\"#"   {
636                          unsigned length;
637                          char *text;
638
639                          addtext(yytext, yyleng-2);
640                          text = fetchtext(&length);
641
642                          if (! nonstandardFlag) {
643                             char errbuf[ERR_BUF_SIZE];
644                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
645                             hsperror(errbuf);
646                          }
647
648                          yylval.uhstring = installHstring(length, text);
649                          hspcolno = hsmlcolno;
650                          POP_STATE;
651                          RETURN(STRINGPRIM);
652                         }
653 <String>({CHAR}|"'")*"\""   {
654                          unsigned length;
655                          char *text;
656
657                          addtext(yytext, yyleng-1);
658                          text = fetchtext(&length);
659
660                          yylval.uhstring = installHstring(length, text);
661                          hspcolno = hsmlcolno;
662                          POP_STATE;
663                          RETURN(STRING); 
664                         }
665 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
666
667 %{
668     /*
669      * Character and string escapes are roughly the same, but strings have the
670      * extra `\&' sequence which is not allowed for characters.  Also, comments
671      * are allowed in the <StringEsc> state.  (See the comment section much
672      * further down.)
673      *
674      * NB: Backslashes and tabs are stored in strings as themselves.
675      * But if we print them (in printtree.c), they must go out as
676      * "\\\\" and "\\t" respectively.  (This is because of the bogus
677      * intermediate format that the parser produces.  It uses '\t' fpr end of
678      * string, so it needs to be able to escape tabs, which means that it
679      * also needs to be able to escape the escape character ('\\').  Sigh.
680      */
681 %}
682
683 <Char>\\                { PUSH_STATE(CharEsc); }
684 <String>\\&             /* Ignore */ ;
685 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
686
687 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
688 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
689
690 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
691 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
692 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
693 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
694 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
695 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
696 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
697 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
698 <CharEsc,StringEsc>BEL  |
699 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
700 <CharEsc,StringEsc>BS   |
701 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
702 <CharEsc,StringEsc>HT   |
703 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
704 <CharEsc,StringEsc>LF   |
705 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
706 <CharEsc,StringEsc>VT   |
707 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
708 <CharEsc,StringEsc>FF   |
709 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
710 <CharEsc,StringEsc>CR   |
711 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
712 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
713 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
714 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
715 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
716 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
717 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
718 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
719 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
720 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
721 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
722 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
723 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
724 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
725 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
726 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
727 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
728 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
729 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
730 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
731 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
732 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
733 <CharEsc,StringEsc>{D}+  {
734                           int i = strtol(yytext, NULL, 10);
735                           if (i < NCHARS) {
736                              addchar((char) i);
737                           } else {
738                              char errbuf[ERR_BUF_SIZE];
739                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
740                                 yytext);
741                              hsperror(errbuf);
742                           }
743                           POP_STATE;
744                         }
745 <CharEsc,StringEsc>o{O}+ {
746                           int i = strtol(yytext + 1, NULL, 8);
747                           if (i < NCHARS) {
748                              addchar((char) i);
749                           } else {
750                              char errbuf[ERR_BUF_SIZE];
751                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
752                                 yytext);
753                              hsperror(errbuf);
754                           }
755                           POP_STATE;
756                         }
757 <CharEsc,StringEsc>x{H}+ {
758                           int i = strtol(yytext + 1, NULL, 16);
759                           if (i < NCHARS) {
760                              addchar((char) i);
761                           } else {
762                              char errbuf[ERR_BUF_SIZE];
763                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
764                                 yytext);
765                              hsperror(errbuf);
766                           }
767                           POP_STATE;
768                         }
769
770 %{
771     /*
772      * Simple comments and whitespace.  Normally, we would just ignore these, but
773      * in case we're processing a string escape, we need to note that we've seen
774      * a gap.
775      *
776      * Note that we cater for a comment line that *doesn't* end in a newline.
777      * This is incorrect, strictly speaking, but seems like the right thing
778      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
779      */
780 %}
781
782 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
783 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
784
785 %{
786     /*
787      * Nested comments.  The major complication here is in trying to match the
788      * longest lexemes possible, for better performance.  (See the flex document.)
789      * That's why the rules look so bizarre.
790      */
791 %}
792
793 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
794                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
795                         }
796
797 <Comment>[^-{]*         |
798 <Comment>"-"+[^-{}]+    |
799 <Comment>"{"+[^-{}]+    ;
800 <Comment>"{-"           { nested_comments++; }
801 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
802 <Comment>(.|\n)         ;
803
804 %{
805     /*
806      * Illegal characters.  This used to be a single rule, but we might as well
807      * pass on as much information as we have, so now we indicate our state in
808      * the error message.
809      */
810 %}
811
812 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
813                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
814                             input_filename, hsplineno, hspcolno + 1); 
815                          format_string(stderr, (unsigned char *) yytext, 1);
816                          fputs("'\n", stderr);
817                          hsperror("");
818                         }
819 <Char>(.|\n)            { 
820                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
821                             input_filename, hsplineno, hspcolno + 1); 
822                          format_string(stderr, (unsigned char *) yytext, 1);
823                          fputs("' in a character literal\n", stderr);
824                          hsperror("");
825                         }
826 <CharEsc>(.|\n)         {
827                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
828                             input_filename, hsplineno, hspcolno + 1); 
829                          format_string(stderr, (unsigned char *) yytext, 1);
830                          fputs("'\n", stderr);
831                          hsperror("");
832                         }
833 <String>(.|\n)          { if (nonstandardFlag) {
834                              addtext(yytext, yyleng);
835                           } else { 
836                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
837                                 input_filename, hsplineno, hspcolno + 1); 
838                                 format_string(stderr, (unsigned char *) yytext, 1);
839                                 fputs("' in a string literal\n", stderr);
840                                 hsperror("");
841                           }
842                         }
843 <StringEsc>(.|\n)       {
844                          if (noGap) {
845                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
846                                 input_filename, hsplineno, hspcolno + 1); 
847                              format_string(stderr, (unsigned char *) yytext, 1);
848                              fputs("'\n", stderr);
849                              hsperror("");
850                          } else {
851                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
852                                 input_filename, hsplineno, hspcolno + 1);
853                              format_string(stderr, (unsigned char *) yytext, 1);
854                              fputs("' in a string gap\n", stderr);
855                              hsperror("");
856                          }
857                         }
858
859 %{
860     /*
861      * End of file.  In any sub-state, this is an error.  However, for the primary
862      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
863      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
864      * adding virtual close curlies, or closing an interface and returning to the
865      * primary source file.
866      *
867      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
868      * line/column advancement has to be done by hand.
869      */
870 %}
871
872 <Char,CharEsc><<EOF>>   { 
873                           hsplineno = hslineno; hspcolno = hscolno;
874                           hsperror("unterminated character literal");
875                         }
876 <Comment><<EOF>>        { 
877                           hsplineno = hslineno; hspcolno = hscolno;
878                           hsperror("unterminated comment"); 
879                         }
880 <String,StringEsc><<EOF>>   { 
881                           hsplineno = hslineno; hspcolno = hscolno;
882                           hsperror("unterminated string literal"); 
883                         }
884 <UserPragma><<EOF>>     {
885                           hsplineno = hslineno; hspcolno = hscolno;
886                           hsperror("unterminated user-specified pragma"); 
887                         }
888 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
889
890 %%
891
892 /**********************************************************************
893 *                                                                     *
894 *                                                                     *
895 *     YACC/LEX Initialisation etc.                                    *
896 *                                                                     *
897 *                                                                     *
898 **********************************************************************/
899
900 /*
901    We initialise input_filename to "<stdin>".
902    This allows unnamed sources to be piped into the parser.
903 */
904
905 void
906 yyinit(void)
907 {
908     input_filename = xstrdup("<stdin>");
909
910     /* We must initialize the input buffer _now_, because we call
911        setyyin _before_ calling yylex for the first time! */
912     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
913
914     if (nonstandardFlag)
915         PUSH_STATE(GlaExt);
916     else
917         PUSH_STATE(Code);
918 }
919
920 static void
921 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
922 {
923     if (input_filename != NULL)
924         free(input_filename);
925     input_filename = xstrdup(f);
926 }
927
928 /**********************************************************************
929 *                                                                     *
930 *                                                                     *
931 *     Layout Processing                                               *
932 *                                                                     *
933 *                                                                     *
934 **********************************************************************/
935
936 /*
937         The following section deals with Haskell Layout conventions
938         forcing insertion of ; or } as appropriate
939 */
940
941 static BOOLEAN
942 hsshouldindent(void)
943 {
944     return (!forgetindent && INDENTON);
945 }
946
947
948 /* Enter new context and set new indentation level */
949 void
950 hssetindent(void)
951 {
952 #ifdef HSP_DEBUG
953     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
954 #endif
955
956     /*
957      * partain: first chk that new indent won't be less than current one; this code
958      * doesn't make sense to me; hscolno tells the position of the _end_ of the
959      * current token; what that has to do with indenting, I don't know.
960      */
961
962
963     if (hscolno - 1 <= INDENTPT) {
964         if (INDENTPT == -1)
965             return;             /* Empty input OK for Haskell 1.1 */
966         else {
967             char errbuf[ERR_BUF_SIZE];
968
969             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
970             hsperror(errbuf);
971         }
972     }
973     hsentercontext((hspcolno << 1) | 1);
974 }
975
976
977 /* Enter a new context without changing the indentation level */
978 void
979 hsincindent(void)
980 {
981 #ifdef HSP_DEBUG
982     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
983 #endif
984     hsentercontext(indenttab[icontexts] & ~1);
985 }
986
987
988 /* Turn off indentation processing, usually because an explicit "{" has been seen */
989 void
990 hsindentoff(void)
991 {
992     forgetindent = TRUE;
993 }
994
995
996 /* Enter a new layout context. */
997 static void
998 hsentercontext(int indent)
999 {
1000     /* Enter new context and set indentation as specified */
1001     if (++icontexts >= MAX_CONTEXTS) {
1002         char errbuf[ERR_BUF_SIZE];
1003
1004         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1005         hsperror(errbuf);
1006     }
1007     forgetindent = FALSE;
1008     indenttab[icontexts] = indent;
1009 #ifdef HSP_DEBUG
1010     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1011 #endif
1012 }
1013
1014
1015 /* Exit a layout context */
1016 void
1017 hsendindent(void)
1018 {
1019     --icontexts;
1020 #ifdef HSP_DEBUG
1021     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1022 #endif
1023 }
1024
1025 /*
1026  *      Return checks the indentation level and returns ;, } or the specified token.
1027  */
1028
1029 static int
1030 Return(int tok)
1031 {
1032 #ifdef HSP_DEBUG
1033     extern int yyleng;
1034 #endif
1035
1036     if (hsshouldindent()) {
1037         if (hspcolno < INDENTPT) {
1038 #ifdef HSP_DEBUG
1039             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1040 #endif
1041             hssttok = tok;
1042             return (VCCURLY);
1043         } else if (hspcolno == INDENTPT) {
1044 #ifdef HSP_DEBUG
1045             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1046 #endif
1047             hssttok = -tok;
1048             return (SEMI);
1049         }
1050     }
1051     hssttok = -1;
1052 #ifdef HSP_DEBUG
1053     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1054 #endif
1055     return (tok);
1056 }
1057
1058
1059 /*
1060  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1061  */
1062 int
1063 yylex()
1064 {
1065     int tok;
1066     static BOOLEAN eof = FALSE;
1067
1068     if (!eof) {
1069         if (hssttok != -1) {
1070             if (hssttok < 0) {
1071                 tok = -hssttok;
1072                 hssttok = -1;
1073                 return tok;
1074             }
1075             RETURN(hssttok);
1076         } else {
1077             endlineno = hslineno;
1078             if ((tok = yylex1()) != EOF)
1079                 return tok;
1080             else
1081                 eof = TRUE;
1082         }
1083     }
1084     if (icontexts > icontexts_save) {
1085         if (INDENTON) {
1086             eof = TRUE;
1087             indenttab[icontexts] = 0;
1088             return (VCCURLY);
1089         } else
1090             hsperror("missing '}' at end of file");
1091     } else if (hsbuf_save != NULL) {
1092         fclose(yyin);
1093         yy_delete_buffer(YY_CURRENT_BUFFER);
1094         yy_switch_to_buffer(hsbuf_save);
1095         hsbuf_save = NULL;
1096         new_filename(filename_save);
1097         free(filename_save);
1098         hslineno = hslineno_save;
1099         hsplineno = hsplineno_save;
1100         hscolno = hscolno_save;
1101         hspcolno = hspcolno_save;
1102         etags = etags_save;
1103         icontexts = icontexts_save - 1;
1104         icontexts_save = 0;
1105 #ifdef HSP_DEBUG
1106         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1107 #endif
1108         eof = FALSE;
1109
1110         /* RETURN(LEOF); */
1111         hsperror("No longer using yacc to parse interface files");
1112
1113     } else {
1114         yyterminate();
1115     }
1116     abort(); /* should never get here! */
1117     return(0);
1118 }
1119
1120 /**********************************************************************
1121 *                                                                     *
1122 *                                                                     *
1123 *     Input Processing for Interfaces -- Not currently used !!!       *
1124 *                                                                     *
1125 *                                                                     *
1126 **********************************************************************/
1127
1128 /* setyyin(file)        open file as new lex input buffer */
1129 extern FILE *yyin;
1130
1131 void
1132 setyyin(char *file)
1133 {
1134     hsbuf_save = YY_CURRENT_BUFFER;
1135     if ((yyin = fopen(file, "r")) == NULL) {
1136         char errbuf[ERR_BUF_SIZE];
1137
1138         sprintf(errbuf, "can't read \"%-.50s\"", file);
1139         hsperror(errbuf);
1140     }
1141     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1142
1143     hslineno_save = hslineno;
1144     hsplineno_save = hsplineno;
1145     hslineno = hsplineno = 1;
1146
1147     filename_save = input_filename;
1148     input_filename = NULL;
1149     new_filename(file);
1150     hscolno_save = hscolno;
1151     hspcolno_save = hspcolno;
1152     hscolno = hspcolno = 0;
1153     etags_save = etags; /* do not do "etags" stuff in interfaces */
1154     etags = 0;          /* We remember whether we are doing it in
1155                            the module, so we can restore it later [WDP 94/09] */
1156     hsentercontext(-1);         /* partain: changed this from 0 */
1157     icontexts_save = icontexts;
1158 #ifdef HSP_DEBUG
1159     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1160 #endif
1161 }
1162
1163 static void
1164 layout_input(char *text, int len)
1165 {
1166 #ifdef HSP_DEBUG
1167     fprintf(stderr, "Scanning \"%s\"\n", text);
1168 #endif
1169
1170     hsplineno = hslineno;
1171     hspcolno = hscolno;
1172
1173     while (len-- > 0) {
1174         switch (*text++) {
1175         case '\n':
1176         case '\r':
1177         case '\f':
1178             hslineno++;
1179             hscolno = 0;
1180             break;
1181         case '\t':
1182             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1183             break;
1184         case '\v':
1185             break;
1186         default:
1187             ++hscolno;
1188             break;
1189         }
1190     }
1191 }
1192
1193 void
1194 setstartlineno(void)
1195 {
1196     startlineno = hsplineno;
1197
1198     if (modulelineno == 0) {
1199         modulelineno = startlineno;
1200     }
1201
1202 #if 1/*etags*/
1203 #else
1204     if (etags)
1205         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1206 #endif
1207 }
1208
1209 /**********************************************************************
1210 *                                                                     *
1211 *                                                                     *
1212 *                      Text Caching                                   *
1213 *                                                                     *
1214 *                                                                     *
1215 **********************************************************************/
1216
1217 #define CACHE_SIZE YY_BUF_SIZE
1218
1219 static struct {
1220     unsigned allocated;
1221     unsigned next;
1222     char *text;
1223 } textcache = { 0, 0, NULL };
1224
1225 static void
1226 cleartext(void)
1227 {
1228 /*  fprintf(stderr, "cleartext\n"); */
1229     textcache.next = 0;
1230     if (textcache.allocated == 0) {
1231         textcache.allocated = CACHE_SIZE;
1232         textcache.text = xmalloc(CACHE_SIZE);
1233     }
1234 }
1235
1236 static void
1237 addtext(char *text, unsigned length)
1238 {
1239 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1240
1241     if (length == 0)
1242         return;
1243
1244     if (textcache.next + length + 1 >= textcache.allocated) {
1245         textcache.allocated += length + CACHE_SIZE;
1246         textcache.text = xrealloc(textcache.text, textcache.allocated);
1247     }
1248     bcopy(text, textcache.text + textcache.next, length);
1249     textcache.next += length;
1250 }
1251
1252 static void
1253 addchar(char c)
1254 {
1255 /*  fprintf(stderr, "addchar: %c\n", c); */
1256
1257     if (textcache.next + 2 >= textcache.allocated) {
1258         textcache.allocated += CACHE_SIZE;
1259         textcache.text = xrealloc(textcache.text, textcache.allocated);
1260     }
1261     textcache.text[textcache.next++] = c;
1262 }
1263
1264 static char *
1265 fetchtext(unsigned *length)
1266 {
1267 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1268
1269     *length = textcache.next;
1270     textcache.text[textcache.next] = '\0';
1271     return textcache.text;
1272 }
1273
1274 /**********************************************************************
1275 *                                                                     *
1276 *                                                                     *
1277 *    Identifier Processing                                             *
1278 *                                                                     *
1279 *                                                                     *
1280 **********************************************************************/
1281
1282 /*
1283         hsnewid         Enters an id of length n into the symbol table.
1284 */
1285
1286 static void
1287 hsnewid(char *name, int length)
1288 {
1289     char save = name[length];
1290
1291     name[length] = '\0';
1292     yylval.uid = installid(name);
1293     name[length] = save;
1294 }
1295
1296 BOOLEAN
1297 hsnewqid(char *name, int length)
1298 {
1299     char* dot;
1300     char save = name[length];
1301     name[length] = '\0';
1302
1303     dot = strchr(name, '.');
1304     *dot = '\0';
1305     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1306     *dot = '.';
1307     name[length] = save;
1308
1309     return isconstr(dot+1);
1310 }