[project @ 1996-05-20 13:15:10 by partain]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 #include "../../includes/config.h"
11
12 #include <stdio.h>
13
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
15 #include <string.h>
16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
18 #include <memory.h>
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
20 #define index strchr
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
26 #include <strings.h>
27 /* memory.h and strings.h conflict on some systems.  */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
29
30 #include "hspincl.h"
31 #include "hsparser.tab.h"
32 #include "constants.h"
33 #include "utils.h"
34
35 /* Our substitute for <ctype.h> */
36
37 #define NCHARS  256
38 #define _S      0x1
39 #define _D      0x2
40 #define _H      0x4
41 #define _O      0x8
42 #define _C      0x10
43
44 #define _isconstr(s)    (CharTable[*s]&(_C))
45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
46
47 static unsigned char CharTable[NCHARS] = {
48 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
50 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
51 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
52 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
53 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
54 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
55 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
56 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
57 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
58 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
59 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
60 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
61 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
62 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
63 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
64
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
80 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
81 };
82
83 /**********************************************************************
84 *                                                                     *
85 *                                                                     *
86 *      Declarations                                                   *
87 *                                                                     *
88 *                                                                     *
89 **********************************************************************/
90
91 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
92
93 /*
94  * For my own sanity, things that are not part of the flex skeleton
95  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
96  */
97
98 static int hslineno = 0;        /* Line number at end of token */
99 int hsplineno = 0;              /* Line number at end of previous token */
100
101 static int hscolno = 0;         /* Column number at end of token */
102 int hspcolno = 0;               /* Column number at end of previous token */
103 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
104
105 int modulelineno = -1;          /* The line number where the module starts */
106 int startlineno = 0;            /* The line number where something starts */
107 int endlineno = 0;              /* The line number where something ends */
108
109 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
110 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
111
112 static int nested_comments;     /* For counting comment nesting depth */
113
114 /* Hacky definition of yywrap: see flex doc.
115
116    If we don't do this, then we'll have to get the default
117    yywrap from the flex library, which is often something
118    we are not good at locating.  This avoids that difficulty.
119    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
120    WDP 94/09/05
121 */
122 #define yywrap() 1
123
124 /* Essential forward declarations */
125
126 static void hsnewid      PROTO((char *, int));
127 static void layout_input PROTO((char *, int));
128 static void cleartext    (NO_ARGS);
129 static void addtext      PROTO((char *, unsigned));
130 static void addchar      PROTO((char));
131 static char *fetchtext   PROTO((unsigned *));
132 static void new_filename PROTO((char *));
133 static int  Return       PROTO((int));
134 static void hsentercontext PROTO((int));
135
136 /* Special file handling for IMPORTS */
137 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
138
139 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
140 static char *filename_save;             /* File Name                     */
141 static int hslineno_save = 0,           /* Line Number                   */
142  hsplineno_save = 0,                    /* Line Number of Prev. token    */
143  hscolno_save = 0,                      /* Indentation                   */
144  hspcolno_save = 0;                     /* Left Indentation              */
145 static short icontexts_save = 0;        /* Indent Context Level          */
146
147 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
148 extern BOOLEAN etags;                   /* that which is saved */
149
150 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
151
152 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
153                                  * inserted before token +ve  -- "}" inserted before
154                                  * token */
155
156 short icontexts = 0;            /* Which context we're in */
157
158 /*
159         Table of indentations:  right bit indicates whether to use
160           indentation rules (1 = use rules; 0 = ignore)
161
162     partain:
163     push one of these "contexts" at every "case" or "where"; the right bit says
164     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
165
166     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
167     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
168     push is shown just below.
169
170 */
171
172
173 static short indenttab[MAX_CONTEXTS] = {-1};
174
175 #define INDENTPT (indenttab[icontexts]>>1)
176 #define INDENTON (indenttab[icontexts]&1)
177
178 #define RETURN(tok) return(Return(tok))
179
180 #undef YY_DECL
181 #define YY_DECL int yylex1()
182
183 /* We should not peek at yy_act, but flex calls us even for the internal action
184    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
185    to support older versions of flex, we'll continue to peek for now.
186  */
187 #define YY_USER_ACTION \
188     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
189
190 #if 0/*debug*/
191 #undef YY_BREAK
192 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
193 #endif
194
195 /* Each time we enter a new start state, we push it onto the state stack.
196    Note that the rules do not allow us to underflow or overflow the stack.
197    (At least, they shouldn't.)  The maximum expected depth is 4:
198    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
199 */
200 static int StateStack[5];
201 static int StateDepth = -1;
202
203 #ifdef HSP_DEBUG
204 #define PUSH_STATE(n)   do {\
205     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
206     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
207 #define POP_STATE       do {--StateDepth;\
208     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
209     BEGIN(StateStack[StateDepth]);} while(0)
210 #else
211 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
212 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
213 #endif
214
215 %}
216
217 /* The start states are:
218    Code -- normal Haskell code (principal lexer)
219    GlaExt -- Haskell code with Glasgow extensions
220    Comment -- Nested comment processing
221    String -- Inside a string literal with backslashes
222    StringEsc -- Immediately following a backslash in a string literal
223    Char -- Inside a character literal with backslashes
224    CharEsc -- Immediately following a backslash in a character literal 
225
226    Note that the INITIAL state is unused.  Also note that these states
227    are _exclusive_.  All rules should be prefixed with an appropriate
228    list of start states.
229  */
230
231 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
232
233 isoS                    [\xa1-\xbf\xd7\xf7]
234 isoL                    [\xc0-\xd6\xd8-\xde]
235 isol                    [\xdf-\xf6\xf8-\xff]
236 isoA                    [\xa1-\xff]
237
238 D                       [0-9]
239 O                       [0-7]
240 H                       [0-9A-Fa-f]
241 N                       {D}+
242 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
243 S                       [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
244 SId                     {S}{S}*
245 L                       [A-Z\xc0-\xd6\xd8-\xde]
246 l                       [a-z\xdf-\xf6\xf8-\xff]
247 I                       {L}|{l}
248 i                       {L}|{l}|[0-9'_]
249 Id                      {I}{i}*
250 Mod                     {L}{i}*
251 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
252 CNTRL                   [@A-Z\[\\\]^_]
253 WS                      [ \t\n\r\f\v]
254 NL                      [\n\r]
255
256 %%
257
258 %{
259     /* 
260      * Special GHC pragma rules.  Do we need a start state for interface files,
261      * so these won't be matched in source files? --JSM
262      */
263 %}
264
265 <Code,GlaExt>^"# ".*{NL}    {
266                           char tempf[FILENAME_SIZE];
267                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
268                           new_filename(tempf);
269                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
270                         }
271
272 <Code,GlaExt>^"#line ".*{NL}    {
273                           char tempf[FILENAME_SIZE];
274                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
275                           new_filename(tempf); 
276                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
277                         }
278
279 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
280                           /* partain: pragma-style line directive */
281                           char tempf[FILENAME_SIZE];
282                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
283                           new_filename(tempf);
284                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
285                         }
286
287 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
288                               PUSH_STATE(UserPragma);
289                               RETURN(INTERFACE_UPRAGMA);
290                             }
291 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
292                               PUSH_STATE(UserPragma);
293                               RETURN(SPECIALISE_UPRAGMA);
294                             }
295 <Code,GlaExt>"{-#"{WS}*"INLINE" {
296                               PUSH_STATE(UserPragma);
297                               RETURN(INLINE_UPRAGMA);
298                             }
299 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
300                               PUSH_STATE(UserPragma);
301                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
302                             }
303 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
304                               PUSH_STATE(UserPragma);
305                               RETURN(DEFOREST_UPRAGMA);
306                             }
307 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
308                               fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
309                                 input_filename, hsplineno);
310                               format_string(stderr, (unsigned char *) yytext, yyleng);
311                               fputs("'\n", stderr);
312                               nested_comments = 1;
313                               PUSH_STATE(Comment);
314                             }
315 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
316
317 %{
318     /*
319      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
320      * intentionally accepted as a keyword even for normal <Code>.
321      */
322 %}
323
324 <Code,GlaExt>"case"             { RETURN(CASE); }
325 <Code,GlaExt>"class"            { RETURN(CLASS); }
326 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
327 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
328 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
329 <Code,GlaExt>"do"               { RETURN(DO); }
330 <Code,GlaExt>"else"             { RETURN(ELSE); }
331 <Code,GlaExt>"if"               { RETURN(IF); }
332 <Code,GlaExt>"import"           { RETURN(IMPORT); }
333 <Code,GlaExt>"in"               { RETURN(IN); }
334 <Code,GlaExt>"infix"            { RETURN(INFIX); }
335 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
336 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
337 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
338 <Code,GlaExt>"let"              { RETURN(LET); }
339 <Code,GlaExt>"module"           { RETURN(MODULE); }
340 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
341 <Code,GlaExt>"of"               { RETURN(OF); }
342 <Code,GlaExt>"then"             { RETURN(THEN); }
343 <Code,GlaExt>"type"             { RETURN(TYPE); }
344 <Code,GlaExt>"where"            { RETURN(WHERE); }
345
346 <Code,GlaExt>"as"               { RETURN(AS); }
347 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
348 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
349
350 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
351 <GlaExt>"_ccall_"               { RETURN(CCALL); }
352 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
353 <GlaExt>"_casm_"                { RETURN(CASM); }
354 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
355
356 %{
357     /* 
358      * Haskell operators: special, reservedops and useful varsyms
359      */
360 %}
361
362 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
363 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
364 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
365 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
366 <Code,GlaExt>"{"                { RETURN(OCURLY); }
367 <Code,GlaExt>"}"                { RETURN(CCURLY); }
368 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
369 <Code,GlaExt>";"                { RETURN(SEMI); }
370 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
371 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
372
373 <Code,GlaExt>".."               { RETURN(DOTDOT); }
374 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
375 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
376 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
377 <Code,GlaExt>"|"                { RETURN(VBAR); }
378 <Code,GlaExt>"<-"               { RETURN(LARROW); }
379 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
380 <Code,GlaExt>"-"                { RETURN(MINUS); }
381
382 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
383 <Code,GlaExt>"@"                { RETURN(AT); }
384 <Code,GlaExt>"!"                { RETURN(BANG); }
385 <Code,GlaExt>"~"                { RETURN(LAZY); }
386
387 %{
388     /*
389      * Integers and (for Glasgow extensions) primitive integers.  Note that
390      * we pass all of the text on to the parser, because flex/C can't handle
391      * arbitrary precision numbers.
392      */
393 %}
394
395 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
396                          yylval.uid = xstrndup(yytext, yyleng - 1);
397                          RETURN(INTPRIM);
398                         }
399 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
400                          yylval.uid = xstrndup(yytext, yyleng);
401                          RETURN(INTEGER);
402                         }
403 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
404                          yylval.uid = xstrndup(yytext, yyleng - 1);
405                          RETURN(INTPRIM);
406                         }
407 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
408                          yylval.uid = xstrndup(yytext, yyleng);
409                          RETURN(INTEGER);
410                         }
411 <GlaExt>("-")?{N}"#"    {
412                          yylval.uid = xstrndup(yytext, yyleng - 1);
413                          RETURN(INTPRIM);
414                         }
415 <Code,GlaExt,UserPragma>{N} {
416                          yylval.uid = xstrndup(yytext, yyleng);
417                          RETURN(INTEGER);
418                         }
419
420 %{
421     /*
422      * Floats and (for Glasgow extensions) primitive floats/doubles.
423      */
424 %}
425
426 <GlaExt>("-")?{F}"##"   {
427                          yylval.uid = xstrndup(yytext, yyleng - 2);
428                          RETURN(DOUBLEPRIM);
429                         }
430 <GlaExt>("-")?{F}"#"    {
431                          yylval.uid = xstrndup(yytext, yyleng - 1);
432                          RETURN(FLOATPRIM);
433                         }
434 <Code,GlaExt>{F}        {
435                          yylval.uid = xstrndup(yytext, yyleng);
436                          RETURN(FLOAT);
437                         }
438
439 %{
440     /*
441      * Funky ``foo'' style C literals for Glasgow extensions
442      */
443 %}
444
445 <GlaExt>"``"[^']+"''"   {
446                          hsnewid(yytext + 2, yyleng - 4);
447                          RETURN(CLITLIT);
448                         }
449
450 %{
451     /*
452      * Identifiers, both variables and operators.  The trailing hash is allowed
453      * for Glasgow extensions.
454      */
455 %}
456
457
458 %{
459 /* These SHOULDNAE work in "Code" (sigh) */
460 %}
461 <Code,GlaExt,UserPragma>{Id}"#" { 
462                          if (! nonstandardFlag) {
463                             char errbuf[ERR_BUF_SIZE];
464                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
465                             hsperror(errbuf);
466                          }
467                          hsnewid(yytext, yyleng);
468                          RETURN(_isconstr(yytext) ? CONID : VARID);
469                         }
470 <Code,GlaExt,UserPragma>_+{Id} { 
471                          if (! nonstandardFlag) {
472                             char errbuf[ERR_BUF_SIZE];
473                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
474                             hsperror(errbuf);
475                          }
476                          hsnewid(yytext, yyleng);
477                          RETURN(isconstr(yytext) ? CONID : VARID);
478                          /* NB: ^^^^^^^^ : not the macro! */
479                         }
480 <Code,GlaExt,UserPragma>{Id}    {
481                          hsnewid(yytext, yyleng);
482                          RETURN(_isconstr(yytext) ? CONID : VARID);
483                         }
484 <Code,GlaExt,UserPragma>{SId}   {
485                          hsnewid(yytext, yyleng);
486                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
487                         }
488 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
489                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
490                          RETURN(isconstr ? QCONID : QVARID);
491                         }
492 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
493                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
494                          RETURN(isconstr ? QCONSYM : QVARSYM);
495                         }
496
497 %{
498     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
499
500     /* Because we can make the former well-behaved (we defined them).
501
502        Sadly, the latter is defined by Haskell, which allows such
503        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
504     */
505 %}
506
507 <GlaExt,UserPragma>"`"{Id}"#`"  {       
508                          hsnewid(yytext + 1, yyleng - 2);
509                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
510                         }
511
512 %{
513     /*
514      * Character literals.  The first form is the quick form, for character
515      * literals that don't contain backslashes.  Literals with backslashes are
516      * lexed through multiple rules.  First, we match the open ' and as many
517      * normal characters as possible.  This puts us into the <Char> state, where
518      * a backslash is legal.  Then, we match the backslash and move into the 
519      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
520      * characters and the close '.  We may end up with too many characters, but
521      * this allows us to easily share the lex rules with strings.  Excess characters
522      * are ignored with a warning.
523      */
524 %}
525
526 <GlaExt>'({CHAR}|"\"")"'#" {
527                          yylval.uhstring = installHstring(1, yytext+1);
528                          RETURN(CHARPRIM);
529                         }
530 <Code,GlaExt>'({CHAR}|"\"")'    {
531                          yylval.uhstring = installHstring(1, yytext+1);
532                          RETURN(CHAR);
533                         }
534 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
535                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
536                          hsperror(errbuf);
537                         }
538 <Code,GlaExt>'({CHAR}|"\"")* {
539                          hsmlcolno = hspcolno;
540                          cleartext();
541                          addtext(yytext+1, yyleng-1);
542                          PUSH_STATE(Char);
543                         }
544 <Char>({CHAR}|"\"")*'#  {
545                          unsigned length;
546                          char *text;
547
548                          addtext(yytext, yyleng - 2);
549                          text = fetchtext(&length);
550
551                          if (! nonstandardFlag) {
552                             char errbuf[ERR_BUF_SIZE];
553                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
554                             hsperror(errbuf);
555                          }
556
557                          if (length > 1) {
558                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
559                               input_filename, hsplineno, hspcolno + 1);
560                             format_string(stderr, (unsigned char *) text, length);
561                             fputs("' too long\n", stderr);
562                             hsperror("");
563                          }
564                          yylval.uhstring = installHstring(1, text);
565                          hspcolno = hsmlcolno;
566                          POP_STATE;
567                          RETURN(CHARPRIM); 
568                         }
569 <Char>({CHAR}|"\"")*'   {
570                          unsigned length;
571                          char *text;
572
573                          addtext(yytext, yyleng - 1);
574                          text = fetchtext(&length);
575
576                          if (length > 1) {
577                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
578                               input_filename, hsplineno, hspcolno + 1);
579                             format_string(stderr, (unsigned char *) text, length);
580                             fputs("' too long\n", stderr);
581                             hsperror("");
582                          }
583                          yylval.uhstring = installHstring(1, text);
584                          hspcolno = hsmlcolno;
585                          POP_STATE;
586                          RETURN(CHAR); 
587                         }
588 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
589
590
591 %{
592     /*
593      * String literals.  The first form is the quick form, for string literals
594      * that don't contain backslashes.  Literals with backslashes are lexed
595      * through multiple rules.  First, we match the open " and as many normal
596      * characters as possible.  This puts us into the <String> state, where
597      * a backslash is legal.  Then, we match the backslash and move into the 
598      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
599      * characters, moving back and forth between <String> and <StringEsc> as more
600      * backslashes are encountered.  (We may even digress into <Comment> mode if we
601      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
602      * of normal characters and the close ".
603      */
604 %}
605
606 <GlaExt>"\""({CHAR}|"'")*"\""#  {
607                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
608                             /* the -3 accounts for the " on front, "# on the end */
609                          RETURN(STRINGPRIM); 
610                         }
611 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
612                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
613                          RETURN(STRING); 
614                         }
615 <Code,GlaExt>"\""({CHAR}|"'")* {
616                          hsmlcolno = hspcolno;
617                          cleartext();
618                          addtext(yytext+1, yyleng-1);
619                          PUSH_STATE(String);
620                         }
621 <String>({CHAR}|"'")*"\"#"   {
622                          unsigned length;
623                          char *text;
624
625                          addtext(yytext, yyleng-2);
626                          text = fetchtext(&length);
627
628                          if (! nonstandardFlag) {
629                             char errbuf[ERR_BUF_SIZE];
630                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
631                             hsperror(errbuf);
632                          }
633
634                          yylval.uhstring = installHstring(length, text);
635                          hspcolno = hsmlcolno;
636                          POP_STATE;
637                          RETURN(STRINGPRIM);
638                         }
639 <String>({CHAR}|"'")*"\""   {
640                          unsigned length;
641                          char *text;
642
643                          addtext(yytext, yyleng-1);
644                          text = fetchtext(&length);
645
646                          yylval.uhstring = installHstring(length, text);
647                          hspcolno = hsmlcolno;
648                          POP_STATE;
649                          RETURN(STRING); 
650                         }
651 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
652
653 %{
654     /*
655      * Character and string escapes are roughly the same, but strings have the
656      * extra `\&' sequence which is not allowed for characters.  Also, comments
657      * are allowed in the <StringEsc> state.  (See the comment section much
658      * further down.)
659      *
660      * NB: Backslashes and tabs are stored in strings as themselves.
661      * But if we print them (in printtree.c), they must go out as
662      * "\\\\" and "\\t" respectively.  (This is because of the bogus
663      * intermediate format that the parser produces.  It uses '\t' fpr end of
664      * string, so it needs to be able to escape tabs, which means that it
665      * also needs to be able to escape the escape character ('\\').  Sigh.
666      */
667 %}
668
669 <Char>\\                { PUSH_STATE(CharEsc); }
670 <String>\\&             /* Ignore */ ;
671 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
672
673 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
674 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
675
676 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
677 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
678 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
679 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
680 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
681 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
682 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
683 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
684 <CharEsc,StringEsc>BEL  |
685 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
686 <CharEsc,StringEsc>BS   |
687 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
688 <CharEsc,StringEsc>HT   |
689 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
690 <CharEsc,StringEsc>LF   |
691 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
692 <CharEsc,StringEsc>VT   |
693 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
694 <CharEsc,StringEsc>FF   |
695 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
696 <CharEsc,StringEsc>CR   |
697 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
698 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
699 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
700 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
701 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
702 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
703 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
704 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
705 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
706 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
707 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
708 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
709 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
710 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
711 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
712 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
713 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
714 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
715 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
716 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
717 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
718 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
719 <CharEsc,StringEsc>{D}+  {
720                           int i = strtol(yytext, NULL, 10);
721                           if (i < NCHARS) {
722                              addchar((char) i);
723                           } else {
724                              char errbuf[ERR_BUF_SIZE];
725                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
726                                 yytext);
727                              hsperror(errbuf);
728                           }
729                           POP_STATE;
730                         }
731 <CharEsc,StringEsc>o{O}+ {
732                           int i = strtol(yytext + 1, NULL, 8);
733                           if (i < NCHARS) {
734                              addchar((char) i);
735                           } else {
736                              char errbuf[ERR_BUF_SIZE];
737                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
738                                 yytext);
739                              hsperror(errbuf);
740                           }
741                           POP_STATE;
742                         }
743 <CharEsc,StringEsc>x{H}+ {
744                           int i = strtol(yytext + 1, NULL, 16);
745                           if (i < NCHARS) {
746                              addchar((char) i);
747                           } else {
748                              char errbuf[ERR_BUF_SIZE];
749                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
750                                 yytext);
751                              hsperror(errbuf);
752                           }
753                           POP_STATE;
754                         }
755
756 %{
757     /*
758      * Simple comments and whitespace.  Normally, we would just ignore these, but
759      * in case we're processing a string escape, we need to note that we've seen
760      * a gap.
761      *
762      * Note that we cater for a comment line that *doesn't* end in a newline.
763      * This is incorrect, strictly speaking, but seems like the right thing
764      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
765      */
766 %}
767
768 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
769 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
770
771 %{
772     /*
773      * Nested comments.  The major complication here is in trying to match the
774      * longest lexemes possible, for better performance.  (See the flex document.)
775      * That's why the rules look so bizarre.
776      */
777 %}
778
779 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
780                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
781                         }
782
783 <Comment>[^-{]*         |
784 <Comment>"-"+[^-{}]+    |
785 <Comment>"{"+[^-{}]+    ;
786 <Comment>"{-"           { nested_comments++; }
787 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
788 <Comment>(.|\n)         ;
789
790 %{
791     /*
792      * Illegal characters.  This used to be a single rule, but we might as well
793      * pass on as much information as we have, so now we indicate our state in
794      * the error message.
795      */
796 %}
797
798 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
799                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
800                             input_filename, hsplineno, hspcolno + 1); 
801                          format_string(stderr, (unsigned char *) yytext, 1);
802                          fputs("'\n", stderr);
803                          hsperror("");
804                         }
805 <Char>(.|\n)            { 
806                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
807                             input_filename, hsplineno, hspcolno + 1); 
808                          format_string(stderr, (unsigned char *) yytext, 1);
809                          fputs("' in a character literal\n", stderr);
810                          hsperror("");
811                         }
812 <CharEsc>(.|\n)         {
813                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
814                             input_filename, hsplineno, hspcolno + 1); 
815                          format_string(stderr, (unsigned char *) yytext, 1);
816                          fputs("'\n", stderr);
817                          hsperror("");
818                         }
819 <String>(.|\n)          { if (nonstandardFlag) {
820                              addtext(yytext, yyleng);
821                           } else { 
822                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
823                                 input_filename, hsplineno, hspcolno + 1); 
824                                 format_string(stderr, (unsigned char *) yytext, 1);
825                                 fputs("' in a string literal\n", stderr);
826                                 hsperror("");
827                           }
828                         }
829 <StringEsc>(.|\n)       {
830                          if (noGap) {
831                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
832                                 input_filename, hsplineno, hspcolno + 1); 
833                              format_string(stderr, (unsigned char *) yytext, 1);
834                              fputs("'\n", stderr);
835                              hsperror("");
836                          } else {
837                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
838                                 input_filename, hsplineno, hspcolno + 1);
839                              format_string(stderr, (unsigned char *) yytext, 1);
840                              fputs("' in a string gap\n", stderr);
841                              hsperror("");
842                          }
843                         }
844
845 %{
846     /*
847      * End of file.  In any sub-state, this is an error.  However, for the primary
848      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
849      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
850      * adding virtual close curlies, or closing an interface and returning to the
851      * primary source file.
852      *
853      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
854      * line/column advancement has to be done by hand.
855      */
856 %}
857
858 <Char,CharEsc><<EOF>>   { 
859                           hsplineno = hslineno; hspcolno = hscolno;
860                           hsperror("unterminated character literal");
861                         }
862 <Comment><<EOF>>        { 
863                           hsplineno = hslineno; hspcolno = hscolno;
864                           hsperror("unterminated comment"); 
865                         }
866 <String,StringEsc><<EOF>>   { 
867                           hsplineno = hslineno; hspcolno = hscolno;
868                           hsperror("unterminated string literal"); 
869                         }
870 <UserPragma><<EOF>>     {
871                           hsplineno = hslineno; hspcolno = hscolno;
872                           hsperror("unterminated user-specified pragma"); 
873                         }
874 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
875
876 %%
877
878 /**********************************************************************
879 *                                                                     *
880 *                                                                     *
881 *     YACC/LEX Initialisation etc.                                    *
882 *                                                                     *
883 *                                                                     *
884 **********************************************************************/
885
886 /*
887    We initialise input_filename to "<stdin>".
888    This allows unnamed sources to be piped into the parser.
889 */
890
891 extern BOOLEAN acceptPrim;
892
893 void
894 yyinit(void)
895 {
896     input_filename = xstrdup("<stdin>");
897
898     /* We must initialize the input buffer _now_, because we call
899        setyyin _before_ calling yylex for the first time! */
900     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
901
902     if (acceptPrim)
903         PUSH_STATE(GlaExt);
904     else
905         PUSH_STATE(Code);
906 }
907
908 static void
909 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
910 {
911     if (input_filename != NULL)
912         free(input_filename);
913     input_filename = xstrdup(f);
914 }
915
916 /**********************************************************************
917 *                                                                     *
918 *                                                                     *
919 *     Layout Processing                                               *
920 *                                                                     *
921 *                                                                     *
922 **********************************************************************/
923
924 /*
925         The following section deals with Haskell Layout conventions
926         forcing insertion of ; or } as appropriate
927 */
928
929 static BOOLEAN
930 hsshouldindent(void)
931 {
932     return (!forgetindent && INDENTON);
933 }
934
935
936 /* Enter new context and set new indentation level */
937 void
938 hssetindent(void)
939 {
940 #ifdef HSP_DEBUG
941     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
942 #endif
943
944     /*
945      * partain: first chk that new indent won't be less than current one; this code
946      * doesn't make sense to me; hscolno tells the position of the _end_ of the
947      * current token; what that has to do with indenting, I don't know.
948      */
949
950
951     if (hscolno - 1 <= INDENTPT) {
952         if (INDENTPT == -1)
953             return;             /* Empty input OK for Haskell 1.1 */
954         else {
955             char errbuf[ERR_BUF_SIZE];
956
957             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
958             hsperror(errbuf);
959         }
960     }
961     hsentercontext((hspcolno << 1) | 1);
962 }
963
964
965 /* Enter a new context without changing the indentation level */
966 void
967 hsincindent(void)
968 {
969 #ifdef HSP_DEBUG
970     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
971 #endif
972     hsentercontext(indenttab[icontexts] & ~1);
973 }
974
975
976 /* Turn off indentation processing, usually because an explicit "{" has been seen */
977 void
978 hsindentoff(void)
979 {
980     forgetindent = TRUE;
981 }
982
983
984 /* Enter a new layout context. */
985 static void
986 hsentercontext(int indent)
987 {
988     /* Enter new context and set indentation as specified */
989     if (++icontexts >= MAX_CONTEXTS) {
990         char errbuf[ERR_BUF_SIZE];
991
992         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
993         hsperror(errbuf);
994     }
995     forgetindent = FALSE;
996     indenttab[icontexts] = indent;
997 #ifdef HSP_DEBUG
998     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
999 #endif
1000 }
1001
1002
1003 /* Exit a layout context */
1004 void
1005 hsendindent(void)
1006 {
1007     --icontexts;
1008 #ifdef HSP_DEBUG
1009     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1010 #endif
1011 }
1012
1013 /*
1014  *      Return checks the indentation level and returns ;, } or the specified token.
1015  */
1016
1017 static int
1018 Return(int tok)
1019 {
1020 #ifdef HSP_DEBUG
1021     extern int yyleng;
1022 #endif
1023
1024     if (hsshouldindent()) {
1025         if (hspcolno < INDENTPT) {
1026 #ifdef HSP_DEBUG
1027             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1028 #endif
1029             hssttok = tok;
1030             return (VCCURLY);
1031         } else if (hspcolno == INDENTPT) {
1032 #ifdef HSP_DEBUG
1033             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1034 #endif
1035             hssttok = -tok;
1036             return (SEMI);
1037         }
1038     }
1039     hssttok = -1;
1040 #ifdef HSP_DEBUG
1041     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1042 #endif
1043     return (tok);
1044 }
1045
1046
1047 /*
1048  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1049  */
1050 int
1051 yylex()
1052 {
1053     int tok;
1054     static BOOLEAN eof = FALSE;
1055
1056     if (!eof) {
1057         if (hssttok != -1) {
1058             if (hssttok < 0) {
1059                 tok = -hssttok;
1060                 hssttok = -1;
1061                 return tok;
1062             }
1063             RETURN(hssttok);
1064         } else {
1065             endlineno = hslineno;
1066             if ((tok = yylex1()) != EOF)
1067                 return tok;
1068             else
1069                 eof = TRUE;
1070         }
1071     }
1072     if (icontexts > icontexts_save) {
1073         if (INDENTON) {
1074             eof = TRUE;
1075             indenttab[icontexts] = 0;
1076             return (VCCURLY);
1077         } else
1078             hsperror("missing '}' at end of file");
1079     } else if (hsbuf_save != NULL) {
1080         fclose(yyin);
1081         yy_delete_buffer(YY_CURRENT_BUFFER);
1082         yy_switch_to_buffer(hsbuf_save);
1083         hsbuf_save = NULL;
1084         new_filename(filename_save);
1085         free(filename_save);
1086         hslineno = hslineno_save;
1087         hsplineno = hsplineno_save;
1088         hscolno = hscolno_save;
1089         hspcolno = hspcolno_save;
1090         etags = etags_save;
1091         icontexts = icontexts_save - 1;
1092         icontexts_save = 0;
1093 #ifdef HSP_DEBUG
1094         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1095 #endif
1096         eof = FALSE;
1097
1098         /* RETURN(LEOF); */
1099         hsperror("No longer using yacc to parse interface files");
1100
1101     } else {
1102         yyterminate();
1103     }
1104     abort(); /* should never get here! */
1105     return(0);
1106 }
1107
1108 /**********************************************************************
1109 *                                                                     *
1110 *                                                                     *
1111 *     Input Processing for Interfaces -- Not currently used !!!       *
1112 *                                                                     *
1113 *                                                                     *
1114 **********************************************************************/
1115
1116 /* setyyin(file)        open file as new lex input buffer */
1117 extern FILE *yyin;
1118
1119 void
1120 setyyin(char *file)
1121 {
1122     hsbuf_save = YY_CURRENT_BUFFER;
1123     if ((yyin = fopen(file, "r")) == NULL) {
1124         char errbuf[ERR_BUF_SIZE];
1125
1126         sprintf(errbuf, "can't read \"%-.50s\"", file);
1127         hsperror(errbuf);
1128     }
1129     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1130
1131     hslineno_save = hslineno;
1132     hsplineno_save = hsplineno;
1133     hslineno = hsplineno = 1;
1134
1135     filename_save = input_filename;
1136     input_filename = NULL;
1137     new_filename(file);
1138     hscolno_save = hscolno;
1139     hspcolno_save = hspcolno;
1140     hscolno = hspcolno = 0;
1141     etags_save = etags; /* do not do "etags" stuff in interfaces */
1142     etags = 0;          /* We remember whether we are doing it in
1143                            the module, so we can restore it later [WDP 94/09] */
1144     hsentercontext(-1);         /* partain: changed this from 0 */
1145     icontexts_save = icontexts;
1146 #ifdef HSP_DEBUG
1147     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1148 #endif
1149 }
1150
1151 static void
1152 layout_input(char *text, int len)
1153 {
1154 #ifdef HSP_DEBUG
1155     fprintf(stderr, "Scanning \"%s\"\n", text);
1156 #endif
1157
1158     hsplineno = hslineno;
1159     hspcolno = hscolno;
1160
1161     while (len-- > 0) {
1162         switch (*text++) {
1163         case '\n':
1164         case '\r':
1165         case '\f':
1166             hslineno++;
1167             hscolno = 0;
1168             break;
1169         case '\t':
1170             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1171             break;
1172         case '\v':
1173             break;
1174         default:
1175             ++hscolno;
1176             break;
1177         }
1178     }
1179 }
1180
1181 void
1182 setstartlineno(void)
1183 {
1184     startlineno = hsplineno;
1185
1186     if (modulelineno == 0) {
1187         modulelineno = startlineno;
1188     }
1189
1190 #if 1/*etags*/
1191 #else
1192     if (etags)
1193         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1194 #endif
1195 }
1196
1197 /**********************************************************************
1198 *                                                                     *
1199 *                                                                     *
1200 *                      Text Caching                                   *
1201 *                                                                     *
1202 *                                                                     *
1203 **********************************************************************/
1204
1205 #define CACHE_SIZE YY_BUF_SIZE
1206
1207 static struct {
1208     unsigned allocated;
1209     unsigned next;
1210     char *text;
1211 } textcache = { 0, 0, NULL };
1212
1213 static void
1214 cleartext(void)
1215 {
1216 /*  fprintf(stderr, "cleartext\n"); */
1217     textcache.next = 0;
1218     if (textcache.allocated == 0) {
1219         textcache.allocated = CACHE_SIZE;
1220         textcache.text = xmalloc(CACHE_SIZE);
1221     }
1222 }
1223
1224 static void
1225 addtext(char *text, unsigned length)
1226 {
1227 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1228
1229     if (length == 0)
1230         return;
1231
1232     if (textcache.next + length + 1 >= textcache.allocated) {
1233         textcache.allocated += length + CACHE_SIZE;
1234         textcache.text = xrealloc(textcache.text, textcache.allocated);
1235     }
1236     bcopy(text, textcache.text + textcache.next, length);
1237     textcache.next += length;
1238 }
1239
1240 static void
1241 addchar(char c)
1242 {
1243 /*  fprintf(stderr, "addchar: %c\n", c); */
1244
1245     if (textcache.next + 2 >= textcache.allocated) {
1246         textcache.allocated += CACHE_SIZE;
1247         textcache.text = xrealloc(textcache.text, textcache.allocated);
1248     }
1249     textcache.text[textcache.next++] = c;
1250 }
1251
1252 static char *
1253 fetchtext(unsigned *length)
1254 {
1255 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1256
1257     *length = textcache.next;
1258     textcache.text[textcache.next] = '\0';
1259     return textcache.text;
1260 }
1261
1262 /**********************************************************************
1263 *                                                                     *
1264 *                                                                     *
1265 *    Identifier Processing                                             *
1266 *                                                                     *
1267 *                                                                     *
1268 **********************************************************************/
1269
1270 /*
1271         hsnewid         Enters an id of length n into the symbol table.
1272 */
1273
1274 static void
1275 hsnewid(char *name, int length)
1276 {
1277     char save = name[length];
1278
1279     name[length] = '\0';
1280     yylval.uid = installid(name);
1281     name[length] = save;
1282 }
1283
1284 BOOLEAN
1285 hsnewqid(char *name, int length)
1286 {
1287     char* dot;
1288     char save = name[length];
1289     name[length] = '\0';
1290
1291     dot = strchr(name, '.');
1292     *dot = '\0';
1293     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1294     *dot = '.';
1295     name[length] = save;
1296
1297     return _isconstr(dot+1);
1298 }
1299
1300 BOOLEAN 
1301 isconstr(char *s) /* walks past leading underscores before using the macro */
1302 {
1303     char *temp = s;
1304
1305     for ( ; temp != NULL && *temp == '_' ; temp++ );
1306
1307     return _isconstr(temp);
1308 }