[project @ 1997-06-05 20:45:33 by sof]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 /* The includes/config.h one */
11 #include "config.h"
12
13 #include <stdio.h>
14
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 #include <string.h>
17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #include <memory.h>
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define index strchr
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 #include <strings.h>
28 /* memory.h and strings.h conflict on some systems.  */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
30
31 #include "hspincl.h"
32 #include "hsparser.tab.h"
33 #include "constants.h"
34 #include "utils.h"
35
36 /* Our substitute for <ctype.h> */
37
38 #define NCHARS  256
39 #define _S      0x1
40 #define _D      0x2
41 #define _H      0x4
42 #define _O      0x8
43 #define _C      0x10
44
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
62
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 };
80
81 BOOLEAN
82 isconstr (char *s)
83 {
84     return(CharTable[*s]&(_C));
85 }
86
87 /**********************************************************************
88 *                                                                     *
89 *                                                                     *
90 *      Declarations                                                   *
91 *                                                                     *
92 *                                                                     *
93 **********************************************************************/
94
95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
96
97 /*
98  * For my own sanity, things that are not part of the flex skeleton
99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
100  */
101
102 static int hslineno = 0;        /* Line number at end of token */
103 int hsplineno = 0;              /* Line number at end of previous token */
104
105 static int hscolno = 0;         /* Column number at end of token */
106 int hspcolno = 0;               /* Column number at end of previous token */
107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
108
109 int modulelineno = -1;          /* The line number where the module starts */
110 int startlineno = 0;            /* The line number where something starts */
111 int endlineno = 0;              /* The line number where something ends */
112
113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
115
116 static int nested_comments;     /* For counting comment nesting depth */
117
118 /* OLD: Hacky definition of yywrap: see flex doc.
119
120    If we don't do this, then we'll have to get the default
121    yywrap from the flex library, which is often something
122    we are not good at locating.  This avoids that difficulty.
123    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124    WDP 94/09/05
125 #define yywrap() 1
126 */
127
128 /* Essential forward declarations */
129
130 static void hsnewid      PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext    (NO_ARGS);
133 static void addtext      PROTO((char *, unsigned));
134 static void addchar      PROTO((char));
135 static char *fetchtext   PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int  Return       PROTO((int));
138 static void hsentercontext PROTO((int));
139
140 /* Special file handling for IMPORTS */
141 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142
143 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
144 static char *filename_save;             /* File Name                     */
145 static int hslineno_save = 0,           /* Line Number                   */
146  hsplineno_save = 0,                    /* Line Number of Prev. token    */
147  hscolno_save = 0,                      /* Indentation                   */
148  hspcolno_save = 0;                     /* Left Indentation              */
149 static short icontexts_save = 0;        /* Indent Context Level          */
150
151 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags;                   /* that which is saved */
153
154 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
155
156 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
157                                  * inserted before token +ve  -- "}" inserted before
158                                  * token */
159
160 short icontexts = 0;            /* Which context we're in */
161
162 /*
163         Table of indentations:  right bit indicates whether to use
164           indentation rules (1 = use rules; 0 = ignore)
165
166     partain:
167     push one of these "contexts" at every "case" or "where"; the right bit says
168     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
169
170     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
171     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
172     push is shown just below.
173
174 */
175
176
177 static short indenttab[MAX_CONTEXTS] = {-1};
178
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
181
182 #define RETURN(tok) return(Return(tok))
183
184 #undef YY_DECL
185 #define YY_DECL int yylex1()
186
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189    to support older versions of flex, we'll continue to peek for now.
190  */
191 #define YY_USER_ACTION \
192     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
193
194 #if 0/*debug*/
195 #undef YY_BREAK
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
197 #endif
198
199 /* Each time we enter a new start state, we push it onto the state stack.
200 */
201 #define PUSH_STATE(n)   yy_push_state(n)
202 #define POP_STATE       yy_pop_state()
203
204 %}
205 /* Options:
206     8bit (8-bit input)
207     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208     never-interactive (to go a bit faster)
209     stack (use a start-condition stack)
210 */
211 %option 8bit
212 %option noyywrap
213 %option never-interactive
214 %option stack
215
216 /* The start states are:
217    Code -- normal Haskell code (principal lexer)
218    GlaExt -- Haskell code with Glasgow extensions
219    Comment -- Nested comment processing
220    String -- Inside a string literal with backslashes
221    StringEsc -- Immediately following a backslash in a string literal
222    Char -- Inside a character literal with backslashes
223    CharEsc -- Immediately following a backslash in a character literal 
224
225    Note that the INITIAL state is unused.  Also note that these states
226    are _exclusive_.  All rules should be prefixed with an appropriate
227    list of start states.
228  */
229
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231
232 isoS                    [\xa1-\xbf\xd7\xf7]
233 isoL                    [\xc0-\xd6\xd8-\xde]
234 isol                    [\xdf-\xf6\xf8-\xff]
235 isoA                    [\xa1-\xff]
236
237 D                       [0-9]
238 O                       [0-7]
239 H                       [0-9A-Fa-f]
240 N                       {D}+
241 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 SId                     {S}{S}*
244 L                       [A-Z\xc0-\xd6\xd8-\xde]
245 l                       [a-z\xdf-\xf6\xf8-\xff]
246 I                       {L}|{l}
247 i                       {L}|{l}|[0-9'_]
248 Id                      {I}{i}*
249 Mod                     {L}{i}*
250 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
251 CNTRL                   [@A-Z\[\\\]^_]
252 WS                      [ \t\n\r\f\v]
253 NL                      [\n\r]
254
255 %%
256
257 %{
258     /*
259      * Simple comments and whitespace.  Normally, we would just ignore these, but
260      * in case we're processing a string escape, we need to note that we've seen
261      * a gap.
262      *
263      * Note that we cater for a comment line that *doesn't* end in a newline.
264      * This is incorrect, strictly speaking, but seems like the right thing
265      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
266      *
267      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
268      */
269 %}
270
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
273
274 %{
275     /* 
276      * Special GHC pragma rules.  Do we need a start state for interface files,
277      * so these won't be matched in source files? --JSM
278      */
279 %}
280
281 <Code,GlaExt>^"# ".*{NL}    {
282                           char tempf[FILENAME_SIZE];
283                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
284                           new_filename(tempf);
285                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
286                         }
287
288 <Code,GlaExt>^"#line ".*{NL}    {
289                           char tempf[FILENAME_SIZE];
290                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
291                           new_filename(tempf); 
292                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
293                         }
294
295 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
296                           /* partain: pragma-style line directive */
297                           char tempf[FILENAME_SIZE];
298                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
299                           new_filename(tempf);
300                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
301                         }
302
303 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
304                               PUSH_STATE(UserPragma);
305                               RETURN(INTERFACE_UPRAGMA);
306                             }
307 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
308                               PUSH_STATE(UserPragma);
309                               RETURN(SPECIALISE_UPRAGMA);
310                             }
311 <Code,GlaExt>"{-#"{WS}*"INLINE" {
312                               PUSH_STATE(UserPragma);
313                               RETURN(INLINE_UPRAGMA);
314                             }
315 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
316                               PUSH_STATE(UserPragma);
317                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
318                             }
319 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
320                               PUSH_STATE(UserPragma);
321                               RETURN(DEFOREST_UPRAGMA);
322                             }
323 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
324                               /* these are handled by hscpp */
325                               nested_comments =1;
326                               PUSH_STATE(Comment);
327                             }
328 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
329                               /* these are for the driver! */
330                               nested_comments =1;
331                               PUSH_STATE(Comment);
332                             }
333 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
334                               /* these are used by `make depend' and the
335                                  compiler to indicate that a module should
336                                  be imported from source */
337                               nested_comments =1;
338                               RETURN(SOURCE_UPRAGMA); 
339                             }
340
341 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
342                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
343                                 input_filename, hsplineno);
344                               format_string(stderr, (unsigned char *) yytext, yyleng);
345                               fputs("'\n", stderr);
346                               nested_comments = 1;
347                               PUSH_STATE(Comment);
348                             }
349 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
350
351 %{
352     /*
353      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
354      * intentionally accepted as a keyword even for normal <Code>.
355      */
356 %}
357
358 <Code,GlaExt>"case"             { RETURN(CASE); }
359 <Code,GlaExt>"class"            { RETURN(CLASS); }
360 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
361 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
362 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
363 <Code,GlaExt>"do"               { RETURN(DO); }
364 <Code,GlaExt>"else"             { RETURN(ELSE); }
365 <Code,GlaExt>"if"               { RETURN(IF); }
366 <Code,GlaExt>"import"           { RETURN(IMPORT); }
367 <Code,GlaExt>"in"               { RETURN(IN); }
368 <Code,GlaExt>"infix"            { RETURN(INFIX); }
369 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
370 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
371 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
372 <Code,GlaExt>"let"              { RETURN(LET); }
373 <Code,GlaExt>"module"           { RETURN(MODULE); }
374 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
375 <Code,GlaExt>"of"               { RETURN(OF); }
376 <Code,GlaExt>"then"             { RETURN(THEN); }
377 <Code,GlaExt>"type"             { RETURN(TYPE); }
378 <Code,GlaExt>"where"            { RETURN(WHERE); }
379
380 <Code,GlaExt>"as"               { RETURN(AS); }
381 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
382 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
383
384 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
385 <GlaExt>"_ccall_"               { RETURN(CCALL); }
386 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
387 <GlaExt>"_casm_"                { RETURN(CASM); }
388 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
389
390 %{
391     /* 
392      * Haskell operators: special, reservedops and useful varsyms
393      */
394 %}
395
396 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
397 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
398 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
399 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
400 <Code,GlaExt>"{"                { RETURN(OCURLY); }
401 <Code,GlaExt>"}"                { RETURN(CCURLY); }
402 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
403 <Code,GlaExt>";"                { RETURN(SEMI); }
404 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
405 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
406
407 <Code,GlaExt>".."               { RETURN(DOTDOT); }
408 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
409 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
410 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
411 <Code,GlaExt>"|"                { RETURN(VBAR); }
412 <Code,GlaExt>"<-"               { RETURN(LARROW); }
413 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
414 <Code,GlaExt>"-"                { RETURN(MINUS); }
415 <Code,GlaExt>"+"                { RETURN(PLUS); }
416
417 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
418 <Code,GlaExt>"@"                { RETURN(AT); }
419 <Code,GlaExt>"!"                { RETURN(BANG); }
420 <Code,GlaExt>"~"                { RETURN(LAZY); }
421
422 %{
423     /*
424      * Integers and (for Glasgow extensions) primitive integers.  Note that
425      * we pass all of the text on to the parser, because flex/C can't handle
426      * arbitrary precision numbers.
427      */
428 %}
429
430 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
431                          yylval.uid = xstrndup(yytext, yyleng - 1);
432                          RETURN(INTPRIM);
433                         }
434 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
435                          yylval.uid = xstrndup(yytext, yyleng);
436                          RETURN(INTEGER);
437                         }
438 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
439                          yylval.uid = xstrndup(yytext, yyleng - 1);
440                          RETURN(INTPRIM);
441                         }
442 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
443                          yylval.uid = xstrndup(yytext, yyleng);
444                          RETURN(INTEGER);
445                         }
446 <GlaExt>("-")?{N}"#"    {
447                          yylval.uid = xstrndup(yytext, yyleng - 1);
448                          RETURN(INTPRIM);
449                         }
450 <Code,GlaExt,UserPragma>{N} {
451                          yylval.uid = xstrndup(yytext, yyleng);
452                          RETURN(INTEGER);
453                         }
454
455 %{
456     /*
457      * Floats and (for Glasgow extensions) primitive floats/doubles.
458      */
459 %}
460
461 <GlaExt>("-")?{F}"##"   {
462                          yylval.uid = xstrndup(yytext, yyleng - 2);
463                          RETURN(DOUBLEPRIM);
464                         }
465 <GlaExt>("-")?{F}"#"    {
466                          yylval.uid = xstrndup(yytext, yyleng - 1);
467                          RETURN(FLOATPRIM);
468                         }
469 <Code,GlaExt>{F}        {
470                          yylval.uid = xstrndup(yytext, yyleng);
471                          RETURN(FLOAT);
472                         }
473
474 %{
475     /*
476      * Funky ``foo'' style C literals for Glasgow extensions
477      */
478 %}
479
480 <GlaExt>"``"[^']+"''"   {
481                          hsnewid(yytext + 2, yyleng - 4);
482                          RETURN(CLITLIT);
483                         }
484
485 %{
486     /*
487      * Identifiers, both variables and operators.  The trailing hash is allowed
488      * for Glasgow extensions.
489      */
490 %}
491
492
493 %{
494 /* These SHOULDNAE work in "Code" (sigh) */
495 %}
496 <GlaExt,UserPragma>{Id}"#" { 
497                         if (! nonstandardFlag) {
498                             char errbuf[ERR_BUF_SIZE];
499                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
500                             hsperror(errbuf);
501                          }
502                          hsnewid(yytext, yyleng);
503                          RETURN(isconstr(yytext) ? CONID : VARID);
504                         }
505 <Code,GlaExt,UserPragma>{Id}    {
506                          hsnewid(yytext, yyleng);
507                          RETURN(isconstr(yytext) ? CONID : VARID);
508                         }
509 <Code,GlaExt,UserPragma>{SId}   {
510                          hsnewid(yytext, yyleng);
511                          RETURN(isconstr(yytext) ? CONSYM : VARSYM);
512                         }
513 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
514                          BOOLEAN is_constr;
515                          if (! nonstandardFlag) {
516                             char errbuf[ERR_BUF_SIZE];
517                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
518                             hsperror(errbuf);
519                          }
520                          is_constr = hsnewqid(yytext, yyleng);
521                          RETURN(is_constr ? QCONID : QVARID);
522                         }
523 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
524                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
525                          RETURN(is_constr ? QCONID : QVARID);
526                         }
527 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
528                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
529                          RETURN(is_constr ? QCONSYM : QVARSYM);
530                         }
531
532 %{
533     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
534
535     /* Because we can make the former well-behaved (we defined them).
536
537        Sadly, the latter is defined by Haskell, which allows such
538        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
539     */
540 %}
541
542 <GlaExt,UserPragma>"`"{Id}"#`"  {       
543                          hsnewid(yytext + 1, yyleng - 2);
544                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
545                         }
546
547 %{
548     /*
549      * Character literals.  The first form is the quick form, for character
550      * literals that don't contain backslashes.  Literals with backslashes are
551      * lexed through multiple rules.  First, we match the open ' and as many
552      * normal characters as possible.  This puts us into the <Char> state, where
553      * a backslash is legal.  Then, we match the backslash and move into the 
554      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
555      * characters and the close '.  We may end up with too many characters, but
556      * this allows us to easily share the lex rules with strings.  Excess characters
557      * are ignored with a warning.
558      */
559 %}
560
561 <GlaExt>'({CHAR}|"\"")"'#" {
562                          yylval.uhstring = installHstring(1, yytext+1);
563                          RETURN(CHARPRIM);
564                         }
565 <Code,GlaExt>'({CHAR}|"\"")'    {
566                          yylval.uhstring = installHstring(1, yytext+1);
567                          RETURN(CHAR);
568                         }
569 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
570                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
571                          hsperror(errbuf);
572                         }
573 <Code,GlaExt>'({CHAR}|"\"")* {
574                          hsmlcolno = hspcolno;
575                          cleartext();
576                          addtext(yytext+1, yyleng-1);
577                          PUSH_STATE(Char);
578                         }
579 <Char>({CHAR}|"\"")*'#  {
580                          unsigned length;
581                          char *text;
582
583                          addtext(yytext, yyleng - 2);
584                          text = fetchtext(&length);
585
586                          if (! nonstandardFlag) {
587                             char errbuf[ERR_BUF_SIZE];
588                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
589                             hsperror(errbuf);
590                          }
591
592                          if (length > 1) {
593                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
594                               input_filename, hsplineno, hspcolno + 1);
595                             format_string(stderr, (unsigned char *) text, length);
596                             fputs("' too long\n", stderr);
597                             hsperror("");
598                          }
599                          yylval.uhstring = installHstring(1, text);
600                          hspcolno = hsmlcolno;
601                          POP_STATE;
602                          RETURN(CHARPRIM); 
603                         }
604 <Char>({CHAR}|"\"")*'   {
605                          unsigned length;
606                          char *text;
607
608                          addtext(yytext, yyleng - 1);
609                          text = fetchtext(&length);
610
611                          if (length > 1) {
612                             fprintf(stderr, "%s:%d:%d: Character literal '",
613                               input_filename, hsplineno, hspcolno + 1);
614                             format_string(stderr, (unsigned char *) text, length);
615                             fputs("' too long\n", stderr);
616                             hsperror("");
617                          }
618                          yylval.uhstring = installHstring(1, text);
619                          hspcolno = hsmlcolno;
620                          POP_STATE;
621                          RETURN(CHAR); 
622                         }
623 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
624
625
626 %{
627     /*
628      * String literals.  The first form is the quick form, for string literals
629      * that don't contain backslashes.  Literals with backslashes are lexed
630      * through multiple rules.  First, we match the open " and as many normal
631      * characters as possible.  This puts us into the <String> state, where
632      * a backslash is legal.  Then, we match the backslash and move into the 
633      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
634      * characters, moving back and forth between <String> and <StringEsc> as more
635      * backslashes are encountered.  (We may even digress into <Comment> mode if we
636      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
637      * of normal characters and the close ".
638      */
639 %}
640
641 <GlaExt>"\""({CHAR}|"'")*"\""#  {
642                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
643                             /* the -3 accounts for the " on front, "# on the end */
644                          RETURN(STRINGPRIM); 
645                         }
646 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
647                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
648                          RETURN(STRING); 
649                         }
650 <Code,GlaExt>"\""({CHAR}|"'")* {
651                          hsmlcolno = hspcolno;
652                          cleartext();
653                          addtext(yytext+1, yyleng-1);
654                          PUSH_STATE(String);
655                         }
656 <String>({CHAR}|"'")*"\"#"   {
657                          unsigned length;
658                          char *text;
659
660                          addtext(yytext, yyleng-2);
661                          text = fetchtext(&length);
662
663                          if (! nonstandardFlag) {
664                             char errbuf[ERR_BUF_SIZE];
665                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
666                             hsperror(errbuf);
667                          }
668
669                          yylval.uhstring = installHstring(length, text);
670                          hspcolno = hsmlcolno;
671                          POP_STATE;
672                          RETURN(STRINGPRIM);
673                         }
674 <String>({CHAR}|"'")*"\""   {
675                          unsigned length;
676                          char *text;
677
678                          addtext(yytext, yyleng-1);
679                          text = fetchtext(&length);
680
681                          yylval.uhstring = installHstring(length, text);
682                          hspcolno = hsmlcolno;
683                          POP_STATE;
684                          RETURN(STRING); 
685                         }
686 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
687
688 %{
689     /*
690      * Character and string escapes are roughly the same, but strings have the
691      * extra `\&' sequence which is not allowed for characters.  Also, comments
692      * are allowed in the <StringEsc> state.  (See the comment section much
693      * further down.)
694      *
695      * NB: Backslashes and tabs are stored in strings as themselves.
696      * But if we print them (in printtree.c), they must go out as
697      * "\\\\" and "\\t" respectively.  (This is because of the bogus
698      * intermediate format that the parser produces.  It uses '\t' fpr end of
699      * string, so it needs to be able to escape tabs, which means that it
700      * also needs to be able to escape the escape character ('\\').  Sigh.
701      */
702 %}
703
704 <Char>\\                { PUSH_STATE(CharEsc); }
705 <String>\\&             /* Ignore */ ;
706 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
707
708 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
709 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
710
711 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
712 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
713 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
714 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
715 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
716 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
717 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
718 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
719 <CharEsc,StringEsc>BEL  |
720 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
721 <CharEsc,StringEsc>BS   |
722 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
723 <CharEsc,StringEsc>HT   |
724 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
725 <CharEsc,StringEsc>LF   |
726 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
727 <CharEsc,StringEsc>VT   |
728 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
729 <CharEsc,StringEsc>FF   |
730 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
731 <CharEsc,StringEsc>CR   |
732 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
733 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
734 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
735 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
736 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
737 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
738 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
739 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
740 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
741 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
742 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
743 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
744 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
745 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
746 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
747 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
748 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
749 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
750 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
751 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
752 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
753 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
754 <CharEsc,StringEsc>{D}+  {
755                           int i = strtol(yytext, NULL, 10);
756                           if (i < NCHARS) {
757                              addchar((char) i);
758                           } else {
759                              char errbuf[ERR_BUF_SIZE];
760                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
761                                 yytext);
762                              hsperror(errbuf);
763                           }
764                           POP_STATE;
765                         }
766 <CharEsc,StringEsc>o{O}+ {
767                           int i = strtol(yytext + 1, NULL, 8);
768                           if (i < NCHARS) {
769                              addchar((char) i);
770                           } else {
771                              char errbuf[ERR_BUF_SIZE];
772                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
773                                 yytext);
774                              hsperror(errbuf);
775                           }
776                           POP_STATE;
777                         }
778 <CharEsc,StringEsc>x{H}+ {
779                           int i = strtol(yytext + 1, NULL, 16);
780                           if (i < NCHARS) {
781                              addchar((char) i);
782                           } else {
783                              char errbuf[ERR_BUF_SIZE];
784                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
785                                 yytext);
786                              hsperror(errbuf);
787                           }
788                           POP_STATE;
789                         }
790
791
792 %{
793     /*
794      * Nested comments.  The major complication here is in trying to match the
795      * longest lexemes possible, for better performance.  (See the flex document.)
796      * That's why the rules look so bizarre.
797      */
798 %}
799
800 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
801                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
802                         }
803
804 <Comment>[^-{]*         |
805 <Comment>"-"+[^-{}]+    |
806 <Comment>"{"+[^-{}]+    ;
807 <Comment>"{-"           { nested_comments++; }
808 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
809 <Comment>(.|\n)         ;
810
811 %{
812     /*
813      * Illegal characters.  This used to be a single rule, but we might as well
814      * pass on as much information as we have, so now we indicate our state in
815      * the error message.
816      */
817 %}
818
819 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
820                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
821                             input_filename, hsplineno, hspcolno + 1); 
822                          format_string(stderr, (unsigned char *) yytext, 1);
823                          fputs("'\n", stderr);
824                          hsperror("");
825                         }
826 <Char>(.|\n)            { 
827                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
828                             input_filename, hsplineno, hspcolno + 1); 
829                          format_string(stderr, (unsigned char *) yytext, 1);
830                          fputs("' in a character literal\n", stderr);
831                          hsperror("");
832                         }
833 <CharEsc>(.|\n)         {
834                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
835                             input_filename, hsplineno, hspcolno + 1); 
836                          format_string(stderr, (unsigned char *) yytext, 1);
837                          fputs("'\n", stderr);
838                          hsperror("");
839                         }
840 <String>(.|\n)          { if (nonstandardFlag) {
841                              addtext(yytext, yyleng);
842                           } else { 
843                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
844                                 input_filename, hsplineno, hspcolno + 1); 
845                                 format_string(stderr, (unsigned char *) yytext, 1);
846                                 fputs("' in a string literal\n", stderr);
847                                 hsperror("");
848                           }
849                         }
850 <StringEsc>(.|\n)       {
851                          if (noGap) {
852                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
853                                 input_filename, hsplineno, hspcolno + 1); 
854                              format_string(stderr, (unsigned char *) yytext, 1);
855                              fputs("'\n", stderr);
856                              hsperror("");
857                          } else {
858                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
859                                 input_filename, hsplineno, hspcolno + 1);
860                              format_string(stderr, (unsigned char *) yytext, 1);
861                              fputs("' in a string gap\n", stderr);
862                              hsperror("");
863                          }
864                         }
865
866 %{
867     /*
868      * End of file.  In any sub-state, this is an error.  However, for the primary
869      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
870      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
871      * adding virtual close curlies, or closing an interface and returning to the
872      * primary source file.
873      *
874      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
875      * line/column advancement has to be done by hand.
876      */
877 %}
878
879 <Char,CharEsc><<EOF>>   { 
880                           hsplineno = hslineno; hspcolno = hscolno;
881                           hsperror("unterminated character literal");
882                         }
883 <Comment><<EOF>>        { 
884                           hsplineno = hslineno; hspcolno = hscolno;
885                           hsperror("unterminated comment"); 
886                         }
887 <String,StringEsc><<EOF>>   { 
888                           hsplineno = hslineno; hspcolno = hscolno;
889                           hsperror("unterminated string literal"); 
890                         }
891 <UserPragma><<EOF>>     {
892                           hsplineno = hslineno; hspcolno = hscolno;
893                           hsperror("unterminated user-specified pragma"); 
894                         }
895 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
896
897 %%
898
899 /**********************************************************************
900 *                                                                     *
901 *                                                                     *
902 *     YACC/LEX Initialisation etc.                                    *
903 *                                                                     *
904 *                                                                     *
905 **********************************************************************/
906
907 /*
908    We initialise input_filename to "<stdin>".
909    This allows unnamed sources to be piped into the parser.
910 */
911
912 void
913 yyinit(void)
914 {
915     input_filename = xstrdup("<stdin>");
916
917     /* We must initialize the input buffer _now_, because we call
918        setyyin _before_ calling yylex for the first time! */
919     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
920
921     if (nonstandardFlag)
922         PUSH_STATE(GlaExt);
923     else
924         PUSH_STATE(Code);
925 }
926
927 static void
928 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
929 {
930     if (input_filename != NULL)
931         free(input_filename);
932     input_filename = xstrdup(f);
933 }
934
935 /**********************************************************************
936 *                                                                     *
937 *                                                                     *
938 *     Layout Processing                                               *
939 *                                                                     *
940 *                                                                     *
941 **********************************************************************/
942
943 /*
944         The following section deals with Haskell Layout conventions
945         forcing insertion of ; or } as appropriate
946 */
947
948 static BOOLEAN
949 hsshouldindent(void)
950 {
951     return (!forgetindent && INDENTON);
952 }
953
954
955 /* Enter new context and set new indentation level */
956 void
957 hssetindent(void)
958 {
959 #ifdef HSP_DEBUG
960     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
961 #endif
962
963     /*
964      * partain: first chk that new indent won't be less than current one; this code
965      * doesn't make sense to me; hscolno tells the position of the _end_ of the
966      * current token; what that has to do with indenting, I don't know.
967      */
968
969
970     if (hscolno - 1 <= INDENTPT) {
971         if (INDENTPT == -1)
972             return;             /* Empty input OK for Haskell 1.1 */
973         else {
974             char errbuf[ERR_BUF_SIZE];
975
976             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
977             hsperror(errbuf);
978         }
979     }
980     hsentercontext((hspcolno << 1) | 1);
981 }
982
983
984 /* Enter a new context without changing the indentation level */
985 void
986 hsincindent(void)
987 {
988 #ifdef HSP_DEBUG
989     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
990 #endif
991     hsentercontext(indenttab[icontexts] & ~1);
992 }
993
994
995 /* Turn off indentation processing, usually because an explicit "{" has been seen */
996 void
997 hsindentoff(void)
998 {
999     forgetindent = TRUE;
1000 }
1001
1002
1003 /* Enter a new layout context. */
1004 static void
1005 hsentercontext(int indent)
1006 {
1007     /* Enter new context and set indentation as specified */
1008     if (++icontexts >= MAX_CONTEXTS) {
1009         char errbuf[ERR_BUF_SIZE];
1010
1011         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1012         hsperror(errbuf);
1013     }
1014     forgetindent = FALSE;
1015     indenttab[icontexts] = indent;
1016 #ifdef HSP_DEBUG
1017     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1018 #endif
1019 }
1020
1021
1022 /* Exit a layout context */
1023 void
1024 hsendindent(void)
1025 {
1026     --icontexts;
1027 #ifdef HSP_DEBUG
1028     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1029 #endif
1030 }
1031
1032 /*
1033  *      Return checks the indentation level and returns ;, } or the specified token.
1034  */
1035
1036 static int
1037 Return(int tok)
1038 {
1039 #ifdef HSP_DEBUG
1040     extern int yyleng;
1041 #endif
1042
1043     if (hsshouldindent()) {
1044         if (hspcolno < INDENTPT) {
1045 #ifdef HSP_DEBUG
1046             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1047 #endif
1048             hssttok = tok;
1049             return (VCCURLY);
1050         } else if (hspcolno == INDENTPT) {
1051 #ifdef HSP_DEBUG
1052             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1053 #endif
1054             hssttok = -tok;
1055             return (SEMI);
1056         }
1057     }
1058     hssttok = -1;
1059 #ifdef HSP_DEBUG
1060     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1061 #endif
1062     return (tok);
1063 }
1064
1065
1066 /*
1067  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1068  */
1069 int
1070 yylex()
1071 {
1072     int tok;
1073     static BOOLEAN eof = FALSE;
1074
1075     if (!eof) {
1076         if (hssttok != -1) {
1077             if (hssttok < 0) {
1078                 tok = -hssttok;
1079                 hssttok = -1;
1080                 return tok;
1081             }
1082             RETURN(hssttok);
1083         } else {
1084             endlineno = hslineno;
1085             if ((tok = yylex1()) != EOF)
1086                 return tok;
1087             else
1088                 eof = TRUE;
1089         }
1090     }
1091     if (icontexts > icontexts_save) {
1092         if (INDENTON) {
1093             eof = TRUE;
1094             indenttab[icontexts] = 0;
1095             return (VCCURLY);
1096         } else
1097             hsperror("missing '}' at end of file");
1098     } else if (hsbuf_save != NULL) {
1099         fclose(yyin);
1100         yy_delete_buffer(YY_CURRENT_BUFFER);
1101         yy_switch_to_buffer(hsbuf_save);
1102         hsbuf_save = NULL;
1103         new_filename(filename_save);
1104         free(filename_save);
1105         hslineno = hslineno_save;
1106         hsplineno = hsplineno_save;
1107         hscolno = hscolno_save;
1108         hspcolno = hspcolno_save;
1109         etags = etags_save;
1110         icontexts = icontexts_save - 1;
1111         icontexts_save = 0;
1112 #ifdef HSP_DEBUG
1113         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1114 #endif
1115         eof = FALSE;
1116
1117         /* RETURN(LEOF); */
1118         hsperror("No longer using yacc to parse interface files");
1119
1120     } else {
1121         yyterminate();
1122     }
1123     abort(); /* should never get here! */
1124     return(0);
1125 }
1126
1127 /**********************************************************************
1128 *                                                                     *
1129 *                                                                     *
1130 *     Input Processing for Interfaces -- Not currently used !!!       *
1131 *                                                                     *
1132 *                                                                     *
1133 **********************************************************************/
1134
1135 /* setyyin(file)        open file as new lex input buffer */
1136 extern FILE *yyin;
1137
1138 void
1139 setyyin(char *file)
1140 {
1141     hsbuf_save = YY_CURRENT_BUFFER;
1142     if ((yyin = fopen(file, "r")) == NULL) {
1143         char errbuf[ERR_BUF_SIZE];
1144
1145         sprintf(errbuf, "can't read \"%-.50s\"", file);
1146         hsperror(errbuf);
1147     }
1148     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1149
1150     hslineno_save = hslineno;
1151     hsplineno_save = hsplineno;
1152     hslineno = hsplineno = 1;
1153
1154     filename_save = input_filename;
1155     input_filename = NULL;
1156     new_filename(file);
1157     hscolno_save = hscolno;
1158     hspcolno_save = hspcolno;
1159     hscolno = hspcolno = 0;
1160     etags_save = etags; /* do not do "etags" stuff in interfaces */
1161     etags = 0;          /* We remember whether we are doing it in
1162                            the module, so we can restore it later [WDP 94/09] */
1163     hsentercontext(-1);         /* partain: changed this from 0 */
1164     icontexts_save = icontexts;
1165 #ifdef HSP_DEBUG
1166     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1167 #endif
1168 }
1169
1170 static void
1171 layout_input(char *text, int len)
1172 {
1173 #ifdef HSP_DEBUG
1174     fprintf(stderr, "Scanning \"%s\"\n", text);
1175 #endif
1176
1177     hsplineno = hslineno;
1178     hspcolno = hscolno;
1179
1180     while (len-- > 0) {
1181         switch (*text++) {
1182         case '\n':
1183         case '\r':
1184         case '\f':
1185             hslineno++;
1186             hscolno = 0;
1187             break;
1188         case '\t':
1189             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1190             break;
1191         case '\v':
1192             break;
1193         default:
1194             ++hscolno;
1195             break;
1196         }
1197     }
1198 }
1199
1200 void
1201 setstartlineno(void)
1202 {
1203     startlineno = hsplineno;
1204
1205     if (modulelineno == 0) {
1206         modulelineno = startlineno;
1207     }
1208
1209 #if 1/*etags*/
1210 #else
1211     if (etags)
1212         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1213 #endif
1214 }
1215
1216 /**********************************************************************
1217 *                                                                     *
1218 *                                                                     *
1219 *                      Text Caching                                   *
1220 *                                                                     *
1221 *                                                                     *
1222 **********************************************************************/
1223
1224 #define CACHE_SIZE YY_BUF_SIZE
1225
1226 static struct {
1227     unsigned allocated;
1228     unsigned next;
1229     char *text;
1230 } textcache = { 0, 0, NULL };
1231
1232 static void
1233 cleartext(void)
1234 {
1235 /*  fprintf(stderr, "cleartext\n"); */
1236     textcache.next = 0;
1237     if (textcache.allocated == 0) {
1238         textcache.allocated = CACHE_SIZE;
1239         textcache.text = xmalloc(CACHE_SIZE);
1240     }
1241 }
1242
1243 static void
1244 addtext(char *text, unsigned length)
1245 {
1246 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1247
1248     if (length == 0)
1249         return;
1250
1251     if (textcache.next + length + 1 >= textcache.allocated) {
1252         textcache.allocated += length + CACHE_SIZE;
1253         textcache.text = xrealloc(textcache.text, textcache.allocated);
1254     }
1255     bcopy(text, textcache.text + textcache.next, length);
1256     textcache.next += length;
1257 }
1258
1259 static void
1260 addchar(char c)
1261 {
1262 /*  fprintf(stderr, "addchar: %c\n", c); */
1263
1264     if (textcache.next + 2 >= textcache.allocated) {
1265         textcache.allocated += CACHE_SIZE;
1266         textcache.text = xrealloc(textcache.text, textcache.allocated);
1267     }
1268     textcache.text[textcache.next++] = c;
1269 }
1270
1271 static char *
1272 fetchtext(unsigned *length)
1273 {
1274 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1275
1276     *length = textcache.next;
1277     textcache.text[textcache.next] = '\0';
1278     return textcache.text;
1279 }
1280
1281 /**********************************************************************
1282 *                                                                     *
1283 *                                                                     *
1284 *    Identifier Processing                                             *
1285 *                                                                     *
1286 *                                                                     *
1287 **********************************************************************/
1288
1289 /*
1290         hsnewid         Enters an id of length n into the symbol table.
1291 */
1292
1293 static void
1294 hsnewid(char *name, int length)
1295 {
1296     char save = name[length];
1297
1298     name[length] = '\0';
1299     yylval.uid = installid(name);
1300     name[length] = save;
1301 }
1302
1303 BOOLEAN
1304 hsnewqid(char *name, int length)
1305 {
1306     char* dot;
1307     char save = name[length];
1308     name[length] = '\0';
1309
1310     dot = strchr(name, '.');
1311     *dot = '\0';
1312     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1313     *dot = '.';
1314     name[length] = save;
1315
1316     return isconstr(dot+1);
1317 }