[project @ 1996-06-05 06:44:31 by partain]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 #include "../../includes/config.h"
11
12 #include <stdio.h>
13
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
15 #include <string.h>
16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
18 #include <memory.h>
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
20 #define index strchr
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
26 #include <strings.h>
27 /* memory.h and strings.h conflict on some systems.  */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
29
30 #include "hspincl.h"
31 #include "hsparser.tab.h"
32 #include "constants.h"
33 #include "utils.h"
34
35 /* Our substitute for <ctype.h> */
36
37 #define NCHARS  256
38 #define _S      0x1
39 #define _D      0x2
40 #define _H      0x4
41 #define _O      0x8
42 #define _C      0x10
43
44 #define _isconstr(s)    (CharTable[*s]&(_C))
45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
46
47 static unsigned char CharTable[NCHARS] = {
48 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
50 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
51 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
52 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
53 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
54 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
55 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
56 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
57 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
58 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
59 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
60 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
61 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
62 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
63 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
64
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
80 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
81 };
82
83 /**********************************************************************
84 *                                                                     *
85 *                                                                     *
86 *      Declarations                                                   *
87 *                                                                     *
88 *                                                                     *
89 **********************************************************************/
90
91 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
92
93 /*
94  * For my own sanity, things that are not part of the flex skeleton
95  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
96  */
97
98 static int hslineno = 0;        /* Line number at end of token */
99 int hsplineno = 0;              /* Line number at end of previous token */
100
101 static int hscolno = 0;         /* Column number at end of token */
102 int hspcolno = 0;               /* Column number at end of previous token */
103 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
104
105 int modulelineno = -1;          /* The line number where the module starts */
106 int startlineno = 0;            /* The line number where something starts */
107 int endlineno = 0;              /* The line number where something ends */
108
109 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
110 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
111
112 static int nested_comments;     /* For counting comment nesting depth */
113
114 /* Hacky definition of yywrap: see flex doc.
115
116    If we don't do this, then we'll have to get the default
117    yywrap from the flex library, which is often something
118    we are not good at locating.  This avoids that difficulty.
119    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
120    WDP 94/09/05
121 */
122 #define yywrap() 1
123
124 /* Essential forward declarations */
125
126 static void hsnewid      PROTO((char *, int));
127 static void layout_input PROTO((char *, int));
128 static void cleartext    (NO_ARGS);
129 static void addtext      PROTO((char *, unsigned));
130 static void addchar      PROTO((char));
131 static char *fetchtext   PROTO((unsigned *));
132 static void new_filename PROTO((char *));
133 static int  Return       PROTO((int));
134 static void hsentercontext PROTO((int));
135
136 /* Special file handling for IMPORTS */
137 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
138
139 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
140 static char *filename_save;             /* File Name                     */
141 static int hslineno_save = 0,           /* Line Number                   */
142  hsplineno_save = 0,                    /* Line Number of Prev. token    */
143  hscolno_save = 0,                      /* Indentation                   */
144  hspcolno_save = 0;                     /* Left Indentation              */
145 static short icontexts_save = 0;        /* Indent Context Level          */
146
147 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
148 extern BOOLEAN etags;                   /* that which is saved */
149
150 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
151
152 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
153                                  * inserted before token +ve  -- "}" inserted before
154                                  * token */
155
156 short icontexts = 0;            /* Which context we're in */
157
158 /*
159         Table of indentations:  right bit indicates whether to use
160           indentation rules (1 = use rules; 0 = ignore)
161
162     partain:
163     push one of these "contexts" at every "case" or "where"; the right bit says
164     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
165
166     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
167     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
168     push is shown just below.
169
170 */
171
172
173 static short indenttab[MAX_CONTEXTS] = {-1};
174
175 #define INDENTPT (indenttab[icontexts]>>1)
176 #define INDENTON (indenttab[icontexts]&1)
177
178 #define RETURN(tok) return(Return(tok))
179
180 #undef YY_DECL
181 #define YY_DECL int yylex1()
182
183 /* We should not peek at yy_act, but flex calls us even for the internal action
184    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
185    to support older versions of flex, we'll continue to peek for now.
186  */
187 #define YY_USER_ACTION \
188     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
189
190 #if 0/*debug*/
191 #undef YY_BREAK
192 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
193 #endif
194
195 /* Each time we enter a new start state, we push it onto the state stack.
196    Note that the rules do not allow us to underflow or overflow the stack.
197    (At least, they shouldn't.)  The maximum expected depth is 4:
198    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
199 */
200 static int StateStack[5];
201 static int StateDepth = -1;
202
203 #ifdef HSP_DEBUG
204 #define PUSH_STATE(n)   do {\
205     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
206     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
207 #define POP_STATE       do {--StateDepth;\
208     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
209     BEGIN(StateStack[StateDepth]);} while(0)
210 #else
211 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
212 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
213 #endif
214
215 %}
216
217 /* The start states are:
218    Code -- normal Haskell code (principal lexer)
219    GlaExt -- Haskell code with Glasgow extensions
220    Comment -- Nested comment processing
221    String -- Inside a string literal with backslashes
222    StringEsc -- Immediately following a backslash in a string literal
223    Char -- Inside a character literal with backslashes
224    CharEsc -- Immediately following a backslash in a character literal 
225
226    Note that the INITIAL state is unused.  Also note that these states
227    are _exclusive_.  All rules should be prefixed with an appropriate
228    list of start states.
229  */
230
231 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
232
233 isoS                    [\xa1-\xbf\xd7\xf7]
234 isoL                    [\xc0-\xd6\xd8-\xde]
235 isol                    [\xdf-\xf6\xf8-\xff]
236 isoA                    [\xa1-\xff]
237
238 D                       [0-9]
239 O                       [0-7]
240 H                       [0-9A-Fa-f]
241 N                       {D}+
242 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
243 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
244 SId                     {S}{S}*
245 L                       [A-Z\xc0-\xd6\xd8-\xde]
246 l                       [a-z\xdf-\xf6\xf8-\xff]
247 I                       {L}|{l}
248 i                       {L}|{l}|[0-9'_]
249 Id                      {I}{i}*
250 Mod                     {L}{i}*
251 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
252 CNTRL                   [@A-Z\[\\\]^_]
253 WS                      [ \t\n\r\f\v]
254 NL                      [\n\r]
255
256 %%
257
258 %{
259     /* 
260      * Special GHC pragma rules.  Do we need a start state for interface files,
261      * so these won't be matched in source files? --JSM
262      */
263 %}
264
265 <Code,GlaExt>^"# ".*{NL}    {
266                           char tempf[FILENAME_SIZE];
267                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
268                           new_filename(tempf);
269                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
270                         }
271
272 <Code,GlaExt>^"#line ".*{NL}    {
273                           char tempf[FILENAME_SIZE];
274                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
275                           new_filename(tempf); 
276                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
277                         }
278
279 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
280                           /* partain: pragma-style line directive */
281                           char tempf[FILENAME_SIZE];
282                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
283                           new_filename(tempf);
284                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
285                         }
286
287 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
288                               PUSH_STATE(UserPragma);
289                               RETURN(INTERFACE_UPRAGMA);
290                             }
291 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
292                               PUSH_STATE(UserPragma);
293                               RETURN(SPECIALISE_UPRAGMA);
294                             }
295 <Code,GlaExt>"{-#"{WS}*"INLINE" {
296                               PUSH_STATE(UserPragma);
297                               RETURN(INLINE_UPRAGMA);
298                             }
299 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
300                               PUSH_STATE(UserPragma);
301                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
302                             }
303 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
304                               PUSH_STATE(UserPragma);
305                               RETURN(DEFOREST_UPRAGMA);
306                             }
307 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
308                               /* these are handled by hscpp */
309                               nested_comments =1;
310                               PUSH_STATE(Comment);
311                             }
312 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
313                               fprintf(stderr, "\"%s\", line %d: Warning: Unrecognised pragma '",
314                                 input_filename, hsplineno);
315                               format_string(stderr, (unsigned char *) yytext, yyleng);
316                               fputs("'\n", stderr);
317                               nested_comments = 1;
318                               PUSH_STATE(Comment);
319                             }
320 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
321
322 %{
323     /*
324      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
325      * intentionally accepted as a keyword even for normal <Code>.
326      */
327 %}
328
329 <Code,GlaExt>"case"             { RETURN(CASE); }
330 <Code,GlaExt>"class"            { RETURN(CLASS); }
331 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
332 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
333 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
334 <Code,GlaExt>"do"               { RETURN(DO); }
335 <Code,GlaExt>"else"             { RETURN(ELSE); }
336 <Code,GlaExt>"if"               { RETURN(IF); }
337 <Code,GlaExt>"import"           { RETURN(IMPORT); }
338 <Code,GlaExt>"in"               { RETURN(IN); }
339 <Code,GlaExt>"infix"            { RETURN(INFIX); }
340 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
341 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
342 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
343 <Code,GlaExt>"let"              { RETURN(LET); }
344 <Code,GlaExt>"module"           { RETURN(MODULE); }
345 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
346 <Code,GlaExt>"of"               { RETURN(OF); }
347 <Code,GlaExt>"then"             { RETURN(THEN); }
348 <Code,GlaExt>"type"             { RETURN(TYPE); }
349 <Code,GlaExt>"where"            { RETURN(WHERE); }
350
351 <Code,GlaExt>"as"               { RETURN(AS); }
352 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
353 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
354
355 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
356 <GlaExt>"_ccall_"               { RETURN(CCALL); }
357 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
358 <GlaExt>"_casm_"                { RETURN(CASM); }
359 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
360
361 %{
362     /* 
363      * Haskell operators: special, reservedops and useful varsyms
364      */
365 %}
366
367 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
368 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
369 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
370 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
371 <Code,GlaExt>"{"                { RETURN(OCURLY); }
372 <Code,GlaExt>"}"                { RETURN(CCURLY); }
373 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
374 <Code,GlaExt>";"                { RETURN(SEMI); }
375 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
376 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
377
378 <Code,GlaExt>".."               { RETURN(DOTDOT); }
379 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
380 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
381 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
382 <Code,GlaExt>"|"                { RETURN(VBAR); }
383 <Code,GlaExt>"<-"               { RETURN(LARROW); }
384 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
385 <Code,GlaExt>"-"                { RETURN(MINUS); }
386
387 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
388 <Code,GlaExt>"@"                { RETURN(AT); }
389 <Code,GlaExt>"!"                { RETURN(BANG); }
390 <Code,GlaExt>"~"                { RETURN(LAZY); }
391
392 %{
393     /*
394      * Integers and (for Glasgow extensions) primitive integers.  Note that
395      * we pass all of the text on to the parser, because flex/C can't handle
396      * arbitrary precision numbers.
397      */
398 %}
399
400 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
401                          yylval.uid = xstrndup(yytext, yyleng - 1);
402                          RETURN(INTPRIM);
403                         }
404 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
405                          yylval.uid = xstrndup(yytext, yyleng);
406                          RETURN(INTEGER);
407                         }
408 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
409                          yylval.uid = xstrndup(yytext, yyleng - 1);
410                          RETURN(INTPRIM);
411                         }
412 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
413                          yylval.uid = xstrndup(yytext, yyleng);
414                          RETURN(INTEGER);
415                         }
416 <GlaExt>("-")?{N}"#"    {
417                          yylval.uid = xstrndup(yytext, yyleng - 1);
418                          RETURN(INTPRIM);
419                         }
420 <Code,GlaExt,UserPragma>{N} {
421                          yylval.uid = xstrndup(yytext, yyleng);
422                          RETURN(INTEGER);
423                         }
424
425 %{
426     /*
427      * Floats and (for Glasgow extensions) primitive floats/doubles.
428      */
429 %}
430
431 <GlaExt>("-")?{F}"##"   {
432                          yylval.uid = xstrndup(yytext, yyleng - 2);
433                          RETURN(DOUBLEPRIM);
434                         }
435 <GlaExt>("-")?{F}"#"    {
436                          yylval.uid = xstrndup(yytext, yyleng - 1);
437                          RETURN(FLOATPRIM);
438                         }
439 <Code,GlaExt>{F}        {
440                          yylval.uid = xstrndup(yytext, yyleng);
441                          RETURN(FLOAT);
442                         }
443
444 %{
445     /*
446      * Funky ``foo'' style C literals for Glasgow extensions
447      */
448 %}
449
450 <GlaExt>"``"[^']+"''"   {
451                          hsnewid(yytext + 2, yyleng - 4);
452                          RETURN(CLITLIT);
453                         }
454
455 %{
456     /*
457      * Identifiers, both variables and operators.  The trailing hash is allowed
458      * for Glasgow extensions.
459      */
460 %}
461
462
463 %{
464 /* These SHOULDNAE work in "Code" (sigh) */
465 %}
466 <Code,GlaExt,UserPragma>{Id}"#" { 
467                          if (! nonstandardFlag) {
468                             char errbuf[ERR_BUF_SIZE];
469                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
470                             hsperror(errbuf);
471                          }
472                          hsnewid(yytext, yyleng);
473                          RETURN(_isconstr(yytext) ? CONID : VARID);
474                         }
475 <Code,GlaExt,UserPragma>_+{Id} { 
476                          if (! nonstandardFlag) {
477                             char errbuf[ERR_BUF_SIZE];
478                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
479                             hsperror(errbuf);
480                          }
481                          hsnewid(yytext, yyleng);
482                          RETURN(isconstr(yytext) ? CONID : VARID);
483                          /* NB: ^^^^^^^^ : not the macro! */
484                         }
485 <Code,GlaExt,UserPragma>{Id}    {
486                          hsnewid(yytext, yyleng);
487                          RETURN(_isconstr(yytext) ? CONID : VARID);
488                         }
489 <Code,GlaExt,UserPragma>{SId}   {
490                          hsnewid(yytext, yyleng);
491                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
492                         }
493 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
494                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
495                          RETURN(isconstr ? QCONID : QVARID);
496                         }
497 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
498                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
499                          RETURN(isconstr ? QCONSYM : QVARSYM);
500                         }
501
502 %{
503     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
504
505     /* Because we can make the former well-behaved (we defined them).
506
507        Sadly, the latter is defined by Haskell, which allows such
508        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
509     */
510 %}
511
512 <GlaExt,UserPragma>"`"{Id}"#`"  {       
513                          hsnewid(yytext + 1, yyleng - 2);
514                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
515                         }
516
517 %{
518     /*
519      * Character literals.  The first form is the quick form, for character
520      * literals that don't contain backslashes.  Literals with backslashes are
521      * lexed through multiple rules.  First, we match the open ' and as many
522      * normal characters as possible.  This puts us into the <Char> state, where
523      * a backslash is legal.  Then, we match the backslash and move into the 
524      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
525      * characters and the close '.  We may end up with too many characters, but
526      * this allows us to easily share the lex rules with strings.  Excess characters
527      * are ignored with a warning.
528      */
529 %}
530
531 <GlaExt>'({CHAR}|"\"")"'#" {
532                          yylval.uhstring = installHstring(1, yytext+1);
533                          RETURN(CHARPRIM);
534                         }
535 <Code,GlaExt>'({CHAR}|"\"")'    {
536                          yylval.uhstring = installHstring(1, yytext+1);
537                          RETURN(CHAR);
538                         }
539 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
540                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
541                          hsperror(errbuf);
542                         }
543 <Code,GlaExt>'({CHAR}|"\"")* {
544                          hsmlcolno = hspcolno;
545                          cleartext();
546                          addtext(yytext+1, yyleng-1);
547                          PUSH_STATE(Char);
548                         }
549 <Char>({CHAR}|"\"")*'#  {
550                          unsigned length;
551                          char *text;
552
553                          addtext(yytext, yyleng - 2);
554                          text = fetchtext(&length);
555
556                          if (! nonstandardFlag) {
557                             char errbuf[ERR_BUF_SIZE];
558                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
559                             hsperror(errbuf);
560                          }
561
562                          if (length > 1) {
563                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
564                               input_filename, hsplineno, hspcolno + 1);
565                             format_string(stderr, (unsigned char *) text, length);
566                             fputs("' too long\n", stderr);
567                             hsperror("");
568                          }
569                          yylval.uhstring = installHstring(1, text);
570                          hspcolno = hsmlcolno;
571                          POP_STATE;
572                          RETURN(CHARPRIM); 
573                         }
574 <Char>({CHAR}|"\"")*'   {
575                          unsigned length;
576                          char *text;
577
578                          addtext(yytext, yyleng - 1);
579                          text = fetchtext(&length);
580
581                          if (length > 1) {
582                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
583                               input_filename, hsplineno, hspcolno + 1);
584                             format_string(stderr, (unsigned char *) text, length);
585                             fputs("' too long\n", stderr);
586                             hsperror("");
587                          }
588                          yylval.uhstring = installHstring(1, text);
589                          hspcolno = hsmlcolno;
590                          POP_STATE;
591                          RETURN(CHAR); 
592                         }
593 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
594
595
596 %{
597     /*
598      * String literals.  The first form is the quick form, for string literals
599      * that don't contain backslashes.  Literals with backslashes are lexed
600      * through multiple rules.  First, we match the open " and as many normal
601      * characters as possible.  This puts us into the <String> state, where
602      * a backslash is legal.  Then, we match the backslash and move into the 
603      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
604      * characters, moving back and forth between <String> and <StringEsc> as more
605      * backslashes are encountered.  (We may even digress into <Comment> mode if we
606      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
607      * of normal characters and the close ".
608      */
609 %}
610
611 <GlaExt>"\""({CHAR}|"'")*"\""#  {
612                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
613                             /* the -3 accounts for the " on front, "# on the end */
614                          RETURN(STRINGPRIM); 
615                         }
616 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
617                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
618                          RETURN(STRING); 
619                         }
620 <Code,GlaExt>"\""({CHAR}|"'")* {
621                          hsmlcolno = hspcolno;
622                          cleartext();
623                          addtext(yytext+1, yyleng-1);
624                          PUSH_STATE(String);
625                         }
626 <String>({CHAR}|"'")*"\"#"   {
627                          unsigned length;
628                          char *text;
629
630                          addtext(yytext, yyleng-2);
631                          text = fetchtext(&length);
632
633                          if (! nonstandardFlag) {
634                             char errbuf[ERR_BUF_SIZE];
635                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
636                             hsperror(errbuf);
637                          }
638
639                          yylval.uhstring = installHstring(length, text);
640                          hspcolno = hsmlcolno;
641                          POP_STATE;
642                          RETURN(STRINGPRIM);
643                         }
644 <String>({CHAR}|"'")*"\""   {
645                          unsigned length;
646                          char *text;
647
648                          addtext(yytext, yyleng-1);
649                          text = fetchtext(&length);
650
651                          yylval.uhstring = installHstring(length, text);
652                          hspcolno = hsmlcolno;
653                          POP_STATE;
654                          RETURN(STRING); 
655                         }
656 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
657
658 %{
659     /*
660      * Character and string escapes are roughly the same, but strings have the
661      * extra `\&' sequence which is not allowed for characters.  Also, comments
662      * are allowed in the <StringEsc> state.  (See the comment section much
663      * further down.)
664      *
665      * NB: Backslashes and tabs are stored in strings as themselves.
666      * But if we print them (in printtree.c), they must go out as
667      * "\\\\" and "\\t" respectively.  (This is because of the bogus
668      * intermediate format that the parser produces.  It uses '\t' fpr end of
669      * string, so it needs to be able to escape tabs, which means that it
670      * also needs to be able to escape the escape character ('\\').  Sigh.
671      */
672 %}
673
674 <Char>\\                { PUSH_STATE(CharEsc); }
675 <String>\\&             /* Ignore */ ;
676 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
677
678 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
679 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
680
681 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
682 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
683 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
684 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
685 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
686 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
687 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
688 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
689 <CharEsc,StringEsc>BEL  |
690 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
691 <CharEsc,StringEsc>BS   |
692 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
693 <CharEsc,StringEsc>HT   |
694 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
695 <CharEsc,StringEsc>LF   |
696 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
697 <CharEsc,StringEsc>VT   |
698 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
699 <CharEsc,StringEsc>FF   |
700 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
701 <CharEsc,StringEsc>CR   |
702 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
703 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
704 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
705 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
706 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
707 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
708 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
709 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
710 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
711 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
712 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
713 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
714 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
715 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
716 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
717 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
718 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
719 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
720 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
721 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
722 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
723 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
724 <CharEsc,StringEsc>{D}+  {
725                           int i = strtol(yytext, NULL, 10);
726                           if (i < NCHARS) {
727                              addchar((char) i);
728                           } else {
729                              char errbuf[ERR_BUF_SIZE];
730                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
731                                 yytext);
732                              hsperror(errbuf);
733                           }
734                           POP_STATE;
735                         }
736 <CharEsc,StringEsc>o{O}+ {
737                           int i = strtol(yytext + 1, NULL, 8);
738                           if (i < NCHARS) {
739                              addchar((char) i);
740                           } else {
741                              char errbuf[ERR_BUF_SIZE];
742                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
743                                 yytext);
744                              hsperror(errbuf);
745                           }
746                           POP_STATE;
747                         }
748 <CharEsc,StringEsc>x{H}+ {
749                           int i = strtol(yytext + 1, NULL, 16);
750                           if (i < NCHARS) {
751                              addchar((char) i);
752                           } else {
753                              char errbuf[ERR_BUF_SIZE];
754                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
755                                 yytext);
756                              hsperror(errbuf);
757                           }
758                           POP_STATE;
759                         }
760
761 %{
762     /*
763      * Simple comments and whitespace.  Normally, we would just ignore these, but
764      * in case we're processing a string escape, we need to note that we've seen
765      * a gap.
766      *
767      * Note that we cater for a comment line that *doesn't* end in a newline.
768      * This is incorrect, strictly speaking, but seems like the right thing
769      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
770      */
771 %}
772
773 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
774 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
775
776 %{
777     /*
778      * Nested comments.  The major complication here is in trying to match the
779      * longest lexemes possible, for better performance.  (See the flex document.)
780      * That's why the rules look so bizarre.
781      */
782 %}
783
784 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
785                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
786                         }
787
788 <Comment>[^-{]*         |
789 <Comment>"-"+[^-{}]+    |
790 <Comment>"{"+[^-{}]+    ;
791 <Comment>"{-"           { nested_comments++; }
792 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
793 <Comment>(.|\n)         ;
794
795 %{
796     /*
797      * Illegal characters.  This used to be a single rule, but we might as well
798      * pass on as much information as we have, so now we indicate our state in
799      * the error message.
800      */
801 %}
802
803 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
804                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
805                             input_filename, hsplineno, hspcolno + 1); 
806                          format_string(stderr, (unsigned char *) yytext, 1);
807                          fputs("'\n", stderr);
808                          hsperror("");
809                         }
810 <Char>(.|\n)            { 
811                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
812                             input_filename, hsplineno, hspcolno + 1); 
813                          format_string(stderr, (unsigned char *) yytext, 1);
814                          fputs("' in a character literal\n", stderr);
815                          hsperror("");
816                         }
817 <CharEsc>(.|\n)         {
818                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
819                             input_filename, hsplineno, hspcolno + 1); 
820                          format_string(stderr, (unsigned char *) yytext, 1);
821                          fputs("'\n", stderr);
822                          hsperror("");
823                         }
824 <String>(.|\n)          { if (nonstandardFlag) {
825                              addtext(yytext, yyleng);
826                           } else { 
827                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
828                                 input_filename, hsplineno, hspcolno + 1); 
829                                 format_string(stderr, (unsigned char *) yytext, 1);
830                                 fputs("' in a string literal\n", stderr);
831                                 hsperror("");
832                           }
833                         }
834 <StringEsc>(.|\n)       {
835                          if (noGap) {
836                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
837                                 input_filename, hsplineno, hspcolno + 1); 
838                              format_string(stderr, (unsigned char *) yytext, 1);
839                              fputs("'\n", stderr);
840                              hsperror("");
841                          } else {
842                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
843                                 input_filename, hsplineno, hspcolno + 1);
844                              format_string(stderr, (unsigned char *) yytext, 1);
845                              fputs("' in a string gap\n", stderr);
846                              hsperror("");
847                          }
848                         }
849
850 %{
851     /*
852      * End of file.  In any sub-state, this is an error.  However, for the primary
853      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
854      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
855      * adding virtual close curlies, or closing an interface and returning to the
856      * primary source file.
857      *
858      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
859      * line/column advancement has to be done by hand.
860      */
861 %}
862
863 <Char,CharEsc><<EOF>>   { 
864                           hsplineno = hslineno; hspcolno = hscolno;
865                           hsperror("unterminated character literal");
866                         }
867 <Comment><<EOF>>        { 
868                           hsplineno = hslineno; hspcolno = hscolno;
869                           hsperror("unterminated comment"); 
870                         }
871 <String,StringEsc><<EOF>>   { 
872                           hsplineno = hslineno; hspcolno = hscolno;
873                           hsperror("unterminated string literal"); 
874                         }
875 <UserPragma><<EOF>>     {
876                           hsplineno = hslineno; hspcolno = hscolno;
877                           hsperror("unterminated user-specified pragma"); 
878                         }
879 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
880
881 %%
882
883 /**********************************************************************
884 *                                                                     *
885 *                                                                     *
886 *     YACC/LEX Initialisation etc.                                    *
887 *                                                                     *
888 *                                                                     *
889 **********************************************************************/
890
891 /*
892    We initialise input_filename to "<stdin>".
893    This allows unnamed sources to be piped into the parser.
894 */
895
896 void
897 yyinit(void)
898 {
899     input_filename = xstrdup("<stdin>");
900
901     /* We must initialize the input buffer _now_, because we call
902        setyyin _before_ calling yylex for the first time! */
903     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
904
905     if (nonstandardFlag)
906         PUSH_STATE(GlaExt);
907     else
908         PUSH_STATE(Code);
909 }
910
911 static void
912 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
913 {
914     if (input_filename != NULL)
915         free(input_filename);
916     input_filename = xstrdup(f);
917 }
918
919 /**********************************************************************
920 *                                                                     *
921 *                                                                     *
922 *     Layout Processing                                               *
923 *                                                                     *
924 *                                                                     *
925 **********************************************************************/
926
927 /*
928         The following section deals with Haskell Layout conventions
929         forcing insertion of ; or } as appropriate
930 */
931
932 static BOOLEAN
933 hsshouldindent(void)
934 {
935     return (!forgetindent && INDENTON);
936 }
937
938
939 /* Enter new context and set new indentation level */
940 void
941 hssetindent(void)
942 {
943 #ifdef HSP_DEBUG
944     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
945 #endif
946
947     /*
948      * partain: first chk that new indent won't be less than current one; this code
949      * doesn't make sense to me; hscolno tells the position of the _end_ of the
950      * current token; what that has to do with indenting, I don't know.
951      */
952
953
954     if (hscolno - 1 <= INDENTPT) {
955         if (INDENTPT == -1)
956             return;             /* Empty input OK for Haskell 1.1 */
957         else {
958             char errbuf[ERR_BUF_SIZE];
959
960             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
961             hsperror(errbuf);
962         }
963     }
964     hsentercontext((hspcolno << 1) | 1);
965 }
966
967
968 /* Enter a new context without changing the indentation level */
969 void
970 hsincindent(void)
971 {
972 #ifdef HSP_DEBUG
973     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
974 #endif
975     hsentercontext(indenttab[icontexts] & ~1);
976 }
977
978
979 /* Turn off indentation processing, usually because an explicit "{" has been seen */
980 void
981 hsindentoff(void)
982 {
983     forgetindent = TRUE;
984 }
985
986
987 /* Enter a new layout context. */
988 static void
989 hsentercontext(int indent)
990 {
991     /* Enter new context and set indentation as specified */
992     if (++icontexts >= MAX_CONTEXTS) {
993         char errbuf[ERR_BUF_SIZE];
994
995         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
996         hsperror(errbuf);
997     }
998     forgetindent = FALSE;
999     indenttab[icontexts] = indent;
1000 #ifdef HSP_DEBUG
1001     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1002 #endif
1003 }
1004
1005
1006 /* Exit a layout context */
1007 void
1008 hsendindent(void)
1009 {
1010     --icontexts;
1011 #ifdef HSP_DEBUG
1012     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1013 #endif
1014 }
1015
1016 /*
1017  *      Return checks the indentation level and returns ;, } or the specified token.
1018  */
1019
1020 static int
1021 Return(int tok)
1022 {
1023 #ifdef HSP_DEBUG
1024     extern int yyleng;
1025 #endif
1026
1027     if (hsshouldindent()) {
1028         if (hspcolno < INDENTPT) {
1029 #ifdef HSP_DEBUG
1030             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1031 #endif
1032             hssttok = tok;
1033             return (VCCURLY);
1034         } else if (hspcolno == INDENTPT) {
1035 #ifdef HSP_DEBUG
1036             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1037 #endif
1038             hssttok = -tok;
1039             return (SEMI);
1040         }
1041     }
1042     hssttok = -1;
1043 #ifdef HSP_DEBUG
1044     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1045 #endif
1046     return (tok);
1047 }
1048
1049
1050 /*
1051  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1052  */
1053 int
1054 yylex()
1055 {
1056     int tok;
1057     static BOOLEAN eof = FALSE;
1058
1059     if (!eof) {
1060         if (hssttok != -1) {
1061             if (hssttok < 0) {
1062                 tok = -hssttok;
1063                 hssttok = -1;
1064                 return tok;
1065             }
1066             RETURN(hssttok);
1067         } else {
1068             endlineno = hslineno;
1069             if ((tok = yylex1()) != EOF)
1070                 return tok;
1071             else
1072                 eof = TRUE;
1073         }
1074     }
1075     if (icontexts > icontexts_save) {
1076         if (INDENTON) {
1077             eof = TRUE;
1078             indenttab[icontexts] = 0;
1079             return (VCCURLY);
1080         } else
1081             hsperror("missing '}' at end of file");
1082     } else if (hsbuf_save != NULL) {
1083         fclose(yyin);
1084         yy_delete_buffer(YY_CURRENT_BUFFER);
1085         yy_switch_to_buffer(hsbuf_save);
1086         hsbuf_save = NULL;
1087         new_filename(filename_save);
1088         free(filename_save);
1089         hslineno = hslineno_save;
1090         hsplineno = hsplineno_save;
1091         hscolno = hscolno_save;
1092         hspcolno = hspcolno_save;
1093         etags = etags_save;
1094         icontexts = icontexts_save - 1;
1095         icontexts_save = 0;
1096 #ifdef HSP_DEBUG
1097         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1098 #endif
1099         eof = FALSE;
1100
1101         /* RETURN(LEOF); */
1102         hsperror("No longer using yacc to parse interface files");
1103
1104     } else {
1105         yyterminate();
1106     }
1107     abort(); /* should never get here! */
1108     return(0);
1109 }
1110
1111 /**********************************************************************
1112 *                                                                     *
1113 *                                                                     *
1114 *     Input Processing for Interfaces -- Not currently used !!!       *
1115 *                                                                     *
1116 *                                                                     *
1117 **********************************************************************/
1118
1119 /* setyyin(file)        open file as new lex input buffer */
1120 extern FILE *yyin;
1121
1122 void
1123 setyyin(char *file)
1124 {
1125     hsbuf_save = YY_CURRENT_BUFFER;
1126     if ((yyin = fopen(file, "r")) == NULL) {
1127         char errbuf[ERR_BUF_SIZE];
1128
1129         sprintf(errbuf, "can't read \"%-.50s\"", file);
1130         hsperror(errbuf);
1131     }
1132     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1133
1134     hslineno_save = hslineno;
1135     hsplineno_save = hsplineno;
1136     hslineno = hsplineno = 1;
1137
1138     filename_save = input_filename;
1139     input_filename = NULL;
1140     new_filename(file);
1141     hscolno_save = hscolno;
1142     hspcolno_save = hspcolno;
1143     hscolno = hspcolno = 0;
1144     etags_save = etags; /* do not do "etags" stuff in interfaces */
1145     etags = 0;          /* We remember whether we are doing it in
1146                            the module, so we can restore it later [WDP 94/09] */
1147     hsentercontext(-1);         /* partain: changed this from 0 */
1148     icontexts_save = icontexts;
1149 #ifdef HSP_DEBUG
1150     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1151 #endif
1152 }
1153
1154 static void
1155 layout_input(char *text, int len)
1156 {
1157 #ifdef HSP_DEBUG
1158     fprintf(stderr, "Scanning \"%s\"\n", text);
1159 #endif
1160
1161     hsplineno = hslineno;
1162     hspcolno = hscolno;
1163
1164     while (len-- > 0) {
1165         switch (*text++) {
1166         case '\n':
1167         case '\r':
1168         case '\f':
1169             hslineno++;
1170             hscolno = 0;
1171             break;
1172         case '\t':
1173             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1174             break;
1175         case '\v':
1176             break;
1177         default:
1178             ++hscolno;
1179             break;
1180         }
1181     }
1182 }
1183
1184 void
1185 setstartlineno(void)
1186 {
1187     startlineno = hsplineno;
1188
1189     if (modulelineno == 0) {
1190         modulelineno = startlineno;
1191     }
1192
1193 #if 1/*etags*/
1194 #else
1195     if (etags)
1196         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1197 #endif
1198 }
1199
1200 /**********************************************************************
1201 *                                                                     *
1202 *                                                                     *
1203 *                      Text Caching                                   *
1204 *                                                                     *
1205 *                                                                     *
1206 **********************************************************************/
1207
1208 #define CACHE_SIZE YY_BUF_SIZE
1209
1210 static struct {
1211     unsigned allocated;
1212     unsigned next;
1213     char *text;
1214 } textcache = { 0, 0, NULL };
1215
1216 static void
1217 cleartext(void)
1218 {
1219 /*  fprintf(stderr, "cleartext\n"); */
1220     textcache.next = 0;
1221     if (textcache.allocated == 0) {
1222         textcache.allocated = CACHE_SIZE;
1223         textcache.text = xmalloc(CACHE_SIZE);
1224     }
1225 }
1226
1227 static void
1228 addtext(char *text, unsigned length)
1229 {
1230 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1231
1232     if (length == 0)
1233         return;
1234
1235     if (textcache.next + length + 1 >= textcache.allocated) {
1236         textcache.allocated += length + CACHE_SIZE;
1237         textcache.text = xrealloc(textcache.text, textcache.allocated);
1238     }
1239     bcopy(text, textcache.text + textcache.next, length);
1240     textcache.next += length;
1241 }
1242
1243 static void
1244 addchar(char c)
1245 {
1246 /*  fprintf(stderr, "addchar: %c\n", c); */
1247
1248     if (textcache.next + 2 >= textcache.allocated) {
1249         textcache.allocated += CACHE_SIZE;
1250         textcache.text = xrealloc(textcache.text, textcache.allocated);
1251     }
1252     textcache.text[textcache.next++] = c;
1253 }
1254
1255 static char *
1256 fetchtext(unsigned *length)
1257 {
1258 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1259
1260     *length = textcache.next;
1261     textcache.text[textcache.next] = '\0';
1262     return textcache.text;
1263 }
1264
1265 /**********************************************************************
1266 *                                                                     *
1267 *                                                                     *
1268 *    Identifier Processing                                             *
1269 *                                                                     *
1270 *                                                                     *
1271 **********************************************************************/
1272
1273 /*
1274         hsnewid         Enters an id of length n into the symbol table.
1275 */
1276
1277 static void
1278 hsnewid(char *name, int length)
1279 {
1280     char save = name[length];
1281
1282     name[length] = '\0';
1283     yylval.uid = installid(name);
1284     name[length] = save;
1285 }
1286
1287 BOOLEAN
1288 hsnewqid(char *name, int length)
1289 {
1290     char* dot;
1291     char save = name[length];
1292     name[length] = '\0';
1293
1294     dot = strchr(name, '.');
1295     *dot = '\0';
1296     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1297     *dot = '.';
1298     name[length] = save;
1299
1300     return _isconstr(dot+1);
1301 }
1302
1303 BOOLEAN 
1304 isconstr(char *s) /* walks past leading underscores before using the macro */
1305 {
1306     char *temp = s;
1307
1308     for ( ; temp != NULL && *temp == '_' ; temp++ );
1309
1310     return _isconstr(temp);
1311 }