ab3300e5adbd722272710af480fe0c22e047173e
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 #include "../../includes/config.h"
11
12 #include <stdio.h>
13
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
15 #include <string.h>
16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
18 #include <memory.h>
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
20 #define index strchr
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
26 #include <strings.h>
27 /* memory.h and strings.h conflict on some systems.  */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
29
30 #include "hspincl.h"
31 #include "hsparser.tab.h"
32 #include "constants.h"
33 #include "utils.h"
34
35 /* Our substitute for <ctype.h> */
36
37 #define NCHARS  256
38 #define _S      0x1
39 #define _D      0x2
40 #define _H      0x4
41 #define _O      0x8
42 #define _C      0x10
43
44 static unsigned char CharTable[NCHARS] = {
45 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
46 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
47 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
48 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
50 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
51 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
52 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
53 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
54 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
55 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
57 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
58 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
59 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
61
62 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 };
79
80 BOOLEAN
81 isconstr (char *s)
82 {
83     return(CharTable[*s]&(_C));
84 }
85
86 /**********************************************************************
87 *                                                                     *
88 *                                                                     *
89 *      Declarations                                                   *
90 *                                                                     *
91 *                                                                     *
92 **********************************************************************/
93
94 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
95
96 /*
97  * For my own sanity, things that are not part of the flex skeleton
98  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
99  */
100
101 static int hslineno = 0;        /* Line number at end of token */
102 int hsplineno = 0;              /* Line number at end of previous token */
103
104 static int hscolno = 0;         /* Column number at end of token */
105 int hspcolno = 0;               /* Column number at end of previous token */
106 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
107
108 int modulelineno = -1;          /* The line number where the module starts */
109 int startlineno = 0;            /* The line number where something starts */
110 int endlineno = 0;              /* The line number where something ends */
111
112 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
113 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
114
115 static int nested_comments;     /* For counting comment nesting depth */
116
117 /* OLD: Hacky definition of yywrap: see flex doc.
118
119    If we don't do this, then we'll have to get the default
120    yywrap from the flex library, which is often something
121    we are not good at locating.  This avoids that difficulty.
122    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
123    WDP 94/09/05
124 #define yywrap() 1
125 */
126
127 /* Essential forward declarations */
128
129 static void hsnewid      PROTO((char *, int));
130 static void layout_input PROTO((char *, int));
131 static void cleartext    (NO_ARGS);
132 static void addtext      PROTO((char *, unsigned));
133 static void addchar      PROTO((char));
134 static char *fetchtext   PROTO((unsigned *));
135 static void new_filename PROTO((char *));
136 static int  Return       PROTO((int));
137 static void hsentercontext PROTO((int));
138
139 /* Special file handling for IMPORTS */
140 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
141
142 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
143 static char *filename_save;             /* File Name                     */
144 static int hslineno_save = 0,           /* Line Number                   */
145  hsplineno_save = 0,                    /* Line Number of Prev. token    */
146  hscolno_save = 0,                      /* Indentation                   */
147  hspcolno_save = 0;                     /* Left Indentation              */
148 static short icontexts_save = 0;        /* Indent Context Level          */
149
150 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
151 extern BOOLEAN etags;                   /* that which is saved */
152
153 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
154
155 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
156                                  * inserted before token +ve  -- "}" inserted before
157                                  * token */
158
159 short icontexts = 0;            /* Which context we're in */
160
161 /*
162         Table of indentations:  right bit indicates whether to use
163           indentation rules (1 = use rules; 0 = ignore)
164
165     partain:
166     push one of these "contexts" at every "case" or "where"; the right bit says
167     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
168
169     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
170     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
171     push is shown just below.
172
173 */
174
175
176 static short indenttab[MAX_CONTEXTS] = {-1};
177
178 #define INDENTPT (indenttab[icontexts]>>1)
179 #define INDENTON (indenttab[icontexts]&1)
180
181 #define RETURN(tok) return(Return(tok))
182
183 #undef YY_DECL
184 #define YY_DECL int yylex1()
185
186 /* We should not peek at yy_act, but flex calls us even for the internal action
187    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
188    to support older versions of flex, we'll continue to peek for now.
189  */
190 #define YY_USER_ACTION \
191     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
192
193 #if 0/*debug*/
194 #undef YY_BREAK
195 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
196 #endif
197
198 /* Each time we enter a new start state, we push it onto the state stack.
199 */
200 #define PUSH_STATE(n)   yy_push_state(n)
201 #define POP_STATE       yy_pop_state()
202
203 %}
204 /* Options:
205     8bit (8-bit input)
206     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
207     never-interactive (to go a bit faster)
208     stack (use a start-condition stack)
209 */
210 %option 8bit
211 %option noyywrap
212 %option never-interactive
213 %option stack
214
215 /* The start states are:
216    Code -- normal Haskell code (principal lexer)
217    GlaExt -- Haskell code with Glasgow extensions
218    Comment -- Nested comment processing
219    String -- Inside a string literal with backslashes
220    StringEsc -- Immediately following a backslash in a string literal
221    Char -- Inside a character literal with backslashes
222    CharEsc -- Immediately following a backslash in a character literal 
223
224    Note that the INITIAL state is unused.  Also note that these states
225    are _exclusive_.  All rules should be prefixed with an appropriate
226    list of start states.
227  */
228
229 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
230
231 isoS                    [\xa1-\xbf\xd7\xf7]
232 isoL                    [\xc0-\xd6\xd8-\xde]
233 isol                    [\xdf-\xf6\xf8-\xff]
234 isoA                    [\xa1-\xff]
235
236 D                       [0-9]
237 O                       [0-7]
238 H                       [0-9A-Fa-f]
239 N                       {D}+
240 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
241 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
242 SId                     {S}{S}*
243 L                       [A-Z\xc0-\xd6\xd8-\xde]
244 l                       [a-z\xdf-\xf6\xf8-\xff]
245 I                       {L}|{l}
246 i                       {L}|{l}|[0-9'_]
247 Id                      {I}{i}*
248 Mod                     {L}{i}*
249 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
250 CNTRL                   [@A-Z\[\\\]^_]
251 WS                      [ \t\n\r\f\v]
252 NL                      [\n\r]
253
254 %%
255
256 %{
257     /* 
258      * Special GHC pragma rules.  Do we need a start state for interface files,
259      * so these won't be matched in source files? --JSM
260      */
261 %}
262
263 <Code,GlaExt>^"# ".*{NL}    {
264                           char tempf[FILENAME_SIZE];
265                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
266                           new_filename(tempf);
267                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
268                         }
269
270 <Code,GlaExt>^"#line ".*{NL}    {
271                           char tempf[FILENAME_SIZE];
272                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
273                           new_filename(tempf); 
274                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
275                         }
276
277 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
278                           /* partain: pragma-style line directive */
279                           char tempf[FILENAME_SIZE];
280                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
281                           new_filename(tempf);
282                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
283                         }
284
285 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
286                               PUSH_STATE(UserPragma);
287                               RETURN(INTERFACE_UPRAGMA);
288                             }
289 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
290                               PUSH_STATE(UserPragma);
291                               RETURN(SPECIALISE_UPRAGMA);
292                             }
293 <Code,GlaExt>"{-#"{WS}*"INLINE" {
294                               PUSH_STATE(UserPragma);
295                               RETURN(INLINE_UPRAGMA);
296                             }
297 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
298                               PUSH_STATE(UserPragma);
299                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
300                             }
301 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
302                               PUSH_STATE(UserPragma);
303                               RETURN(DEFOREST_UPRAGMA);
304                             }
305 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
306                               /* these are handled by hscpp */
307                               nested_comments =1;
308                               PUSH_STATE(Comment);
309                             }
310 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
311                               fprintf(stderr, "\"%s\", line %d: Warning: Unrecognised pragma '",
312                                 input_filename, hsplineno);
313                               format_string(stderr, (unsigned char *) yytext, yyleng);
314                               fputs("'\n", stderr);
315                               nested_comments = 1;
316                               PUSH_STATE(Comment);
317                             }
318 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
319
320 %{
321     /*
322      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
323      * intentionally accepted as a keyword even for normal <Code>.
324      */
325 %}
326
327 <Code,GlaExt>"case"             { RETURN(CASE); }
328 <Code,GlaExt>"class"            { RETURN(CLASS); }
329 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
330 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
331 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
332 <Code,GlaExt>"do"               { RETURN(DO); }
333 <Code,GlaExt>"else"             { RETURN(ELSE); }
334 <Code,GlaExt>"if"               { RETURN(IF); }
335 <Code,GlaExt>"import"           { RETURN(IMPORT); }
336 <Code,GlaExt>"in"               { RETURN(IN); }
337 <Code,GlaExt>"infix"            { RETURN(INFIX); }
338 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
339 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
340 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
341 <Code,GlaExt>"let"              { RETURN(LET); }
342 <Code,GlaExt>"module"           { RETURN(MODULE); }
343 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
344 <Code,GlaExt>"of"               { RETURN(OF); }
345 <Code,GlaExt>"then"             { RETURN(THEN); }
346 <Code,GlaExt>"type"             { RETURN(TYPE); }
347 <Code,GlaExt>"where"            { RETURN(WHERE); }
348
349 <Code,GlaExt>"as"               { RETURN(AS); }
350 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
351 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
352
353 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
354 <GlaExt>"_ccall_"               { RETURN(CCALL); }
355 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
356 <GlaExt>"_casm_"                { RETURN(CASM); }
357 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
358
359 %{
360     /* 
361      * Haskell operators: special, reservedops and useful varsyms
362      */
363 %}
364
365 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
366 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
367 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
368 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
369 <Code,GlaExt>"{"                { RETURN(OCURLY); }
370 <Code,GlaExt>"}"                { RETURN(CCURLY); }
371 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
372 <Code,GlaExt>";"                { RETURN(SEMI); }
373 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
374 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
375
376 <Code,GlaExt>".."               { RETURN(DOTDOT); }
377 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
378 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
379 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
380 <Code,GlaExt>"|"                { RETURN(VBAR); }
381 <Code,GlaExt>"<-"               { RETURN(LARROW); }
382 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
383 <Code,GlaExt>"-"                { RETURN(MINUS); }
384
385 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
386 <Code,GlaExt>"@"                { RETURN(AT); }
387 <Code,GlaExt>"!"                { RETURN(BANG); }
388 <Code,GlaExt>"~"                { RETURN(LAZY); }
389
390 %{
391     /*
392      * Integers and (for Glasgow extensions) primitive integers.  Note that
393      * we pass all of the text on to the parser, because flex/C can't handle
394      * arbitrary precision numbers.
395      */
396 %}
397
398 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
399                          yylval.uid = xstrndup(yytext, yyleng - 1);
400                          RETURN(INTPRIM);
401                         }
402 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
403                          yylval.uid = xstrndup(yytext, yyleng);
404                          RETURN(INTEGER);
405                         }
406 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
407                          yylval.uid = xstrndup(yytext, yyleng - 1);
408                          RETURN(INTPRIM);
409                         }
410 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
411                          yylval.uid = xstrndup(yytext, yyleng);
412                          RETURN(INTEGER);
413                         }
414 <GlaExt>("-")?{N}"#"    {
415                          yylval.uid = xstrndup(yytext, yyleng - 1);
416                          RETURN(INTPRIM);
417                         }
418 <Code,GlaExt,UserPragma>{N} {
419                          yylval.uid = xstrndup(yytext, yyleng);
420                          RETURN(INTEGER);
421                         }
422
423 %{
424     /*
425      * Floats and (for Glasgow extensions) primitive floats/doubles.
426      */
427 %}
428
429 <GlaExt>("-")?{F}"##"   {
430                          yylval.uid = xstrndup(yytext, yyleng - 2);
431                          RETURN(DOUBLEPRIM);
432                         }
433 <GlaExt>("-")?{F}"#"    {
434                          yylval.uid = xstrndup(yytext, yyleng - 1);
435                          RETURN(FLOATPRIM);
436                         }
437 <Code,GlaExt>{F}        {
438                          yylval.uid = xstrndup(yytext, yyleng);
439                          RETURN(FLOAT);
440                         }
441
442 %{
443     /*
444      * Funky ``foo'' style C literals for Glasgow extensions
445      */
446 %}
447
448 <GlaExt>"``"[^']+"''"   {
449                          hsnewid(yytext + 2, yyleng - 4);
450                          RETURN(CLITLIT);
451                         }
452
453 %{
454     /*
455      * Identifiers, both variables and operators.  The trailing hash is allowed
456      * for Glasgow extensions.
457      */
458 %}
459
460
461 %{
462 /* These SHOULDNAE work in "Code" (sigh) */
463 %}
464 <Code,GlaExt,UserPragma>{Id}"#" { 
465                          if (! nonstandardFlag) {
466                             char errbuf[ERR_BUF_SIZE];
467                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
468                             hsperror(errbuf);
469                          }
470                          hsnewid(yytext, yyleng);
471                          RETURN(isconstr(yytext) ? CONID : VARID);
472                         }
473 <Code,GlaExt,UserPragma>{Id}    {
474                          hsnewid(yytext, yyleng);
475                          RETURN(isconstr(yytext) ? CONID : VARID);
476                         }
477 <Code,GlaExt,UserPragma>{SId}   {
478                          hsnewid(yytext, yyleng);
479                          RETURN(isconstr(yytext) ? CONSYM : VARSYM);
480                         }
481 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
482                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
483                          RETURN(is_constr ? QCONID : QVARID);
484                         }
485 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
486                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
487                          RETURN(is_constr ? QCONSYM : QVARSYM);
488                         }
489
490 %{
491     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
492
493     /* Because we can make the former well-behaved (we defined them).
494
495        Sadly, the latter is defined by Haskell, which allows such
496        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
497     */
498 %}
499
500 <GlaExt,UserPragma>"`"{Id}"#`"  {       
501                          hsnewid(yytext + 1, yyleng - 2);
502                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
503                         }
504
505 %{
506     /*
507      * Character literals.  The first form is the quick form, for character
508      * literals that don't contain backslashes.  Literals with backslashes are
509      * lexed through multiple rules.  First, we match the open ' and as many
510      * normal characters as possible.  This puts us into the <Char> state, where
511      * a backslash is legal.  Then, we match the backslash and move into the 
512      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
513      * characters and the close '.  We may end up with too many characters, but
514      * this allows us to easily share the lex rules with strings.  Excess characters
515      * are ignored with a warning.
516      */
517 %}
518
519 <GlaExt>'({CHAR}|"\"")"'#" {
520                          yylval.uhstring = installHstring(1, yytext+1);
521                          RETURN(CHARPRIM);
522                         }
523 <Code,GlaExt>'({CHAR}|"\"")'    {
524                          yylval.uhstring = installHstring(1, yytext+1);
525                          RETURN(CHAR);
526                         }
527 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
528                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
529                          hsperror(errbuf);
530                         }
531 <Code,GlaExt>'({CHAR}|"\"")* {
532                          hsmlcolno = hspcolno;
533                          cleartext();
534                          addtext(yytext+1, yyleng-1);
535                          PUSH_STATE(Char);
536                         }
537 <Char>({CHAR}|"\"")*'#  {
538                          unsigned length;
539                          char *text;
540
541                          addtext(yytext, yyleng - 2);
542                          text = fetchtext(&length);
543
544                          if (! nonstandardFlag) {
545                             char errbuf[ERR_BUF_SIZE];
546                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
547                             hsperror(errbuf);
548                          }
549
550                          if (length > 1) {
551                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
552                               input_filename, hsplineno, hspcolno + 1);
553                             format_string(stderr, (unsigned char *) text, length);
554                             fputs("' too long\n", stderr);
555                             hsperror("");
556                          }
557                          yylval.uhstring = installHstring(1, text);
558                          hspcolno = hsmlcolno;
559                          POP_STATE;
560                          RETURN(CHARPRIM); 
561                         }
562 <Char>({CHAR}|"\"")*'   {
563                          unsigned length;
564                          char *text;
565
566                          addtext(yytext, yyleng - 1);
567                          text = fetchtext(&length);
568
569                          if (length > 1) {
570                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
571                               input_filename, hsplineno, hspcolno + 1);
572                             format_string(stderr, (unsigned char *) text, length);
573                             fputs("' too long\n", stderr);
574                             hsperror("");
575                          }
576                          yylval.uhstring = installHstring(1, text);
577                          hspcolno = hsmlcolno;
578                          POP_STATE;
579                          RETURN(CHAR); 
580                         }
581 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
582
583
584 %{
585     /*
586      * String literals.  The first form is the quick form, for string literals
587      * that don't contain backslashes.  Literals with backslashes are lexed
588      * through multiple rules.  First, we match the open " and as many normal
589      * characters as possible.  This puts us into the <String> state, where
590      * a backslash is legal.  Then, we match the backslash and move into the 
591      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
592      * characters, moving back and forth between <String> and <StringEsc> as more
593      * backslashes are encountered.  (We may even digress into <Comment> mode if we
594      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
595      * of normal characters and the close ".
596      */
597 %}
598
599 <GlaExt>"\""({CHAR}|"'")*"\""#  {
600                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
601                             /* the -3 accounts for the " on front, "# on the end */
602                          RETURN(STRINGPRIM); 
603                         }
604 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
605                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
606                          RETURN(STRING); 
607                         }
608 <Code,GlaExt>"\""({CHAR}|"'")* {
609                          hsmlcolno = hspcolno;
610                          cleartext();
611                          addtext(yytext+1, yyleng-1);
612                          PUSH_STATE(String);
613                         }
614 <String>({CHAR}|"'")*"\"#"   {
615                          unsigned length;
616                          char *text;
617
618                          addtext(yytext, yyleng-2);
619                          text = fetchtext(&length);
620
621                          if (! nonstandardFlag) {
622                             char errbuf[ERR_BUF_SIZE];
623                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
624                             hsperror(errbuf);
625                          }
626
627                          yylval.uhstring = installHstring(length, text);
628                          hspcolno = hsmlcolno;
629                          POP_STATE;
630                          RETURN(STRINGPRIM);
631                         }
632 <String>({CHAR}|"'")*"\""   {
633                          unsigned length;
634                          char *text;
635
636                          addtext(yytext, yyleng-1);
637                          text = fetchtext(&length);
638
639                          yylval.uhstring = installHstring(length, text);
640                          hspcolno = hsmlcolno;
641                          POP_STATE;
642                          RETURN(STRING); 
643                         }
644 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
645
646 %{
647     /*
648      * Character and string escapes are roughly the same, but strings have the
649      * extra `\&' sequence which is not allowed for characters.  Also, comments
650      * are allowed in the <StringEsc> state.  (See the comment section much
651      * further down.)
652      *
653      * NB: Backslashes and tabs are stored in strings as themselves.
654      * But if we print them (in printtree.c), they must go out as
655      * "\\\\" and "\\t" respectively.  (This is because of the bogus
656      * intermediate format that the parser produces.  It uses '\t' fpr end of
657      * string, so it needs to be able to escape tabs, which means that it
658      * also needs to be able to escape the escape character ('\\').  Sigh.
659      */
660 %}
661
662 <Char>\\                { PUSH_STATE(CharEsc); }
663 <String>\\&             /* Ignore */ ;
664 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
665
666 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
667 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
668
669 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
670 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
671 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
672 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
673 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
674 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
675 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
676 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
677 <CharEsc,StringEsc>BEL  |
678 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
679 <CharEsc,StringEsc>BS   |
680 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
681 <CharEsc,StringEsc>HT   |
682 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
683 <CharEsc,StringEsc>LF   |
684 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
685 <CharEsc,StringEsc>VT   |
686 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
687 <CharEsc,StringEsc>FF   |
688 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
689 <CharEsc,StringEsc>CR   |
690 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
691 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
692 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
693 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
694 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
695 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
696 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
697 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
698 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
699 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
700 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
701 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
702 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
703 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
704 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
705 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
706 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
707 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
708 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
709 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
710 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
711 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
712 <CharEsc,StringEsc>{D}+  {
713                           int i = strtol(yytext, NULL, 10);
714                           if (i < NCHARS) {
715                              addchar((char) i);
716                           } else {
717                              char errbuf[ERR_BUF_SIZE];
718                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
719                                 yytext);
720                              hsperror(errbuf);
721                           }
722                           POP_STATE;
723                         }
724 <CharEsc,StringEsc>o{O}+ {
725                           int i = strtol(yytext + 1, NULL, 8);
726                           if (i < NCHARS) {
727                              addchar((char) i);
728                           } else {
729                              char errbuf[ERR_BUF_SIZE];
730                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
731                                 yytext);
732                              hsperror(errbuf);
733                           }
734                           POP_STATE;
735                         }
736 <CharEsc,StringEsc>x{H}+ {
737                           int i = strtol(yytext + 1, NULL, 16);
738                           if (i < NCHARS) {
739                              addchar((char) i);
740                           } else {
741                              char errbuf[ERR_BUF_SIZE];
742                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
743                                 yytext);
744                              hsperror(errbuf);
745                           }
746                           POP_STATE;
747                         }
748
749 %{
750     /*
751      * Simple comments and whitespace.  Normally, we would just ignore these, but
752      * in case we're processing a string escape, we need to note that we've seen
753      * a gap.
754      *
755      * Note that we cater for a comment line that *doesn't* end in a newline.
756      * This is incorrect, strictly speaking, but seems like the right thing
757      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
758      */
759 %}
760
761 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
762 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
763
764 %{
765     /*
766      * Nested comments.  The major complication here is in trying to match the
767      * longest lexemes possible, for better performance.  (See the flex document.)
768      * That's why the rules look so bizarre.
769      */
770 %}
771
772 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
773                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
774                         }
775
776 <Comment>[^-{]*         |
777 <Comment>"-"+[^-{}]+    |
778 <Comment>"{"+[^-{}]+    ;
779 <Comment>"{-"           { nested_comments++; }
780 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
781 <Comment>(.|\n)         ;
782
783 %{
784     /*
785      * Illegal characters.  This used to be a single rule, but we might as well
786      * pass on as much information as we have, so now we indicate our state in
787      * the error message.
788      */
789 %}
790
791 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
792                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
793                             input_filename, hsplineno, hspcolno + 1); 
794                          format_string(stderr, (unsigned char *) yytext, 1);
795                          fputs("'\n", stderr);
796                          hsperror("");
797                         }
798 <Char>(.|\n)            { 
799                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
800                             input_filename, hsplineno, hspcolno + 1); 
801                          format_string(stderr, (unsigned char *) yytext, 1);
802                          fputs("' in a character literal\n", stderr);
803                          hsperror("");
804                         }
805 <CharEsc>(.|\n)         {
806                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
807                             input_filename, hsplineno, hspcolno + 1); 
808                          format_string(stderr, (unsigned char *) yytext, 1);
809                          fputs("'\n", stderr);
810                          hsperror("");
811                         }
812 <String>(.|\n)          { if (nonstandardFlag) {
813                              addtext(yytext, yyleng);
814                           } else { 
815                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
816                                 input_filename, hsplineno, hspcolno + 1); 
817                                 format_string(stderr, (unsigned char *) yytext, 1);
818                                 fputs("' in a string literal\n", stderr);
819                                 hsperror("");
820                           }
821                         }
822 <StringEsc>(.|\n)       {
823                          if (noGap) {
824                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
825                                 input_filename, hsplineno, hspcolno + 1); 
826                              format_string(stderr, (unsigned char *) yytext, 1);
827                              fputs("'\n", stderr);
828                              hsperror("");
829                          } else {
830                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
831                                 input_filename, hsplineno, hspcolno + 1);
832                              format_string(stderr, (unsigned char *) yytext, 1);
833                              fputs("' in a string gap\n", stderr);
834                              hsperror("");
835                          }
836                         }
837
838 %{
839     /*
840      * End of file.  In any sub-state, this is an error.  However, for the primary
841      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
842      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
843      * adding virtual close curlies, or closing an interface and returning to the
844      * primary source file.
845      *
846      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
847      * line/column advancement has to be done by hand.
848      */
849 %}
850
851 <Char,CharEsc><<EOF>>   { 
852                           hsplineno = hslineno; hspcolno = hscolno;
853                           hsperror("unterminated character literal");
854                         }
855 <Comment><<EOF>>        { 
856                           hsplineno = hslineno; hspcolno = hscolno;
857                           hsperror("unterminated comment"); 
858                         }
859 <String,StringEsc><<EOF>>   { 
860                           hsplineno = hslineno; hspcolno = hscolno;
861                           hsperror("unterminated string literal"); 
862                         }
863 <UserPragma><<EOF>>     {
864                           hsplineno = hslineno; hspcolno = hscolno;
865                           hsperror("unterminated user-specified pragma"); 
866                         }
867 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
868
869 %%
870
871 /**********************************************************************
872 *                                                                     *
873 *                                                                     *
874 *     YACC/LEX Initialisation etc.                                    *
875 *                                                                     *
876 *                                                                     *
877 **********************************************************************/
878
879 /*
880    We initialise input_filename to "<stdin>".
881    This allows unnamed sources to be piped into the parser.
882 */
883
884 void
885 yyinit(void)
886 {
887     input_filename = xstrdup("<stdin>");
888
889     /* We must initialize the input buffer _now_, because we call
890        setyyin _before_ calling yylex for the first time! */
891     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
892
893     if (nonstandardFlag)
894         PUSH_STATE(GlaExt);
895     else
896         PUSH_STATE(Code);
897 }
898
899 static void
900 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
901 {
902     if (input_filename != NULL)
903         free(input_filename);
904     input_filename = xstrdup(f);
905 }
906
907 /**********************************************************************
908 *                                                                     *
909 *                                                                     *
910 *     Layout Processing                                               *
911 *                                                                     *
912 *                                                                     *
913 **********************************************************************/
914
915 /*
916         The following section deals with Haskell Layout conventions
917         forcing insertion of ; or } as appropriate
918 */
919
920 static BOOLEAN
921 hsshouldindent(void)
922 {
923     return (!forgetindent && INDENTON);
924 }
925
926
927 /* Enter new context and set new indentation level */
928 void
929 hssetindent(void)
930 {
931 #ifdef HSP_DEBUG
932     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
933 #endif
934
935     /*
936      * partain: first chk that new indent won't be less than current one; this code
937      * doesn't make sense to me; hscolno tells the position of the _end_ of the
938      * current token; what that has to do with indenting, I don't know.
939      */
940
941
942     if (hscolno - 1 <= INDENTPT) {
943         if (INDENTPT == -1)
944             return;             /* Empty input OK for Haskell 1.1 */
945         else {
946             char errbuf[ERR_BUF_SIZE];
947
948             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
949             hsperror(errbuf);
950         }
951     }
952     hsentercontext((hspcolno << 1) | 1);
953 }
954
955
956 /* Enter a new context without changing the indentation level */
957 void
958 hsincindent(void)
959 {
960 #ifdef HSP_DEBUG
961     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
962 #endif
963     hsentercontext(indenttab[icontexts] & ~1);
964 }
965
966
967 /* Turn off indentation processing, usually because an explicit "{" has been seen */
968 void
969 hsindentoff(void)
970 {
971     forgetindent = TRUE;
972 }
973
974
975 /* Enter a new layout context. */
976 static void
977 hsentercontext(int indent)
978 {
979     /* Enter new context and set indentation as specified */
980     if (++icontexts >= MAX_CONTEXTS) {
981         char errbuf[ERR_BUF_SIZE];
982
983         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
984         hsperror(errbuf);
985     }
986     forgetindent = FALSE;
987     indenttab[icontexts] = indent;
988 #ifdef HSP_DEBUG
989     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
990 #endif
991 }
992
993
994 /* Exit a layout context */
995 void
996 hsendindent(void)
997 {
998     --icontexts;
999 #ifdef HSP_DEBUG
1000     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1001 #endif
1002 }
1003
1004 /*
1005  *      Return checks the indentation level and returns ;, } or the specified token.
1006  */
1007
1008 static int
1009 Return(int tok)
1010 {
1011 #ifdef HSP_DEBUG
1012     extern int yyleng;
1013 #endif
1014
1015     if (hsshouldindent()) {
1016         if (hspcolno < INDENTPT) {
1017 #ifdef HSP_DEBUG
1018             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1019 #endif
1020             hssttok = tok;
1021             return (VCCURLY);
1022         } else if (hspcolno == INDENTPT) {
1023 #ifdef HSP_DEBUG
1024             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1025 #endif
1026             hssttok = -tok;
1027             return (SEMI);
1028         }
1029     }
1030     hssttok = -1;
1031 #ifdef HSP_DEBUG
1032     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1033 #endif
1034     return (tok);
1035 }
1036
1037
1038 /*
1039  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1040  */
1041 int
1042 yylex()
1043 {
1044     int tok;
1045     static BOOLEAN eof = FALSE;
1046
1047     if (!eof) {
1048         if (hssttok != -1) {
1049             if (hssttok < 0) {
1050                 tok = -hssttok;
1051                 hssttok = -1;
1052                 return tok;
1053             }
1054             RETURN(hssttok);
1055         } else {
1056             endlineno = hslineno;
1057             if ((tok = yylex1()) != EOF)
1058                 return tok;
1059             else
1060                 eof = TRUE;
1061         }
1062     }
1063     if (icontexts > icontexts_save) {
1064         if (INDENTON) {
1065             eof = TRUE;
1066             indenttab[icontexts] = 0;
1067             return (VCCURLY);
1068         } else
1069             hsperror("missing '}' at end of file");
1070     } else if (hsbuf_save != NULL) {
1071         fclose(yyin);
1072         yy_delete_buffer(YY_CURRENT_BUFFER);
1073         yy_switch_to_buffer(hsbuf_save);
1074         hsbuf_save = NULL;
1075         new_filename(filename_save);
1076         free(filename_save);
1077         hslineno = hslineno_save;
1078         hsplineno = hsplineno_save;
1079         hscolno = hscolno_save;
1080         hspcolno = hspcolno_save;
1081         etags = etags_save;
1082         icontexts = icontexts_save - 1;
1083         icontexts_save = 0;
1084 #ifdef HSP_DEBUG
1085         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1086 #endif
1087         eof = FALSE;
1088
1089         /* RETURN(LEOF); */
1090         hsperror("No longer using yacc to parse interface files");
1091
1092     } else {
1093         yyterminate();
1094     }
1095     abort(); /* should never get here! */
1096     return(0);
1097 }
1098
1099 /**********************************************************************
1100 *                                                                     *
1101 *                                                                     *
1102 *     Input Processing for Interfaces -- Not currently used !!!       *
1103 *                                                                     *
1104 *                                                                     *
1105 **********************************************************************/
1106
1107 /* setyyin(file)        open file as new lex input buffer */
1108 extern FILE *yyin;
1109
1110 void
1111 setyyin(char *file)
1112 {
1113     hsbuf_save = YY_CURRENT_BUFFER;
1114     if ((yyin = fopen(file, "r")) == NULL) {
1115         char errbuf[ERR_BUF_SIZE];
1116
1117         sprintf(errbuf, "can't read \"%-.50s\"", file);
1118         hsperror(errbuf);
1119     }
1120     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1121
1122     hslineno_save = hslineno;
1123     hsplineno_save = hsplineno;
1124     hslineno = hsplineno = 1;
1125
1126     filename_save = input_filename;
1127     input_filename = NULL;
1128     new_filename(file);
1129     hscolno_save = hscolno;
1130     hspcolno_save = hspcolno;
1131     hscolno = hspcolno = 0;
1132     etags_save = etags; /* do not do "etags" stuff in interfaces */
1133     etags = 0;          /* We remember whether we are doing it in
1134                            the module, so we can restore it later [WDP 94/09] */
1135     hsentercontext(-1);         /* partain: changed this from 0 */
1136     icontexts_save = icontexts;
1137 #ifdef HSP_DEBUG
1138     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1139 #endif
1140 }
1141
1142 static void
1143 layout_input(char *text, int len)
1144 {
1145 #ifdef HSP_DEBUG
1146     fprintf(stderr, "Scanning \"%s\"\n", text);
1147 #endif
1148
1149     hsplineno = hslineno;
1150     hspcolno = hscolno;
1151
1152     while (len-- > 0) {
1153         switch (*text++) {
1154         case '\n':
1155         case '\r':
1156         case '\f':
1157             hslineno++;
1158             hscolno = 0;
1159             break;
1160         case '\t':
1161             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1162             break;
1163         case '\v':
1164             break;
1165         default:
1166             ++hscolno;
1167             break;
1168         }
1169     }
1170 }
1171
1172 void
1173 setstartlineno(void)
1174 {
1175     startlineno = hsplineno;
1176
1177     if (modulelineno == 0) {
1178         modulelineno = startlineno;
1179     }
1180
1181 #if 1/*etags*/
1182 #else
1183     if (etags)
1184         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1185 #endif
1186 }
1187
1188 /**********************************************************************
1189 *                                                                     *
1190 *                                                                     *
1191 *                      Text Caching                                   *
1192 *                                                                     *
1193 *                                                                     *
1194 **********************************************************************/
1195
1196 #define CACHE_SIZE YY_BUF_SIZE
1197
1198 static struct {
1199     unsigned allocated;
1200     unsigned next;
1201     char *text;
1202 } textcache = { 0, 0, NULL };
1203
1204 static void
1205 cleartext(void)
1206 {
1207 /*  fprintf(stderr, "cleartext\n"); */
1208     textcache.next = 0;
1209     if (textcache.allocated == 0) {
1210         textcache.allocated = CACHE_SIZE;
1211         textcache.text = xmalloc(CACHE_SIZE);
1212     }
1213 }
1214
1215 static void
1216 addtext(char *text, unsigned length)
1217 {
1218 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1219
1220     if (length == 0)
1221         return;
1222
1223     if (textcache.next + length + 1 >= textcache.allocated) {
1224         textcache.allocated += length + CACHE_SIZE;
1225         textcache.text = xrealloc(textcache.text, textcache.allocated);
1226     }
1227     bcopy(text, textcache.text + textcache.next, length);
1228     textcache.next += length;
1229 }
1230
1231 static void
1232 addchar(char c)
1233 {
1234 /*  fprintf(stderr, "addchar: %c\n", c); */
1235
1236     if (textcache.next + 2 >= textcache.allocated) {
1237         textcache.allocated += CACHE_SIZE;
1238         textcache.text = xrealloc(textcache.text, textcache.allocated);
1239     }
1240     textcache.text[textcache.next++] = c;
1241 }
1242
1243 static char *
1244 fetchtext(unsigned *length)
1245 {
1246 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1247
1248     *length = textcache.next;
1249     textcache.text[textcache.next] = '\0';
1250     return textcache.text;
1251 }
1252
1253 /**********************************************************************
1254 *                                                                     *
1255 *                                                                     *
1256 *    Identifier Processing                                             *
1257 *                                                                     *
1258 *                                                                     *
1259 **********************************************************************/
1260
1261 /*
1262         hsnewid         Enters an id of length n into the symbol table.
1263 */
1264
1265 static void
1266 hsnewid(char *name, int length)
1267 {
1268     char save = name[length];
1269
1270     name[length] = '\0';
1271     yylval.uid = installid(name);
1272     name[length] = save;
1273 }
1274
1275 BOOLEAN
1276 hsnewqid(char *name, int length)
1277 {
1278     char* dot;
1279     char save = name[length];
1280     name[length] = '\0';
1281
1282     dot = strchr(name, '.');
1283     *dot = '\0';
1284     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1285     *dot = '.';
1286     name[length] = save;
1287
1288     return isconstr(dot+1);
1289 }