[project @ 1998-02-10 13:47:44 by simonm]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 /* The includes/config.h one */
11 #include "config.h"
12
13 #include <stdio.h>
14
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 #include <string.h>
17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #include <memory.h>
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define index strchr
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 #include <strings.h>
28 /* memory.h and strings.h conflict on some systems.  */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
30
31 #include "hspincl.h"
32 #include "hsparser.tab.h"
33 #include "constants.h"
34 #include "utils.h"
35
36 /* Our substitute for <ctype.h> */
37
38 #define NCHARS  256
39 #define _S      0x1
40 #define _D      0x2
41 #define _H      0x4
42 #define _O      0x8
43 #define _C      0x10
44
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
62
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 };
80
81 BOOLEAN
82 isconstr (char *s)
83 {
84     return(CharTable[*s]&(_C));
85 }
86
87 /**********************************************************************
88 *                                                                     *
89 *                                                                     *
90 *      Declarations                                                   *
91 *                                                                     *
92 *                                                                     *
93 **********************************************************************/
94
95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
96
97 /*
98  * For my own sanity, things that are not part of the flex skeleton
99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
100  */
101
102 static int hslineno = 0;        /* Line number at end of token */
103 int hsplineno = 0;              /* Line number at end of previous token */
104
105 static int hscolno = 0;         /* Column number at end of token */
106 int hspcolno = 0;               /* Column number at end of previous token */
107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
108
109 int modulelineno = -1;          /* The line number where the module starts */
110 int startlineno = 0;            /* The line number where something starts */
111 int endlineno = 0;              /* The line number where something ends */
112
113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
115
116 static int nested_comments;     /* For counting comment nesting depth */
117
118 /* OLD: Hacky definition of yywrap: see flex doc.
119
120    If we don't do this, then we'll have to get the default
121    yywrap from the flex library, which is often something
122    we are not good at locating.  This avoids that difficulty.
123    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124    WDP 94/09/05
125 #define yywrap() 1
126 */
127
128 /* Essential forward declarations */
129
130 static void hsnewid      PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext    (NO_ARGS);
133 static void addtext      PROTO((char *, unsigned));
134 static void addchar      PROTO((char));
135 static char *fetchtext   PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int  Return       PROTO((int));
138 static void hsentercontext PROTO((int));
139
140 /* Special file handling for IMPORTS */
141 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142
143 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
144 static char *filename_save;             /* File Name                     */
145 static int hslineno_save = 0,           /* Line Number                   */
146  hsplineno_save = 0,                    /* Line Number of Prev. token    */
147  hscolno_save = 0,                      /* Indentation                   */
148  hspcolno_save = 0;                     /* Left Indentation              */
149 static short icontexts_save = 0;        /* Indent Context Level          */
150
151 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags;                   /* that which is saved */
153
154 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
155
156 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
157                                  * inserted before token +ve  -- "}" inserted before
158                                  * token */
159
160 short icontexts = 0;            /* Which context we're in */
161
162 /*
163         Table of indentations:  right bit indicates whether to use
164           indentation rules (1 = use rules; 0 = ignore)
165
166     partain:
167     push one of these "contexts" at every "case" or "where"; the right bit says
168     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
169
170     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
171     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
172     push is shown just below.
173
174 */
175
176
177 static short indenttab[MAX_CONTEXTS] = {-1};
178
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
181
182 #define RETURN(tok) return(Return(tok))
183
184 #undef YY_DECL
185 #define YY_DECL int yylex1()
186
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189    to support older versions of flex, we'll continue to peek for now.
190  */
191 #define YY_USER_ACTION \
192     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
193
194 #if 0/*debug*/
195 #undef YY_BREAK
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
197 #endif
198
199 /* Each time we enter a new start state, we push it onto the state stack.
200 */
201 #define PUSH_STATE(n)   yy_push_state(n)
202 #define POP_STATE       yy_pop_state()
203
204 %}
205 /* Options:
206     8bit (8-bit input)
207     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208     never-interactive (to go a bit faster)
209     stack (use a start-condition stack)
210 */
211 %option 8bit
212 %option noyywrap
213 %option never-interactive
214 %option stack
215
216 /* The start states are:
217    Code -- normal Haskell code (principal lexer)
218    GlaExt -- Haskell code with Glasgow extensions
219    Comment -- Nested comment processing
220    String -- Inside a string literal with backslashes
221    StringEsc -- Immediately following a backslash in a string literal
222    Char -- Inside a character literal with backslashes
223    CharEsc -- Immediately following a backslash in a character literal 
224
225    Note that the INITIAL state is unused.  Also note that these states
226    are _exclusive_.  All rules should be prefixed with an appropriate
227    list of start states.
228  */
229
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231
232 isoS                    [\xa1-\xbf\xd7\xf7]
233 isoL                    [\xc0-\xd6\xd8-\xde]
234 isol                    [\xdf-\xf6\xf8-\xff]
235 isoA                    [\xa1-\xff]
236
237 D                       [0-9]
238 O                       [0-7]
239 H                       [0-9A-Fa-f]
240 N                       {D}+
241 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 SId                     {S}{S}*
244 L                       [A-Z\xc0-\xd6\xd8-\xde]
245 l                       [a-z\xdf-\xf6\xf8-\xff]
246 I                       {L}|{l}
247 i                       {L}|{l}|[0-9'_]
248 Id                      {I}{i}*
249 Mod                     {L}{i}*
250 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
251 CNTRL                   [@A-Z\[\\\]^_]
252 WS                      [ \t\n\r\f\v]
253 NL                      [\n\r]
254
255 %%
256
257 %{
258     /*
259      * Simple comments and whitespace.  Normally, we would just ignore these, but
260      * in case we're processing a string escape, we need to note that we've seen
261      * a gap.
262      *
263      * Note that we cater for a comment line that *doesn't* end in a newline.
264      * This is incorrect, strictly speaking, but seems like the right thing
265      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
266      *
267      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
268      */
269 %}
270
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
273
274 %{
275     /* 
276      * Special GHC pragma rules.  Do we need a start state for interface files,
277      * so these won't be matched in source files? --JSM
278      */
279
280 %}
281
282 %{
283 /* I believe the next rule is not ever matched.
284
285    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
286    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
287    where possible, so the lexer should now never see cpp directives
288    like '# ' and '#line'.
289
290    -- SOF
291
292 <Code,GlaExt>^"# ".*{NL}    {
293                           char tempf[FILENAME_SIZE];
294                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
295                           new_filename(tempf);
296                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
297                         }
298
299 <Code,GlaExt>^"#line ".*{NL}    {
300                           char tempf[FILENAME_SIZE];
301                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
302                           new_filename(tempf); 
303                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
304                         }
305 */
306 %}
307
308 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
309                           /* partain: pragma-style line directive */
310                           char tempf[FILENAME_SIZE];
311                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
312                           new_filename(tempf);
313                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
314                         }
315
316 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
317                               PUSH_STATE(UserPragma);
318                               forgetindent = TRUE;
319                               RETURN(INTERFACE_UPRAGMA);
320                             }
321 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
322                               PUSH_STATE(UserPragma);
323                               forgetindent = TRUE;
324                               RETURN(SPECIALISE_UPRAGMA);
325                             }
326 <Code,GlaExt>"{-#"{WS}*"INLINE" {
327                               PUSH_STATE(UserPragma);
328                               forgetindent = TRUE;
329                               RETURN(INLINE_UPRAGMA);
330                             }
331 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
332                               PUSH_STATE(UserPragma);
333                               forgetindent = TRUE;
334                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
335                             }
336 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
337                               /* these are handled by hscpp */
338                               nested_comments =1;
339                               forgetindent = TRUE;
340                               PUSH_STATE(Comment);
341                             }
342 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
343                               /* these are for the driver! */
344                               nested_comments =1;
345                               forgetindent = TRUE;
346                               PUSH_STATE(Comment);
347                             }
348 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
349                               /* these are used by `make depend' and the
350                                  compiler to indicate that a module should
351                                  be imported from source */
352                               nested_comments =1;
353                               RETURN(SOURCE_UPRAGMA); 
354                             }
355
356 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
357                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
358                                 input_filename, hsplineno);
359                               format_string(stderr, (unsigned char *) yytext, yyleng);
360                               fputs("'\n", stderr);
361                               nested_comments = 1;
362                               PUSH_STATE(Comment);
363                             }
364 <UserPragma>"#-}"           { POP_STATE; 
365                               forgetindent=FALSE; 
366                               /* don't want any layout processing here,
367                                * so just use 'return' instead of 'RETURN',
368                                * remembering to set hssttok.
369                                */
370                               hssttok = -1;
371                               return(END_UPRAGMA); 
372                             }
373
374 %{
375     /*
376      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
377      * intentionally accepted as a keyword even for normal <Code>.
378      */
379 %}
380
381 <Code,GlaExt>"case"             { RETURN(CASE); }
382 <Code,GlaExt>"class"            { RETURN(CLASS); }
383 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
384 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
385 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
386 <Code,GlaExt>"do"               { RETURN(DO); }
387 <Code,GlaExt>"else"             { RETURN(ELSE); }
388 <Code,GlaExt>"if"               { RETURN(IF); }
389 <Code,GlaExt>"import"           { RETURN(IMPORT); }
390 <Code,GlaExt>"in"               { RETURN(IN); }
391 <Code,GlaExt>"infix"            { RETURN(INFIX); }
392 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
393 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
394 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
395 <Code,GlaExt>"let"              { RETURN(LET); }
396 <Code,GlaExt>"module"           { RETURN(MODULE); }
397 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
398 <Code,GlaExt>"of"               { RETURN(OF); }
399 <Code,GlaExt>"then"             { RETURN(THEN); }
400 <Code,GlaExt>"type"             { RETURN(TYPE); }
401 <Code,GlaExt>"where"            { RETURN(WHERE); }
402
403 <Code,GlaExt>"as"               { RETURN(AS); }
404 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
405 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
406
407 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
408 <GlaExt>"_ccall_"               { RETURN(CCALL); }
409 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
410 <GlaExt>"_casm_"                { RETURN(CASM); }
411 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
412
413 %{
414     /* 
415      * Haskell operators: special, reservedops and useful varsyms
416      */
417 %}
418
419 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
420 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
421 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
422 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
423 <Code,GlaExt>"{"                { RETURN(OCURLY); }
424 <Code,GlaExt>"}"                { RETURN(CCURLY); }
425 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
426 <Code,GlaExt>";"                { RETURN(SEMI); }
427 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
428 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
429
430 <Code,GlaExt>".."               { RETURN(DOTDOT); }
431 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
432 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
433 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
434 <Code,GlaExt>"|"                { RETURN(VBAR); }
435 <Code,GlaExt>"<-"               { RETURN(LARROW); }
436 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
437 <Code,GlaExt>"-"                { RETURN(MINUS); }
438 <Code,GlaExt>"+"                { RETURN(PLUS); }
439
440 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
441 <Code,GlaExt>"@"                { RETURN(AT); }
442 <Code,GlaExt>"!"                { RETURN(BANG); }
443 <Code,GlaExt>"~"                { RETURN(LAZY); }
444
445 %{
446     /*
447      * Integers and (for Glasgow extensions) primitive integers.  Note that
448      * we pass all of the text on to the parser, because flex/C can't handle
449      * arbitrary precision numbers.
450      */
451 %}
452
453 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
454                          yylval.uid = xstrndup(yytext, yyleng - 1);
455                          RETURN(INTPRIM);
456                         }
457 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
458                          yylval.uid = xstrndup(yytext, yyleng);
459                          RETURN(INTEGER);
460                         }
461 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
462                          yylval.uid = xstrndup(yytext, yyleng - 1);
463                          RETURN(INTPRIM);
464                         }
465 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
466                          yylval.uid = xstrndup(yytext, yyleng);
467                          RETURN(INTEGER);
468                         }
469 <GlaExt>("-")?{N}"#"    {
470                          yylval.uid = xstrndup(yytext, yyleng - 1);
471                          RETURN(INTPRIM);
472                         }
473 <Code,GlaExt,UserPragma>{N} {
474                          yylval.uid = xstrndup(yytext, yyleng);
475                          RETURN(INTEGER);
476                         }
477
478 %{
479     /*
480      * Floats and (for Glasgow extensions) primitive floats/doubles.
481      */
482 %}
483
484 <GlaExt>("-")?{F}"##"   {
485                          yylval.uid = xstrndup(yytext, yyleng - 2);
486                          RETURN(DOUBLEPRIM);
487                         }
488 <GlaExt>("-")?{F}"#"    {
489                          yylval.uid = xstrndup(yytext, yyleng - 1);
490                          RETURN(FLOATPRIM);
491                         }
492 <Code,GlaExt>{F}        {
493                          yylval.uid = xstrndup(yytext, yyleng);
494                          RETURN(FLOAT);
495                         }
496
497 %{
498     /*
499      * Funky ``foo'' style C literals for Glasgow extensions
500      */
501 %}
502
503 <GlaExt>"``"[^']+"''"   {
504                          hsnewid(yytext + 2, yyleng - 4);
505                          RETURN(CLITLIT);
506                         }
507
508 %{
509     /*
510      * Identifiers, both variables and operators.  The trailing hash is allowed
511      * for Glasgow extensions.
512      */
513 %}
514
515
516 %{
517 /* These SHOULDNAE work in "Code" (sigh) */
518 %}
519 <GlaExt,UserPragma>{Id}"#" { 
520                         if (! nonstandardFlag) {
521                             char errbuf[ERR_BUF_SIZE];
522                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
523                             hsperror(errbuf);
524                          }
525                          hsnewid(yytext, yyleng);
526                          RETURN(isconstr(yytext) ? CONID : VARID);
527                         }
528 <Code,GlaExt,UserPragma>{Id}    {
529                          hsnewid(yytext, yyleng);
530                          RETURN(isconstr(yytext) ? CONID : VARID);
531                         }
532 <Code,GlaExt,UserPragma>{SId}   {
533                          hsnewid(yytext, yyleng);
534                          RETURN(isconstr(yytext) ? CONSYM : VARSYM);
535                         }
536 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
537                          BOOLEAN is_constr;
538                          if (! nonstandardFlag) {
539                             char errbuf[ERR_BUF_SIZE];
540                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
541                             hsperror(errbuf);
542                          }
543                          is_constr = hsnewqid(yytext, yyleng);
544                          RETURN(is_constr ? QCONID : QVARID);
545                         }
546 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
547                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
548                          RETURN(is_constr ? QCONID : QVARID);
549                         }
550 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
551                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
552                          RETURN(is_constr ? QCONSYM : QVARSYM);
553                         }
554
555 %{
556     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
557
558     /* Because we can make the former well-behaved (we defined them).
559
560        Sadly, the latter is defined by Haskell, which allows such
561        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
562     */
563 %}
564
565 <GlaExt,UserPragma>"`"{Id}"#`"  {       
566                          hsnewid(yytext + 1, yyleng - 2);
567                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
568                         }
569
570 %{
571     /*
572      * Character literals.  The first form is the quick form, for character
573      * literals that don't contain backslashes.  Literals with backslashes are
574      * lexed through multiple rules.  First, we match the open ' and as many
575      * normal characters as possible.  This puts us into the <Char> state, where
576      * a backslash is legal.  Then, we match the backslash and move into the 
577      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
578      * characters and the close '.  We may end up with too many characters, but
579      * this allows us to easily share the lex rules with strings.  Excess characters
580      * are ignored with a warning.
581      */
582 %}
583
584 <GlaExt>'({CHAR}|"\"")"'#" {
585                          yylval.uhstring = installHstring(1, yytext+1);
586                          RETURN(CHARPRIM);
587                         }
588 <Code,GlaExt>'({CHAR}|"\"")'    {
589                          yylval.uhstring = installHstring(1, yytext+1);
590                          RETURN(CHAR);
591                         }
592 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
593                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
594                          hsperror(errbuf);
595                         }
596 <Code,GlaExt>'({CHAR}|"\"")* {
597                          hsmlcolno = hspcolno;
598                          cleartext();
599                          addtext(yytext+1, yyleng-1);
600                          PUSH_STATE(Char);
601                         }
602 <Char>({CHAR}|"\"")*'#  {
603                          unsigned length;
604                          char *text;
605
606                          addtext(yytext, yyleng - 2);
607                          text = fetchtext(&length);
608
609                          if (! nonstandardFlag) {
610                             char errbuf[ERR_BUF_SIZE];
611                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
612                             hsperror(errbuf);
613                          }
614
615                          if (length > 1) {
616                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
617                               input_filename, hsplineno, hspcolno + 1);
618                             format_string(stderr, (unsigned char *) text, length);
619                             fputs("' too long\n", stderr);
620                             hsperror("");
621                          }
622                          yylval.uhstring = installHstring(1, text);
623                          hspcolno = hsmlcolno;
624                          POP_STATE;
625                          RETURN(CHARPRIM); 
626                         }
627 <Char>({CHAR}|"\"")*'   {
628                          unsigned length;
629                          char *text;
630
631                          addtext(yytext, yyleng - 1);
632                          text = fetchtext(&length);
633
634                          if (length > 1) {
635                             fprintf(stderr, "%s:%d:%d: Character literal '",
636                               input_filename, hsplineno, hspcolno + 1);
637                             format_string(stderr, (unsigned char *) text, length);
638                             fputs("' too long\n", stderr);
639                             hsperror("");
640                          }
641                          yylval.uhstring = installHstring(1, text);
642                          hspcolno = hsmlcolno;
643                          POP_STATE;
644                          RETURN(CHAR); 
645                         }
646 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
647
648
649 %{
650     /*
651      * String literals.  The first form is the quick form, for string literals
652      * that don't contain backslashes.  Literals with backslashes are lexed
653      * through multiple rules.  First, we match the open " and as many normal
654      * characters as possible.  This puts us into the <String> state, where
655      * a backslash is legal.  Then, we match the backslash and move into the 
656      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
657      * characters, moving back and forth between <String> and <StringEsc> as more
658      * backslashes are encountered.  (We may even digress into <Comment> mode if we
659      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
660      * of normal characters and the close ".
661      */
662 %}
663
664 <GlaExt>"\""({CHAR}|"'")*"\""#  {
665                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
666                             /* the -3 accounts for the " on front, "# on the end */
667                          RETURN(STRINGPRIM); 
668                         }
669 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
670                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
671                          RETURN(STRING); 
672                         }
673 <Code,GlaExt>"\""({CHAR}|"'")* {
674                          hsmlcolno = hspcolno;
675                          cleartext();
676                          addtext(yytext+1, yyleng-1);
677                          PUSH_STATE(String);
678                         }
679 <String>({CHAR}|"'")*"\"#"   {
680                          unsigned length;
681                          char *text;
682
683                          addtext(yytext, yyleng-2);
684                          text = fetchtext(&length);
685
686                          if (! nonstandardFlag) {
687                             char errbuf[ERR_BUF_SIZE];
688                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
689                             hsperror(errbuf);
690                          }
691
692                          yylval.uhstring = installHstring(length, text);
693                          hspcolno = hsmlcolno;
694                          POP_STATE;
695                          RETURN(STRINGPRIM);
696                         }
697 <String>({CHAR}|"'")*"\""   {
698                          unsigned length;
699                          char *text;
700
701                          addtext(yytext, yyleng-1);
702                          text = fetchtext(&length);
703
704                          yylval.uhstring = installHstring(length, text);
705                          hspcolno = hsmlcolno;
706                          POP_STATE;
707                          RETURN(STRING); 
708                         }
709 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
710
711 %{
712     /*
713      * Character and string escapes are roughly the same, but strings have the
714      * extra `\&' sequence which is not allowed for characters.  Also, comments
715      * are allowed in the <StringEsc> state.  (See the comment section much
716      * further down.)
717      *
718      * NB: Backslashes and tabs are stored in strings as themselves.
719      * But if we print them (in printtree.c), they must go out as
720      * "\\\\" and "\\t" respectively.  (This is because of the bogus
721      * intermediate format that the parser produces.  It uses '\t' fpr end of
722      * string, so it needs to be able to escape tabs, which means that it
723      * also needs to be able to escape the escape character ('\\').  Sigh.
724      */
725 %}
726
727 <Char>\\                { PUSH_STATE(CharEsc); }
728 <String>\\&             /* Ignore */ ;
729 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
730
731 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
732 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
733
734 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
735 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
736 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
737 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
738 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
739 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
740 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
741 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
742 <CharEsc,StringEsc>BEL  |
743 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
744 <CharEsc,StringEsc>BS   |
745 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
746 <CharEsc,StringEsc>HT   |
747 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
748 <CharEsc,StringEsc>LF   |
749 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
750 <CharEsc,StringEsc>VT   |
751 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
752 <CharEsc,StringEsc>FF   |
753 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
754 <CharEsc,StringEsc>CR   |
755 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
756 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
757 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
758 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
759 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
760 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
761 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
762 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
763 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
764 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
765 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
766 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
767 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
768 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
769 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
770 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
771 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
772 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
773 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
774 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
775 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
776 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
777 <CharEsc,StringEsc>{D}+  {
778                           int i = strtol(yytext, NULL, 10);
779                           if (i < NCHARS) {
780                              addchar((char) i);
781                           } else {
782                              char errbuf[ERR_BUF_SIZE];
783                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
784                                 yytext);
785                              hsperror(errbuf);
786                           }
787                           POP_STATE;
788                         }
789 <CharEsc,StringEsc>o{O}+ {
790                           int i = strtol(yytext + 1, NULL, 8);
791                           if (i < NCHARS) {
792                              addchar((char) i);
793                           } else {
794                              char errbuf[ERR_BUF_SIZE];
795                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
796                                 yytext);
797                              hsperror(errbuf);
798                           }
799                           POP_STATE;
800                         }
801 <CharEsc,StringEsc>x{H}+ {
802                           int i = strtol(yytext + 1, NULL, 16);
803                           if (i < NCHARS) {
804                              addchar((char) i);
805                           } else {
806                              char errbuf[ERR_BUF_SIZE];
807                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
808                                 yytext);
809                              hsperror(errbuf);
810                           }
811                           POP_STATE;
812                         }
813
814
815 %{
816     /*
817      * Nested comments.  The major complication here is in trying to match the
818      * longest lexemes possible, for better performance.  (See the flex document.)
819      * That's why the rules look so bizarre.
820      */
821 %}
822
823 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
824                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
825                         }
826
827 <Comment>[^-{]*         |
828 <Comment>"-"+[^-{}]+    |
829 <Comment>"{"+[^-{}]+    ;
830 <Comment>"{-"           { nested_comments++; }
831 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
832 <Comment>(.|\n)         ;
833
834 %{
835     /*
836      * Illegal characters.  This used to be a single rule, but we might as well
837      * pass on as much information as we have, so now we indicate our state in
838      * the error message.
839      */
840 %}
841
842 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
843                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
844                             input_filename, hsplineno, hspcolno + 1); 
845                          format_string(stderr, (unsigned char *) yytext, 1);
846                          fputs("'\n", stderr);
847                          hsperror("");
848                         }
849 <Char>(.|\n)            { 
850                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
851                             input_filename, hsplineno, hspcolno + 1); 
852                          format_string(stderr, (unsigned char *) yytext, 1);
853                          fputs("' in a character literal\n", stderr);
854                          hsperror("");
855                         }
856 <CharEsc>(.|\n)         {
857                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
858                             input_filename, hsplineno, hspcolno + 1); 
859                          format_string(stderr, (unsigned char *) yytext, 1);
860                          fputs("'\n", stderr);
861                          hsperror("");
862                         }
863 <String>(.|\n)          { if (nonstandardFlag) {
864                              addtext(yytext, yyleng);
865                           } else { 
866                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
867                                 input_filename, hsplineno, hspcolno + 1); 
868                                 format_string(stderr, (unsigned char *) yytext, 1);
869                                 fputs("' in a string literal\n", stderr);
870                                 hsperror("");
871                           }
872                         }
873 <StringEsc>(.|\n)       {
874                          if (noGap) {
875                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
876                                 input_filename, hsplineno, hspcolno + 1); 
877                              format_string(stderr, (unsigned char *) yytext, 1);
878                              fputs("'\n", stderr);
879                              hsperror("");
880                          } else {
881                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
882                                 input_filename, hsplineno, hspcolno + 1);
883                              format_string(stderr, (unsigned char *) yytext, 1);
884                              fputs("' in a string gap\n", stderr);
885                              hsperror("");
886                          }
887                         }
888
889 %{
890     /*
891      * End of file.  In any sub-state, this is an error.  However, for the primary
892      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
893      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
894      * adding virtual close curlies, or closing an interface and returning to the
895      * primary source file.
896      *
897      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
898      * line/column advancement has to be done by hand.
899      */
900 %}
901
902 <Char,CharEsc><<EOF>>   { 
903                           hsplineno = hslineno; hspcolno = hscolno;
904                           hsperror("unterminated character literal");
905                         }
906 <Comment><<EOF>>        { 
907                           hsplineno = hslineno; hspcolno = hscolno;
908                           hsperror("unterminated comment"); 
909                         }
910 <String,StringEsc><<EOF>>   { 
911                           hsplineno = hslineno; hspcolno = hscolno;
912                           hsperror("unterminated string literal"); 
913                         }
914 <UserPragma><<EOF>>     {
915                           hsplineno = hslineno; hspcolno = hscolno;
916                           hsperror("unterminated user-specified pragma"); 
917                         }
918 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
919
920 %%
921
922 /**********************************************************************
923 *                                                                     *
924 *                                                                     *
925 *     YACC/LEX Initialisation etc.                                    *
926 *                                                                     *
927 *                                                                     *
928 **********************************************************************/
929
930 /*
931    We initialise input_filename to "<stdin>".
932    This allows unnamed sources to be piped into the parser.
933 */
934
935 void
936 yyinit(void)
937 {
938     input_filename = xstrdup("<stdin>");
939
940     /* We must initialize the input buffer _now_, because we call
941        setyyin _before_ calling yylex for the first time! */
942     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
943
944     if (nonstandardFlag)
945         PUSH_STATE(GlaExt);
946     else
947         PUSH_STATE(Code);
948 }
949
950 static void
951 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
952 {
953     if (input_filename != NULL)
954         free(input_filename);
955     input_filename = xstrdup(f);
956 }
957
958 /**********************************************************************
959 *                                                                     *
960 *                                                                     *
961 *     Layout Processing                                               *
962 *                                                                     *
963 *                                                                     *
964 **********************************************************************/
965
966 /*
967         The following section deals with Haskell Layout conventions
968         forcing insertion of ; or } as appropriate
969 */
970
971 static BOOLEAN
972 hsshouldindent(void)
973 {
974     return (!forgetindent && INDENTON);
975 }
976
977
978 /* Enter new context and set new indentation level */
979 void
980 hssetindent(void)
981 {
982 #ifdef HSP_DEBUG
983     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
984 #endif
985
986     /*
987      * partain: first chk that new indent won't be less than current one; this code
988      * doesn't make sense to me; hscolno tells the position of the _end_ of the
989      * current token; what that has to do with indenting, I don't know.
990      */
991
992
993     if (hscolno - 1 <= INDENTPT) {
994         if (INDENTPT == -1)
995             return;             /* Empty input OK for Haskell 1.1 */
996         else {
997             char errbuf[ERR_BUF_SIZE];
998
999             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1000             hsperror(errbuf);
1001         }
1002     }
1003     hsentercontext((hspcolno << 1) | 1);
1004 }
1005
1006
1007 /* Enter a new context without changing the indentation level */
1008 void
1009 hsincindent(void)
1010 {
1011 #ifdef HSP_DEBUG
1012     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1013 #endif
1014     hsentercontext(indenttab[icontexts] & ~1);
1015 }
1016
1017
1018 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1019 void
1020 hsindentoff(void)
1021 {
1022     forgetindent = TRUE;
1023 }
1024
1025
1026 /* Enter a new layout context. */
1027 static void
1028 hsentercontext(int indent)
1029 {
1030     /* Enter new context and set indentation as specified */
1031     if (++icontexts >= MAX_CONTEXTS) {
1032         char errbuf[ERR_BUF_SIZE];
1033
1034         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1035         hsperror(errbuf);
1036     }
1037     forgetindent = FALSE;
1038     indenttab[icontexts] = indent;
1039 #ifdef HSP_DEBUG
1040     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1041 #endif
1042 }
1043
1044
1045 /* Exit a layout context */
1046 void
1047 hsendindent(void)
1048 {
1049     --icontexts;
1050 #ifdef HSP_DEBUG
1051     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1052 #endif
1053 }
1054
1055 /*
1056  *      Return checks the indentation level and returns ;, } or the specified token.
1057  */
1058
1059 static int
1060 Return(int tok)
1061 {
1062 #ifdef HSP_DEBUG
1063     extern int yyleng;
1064 #endif
1065
1066     if (hsshouldindent()) {
1067         if (hspcolno < INDENTPT) {
1068 #ifdef HSP_DEBUG
1069             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1070 #endif
1071             hssttok = tok;
1072             return (VCCURLY);
1073         } else if (hspcolno == INDENTPT) {
1074 #ifdef HSP_DEBUG
1075             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1076 #endif
1077             hssttok = -tok;
1078             return (SEMI);
1079         }
1080     }
1081     hssttok = -1;
1082 #ifdef HSP_DEBUG
1083     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1084 #endif
1085     return (tok);
1086 }
1087
1088
1089 /*
1090  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1091  */
1092 int
1093 yylex()
1094 {
1095     int tok;
1096     static BOOLEAN eof = FALSE;
1097
1098     if (!eof) {
1099         if (hssttok != -1) {
1100             if (hssttok < 0) {
1101                 tok = -hssttok;
1102                 hssttok = -1;
1103                 return tok;
1104             }
1105             RETURN(hssttok);
1106         } else {
1107             endlineno = hslineno;
1108             if ((tok = yylex1()) != EOF)
1109                 return tok;
1110             else
1111                 eof = TRUE;
1112         }
1113     }
1114     if (icontexts > icontexts_save) {
1115         if (INDENTON) {
1116             eof = TRUE;
1117             indenttab[icontexts] = 0;
1118             return (VCCURLY);
1119         } else
1120             hsperror("missing '}' at end of file");
1121     } else if (hsbuf_save != NULL) {
1122         fclose(yyin);
1123         yy_delete_buffer(YY_CURRENT_BUFFER);
1124         yy_switch_to_buffer(hsbuf_save);
1125         hsbuf_save = NULL;
1126         new_filename(filename_save);
1127         free(filename_save);
1128         hslineno = hslineno_save;
1129         hsplineno = hsplineno_save;
1130         hscolno = hscolno_save;
1131         hspcolno = hspcolno_save;
1132         etags = etags_save;
1133         icontexts = icontexts_save - 1;
1134         icontexts_save = 0;
1135 #ifdef HSP_DEBUG
1136         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1137 #endif
1138         eof = FALSE;
1139
1140         /* RETURN(LEOF); */
1141         hsperror("No longer using yacc to parse interface files");
1142
1143     } else {
1144         yyterminate();
1145     }
1146     abort(); /* should never get here! */
1147     return(0);
1148 }
1149
1150 /**********************************************************************
1151 *                                                                     *
1152 *                                                                     *
1153 *     Input Processing for Interfaces -- Not currently used !!!       *
1154 *                                                                     *
1155 *                                                                     *
1156 **********************************************************************/
1157
1158 /* setyyin(file)        open file as new lex input buffer */
1159 extern FILE *yyin;
1160
1161 void
1162 setyyin(char *file)
1163 {
1164     hsbuf_save = YY_CURRENT_BUFFER;
1165     if ((yyin = fopen(file, "r")) == NULL) {
1166         char errbuf[ERR_BUF_SIZE];
1167
1168         sprintf(errbuf, "can't read \"%-.50s\"", file);
1169         hsperror(errbuf);
1170     }
1171     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1172
1173     hslineno_save = hslineno;
1174     hsplineno_save = hsplineno;
1175     hslineno = hsplineno = 1;
1176
1177     filename_save = input_filename;
1178     input_filename = NULL;
1179     new_filename(file);
1180     hscolno_save = hscolno;
1181     hspcolno_save = hspcolno;
1182     hscolno = hspcolno = 0;
1183     etags_save = etags; /* do not do "etags" stuff in interfaces */
1184     etags = 0;          /* We remember whether we are doing it in
1185                            the module, so we can restore it later [WDP 94/09] */
1186     hsentercontext(-1);         /* partain: changed this from 0 */
1187     icontexts_save = icontexts;
1188 #ifdef HSP_DEBUG
1189     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1190 #endif
1191 }
1192
1193 static void
1194 layout_input(char *text, int len)
1195 {
1196 #ifdef HSP_DEBUG
1197     fprintf(stderr, "Scanning \"%s\"\n", text);
1198 #endif
1199
1200     hsplineno = hslineno;
1201     hspcolno = hscolno;
1202
1203     while (len-- > 0) {
1204         switch (*text++) {
1205         case '\n':
1206         case '\r':
1207         case '\f':
1208             hslineno++;
1209             hscolno = 0;
1210             break;
1211         case '\t':
1212             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1213             break;
1214         case '\v':
1215             break;
1216         default:
1217             ++hscolno;
1218             break;
1219         }
1220     }
1221 }
1222
1223 void
1224 setstartlineno(void)
1225 {
1226     startlineno = hsplineno;
1227
1228     if (modulelineno == 0) {
1229         modulelineno = startlineno;
1230     }
1231
1232 #if 1/*etags*/
1233 #else
1234     if (etags)
1235         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1236 #endif
1237 }
1238
1239 /**********************************************************************
1240 *                                                                     *
1241 *                                                                     *
1242 *                      Text Caching                                   *
1243 *                                                                     *
1244 *                                                                     *
1245 **********************************************************************/
1246
1247 #define CACHE_SIZE YY_BUF_SIZE
1248
1249 static struct {
1250     unsigned allocated;
1251     unsigned next;
1252     char *text;
1253 } textcache = { 0, 0, NULL };
1254
1255 static void
1256 cleartext(void)
1257 {
1258 /*  fprintf(stderr, "cleartext\n"); */
1259     textcache.next = 0;
1260     if (textcache.allocated == 0) {
1261         textcache.allocated = CACHE_SIZE;
1262         textcache.text = xmalloc(CACHE_SIZE);
1263     }
1264 }
1265
1266 static void
1267 addtext(char *text, unsigned length)
1268 {
1269 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1270
1271     if (length == 0)
1272         return;
1273
1274     if (textcache.next + length + 1 >= textcache.allocated) {
1275         textcache.allocated += length + CACHE_SIZE;
1276         textcache.text = xrealloc(textcache.text, textcache.allocated);
1277     }
1278     bcopy(text, textcache.text + textcache.next, length);
1279     textcache.next += length;
1280 }
1281
1282 static void
1283 addchar(char c)
1284 {
1285 /*  fprintf(stderr, "addchar: %c\n", c); */
1286
1287     if (textcache.next + 2 >= textcache.allocated) {
1288         textcache.allocated += CACHE_SIZE;
1289         textcache.text = xrealloc(textcache.text, textcache.allocated);
1290     }
1291     textcache.text[textcache.next++] = c;
1292 }
1293
1294 static char *
1295 fetchtext(unsigned *length)
1296 {
1297 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1298
1299     *length = textcache.next;
1300     textcache.text[textcache.next] = '\0';
1301     return textcache.text;
1302 }
1303
1304 /**********************************************************************
1305 *                                                                     *
1306 *                                                                     *
1307 *    Identifier Processing                                             *
1308 *                                                                     *
1309 *                                                                     *
1310 **********************************************************************/
1311
1312 /*
1313         hsnewid         Enters an id of length n into the symbol table.
1314 */
1315
1316 static void
1317 hsnewid(char *name, int length)
1318 {
1319     char save = name[length];
1320
1321     name[length] = '\0';
1322     yylval.uid = installid(name);
1323     name[length] = save;
1324 }
1325
1326 BOOLEAN
1327 hsnewqid(char *name, int length)
1328 {
1329     char* dot;
1330     char save = name[length];
1331     name[length] = '\0';
1332
1333     dot = strchr(name, '.');
1334     *dot = '\0';
1335     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1336     *dot = '.';
1337     name[length] = save;
1338
1339     return isconstr(dot+1);
1340 }