[project @ 1998-08-14 12:07:18 by sof]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 /* The includes/config.h one */
11 #include "config.h"
12
13 #include <stdio.h>
14
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 #include <string.h>
17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #include <memory.h>
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define index strchr
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 #include <strings.h>
28 /* memory.h and strings.h conflict on some systems.  */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
30
31 #include "hspincl.h"
32 #include "hsparser.tab.h"
33 #include "constants.h"
34 #include "utils.h"
35
36 /* Our substitute for <ctype.h> */
37
38 #define NCHARS  256
39 #define _S      0x1
40 #define _D      0x2
41 #define _H      0x4
42 #define _O      0x8
43 #define _C      0x10
44
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
62
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 };
80
81 BOOLEAN
82 isconstr (char *s)
83 {
84     return(CharTable[*s]&(_C));
85 }
86
87 /**********************************************************************
88 *                                                                     *
89 *                                                                     *
90 *      Declarations                                                   *
91 *                                                                     *
92 *                                                                     *
93 **********************************************************************/
94
95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
96
97 /*
98  * For my own sanity, things that are not part of the flex skeleton
99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
100  */
101
102 static int hslineno = 0;        /* Line number at end of token */
103 int hsplineno = 0;              /* Line number at end of previous token */
104
105 static int hscolno = 0;         /* Column number at end of token */
106 int hspcolno = 0;               /* Column number at end of previous token */
107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
108
109 int modulelineno = -1;          /* The line number where the module starts */
110 int startlineno = 0;            /* The line number where something starts */
111 int endlineno = 0;              /* The line number where something ends */
112
113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
115
116 static int nested_comments;     /* For counting comment nesting depth */
117
118 /* OLD: Hacky definition of yywrap: see flex doc.
119
120    If we don't do this, then we'll have to get the default
121    yywrap from the flex library, which is often something
122    we are not good at locating.  This avoids that difficulty.
123    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124    WDP 94/09/05
125 #define yywrap() 1
126 */
127
128 /* Essential forward declarations */
129
130 static void hsnewid      PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext    (NO_ARGS);
133 static void addtext      PROTO((char *, unsigned));
134 static void addchar      PROTO((char));
135 static char *fetchtext   PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int  Return       PROTO((int));
138 static void hsentercontext PROTO((int));
139
140 /* Special file handling for IMPORTS */
141 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142
143 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
144 static char *filename_save;             /* File Name                     */
145 static int hslineno_save = 0,           /* Line Number                   */
146  hsplineno_save = 0,                    /* Line Number of Prev. token    */
147  hscolno_save = 0,                      /* Indentation                   */
148  hspcolno_save = 0;                     /* Left Indentation              */
149 static short icontexts_save = 0;        /* Indent Context Level          */
150
151 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags;                   /* that which is saved */
153
154 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
155
156 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
157                                  * inserted before token +ve  -- "}" inserted before
158                                  * token */
159
160 short icontexts = 0;            /* Which context we're in */
161
162 /*
163         Table of indentations:  right bit indicates whether to use
164           indentation rules (1 = use rules; 0 = ignore)
165
166     partain:
167     push one of these "contexts" at every "case" or "where"; the right bit says
168     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
169
170     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
171     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
172     push is shown just below.
173
174 */
175
176
177 static short indenttab[MAX_CONTEXTS] = {-1};
178
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
181
182 #define RETURN(tok) return(Return(tok))
183
184 #undef YY_DECL
185 #define YY_DECL int yylex1()
186
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189    to support older versions of flex, we'll continue to peek for now.
190  */
191 #define YY_USER_ACTION \
192     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
193
194 #if 0/*debug*/
195 #undef YY_BREAK
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
197 #endif
198
199 /* Each time we enter a new start state, we push it onto the state stack.
200 */
201 #define PUSH_STATE(n)   yy_push_state(n)
202 #define POP_STATE       yy_pop_state()
203
204 %}
205 /* Options:
206     8bit (8-bit input)
207     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208     never-interactive (to go a bit faster)
209     stack (use a start-condition stack)
210 */
211 %option 8bit
212 %option noyywrap
213 %option never-interactive
214 %option stack
215
216 /* The start states are:
217    Code -- normal Haskell code (principal lexer)
218    GlaExt -- Haskell code with Glasgow extensions
219    Comment -- Nested comment processing
220    String -- Inside a string literal with backslashes
221    StringEsc -- Immediately following a backslash in a string literal
222    Char -- Inside a character literal with backslashes
223    CharEsc -- Immediately following a backslash in a character literal 
224
225    Note that the INITIAL state is unused.  Also note that these states
226    are _exclusive_.  All rules should be prefixed with an appropriate
227    list of start states.
228  */
229
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231
232 isoS                    [\xa1-\xbf\xd7\xf7]
233 isoL                    [\xc0-\xd6\xd8-\xde]
234 isol                    [\xdf-\xf6\xf8-\xff]
235 isoA                    [\xa1-\xff]
236
237 D                       [0-9]
238 O                       [0-7]
239 H                       [0-9A-Fa-f]
240 N                       {D}+
241 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 SId                     {S}{S}*
244 L                       [A-Z\xc0-\xd6\xd8-\xde]
245 l                       [a-z\xdf-\xf6\xf8-\xff]
246 I                       {L}|{l}
247 i                       {L}|{l}|[0-9'_]
248 Id                      {I}{i}*
249 Mod                     {L}{i}*
250 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
251 CNTRL                   [@A-Z\[\\\]^_]
252 WS                      [ \t\n\r\f\v]
253 NL                      [\n\r]
254
255 %%
256
257 %{
258     /*
259      * Simple comments and whitespace.  Normally, we would just ignore these, but
260      * in case we're processing a string escape, we need to note that we've seen
261      * a gap.
262      *
263      * Note that we cater for a comment line that *doesn't* end in a newline.
264      * This is incorrect, strictly speaking, but seems like the right thing
265      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
266      *
267      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
268      */
269 %}
270
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
273
274 %{
275     /* 
276      * Special GHC pragma rules.  Do we need a start state for interface files,
277      * so these won't be matched in source files? --JSM
278      */
279
280 %}
281
282 %{
283 /* I believe the next rule is not ever matched.
284
285    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
286    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
287    where possible, so the lexer should now never see cpp directives
288    like '# ' and '#line'.
289
290    -- SOF
291
292 <Code,GlaExt>^"# ".*{NL}    {
293                           char tempf[FILENAME_SIZE];
294                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
295                           new_filename(tempf);
296                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
297                         }
298
299 <Code,GlaExt>^"#line ".*{NL}    {
300                           char tempf[FILENAME_SIZE];
301                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
302                           new_filename(tempf); 
303                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
304                         }
305 */
306 %}
307
308 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
309                           /* partain: pragma-style line directive */
310                           char tempf[FILENAME_SIZE];
311                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
312                           new_filename(tempf);
313                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
314                         }
315
316 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
317                               PUSH_STATE(UserPragma);
318                               RETURN(INTERFACE_UPRAGMA);
319                             }
320 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
321                               PUSH_STATE(UserPragma);
322                               RETURN(SPECIALISE_UPRAGMA);
323                             }
324 <Code,GlaExt>"{-#"{WS}*"INLINE" {
325                               PUSH_STATE(UserPragma);
326                               RETURN(INLINE_UPRAGMA);
327                             }
328 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
329                               PUSH_STATE(UserPragma);
330                               RETURN(NOINLINE_UPRAGMA);
331                             }
332 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
333                               PUSH_STATE(UserPragma);
334                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
335                             }
336 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
337                               /* these are handled by hscpp */
338                               nested_comments =1;
339                               PUSH_STATE(Comment);
340                             }
341 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
342                               /* these are for the driver! */
343                               nested_comments =1;
344                               PUSH_STATE(Comment);
345                             }
346 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
347                               /* these are used by `make depend' and the
348                                  compiler to indicate that a module should
349                                  be imported from source */
350                               nested_comments =1;
351                               RETURN(SOURCE_UPRAGMA); 
352                             }
353
354 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
355                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
356                                 input_filename, hsplineno);
357                               format_string(stderr, (unsigned char *) yytext, yyleng);
358                               fputs("'\n", stderr);
359                               nested_comments = 1;
360                               PUSH_STATE(Comment);
361                             }
362 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
363
364 %{
365     /*
366      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
367      * intentionally accepted as a keyword even for normal <Code>.
368      */
369 %}
370
371 <Code,GlaExt>"case"             { RETURN(CASE); }
372 <Code,GlaExt>"class"            { RETURN(CLASS); }
373 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
374 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
375 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
376 <Code,GlaExt>"do"               { RETURN(DO); }
377 <Code,GlaExt>"else"             { RETURN(ELSE); }
378 <Code,GlaExt>"if"               { RETURN(IF); }
379 <Code,GlaExt>"import"           { RETURN(IMPORT); }
380 <Code,GlaExt>"in"               { RETURN(IN); }
381 <Code,GlaExt>"infix"            { RETURN(INFIX); }
382 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
383 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
384 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
385 <Code,GlaExt>"let"              { RETURN(LET); }
386 <Code,GlaExt>"module"           { RETURN(MODULE); }
387 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
388 <Code,GlaExt>"of"               { RETURN(OF); }
389 <Code,GlaExt>"then"             { RETURN(THEN); }
390 <Code,GlaExt>"type"             { RETURN(TYPE); }
391 <Code,GlaExt>"where"            { RETURN(WHERE); }
392
393 <Code,GlaExt>"as"               { RETURN(AS); }
394 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
395 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
396
397 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
398 <GlaExt>"_ccall_"               { RETURN(CCALL); }
399 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
400 <GlaExt>"_casm_"                { RETURN(CASM); }
401 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
402 <GlaExt>"foreign"               { RETURN(FOREIGN); }
403 <GlaExt>"export"                { RETURN(EXPORT); }
404 <GlaExt>"unsafe"                { RETURN(UNSAFE); }
405 <GlaExt>"_stdcall"              { RETURN(STDCALL); }
406 <GlaExt>"_ccall"                { RETURN(C_CALL); }
407 <GlaExt>"_pascal"               { RETURN(PASCAL); }
408 <GlaExt>"_fastcall"             { RETURN(FASTCALL); }
409 <GlaExt>"dynamic"               { RETURN(DYNAMIC); }
410
411 %{
412     /* 
413      * Haskell operators: special, reservedops and useful varsyms
414      */
415 %}
416
417 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
418 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
419 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
420 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
421 <Code,GlaExt>"{"                { RETURN(OCURLY); }
422 <Code,GlaExt>"}"                { RETURN(CCURLY); }
423 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
424 <Code,GlaExt>";"                { RETURN(SEMI); }
425 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
426 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
427
428 <Code,GlaExt>".."               { RETURN(DOTDOT); }
429 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
430 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
431 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
432 <Code,GlaExt>"|"                { RETURN(VBAR); }
433 <Code,GlaExt>"<-"               { RETURN(LARROW); }
434 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
435 <Code,GlaExt>"-"                { RETURN(MINUS); }
436 <Code,GlaExt>"+"                { RETURN(PLUS); }
437
438 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
439 <Code,GlaExt>"@"                { RETURN(AT); }
440 <Code,GlaExt>"!"                { RETURN(BANG); }
441 <Code,GlaExt>"~"                { RETURN(LAZY); }
442
443 %{
444     /*
445      * Integers and (for Glasgow extensions) primitive integers.  Note that
446      * we pass all of the text on to the parser, because flex/C can't handle
447      * arbitrary precision numbers.
448      */
449 %}
450
451 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
452                          yylval.uid = xstrndup(yytext, yyleng - 1);
453                          RETURN(INTPRIM);
454                         }
455 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
456                          yylval.uid = xstrndup(yytext, yyleng);
457                          RETURN(INTEGER);
458                         }
459 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
460                          yylval.uid = xstrndup(yytext, yyleng - 1);
461                          RETURN(INTPRIM);
462                         }
463 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
464                          yylval.uid = xstrndup(yytext, yyleng);
465                          RETURN(INTEGER);
466                         }
467 <GlaExt>("-")?{N}"#"    {
468                          yylval.uid = xstrndup(yytext, yyleng - 1);
469                          RETURN(INTPRIM);
470                         }
471 <Code,GlaExt,UserPragma>{N} {
472                          yylval.uid = xstrndup(yytext, yyleng);
473                          RETURN(INTEGER);
474                         }
475
476 %{
477     /*
478      * Floats and (for Glasgow extensions) primitive floats/doubles.
479      */
480 %}
481
482 <GlaExt>("-")?{F}"##"   {
483                          yylval.uid = xstrndup(yytext, yyleng - 2);
484                          RETURN(DOUBLEPRIM);
485                         }
486 <GlaExt>("-")?{F}"#"    {
487                          yylval.uid = xstrndup(yytext, yyleng - 1);
488                          RETURN(FLOATPRIM);
489                         }
490 <Code,GlaExt>{F}        {
491                          yylval.uid = xstrndup(yytext, yyleng);
492                          RETURN(FLOAT);
493                         }
494
495 %{
496     /*
497      * Funky ``foo'' style C literals for Glasgow extensions
498      */
499 %}
500
501 <GlaExt>"``"[^']+"''"   {
502                          hsnewid(yytext + 2, yyleng - 4);
503                          RETURN(CLITLIT);
504                         }
505
506 %{
507     /*
508      * Identifiers, both variables and operators.  The trailing hash is allowed
509      * for Glasgow extensions.
510      */
511 %}
512
513
514 %{
515 /* These SHOULDNAE work in "Code" (sigh) */
516 %}
517 <GlaExt,UserPragma>{Id}"#" { 
518                         if (! nonstandardFlag) {
519                             char errbuf[ERR_BUF_SIZE];
520                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
521                             hsperror(errbuf);
522                          }
523                          hsnewid(yytext, yyleng);
524                          RETURN(isconstr(yytext) ? CONID : VARID);
525                         }
526 <Code,GlaExt,UserPragma>{Id}    {
527                          hsnewid(yytext, yyleng);
528                          RETURN(isconstr(yytext) ? CONID : VARID);
529                         }
530 <Code,GlaExt,UserPragma>{SId}   {
531                          hsnewid(yytext, yyleng);
532                          RETURN(isconstr(yytext) ? CONSYM : VARSYM);
533                         }
534 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
535                          BOOLEAN is_constr;
536                          if (! nonstandardFlag) {
537                             char errbuf[ERR_BUF_SIZE];
538                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
539                             hsperror(errbuf);
540                          }
541                          is_constr = hsnewqid(yytext, yyleng);
542                          RETURN(is_constr ? QCONID : QVARID);
543                         }
544 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
545                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
546                          RETURN(is_constr ? QCONID : QVARID);
547                         }
548 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
549                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
550                          RETURN(is_constr ? QCONSYM : QVARSYM);
551                         }
552
553 %{
554     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
555
556     /* Because we can make the former well-behaved (we defined them).
557
558        Sadly, the latter is defined by Haskell, which allows such
559        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
560     */
561 %}
562
563 <GlaExt,UserPragma>"`"{Id}"#`"  {       
564                          hsnewid(yytext + 1, yyleng - 2);
565                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
566                         }
567
568 %{
569     /*
570      * Character literals.  The first form is the quick form, for character
571      * literals that don't contain backslashes.  Literals with backslashes are
572      * lexed through multiple rules.  First, we match the open ' and as many
573      * normal characters as possible.  This puts us into the <Char> state, where
574      * a backslash is legal.  Then, we match the backslash and move into the 
575      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
576      * characters and the close '.  We may end up with too many characters, but
577      * this allows us to easily share the lex rules with strings.  Excess characters
578      * are ignored with a warning.
579      */
580 %}
581
582 <GlaExt>'({CHAR}|"\"")"'#" {
583                          yylval.uhstring = installHstring(1, yytext+1);
584                          RETURN(CHARPRIM);
585                         }
586 <Code,GlaExt>'({CHAR}|"\"")'    {
587                          yylval.uhstring = installHstring(1, yytext+1);
588                          RETURN(CHAR);
589                         }
590 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
591                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
592                          hsperror(errbuf);
593                         }
594 <Code,GlaExt>'({CHAR}|"\"")* {
595                          hsmlcolno = hspcolno;
596                          cleartext();
597                          addtext(yytext+1, yyleng-1);
598                          PUSH_STATE(Char);
599                         }
600 <Char>({CHAR}|"\"")*'#  {
601                          unsigned length;
602                          char *text;
603
604                          addtext(yytext, yyleng - 2);
605                          text = fetchtext(&length);
606
607                          if (! nonstandardFlag) {
608                             char errbuf[ERR_BUF_SIZE];
609                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
610                             hsperror(errbuf);
611                          }
612
613                          if (length > 1) {
614                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
615                               input_filename, hsplineno, hspcolno + 1);
616                             format_string(stderr, (unsigned char *) text, length);
617                             fputs("' too long\n", stderr);
618                             hsperror("");
619                          }
620                          yylval.uhstring = installHstring(1, text);
621                          hspcolno = hsmlcolno;
622                          POP_STATE;
623                          RETURN(CHARPRIM); 
624                         }
625 <Char>({CHAR}|"\"")*'   {
626                          unsigned length;
627                          char *text;
628
629                          addtext(yytext, yyleng - 1);
630                          text = fetchtext(&length);
631
632                          if (length > 1) {
633                             fprintf(stderr, "%s:%d:%d: Character literal '",
634                               input_filename, hsplineno, hspcolno + 1);
635                             format_string(stderr, (unsigned char *) text, length);
636                             fputs("' too long\n", stderr);
637                             hsperror("");
638                          }
639                          yylval.uhstring = installHstring(1, text);
640                          hspcolno = hsmlcolno;
641                          POP_STATE;
642                          RETURN(CHAR); 
643                         }
644 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
645
646
647 %{
648     /*
649      * String literals.  The first form is the quick form, for string literals
650      * that don't contain backslashes.  Literals with backslashes are lexed
651      * through multiple rules.  First, we match the open " and as many normal
652      * characters as possible.  This puts us into the <String> state, where
653      * a backslash is legal.  Then, we match the backslash and move into the 
654      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
655      * characters, moving back and forth between <String> and <StringEsc> as more
656      * backslashes are encountered.  (We may even digress into <Comment> mode if we
657      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
658      * of normal characters and the close ".
659      */
660 %}
661
662 <GlaExt>"\""({CHAR}|"'")*"\""#  {
663                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
664                             /* the -3 accounts for the " on front, "# on the end */
665                          RETURN(STRINGPRIM); 
666                         }
667 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
668                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
669                          RETURN(STRING); 
670                         }
671 <Code,GlaExt>"\""({CHAR}|"'")* {
672                          hsmlcolno = hspcolno;
673                          cleartext();
674                          addtext(yytext+1, yyleng-1);
675                          PUSH_STATE(String);
676                         }
677 <String>({CHAR}|"'")*"\"#"   {
678                          unsigned length;
679                          char *text;
680
681                          addtext(yytext, yyleng-2);
682                          text = fetchtext(&length);
683
684                          if (! nonstandardFlag) {
685                             char errbuf[ERR_BUF_SIZE];
686                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
687                             hsperror(errbuf);
688                          }
689
690                          yylval.uhstring = installHstring(length, text);
691                          hspcolno = hsmlcolno;
692                          POP_STATE;
693                          RETURN(STRINGPRIM);
694                         }
695 <String>({CHAR}|"'")*"\""   {
696                          unsigned length;
697                          char *text;
698
699                          addtext(yytext, yyleng-1);
700                          text = fetchtext(&length);
701
702                          yylval.uhstring = installHstring(length, text);
703                          hspcolno = hsmlcolno;
704                          POP_STATE;
705                          RETURN(STRING); 
706                         }
707 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
708
709 %{
710     /*
711      * Character and string escapes are roughly the same, but strings have the
712      * extra `\&' sequence which is not allowed for characters.  Also, comments
713      * are allowed in the <StringEsc> state.  (See the comment section much
714      * further down.)
715      *
716      * NB: Backslashes and tabs are stored in strings as themselves.
717      * But if we print them (in printtree.c), they must go out as
718      * "\\\\" and "\\t" respectively.  (This is because of the bogus
719      * intermediate format that the parser produces.  It uses '\t' fpr end of
720      * string, so it needs to be able to escape tabs, which means that it
721      * also needs to be able to escape the escape character ('\\').  Sigh.
722      */
723 %}
724
725 <Char>\\                { PUSH_STATE(CharEsc); }
726 <String>\\&             /* Ignore */ ;
727 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
728
729 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
730 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
731
732 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
733 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
734 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
735 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
736 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
737 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
738 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
739 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
740 <CharEsc,StringEsc>BEL  |
741 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
742 <CharEsc,StringEsc>BS   |
743 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
744 <CharEsc,StringEsc>HT   |
745 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
746 <CharEsc,StringEsc>LF   |
747 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
748 <CharEsc,StringEsc>VT   |
749 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
750 <CharEsc,StringEsc>FF   |
751 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
752 <CharEsc,StringEsc>CR   |
753 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
754 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
755 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
756 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
757 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
758 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
759 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
760 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
761 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
762 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
763 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
764 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
765 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
766 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
767 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
768 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
769 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
770 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
771 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
772 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
773 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
774 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
775 <CharEsc,StringEsc>{D}+  {
776                           int i = strtol(yytext, NULL, 10);
777                           if (i < NCHARS) {
778                              addchar((char) i);
779                           } else {
780                              char errbuf[ERR_BUF_SIZE];
781                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
782                                 yytext);
783                              hsperror(errbuf);
784                           }
785                           POP_STATE;
786                         }
787 <CharEsc,StringEsc>o{O}+ {
788                           int i = strtol(yytext + 1, NULL, 8);
789                           if (i < NCHARS) {
790                              addchar((char) i);
791                           } else {
792                              char errbuf[ERR_BUF_SIZE];
793                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
794                                 yytext);
795                              hsperror(errbuf);
796                           }
797                           POP_STATE;
798                         }
799 <CharEsc,StringEsc>x{H}+ {
800                           int i = strtol(yytext + 1, NULL, 16);
801                           if (i < NCHARS) {
802                              addchar((char) i);
803                           } else {
804                              char errbuf[ERR_BUF_SIZE];
805                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
806                                 yytext);
807                              hsperror(errbuf);
808                           }
809                           POP_STATE;
810                         }
811
812
813 %{
814     /*
815      * Nested comments.  The major complication here is in trying to match the
816      * longest lexemes possible, for better performance.  (See the flex document.)
817      * That's why the rules look so bizarre.
818      */
819 %}
820
821 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
822                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
823                         }
824
825 <Comment>[^-{]*         |
826 <Comment>"-"+[^-{}]+    |
827 <Comment>"{"+[^-{}]+    ;
828 <Comment>"{-"           { nested_comments++; }
829 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
830 <Comment>(.|\n)         ;
831
832 %{
833     /*
834      * Illegal characters.  This used to be a single rule, but we might as well
835      * pass on as much information as we have, so now we indicate our state in
836      * the error message.
837      */
838 %}
839
840 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
841                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
842                             input_filename, hsplineno, hspcolno + 1); 
843                          format_string(stderr, (unsigned char *) yytext, 1);
844                          fputs("'\n", stderr);
845                          hsperror("");
846                         }
847 <Char>(.|\n)            { 
848                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
849                             input_filename, hsplineno, hspcolno + 1); 
850                          format_string(stderr, (unsigned char *) yytext, 1);
851                          fputs("' in a character literal\n", stderr);
852                          hsperror("");
853                         }
854 <CharEsc>(.|\n)         {
855                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
856                             input_filename, hsplineno, hspcolno + 1); 
857                          format_string(stderr, (unsigned char *) yytext, 1);
858                          fputs("'\n", stderr);
859                          hsperror("");
860                         }
861 <String>(.|\n)          { if (nonstandardFlag) {
862                              addtext(yytext, yyleng);
863                           } else { 
864                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
865                                 input_filename, hsplineno, hspcolno + 1); 
866                                 format_string(stderr, (unsigned char *) yytext, 1);
867                                 fputs("' in a string literal\n", stderr);
868                                 hsperror("");
869                           }
870                         }
871 <StringEsc>(.|\n)       {
872                          if (noGap) {
873                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
874                                 input_filename, hsplineno, hspcolno + 1); 
875                              format_string(stderr, (unsigned char *) yytext, 1);
876                              fputs("'\n", stderr);
877                              hsperror("");
878                          } else {
879                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
880                                 input_filename, hsplineno, hspcolno + 1);
881                              format_string(stderr, (unsigned char *) yytext, 1);
882                              fputs("' in a string gap\n", stderr);
883                              hsperror("");
884                          }
885                         }
886
887 %{
888     /*
889      * End of file.  In any sub-state, this is an error.  However, for the primary
890      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
891      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
892      * adding virtual close curlies, or closing an interface and returning to the
893      * primary source file.
894      *
895      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
896      * line/column advancement has to be done by hand.
897      */
898 %}
899
900 <Char,CharEsc><<EOF>>   { 
901                           hsplineno = hslineno; hspcolno = hscolno;
902                           hsperror("unterminated character literal");
903                         }
904 <Comment><<EOF>>        { 
905                           hsplineno = hslineno; hspcolno = hscolno;
906                           hsperror("unterminated comment"); 
907                         }
908 <String,StringEsc><<EOF>>   { 
909                           hsplineno = hslineno; hspcolno = hscolno;
910                           hsperror("unterminated string literal"); 
911                         }
912 <UserPragma><<EOF>>     {
913                           hsplineno = hslineno; hspcolno = hscolno;
914                           hsperror("unterminated user-specified pragma"); 
915                         }
916 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
917
918 %%
919
920 /**********************************************************************
921 *                                                                     *
922 *                                                                     *
923 *     YACC/LEX Initialisation etc.                                    *
924 *                                                                     *
925 *                                                                     *
926 **********************************************************************/
927
928 /*
929    We initialise input_filename to "<stdin>".
930    This allows unnamed sources to be piped into the parser.
931 */
932
933 void
934 yyinit(void)
935 {
936     input_filename = xstrdup("<stdin>");
937
938     /* We must initialize the input buffer _now_, because we call
939        setyyin _before_ calling yylex for the first time! */
940     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
941
942     if (nonstandardFlag)
943         PUSH_STATE(GlaExt);
944     else
945         PUSH_STATE(Code);
946 }
947
948 static void
949 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
950 {
951     if (input_filename != NULL)
952         free(input_filename);
953     input_filename = xstrdup(f);
954 }
955
956 /**********************************************************************
957 *                                                                     *
958 *                                                                     *
959 *     Layout Processing                                               *
960 *                                                                     *
961 *                                                                     *
962 **********************************************************************/
963
964 /*
965         The following section deals with Haskell Layout conventions
966         forcing insertion of ; or } as appropriate
967 */
968
969 static BOOLEAN
970 hsshouldindent(void)
971 {
972     return (!forgetindent && INDENTON);
973 }
974
975
976 /* Enter new context and set new indentation level */
977 void
978 hssetindent(void)
979 {
980 #ifdef HSP_DEBUG
981     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
982 #endif
983
984     /*
985      * partain: first chk that new indent won't be less than current one; this code
986      * doesn't make sense to me; hscolno tells the position of the _end_ of the
987      * current token; what that has to do with indenting, I don't know.
988      */
989
990
991     if (hscolno - 1 <= INDENTPT) {
992         if (INDENTPT == -1)
993             return;             /* Empty input OK for Haskell 1.1 */
994         else {
995             char errbuf[ERR_BUF_SIZE];
996
997             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
998             hsperror(errbuf);
999         }
1000     }
1001     hsentercontext((hspcolno << 1) | 1);
1002 }
1003
1004
1005 /* Enter a new context without changing the indentation level */
1006 void
1007 hsincindent(void)
1008 {
1009 #ifdef HSP_DEBUG
1010     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1011 #endif
1012     hsentercontext(indenttab[icontexts] & ~1);
1013 }
1014
1015
1016 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1017 void
1018 hsindentoff(void)
1019 {
1020     forgetindent = TRUE;
1021 }
1022
1023
1024 /* Enter a new layout context. */
1025 static void
1026 hsentercontext(int indent)
1027 {
1028     /* Enter new context and set indentation as specified */
1029     if (++icontexts >= MAX_CONTEXTS) {
1030         char errbuf[ERR_BUF_SIZE];
1031
1032         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1033         hsperror(errbuf);
1034     }
1035     forgetindent = FALSE;
1036     indenttab[icontexts] = indent;
1037 #ifdef HSP_DEBUG
1038     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1039 #endif
1040 }
1041
1042
1043 /* Exit a layout context */
1044 void
1045 hsendindent(void)
1046 {
1047     --icontexts;
1048 #ifdef HSP_DEBUG
1049     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1050 #endif
1051 }
1052
1053 /*
1054  *      Return checks the indentation level and returns ;, } or the specified token.
1055  */
1056
1057 static int
1058 Return(int tok)
1059 {
1060 #ifdef HSP_DEBUG
1061     extern int yyleng;
1062 #endif
1063
1064     if (hsshouldindent()) {
1065         if (hspcolno < INDENTPT) {
1066 #ifdef HSP_DEBUG
1067             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1068 #endif
1069             hssttok = tok;
1070             return (VCCURLY);
1071         } else if (hspcolno == INDENTPT) {
1072 #ifdef HSP_DEBUG
1073             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1074 #endif
1075             hssttok = -tok;
1076             return (SEMI);
1077         }
1078     }
1079     hssttok = -1;
1080 #ifdef HSP_DEBUG
1081     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1082 #endif
1083     return (tok);
1084 }
1085
1086
1087 /*
1088  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1089  */
1090 int
1091 yylex()
1092 {
1093     int tok;
1094     static BOOLEAN eof = FALSE;
1095
1096     if (!eof) {
1097         if (hssttok != -1) {
1098             if (hssttok < 0) {
1099                 tok = -hssttok;
1100                 hssttok = -1;
1101                 return tok;
1102             }
1103             RETURN(hssttok);
1104         } else {
1105             endlineno = hslineno;
1106             if ((tok = yylex1()) != EOF)
1107                 return tok;
1108             else
1109                 eof = TRUE;
1110         }
1111     }
1112     if (icontexts > icontexts_save) {
1113         if (INDENTON) {
1114             eof = TRUE;
1115             indenttab[icontexts] = 0;
1116             return (VCCURLY);
1117         } else
1118             hsperror("missing '}' at end of file");
1119     } else if (hsbuf_save != NULL) {
1120         fclose(yyin);
1121         yy_delete_buffer(YY_CURRENT_BUFFER);
1122         yy_switch_to_buffer(hsbuf_save);
1123         hsbuf_save = NULL;
1124         new_filename(filename_save);
1125         free(filename_save);
1126         hslineno = hslineno_save;
1127         hsplineno = hsplineno_save;
1128         hscolno = hscolno_save;
1129         hspcolno = hspcolno_save;
1130         etags = etags_save;
1131         icontexts = icontexts_save - 1;
1132         icontexts_save = 0;
1133 #ifdef HSP_DEBUG
1134         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1135 #endif
1136         eof = FALSE;
1137
1138         /* RETURN(LEOF); */
1139         hsperror("No longer using yacc to parse interface files");
1140
1141     } else {
1142         yyterminate();
1143     }
1144     abort(); /* should never get here! */
1145     return(0);
1146 }
1147
1148 /**********************************************************************
1149 *                                                                     *
1150 *                                                                     *
1151 *     Input Processing for Interfaces -- Not currently used !!!       *
1152 *                                                                     *
1153 *                                                                     *
1154 **********************************************************************/
1155
1156 /* setyyin(file)        open file as new lex input buffer */
1157 extern FILE *yyin;
1158
1159 void
1160 setyyin(char *file)
1161 {
1162     hsbuf_save = YY_CURRENT_BUFFER;
1163     if ((yyin = fopen(file, "r")) == NULL) {
1164         char errbuf[ERR_BUF_SIZE];
1165
1166         sprintf(errbuf, "can't read \"%-.50s\"", file);
1167         hsperror(errbuf);
1168     }
1169     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1170
1171     hslineno_save = hslineno;
1172     hsplineno_save = hsplineno;
1173     hslineno = hsplineno = 1;
1174
1175     filename_save = input_filename;
1176     input_filename = NULL;
1177     new_filename(file);
1178     hscolno_save = hscolno;
1179     hspcolno_save = hspcolno;
1180     hscolno = hspcolno = 0;
1181     etags_save = etags; /* do not do "etags" stuff in interfaces */
1182     etags = 0;          /* We remember whether we are doing it in
1183                            the module, so we can restore it later [WDP 94/09] */
1184     hsentercontext(-1);         /* partain: changed this from 0 */
1185     icontexts_save = icontexts;
1186 #ifdef HSP_DEBUG
1187     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1188 #endif
1189 }
1190
1191 static void
1192 layout_input(char *text, int len)
1193 {
1194 #ifdef HSP_DEBUG
1195     fprintf(stderr, "Scanning \"%s\"\n", text);
1196 #endif
1197
1198     hsplineno = hslineno;
1199     hspcolno = hscolno;
1200
1201     while (len-- > 0) {
1202         switch (*text++) {
1203         case '\n':
1204         case '\r':
1205         case '\f':
1206             hslineno++;
1207             hscolno = 0;
1208             break;
1209         case '\t':
1210             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1211             break;
1212         case '\v':
1213             break;
1214         default:
1215             ++hscolno;
1216             break;
1217         }
1218     }
1219 }
1220
1221 void
1222 setstartlineno(void)
1223 {
1224     startlineno = hsplineno;
1225
1226     if (modulelineno == 0) {
1227         modulelineno = startlineno;
1228     }
1229
1230 #if 1/*etags*/
1231 #else
1232     if (etags)
1233         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1234 #endif
1235 }
1236
1237 /**********************************************************************
1238 *                                                                     *
1239 *                                                                     *
1240 *                      Text Caching                                   *
1241 *                                                                     *
1242 *                                                                     *
1243 **********************************************************************/
1244
1245 #define CACHE_SIZE YY_BUF_SIZE
1246
1247 static struct {
1248     unsigned allocated;
1249     unsigned next;
1250     char *text;
1251 } textcache = { 0, 0, NULL };
1252
1253 static void
1254 cleartext(void)
1255 {
1256 /*  fprintf(stderr, "cleartext\n"); */
1257     textcache.next = 0;
1258     if (textcache.allocated == 0) {
1259         textcache.allocated = CACHE_SIZE;
1260         textcache.text = xmalloc(CACHE_SIZE);
1261     }
1262 }
1263
1264 static void
1265 addtext(char *text, unsigned length)
1266 {
1267 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1268
1269     if (length == 0)
1270         return;
1271
1272     if (textcache.next + length + 1 >= textcache.allocated) {
1273         textcache.allocated += length + CACHE_SIZE;
1274         textcache.text = xrealloc(textcache.text, textcache.allocated);
1275     }
1276     bcopy(text, textcache.text + textcache.next, length);
1277     textcache.next += length;
1278 }
1279
1280 static void
1281 addchar(char c)
1282 {
1283 /*  fprintf(stderr, "addchar: %c\n", c); */
1284
1285     if (textcache.next + 2 >= textcache.allocated) {
1286         textcache.allocated += CACHE_SIZE;
1287         textcache.text = xrealloc(textcache.text, textcache.allocated);
1288     }
1289     textcache.text[textcache.next++] = c;
1290 }
1291
1292 static char *
1293 fetchtext(unsigned *length)
1294 {
1295 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1296
1297     *length = textcache.next;
1298     textcache.text[textcache.next] = '\0';
1299     return textcache.text;
1300 }
1301
1302 /**********************************************************************
1303 *                                                                     *
1304 *                                                                     *
1305 *    Identifier Processing                                             *
1306 *                                                                     *
1307 *                                                                     *
1308 **********************************************************************/
1309
1310 /*
1311         hsnewid         Enters an id of length n into the symbol table.
1312 */
1313
1314 static void
1315 hsnewid(char *name, int length)
1316 {
1317     char save = name[length];
1318
1319     name[length] = '\0';
1320     yylval.uid = installid(name);
1321     name[length] = save;
1322 }
1323
1324 BOOLEAN
1325 hsnewqid(char *name, int length)
1326 {
1327     char* dot;
1328     char save = name[length];
1329     name[length] = '\0';
1330
1331     dot = strchr(name, '.');
1332     *dot = '\0';
1333     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1334     *dot = '.';
1335     name[length] = save;
1336
1337     return isconstr(dot+1);
1338 }