[project @ 1998-02-11 14:15:07 by simonm]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 /* The includes/config.h one */
11 #include "config.h"
12
13 #include <stdio.h>
14
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 #include <string.h>
17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #include <memory.h>
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define index strchr
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 #include <strings.h>
28 /* memory.h and strings.h conflict on some systems.  */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
30
31 #include "hspincl.h"
32 #include "hsparser.tab.h"
33 #include "constants.h"
34 #include "utils.h"
35
36 /* Our substitute for <ctype.h> */
37
38 #define NCHARS  256
39 #define _S      0x1
40 #define _D      0x2
41 #define _H      0x4
42 #define _O      0x8
43 #define _C      0x10
44
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
62
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 };
80
81 BOOLEAN
82 isconstr (char *s)
83 {
84     return(CharTable[*s]&(_C));
85 }
86
87 /**********************************************************************
88 *                                                                     *
89 *                                                                     *
90 *      Declarations                                                   *
91 *                                                                     *
92 *                                                                     *
93 **********************************************************************/
94
95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
96
97 /*
98  * For my own sanity, things that are not part of the flex skeleton
99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
100  */
101
102 static int hslineno = 0;        /* Line number at end of token */
103 int hsplineno = 0;              /* Line number at end of previous token */
104
105 static int hscolno = 0;         /* Column number at end of token */
106 int hspcolno = 0;               /* Column number at end of previous token */
107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
108
109 int modulelineno = -1;          /* The line number where the module starts */
110 int startlineno = 0;            /* The line number where something starts */
111 int endlineno = 0;              /* The line number where something ends */
112
113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
115
116 static int nested_comments;     /* For counting comment nesting depth */
117
118 /* OLD: Hacky definition of yywrap: see flex doc.
119
120    If we don't do this, then we'll have to get the default
121    yywrap from the flex library, which is often something
122    we are not good at locating.  This avoids that difficulty.
123    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124    WDP 94/09/05
125 #define yywrap() 1
126 */
127
128 /* Essential forward declarations */
129
130 static void hsnewid      PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext    (NO_ARGS);
133 static void addtext      PROTO((char *, unsigned));
134 static void addchar      PROTO((char));
135 static char *fetchtext   PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int  Return       PROTO((int));
138 static void hsentercontext PROTO((int));
139
140 /* Special file handling for IMPORTS */
141 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142
143 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
144 static char *filename_save;             /* File Name                     */
145 static int hslineno_save = 0,           /* Line Number                   */
146  hsplineno_save = 0,                    /* Line Number of Prev. token    */
147  hscolno_save = 0,                      /* Indentation                   */
148  hspcolno_save = 0;                     /* Left Indentation              */
149 static short icontexts_save = 0;        /* Indent Context Level          */
150
151 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags;                   /* that which is saved */
153
154 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
155
156 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
157                                  * inserted before token +ve  -- "}" inserted before
158                                  * token */
159
160 short icontexts = 0;            /* Which context we're in */
161
162 /*
163         Table of indentations:  right bit indicates whether to use
164           indentation rules (1 = use rules; 0 = ignore)
165
166     partain:
167     push one of these "contexts" at every "case" or "where"; the right bit says
168     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
169
170     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
171     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
172     push is shown just below.
173
174 */
175
176
177 static short indenttab[MAX_CONTEXTS] = {-1};
178
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
181
182 #define RETURN(tok) return(Return(tok))
183
184 #undef YY_DECL
185 #define YY_DECL int yylex1()
186
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189    to support older versions of flex, we'll continue to peek for now.
190  */
191 #define YY_USER_ACTION \
192     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
193
194 #if 0/*debug*/
195 #undef YY_BREAK
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
197 #endif
198
199 /* Each time we enter a new start state, we push it onto the state stack.
200 */
201 #define PUSH_STATE(n)   yy_push_state(n)
202 #define POP_STATE       yy_pop_state()
203
204 %}
205 /* Options:
206     8bit (8-bit input)
207     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208     never-interactive (to go a bit faster)
209     stack (use a start-condition stack)
210 */
211 %option 8bit
212 %option noyywrap
213 %option never-interactive
214 %option stack
215
216 /* The start states are:
217    Code -- normal Haskell code (principal lexer)
218    GlaExt -- Haskell code with Glasgow extensions
219    Comment -- Nested comment processing
220    String -- Inside a string literal with backslashes
221    StringEsc -- Immediately following a backslash in a string literal
222    Char -- Inside a character literal with backslashes
223    CharEsc -- Immediately following a backslash in a character literal 
224
225    Note that the INITIAL state is unused.  Also note that these states
226    are _exclusive_.  All rules should be prefixed with an appropriate
227    list of start states.
228  */
229
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231
232 isoS                    [\xa1-\xbf\xd7\xf7]
233 isoL                    [\xc0-\xd6\xd8-\xde]
234 isol                    [\xdf-\xf6\xf8-\xff]
235 isoA                    [\xa1-\xff]
236
237 D                       [0-9]
238 O                       [0-7]
239 H                       [0-9A-Fa-f]
240 N                       {D}+
241 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 SId                     {S}{S}*
244 L                       [A-Z\xc0-\xd6\xd8-\xde]
245 l                       [a-z\xdf-\xf6\xf8-\xff]
246 I                       {L}|{l}
247 i                       {L}|{l}|[0-9'_]
248 Id                      {I}{i}*
249 Mod                     {L}{i}*
250 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
251 CNTRL                   [@A-Z\[\\\]^_]
252 WS                      [ \t\n\r\f\v]
253 NL                      [\n\r]
254
255 %%
256
257 %{
258     /*
259      * Simple comments and whitespace.  Normally, we would just ignore these, but
260      * in case we're processing a string escape, we need to note that we've seen
261      * a gap.
262      *
263      * Note that we cater for a comment line that *doesn't* end in a newline.
264      * This is incorrect, strictly speaking, but seems like the right thing
265      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
266      *
267      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
268      */
269 %}
270
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
273
274 %{
275     /* 
276      * Special GHC pragma rules.  Do we need a start state for interface files,
277      * so these won't be matched in source files? --JSM
278      */
279
280 %}
281
282 %{
283 /* I believe the next rule is not ever matched.
284
285    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
286    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
287    where possible, so the lexer should now never see cpp directives
288    like '# ' and '#line'.
289
290    -- SOF
291
292 <Code,GlaExt>^"# ".*{NL}    {
293                           char tempf[FILENAME_SIZE];
294                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
295                           new_filename(tempf);
296                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
297                         }
298
299 <Code,GlaExt>^"#line ".*{NL}    {
300                           char tempf[FILENAME_SIZE];
301                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
302                           new_filename(tempf); 
303                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
304                         }
305 */
306 %}
307
308 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
309                           /* partain: pragma-style line directive */
310                           char tempf[FILENAME_SIZE];
311                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
312                           new_filename(tempf);
313                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
314                         }
315
316 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
317                               PUSH_STATE(UserPragma);
318                               RETURN(INTERFACE_UPRAGMA);
319                             }
320 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
321                               PUSH_STATE(UserPragma);
322                               RETURN(SPECIALISE_UPRAGMA);
323                             }
324 <Code,GlaExt>"{-#"{WS}*"INLINE" {
325                               PUSH_STATE(UserPragma);
326                               RETURN(INLINE_UPRAGMA);
327                             }
328 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
329                               PUSH_STATE(UserPragma);
330                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
331                             }
332 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
333                               /* these are handled by hscpp */
334                               nested_comments =1;
335                               PUSH_STATE(Comment);
336                             }
337 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
338                               /* these are for the driver! */
339                               nested_comments =1;
340                               PUSH_STATE(Comment);
341                             }
342 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
343                               /* these are used by `make depend' and the
344                                  compiler to indicate that a module should
345                                  be imported from source */
346                               nested_comments =1;
347                               RETURN(SOURCE_UPRAGMA); 
348                             }
349
350 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
351                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
352                                 input_filename, hsplineno);
353                               format_string(stderr, (unsigned char *) yytext, yyleng);
354                               fputs("'\n", stderr);
355                               nested_comments = 1;
356                               PUSH_STATE(Comment);
357                             }
358 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
359
360 %{
361     /*
362      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
363      * intentionally accepted as a keyword even for normal <Code>.
364      */
365 %}
366
367 <Code,GlaExt>"case"             { RETURN(CASE); }
368 <Code,GlaExt>"class"            { RETURN(CLASS); }
369 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
370 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
371 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
372 <Code,GlaExt>"do"               { RETURN(DO); }
373 <Code,GlaExt>"else"             { RETURN(ELSE); }
374 <Code,GlaExt>"if"               { RETURN(IF); }
375 <Code,GlaExt>"import"           { RETURN(IMPORT); }
376 <Code,GlaExt>"in"               { RETURN(IN); }
377 <Code,GlaExt>"infix"            { RETURN(INFIX); }
378 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
379 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
380 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
381 <Code,GlaExt>"let"              { RETURN(LET); }
382 <Code,GlaExt>"module"           { RETURN(MODULE); }
383 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
384 <Code,GlaExt>"of"               { RETURN(OF); }
385 <Code,GlaExt>"then"             { RETURN(THEN); }
386 <Code,GlaExt>"type"             { RETURN(TYPE); }
387 <Code,GlaExt>"where"            { RETURN(WHERE); }
388
389 <Code,GlaExt>"as"               { RETURN(AS); }
390 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
391 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
392
393 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
394 <GlaExt>"_ccall_"               { RETURN(CCALL); }
395 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
396 <GlaExt>"_casm_"                { RETURN(CASM); }
397 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
398
399 %{
400     /* 
401      * Haskell operators: special, reservedops and useful varsyms
402      */
403 %}
404
405 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
406 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
407 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
408 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
409 <Code,GlaExt>"{"                { RETURN(OCURLY); }
410 <Code,GlaExt>"}"                { RETURN(CCURLY); }
411 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
412 <Code,GlaExt>";"                { RETURN(SEMI); }
413 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
414 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
415
416 <Code,GlaExt>".."               { RETURN(DOTDOT); }
417 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
418 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
419 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
420 <Code,GlaExt>"|"                { RETURN(VBAR); }
421 <Code,GlaExt>"<-"               { RETURN(LARROW); }
422 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
423 <Code,GlaExt>"-"                { RETURN(MINUS); }
424 <Code,GlaExt>"+"                { RETURN(PLUS); }
425
426 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
427 <Code,GlaExt>"@"                { RETURN(AT); }
428 <Code,GlaExt>"!"                { RETURN(BANG); }
429 <Code,GlaExt>"~"                { RETURN(LAZY); }
430
431 %{
432     /*
433      * Integers and (for Glasgow extensions) primitive integers.  Note that
434      * we pass all of the text on to the parser, because flex/C can't handle
435      * arbitrary precision numbers.
436      */
437 %}
438
439 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
440                          yylval.uid = xstrndup(yytext, yyleng - 1);
441                          RETURN(INTPRIM);
442                         }
443 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
444                          yylval.uid = xstrndup(yytext, yyleng);
445                          RETURN(INTEGER);
446                         }
447 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
448                          yylval.uid = xstrndup(yytext, yyleng - 1);
449                          RETURN(INTPRIM);
450                         }
451 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
452                          yylval.uid = xstrndup(yytext, yyleng);
453                          RETURN(INTEGER);
454                         }
455 <GlaExt>("-")?{N}"#"    {
456                          yylval.uid = xstrndup(yytext, yyleng - 1);
457                          RETURN(INTPRIM);
458                         }
459 <Code,GlaExt,UserPragma>{N} {
460                          yylval.uid = xstrndup(yytext, yyleng);
461                          RETURN(INTEGER);
462                         }
463
464 %{
465     /*
466      * Floats and (for Glasgow extensions) primitive floats/doubles.
467      */
468 %}
469
470 <GlaExt>("-")?{F}"##"   {
471                          yylval.uid = xstrndup(yytext, yyleng - 2);
472                          RETURN(DOUBLEPRIM);
473                         }
474 <GlaExt>("-")?{F}"#"    {
475                          yylval.uid = xstrndup(yytext, yyleng - 1);
476                          RETURN(FLOATPRIM);
477                         }
478 <Code,GlaExt>{F}        {
479                          yylval.uid = xstrndup(yytext, yyleng);
480                          RETURN(FLOAT);
481                         }
482
483 %{
484     /*
485      * Funky ``foo'' style C literals for Glasgow extensions
486      */
487 %}
488
489 <GlaExt>"``"[^']+"''"   {
490                          hsnewid(yytext + 2, yyleng - 4);
491                          RETURN(CLITLIT);
492                         }
493
494 %{
495     /*
496      * Identifiers, both variables and operators.  The trailing hash is allowed
497      * for Glasgow extensions.
498      */
499 %}
500
501
502 %{
503 /* These SHOULDNAE work in "Code" (sigh) */
504 %}
505 <GlaExt,UserPragma>{Id}"#" { 
506                         if (! nonstandardFlag) {
507                             char errbuf[ERR_BUF_SIZE];
508                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
509                             hsperror(errbuf);
510                          }
511                          hsnewid(yytext, yyleng);
512                          RETURN(isconstr(yytext) ? CONID : VARID);
513                         }
514 <Code,GlaExt,UserPragma>{Id}    {
515                          hsnewid(yytext, yyleng);
516                          RETURN(isconstr(yytext) ? CONID : VARID);
517                         }
518 <Code,GlaExt,UserPragma>{SId}   {
519                          hsnewid(yytext, yyleng);
520                          RETURN(isconstr(yytext) ? CONSYM : VARSYM);
521                         }
522 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
523                          BOOLEAN is_constr;
524                          if (! nonstandardFlag) {
525                             char errbuf[ERR_BUF_SIZE];
526                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
527                             hsperror(errbuf);
528                          }
529                          is_constr = hsnewqid(yytext, yyleng);
530                          RETURN(is_constr ? QCONID : QVARID);
531                         }
532 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
533                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
534                          RETURN(is_constr ? QCONID : QVARID);
535                         }
536 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
537                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
538                          RETURN(is_constr ? QCONSYM : QVARSYM);
539                         }
540
541 %{
542     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
543
544     /* Because we can make the former well-behaved (we defined them).
545
546        Sadly, the latter is defined by Haskell, which allows such
547        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
548     */
549 %}
550
551 <GlaExt,UserPragma>"`"{Id}"#`"  {       
552                          hsnewid(yytext + 1, yyleng - 2);
553                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
554                         }
555
556 %{
557     /*
558      * Character literals.  The first form is the quick form, for character
559      * literals that don't contain backslashes.  Literals with backslashes are
560      * lexed through multiple rules.  First, we match the open ' and as many
561      * normal characters as possible.  This puts us into the <Char> state, where
562      * a backslash is legal.  Then, we match the backslash and move into the 
563      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
564      * characters and the close '.  We may end up with too many characters, but
565      * this allows us to easily share the lex rules with strings.  Excess characters
566      * are ignored with a warning.
567      */
568 %}
569
570 <GlaExt>'({CHAR}|"\"")"'#" {
571                          yylval.uhstring = installHstring(1, yytext+1);
572                          RETURN(CHARPRIM);
573                         }
574 <Code,GlaExt>'({CHAR}|"\"")'    {
575                          yylval.uhstring = installHstring(1, yytext+1);
576                          RETURN(CHAR);
577                         }
578 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
579                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
580                          hsperror(errbuf);
581                         }
582 <Code,GlaExt>'({CHAR}|"\"")* {
583                          hsmlcolno = hspcolno;
584                          cleartext();
585                          addtext(yytext+1, yyleng-1);
586                          PUSH_STATE(Char);
587                         }
588 <Char>({CHAR}|"\"")*'#  {
589                          unsigned length;
590                          char *text;
591
592                          addtext(yytext, yyleng - 2);
593                          text = fetchtext(&length);
594
595                          if (! nonstandardFlag) {
596                             char errbuf[ERR_BUF_SIZE];
597                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
598                             hsperror(errbuf);
599                          }
600
601                          if (length > 1) {
602                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
603                               input_filename, hsplineno, hspcolno + 1);
604                             format_string(stderr, (unsigned char *) text, length);
605                             fputs("' too long\n", stderr);
606                             hsperror("");
607                          }
608                          yylval.uhstring = installHstring(1, text);
609                          hspcolno = hsmlcolno;
610                          POP_STATE;
611                          RETURN(CHARPRIM); 
612                         }
613 <Char>({CHAR}|"\"")*'   {
614                          unsigned length;
615                          char *text;
616
617                          addtext(yytext, yyleng - 1);
618                          text = fetchtext(&length);
619
620                          if (length > 1) {
621                             fprintf(stderr, "%s:%d:%d: Character literal '",
622                               input_filename, hsplineno, hspcolno + 1);
623                             format_string(stderr, (unsigned char *) text, length);
624                             fputs("' too long\n", stderr);
625                             hsperror("");
626                          }
627                          yylval.uhstring = installHstring(1, text);
628                          hspcolno = hsmlcolno;
629                          POP_STATE;
630                          RETURN(CHAR); 
631                         }
632 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
633
634
635 %{
636     /*
637      * String literals.  The first form is the quick form, for string literals
638      * that don't contain backslashes.  Literals with backslashes are lexed
639      * through multiple rules.  First, we match the open " and as many normal
640      * characters as possible.  This puts us into the <String> state, where
641      * a backslash is legal.  Then, we match the backslash and move into the 
642      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
643      * characters, moving back and forth between <String> and <StringEsc> as more
644      * backslashes are encountered.  (We may even digress into <Comment> mode if we
645      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
646      * of normal characters and the close ".
647      */
648 %}
649
650 <GlaExt>"\""({CHAR}|"'")*"\""#  {
651                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
652                             /* the -3 accounts for the " on front, "# on the end */
653                          RETURN(STRINGPRIM); 
654                         }
655 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
656                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
657                          RETURN(STRING); 
658                         }
659 <Code,GlaExt>"\""({CHAR}|"'")* {
660                          hsmlcolno = hspcolno;
661                          cleartext();
662                          addtext(yytext+1, yyleng-1);
663                          PUSH_STATE(String);
664                         }
665 <String>({CHAR}|"'")*"\"#"   {
666                          unsigned length;
667                          char *text;
668
669                          addtext(yytext, yyleng-2);
670                          text = fetchtext(&length);
671
672                          if (! nonstandardFlag) {
673                             char errbuf[ERR_BUF_SIZE];
674                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
675                             hsperror(errbuf);
676                          }
677
678                          yylval.uhstring = installHstring(length, text);
679                          hspcolno = hsmlcolno;
680                          POP_STATE;
681                          RETURN(STRINGPRIM);
682                         }
683 <String>({CHAR}|"'")*"\""   {
684                          unsigned length;
685                          char *text;
686
687                          addtext(yytext, yyleng-1);
688                          text = fetchtext(&length);
689
690                          yylval.uhstring = installHstring(length, text);
691                          hspcolno = hsmlcolno;
692                          POP_STATE;
693                          RETURN(STRING); 
694                         }
695 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
696
697 %{
698     /*
699      * Character and string escapes are roughly the same, but strings have the
700      * extra `\&' sequence which is not allowed for characters.  Also, comments
701      * are allowed in the <StringEsc> state.  (See the comment section much
702      * further down.)
703      *
704      * NB: Backslashes and tabs are stored in strings as themselves.
705      * But if we print them (in printtree.c), they must go out as
706      * "\\\\" and "\\t" respectively.  (This is because of the bogus
707      * intermediate format that the parser produces.  It uses '\t' fpr end of
708      * string, so it needs to be able to escape tabs, which means that it
709      * also needs to be able to escape the escape character ('\\').  Sigh.
710      */
711 %}
712
713 <Char>\\                { PUSH_STATE(CharEsc); }
714 <String>\\&             /* Ignore */ ;
715 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
716
717 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
718 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
719
720 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
721 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
722 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
723 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
724 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
725 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
726 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
727 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
728 <CharEsc,StringEsc>BEL  |
729 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
730 <CharEsc,StringEsc>BS   |
731 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
732 <CharEsc,StringEsc>HT   |
733 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
734 <CharEsc,StringEsc>LF   |
735 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
736 <CharEsc,StringEsc>VT   |
737 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
738 <CharEsc,StringEsc>FF   |
739 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
740 <CharEsc,StringEsc>CR   |
741 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
742 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
743 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
744 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
745 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
746 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
747 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
748 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
749 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
750 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
751 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
752 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
753 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
754 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
755 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
756 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
757 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
758 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
759 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
760 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
761 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
762 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
763 <CharEsc,StringEsc>{D}+  {
764                           int i = strtol(yytext, NULL, 10);
765                           if (i < NCHARS) {
766                              addchar((char) i);
767                           } else {
768                              char errbuf[ERR_BUF_SIZE];
769                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
770                                 yytext);
771                              hsperror(errbuf);
772                           }
773                           POP_STATE;
774                         }
775 <CharEsc,StringEsc>o{O}+ {
776                           int i = strtol(yytext + 1, NULL, 8);
777                           if (i < NCHARS) {
778                              addchar((char) i);
779                           } else {
780                              char errbuf[ERR_BUF_SIZE];
781                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
782                                 yytext);
783                              hsperror(errbuf);
784                           }
785                           POP_STATE;
786                         }
787 <CharEsc,StringEsc>x{H}+ {
788                           int i = strtol(yytext + 1, NULL, 16);
789                           if (i < NCHARS) {
790                              addchar((char) i);
791                           } else {
792                              char errbuf[ERR_BUF_SIZE];
793                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
794                                 yytext);
795                              hsperror(errbuf);
796                           }
797                           POP_STATE;
798                         }
799
800
801 %{
802     /*
803      * Nested comments.  The major complication here is in trying to match the
804      * longest lexemes possible, for better performance.  (See the flex document.)
805      * That's why the rules look so bizarre.
806      */
807 %}
808
809 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
810                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
811                         }
812
813 <Comment>[^-{]*         |
814 <Comment>"-"+[^-{}]+    |
815 <Comment>"{"+[^-{}]+    ;
816 <Comment>"{-"           { nested_comments++; }
817 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
818 <Comment>(.|\n)         ;
819
820 %{
821     /*
822      * Illegal characters.  This used to be a single rule, but we might as well
823      * pass on as much information as we have, so now we indicate our state in
824      * the error message.
825      */
826 %}
827
828 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
829                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
830                             input_filename, hsplineno, hspcolno + 1); 
831                          format_string(stderr, (unsigned char *) yytext, 1);
832                          fputs("'\n", stderr);
833                          hsperror("");
834                         }
835 <Char>(.|\n)            { 
836                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
837                             input_filename, hsplineno, hspcolno + 1); 
838                          format_string(stderr, (unsigned char *) yytext, 1);
839                          fputs("' in a character literal\n", stderr);
840                          hsperror("");
841                         }
842 <CharEsc>(.|\n)         {
843                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
844                             input_filename, hsplineno, hspcolno + 1); 
845                          format_string(stderr, (unsigned char *) yytext, 1);
846                          fputs("'\n", stderr);
847                          hsperror("");
848                         }
849 <String>(.|\n)          { if (nonstandardFlag) {
850                              addtext(yytext, yyleng);
851                           } else { 
852                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
853                                 input_filename, hsplineno, hspcolno + 1); 
854                                 format_string(stderr, (unsigned char *) yytext, 1);
855                                 fputs("' in a string literal\n", stderr);
856                                 hsperror("");
857                           }
858                         }
859 <StringEsc>(.|\n)       {
860                          if (noGap) {
861                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
862                                 input_filename, hsplineno, hspcolno + 1); 
863                              format_string(stderr, (unsigned char *) yytext, 1);
864                              fputs("'\n", stderr);
865                              hsperror("");
866                          } else {
867                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
868                                 input_filename, hsplineno, hspcolno + 1);
869                              format_string(stderr, (unsigned char *) yytext, 1);
870                              fputs("' in a string gap\n", stderr);
871                              hsperror("");
872                          }
873                         }
874
875 %{
876     /*
877      * End of file.  In any sub-state, this is an error.  However, for the primary
878      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
879      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
880      * adding virtual close curlies, or closing an interface and returning to the
881      * primary source file.
882      *
883      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
884      * line/column advancement has to be done by hand.
885      */
886 %}
887
888 <Char,CharEsc><<EOF>>   { 
889                           hsplineno = hslineno; hspcolno = hscolno;
890                           hsperror("unterminated character literal");
891                         }
892 <Comment><<EOF>>        { 
893                           hsplineno = hslineno; hspcolno = hscolno;
894                           hsperror("unterminated comment"); 
895                         }
896 <String,StringEsc><<EOF>>   { 
897                           hsplineno = hslineno; hspcolno = hscolno;
898                           hsperror("unterminated string literal"); 
899                         }
900 <UserPragma><<EOF>>     {
901                           hsplineno = hslineno; hspcolno = hscolno;
902                           hsperror("unterminated user-specified pragma"); 
903                         }
904 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
905
906 %%
907
908 /**********************************************************************
909 *                                                                     *
910 *                                                                     *
911 *     YACC/LEX Initialisation etc.                                    *
912 *                                                                     *
913 *                                                                     *
914 **********************************************************************/
915
916 /*
917    We initialise input_filename to "<stdin>".
918    This allows unnamed sources to be piped into the parser.
919 */
920
921 void
922 yyinit(void)
923 {
924     input_filename = xstrdup("<stdin>");
925
926     /* We must initialize the input buffer _now_, because we call
927        setyyin _before_ calling yylex for the first time! */
928     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
929
930     if (nonstandardFlag)
931         PUSH_STATE(GlaExt);
932     else
933         PUSH_STATE(Code);
934 }
935
936 static void
937 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
938 {
939     if (input_filename != NULL)
940         free(input_filename);
941     input_filename = xstrdup(f);
942 }
943
944 /**********************************************************************
945 *                                                                     *
946 *                                                                     *
947 *     Layout Processing                                               *
948 *                                                                     *
949 *                                                                     *
950 **********************************************************************/
951
952 /*
953         The following section deals with Haskell Layout conventions
954         forcing insertion of ; or } as appropriate
955 */
956
957 static BOOLEAN
958 hsshouldindent(void)
959 {
960     return (!forgetindent && INDENTON);
961 }
962
963
964 /* Enter new context and set new indentation level */
965 void
966 hssetindent(void)
967 {
968 #ifdef HSP_DEBUG
969     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
970 #endif
971
972     /*
973      * partain: first chk that new indent won't be less than current one; this code
974      * doesn't make sense to me; hscolno tells the position of the _end_ of the
975      * current token; what that has to do with indenting, I don't know.
976      */
977
978
979     if (hscolno - 1 <= INDENTPT) {
980         if (INDENTPT == -1)
981             return;             /* Empty input OK for Haskell 1.1 */
982         else {
983             char errbuf[ERR_BUF_SIZE];
984
985             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
986             hsperror(errbuf);
987         }
988     }
989     hsentercontext((hspcolno << 1) | 1);
990 }
991
992
993 /* Enter a new context without changing the indentation level */
994 void
995 hsincindent(void)
996 {
997 #ifdef HSP_DEBUG
998     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
999 #endif
1000     hsentercontext(indenttab[icontexts] & ~1);
1001 }
1002
1003
1004 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1005 void
1006 hsindentoff(void)
1007 {
1008     forgetindent = TRUE;
1009 }
1010
1011
1012 /* Enter a new layout context. */
1013 static void
1014 hsentercontext(int indent)
1015 {
1016     /* Enter new context and set indentation as specified */
1017     if (++icontexts >= MAX_CONTEXTS) {
1018         char errbuf[ERR_BUF_SIZE];
1019
1020         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1021         hsperror(errbuf);
1022     }
1023     forgetindent = FALSE;
1024     indenttab[icontexts] = indent;
1025 #ifdef HSP_DEBUG
1026     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1027 #endif
1028 }
1029
1030
1031 /* Exit a layout context */
1032 void
1033 hsendindent(void)
1034 {
1035     --icontexts;
1036 #ifdef HSP_DEBUG
1037     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1038 #endif
1039 }
1040
1041 /*
1042  *      Return checks the indentation level and returns ;, } or the specified token.
1043  */
1044
1045 static int
1046 Return(int tok)
1047 {
1048 #ifdef HSP_DEBUG
1049     extern int yyleng;
1050 #endif
1051
1052     if (hsshouldindent()) {
1053         if (hspcolno < INDENTPT) {
1054 #ifdef HSP_DEBUG
1055             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1056 #endif
1057             hssttok = tok;
1058             return (VCCURLY);
1059         } else if (hspcolno == INDENTPT) {
1060 #ifdef HSP_DEBUG
1061             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1062 #endif
1063             hssttok = -tok;
1064             return (SEMI);
1065         }
1066     }
1067     hssttok = -1;
1068 #ifdef HSP_DEBUG
1069     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1070 #endif
1071     return (tok);
1072 }
1073
1074
1075 /*
1076  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1077  */
1078 int
1079 yylex()
1080 {
1081     int tok;
1082     static BOOLEAN eof = FALSE;
1083
1084     if (!eof) {
1085         if (hssttok != -1) {
1086             if (hssttok < 0) {
1087                 tok = -hssttok;
1088                 hssttok = -1;
1089                 return tok;
1090             }
1091             RETURN(hssttok);
1092         } else {
1093             endlineno = hslineno;
1094             if ((tok = yylex1()) != EOF)
1095                 return tok;
1096             else
1097                 eof = TRUE;
1098         }
1099     }
1100     if (icontexts > icontexts_save) {
1101         if (INDENTON) {
1102             eof = TRUE;
1103             indenttab[icontexts] = 0;
1104             return (VCCURLY);
1105         } else
1106             hsperror("missing '}' at end of file");
1107     } else if (hsbuf_save != NULL) {
1108         fclose(yyin);
1109         yy_delete_buffer(YY_CURRENT_BUFFER);
1110         yy_switch_to_buffer(hsbuf_save);
1111         hsbuf_save = NULL;
1112         new_filename(filename_save);
1113         free(filename_save);
1114         hslineno = hslineno_save;
1115         hsplineno = hsplineno_save;
1116         hscolno = hscolno_save;
1117         hspcolno = hspcolno_save;
1118         etags = etags_save;
1119         icontexts = icontexts_save - 1;
1120         icontexts_save = 0;
1121 #ifdef HSP_DEBUG
1122         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1123 #endif
1124         eof = FALSE;
1125
1126         /* RETURN(LEOF); */
1127         hsperror("No longer using yacc to parse interface files");
1128
1129     } else {
1130         yyterminate();
1131     }
1132     abort(); /* should never get here! */
1133     return(0);
1134 }
1135
1136 /**********************************************************************
1137 *                                                                     *
1138 *                                                                     *
1139 *     Input Processing for Interfaces -- Not currently used !!!       *
1140 *                                                                     *
1141 *                                                                     *
1142 **********************************************************************/
1143
1144 /* setyyin(file)        open file as new lex input buffer */
1145 extern FILE *yyin;
1146
1147 void
1148 setyyin(char *file)
1149 {
1150     hsbuf_save = YY_CURRENT_BUFFER;
1151     if ((yyin = fopen(file, "r")) == NULL) {
1152         char errbuf[ERR_BUF_SIZE];
1153
1154         sprintf(errbuf, "can't read \"%-.50s\"", file);
1155         hsperror(errbuf);
1156     }
1157     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1158
1159     hslineno_save = hslineno;
1160     hsplineno_save = hsplineno;
1161     hslineno = hsplineno = 1;
1162
1163     filename_save = input_filename;
1164     input_filename = NULL;
1165     new_filename(file);
1166     hscolno_save = hscolno;
1167     hspcolno_save = hspcolno;
1168     hscolno = hspcolno = 0;
1169     etags_save = etags; /* do not do "etags" stuff in interfaces */
1170     etags = 0;          /* We remember whether we are doing it in
1171                            the module, so we can restore it later [WDP 94/09] */
1172     hsentercontext(-1);         /* partain: changed this from 0 */
1173     icontexts_save = icontexts;
1174 #ifdef HSP_DEBUG
1175     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1176 #endif
1177 }
1178
1179 static void
1180 layout_input(char *text, int len)
1181 {
1182 #ifdef HSP_DEBUG
1183     fprintf(stderr, "Scanning \"%s\"\n", text);
1184 #endif
1185
1186     hsplineno = hslineno;
1187     hspcolno = hscolno;
1188
1189     while (len-- > 0) {
1190         switch (*text++) {
1191         case '\n':
1192         case '\r':
1193         case '\f':
1194             hslineno++;
1195             hscolno = 0;
1196             break;
1197         case '\t':
1198             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1199             break;
1200         case '\v':
1201             break;
1202         default:
1203             ++hscolno;
1204             break;
1205         }
1206     }
1207 }
1208
1209 void
1210 setstartlineno(void)
1211 {
1212     startlineno = hsplineno;
1213
1214     if (modulelineno == 0) {
1215         modulelineno = startlineno;
1216     }
1217
1218 #if 1/*etags*/
1219 #else
1220     if (etags)
1221         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1222 #endif
1223 }
1224
1225 /**********************************************************************
1226 *                                                                     *
1227 *                                                                     *
1228 *                      Text Caching                                   *
1229 *                                                                     *
1230 *                                                                     *
1231 **********************************************************************/
1232
1233 #define CACHE_SIZE YY_BUF_SIZE
1234
1235 static struct {
1236     unsigned allocated;
1237     unsigned next;
1238     char *text;
1239 } textcache = { 0, 0, NULL };
1240
1241 static void
1242 cleartext(void)
1243 {
1244 /*  fprintf(stderr, "cleartext\n"); */
1245     textcache.next = 0;
1246     if (textcache.allocated == 0) {
1247         textcache.allocated = CACHE_SIZE;
1248         textcache.text = xmalloc(CACHE_SIZE);
1249     }
1250 }
1251
1252 static void
1253 addtext(char *text, unsigned length)
1254 {
1255 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1256
1257     if (length == 0)
1258         return;
1259
1260     if (textcache.next + length + 1 >= textcache.allocated) {
1261         textcache.allocated += length + CACHE_SIZE;
1262         textcache.text = xrealloc(textcache.text, textcache.allocated);
1263     }
1264     bcopy(text, textcache.text + textcache.next, length);
1265     textcache.next += length;
1266 }
1267
1268 static void
1269 addchar(char c)
1270 {
1271 /*  fprintf(stderr, "addchar: %c\n", c); */
1272
1273     if (textcache.next + 2 >= textcache.allocated) {
1274         textcache.allocated += CACHE_SIZE;
1275         textcache.text = xrealloc(textcache.text, textcache.allocated);
1276     }
1277     textcache.text[textcache.next++] = c;
1278 }
1279
1280 static char *
1281 fetchtext(unsigned *length)
1282 {
1283 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1284
1285     *length = textcache.next;
1286     textcache.text[textcache.next] = '\0';
1287     return textcache.text;
1288 }
1289
1290 /**********************************************************************
1291 *                                                                     *
1292 *                                                                     *
1293 *    Identifier Processing                                             *
1294 *                                                                     *
1295 *                                                                     *
1296 **********************************************************************/
1297
1298 /*
1299         hsnewid         Enters an id of length n into the symbol table.
1300 */
1301
1302 static void
1303 hsnewid(char *name, int length)
1304 {
1305     char save = name[length];
1306
1307     name[length] = '\0';
1308     yylval.uid = installid(name);
1309     name[length] = save;
1310 }
1311
1312 BOOLEAN
1313 hsnewqid(char *name, int length)
1314 {
1315     char* dot;
1316     char save = name[length];
1317     name[length] = '\0';
1318
1319     dot = strchr(name, '.');
1320     *dot = '\0';
1321     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1322     *dot = '.';
1323     name[length] = save;
1324
1325     return isconstr(dot+1);
1326 }