[project @ 1996-04-07 15:41:24 by partain]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 #include "../../includes/config.h"
11
12 #include <stdio.h>
13
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
15 #include <string.h>
16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
18 #include <memory.h>
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
20 #define index strchr
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
26 #include <strings.h>
27 /* memory.h and strings.h conflict on some systems.  */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
29
30 #include "hspincl.h"
31 #include "hsparser.tab.h"
32 #include "constants.h"
33 #include "utils.h"
34
35 /* Our substitute for <ctype.h> */
36
37 #define NCHARS  256
38 #define _S      0x1
39 #define _D      0x2
40 #define _H      0x4
41 #define _O      0x8
42 #define _C      0x10
43
44 #define _isconstr(s)    (CharTable[*s]&(_C))
45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
46
47 static unsigned char CharTable[NCHARS] = {
48 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
50 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
51 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
52 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
53 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
54 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
55 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
56 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
57 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
58 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
59 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
60 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
61 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
62 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
63 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
64
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
80 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
81 };
82
83 /**********************************************************************
84 *                                                                     *
85 *                                                                     *
86 *      Declarations                                                   *
87 *                                                                     *
88 *                                                                     *
89 **********************************************************************/
90
91 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
92
93 /*
94  * For my own sanity, things that are not part of the flex skeleton
95  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
96  */
97
98 static int hslineno = 0;        /* Line number at end of token */
99 int hsplineno = 0;              /* Line number at end of previous token */
100
101 static int hscolno = 0;         /* Column number at end of token */
102 int hspcolno = 0;               /* Column number at end of previous token */
103 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
104
105 int modulelineno = -1;          /* The line number where the module starts */
106 int startlineno = 0;            /* The line number where something starts */
107 int endlineno = 0;              /* The line number where something ends */
108
109 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
110 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
111
112 static int nested_comments;     /* For counting comment nesting depth */
113
114 /* Hacky definition of yywrap: see flex doc.
115
116    If we don't do this, then we'll have to get the default
117    yywrap from the flex library, which is often something
118    we are not good at locating.  This avoids that difficulty.
119    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
120    WDP 94/09/05
121 */
122 #define yywrap() 1
123
124 /* Essential forward declarations */
125
126 static void hsnewid      PROTO((char *, int));
127 static void layout_input PROTO((char *, int));
128 static void cleartext    (NO_ARGS);
129 static void addtext      PROTO((char *, unsigned));
130 static void addchar      PROTO((char));
131 static char *fetchtext   PROTO((unsigned *));
132 static void new_filename PROTO((char *));
133 static int  Return       PROTO((int));
134 static void hsentercontext PROTO((int));
135
136 /* Special file handling for IMPORTS */
137 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
138
139 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
140 static char *filename_save;             /* File Name                     */
141 static int hslineno_save = 0,           /* Line Number                   */
142  hsplineno_save = 0,                    /* Line Number of Prev. token    */
143  hscolno_save = 0,                      /* Indentation                   */
144  hspcolno_save = 0;                     /* Left Indentation              */
145 static short icontexts_save = 0;        /* Indent Context Level          */
146
147 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
148 extern BOOLEAN etags;                   /* that which is saved */
149
150 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
151
152 static BOOLEAN in_interface = FALSE;    /* TRUE if we are reading a .hi file */
153
154 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
155 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
156 extern int maxAcceptablePragmaVersion;
157 extern int thisIfacePragmaVersion;
158
159 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
160                                  * inserted before token +ve  -- "}" inserted before
161                                  * token */
162
163 short icontexts = 0;            /* Which context we're in */
164
165
166
167 /*
168         Table of indentations:  right bit indicates whether to use
169           indentation rules (1 = use rules; 0 = ignore)
170
171     partain:
172     push one of these "contexts" at every "case" or "where"; the right bit says
173     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
174
175     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
176     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
177     push is shown just below.
178
179 */
180
181
182 static short indenttab[MAX_CONTEXTS] = {-1};
183
184 #define INDENTPT (indenttab[icontexts]>>1)
185 #define INDENTON (indenttab[icontexts]&1)
186
187 #define RETURN(tok) return(Return(tok))
188
189 #undef YY_DECL
190 #define YY_DECL int yylex1()
191
192 /* We should not peek at yy_act, but flex calls us even for the internal action
193    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
194    to support older versions of flex, we'll continue to peek for now.
195  */
196 #define YY_USER_ACTION \
197     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
198
199 #if 0/*debug*/
200 #undef YY_BREAK
201 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
202 #endif
203
204 /* Each time we enter a new start state, we push it onto the state stack.
205    Note that the rules do not allow us to underflow or overflow the stack.
206    (At least, they shouldn't.)  The maximum expected depth is 4:
207    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
208 */
209 static int StateStack[5];
210 static int StateDepth = -1;
211
212 #ifdef HSP_DEBUG
213 #define PUSH_STATE(n)   do {\
214     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
215     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
216 #define POP_STATE       do {--StateDepth;\
217     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
218     BEGIN(StateStack[StateDepth]);} while(0)
219 #else
220 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
221 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
222 #endif
223
224 %}
225
226 /* The start states are:
227    Code -- normal Haskell code (principal lexer)
228    GlaExt -- Haskell code with Glasgow extensions
229    Comment -- Nested comment processing
230    String -- Inside a string literal with backslashes
231    StringEsc -- Immediately following a backslash in a string literal
232    Char -- Inside a character literal with backslashes
233    CharEsc -- Immediately following a backslash in a character literal 
234
235    Note that the INITIAL state is unused.  Also note that these states
236    are _exclusive_.  All rules should be prefixed with an appropriate
237    list of start states.
238  */
239
240 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
241
242 isoS                    [\xa1-\xbf\xd7\xf7]
243 isoL                    [\xc0-\xd6\xd8-\xde]
244 isol                    [\xdf-\xf6\xf8-\xff]
245 isoA                    [\xa1-\xff]
246
247 D                       [0-9]
248 O                       [0-7]
249 H                       [0-9A-Fa-f]
250 N                       {D}+
251 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
252 S                       [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
253 SId                     {S}{S}*
254 L                       [A-Z\xc0-\xd6\xd8-\xde]
255 l                       [a-z\xdf-\xf6\xf8-\xff]
256 I                       {L}|{l}
257 i                       {L}|{l}|[0-9'_]
258 Id                      {I}{i}*
259 Mod                     {L}{i}*
260 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
261 CNTRL                   [@A-Z\[\\\]^_]
262 WS                      [ \t\n\r\f\v]
263 NL                      [\n\r]
264
265 %%
266
267 %{
268     /* 
269      * Special GHC pragma rules.  Do we need a start state for interface files,
270      * so these won't be matched in source files? --JSM
271      */
272 %}
273
274 <Code,GlaExt>^"# ".*{NL}    {
275                           char tempf[FILENAME_SIZE];
276                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
277                           new_filename(tempf);
278                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
279                         }
280
281 <Code,GlaExt>^"#line ".*{NL}    {
282                           char tempf[FILENAME_SIZE];
283                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
284                           new_filename(tempf); 
285                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
286                         }
287
288 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
289                           /* partain: pragma-style line directive */
290                           char tempf[FILENAME_SIZE];
291                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
292                           new_filename(tempf);
293                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
294                         }
295
296 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
297                               PUSH_STATE(UserPragma);
298                               RETURN(INTERFACE_UPRAGMA);
299                             }
300 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
301                               PUSH_STATE(UserPragma);
302                               RETURN(SPECIALISE_UPRAGMA);
303                             }
304 <Code,GlaExt>"{-#"{WS}*"INLINE" {
305                               PUSH_STATE(UserPragma);
306                               RETURN(INLINE_UPRAGMA);
307                             }
308 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
309                               PUSH_STATE(UserPragma);
310                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
311                             }
312 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
313                               PUSH_STATE(UserPragma);
314                               RETURN(DEFOREST_UPRAGMA);
315                             }
316 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
317                               fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
318                                 input_filename, hsplineno);
319                               format_string(stderr, (unsigned char *) yytext, yyleng);
320                               fputs("'\n", stderr);
321                               nested_comments = 1;
322                               PUSH_STATE(Comment);
323                             }
324 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
325
326 %{
327     /*
328      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
329      * intentionally accepted as a keyword even for normal <Code>.
330      */
331 %}
332
333 <Code,GlaExt>"case"             { RETURN(CASE); }
334 <Code,GlaExt>"class"            { RETURN(CLASS); }
335 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
336 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
337 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
338 <Code,GlaExt>"do"               { RETURN(DO); }
339 <Code,GlaExt>"else"             { RETURN(ELSE); }
340 <Code,GlaExt>"if"               { RETURN(IF); }
341 <Code,GlaExt>"import"           { RETURN(IMPORT); }
342 <Code,GlaExt>"in"               { RETURN(IN); }
343 <Code,GlaExt>"infix"            { RETURN(INFIX); }
344 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
345 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
346 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
347 <Code,GlaExt>"let"              { RETURN(LET); }
348 <Code,GlaExt>"module"           { RETURN(MODULE); }
349 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
350 <Code,GlaExt>"of"               { RETURN(OF); }
351 <Code,GlaExt>"then"             { RETURN(THEN); }
352 <Code,GlaExt>"type"             { RETURN(TYPE); }
353 <Code,GlaExt>"where"            { RETURN(WHERE); }
354
355 <Code,GlaExt>"as"               { RETURN(AS); }
356 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
357 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
358
359 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
360 <GlaExt>"_ccall_"               { RETURN(CCALL); }
361 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
362 <GlaExt>"_casm_"                { RETURN(CASM); }
363 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
364
365 %{
366     /* 
367      * Haskell operators: special, reservedops and useful varsyms
368      */
369 %}
370
371 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
372 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
373 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
374 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
375 <Code,GlaExt>"{"                { RETURN(OCURLY); }
376 <Code,GlaExt>"}"                { RETURN(CCURLY); }
377 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
378 <Code,GlaExt>";"                { RETURN(SEMI); }
379 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
380 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
381
382 <Code,GlaExt>".."               { RETURN(DOTDOT); }
383 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
384 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
385 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
386 <Code,GlaExt>"|"                { RETURN(VBAR); }
387 <Code,GlaExt>"<-"               { RETURN(LARROW); }
388 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
389 <Code,GlaExt>"-"                { RETURN(MINUS); }
390
391 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
392 <Code,GlaExt>"@"                { RETURN(AT); }
393 <Code,GlaExt>"!"                { RETURN(BANG); }
394 <Code,GlaExt>"~"                { RETURN(LAZY); }
395
396 %{
397     /*
398      * Integers and (for Glasgow extensions) primitive integers.  Note that
399      * we pass all of the text on to the parser, because flex/C can't handle
400      * arbitrary precision numbers.
401      */
402 %}
403
404 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
405                          yylval.uid = xstrndup(yytext, yyleng - 1);
406                          RETURN(INTPRIM);
407                         }
408 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
409                          yylval.uid = xstrndup(yytext, yyleng);
410                          RETURN(INTEGER);
411                         }
412 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
413                          yylval.uid = xstrndup(yytext, yyleng - 1);
414                          RETURN(INTPRIM);
415                         }
416 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
417                          yylval.uid = xstrndup(yytext, yyleng);
418                          RETURN(INTEGER);
419                         }
420 <GlaExt>("-")?{N}"#"    {
421                          yylval.uid = xstrndup(yytext, yyleng - 1);
422                          RETURN(INTPRIM);
423                         }
424 <Code,GlaExt,UserPragma>{N} {
425                          yylval.uid = xstrndup(yytext, yyleng);
426                          RETURN(INTEGER);
427                         }
428
429 %{
430     /*
431      * Floats and (for Glasgow extensions) primitive floats/doubles.
432      */
433 %}
434
435 <GlaExt>("-")?{F}"##"   {
436                          yylval.uid = xstrndup(yytext, yyleng - 2);
437                          RETURN(DOUBLEPRIM);
438                         }
439 <GlaExt>("-")?{F}"#"    {
440                          yylval.uid = xstrndup(yytext, yyleng - 1);
441                          RETURN(FLOATPRIM);
442                         }
443 <Code,GlaExt>{F}        {
444                          yylval.uid = xstrndup(yytext, yyleng);
445                          RETURN(FLOAT);
446                         }
447
448 %{
449     /*
450      * Funky ``foo'' style C literals for Glasgow extensions
451      */
452 %}
453
454 <GlaExt>"``"[^']+"''"   {
455                          hsnewid(yytext + 2, yyleng - 4);
456                          RETURN(CLITLIT);
457                         }
458
459 %{
460     /*
461      * Identifiers, both variables and operators.  The trailing hash is allowed
462      * for Glasgow extensions.
463      */
464 %}
465
466
467 %{
468 /* These SHOULDNAE work in "Code" (sigh) */
469 %}
470 <Code,GlaExt,UserPragma>{Id}"#" { 
471                          if (! (nonstandardFlag || in_interface)) {
472                             char errbuf[ERR_BUF_SIZE];
473                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
474                             hsperror(errbuf);
475                          }
476                          hsnewid(yytext, yyleng);
477                          RETURN(_isconstr(yytext) ? CONID : VARID);
478                         }
479 <Code,GlaExt,UserPragma>_+{Id} { 
480                          if (! (nonstandardFlag || in_interface)) {
481                             char errbuf[ERR_BUF_SIZE];
482                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
483                             hsperror(errbuf);
484                          }
485                          hsnewid(yytext, yyleng);
486                          RETURN(isconstr(yytext) ? CONID : VARID);
487                          /* NB: ^^^^^^^^ : not the macro! */
488                         }
489 <Code,GlaExt,UserPragma>{Id}    {
490                          hsnewid(yytext, yyleng);
491                          RETURN(_isconstr(yytext) ? CONID : VARID);
492                         }
493 <Code,GlaExt,UserPragma>{SId}   {
494                          hsnewid(yytext, yyleng);
495                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
496                         }
497 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
498                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
499                          RETURN(isconstr ? QCONID : QVARID);
500                         }
501 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
502                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
503                          RETURN(isconstr ? QCONSYM : QVARSYM);
504                         }
505
506 %{
507     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
508
509     /* Because we can make the former well-behaved (we defined them).
510
511        Sadly, the latter is defined by Haskell, which allows such
512        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
513     */
514 %}
515
516 <GlaExt,UserPragma>"`"{Id}"#`"  {       
517                          hsnewid(yytext + 1, yyleng - 2);
518                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
519                         }
520
521 %{
522     /*
523      * Character literals.  The first form is the quick form, for character
524      * literals that don't contain backslashes.  Literals with backslashes are
525      * lexed through multiple rules.  First, we match the open ' and as many
526      * normal characters as possible.  This puts us into the <Char> state, where
527      * a backslash is legal.  Then, we match the backslash and move into the 
528      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
529      * characters and the close '.  We may end up with too many characters, but
530      * this allows us to easily share the lex rules with strings.  Excess characters
531      * are ignored with a warning.
532      */
533 %}
534
535 <GlaExt>'({CHAR}|"\"")"'#" {
536                          yylval.uhstring = installHstring(1, yytext+1);
537                          RETURN(CHARPRIM);
538                         }
539 <Code,GlaExt>'({CHAR}|"\"")'    {
540                          yylval.uhstring = installHstring(1, yytext+1);
541                          RETURN(CHAR);
542                         }
543 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
544                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
545                          hsperror(errbuf);
546                         }
547 <Code,GlaExt>'({CHAR}|"\"")* {
548                          hsmlcolno = hspcolno;
549                          cleartext();
550                          addtext(yytext+1, yyleng-1);
551                          PUSH_STATE(Char);
552                         }
553 <Char>({CHAR}|"\"")*'#  {
554                          unsigned length;
555                          char *text;
556
557                          addtext(yytext, yyleng - 2);
558                          text = fetchtext(&length);
559
560                          if (! (nonstandardFlag || in_interface)) {
561                             char errbuf[ERR_BUF_SIZE];
562                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
563                             hsperror(errbuf);
564                          }
565
566                          if (length > 1) {
567                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
568                               input_filename, hsplineno, hspcolno + 1);
569                             format_string(stderr, (unsigned char *) text, length);
570                             fputs("' too long\n", stderr);
571                             hsperror("");
572                          }
573                          yylval.uhstring = installHstring(1, text);
574                          hspcolno = hsmlcolno;
575                          POP_STATE;
576                          RETURN(CHARPRIM); 
577                         }
578 <Char>({CHAR}|"\"")*'   {
579                          unsigned length;
580                          char *text;
581
582                          addtext(yytext, yyleng - 1);
583                          text = fetchtext(&length);
584
585                          if (length > 1) {
586                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
587                               input_filename, hsplineno, hspcolno + 1);
588                             format_string(stderr, (unsigned char *) text, length);
589                             fputs("' too long\n", stderr);
590                             hsperror("");
591                          }
592                          yylval.uhstring = installHstring(1, text);
593                          hspcolno = hsmlcolno;
594                          POP_STATE;
595                          RETURN(CHAR); 
596                         }
597 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
598
599
600 %{
601     /*
602      * String literals.  The first form is the quick form, for string literals
603      * that don't contain backslashes.  Literals with backslashes are lexed
604      * through multiple rules.  First, we match the open " and as many normal
605      * characters as possible.  This puts us into the <String> state, where
606      * a backslash is legal.  Then, we match the backslash and move into the 
607      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
608      * characters, moving back and forth between <String> and <StringEsc> as more
609      * backslashes are encountered.  (We may even digress into <Comment> mode if we
610      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
611      * of normal characters and the close ".
612      */
613 %}
614
615 <GlaExt>"\""({CHAR}|"'")*"\""#  {
616                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
617                             /* the -3 accounts for the " on front, "# on the end */
618                          RETURN(STRINGPRIM); 
619                         }
620 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
621                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
622                          RETURN(STRING); 
623                         }
624 <Code,GlaExt>"\""({CHAR}|"'")* {
625                          hsmlcolno = hspcolno;
626                          cleartext();
627                          addtext(yytext+1, yyleng-1);
628                          PUSH_STATE(String);
629                         }
630 <String>({CHAR}|"'")*"\"#"   {
631                          unsigned length;
632                          char *text;
633
634                          addtext(yytext, yyleng-2);
635                          text = fetchtext(&length);
636
637                          if (! (nonstandardFlag || in_interface)) {
638                             char errbuf[ERR_BUF_SIZE];
639                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
640                             hsperror(errbuf);
641                          }
642
643                          yylval.uhstring = installHstring(length, text);
644                          hspcolno = hsmlcolno;
645                          POP_STATE;
646                          RETURN(STRINGPRIM);
647                         }
648 <String>({CHAR}|"'")*"\""   {
649                          unsigned length;
650                          char *text;
651
652                          addtext(yytext, yyleng-1);
653                          text = fetchtext(&length);
654
655                          yylval.uhstring = installHstring(length, text);
656                          hspcolno = hsmlcolno;
657                          POP_STATE;
658                          RETURN(STRING); 
659                         }
660 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
661
662 %{
663     /*
664      * Character and string escapes are roughly the same, but strings have the
665      * extra `\&' sequence which is not allowed for characters.  Also, comments
666      * are allowed in the <StringEsc> state.  (See the comment section much
667      * further down.)
668      *
669      * NB: Backslashes and tabs are stored in strings as themselves.
670      * But if we print them (in printtree.c), they must go out as
671      * "\\\\" and "\\t" respectively.  (This is because of the bogus
672      * intermediate format that the parser produces.  It uses '\t' fpr end of
673      * string, so it needs to be able to escape tabs, which means that it
674      * also needs to be able to escape the escape character ('\\').  Sigh.
675      */
676 %}
677
678 <Char>\\                { PUSH_STATE(CharEsc); }
679 <String>\\&             /* Ignore */ ;
680 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
681
682 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
683 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
684
685 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
686 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
687 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
688 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
689 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
690 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
691 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
692 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
693 <CharEsc,StringEsc>BEL  |
694 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
695 <CharEsc,StringEsc>BS   |
696 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
697 <CharEsc,StringEsc>HT   |
698 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
699 <CharEsc,StringEsc>LF   |
700 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
701 <CharEsc,StringEsc>VT   |
702 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
703 <CharEsc,StringEsc>FF   |
704 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
705 <CharEsc,StringEsc>CR   |
706 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
707 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
708 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
709 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
710 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
711 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
712 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
713 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
714 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
715 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
716 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
717 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
718 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
719 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
720 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
721 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
722 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
723 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
724 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
725 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
726 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
727 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
728 <CharEsc,StringEsc>{D}+  {
729                           int i = strtol(yytext, NULL, 10);
730                           if (i < NCHARS) {
731                              addchar((char) i);
732                           } else {
733                              char errbuf[ERR_BUF_SIZE];
734                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
735                                 yytext);
736                              hsperror(errbuf);
737                           }
738                           POP_STATE;
739                         }
740 <CharEsc,StringEsc>o{O}+ {
741                           int i = strtol(yytext + 1, NULL, 8);
742                           if (i < NCHARS) {
743                              addchar((char) i);
744                           } else {
745                              char errbuf[ERR_BUF_SIZE];
746                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
747                                 yytext);
748                              hsperror(errbuf);
749                           }
750                           POP_STATE;
751                         }
752 <CharEsc,StringEsc>x{H}+ {
753                           int i = strtol(yytext + 1, NULL, 16);
754                           if (i < NCHARS) {
755                              addchar((char) i);
756                           } else {
757                              char errbuf[ERR_BUF_SIZE];
758                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
759                                 yytext);
760                              hsperror(errbuf);
761                           }
762                           POP_STATE;
763                         }
764
765 %{
766     /*
767      * Simple comments and whitespace.  Normally, we would just ignore these, but
768      * in case we're processing a string escape, we need to note that we've seen
769      * a gap.
770      *
771      * Note that we cater for a comment line that *doesn't* end in a newline.
772      * This is incorrect, strictly speaking, but seems like the right thing
773      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
774      */
775 %}
776
777 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
778 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
779
780 %{
781     /*
782      * Nested comments.  The major complication here is in trying to match the
783      * longest lexemes possible, for better performance.  (See the flex document.)
784      * That's why the rules look so bizarre.
785      */
786 %}
787
788 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
789                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
790                         }
791
792 <Comment>[^-{]*         |
793 <Comment>"-"+[^-{}]+    |
794 <Comment>"{"+[^-{}]+    ;
795 <Comment>"{-"           { nested_comments++; }
796 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
797 <Comment>(.|\n)         ;
798
799 %{
800     /*
801      * Illegal characters.  This used to be a single rule, but we might as well
802      * pass on as much information as we have, so now we indicate our state in
803      * the error message.
804      */
805 %}
806
807 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
808                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
809                             input_filename, hsplineno, hspcolno + 1); 
810                          format_string(stderr, (unsigned char *) yytext, 1);
811                          fputs("'\n", stderr);
812                          hsperror("");
813                         }
814 <Char>(.|\n)            { 
815                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
816                             input_filename, hsplineno, hspcolno + 1); 
817                          format_string(stderr, (unsigned char *) yytext, 1);
818                          fputs("' in a character literal\n", stderr);
819                          hsperror("");
820                         }
821 <CharEsc>(.|\n)         {
822                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
823                             input_filename, hsplineno, hspcolno + 1); 
824                          format_string(stderr, (unsigned char *) yytext, 1);
825                          fputs("'\n", stderr);
826                          hsperror("");
827                         }
828 <String>(.|\n)          { if (nonstandardFlag) {
829                              addtext(yytext, yyleng);
830                           } else { 
831                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
832                                 input_filename, hsplineno, hspcolno + 1); 
833                                 format_string(stderr, (unsigned char *) yytext, 1);
834                                 fputs("' in a string literal\n", stderr);
835                                 hsperror("");
836                           }
837                         }
838 <StringEsc>(.|\n)       {
839                          if (noGap) {
840                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
841                                 input_filename, hsplineno, hspcolno + 1); 
842                              format_string(stderr, (unsigned char *) yytext, 1);
843                              fputs("'\n", stderr);
844                              hsperror("");
845                          } else {
846                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
847                                 input_filename, hsplineno, hspcolno + 1);
848                              format_string(stderr, (unsigned char *) yytext, 1);
849                              fputs("' in a string gap\n", stderr);
850                              hsperror("");
851                          }
852                         }
853
854 %{
855     /*
856      * End of file.  In any sub-state, this is an error.  However, for the primary
857      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
858      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
859      * adding virtual close curlies, or closing an interface and returning to the
860      * primary source file.
861      *
862      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
863      * line/column advancement has to be done by hand.
864      */
865 %}
866
867 <Char,CharEsc><<EOF>>   { 
868                           hsplineno = hslineno; hspcolno = hscolno;
869                           hsperror("unterminated character literal");
870                         }
871 <Comment><<EOF>>        { 
872                           hsplineno = hslineno; hspcolno = hscolno;
873                           hsperror("unterminated comment"); 
874                         }
875 <String,StringEsc><<EOF>>   { 
876                           hsplineno = hslineno; hspcolno = hscolno;
877                           hsperror("unterminated string literal"); 
878                         }
879 <UserPragma><<EOF>>     {
880                           hsplineno = hslineno; hspcolno = hscolno;
881                           hsperror("unterminated user-specified pragma"); 
882                         }
883 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
884
885 %%
886
887 /**********************************************************************
888 *                                                                     *
889 *                                                                     *
890 *     YACC/LEX Initialisation etc.                                    *
891 *                                                                     *
892 *                                                                     *
893 **********************************************************************/
894
895 /*
896    We initialise input_filename to "<stdin>".
897    This allows unnamed sources to be piped into the parser.
898 */
899
900 extern BOOLEAN acceptPrim;
901
902 void
903 yyinit(void)
904 {
905     input_filename = xstrdup("<stdin>");
906
907     /* We must initialize the input buffer _now_, because we call
908        setyyin _before_ calling yylex for the first time! */
909     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
910
911     if (acceptPrim)
912         PUSH_STATE(GlaExt);
913     else
914         PUSH_STATE(Code);
915 }
916
917 static void
918 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
919 {
920     if (input_filename != NULL)
921         free(input_filename);
922     input_filename = xstrdup(f);
923 }
924
925 /**********************************************************************
926 *                                                                     *
927 *                                                                     *
928 *     Layout Processing                                               *
929 *                                                                     *
930 *                                                                     *
931 **********************************************************************/
932
933 /*
934         The following section deals with Haskell Layout conventions
935         forcing insertion of ; or } as appropriate
936 */
937
938 static BOOLEAN
939 hsshouldindent(void)
940 {
941     return (!forgetindent && INDENTON);
942 }
943
944
945 /* Enter new context and set new indentation level */
946 void
947 hssetindent(void)
948 {
949 #ifdef HSP_DEBUG
950     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
951 #endif
952
953     /*
954      * partain: first chk that new indent won't be less than current one; this code
955      * doesn't make sense to me; hscolno tells the position of the _end_ of the
956      * current token; what that has to do with indenting, I don't know.
957      */
958
959
960     if (hscolno - 1 <= INDENTPT) {
961         if (INDENTPT == -1)
962             return;             /* Empty input OK for Haskell 1.1 */
963         else {
964             char errbuf[ERR_BUF_SIZE];
965
966             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
967             hsperror(errbuf);
968         }
969     }
970     hsentercontext((hspcolno << 1) | 1);
971 }
972
973
974 /* Enter a new context without changing the indentation level */
975 void
976 hsincindent(void)
977 {
978 #ifdef HSP_DEBUG
979     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
980 #endif
981     hsentercontext(indenttab[icontexts] & ~1);
982 }
983
984
985 /* Turn off indentation processing, usually because an explicit "{" has been seen */
986 void
987 hsindentoff(void)
988 {
989     forgetindent = TRUE;
990 }
991
992
993 /* Enter a new layout context. */
994 static void
995 hsentercontext(int indent)
996 {
997     /* Enter new context and set indentation as specified */
998     if (++icontexts >= MAX_CONTEXTS) {
999         char errbuf[ERR_BUF_SIZE];
1000
1001         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1002         hsperror(errbuf);
1003     }
1004     forgetindent = FALSE;
1005     indenttab[icontexts] = indent;
1006 #ifdef HSP_DEBUG
1007     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1008 #endif
1009 }
1010
1011
1012 /* Exit a layout context */
1013 void
1014 hsendindent(void)
1015 {
1016     --icontexts;
1017 #ifdef HSP_DEBUG
1018     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1019 #endif
1020 }
1021
1022 /*
1023  *      Return checks the indentation level and returns ;, } or the specified token.
1024  */
1025
1026 static int
1027 Return(int tok)
1028 {
1029 #ifdef HSP_DEBUG
1030     extern int yyleng;
1031 #endif
1032
1033     if (hsshouldindent()) {
1034         if (hspcolno < INDENTPT) {
1035 #ifdef HSP_DEBUG
1036             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1037 #endif
1038             hssttok = tok;
1039             return (VCCURLY);
1040         } else if (hspcolno == INDENTPT) {
1041 #ifdef HSP_DEBUG
1042             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1043 #endif
1044             hssttok = -tok;
1045             return (SEMI);
1046         }
1047     }
1048     hssttok = -1;
1049 #ifdef HSP_DEBUG
1050     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1051 #endif
1052     return (tok);
1053 }
1054
1055
1056 /*
1057  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1058  */
1059 int
1060 yylex()
1061 {
1062     int tok;
1063     static BOOLEAN eof = FALSE;
1064
1065     if (!eof) {
1066         if (hssttok != -1) {
1067             if (hssttok < 0) {
1068                 tok = -hssttok;
1069                 hssttok = -1;
1070                 return tok;
1071             }
1072             RETURN(hssttok);
1073         } else {
1074             endlineno = hslineno;
1075             if ((tok = yylex1()) != EOF)
1076                 return tok;
1077             else
1078                 eof = TRUE;
1079         }
1080     }
1081     if (icontexts > icontexts_save) {
1082         if (INDENTON) {
1083             eof = TRUE;
1084             indenttab[icontexts] = 0;
1085             return (VCCURLY);
1086         } else
1087             hsperror("missing '}' at end of file");
1088     } else if (hsbuf_save != NULL) {
1089         fclose(yyin);
1090         yy_delete_buffer(YY_CURRENT_BUFFER);
1091         yy_switch_to_buffer(hsbuf_save);
1092         hsbuf_save = NULL;
1093         new_filename(filename_save);
1094         free(filename_save);
1095         hslineno = hslineno_save;
1096         hsplineno = hsplineno_save;
1097         hscolno = hscolno_save;
1098         hspcolno = hspcolno_save;
1099         etags = etags_save;
1100         in_interface = FALSE;
1101         icontexts = icontexts_save - 1;
1102         icontexts_save = 0;
1103 #ifdef HSP_DEBUG
1104         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1105 #endif
1106         eof = FALSE;
1107
1108         /* RETURN(LEOF); */
1109         hsperror("No longer using yacc to parse interface files");
1110
1111     } else {
1112         yyterminate();
1113     }
1114     abort(); /* should never get here! */
1115     return(0);
1116 }
1117
1118 /**********************************************************************
1119 *                                                                     *
1120 *                                                                     *
1121 *     Input Processing for Interfaces -- Not currently used !!!       *
1122 *                                                                     *
1123 *                                                                     *
1124 **********************************************************************/
1125
1126 /* setyyin(file)        open file as new lex input buffer */
1127 extern FILE *yyin;
1128
1129 void
1130 setyyin(char *file)
1131 {
1132     hsbuf_save = YY_CURRENT_BUFFER;
1133     if ((yyin = fopen(file, "r")) == NULL) {
1134         char errbuf[ERR_BUF_SIZE];
1135
1136         sprintf(errbuf, "can't read \"%-.50s\"", file);
1137         hsperror(errbuf);
1138     }
1139     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1140
1141     hslineno_save = hslineno;
1142     hsplineno_save = hsplineno;
1143     hslineno = hsplineno = 1;
1144
1145     filename_save = input_filename;
1146     input_filename = NULL;
1147     new_filename(file);
1148     hscolno_save = hscolno;
1149     hspcolno_save = hspcolno;
1150     hscolno = hspcolno = 0;
1151     in_interface = TRUE;
1152     etags_save = etags; /* do not do "etags" stuff in interfaces */
1153     etags = 0;          /* We remember whether we are doing it in
1154                            the module, so we can restore it later [WDP 94/09] */
1155     hsentercontext(-1);         /* partain: changed this from 0 */
1156     icontexts_save = icontexts;
1157 #ifdef HSP_DEBUG
1158     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1159 #endif
1160 }
1161
1162 static void
1163 layout_input(char *text, int len)
1164 {
1165 #ifdef HSP_DEBUG
1166     fprintf(stderr, "Scanning \"%s\"\n", text);
1167 #endif
1168
1169     hsplineno = hslineno;
1170     hspcolno = hscolno;
1171
1172     while (len-- > 0) {
1173         switch (*text++) {
1174         case '\n':
1175         case '\r':
1176         case '\f':
1177             hslineno++;
1178             hscolno = 0;
1179             break;
1180         case '\t':
1181             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1182             break;
1183         case '\v':
1184             break;
1185         default:
1186             ++hscolno;
1187             break;
1188         }
1189     }
1190 }
1191
1192 void
1193 setstartlineno(void)
1194 {
1195     startlineno = hsplineno;
1196
1197     if (modulelineno == 0) {
1198         modulelineno = startlineno;
1199     }
1200
1201 #if 1/*etags*/
1202 #else
1203     if (etags)
1204         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1205 #endif
1206 }
1207
1208 /**********************************************************************
1209 *                                                                     *
1210 *                                                                     *
1211 *                      Text Caching                                   *
1212 *                                                                     *
1213 *                                                                     *
1214 **********************************************************************/
1215
1216 #define CACHE_SIZE YY_BUF_SIZE
1217
1218 static struct {
1219     unsigned allocated;
1220     unsigned next;
1221     char *text;
1222 } textcache = { 0, 0, NULL };
1223
1224 static void
1225 cleartext(void)
1226 {
1227 /*  fprintf(stderr, "cleartext\n"); */
1228     textcache.next = 0;
1229     if (textcache.allocated == 0) {
1230         textcache.allocated = CACHE_SIZE;
1231         textcache.text = xmalloc(CACHE_SIZE);
1232     }
1233 }
1234
1235 static void
1236 addtext(char *text, unsigned length)
1237 {
1238 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1239
1240     if (length == 0)
1241         return;
1242
1243     if (textcache.next + length + 1 >= textcache.allocated) {
1244         textcache.allocated += length + CACHE_SIZE;
1245         textcache.text = xrealloc(textcache.text, textcache.allocated);
1246     }
1247     bcopy(text, textcache.text + textcache.next, length);
1248     textcache.next += length;
1249 }
1250
1251 static void
1252 addchar(char c)
1253 {
1254 /*  fprintf(stderr, "addchar: %c\n", c); */
1255
1256     if (textcache.next + 2 >= textcache.allocated) {
1257         textcache.allocated += CACHE_SIZE;
1258         textcache.text = xrealloc(textcache.text, textcache.allocated);
1259     }
1260     textcache.text[textcache.next++] = c;
1261 }
1262
1263 static char *
1264 fetchtext(unsigned *length)
1265 {
1266 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1267
1268     *length = textcache.next;
1269     textcache.text[textcache.next] = '\0';
1270     return textcache.text;
1271 }
1272
1273 /**********************************************************************
1274 *                                                                     *
1275 *                                                                     *
1276 *    Identifier Processing                                             *
1277 *                                                                     *
1278 *                                                                     *
1279 **********************************************************************/
1280
1281 /*
1282         hsnewid         Enters an id of length n into the symbol table.
1283 */
1284
1285 static void
1286 hsnewid(char *name, int length)
1287 {
1288     char save = name[length];
1289
1290     name[length] = '\0';
1291     yylval.uid = installid(name);
1292     name[length] = save;
1293 }
1294
1295 BOOLEAN
1296 hsnewqid(char *name, int length)
1297 {
1298     char* dot;
1299     char save = name[length];
1300     name[length] = '\0';
1301
1302     dot = strchr(name, '.');
1303     *dot = '\0';
1304     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1305     *dot = '.';
1306     name[length] = save;
1307
1308     return _isconstr(dot+1);
1309 }
1310
1311 BOOLEAN 
1312 isconstr(char *s) /* walks past leading underscores before using the macro */
1313 {
1314     char *temp = s;
1315
1316     for ( ; temp != NULL && *temp == '_' ; temp++ );
1317
1318     return _isconstr(temp);
1319 }