[project @ 1998-05-22 15:23:11 by simonm]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 /* The includes/config.h one */
11 #include "config.h"
12
13 #include <stdio.h>
14
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 #include <string.h>
17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #include <memory.h>
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define index strchr
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 #include <strings.h>
28 /* memory.h and strings.h conflict on some systems.  */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
30
31 #include "hspincl.h"
32 #include "hsparser.tab.h"
33 #include "constants.h"
34 #include "utils.h"
35
36 /* Our substitute for <ctype.h> */
37
38 #define NCHARS  256
39 #define _S      0x1
40 #define _D      0x2
41 #define _H      0x4
42 #define _O      0x8
43 #define _C      0x10
44
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
62
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 };
80
81 BOOLEAN
82 isconstr (char *s)
83 {
84     return(CharTable[*s]&(_C));
85 }
86
87 /**********************************************************************
88 *                                                                     *
89 *                                                                     *
90 *      Declarations                                                   *
91 *                                                                     *
92 *                                                                     *
93 **********************************************************************/
94
95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
96
97 /*
98  * For my own sanity, things that are not part of the flex skeleton
99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
100  */
101
102 static int hslineno = 0;        /* Line number at end of token */
103 int hsplineno = 0;              /* Line number at end of previous token */
104
105 static int hscolno = 0;         /* Column number at end of token */
106 int hspcolno = 0;               /* Column number at end of previous token */
107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
108
109 int modulelineno = -1;          /* The line number where the module starts */
110 int startlineno = 0;            /* The line number where something starts */
111 int endlineno = 0;              /* The line number where something ends */
112
113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
115
116 static int nested_comments;     /* For counting comment nesting depth */
117
118 /* OLD: Hacky definition of yywrap: see flex doc.
119
120    If we don't do this, then we'll have to get the default
121    yywrap from the flex library, which is often something
122    we are not good at locating.  This avoids that difficulty.
123    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124    WDP 94/09/05
125 #define yywrap() 1
126 */
127
128 /* Essential forward declarations */
129
130 static void hsnewid      PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext    (NO_ARGS);
133 static void addtext      PROTO((char *, unsigned));
134 static void addchar      PROTO((char));
135 static char *fetchtext   PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int  Return       PROTO((int));
138 static void hsentercontext PROTO((int));
139
140 /* Special file handling for IMPORTS */
141 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142
143 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
144 static char *filename_save;             /* File Name                     */
145 static int hslineno_save = 0,           /* Line Number                   */
146  hsplineno_save = 0,                    /* Line Number of Prev. token    */
147  hscolno_save = 0,                      /* Indentation                   */
148  hspcolno_save = 0;                     /* Left Indentation              */
149 static short icontexts_save = 0;        /* Indent Context Level          */
150
151 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags;                   /* that which is saved */
153
154 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
155
156 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
157                                  * inserted before token +ve  -- "}" inserted before
158                                  * token */
159
160 short icontexts = 0;            /* Which context we're in */
161
162 /*
163         Table of indentations:  right bit indicates whether to use
164           indentation rules (1 = use rules; 0 = ignore)
165
166     partain:
167     push one of these "contexts" at every "case" or "where"; the right bit says
168     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
169
170     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
171     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
172     push is shown just below.
173
174 */
175
176
177 static short indenttab[MAX_CONTEXTS] = {-1};
178
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
181
182 #define RETURN(tok) return(Return(tok))
183
184 #undef YY_DECL
185 #define YY_DECL int yylex1()
186
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189    to support older versions of flex, we'll continue to peek for now.
190  */
191 #define YY_USER_ACTION \
192     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
193
194 #if 0/*debug*/
195 #undef YY_BREAK
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
197 #endif
198
199 /* Each time we enter a new start state, we push it onto the state stack.
200 */
201 #define PUSH_STATE(n)   yy_push_state(n)
202 #define POP_STATE       yy_pop_state()
203
204 %}
205 /* Options:
206     8bit (8-bit input)
207     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208     never-interactive (to go a bit faster)
209     stack (use a start-condition stack)
210 */
211 %option 8bit
212 %option noyywrap
213 %option never-interactive
214 %option stack
215
216 /* The start states are:
217    Code -- normal Haskell code (principal lexer)
218    GlaExt -- Haskell code with Glasgow extensions
219    Comment -- Nested comment processing
220    String -- Inside a string literal with backslashes
221    StringEsc -- Immediately following a backslash in a string literal
222    Char -- Inside a character literal with backslashes
223    CharEsc -- Immediately following a backslash in a character literal 
224
225    Note that the INITIAL state is unused.  Also note that these states
226    are _exclusive_.  All rules should be prefixed with an appropriate
227    list of start states.
228  */
229
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231
232 isoS                    [\xa1-\xbf\xd7\xf7]
233 isoL                    [\xc0-\xd6\xd8-\xde]
234 isol                    [\xdf-\xf6\xf8-\xff]
235 isoA                    [\xa1-\xff]
236
237 D                       [0-9]
238 O                       [0-7]
239 H                       [0-9A-Fa-f]
240 N                       {D}+
241 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 SId                     {S}{S}*
244 L                       [A-Z\xc0-\xd6\xd8-\xde]
245 l                       [a-z\xdf-\xf6\xf8-\xff]
246 I                       {L}|{l}
247 i                       {L}|{l}|[0-9'_]
248 Id                      {I}{i}*
249 Mod                     {L}{i}*
250 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
251 CNTRL                   [@A-Z\[\\\]^_]
252 WS                      [ \t\n\r\f\v]
253 NL                      [\n\r]
254
255 %%
256
257 %{
258     /*
259      * Simple comments and whitespace.  Normally, we would just ignore these, but
260      * in case we're processing a string escape, we need to note that we've seen
261      * a gap.
262      *
263      * Note that we cater for a comment line that *doesn't* end in a newline.
264      * This is incorrect, strictly speaking, but seems like the right thing
265      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
266      *
267      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
268      */
269 %}
270
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
273
274 %{
275     /* 
276      * Special GHC pragma rules.  Do we need a start state for interface files,
277      * so these won't be matched in source files? --JSM
278      */
279
280 %}
281
282 %{
283 /* I believe the next rule is not ever matched.
284
285    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
286    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
287    where possible, so the lexer should now never see cpp directives
288    like '# ' and '#line'.
289
290    -- SOF
291
292 <Code,GlaExt>^"# ".*{NL}    {
293                           char tempf[FILENAME_SIZE];
294                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
295                           new_filename(tempf);
296                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
297                         }
298
299 <Code,GlaExt>^"#line ".*{NL}    {
300                           char tempf[FILENAME_SIZE];
301                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
302                           new_filename(tempf); 
303                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
304                         }
305 */
306 %}
307
308 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
309                           /* partain: pragma-style line directive */
310                           char tempf[FILENAME_SIZE];
311                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
312                           new_filename(tempf);
313                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
314                         }
315
316 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
317                               PUSH_STATE(UserPragma);
318                               RETURN(INTERFACE_UPRAGMA);
319                             }
320 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
321                               PUSH_STATE(UserPragma);
322                               RETURN(SPECIALISE_UPRAGMA);
323                             }
324 <Code,GlaExt>"{-#"{WS}*"INLINE" {
325                               PUSH_STATE(UserPragma);
326                               RETURN(INLINE_UPRAGMA);
327                             }
328 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
329                               PUSH_STATE(UserPragma);
330                               RETURN(NOINLINE_UPRAGMA);
331                             }
332 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
333                               PUSH_STATE(UserPragma);
334                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
335                             }
336 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
337                               /* these are handled by hscpp */
338                               nested_comments =1;
339                               PUSH_STATE(Comment);
340                             }
341 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
342                               /* these are for the driver! */
343                               nested_comments =1;
344                               PUSH_STATE(Comment);
345                             }
346 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
347                               /* these are used by `make depend' and the
348                                  compiler to indicate that a module should
349                                  be imported from source */
350                               nested_comments =1;
351                               RETURN(SOURCE_UPRAGMA); 
352                             }
353
354 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
355                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
356                                 input_filename, hsplineno);
357                               format_string(stderr, (unsigned char *) yytext, yyleng);
358                               fputs("'\n", stderr);
359                               nested_comments = 1;
360                               PUSH_STATE(Comment);
361                             }
362 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
363
364 %{
365     /*
366      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
367      * intentionally accepted as a keyword even for normal <Code>.
368      */
369 %}
370
371 <Code,GlaExt>"case"             { RETURN(CASE); }
372 <Code,GlaExt>"class"            { RETURN(CLASS); }
373 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
374 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
375 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
376 <Code,GlaExt>"do"               { RETURN(DO); }
377 <Code,GlaExt>"else"             { RETURN(ELSE); }
378 <Code,GlaExt>"if"               { RETURN(IF); }
379 <Code,GlaExt>"import"           { RETURN(IMPORT); }
380 <Code,GlaExt>"in"               { RETURN(IN); }
381 <Code,GlaExt>"infix"            { RETURN(INFIX); }
382 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
383 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
384 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
385 <Code,GlaExt>"let"              { RETURN(LET); }
386 <Code,GlaExt>"module"           { RETURN(MODULE); }
387 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
388 <Code,GlaExt>"of"               { RETURN(OF); }
389 <Code,GlaExt>"then"             { RETURN(THEN); }
390 <Code,GlaExt>"type"             { RETURN(TYPE); }
391 <Code,GlaExt>"where"            { RETURN(WHERE); }
392
393 <Code,GlaExt>"as"               { RETURN(AS); }
394 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
395 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
396
397 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
398 <GlaExt>"_ccall_"               { RETURN(CCALL); }
399 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
400 <GlaExt>"_casm_"                { RETURN(CASM); }
401 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
402
403 %{
404     /* 
405      * Haskell operators: special, reservedops and useful varsyms
406      */
407 %}
408
409 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
410 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
411 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
412 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
413 <Code,GlaExt>"{"                { RETURN(OCURLY); }
414 <Code,GlaExt>"}"                { RETURN(CCURLY); }
415 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
416 <Code,GlaExt>";"                { RETURN(SEMI); }
417 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
418 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
419
420 <Code,GlaExt>".."               { RETURN(DOTDOT); }
421 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
422 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
423 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
424 <Code,GlaExt>"|"                { RETURN(VBAR); }
425 <Code,GlaExt>"<-"               { RETURN(LARROW); }
426 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
427 <Code,GlaExt>"-"                { RETURN(MINUS); }
428 <Code,GlaExt>"+"                { RETURN(PLUS); }
429
430 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
431 <Code,GlaExt>"@"                { RETURN(AT); }
432 <Code,GlaExt>"!"                { RETURN(BANG); }
433 <Code,GlaExt>"~"                { RETURN(LAZY); }
434
435 %{
436     /*
437      * Integers and (for Glasgow extensions) primitive integers.  Note that
438      * we pass all of the text on to the parser, because flex/C can't handle
439      * arbitrary precision numbers.
440      */
441 %}
442
443 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
444                          yylval.uid = xstrndup(yytext, yyleng - 1);
445                          RETURN(INTPRIM);
446                         }
447 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
448                          yylval.uid = xstrndup(yytext, yyleng);
449                          RETURN(INTEGER);
450                         }
451 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
452                          yylval.uid = xstrndup(yytext, yyleng - 1);
453                          RETURN(INTPRIM);
454                         }
455 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
456                          yylval.uid = xstrndup(yytext, yyleng);
457                          RETURN(INTEGER);
458                         }
459 <GlaExt>("-")?{N}"#"    {
460                          yylval.uid = xstrndup(yytext, yyleng - 1);
461                          RETURN(INTPRIM);
462                         }
463 <Code,GlaExt,UserPragma>{N} {
464                          yylval.uid = xstrndup(yytext, yyleng);
465                          RETURN(INTEGER);
466                         }
467
468 %{
469     /*
470      * Floats and (for Glasgow extensions) primitive floats/doubles.
471      */
472 %}
473
474 <GlaExt>("-")?{F}"##"   {
475                          yylval.uid = xstrndup(yytext, yyleng - 2);
476                          RETURN(DOUBLEPRIM);
477                         }
478 <GlaExt>("-")?{F}"#"    {
479                          yylval.uid = xstrndup(yytext, yyleng - 1);
480                          RETURN(FLOATPRIM);
481                         }
482 <Code,GlaExt>{F}        {
483                          yylval.uid = xstrndup(yytext, yyleng);
484                          RETURN(FLOAT);
485                         }
486
487 %{
488     /*
489      * Funky ``foo'' style C literals for Glasgow extensions
490      */
491 %}
492
493 <GlaExt>"``"[^']+"''"   {
494                          hsnewid(yytext + 2, yyleng - 4);
495                          RETURN(CLITLIT);
496                         }
497
498 %{
499     /*
500      * Identifiers, both variables and operators.  The trailing hash is allowed
501      * for Glasgow extensions.
502      */
503 %}
504
505
506 %{
507 /* These SHOULDNAE work in "Code" (sigh) */
508 %}
509 <GlaExt,UserPragma>{Id}"#" { 
510                         if (! nonstandardFlag) {
511                             char errbuf[ERR_BUF_SIZE];
512                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
513                             hsperror(errbuf);
514                          }
515                          hsnewid(yytext, yyleng);
516                          RETURN(isconstr(yytext) ? CONID : VARID);
517                         }
518 <Code,GlaExt,UserPragma>{Id}    {
519                          hsnewid(yytext, yyleng);
520                          RETURN(isconstr(yytext) ? CONID : VARID);
521                         }
522 <Code,GlaExt,UserPragma>{SId}   {
523                          hsnewid(yytext, yyleng);
524                          RETURN(isconstr(yytext) ? CONSYM : VARSYM);
525                         }
526 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
527                          BOOLEAN is_constr;
528                          if (! nonstandardFlag) {
529                             char errbuf[ERR_BUF_SIZE];
530                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
531                             hsperror(errbuf);
532                          }
533                          is_constr = hsnewqid(yytext, yyleng);
534                          RETURN(is_constr ? QCONID : QVARID);
535                         }
536 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
537                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
538                          RETURN(is_constr ? QCONID : QVARID);
539                         }
540 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
541                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
542                          RETURN(is_constr ? QCONSYM : QVARSYM);
543                         }
544
545 %{
546     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
547
548     /* Because we can make the former well-behaved (we defined them).
549
550        Sadly, the latter is defined by Haskell, which allows such
551        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
552     */
553 %}
554
555 <GlaExt,UserPragma>"`"{Id}"#`"  {       
556                          hsnewid(yytext + 1, yyleng - 2);
557                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
558                         }
559
560 %{
561     /*
562      * Character literals.  The first form is the quick form, for character
563      * literals that don't contain backslashes.  Literals with backslashes are
564      * lexed through multiple rules.  First, we match the open ' and as many
565      * normal characters as possible.  This puts us into the <Char> state, where
566      * a backslash is legal.  Then, we match the backslash and move into the 
567      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
568      * characters and the close '.  We may end up with too many characters, but
569      * this allows us to easily share the lex rules with strings.  Excess characters
570      * are ignored with a warning.
571      */
572 %}
573
574 <GlaExt>'({CHAR}|"\"")"'#" {
575                          yylval.uhstring = installHstring(1, yytext+1);
576                          RETURN(CHARPRIM);
577                         }
578 <Code,GlaExt>'({CHAR}|"\"")'    {
579                          yylval.uhstring = installHstring(1, yytext+1);
580                          RETURN(CHAR);
581                         }
582 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
583                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
584                          hsperror(errbuf);
585                         }
586 <Code,GlaExt>'({CHAR}|"\"")* {
587                          hsmlcolno = hspcolno;
588                          cleartext();
589                          addtext(yytext+1, yyleng-1);
590                          PUSH_STATE(Char);
591                         }
592 <Char>({CHAR}|"\"")*'#  {
593                          unsigned length;
594                          char *text;
595
596                          addtext(yytext, yyleng - 2);
597                          text = fetchtext(&length);
598
599                          if (! nonstandardFlag) {
600                             char errbuf[ERR_BUF_SIZE];
601                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
602                             hsperror(errbuf);
603                          }
604
605                          if (length > 1) {
606                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
607                               input_filename, hsplineno, hspcolno + 1);
608                             format_string(stderr, (unsigned char *) text, length);
609                             fputs("' too long\n", stderr);
610                             hsperror("");
611                          }
612                          yylval.uhstring = installHstring(1, text);
613                          hspcolno = hsmlcolno;
614                          POP_STATE;
615                          RETURN(CHARPRIM); 
616                         }
617 <Char>({CHAR}|"\"")*'   {
618                          unsigned length;
619                          char *text;
620
621                          addtext(yytext, yyleng - 1);
622                          text = fetchtext(&length);
623
624                          if (length > 1) {
625                             fprintf(stderr, "%s:%d:%d: Character literal '",
626                               input_filename, hsplineno, hspcolno + 1);
627                             format_string(stderr, (unsigned char *) text, length);
628                             fputs("' too long\n", stderr);
629                             hsperror("");
630                          }
631                          yylval.uhstring = installHstring(1, text);
632                          hspcolno = hsmlcolno;
633                          POP_STATE;
634                          RETURN(CHAR); 
635                         }
636 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
637
638
639 %{
640     /*
641      * String literals.  The first form is the quick form, for string literals
642      * that don't contain backslashes.  Literals with backslashes are lexed
643      * through multiple rules.  First, we match the open " and as many normal
644      * characters as possible.  This puts us into the <String> state, where
645      * a backslash is legal.  Then, we match the backslash and move into the 
646      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
647      * characters, moving back and forth between <String> and <StringEsc> as more
648      * backslashes are encountered.  (We may even digress into <Comment> mode if we
649      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
650      * of normal characters and the close ".
651      */
652 %}
653
654 <GlaExt>"\""({CHAR}|"'")*"\""#  {
655                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
656                             /* the -3 accounts for the " on front, "# on the end */
657                          RETURN(STRINGPRIM); 
658                         }
659 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
660                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
661                          RETURN(STRING); 
662                         }
663 <Code,GlaExt>"\""({CHAR}|"'")* {
664                          hsmlcolno = hspcolno;
665                          cleartext();
666                          addtext(yytext+1, yyleng-1);
667                          PUSH_STATE(String);
668                         }
669 <String>({CHAR}|"'")*"\"#"   {
670                          unsigned length;
671                          char *text;
672
673                          addtext(yytext, yyleng-2);
674                          text = fetchtext(&length);
675
676                          if (! nonstandardFlag) {
677                             char errbuf[ERR_BUF_SIZE];
678                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
679                             hsperror(errbuf);
680                          }
681
682                          yylval.uhstring = installHstring(length, text);
683                          hspcolno = hsmlcolno;
684                          POP_STATE;
685                          RETURN(STRINGPRIM);
686                         }
687 <String>({CHAR}|"'")*"\""   {
688                          unsigned length;
689                          char *text;
690
691                          addtext(yytext, yyleng-1);
692                          text = fetchtext(&length);
693
694                          yylval.uhstring = installHstring(length, text);
695                          hspcolno = hsmlcolno;
696                          POP_STATE;
697                          RETURN(STRING); 
698                         }
699 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
700
701 %{
702     /*
703      * Character and string escapes are roughly the same, but strings have the
704      * extra `\&' sequence which is not allowed for characters.  Also, comments
705      * are allowed in the <StringEsc> state.  (See the comment section much
706      * further down.)
707      *
708      * NB: Backslashes and tabs are stored in strings as themselves.
709      * But if we print them (in printtree.c), they must go out as
710      * "\\\\" and "\\t" respectively.  (This is because of the bogus
711      * intermediate format that the parser produces.  It uses '\t' fpr end of
712      * string, so it needs to be able to escape tabs, which means that it
713      * also needs to be able to escape the escape character ('\\').  Sigh.
714      */
715 %}
716
717 <Char>\\                { PUSH_STATE(CharEsc); }
718 <String>\\&             /* Ignore */ ;
719 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
720
721 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
722 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
723
724 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
725 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
726 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
727 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
728 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
729 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
730 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
731 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
732 <CharEsc,StringEsc>BEL  |
733 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
734 <CharEsc,StringEsc>BS   |
735 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
736 <CharEsc,StringEsc>HT   |
737 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
738 <CharEsc,StringEsc>LF   |
739 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
740 <CharEsc,StringEsc>VT   |
741 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
742 <CharEsc,StringEsc>FF   |
743 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
744 <CharEsc,StringEsc>CR   |
745 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
746 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
747 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
748 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
749 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
750 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
751 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
752 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
753 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
754 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
755 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
756 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
757 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
758 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
759 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
760 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
761 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
762 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
763 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
764 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
765 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
766 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
767 <CharEsc,StringEsc>{D}+  {
768                           int i = strtol(yytext, NULL, 10);
769                           if (i < NCHARS) {
770                              addchar((char) i);
771                           } else {
772                              char errbuf[ERR_BUF_SIZE];
773                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
774                                 yytext);
775                              hsperror(errbuf);
776                           }
777                           POP_STATE;
778                         }
779 <CharEsc,StringEsc>o{O}+ {
780                           int i = strtol(yytext + 1, NULL, 8);
781                           if (i < NCHARS) {
782                              addchar((char) i);
783                           } else {
784                              char errbuf[ERR_BUF_SIZE];
785                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
786                                 yytext);
787                              hsperror(errbuf);
788                           }
789                           POP_STATE;
790                         }
791 <CharEsc,StringEsc>x{H}+ {
792                           int i = strtol(yytext + 1, NULL, 16);
793                           if (i < NCHARS) {
794                              addchar((char) i);
795                           } else {
796                              char errbuf[ERR_BUF_SIZE];
797                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
798                                 yytext);
799                              hsperror(errbuf);
800                           }
801                           POP_STATE;
802                         }
803
804
805 %{
806     /*
807      * Nested comments.  The major complication here is in trying to match the
808      * longest lexemes possible, for better performance.  (See the flex document.)
809      * That's why the rules look so bizarre.
810      */
811 %}
812
813 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
814                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
815                         }
816
817 <Comment>[^-{]*         |
818 <Comment>"-"+[^-{}]+    |
819 <Comment>"{"+[^-{}]+    ;
820 <Comment>"{-"           { nested_comments++; }
821 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
822 <Comment>(.|\n)         ;
823
824 %{
825     /*
826      * Illegal characters.  This used to be a single rule, but we might as well
827      * pass on as much information as we have, so now we indicate our state in
828      * the error message.
829      */
830 %}
831
832 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
833                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
834                             input_filename, hsplineno, hspcolno + 1); 
835                          format_string(stderr, (unsigned char *) yytext, 1);
836                          fputs("'\n", stderr);
837                          hsperror("");
838                         }
839 <Char>(.|\n)            { 
840                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
841                             input_filename, hsplineno, hspcolno + 1); 
842                          format_string(stderr, (unsigned char *) yytext, 1);
843                          fputs("' in a character literal\n", stderr);
844                          hsperror("");
845                         }
846 <CharEsc>(.|\n)         {
847                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
848                             input_filename, hsplineno, hspcolno + 1); 
849                          format_string(stderr, (unsigned char *) yytext, 1);
850                          fputs("'\n", stderr);
851                          hsperror("");
852                         }
853 <String>(.|\n)          { if (nonstandardFlag) {
854                              addtext(yytext, yyleng);
855                           } else { 
856                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
857                                 input_filename, hsplineno, hspcolno + 1); 
858                                 format_string(stderr, (unsigned char *) yytext, 1);
859                                 fputs("' in a string literal\n", stderr);
860                                 hsperror("");
861                           }
862                         }
863 <StringEsc>(.|\n)       {
864                          if (noGap) {
865                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
866                                 input_filename, hsplineno, hspcolno + 1); 
867                              format_string(stderr, (unsigned char *) yytext, 1);
868                              fputs("'\n", stderr);
869                              hsperror("");
870                          } else {
871                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
872                                 input_filename, hsplineno, hspcolno + 1);
873                              format_string(stderr, (unsigned char *) yytext, 1);
874                              fputs("' in a string gap\n", stderr);
875                              hsperror("");
876                          }
877                         }
878
879 %{
880     /*
881      * End of file.  In any sub-state, this is an error.  However, for the primary
882      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
883      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
884      * adding virtual close curlies, or closing an interface and returning to the
885      * primary source file.
886      *
887      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
888      * line/column advancement has to be done by hand.
889      */
890 %}
891
892 <Char,CharEsc><<EOF>>   { 
893                           hsplineno = hslineno; hspcolno = hscolno;
894                           hsperror("unterminated character literal");
895                         }
896 <Comment><<EOF>>        { 
897                           hsplineno = hslineno; hspcolno = hscolno;
898                           hsperror("unterminated comment"); 
899                         }
900 <String,StringEsc><<EOF>>   { 
901                           hsplineno = hslineno; hspcolno = hscolno;
902                           hsperror("unterminated string literal"); 
903                         }
904 <UserPragma><<EOF>>     {
905                           hsplineno = hslineno; hspcolno = hscolno;
906                           hsperror("unterminated user-specified pragma"); 
907                         }
908 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
909
910 %%
911
912 /**********************************************************************
913 *                                                                     *
914 *                                                                     *
915 *     YACC/LEX Initialisation etc.                                    *
916 *                                                                     *
917 *                                                                     *
918 **********************************************************************/
919
920 /*
921    We initialise input_filename to "<stdin>".
922    This allows unnamed sources to be piped into the parser.
923 */
924
925 void
926 yyinit(void)
927 {
928     input_filename = xstrdup("<stdin>");
929
930     /* We must initialize the input buffer _now_, because we call
931        setyyin _before_ calling yylex for the first time! */
932     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
933
934     if (nonstandardFlag)
935         PUSH_STATE(GlaExt);
936     else
937         PUSH_STATE(Code);
938 }
939
940 static void
941 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
942 {
943     if (input_filename != NULL)
944         free(input_filename);
945     input_filename = xstrdup(f);
946 }
947
948 /**********************************************************************
949 *                                                                     *
950 *                                                                     *
951 *     Layout Processing                                               *
952 *                                                                     *
953 *                                                                     *
954 **********************************************************************/
955
956 /*
957         The following section deals with Haskell Layout conventions
958         forcing insertion of ; or } as appropriate
959 */
960
961 static BOOLEAN
962 hsshouldindent(void)
963 {
964     return (!forgetindent && INDENTON);
965 }
966
967
968 /* Enter new context and set new indentation level */
969 void
970 hssetindent(void)
971 {
972 #ifdef HSP_DEBUG
973     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
974 #endif
975
976     /*
977      * partain: first chk that new indent won't be less than current one; this code
978      * doesn't make sense to me; hscolno tells the position of the _end_ of the
979      * current token; what that has to do with indenting, I don't know.
980      */
981
982
983     if (hscolno - 1 <= INDENTPT) {
984         if (INDENTPT == -1)
985             return;             /* Empty input OK for Haskell 1.1 */
986         else {
987             char errbuf[ERR_BUF_SIZE];
988
989             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
990             hsperror(errbuf);
991         }
992     }
993     hsentercontext((hspcolno << 1) | 1);
994 }
995
996
997 /* Enter a new context without changing the indentation level */
998 void
999 hsincindent(void)
1000 {
1001 #ifdef HSP_DEBUG
1002     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1003 #endif
1004     hsentercontext(indenttab[icontexts] & ~1);
1005 }
1006
1007
1008 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1009 void
1010 hsindentoff(void)
1011 {
1012     forgetindent = TRUE;
1013 }
1014
1015
1016 /* Enter a new layout context. */
1017 static void
1018 hsentercontext(int indent)
1019 {
1020     /* Enter new context and set indentation as specified */
1021     if (++icontexts >= MAX_CONTEXTS) {
1022         char errbuf[ERR_BUF_SIZE];
1023
1024         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1025         hsperror(errbuf);
1026     }
1027     forgetindent = FALSE;
1028     indenttab[icontexts] = indent;
1029 #ifdef HSP_DEBUG
1030     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1031 #endif
1032 }
1033
1034
1035 /* Exit a layout context */
1036 void
1037 hsendindent(void)
1038 {
1039     --icontexts;
1040 #ifdef HSP_DEBUG
1041     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1042 #endif
1043 }
1044
1045 /*
1046  *      Return checks the indentation level and returns ;, } or the specified token.
1047  */
1048
1049 static int
1050 Return(int tok)
1051 {
1052 #ifdef HSP_DEBUG
1053     extern int yyleng;
1054 #endif
1055
1056     if (hsshouldindent()) {
1057         if (hspcolno < INDENTPT) {
1058 #ifdef HSP_DEBUG
1059             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1060 #endif
1061             hssttok = tok;
1062             return (VCCURLY);
1063         } else if (hspcolno == INDENTPT) {
1064 #ifdef HSP_DEBUG
1065             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1066 #endif
1067             hssttok = -tok;
1068             return (SEMI);
1069         }
1070     }
1071     hssttok = -1;
1072 #ifdef HSP_DEBUG
1073     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1074 #endif
1075     return (tok);
1076 }
1077
1078
1079 /*
1080  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1081  */
1082 int
1083 yylex()
1084 {
1085     int tok;
1086     static BOOLEAN eof = FALSE;
1087
1088     if (!eof) {
1089         if (hssttok != -1) {
1090             if (hssttok < 0) {
1091                 tok = -hssttok;
1092                 hssttok = -1;
1093                 return tok;
1094             }
1095             RETURN(hssttok);
1096         } else {
1097             endlineno = hslineno;
1098             if ((tok = yylex1()) != EOF)
1099                 return tok;
1100             else
1101                 eof = TRUE;
1102         }
1103     }
1104     if (icontexts > icontexts_save) {
1105         if (INDENTON) {
1106             eof = TRUE;
1107             indenttab[icontexts] = 0;
1108             return (VCCURLY);
1109         } else
1110             hsperror("missing '}' at end of file");
1111     } else if (hsbuf_save != NULL) {
1112         fclose(yyin);
1113         yy_delete_buffer(YY_CURRENT_BUFFER);
1114         yy_switch_to_buffer(hsbuf_save);
1115         hsbuf_save = NULL;
1116         new_filename(filename_save);
1117         free(filename_save);
1118         hslineno = hslineno_save;
1119         hsplineno = hsplineno_save;
1120         hscolno = hscolno_save;
1121         hspcolno = hspcolno_save;
1122         etags = etags_save;
1123         icontexts = icontexts_save - 1;
1124         icontexts_save = 0;
1125 #ifdef HSP_DEBUG
1126         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1127 #endif
1128         eof = FALSE;
1129
1130         /* RETURN(LEOF); */
1131         hsperror("No longer using yacc to parse interface files");
1132
1133     } else {
1134         yyterminate();
1135     }
1136     abort(); /* should never get here! */
1137     return(0);
1138 }
1139
1140 /**********************************************************************
1141 *                                                                     *
1142 *                                                                     *
1143 *     Input Processing for Interfaces -- Not currently used !!!       *
1144 *                                                                     *
1145 *                                                                     *
1146 **********************************************************************/
1147
1148 /* setyyin(file)        open file as new lex input buffer */
1149 extern FILE *yyin;
1150
1151 void
1152 setyyin(char *file)
1153 {
1154     hsbuf_save = YY_CURRENT_BUFFER;
1155     if ((yyin = fopen(file, "r")) == NULL) {
1156         char errbuf[ERR_BUF_SIZE];
1157
1158         sprintf(errbuf, "can't read \"%-.50s\"", file);
1159         hsperror(errbuf);
1160     }
1161     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1162
1163     hslineno_save = hslineno;
1164     hsplineno_save = hsplineno;
1165     hslineno = hsplineno = 1;
1166
1167     filename_save = input_filename;
1168     input_filename = NULL;
1169     new_filename(file);
1170     hscolno_save = hscolno;
1171     hspcolno_save = hspcolno;
1172     hscolno = hspcolno = 0;
1173     etags_save = etags; /* do not do "etags" stuff in interfaces */
1174     etags = 0;          /* We remember whether we are doing it in
1175                            the module, so we can restore it later [WDP 94/09] */
1176     hsentercontext(-1);         /* partain: changed this from 0 */
1177     icontexts_save = icontexts;
1178 #ifdef HSP_DEBUG
1179     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1180 #endif
1181 }
1182
1183 static void
1184 layout_input(char *text, int len)
1185 {
1186 #ifdef HSP_DEBUG
1187     fprintf(stderr, "Scanning \"%s\"\n", text);
1188 #endif
1189
1190     hsplineno = hslineno;
1191     hspcolno = hscolno;
1192
1193     while (len-- > 0) {
1194         switch (*text++) {
1195         case '\n':
1196         case '\r':
1197         case '\f':
1198             hslineno++;
1199             hscolno = 0;
1200             break;
1201         case '\t':
1202             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1203             break;
1204         case '\v':
1205             break;
1206         default:
1207             ++hscolno;
1208             break;
1209         }
1210     }
1211 }
1212
1213 void
1214 setstartlineno(void)
1215 {
1216     startlineno = hsplineno;
1217
1218     if (modulelineno == 0) {
1219         modulelineno = startlineno;
1220     }
1221
1222 #if 1/*etags*/
1223 #else
1224     if (etags)
1225         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1226 #endif
1227 }
1228
1229 /**********************************************************************
1230 *                                                                     *
1231 *                                                                     *
1232 *                      Text Caching                                   *
1233 *                                                                     *
1234 *                                                                     *
1235 **********************************************************************/
1236
1237 #define CACHE_SIZE YY_BUF_SIZE
1238
1239 static struct {
1240     unsigned allocated;
1241     unsigned next;
1242     char *text;
1243 } textcache = { 0, 0, NULL };
1244
1245 static void
1246 cleartext(void)
1247 {
1248 /*  fprintf(stderr, "cleartext\n"); */
1249     textcache.next = 0;
1250     if (textcache.allocated == 0) {
1251         textcache.allocated = CACHE_SIZE;
1252         textcache.text = xmalloc(CACHE_SIZE);
1253     }
1254 }
1255
1256 static void
1257 addtext(char *text, unsigned length)
1258 {
1259 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1260
1261     if (length == 0)
1262         return;
1263
1264     if (textcache.next + length + 1 >= textcache.allocated) {
1265         textcache.allocated += length + CACHE_SIZE;
1266         textcache.text = xrealloc(textcache.text, textcache.allocated);
1267     }
1268     bcopy(text, textcache.text + textcache.next, length);
1269     textcache.next += length;
1270 }
1271
1272 static void
1273 addchar(char c)
1274 {
1275 /*  fprintf(stderr, "addchar: %c\n", c); */
1276
1277     if (textcache.next + 2 >= textcache.allocated) {
1278         textcache.allocated += CACHE_SIZE;
1279         textcache.text = xrealloc(textcache.text, textcache.allocated);
1280     }
1281     textcache.text[textcache.next++] = c;
1282 }
1283
1284 static char *
1285 fetchtext(unsigned *length)
1286 {
1287 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1288
1289     *length = textcache.next;
1290     textcache.text[textcache.next] = '\0';
1291     return textcache.text;
1292 }
1293
1294 /**********************************************************************
1295 *                                                                     *
1296 *                                                                     *
1297 *    Identifier Processing                                             *
1298 *                                                                     *
1299 *                                                                     *
1300 **********************************************************************/
1301
1302 /*
1303         hsnewid         Enters an id of length n into the symbol table.
1304 */
1305
1306 static void
1307 hsnewid(char *name, int length)
1308 {
1309     char save = name[length];
1310
1311     name[length] = '\0';
1312     yylval.uid = installid(name);
1313     name[length] = save;
1314 }
1315
1316 BOOLEAN
1317 hsnewqid(char *name, int length)
1318 {
1319     char* dot;
1320     char save = name[length];
1321     name[length] = '\0';
1322
1323     dot = strchr(name, '.');
1324     *dot = '\0';
1325     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1326     *dot = '.';
1327     name[length] = save;
1328
1329     return isconstr(dot+1);
1330 }