[project @ 1998-12-02 13:17:09 by simonm]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 /* The includes/config.h one */
11 #include "config.h"
12
13 #include <stdio.h>
14
15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
16 #include <string.h>
17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
19 #include <memory.h>
20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
21 #define index strchr
22 #define rindex strrchr
23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
25 #define bzero(s, n) memset ((s), 0, (n))
26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
27 #include <strings.h>
28 /* memory.h and strings.h conflict on some systems.  */
29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
30
31 #include "hspincl.h"
32 #include "hsparser.tab.h"
33 #include "constants.h"
34 #include "utils.h"
35
36 /* Our substitute for <ctype.h> */
37
38 #define NCHARS  256
39 #define _S      0x1
40 #define _D      0x2
41 #define _H      0x4
42 #define _O      0x8
43 #define _C      0x10
44
45 static unsigned char CharTable[NCHARS] = {
46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
62
63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 };
80
81 BOOLEAN
82 isconstr (char *s)
83 {
84     return(CharTable[*s]&(_C));
85 }
86
87 /**********************************************************************
88 *                                                                     *
89 *                                                                     *
90 *      Declarations                                                   *
91 *                                                                     *
92 *                                                                     *
93 **********************************************************************/
94
95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
96
97 /*
98  * For my own sanity, things that are not part of the flex skeleton
99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
100  */
101
102 static int hslineno = 0;        /* Line number at end of token */
103 int hsplineno = 0;              /* Line number at end of previous token */
104
105 static int hscolno = 0;         /* Column number at end of token */
106 int hspcolno = 0;               /* Column number at end of previous token */
107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
108
109 int modulelineno = -1;          /* The line number where the module starts */
110 int startlineno = 0;            /* The line number where something starts */
111 int endlineno = 0;              /* The line number where something ends */
112
113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
115
116 static int nested_comments;     /* For counting comment nesting depth */
117
118 /* OLD: Hacky definition of yywrap: see flex doc.
119
120    If we don't do this, then we'll have to get the default
121    yywrap from the flex library, which is often something
122    we are not good at locating.  This avoids that difficulty.
123    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
124    WDP 94/09/05
125 #define yywrap() 1
126 */
127
128 /* Essential forward declarations */
129
130 static void hsnewid      PROTO((char *, int));
131 static void layout_input PROTO((char *, int));
132 static void cleartext    (NO_ARGS);
133 static void addtext      PROTO((char *, unsigned));
134 static void addchar      PROTO((char));
135 static char *fetchtext   PROTO((unsigned *));
136 static void new_filename PROTO((char *));
137 static int  Return       PROTO((int));
138 static void hsentercontext PROTO((int));
139
140 /* Special file handling for IMPORTS */
141 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
142
143 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
144 static char *filename_save;             /* File Name                     */
145 static int hslineno_save = 0,           /* Line Number                   */
146  hsplineno_save = 0,                    /* Line Number of Prev. token    */
147  hscolno_save = 0,                      /* Indentation                   */
148  hspcolno_save = 0;                     /* Left Indentation              */
149 static short icontexts_save = 0;        /* Indent Context Level          */
150
151 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
152 extern BOOLEAN etags;                   /* that which is saved */
153
154 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
155
156 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
157                                  * inserted before token +ve  -- "}" inserted before
158                                  * token */
159
160 short icontexts = 0;            /* Which context we're in */
161
162 /*
163         Table of indentations:  right bit indicates whether to use
164           indentation rules (1 = use rules; 0 = ignore)
165
166     partain:
167     push one of these "contexts" at every "case" or "where"; the right bit says
168     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
169
170     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
171     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
172     push is shown just below.
173
174 */
175
176
177 static short indenttab[MAX_CONTEXTS] = {-1};
178
179 #define INDENTPT (indenttab[icontexts]>>1)
180 #define INDENTON (indenttab[icontexts]&1)
181
182 #define RETURN(tok) return(Return(tok))
183
184 #undef YY_DECL
185 #define YY_DECL int yylex1()
186
187 /* We should not peek at yy_act, but flex calls us even for the internal action
188    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
189    to support older versions of flex, we'll continue to peek for now.
190  */
191 #define YY_USER_ACTION \
192     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
193
194 #if 0/*debug*/
195 #undef YY_BREAK
196 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
197 #endif
198
199 /* Each time we enter a new start state, we push it onto the state stack.
200 */
201 #define PUSH_STATE(n)   yy_push_state(n)
202 #define POP_STATE       yy_pop_state()
203
204 %}
205 /* Options:
206     8bit (8-bit input)
207     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
208     never-interactive (to go a bit faster)
209     stack (use a start-condition stack)
210 */
211 %option 8bit
212 %option noyywrap
213 %option never-interactive
214 %option stack
215
216 /* The start states are:
217    Code -- normal Haskell code (principal lexer)
218    GlaExt -- Haskell code with Glasgow extensions
219    Comment -- Nested comment processing
220    String -- Inside a string literal with backslashes
221    StringEsc -- Immediately following a backslash in a string literal
222    Char -- Inside a character literal with backslashes
223    CharEsc -- Immediately following a backslash in a character literal 
224
225    Note that the INITIAL state is unused.  Also note that these states
226    are _exclusive_.  All rules should be prefixed with an appropriate
227    list of start states.
228  */
229
230 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
231
232 isoS                    [\xa1-\xbf\xd7\xf7]
233 isoL                    [\xc0-\xd6\xd8-\xde]
234 isol                    [\xdf-\xf6\xf8-\xff]
235 isoA                    [\xa1-\xff]
236
237 D                       [0-9]
238 O                       [0-7]
239 H                       [0-9A-Fa-f]
240 N                       {D}+
241 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
242 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
243 SId                     {S}{S}*
244 L                       [A-Z\xc0-\xd6\xd8-\xde]
245 l                       [a-z\xdf-\xf6\xf8-\xff]
246 I                       {L}|{l}
247 i                       {L}|{l}|[0-9'_]
248 Id                      {I}{i}*
249 Mod                     {L}{i}*
250 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
251 CNTRL                   [@A-Z\[\\\]^_]
252 WS                      [ \t\n\r\f\v]
253 NL                      [\n\r]
254
255 %%
256
257 %{
258     /*
259      * Simple comments and whitespace.  Normally, we would just ignore these, but
260      * in case we're processing a string escape, we need to note that we've seen
261      * a gap.
262      *
263      * Note that we cater for a comment line that *doesn't* end in a newline.
264      * This is incorrect, strictly speaking, but seems like the right thing
265      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
266      *
267      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
268      */
269 %}
270
271 <Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
272 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
273
274 %{
275     /* 
276      * Special GHC pragma rules.  Do we need a start state for interface files,
277      * so these won't be matched in source files? --JSM
278      */
279
280 %}
281
282 %{
283 /* I believe the next rule is not ever matched.
284
285    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
286    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
287    where possible, so the lexer should now never see cpp directives
288    like '# ' and '#line'.
289
290    -- SOF
291
292 <Code,GlaExt>^"# ".*{NL}    {
293                           char tempf[FILENAME_SIZE];
294                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
295                           new_filename(tempf);
296                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
297                         }
298
299 <Code,GlaExt>^"#line ".*{NL}    {
300                           char tempf[FILENAME_SIZE];
301                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
302                           new_filename(tempf); 
303                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
304                         }
305 */
306 %}
307
308 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
309                           /* partain: pragma-style line directive */
310                           char tempf[FILENAME_SIZE];
311                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
312                           new_filename(tempf);
313                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
314                         }
315
316 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
317                               PUSH_STATE(UserPragma);
318                               RETURN(INTERFACE_UPRAGMA);
319                             }
320 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
321                               PUSH_STATE(UserPragma);
322                               RETURN(SPECIALISE_UPRAGMA);
323                             }
324 <Code,GlaExt>"{-#"{WS}*"INLINE" {
325                               PUSH_STATE(UserPragma);
326                               RETURN(INLINE_UPRAGMA);
327                             }
328 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
329                               PUSH_STATE(UserPragma);
330                               RETURN(NOINLINE_UPRAGMA);
331                             }
332 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
333                               PUSH_STATE(UserPragma);
334                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
335                             }
336 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
337                               /* these are handled by hscpp */
338                               nested_comments =1;
339                               PUSH_STATE(Comment);
340                             }
341 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
342                               /* these are for the driver! */
343                               nested_comments =1;
344                               PUSH_STATE(Comment);
345                             }
346 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
347                               /* these are used by `make depend' and the
348                                  compiler to indicate that a module should
349                                  be imported from source */
350                               nested_comments =1;
351                               RETURN(SOURCE_UPRAGMA); 
352                             }
353
354 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
355                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
356                                 input_filename, hsplineno);
357                               format_string(stderr, (unsigned char *) yytext, yyleng);
358                               fputs("'\n", stderr);
359                               nested_comments = 1;
360                               PUSH_STATE(Comment);
361                             }
362 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
363
364 %{
365     /*
366      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
367      * intentionally accepted as a keyword even for normal <Code>.
368      */
369 %}
370
371 <Code,GlaExt>"case"             { RETURN(CASE); }
372 <Code,GlaExt>"class"            { RETURN(CLASS); }
373 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
374 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
375 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
376 <Code,GlaExt>"do"               { RETURN(DO); }
377 <Code,GlaExt>"else"             { RETURN(ELSE); }
378 <Code,GlaExt>"if"               { RETURN(IF); }
379 <Code,GlaExt>"import"           { RETURN(IMPORT); }
380 <Code,GlaExt>"in"               { RETURN(IN); }
381 <Code,GlaExt>"infix"            { RETURN(INFIX); }
382 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
383 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
384 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
385 <Code,GlaExt>"let"              { RETURN(LET); }
386 <Code,GlaExt>"module"           { RETURN(MODULE); }
387 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
388 <Code,GlaExt>"of"               { RETURN(OF); }
389 <Code,GlaExt>"then"             { RETURN(THEN); }
390 <Code,GlaExt>"type"             { RETURN(TYPE); }
391 <Code,GlaExt>"where"            { RETURN(WHERE); }
392
393 <Code,GlaExt>"as"               { RETURN(AS); }
394 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
395 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
396
397 <Code,GlaExt>"forall"           { RETURN(FORALL); }
398
399 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
400 <GlaExt>"_ccall_"               { RETURN(CCALL); }
401 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
402 <GlaExt>"_casm_"                { RETURN(CASM); }
403 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
404 <GlaExt>"(#"                    { RETURN(OUNBOXPAREN); }
405 <GlaExt>"#)"                    { RETURN(CUNBOXPAREN); }
406 <GlaExt>"foreign"               { RETURN(FOREIGN); }
407 <GlaExt>"export"                { RETURN(EXPORT); }
408 <GlaExt>"label"                 { RETURN(LABEL); }
409 <GlaExt>"unsafe"                { RETURN(UNSAFE); }
410 <GlaExt>"_stdcall"              { RETURN(STDCALL); }
411 <GlaExt>"_ccall"                { RETURN(C_CALL); }
412 <GlaExt>"_pascal"               { RETURN(PASCAL); }
413 <GlaExt>"stdcall"               { RETURN(STDCALL); }
414 <GlaExt>"ccall"                 { RETURN(C_CALL); }
415 <GlaExt>"pascal"                { RETURN(PASCAL); }
416 <GlaExt>"dynamic"               { RETURN(DYNAMIC); }
417
418 %{
419     /* 
420      * Haskell operators: special, reservedops and useful varsyms
421      */
422 %}
423
424 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
425 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
426 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
427 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
428 <Code,GlaExt>"{"                { RETURN(OCURLY); }
429 <Code,GlaExt>"}"                { RETURN(CCURLY); }
430 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
431 <Code,GlaExt>";"                { RETURN(SEMI); }
432 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
433 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
434
435 <Code,GlaExt>"."                { RETURN(DOT); }
436 <Code,GlaExt>".."               { RETURN(DOTDOT); }
437 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
438 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
439 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
440 <Code,GlaExt>"|"                { RETURN(VBAR); }
441 <Code,GlaExt>"<-"               { RETURN(LARROW); }
442 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
443 <Code,GlaExt>"-"                { RETURN(MINUS); }
444 <Code,GlaExt>"+"                { RETURN(PLUS); }
445
446 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
447 <Code,GlaExt>"@"                { RETURN(AT); }
448 <Code,GlaExt>"!"                { RETURN(BANG); }
449 <Code,GlaExt>"~"                { RETURN(LAZY); }
450
451 %{
452     /*
453      * Integers and (for Glasgow extensions) primitive integers.  Note that
454      * we pass all of the text on to the parser, because flex/C can't handle
455      * arbitrary precision numbers.
456      */
457 %}
458
459 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
460                          yylval.uid = xstrndup(yytext, yyleng - 1);
461                          RETURN(INTPRIM);
462                         }
463 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
464                          yylval.uid = xstrndup(yytext, yyleng);
465                          RETURN(INTEGER);
466                         }
467 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
468                          yylval.uid = xstrndup(yytext, yyleng - 1);
469                          RETURN(INTPRIM);
470                         }
471 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
472                          yylval.uid = xstrndup(yytext, yyleng);
473                          RETURN(INTEGER);
474                         }
475 <GlaExt>("-")?{N}"#"    {
476                          yylval.uid = xstrndup(yytext, yyleng - 1);
477                          RETURN(INTPRIM);
478                         }
479 <Code,GlaExt,UserPragma>{N} {
480                          yylval.uid = xstrndup(yytext, yyleng);
481                          RETURN(INTEGER);
482                         }
483
484 %{
485     /*
486      * Floats and (for Glasgow extensions) primitive floats/doubles.
487      */
488 %}
489
490 <GlaExt>("-")?{F}"##"   {
491                          yylval.uid = xstrndup(yytext, yyleng - 2);
492                          RETURN(DOUBLEPRIM);
493                         }
494 <GlaExt>("-")?{F}"#"    {
495                          yylval.uid = xstrndup(yytext, yyleng - 1);
496                          RETURN(FLOATPRIM);
497                         }
498 <Code,GlaExt>{F}        {
499                          yylval.uid = xstrndup(yytext, yyleng);
500                          RETURN(FLOAT);
501                         }
502
503 %{
504     /*
505      * Funky ``foo'' style C literals for Glasgow extensions
506      */
507 %}
508
509 <GlaExt>"``"[^']+"''"   {
510                          hsnewid(yytext + 2, yyleng - 4);
511                          RETURN(CLITLIT);
512                         }
513
514 %{
515     /*
516      * Identifiers, both variables and operators.  The trailing hash is allowed
517      * for Glasgow extensions.
518      */
519 %}
520
521
522 %{
523 /* These SHOULDNAE work in "Code" (sigh) */
524 %}
525 <GlaExt,UserPragma>{Id}"#" { 
526                         if (! nonstandardFlag) {
527                             char errbuf[ERR_BUF_SIZE];
528                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
529                             hsperror(errbuf);
530                          }
531                          hsnewid(yytext, yyleng);
532                          RETURN(isconstr(yytext) ? CONID : VARID);
533                         }
534 <Code,GlaExt,UserPragma>{Id}    {
535                          hsnewid(yytext, yyleng);
536                          RETURN(isconstr(yytext) ? CONID : VARID);
537                         }
538 <Code,GlaExt,UserPragma>{SId}   {
539                          hsnewid(yytext, yyleng);
540                          RETURN(isconstr(yytext) ? CONSYM : VARSYM);
541                         }
542 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
543                          BOOLEAN is_constr;
544                          if (! nonstandardFlag) {
545                             char errbuf[ERR_BUF_SIZE];
546                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
547                             hsperror(errbuf);
548                          }
549                          is_constr = hsnewqid(yytext, yyleng);
550                          RETURN(is_constr ? QCONID : QVARID);
551                         }
552 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
553                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
554                          RETURN(is_constr ? QCONID : QVARID);
555                         }
556 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
557                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
558                          RETURN(is_constr ? QCONSYM : QVARSYM);
559                         }
560
561 %{
562     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
563
564     /* Because we can make the former well-behaved (we defined them).
565
566        Sadly, the latter is defined by Haskell, which allows such
567        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
568     */
569 %}
570
571 <GlaExt,UserPragma>"`"{Id}"#`"  {       
572                          hsnewid(yytext + 1, yyleng - 2);
573                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
574                         }
575
576 %{
577     /*
578      * Character literals.  The first form is the quick form, for character
579      * literals that don't contain backslashes.  Literals with backslashes are
580      * lexed through multiple rules.  First, we match the open ' and as many
581      * normal characters as possible.  This puts us into the <Char> state, where
582      * a backslash is legal.  Then, we match the backslash and move into the 
583      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
584      * characters and the close '.  We may end up with too many characters, but
585      * this allows us to easily share the lex rules with strings.  Excess characters
586      * are ignored with a warning.
587      */
588 %}
589
590 <GlaExt>'({CHAR}|"\"")"'#" {
591                          yylval.uhstring = installHstring(1, yytext+1);
592                          RETURN(CHARPRIM);
593                         }
594 <Code,GlaExt>'({CHAR}|"\"")'    {
595                          yylval.uhstring = installHstring(1, yytext+1);
596                          RETURN(CHAR);
597                         }
598 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
599                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
600                          hsperror(errbuf);
601                         }
602 <Code,GlaExt>'({CHAR}|"\"")* {
603                          hsmlcolno = hspcolno;
604                          cleartext();
605                          addtext(yytext+1, yyleng-1);
606                          PUSH_STATE(Char);
607                         }
608 <Char>({CHAR}|"\"")*'#  {
609                          unsigned length;
610                          char *text;
611
612                          addtext(yytext, yyleng - 2);
613                          text = fetchtext(&length);
614
615                          if (! nonstandardFlag) {
616                             char errbuf[ERR_BUF_SIZE];
617                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
618                             hsperror(errbuf);
619                          }
620
621                          if (length > 1) {
622                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
623                               input_filename, hsplineno, hspcolno + 1);
624                             format_string(stderr, (unsigned char *) text, length);
625                             fputs("' too long\n", stderr);
626                             hsperror("");
627                          }
628                          yylval.uhstring = installHstring(1, text);
629                          hspcolno = hsmlcolno;
630                          POP_STATE;
631                          RETURN(CHARPRIM); 
632                         }
633 <Char>({CHAR}|"\"")*'   {
634                          unsigned length;
635                          char *text;
636
637                          addtext(yytext, yyleng - 1);
638                          text = fetchtext(&length);
639
640                          if (length > 1) {
641                             fprintf(stderr, "%s:%d:%d: Character literal '",
642                               input_filename, hsplineno, hspcolno + 1);
643                             format_string(stderr, (unsigned char *) text, length);
644                             fputs("' too long\n", stderr);
645                             hsperror("");
646                          }
647                          yylval.uhstring = installHstring(1, text);
648                          hspcolno = hsmlcolno;
649                          POP_STATE;
650                          RETURN(CHAR); 
651                         }
652 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
653
654
655 %{
656     /*
657      * String literals.  The first form is the quick form, for string literals
658      * that don't contain backslashes.  Literals with backslashes are lexed
659      * through multiple rules.  First, we match the open " and as many normal
660      * characters as possible.  This puts us into the <String> state, where
661      * a backslash is legal.  Then, we match the backslash and move into the 
662      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
663      * characters, moving back and forth between <String> and <StringEsc> as more
664      * backslashes are encountered.  (We may even digress into <Comment> mode if we
665      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
666      * of normal characters and the close ".
667      */
668 %}
669
670 <GlaExt>"\""({CHAR}|"'")*"\""#  {
671                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
672                             /* the -3 accounts for the " on front, "# on the end */
673                          RETURN(STRINGPRIM); 
674                         }
675 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
676                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
677                          RETURN(STRING); 
678                         }
679 <Code,GlaExt>"\""({CHAR}|"'")* {
680                          hsmlcolno = hspcolno;
681                          cleartext();
682                          addtext(yytext+1, yyleng-1);
683                          PUSH_STATE(String);
684                         }
685 <String>({CHAR}|"'")*"\"#"   {
686                          unsigned length;
687                          char *text;
688
689                          addtext(yytext, yyleng-2);
690                          text = fetchtext(&length);
691
692                          if (! nonstandardFlag) {
693                             char errbuf[ERR_BUF_SIZE];
694                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
695                             hsperror(errbuf);
696                          }
697
698                          yylval.uhstring = installHstring(length, text);
699                          hspcolno = hsmlcolno;
700                          POP_STATE;
701                          RETURN(STRINGPRIM);
702                         }
703 <String>({CHAR}|"'")*"\""   {
704                          unsigned length;
705                          char *text;
706
707                          addtext(yytext, yyleng-1);
708                          text = fetchtext(&length);
709
710                          yylval.uhstring = installHstring(length, text);
711                          hspcolno = hsmlcolno;
712                          POP_STATE;
713                          RETURN(STRING); 
714                         }
715 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
716
717 %{
718     /*
719      * Character and string escapes are roughly the same, but strings have the
720      * extra `\&' sequence which is not allowed for characters.  Also, comments
721      * are allowed in the <StringEsc> state.  (See the comment section much
722      * further down.)
723      *
724      * NB: Backslashes and tabs are stored in strings as themselves.
725      * But if we print them (in printtree.c), they must go out as
726      * "\\\\" and "\\t" respectively.  (This is because of the bogus
727      * intermediate format that the parser produces.  It uses '\t' fpr end of
728      * string, so it needs to be able to escape tabs, which means that it
729      * also needs to be able to escape the escape character ('\\').  Sigh.
730      */
731 %}
732
733 <Char>\\                { PUSH_STATE(CharEsc); }
734 <String>\\&             /* Ignore */ ;
735 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
736
737 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
738 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
739
740 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
741 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
742 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
743 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
744 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
745 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
746 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
747 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
748 <CharEsc,StringEsc>BEL  |
749 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
750 <CharEsc,StringEsc>BS   |
751 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
752 <CharEsc,StringEsc>HT   |
753 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
754 <CharEsc,StringEsc>LF   |
755 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
756 <CharEsc,StringEsc>VT   |
757 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
758 <CharEsc,StringEsc>FF   |
759 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
760 <CharEsc,StringEsc>CR   |
761 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
762 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
763 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
764 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
765 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
766 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
767 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
768 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
769 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
770 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
771 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
772 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
773 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
774 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
775 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
776 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
777 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
778 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
779 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
780 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
781 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
782 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
783 <CharEsc,StringEsc>{D}+  {
784                           int i = strtol(yytext, NULL, 10);
785                           if (i < NCHARS) {
786                              addchar((char) i);
787                           } else {
788                              char errbuf[ERR_BUF_SIZE];
789                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
790                                 yytext);
791                              hsperror(errbuf);
792                           }
793                           POP_STATE;
794                         }
795 <CharEsc,StringEsc>o{O}+ {
796                           int i = strtol(yytext + 1, NULL, 8);
797                           if (i < NCHARS) {
798                              addchar((char) i);
799                           } else {
800                              char errbuf[ERR_BUF_SIZE];
801                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
802                                 yytext);
803                              hsperror(errbuf);
804                           }
805                           POP_STATE;
806                         }
807 <CharEsc,StringEsc>x{H}+ {
808                           int i = strtol(yytext + 1, NULL, 16);
809                           if (i < NCHARS) {
810                              addchar((char) i);
811                           } else {
812                              char errbuf[ERR_BUF_SIZE];
813                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
814                                 yytext);
815                              hsperror(errbuf);
816                           }
817                           POP_STATE;
818                         }
819
820
821 %{
822     /*
823      * Nested comments.  The major complication here is in trying to match the
824      * longest lexemes possible, for better performance.  (See the flex document.)
825      * That's why the rules look so bizarre.
826      */
827 %}
828
829 <Code,GlaExt,UserPragma,StringEsc>"{-"  { 
830                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
831                         }
832
833 <Comment>[^-{]*         |
834 <Comment>"-"+[^-{}]+    |
835 <Comment>"{"+[^-{}]+    ;
836 <Comment>"{-"           { nested_comments++; }
837 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
838 <Comment>(.|\n)         ;
839
840 %{
841     /*
842      * Illegal characters.  This used to be a single rule, but we might as well
843      * pass on as much information as we have, so now we indicate our state in
844      * the error message.
845      */
846 %}
847
848 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  { 
849                          fprintf(stderr, "%s:%d:%d: Illegal character: `", 
850                             input_filename, hsplineno, hspcolno + 1); 
851                          format_string(stderr, (unsigned char *) yytext, 1);
852                          fputs("'\n", stderr);
853                          hsperror("");
854                         }
855 <Char>(.|\n)            { 
856                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
857                             input_filename, hsplineno, hspcolno + 1); 
858                          format_string(stderr, (unsigned char *) yytext, 1);
859                          fputs("' in a character literal\n", stderr);
860                          hsperror("");
861                         }
862 <CharEsc>(.|\n)         {
863                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
864                             input_filename, hsplineno, hspcolno + 1); 
865                          format_string(stderr, (unsigned char *) yytext, 1);
866                          fputs("'\n", stderr);
867                          hsperror("");
868                         }
869 <String>(.|\n)          { if (nonstandardFlag) {
870                              addtext(yytext, yyleng);
871                           } else { 
872                                 fprintf(stderr, "%s:%d:%d: Illegal character: `", 
873                                 input_filename, hsplineno, hspcolno + 1); 
874                                 format_string(stderr, (unsigned char *) yytext, 1);
875                                 fputs("' in a string literal\n", stderr);
876                                 hsperror("");
877                           }
878                         }
879 <StringEsc>(.|\n)       {
880                          if (noGap) {
881                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
882                                 input_filename, hsplineno, hspcolno + 1); 
883                              format_string(stderr, (unsigned char *) yytext, 1);
884                              fputs("'\n", stderr);
885                              hsperror("");
886                          } else {
887                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
888                                 input_filename, hsplineno, hspcolno + 1);
889                              format_string(stderr, (unsigned char *) yytext, 1);
890                              fputs("' in a string gap\n", stderr);
891                              hsperror("");
892                          }
893                         }
894
895 %{
896     /*
897      * End of file.  In any sub-state, this is an error.  However, for the primary
898      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
899      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
900      * adding virtual close curlies, or closing an interface and returning to the
901      * primary source file.
902      *
903      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
904      * line/column advancement has to be done by hand.
905      */
906 %}
907
908 <Char,CharEsc><<EOF>>   { 
909                           hsplineno = hslineno; hspcolno = hscolno;
910                           hsperror("unterminated character literal");
911                         }
912 <Comment><<EOF>>        { 
913                           hsplineno = hslineno; hspcolno = hscolno;
914                           hsperror("unterminated comment"); 
915                         }
916 <String,StringEsc><<EOF>>   { 
917                           hsplineno = hslineno; hspcolno = hscolno;
918                           hsperror("unterminated string literal"); 
919                         }
920 <UserPragma><<EOF>>     {
921                           hsplineno = hslineno; hspcolno = hscolno;
922                           hsperror("unterminated user-specified pragma"); 
923                         }
924 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
925
926 %%
927
928 /**********************************************************************
929 *                                                                     *
930 *                                                                     *
931 *     YACC/LEX Initialisation etc.                                    *
932 *                                                                     *
933 *                                                                     *
934 **********************************************************************/
935
936 /*
937    We initialise input_filename to "<stdin>".
938    This allows unnamed sources to be piped into the parser.
939 */
940
941 void
942 yyinit(void)
943 {
944     input_filename = xstrdup("<stdin>");
945
946     /* We must initialize the input buffer _now_, because we call
947        setyyin _before_ calling yylex for the first time! */
948     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
949
950     if (nonstandardFlag)
951         PUSH_STATE(GlaExt);
952     else
953         PUSH_STATE(Code);
954 }
955
956 static void
957 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
958 {
959     if (input_filename != NULL)
960         free(input_filename);
961     input_filename = xstrdup(f);
962 }
963
964 /**********************************************************************
965 *                                                                     *
966 *                                                                     *
967 *     Layout Processing                                               *
968 *                                                                     *
969 *                                                                     *
970 **********************************************************************/
971
972 /*
973         The following section deals with Haskell Layout conventions
974         forcing insertion of ; or } as appropriate
975 */
976
977 static BOOLEAN
978 hsshouldindent(void)
979 {
980     return (!forgetindent && INDENTON);
981 }
982
983
984 /* Enter new context and set new indentation level */
985 void
986 hssetindent(void)
987 {
988 #ifdef HSP_DEBUG
989     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
990 #endif
991
992     /*
993      * partain: first chk that new indent won't be less than current one; this code
994      * doesn't make sense to me; hscolno tells the position of the _end_ of the
995      * current token; what that has to do with indenting, I don't know.
996      */
997
998
999     if (hscolno - 1 <= INDENTPT) {
1000         if (INDENTPT == -1)
1001             return;             /* Empty input OK for Haskell 1.1 */
1002         else {
1003             char errbuf[ERR_BUF_SIZE];
1004
1005             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1006             hsperror(errbuf);
1007         }
1008     }
1009     hsentercontext((hspcolno << 1) | 1);
1010 }
1011
1012
1013 /* Enter a new context without changing the indentation level */
1014 void
1015 hsincindent(void)
1016 {
1017 #ifdef HSP_DEBUG
1018     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1019 #endif
1020     hsentercontext(indenttab[icontexts] & ~1);
1021 }
1022
1023
1024 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1025 void
1026 hsindentoff(void)
1027 {
1028     forgetindent = TRUE;
1029 }
1030
1031
1032 /* Enter a new layout context. */
1033 static void
1034 hsentercontext(int indent)
1035 {
1036     /* Enter new context and set indentation as specified */
1037     if (++icontexts >= MAX_CONTEXTS) {
1038         char errbuf[ERR_BUF_SIZE];
1039
1040         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1041         hsperror(errbuf);
1042     }
1043     forgetindent = FALSE;
1044     indenttab[icontexts] = indent;
1045 #ifdef HSP_DEBUG
1046     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1047 #endif
1048 }
1049
1050
1051 /* Exit a layout context */
1052 void
1053 hsendindent(void)
1054 {
1055     --icontexts;
1056 #ifdef HSP_DEBUG
1057     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1058 #endif
1059 }
1060
1061 /*
1062  *      Return checks the indentation level and returns ;, } or the specified token.
1063  */
1064
1065 static int
1066 Return(int tok)
1067 {
1068 #ifdef HSP_DEBUG
1069     extern int yyleng;
1070 #endif
1071
1072     if (hsshouldindent()) {
1073         if (hspcolno < INDENTPT) {
1074 #ifdef HSP_DEBUG
1075             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1076 #endif
1077             hssttok = tok;
1078             return (VCCURLY);
1079         } else if (hspcolno == INDENTPT) {
1080 #ifdef HSP_DEBUG
1081             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1082 #endif
1083             hssttok = -tok;
1084             return (SEMI);
1085         }
1086     }
1087     hssttok = -1;
1088 #ifdef HSP_DEBUG
1089     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1090 #endif
1091     return (tok);
1092 }
1093
1094
1095 /*
1096  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1097  */
1098 int
1099 yylex()
1100 {
1101     int tok;
1102     static BOOLEAN eof = FALSE;
1103
1104     if (!eof) {
1105         if (hssttok != -1) {
1106             if (hssttok < 0) {
1107                 tok = -hssttok;
1108                 hssttok = -1;
1109                 return tok;
1110             }
1111             RETURN(hssttok);
1112         } else {
1113             endlineno = hslineno;
1114             if ((tok = yylex1()) != EOF)
1115                 return tok;
1116             else
1117                 eof = TRUE;
1118         }
1119     }
1120     if (icontexts > icontexts_save) {
1121         if (INDENTON) {
1122             eof = TRUE;
1123             indenttab[icontexts] = 0;
1124             return (VCCURLY);
1125         } else
1126             hsperror("missing '}' at end of file");
1127     } else if (hsbuf_save != NULL) {
1128         fclose(yyin);
1129         yy_delete_buffer(YY_CURRENT_BUFFER);
1130         yy_switch_to_buffer(hsbuf_save);
1131         hsbuf_save = NULL;
1132         new_filename(filename_save);
1133         free(filename_save);
1134         hslineno = hslineno_save;
1135         hsplineno = hsplineno_save;
1136         hscolno = hscolno_save;
1137         hspcolno = hspcolno_save;
1138         etags = etags_save;
1139         icontexts = icontexts_save - 1;
1140         icontexts_save = 0;
1141 #ifdef HSP_DEBUG
1142         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1143 #endif
1144         eof = FALSE;
1145
1146         /* RETURN(LEOF); */
1147         hsperror("No longer using yacc to parse interface files");
1148
1149     } else {
1150         yyterminate();
1151     }
1152     abort(); /* should never get here! */
1153     return(0);
1154 }
1155
1156 /**********************************************************************
1157 *                                                                     *
1158 *                                                                     *
1159 *     Input Processing for Interfaces -- Not currently used !!!       *
1160 *                                                                     *
1161 *                                                                     *
1162 **********************************************************************/
1163
1164 /* setyyin(file)        open file as new lex input buffer */
1165 extern FILE *yyin;
1166
1167 void
1168 setyyin(char *file)
1169 {
1170     hsbuf_save = YY_CURRENT_BUFFER;
1171     if ((yyin = fopen(file, "r")) == NULL) {
1172         char errbuf[ERR_BUF_SIZE];
1173
1174         sprintf(errbuf, "can't read \"%-.50s\"", file);
1175         hsperror(errbuf);
1176     }
1177     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1178
1179     hslineno_save = hslineno;
1180     hsplineno_save = hsplineno;
1181     hslineno = hsplineno = 1;
1182
1183     filename_save = input_filename;
1184     input_filename = NULL;
1185     new_filename(file);
1186     hscolno_save = hscolno;
1187     hspcolno_save = hspcolno;
1188     hscolno = hspcolno = 0;
1189     etags_save = etags; /* do not do "etags" stuff in interfaces */
1190     etags = 0;          /* We remember whether we are doing it in
1191                            the module, so we can restore it later [WDP 94/09] */
1192     hsentercontext(-1);         /* partain: changed this from 0 */
1193     icontexts_save = icontexts;
1194 #ifdef HSP_DEBUG
1195     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1196 #endif
1197 }
1198
1199 static void
1200 layout_input(char *text, int len)
1201 {
1202 #ifdef HSP_DEBUG
1203     fprintf(stderr, "Scanning \"%s\"\n", text);
1204 #endif
1205
1206     hsplineno = hslineno;
1207     hspcolno = hscolno;
1208
1209     while (len-- > 0) {
1210         switch (*text++) {
1211         case '\n':
1212         case '\r':
1213         case '\f':
1214             hslineno++;
1215             hscolno = 0;
1216             break;
1217         case '\t':
1218             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1219             break;
1220         case '\v':
1221             break;
1222         default:
1223             ++hscolno;
1224             break;
1225         }
1226     }
1227 }
1228
1229 void
1230 setstartlineno(void)
1231 {
1232     startlineno = hsplineno;
1233
1234     if (modulelineno == 0) {
1235         modulelineno = startlineno;
1236     }
1237
1238 #if 1/*etags*/
1239 #else
1240     if (etags)
1241         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1242 #endif
1243 }
1244
1245 /**********************************************************************
1246 *                                                                     *
1247 *                                                                     *
1248 *                      Text Caching                                   *
1249 *                                                                     *
1250 *                                                                     *
1251 **********************************************************************/
1252
1253 #define CACHE_SIZE YY_BUF_SIZE
1254
1255 static struct {
1256     unsigned allocated;
1257     unsigned next;
1258     char *text;
1259 } textcache = { 0, 0, NULL };
1260
1261 static void
1262 cleartext(void)
1263 {
1264 /*  fprintf(stderr, "cleartext\n"); */
1265     textcache.next = 0;
1266     if (textcache.allocated == 0) {
1267         textcache.allocated = CACHE_SIZE;
1268         textcache.text = xmalloc(CACHE_SIZE);
1269     }
1270 }
1271
1272 static void
1273 addtext(char *text, unsigned length)
1274 {
1275 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1276
1277     if (length == 0)
1278         return;
1279
1280     if (textcache.next + length + 1 >= textcache.allocated) {
1281         textcache.allocated += length + CACHE_SIZE;
1282         textcache.text = xrealloc(textcache.text, textcache.allocated);
1283     }
1284     bcopy(text, textcache.text + textcache.next, length);
1285     textcache.next += length;
1286 }
1287
1288 static void
1289 addchar(char c)
1290 {
1291 /*  fprintf(stderr, "addchar: %c\n", c); */
1292
1293     if (textcache.next + 2 >= textcache.allocated) {
1294         textcache.allocated += CACHE_SIZE;
1295         textcache.text = xrealloc(textcache.text, textcache.allocated);
1296     }
1297     textcache.text[textcache.next++] = c;
1298 }
1299
1300 static char *
1301 fetchtext(unsigned *length)
1302 {
1303 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1304
1305     *length = textcache.next;
1306     textcache.text[textcache.next] = '\0';
1307     return textcache.text;
1308 }
1309
1310 /**********************************************************************
1311 *                                                                     *
1312 *                                                                     *
1313 *    Identifier Processing                                             *
1314 *                                                                     *
1315 *                                                                     *
1316 **********************************************************************/
1317
1318 /*
1319         hsnewid         Enters an id of length n into the symbol table.
1320 */
1321
1322 static void
1323 hsnewid(char *name, int length)
1324 {
1325     char save = name[length];
1326
1327     name[length] = '\0';
1328     yylval.uid = installid(name);
1329     name[length] = save;
1330 }
1331
1332 BOOLEAN
1333 hsnewqid(char *name, int length)
1334 {
1335     char* dot;
1336     char save = name[length];
1337     name[length] = '\0';
1338
1339     dot = strchr(name, '.');
1340     *dot = '\0';
1341     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1342     *dot = '.';
1343     name[length] = save;
1344
1345     return isconstr(dot+1);
1346 }