[project @ 1996-03-19 08:58:34 by partain]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       FLEX for Haskell.                                             *
6 *       -----------------                                             *
7 *                                                                     *
8 **********************************************************************/
9
10 #include "../../includes/config.h"
11
12 #include <stdio.h>
13
14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
15 #include <string.h>
16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
18 #include <memory.h>
19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
20 #define index strchr
21 #define rindex strrchr
22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
24 #define bzero(s, n) memset ((s), 0, (n))
25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
26 #include <strings.h>
27 /* memory.h and strings.h conflict on some systems.  */
28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
29
30 #include "hspincl.h"
31 #include "hsparser.tab.h"
32 #include "constants.h"
33 #include "utils.h"
34
35 /* Our substitute for <ctype.h> */
36
37 #define NCHARS  256
38 #define _S      0x1
39 #define _D      0x2
40 #define _H      0x4
41 #define _O      0x8
42 #define _C      0x10
43
44 #define _isconstr(s)    (CharTable[*s]&(_C))
45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
46
47 static unsigned char CharTable[NCHARS] = {
48 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
49 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
50 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
51 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
52 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
53 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,      /* ( */
54 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
55 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
56 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
57 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
58 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
59 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,      /* [ */
60 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
61 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
62 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
63 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
64
65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
79 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
80 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
81 };
82
83 /**********************************************************************
84 *                                                                     *
85 *                                                                     *
86 *      Declarations                                                   *
87 *                                                                     *
88 *                                                                     *
89 **********************************************************************/
90
91 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
92
93 /*
94  * For my own sanity, things that are not part of the flex skeleton
95  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
96  */
97
98 static int hslineno = 0;        /* Line number at end of token */
99 int hsplineno = 0;              /* Line number at end of previous token */
100
101 static int hscolno = 0;         /* Column number at end of token */
102 int hspcolno = 0;               /* Column number at end of previous token */
103 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
104
105 int modulelineno = -1;          /* The line number where the module starts */
106 int startlineno = 0;            /* The line number where something starts */
107 int endlineno = 0;              /* The line number where something ends */
108
109 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
110 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
111
112 static int nested_comments;     /* For counting comment nesting depth */
113
114 /* Hacky definition of yywrap: see flex doc.
115
116    If we don't do this, then we'll have to get the default
117    yywrap from the flex library, which is often something
118    we are not good at locating.  This avoids that difficulty.
119    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
120    WDP 94/09/05
121 */
122 #define yywrap() 1
123
124 /* Essential forward declarations */
125
126 static void hsnewid      PROTO((char *, int));
127 static void layout_input PROTO((char *, int));
128 static void cleartext    (NO_ARGS);
129 static void addtext      PROTO((char *, unsigned));
130 static void addchar      PROTO((char));
131 static char *fetchtext   PROTO((unsigned *));
132 static void new_filename PROTO((char *));
133 static int  Return       PROTO((int));
134 static void hsentercontext PROTO((int));
135
136 /* Special file handling for IMPORTS */
137 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
138
139 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
140 static char *filename_save;             /* File Name                     */
141 static int hslineno_save = 0,           /* Line Number                   */
142  hsplineno_save = 0,                    /* Line Number of Prev. token    */
143  hscolno_save = 0,                      /* Indentation                   */
144  hspcolno_save = 0;                     /* Left Indentation              */
145 static short icontexts_save = 0;        /* Indent Context Level          */
146
147 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
148 extern BOOLEAN etags;      /* that which is saved */
149
150 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
151
152 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
153
154 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
155 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
156 extern int maxAcceptablePragmaVersion;
157 extern int thisIfacePragmaVersion;
158
159 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
160                                  * inserted before token +ve  -- "}" inserted before
161                                  * token */
162
163 short icontexts = 0;            /* Which context we're in */
164
165
166
167 /*
168         Table of indentations:  right bit indicates whether to use
169           indentation rules (1 = use rules; 0 = ignore)
170
171     partain:
172     push one of these "contexts" at every "case" or "where"; the right bit says
173     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
174
175     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
176     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
177     push is shown just below.
178
179 */
180
181
182 static short indenttab[MAX_CONTEXTS] = {-1};
183
184 #define INDENTPT (indenttab[icontexts]>>1)
185 #define INDENTON (indenttab[icontexts]&1)
186
187 #define RETURN(tok) return(Return(tok))
188
189 #undef YY_DECL
190 #define YY_DECL int yylex1()
191
192 /* We should not peek at yy_act, but flex calls us even for the internal action
193    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
194    to support older versions of flex, we'll continue to peek for now.
195  */
196 #define YY_USER_ACTION \
197     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
198
199 #if 0/*debug*/
200 #undef YY_BREAK
201 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
202 #endif
203
204 /* Each time we enter a new start state, we push it onto the state stack.
205    Note that the rules do not allow us to underflow or overflow the stack.
206    (At least, they shouldn't.)  The maximum expected depth is 4:
207    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
208 */
209 static int StateStack[5];
210 static int StateDepth = -1;
211
212 #ifdef HSP_DEBUG
213 #define PUSH_STATE(n)   do {\
214     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
215     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
216 #define POP_STATE       do {--StateDepth;\
217     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
218     BEGIN(StateStack[StateDepth]);} while(0)
219 #else
220 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
221 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
222 #endif
223
224 %}
225
226 /* The start states are:
227    Code -- normal Haskell code (principal lexer)
228    GlaExt -- Haskell code with Glasgow extensions
229    Comment -- Nested comment processing
230    String -- Inside a string literal with backslashes
231    StringEsc -- Immediately following a backslash in a string literal
232    Char -- Inside a character literal with backslashes
233    CharEsc -- Immediately following a backslash in a character literal 
234
235    Note that the INITIAL state is unused.  Also note that these states
236    are _exclusive_.  All rules should be prefixed with an appropriate
237    list of start states.
238  */
239
240 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
241
242 isoS                    [\xa1-\xbf\xd7\xf7]
243 isoL                    [\xc0-\xd6\xd8-\xde]
244 isol                    [\xdf-\xf6\xf8-\xff]
245 isoA                    [\xa1-\xff]
246
247 D                       [0-9]
248 O                       [0-7]
249 H                       [0-9A-Fa-f]
250 N                       {D}+
251 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
252 S                       [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
253 SId                     {S}{S}*
254 L                       [A-Z\xc0-\xd6\xd8-\xde]
255 l                       [a-z\xdf-\xf6\xf8-\xff]
256 I                       {L}|{l}
257 i                       {L}|{l}|[0-9'_]
258 Id                      {I}{i}*
259 Mod                     {L}{i}*
260 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
261 CNTRL                   [@A-Z\[\\\]^_]
262 WS                      [ \t\n\r\f\v]
263 NL                      [\n\r]
264
265 %%
266
267 %{
268     /* 
269      * Special GHC pragma rules.  Do we need a start state for interface files,
270      * so these won't be matched in source files? --JSM
271      */
272 %}
273
274 <Code,GlaExt>^"# ".*{NL}    {
275                           char tempf[FILENAME_SIZE];
276                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
277                           new_filename(tempf);
278                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
279                         }
280
281 <Code,GlaExt>^"#line ".*{NL}    {
282                           char tempf[FILENAME_SIZE];
283                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
284                           new_filename(tempf); 
285                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
286                         }
287
288 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
289                           /* partain: pragma-style line directive */
290                           char tempf[FILENAME_SIZE];
291                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
292                           new_filename(tempf);
293                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
294                         }
295 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}"   {
296                           sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
297                         }
298 <Code,GlaExt>"{-# GHC_PRAGMA "   { 
299                           if ( ignorePragmas ||
300                                thisIfacePragmaVersion < minAcceptablePragmaVersion || 
301                                thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
302                              nested_comments = 1;
303                              PUSH_STATE(Comment);
304                           } else {
305                              PUSH_STATE(GhcPragma);
306                              RETURN(GHC_PRAGMA);
307                           }
308                         }
309 <GhcPragma>"_N_"            { RETURN(NO_PRAGMA); }
310 <GhcPragma>"_NI_"           { RETURN(NOINFO_PRAGMA); }
311 <GhcPragma>"_DEFOREST_"     { RETURN(DEFOREST_PRAGMA); }
312 <GhcPragma>"_SPECIALISE_"   { RETURN(SPECIALISE_PRAGMA); }
313 <GhcPragma>"_A_"            { RETURN(ARITY_PRAGMA); }
314 <GhcPragma>"_U_"            { RETURN(UPDATE_PRAGMA); }
315 <GhcPragma>"_S_"            { RETURN(STRICTNESS_PRAGMA); }
316 <GhcPragma>"_K_"            { RETURN(KIND_PRAGMA); }
317 <GhcPragma>"_MF_"           { RETURN(MAGIC_UNFOLDING_PRAGMA); }
318 <GhcPragma>"_F_"            { RETURN(UNFOLDING_PRAGMA); }
319
320 <GhcPragma>"_!_"            { RETURN(COCON); }
321 <GhcPragma>"_#_"            { RETURN(COPRIM); }
322 <GhcPragma>"_APP_"          { RETURN(COAPP); }
323 <GhcPragma>"_TYAPP_"        { RETURN(COTYAPP); }
324 <GhcPragma>"_ALG_"          { RETURN(CO_ALG_ALTS); }
325 <GhcPragma>"_PRIM_"         { RETURN(CO_PRIM_ALTS); }
326 <GhcPragma>"_NO_DEFLT_"     { RETURN(CO_NO_DEFAULT); }
327 <GhcPragma>"_LETREC_"       { RETURN(CO_LETREC); }
328
329 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
330 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
331 <GhcPragma>"_USER_CC_"      { RETURN(CO_USER_CC); }
332 <GhcPragma>"_AUTO_CC_"      { RETURN(CO_AUTO_CC); }
333 <GhcPragma>"_DICT_CC_"      { RETURN(CO_DICT_CC); }
334
335 <GhcPragma>"_DUPD_CC_"      { RETURN(CO_DUPD_CC); }
336 <GhcPragma>"_CAF_CC_"       { RETURN(CO_CAF_CC); }
337
338 <GhcPragma>"_SDSEL_"        { RETURN(CO_SDSEL_ID); }
339 <GhcPragma>"_METH_"         { RETURN(CO_METH_ID); }
340 <GhcPragma>"_DEFM_"         { RETURN(CO_DEFM_ID); }
341 <GhcPragma>"_DFUN_"         { RETURN(CO_DFUN_ID); }
342 <GhcPragma>"_CONSTM_"       { RETURN(CO_CONSTM_ID); }
343 <GhcPragma>"_SPEC_"         { RETURN(CO_SPEC_ID); }
344 <GhcPragma>"_WRKR_"         { RETURN(CO_WRKR_ID); }
345 <GhcPragma>"_ORIG_"         { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
346
347 <GhcPragma>"_ALWAYS_"       { RETURN(UNFOLD_ALWAYS); }
348 <GhcPragma>"_IF_ARGS_"      { RETURN(UNFOLD_IF_ARGS); }
349
350 <GhcPragma>"_NOREP_I_"      { RETURN(NOREP_INTEGER); }
351 <GhcPragma>"_NOREP_R_"      { RETURN(NOREP_RATIONAL); }
352 <GhcPragma>"_NOREP_S_"      { RETURN(NOREP_STRING); }
353
354 <GhcPragma>" #-}"           { POP_STATE; RETURN(END_PRAGMA); }
355
356 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
357                               PUSH_STATE(UserPragma);
358                               RETURN(SPECIALISE_UPRAGMA);
359                             }
360 <Code,GlaExt>"{-#"{WS}*"INLINE" {
361                               PUSH_STATE(UserPragma);
362                               RETURN(INLINE_UPRAGMA);
363                             }
364 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
365                               PUSH_STATE(UserPragma);
366                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
367                             }
368 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
369                               PUSH_STATE(UserPragma);
370                               RETURN(DEFOREST_UPRAGMA);
371                             }
372 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
373                               fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
374                                 input_filename, hsplineno);
375                               format_string(stderr, (unsigned char *) yytext, yyleng);
376                               fputs("'\n", stderr);
377                               nested_comments = 1;
378                               PUSH_STATE(Comment);
379                             }
380 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
381
382 %{
383     /*
384      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
385      * intentionally accepted as a keyword even for normal <Code>.
386      */
387 %}
388
389 <Code,GlaExt,GhcPragma>"case"   { RETURN(CASE); }
390 <Code,GlaExt>"class"            { RETURN(CLASS); }
391 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
392 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
393 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
394 <Code,GlaExt>"do"               { RETURN(DO); }
395 <Code,GlaExt>"else"             { RETURN(ELSE); }
396 <Code,GlaExt>"if"               { RETURN(IF); }
397 <Code,GlaExt>"import"           { RETURN(IMPORT); }
398 <Code,GlaExt,GhcPragma>"in"     { RETURN(IN); }
399 <Code,GlaExt>"infix"            { RETURN(INFIX); }
400 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
401 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
402 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
403 <Code,GlaExt,GhcPragma>"let"    { RETURN(LET); }
404 <Code,GlaExt>"module"           { RETURN(MODULE); }
405 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
406 <Code,GlaExt,GhcPragma>"of"     { RETURN(OF); }
407 <Code,GlaExt>"then"             { RETURN(THEN); }
408 <Code,GlaExt>"type"             { RETURN(TYPE); }
409 <Code,GlaExt>"where"            { RETURN(WHERE); }
410
411 <Code,GlaExt>"as"               { RETURN(AS); }
412 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
413 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
414 <Code,GlaExt>"interface"        { RETURN(INTERFACE); }
415
416 <Code,GlaExt,GhcPragma>"_scc_"  { RETURN(SCC); }
417 <GlaExt,GhcPragma>"_ccall_"     { RETURN(CCALL); }
418 <GlaExt,GhcPragma>"_ccall_GC_"  { RETURN(CCALL_GC); }
419 <GlaExt,GhcPragma>"_casm_"      { RETURN(CASM); }
420 <GlaExt,GhcPragma>"_casm_GC_"   { RETURN(CASM_GC); }
421 <GhcPragma>"_forall_"           { RETURN(FORALL); }
422
423 %{
424     /* 
425      * Haskell operators: special, reservedops and useful varsyms
426      */
427 %}
428
429 <Code,GlaExt,GhcPragma,UserPragma>"("   { RETURN(OPAREN); }
430 <Code,GlaExt,GhcPragma,UserPragma>")"   { RETURN(CPAREN); }
431 <Code,GlaExt,GhcPragma,UserPragma>"["   { RETURN(OBRACK); }
432 <Code,GlaExt,GhcPragma,UserPragma>"]"   { RETURN(CBRACK); }
433 <Code,GlaExt,GhcPragma>"{"              { RETURN(OCURLY); }
434 <Code,GlaExt,GhcPragma>"}"              { RETURN(CCURLY); }
435 <Code,GlaExt,GhcPragma,UserPragma>","   { RETURN(COMMA); }
436 <Code,GlaExt,GhcPragma>";"              { RETURN(SEMI); }
437 <Code,GlaExt,GhcPragma>"`"              { RETURN(BQUOTE); }
438 <Code,GlaExt>"_"                        { RETURN(WILDCARD); }
439
440 <Code,GlaExt>".."                       { RETURN(DOTDOT); }
441 <Code,GlaExt,GhcPragma,UserPragma>"::"  { RETURN(DCOLON); }
442 <Code,GlaExt,GhcPragma,UserPragma>"="   { RETURN(EQUAL); }
443 <Code,GlaExt,GhcPragma>"\\"             { RETURN(LAMBDA); }
444 <Code,GlaExt,GhcPragma>"|"              { RETURN(VBAR); }
445 <Code,GlaExt>"<-"                       { RETURN(LARROW); }
446 <Code,GlaExt,GhcPragma,UserPragma>"->"  { RETURN(RARROW); }
447 <Code,GlaExt>"-"                        { RETURN(MINUS); }
448
449 <Code,GlaExt,GhcPragma,UserPragma>"=>"  { RETURN(DARROW); }
450 <Code,GlaExt>"@"                        { RETURN(AT); }
451 <Code,GlaExt>"!"                        { RETURN(BANG); }
452 <Code,GlaExt>"~"                        { RETURN(LAZY); }
453
454 <GhcPragma>"_/\\_"                      { RETURN(TYLAMBDA); }
455
456 %{
457     /*
458      * Integers and (for Glasgow extensions) primitive integers.  Note that
459      * we pass all of the text on to the parser, because flex/C can't handle
460      * arbitrary precision numbers.
461      */
462 %}
463
464 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
465                          yylval.uid = xstrndup(yytext, yyleng - 1);
466                          RETURN(INTPRIM);
467                         }
468 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
469                          yylval.uid = xstrndup(yytext, yyleng);
470                          RETURN(INTEGER);
471                         }
472 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
473                          yylval.uid = xstrndup(yytext, yyleng - 1);
474                          RETURN(INTPRIM);
475                         }
476 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
477                          yylval.uid = xstrndup(yytext, yyleng);
478                          RETURN(INTEGER);
479                         }
480 <GlaExt,GhcPragma>("-")?{N}"#"  {
481                          yylval.uid = xstrndup(yytext, yyleng - 1);
482                          RETURN(INTPRIM);
483                         }
484 <Code,GlaExt,GhcPragma>{N} {
485                          yylval.uid = xstrndup(yytext, yyleng);
486                          RETURN(INTEGER);
487                         }
488
489 %{
490     /*
491      * Floats and (for Glasgow extensions) primitive floats/doubles.
492      */
493 %}
494
495 <GlaExt,GhcPragma>("-")?{F}"##" {
496                          yylval.uid = xstrndup(yytext, yyleng - 2);
497                          RETURN(DOUBLEPRIM);
498                         }
499 <GlaExt,GhcPragma>("-")?{F}"#" {
500                          yylval.uid = xstrndup(yytext, yyleng - 1);
501                          RETURN(FLOATPRIM);
502                         }
503 <Code,GlaExt>{F}        {
504                          yylval.uid = xstrndup(yytext, yyleng);
505                          RETURN(FLOAT);
506                         }
507
508 %{
509     /*
510      * Funky ``foo'' style C literals for Glasgow extensions
511      */
512 %}
513
514 <GlaExt,GhcPragma>"``"[^']+"''" {
515                          hsnewid(yytext + 2, yyleng - 4);
516                          RETURN(CLITLIT);
517                         }
518
519 %{
520     /*
521      * Identifiers, both variables and operators.  The trailing hash is allowed
522      * for Glasgow extensions.
523      */
524 %}
525
526 <GhcPragma>"_NIL_"              { hsnewid(yytext, yyleng); RETURN(CONID); }
527 <GhcPragma>"_TUP_"{D}+          { hsnewid(yytext, yyleng); RETURN(CONID); }
528 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
529
530 %{
531 /* These SHOULDNAE work in "Code" (sigh) */
532 %}
533 <Code,GlaExt,GhcPragma,UserPragma>{Id}"#" { 
534                          if (! (nonstandardFlag || in_interface)) {
535                             char errbuf[ERR_BUF_SIZE];
536                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
537                             hsperror(errbuf);
538                          }
539                          hsnewid(yytext, yyleng);
540                          RETURN(_isconstr(yytext) ? CONID : VARID);
541                         }
542 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} { 
543                          if (! (nonstandardFlag || in_interface)) {
544                             char errbuf[ERR_BUF_SIZE];
545                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
546                             hsperror(errbuf);
547                          }
548                          hsnewid(yytext, yyleng);
549                          RETURN(isconstr(yytext) ? CONID : VARID);
550                          /* NB: ^^^^^^^^ : not the macro! */
551                         }
552 <Code,GlaExt,GhcPragma,UserPragma>{Id}  {
553                          hsnewid(yytext, yyleng);
554                          RETURN(_isconstr(yytext) ? CONID : VARID);
555                         }
556 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
557                          hsnewid(yytext, yyleng);
558                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
559                         }
560 <Code,GlaExt,GhcPragma,UserPragma>{Mod}"."{Id}  {
561                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
562                          RETURN(isconstr ? QCONID : QVARID);
563                         }
564 <Code,GlaExt,GhcPragma,UserPragma>{Mod}"."{SId} {
565                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
566                          RETURN(isconstr ? QCONSYM : QVARSYM);
567                         }
568
569 %{
570     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
571
572     /* Because we can make the former well-behaved (we defined them).
573
574        Sadly, the latter is defined by Haskell, which allows such
575        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
576     */
577 %}
578
579 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`"        {       
580                          hsnewid(yytext + 1, yyleng - 2);
581                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
582                         }
583
584 %{
585     /*
586      * Character literals.  The first form is the quick form, for character
587      * literals that don't contain backslashes.  Literals with backslashes are
588      * lexed through multiple rules.  First, we match the open ' and as many
589      * normal characters as possible.  This puts us into the <Char> state, where
590      * a backslash is legal.  Then, we match the backslash and move into the 
591      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
592      * characters and the close '.  We may end up with too many characters, but
593      * this allows us to easily share the lex rules with strings.  Excess characters
594      * are ignored with a warning.
595      */
596 %}
597
598 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
599                          yylval.uhstring = installHstring(1, yytext+1);
600                          RETURN(CHARPRIM);
601                         }
602 <Code,GlaExt>'({CHAR}|"\"")'    {
603                          yylval.uhstring = installHstring(1, yytext+1);
604                          RETURN(CHAR);
605                         }
606 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
607                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
608                          hsperror(errbuf);
609                         }
610 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
611                          hsmlcolno = hspcolno;
612                          cleartext();
613                          addtext(yytext+1, yyleng-1);
614                          PUSH_STATE(Char);
615                         }
616 <Char>({CHAR}|"\"")*'#  {
617                          unsigned length;
618                          char *text;
619
620                          addtext(yytext, yyleng - 2);
621                          text = fetchtext(&length);
622
623                          if (! (nonstandardFlag || in_interface)) {
624                             char errbuf[ERR_BUF_SIZE];
625                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
626                             hsperror(errbuf);
627                          }
628
629                          if (length > 1) {
630                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
631                               input_filename, hsplineno, hspcolno + 1);
632                             format_string(stderr, (unsigned char *) text, length);
633                             fputs("' too long\n", stderr);
634                             hsperror("");
635                          }
636                          yylval.uhstring = installHstring(1, text);
637                          hspcolno = hsmlcolno;
638                          POP_STATE;
639                          RETURN(CHARPRIM); 
640                         }
641 <Char>({CHAR}|"\"")*'   {
642                          unsigned length;
643                          char *text;
644
645                          addtext(yytext, yyleng - 1);
646                          text = fetchtext(&length);
647
648                          if (length > 1) {
649                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
650                               input_filename, hsplineno, hspcolno + 1);
651                             format_string(stderr, (unsigned char *) text, length);
652                             fputs("' too long\n", stderr);
653                             hsperror("");
654                          }
655                          yylval.uhstring = installHstring(1, text);
656                          hspcolno = hsmlcolno;
657                          POP_STATE;
658                          RETURN(CHAR); 
659                         }
660 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
661
662
663 %{
664     /*
665      * String literals.  The first form is the quick form, for string literals
666      * that don't contain backslashes.  Literals with backslashes are lexed
667      * through multiple rules.  First, we match the open " and as many normal
668      * characters as possible.  This puts us into the <String> state, where
669      * a backslash is legal.  Then, we match the backslash and move into the 
670      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
671      * characters, moving back and forth between <String> and <StringEsc> as more
672      * backslashes are encountered.  (We may even digress into <Comment> mode if we
673      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
674      * of normal characters and the close ".
675      */
676 %}
677
678 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""#  {
679                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
680                             /* the -3 accounts for the " on front, "# on the end */
681                          RETURN(STRINGPRIM); 
682                         }
683 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""  {
684                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
685                          RETURN(STRING); 
686                         }
687 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
688                          hsmlcolno = hspcolno;
689                          cleartext();
690                          addtext(yytext+1, yyleng-1);
691                          PUSH_STATE(String);
692                         }
693 <String>({CHAR}|"'")*"\"#"   {
694                          unsigned length;
695                          char *text;
696
697                          addtext(yytext, yyleng-2);
698                          text = fetchtext(&length);
699
700                          if (! (nonstandardFlag || in_interface)) {
701                             char errbuf[ERR_BUF_SIZE];
702                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
703                             hsperror(errbuf);
704                          }
705
706                          yylval.uhstring = installHstring(length, text);
707                          hspcolno = hsmlcolno;
708                          POP_STATE;
709                          RETURN(STRINGPRIM);
710                         }
711 <String>({CHAR}|"'")*"\""   {
712                          unsigned length;
713                          char *text;
714
715                          addtext(yytext, yyleng-1);
716                          text = fetchtext(&length);
717
718                          yylval.uhstring = installHstring(length, text);
719                          hspcolno = hsmlcolno;
720                          POP_STATE;
721                          RETURN(STRING); 
722                         }
723 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
724
725 %{
726     /*
727      * Character and string escapes are roughly the same, but strings have the
728      * extra `\&' sequence which is not allowed for characters.  Also, comments
729      * are allowed in the <StringEsc> state.  (See the comment section much
730      * further down.)
731      *
732      * NB: Backslashes and tabs are stored in strings as themselves.
733      * But if we print them (in printtree.c), they must go out as
734      * "\\\\" and "\\t" respectively.  (This is because of the bogus
735      * intermediate format that the parser produces.  It uses '\t' fpr end of
736      * string, so it needs to be able to escape tabs, which means that it
737      * also needs to be able to escape the escape character ('\\').  Sigh.
738      */
739 %}
740
741 <Char>\\                { PUSH_STATE(CharEsc); }
742 <String>\\&             /* Ignore */ ;
743 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
744
745 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
746 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
747
748 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
749 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
750 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
751 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
752 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
753 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
754 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
755 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
756 <CharEsc,StringEsc>BEL  |
757 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
758 <CharEsc,StringEsc>BS   |
759 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
760 <CharEsc,StringEsc>HT   |
761 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
762 <CharEsc,StringEsc>LF   |
763 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
764 <CharEsc,StringEsc>VT   |
765 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
766 <CharEsc,StringEsc>FF   |
767 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
768 <CharEsc,StringEsc>CR   |
769 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
770 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
771 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
772 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
773 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
774 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
775 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
776 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
777 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
778 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
779 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
780 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
781 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
782 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
783 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
784 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
785 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
786 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
787 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
788 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
789 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
790 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
791 <CharEsc,StringEsc>{D}+  {
792                           int i = strtol(yytext, NULL, 10);
793                           if (i < NCHARS) {
794                              addchar((char) i);
795                           } else {
796                              char errbuf[ERR_BUF_SIZE];
797                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
798                                 yytext);
799                              hsperror(errbuf);
800                           }
801                           POP_STATE;
802                         }
803 <CharEsc,StringEsc>o{O}+ {
804                           int i = strtol(yytext + 1, NULL, 8);
805                           if (i < NCHARS) {
806                              addchar((char) i);
807                           } else {
808                              char errbuf[ERR_BUF_SIZE];
809                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
810                                 yytext);
811                              hsperror(errbuf);
812                           }
813                           POP_STATE;
814                         }
815 <CharEsc,StringEsc>x{H}+ {
816                           int i = strtol(yytext + 1, NULL, 16);
817                           if (i < NCHARS) {
818                              addchar((char) i);
819                           } else {
820                              char errbuf[ERR_BUF_SIZE];
821                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
822                                 yytext);
823                              hsperror(errbuf);
824                           }
825                           POP_STATE;
826                         }
827
828 %{
829     /*
830      * Simple comments and whitespace.  Normally, we would just ignore these, but
831      * in case we're processing a string escape, we need to note that we've seen
832      * a gap.
833      *
834      * Note that we cater for a comment line that *doesn't* end in a newline.
835      * This is incorrect, strictly speaking, but seems like the right thing
836      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
837      */
838 %}
839
840 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
841 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+       { noGap = FALSE; }
842
843 %{
844     /*
845      * Nested comments.  The major complication here is in trying to match the
846      * longest lexemes possible, for better performance.  (See the flex document.)
847      * That's why the rules look so bizarre.
848      */
849 %}
850
851 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-"        { 
852                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
853                         }
854
855 <Comment>[^-{]*         |
856 <Comment>"-"+[^-{}]+    |
857 <Comment>"{"+[^-{}]+    ;
858 <Comment>"{-"           { nested_comments++; }
859 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
860 <Comment>(.|\n)         ;
861
862 %{
863     /*
864      * Illegal characters.  This used to be a single rule, but we might as well
865      * pass on as much information as we have, so now we indicate our state in
866      * the error message.
867      */
868 %}
869
870 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n)        { 
871                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
872                             input_filename, hsplineno, hspcolno + 1); 
873                          format_string(stderr, (unsigned char *) yytext, 1);
874                          fputs("'\n", stderr);
875                          hsperror("");
876                         }
877 <Char>(.|\n)            { 
878                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
879                             input_filename, hsplineno, hspcolno + 1); 
880                          format_string(stderr, (unsigned char *) yytext, 1);
881                          fputs("' in a character literal\n", stderr);
882                          hsperror("");
883                         }
884 <CharEsc>(.|\n)         {
885                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
886                             input_filename, hsplineno, hspcolno + 1); 
887                          format_string(stderr, (unsigned char *) yytext, 1);
888                          fputs("'\n", stderr);
889                          hsperror("");
890                         }
891 <String>(.|\n)          { if (nonstandardFlag) {
892                              addtext(yytext, yyleng);
893                           } else { 
894                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
895                                 input_filename, hsplineno, hspcolno + 1); 
896                                 format_string(stderr, (unsigned char *) yytext, 1);
897                                 fputs("' in a string literal\n", stderr);
898                                 hsperror("");
899                           }
900                         }
901 <StringEsc>(.|\n)       {
902                          if (noGap) {
903                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
904                                 input_filename, hsplineno, hspcolno + 1); 
905                              format_string(stderr, (unsigned char *) yytext, 1);
906                              fputs("'\n", stderr);
907                              hsperror("");
908                          } else {
909                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
910                                 input_filename, hsplineno, hspcolno + 1);
911                              format_string(stderr, (unsigned char *) yytext, 1);
912                              fputs("' in a string gap\n", stderr);
913                              hsperror("");
914                          }
915                         }
916
917 %{
918     /*
919      * End of file.  In any sub-state, this is an error.  However, for the primary
920      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
921      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
922      * adding virtual close curlies, or closing an interface and returning to the
923      * primary source file.
924      *
925      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
926      * line/column advancement has to be done by hand.
927      */
928 %}
929
930 <Char,CharEsc><<EOF>>   { 
931                           hsplineno = hslineno; hspcolno = hscolno;
932                           hsperror("unterminated character literal");
933                         }
934 <Comment><<EOF>>        { 
935                           hsplineno = hslineno; hspcolno = hscolno;
936                           hsperror("unterminated comment"); 
937                         }
938 <String,StringEsc><<EOF>>   { 
939                           hsplineno = hslineno; hspcolno = hscolno;
940                           hsperror("unterminated string literal"); 
941                         }
942 <GhcPragma><<EOF>>      {
943                           hsplineno = hslineno; hspcolno = hscolno;
944                           hsperror("unterminated interface pragma"); 
945                         }
946 <UserPragma><<EOF>>     {
947                           hsplineno = hslineno; hspcolno = hscolno;
948                           hsperror("unterminated user-specified pragma"); 
949                         }
950 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
951
952 %%
953
954 /**********************************************************************
955 *                                                                     *
956 *                                                                     *
957 *     YACC/LEX Initialisation etc.                                    *
958 *                                                                     *
959 *                                                                     *
960 **********************************************************************/
961
962 /*
963    We initialise input_filename to "<stdin>".
964    This allows unnamed sources to be piped into the parser.
965 */
966
967 extern BOOLEAN acceptPrim;
968
969 void
970 yyinit(void)
971 {
972     input_filename = xstrdup("<stdin>");
973
974     /* We must initialize the input buffer _now_, because we call
975        setyyin _before_ calling yylex for the first time! */
976     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
977
978     if (acceptPrim)
979         PUSH_STATE(GlaExt);
980     else
981         PUSH_STATE(Code);
982 }
983
984 static void
985 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
986 {
987     if (input_filename != NULL)
988         free(input_filename);
989     input_filename = xstrdup(f);
990 }
991
992 /**********************************************************************
993 *                                                                     *
994 *                                                                     *
995 *     Layout Processing                                               *
996 *                                                                     *
997 *                                                                     *
998 **********************************************************************/
999
1000 /*
1001         The following section deals with Haskell Layout conventions
1002         forcing insertion of ; or } as appropriate
1003 */
1004
1005 static BOOLEAN
1006 hsshouldindent(void)
1007 {
1008     return (!forgetindent && INDENTON);
1009 }
1010
1011
1012 /* Enter new context and set new indentation level */
1013 void
1014 hssetindent(void)
1015 {
1016 #ifdef HSP_DEBUG
1017     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1018 #endif
1019
1020     /*
1021      * partain: first chk that new indent won't be less than current one; this code
1022      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1023      * current token; what that has to do with indenting, I don't know.
1024      */
1025
1026
1027     if (hscolno - 1 <= INDENTPT) {
1028         if (INDENTPT == -1)
1029             return;             /* Empty input OK for Haskell 1.1 */
1030         else {
1031             char errbuf[ERR_BUF_SIZE];
1032
1033             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1034             hsperror(errbuf);
1035         }
1036     }
1037     hsentercontext((hspcolno << 1) | 1);
1038 }
1039
1040
1041 /* Enter a new context without changing the indentation level */
1042 void
1043 hsincindent(void)
1044 {
1045 #ifdef HSP_DEBUG
1046     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1047 #endif
1048     hsentercontext(indenttab[icontexts] & ~1);
1049 }
1050
1051
1052 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1053 void
1054 hsindentoff(void)
1055 {
1056     forgetindent = TRUE;
1057 }
1058
1059
1060 /* Enter a new layout context. */
1061 static void
1062 hsentercontext(int indent)
1063 {
1064     /* Enter new context and set indentation as specified */
1065     if (++icontexts >= MAX_CONTEXTS) {
1066         char errbuf[ERR_BUF_SIZE];
1067
1068         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1069         hsperror(errbuf);
1070     }
1071     forgetindent = FALSE;
1072     indenttab[icontexts] = indent;
1073 #ifdef HSP_DEBUG
1074     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1075 #endif
1076 }
1077
1078
1079 /* Exit a layout context */
1080 void
1081 hsendindent(void)
1082 {
1083     --icontexts;
1084 #ifdef HSP_DEBUG
1085     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1086 #endif
1087 }
1088
1089 /*
1090  *      Return checks the indentation level and returns ;, } or the specified token.
1091  */
1092
1093 static int
1094 Return(int tok)
1095 {
1096 #ifdef HSP_DEBUG
1097     extern int yyleng;
1098 #endif
1099
1100     if (hsshouldindent()) {
1101         if (hspcolno < INDENTPT) {
1102 #ifdef HSP_DEBUG
1103             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1104 #endif
1105             hssttok = tok;
1106             return (VCCURLY);
1107         } else if (hspcolno == INDENTPT) {
1108 #ifdef HSP_DEBUG
1109             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1110 #endif
1111             hssttok = -tok;
1112             return (SEMI);
1113         }
1114     }
1115     hssttok = -1;
1116 #ifdef HSP_DEBUG
1117     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1118 #endif
1119     return (tok);
1120 }
1121
1122
1123 /*
1124  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1125  */
1126 int
1127 yylex()
1128 {
1129     int tok;
1130     static BOOLEAN eof = FALSE;
1131
1132     if (!eof) {
1133         if (hssttok != -1) {
1134             if (hssttok < 0) {
1135                 tok = -hssttok;
1136                 hssttok = -1;
1137                 return tok;
1138             }
1139             RETURN(hssttok);
1140         } else {
1141             endlineno = hslineno;
1142             if ((tok = yylex1()) != EOF)
1143                 return tok;
1144             else
1145                 eof = TRUE;
1146         }
1147     }
1148     if (icontexts > icontexts_save) {
1149         if (INDENTON) {
1150             eof = TRUE;
1151             indenttab[icontexts] = 0;
1152             return (VCCURLY);
1153         } else
1154             hsperror("missing '}' at end of file");
1155     } else if (hsbuf_save != NULL) {
1156         fclose(yyin);
1157         yy_delete_buffer(YY_CURRENT_BUFFER);
1158         yy_switch_to_buffer(hsbuf_save);
1159         hsbuf_save = NULL;
1160         new_filename(filename_save);
1161         free(filename_save);
1162         hslineno = hslineno_save;
1163         hsplineno = hsplineno_save;
1164         hscolno = hscolno_save;
1165         hspcolno = hspcolno_save;
1166         etags = etags_save;
1167         in_interface = FALSE;
1168         icontexts = icontexts_save - 1;
1169         icontexts_save = 0;
1170 #ifdef HSP_DEBUG
1171         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1172 #endif
1173         eof = FALSE;
1174         RETURN(LEOF);
1175     } else {
1176         yyterminate();
1177     }
1178     abort(); /* should never get here! */
1179     return(0);
1180 }
1181
1182 /**********************************************************************
1183 *                                                                     *
1184 *                                                                     *
1185 *     Input Processing for Interfaces                                 *
1186 *                                                                     *
1187 *                                                                     *
1188 **********************************************************************/
1189
1190 /* setyyin(file)        open file as new lex input buffer */
1191 extern FILE *yyin;
1192
1193 void
1194 setyyin(char *file)
1195 {
1196     hsbuf_save = YY_CURRENT_BUFFER;
1197     if ((yyin = fopen(file, "r")) == NULL) {
1198         char errbuf[ERR_BUF_SIZE];
1199
1200         sprintf(errbuf, "can't read \"%-.50s\"", file);
1201         hsperror(errbuf);
1202     }
1203     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1204
1205     hslineno_save = hslineno;
1206     hsplineno_save = hsplineno;
1207     hslineno = hsplineno = 1;
1208
1209     filename_save = input_filename;
1210     input_filename = NULL;
1211     new_filename(file);
1212     hscolno_save = hscolno;
1213     hspcolno_save = hspcolno;
1214     hscolno = hspcolno = 0;
1215     in_interface = TRUE;
1216     etags_save = etags; /* do not do "etags" stuff in interfaces */
1217     etags = 0;          /* We remember whether we are doing it in
1218                            the module, so we can restore it later [WDP 94/09] */
1219     hsentercontext(-1);         /* partain: changed this from 0 */
1220     icontexts_save = icontexts;
1221 #ifdef HSP_DEBUG
1222     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1223 #endif
1224 }
1225
1226 static void
1227 layout_input(char *text, int len)
1228 {
1229 #ifdef HSP_DEBUG
1230     fprintf(stderr, "Scanning \"%s\"\n", text);
1231 #endif
1232
1233     hsplineno = hslineno;
1234     hspcolno = hscolno;
1235
1236     while (len-- > 0) {
1237         switch (*text++) {
1238         case '\n':
1239         case '\r':
1240         case '\f':
1241             hslineno++;
1242             hscolno = 0;
1243             break;
1244         case '\t':
1245             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1246             break;
1247         case '\v':
1248             break;
1249         default:
1250             ++hscolno;
1251             break;
1252         }
1253     }
1254 }
1255
1256 void
1257 setstartlineno(void)
1258 {
1259     startlineno = hsplineno;
1260
1261     if (modulelineno == 0) {
1262         modulelineno = startlineno;
1263     }
1264
1265 #if 1/*etags*/
1266 #else
1267     if (etags)
1268         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1269 #endif
1270 }
1271
1272 /**********************************************************************
1273 *                                                                     *
1274 *                                                                     *
1275 *                      Text Caching                                   *
1276 *                                                                     *
1277 *                                                                     *
1278 **********************************************************************/
1279
1280 #define CACHE_SIZE YY_BUF_SIZE
1281
1282 static struct {
1283     unsigned allocated;
1284     unsigned next;
1285     char *text;
1286 } textcache = { 0, 0, NULL };
1287
1288 static void
1289 cleartext(void)
1290 {
1291 /*  fprintf(stderr, "cleartext\n"); */
1292     textcache.next = 0;
1293     if (textcache.allocated == 0) {
1294         textcache.allocated = CACHE_SIZE;
1295         textcache.text = xmalloc(CACHE_SIZE);
1296     }
1297 }
1298
1299 static void
1300 addtext(char *text, unsigned length)
1301 {
1302 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1303
1304     if (length == 0)
1305         return;
1306
1307     if (textcache.next + length + 1 >= textcache.allocated) {
1308         textcache.allocated += length + CACHE_SIZE;
1309         textcache.text = xrealloc(textcache.text, textcache.allocated);
1310     }
1311     bcopy(text, textcache.text + textcache.next, length);
1312     textcache.next += length;
1313 }
1314
1315 static void
1316 addchar(char c)
1317 {
1318 /*  fprintf(stderr, "addchar: %c\n", c); */
1319
1320     if (textcache.next + 2 >= textcache.allocated) {
1321         textcache.allocated += CACHE_SIZE;
1322         textcache.text = xrealloc(textcache.text, textcache.allocated);
1323     }
1324     textcache.text[textcache.next++] = c;
1325 }
1326
1327 static char *
1328 fetchtext(unsigned *length)
1329 {
1330 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1331
1332     *length = textcache.next;
1333     textcache.text[textcache.next] = '\0';
1334     return textcache.text;
1335 }
1336
1337 /**********************************************************************
1338 *                                                                     *
1339 *                                                                     *
1340 *    Identifier Processing                                             *
1341 *                                                                     *
1342 *                                                                     *
1343 **********************************************************************/
1344
1345 /*
1346         hsnewid         Enters an id of length n into the symbol table.
1347 */
1348
1349 static void
1350 hsnewid(char *name, int length)
1351 {
1352     char save = name[length];
1353
1354     name[length] = '\0';
1355     yylval.uid = installid(name);
1356     name[length] = save;
1357 }
1358
1359 BOOLEAN
1360 hsnewqid(char *name, int length)
1361 {
1362     char* dot;
1363     char save = name[length];
1364     name[length] = '\0';
1365
1366     dot = strchr(name, '.');
1367     *dot = '\0';
1368     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1369     *dot = '.';
1370     name[length] = save;
1371
1372     return _isconstr(dot+1);
1373 }
1374
1375 BOOLEAN 
1376 isconstr(char *s) /* walks past leading underscores before using the macro */
1377 {
1378     char *temp = s;
1379
1380     for ( ; temp != NULL && *temp == '_' ; temp++ );
1381
1382     return _isconstr(temp);
1383 }