[project @ 1996-01-11 14:06:51 by partain]
[ghc-hetmet.git] / ghc / compiler / yaccParser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       LEX grammar for Haskell.                                      *
6 *       ------------------------                                      *
7 *                                                                     *
8 *       (c) Copyright K. Hammond, University of Glasgow,              *
9 *               10th. February 1989                                   *
10 *                                                                     *
11 *       Modification History                                          *
12 *       --------------------                                          *
13 *                                                                     *
14 *       22/08/91 kh             Initial Haskell 1.1 version.          *
15 *       18/10/91 kh             Added 'ccall'.                        *
16 *       19/11/91 kh             Tidied generally.                     *
17 *       04/12/91 kh             Added Int#.                           *
18 *       31/01/92 kh             Haskell 1.2 version.                  *
19 *       24/04/92 ps             Added 'scc'.                          *
20 *       03/06/92 kh             Changed Infix/Prelude Handling.       *
21 *       23/08/93 jsm            Changed to support flex               *
22 *                                                                     *
23 *                                                                     *
24 *       Known Problems:                                               *
25 *                                                                     *
26 *               None, any more.                                       *
27 *                                                                     *
28 **********************************************************************/
29
30 #include "../../includes/config.h"
31
32 #include <stdio.h>
33
34 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
35 #include <string.h>
36 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
37 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
38 #include <memory.h>
39 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
40 #define index strchr
41 #define rindex strrchr
42 #define bcopy(s, d, n) memcpy ((d), (s), (n))
43 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
44 #define bzero(s, n) memset ((s), 0, (n))
45 #else /* not STDC_HEADERS and not HAVE_STRING_H */
46 #include <strings.h>
47 /* memory.h and strings.h conflict on some systems.  */
48 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
49
50 #include "hspincl.h"
51 #include "hsparser.tab.h"
52 #include "constants.h"
53 #include "utils.h"
54
55 /* Our substitute for <ctype.h> */
56
57 #define NCHARS  256
58 #define _S      0x1
59 #define _D      0x2
60 #define _H      0x4
61 #define _O      0x8
62 #define _C      0x10
63
64 #define _isconstr(s)    (CharTable[*s]&(_C))
65 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
66
67 static unsigned char CharTable[NCHARS] = {
68 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
69 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
70 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
71 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
72 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
73 /* '(' */       0,      0,      0,      0,      0,      0,      0,      0,
74 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
75 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
76 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
77 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
78 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
79 /* 'X' */       _C,     _C,     _C,     0,      0,      0,      0,      0,
80 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
81 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
82 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
83 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
84
85 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
86 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
87 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
88 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
89 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
90 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
91 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
92 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
93 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
94 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
95 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
96 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
97 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
98 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
99 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
100 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
101 };
102
103 /**********************************************************************
104 *                                                                     *
105 *                                                                     *
106 *      Declarations                                                   *
107 *                                                                     *
108 *                                                                     *
109 **********************************************************************/
110
111 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
112
113 /*
114  * For my own sanity, things that are not part of the flex skeleton
115  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
116  */
117
118 static int hslineno = 0;        /* Line number at end of token */
119 int hsplineno = 0;              /* Line number at end of previous token */
120
121 static int hscolno = 0;         /* Column number at end of token */
122 int hspcolno = 0;               /* Column number at end of previous token */
123 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
124
125 int startlineno = 0;            /* The line number where something starts */
126 int endlineno = 0;              /* The line number where something ends */
127
128 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
129 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
130
131 static int nested_comments;     /* For counting comment nesting depth */
132
133 /* Hacky definition of yywrap: see flex doc.
134
135    If we don't do this, then we'll have to get the default
136    yywrap from the flex library, which is often something
137    we are not good at locating.  This avoids that difficulty.
138    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
139    WDP 94/09/05
140 */
141 #define yywrap() 1
142
143 /* Essential forward declarations */
144
145 static void hsnewid      PROTO((char *, int));
146 static void layout_input PROTO((char *, int));
147 static void cleartext    (NO_ARGS);
148 static void addtext      PROTO((char *, unsigned));
149 static void addchar      PROTO((char));
150 static char *fetchtext   PROTO((unsigned *));
151 static void new_filename PROTO((char *));
152 static int  Return       PROTO((int));
153 static void hsentercontext PROTO((int));
154
155 /* Special file handling for IMPORTS */
156 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
157
158 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
159 static char *filename_save;             /* File Name                     */
160 static int hslineno_save = 0,           /* Line Number                   */
161  hsplineno_save = 0,                    /* Line Number of Prev. token    */
162  hscolno_save = 0,                      /* Indentation                   */
163  hspcolno_save = 0;                     /* Left Indentation              */
164 static short icontexts_save = 0;        /* Indent Context Level          */
165
166 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
167 extern BOOLEAN etags;      /* that which is saved */
168
169 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
170
171 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
172
173 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
174 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
175 extern int maxAcceptablePragmaVersion;
176 extern int thisIfacePragmaVersion;
177
178 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
179                                  * inserted before token +ve  -- "}" inserted before
180                                  * token */
181
182 short icontexts = 0;            /* Which context we're in */
183
184
185
186 /*
187         Table of indentations:  right bit indicates whether to use
188           indentation rules (1 = use rules; 0 = ignore)
189
190     partain:
191     push one of these "contexts" at every "case" or "where"; the right bit says
192     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
193
194     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
195     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
196     push is shown just below.
197
198 */
199
200
201 static short indenttab[MAX_CONTEXTS] = {-1};
202
203 #define INDENTPT (indenttab[icontexts]>>1)
204 #define INDENTON (indenttab[icontexts]&1)
205
206 #define RETURN(tok) return(Return(tok))
207
208 #undef YY_DECL
209 #define YY_DECL int yylex1()
210
211 /* We should not peek at yy_act, but flex calls us even for the internal action
212    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
213    to support older versions of flex, we'll continue to peek for now.
214  */
215 #define YY_USER_ACTION \
216     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
217
218 #if 0/*debug*/
219 #undef YY_BREAK
220 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
221 #endif
222
223 /* Each time we enter a new start state, we push it onto the state stack.
224    Note that the rules do not allow us to underflow or overflow the stack.
225    (At least, they shouldn't.)  The maximum expected depth is 4:
226    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
227 */
228 static int StateStack[5];
229 static int StateDepth = -1;
230
231 #ifdef HSP_DEBUG
232 #define PUSH_STATE(n)   do {\
233     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
234     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
235 #define POP_STATE       do {--StateDepth;\
236     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
237     BEGIN(StateStack[StateDepth]);} while(0)
238 #else
239 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
240 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
241 #endif
242
243 %}
244
245 /* The start states are:
246    Code -- normal Haskell code (principal lexer)
247    GlaExt -- Haskell code with Glasgow extensions
248    Comment -- Nested comment processing
249    String -- Inside a string literal with backslashes
250    StringEsc -- Immediately following a backslash in a string literal
251    Char -- Inside a character literal with backslashes
252    CharEsc -- Immediately following a backslash in a character literal 
253
254    Note that the INITIAL state is unused.  Also note that these states
255    are _exclusive_.  All rules should be prefixed with an appropriate
256    list of start states.
257  */
258
259 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
260
261 D                       [0-9]
262 O                       [0-7]
263 H                       [0-9A-Fa-f]
264 N                       {D}+
265 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
266 S                       [!#$%&*+./<=>?@\\^|~:]
267 SId                     ({S}|~|-){S}*
268 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
269 L                       [A-Z]
270 I                       [A-Za-z]
271 i                       [A-Za-z0-9'_]
272 Id                      {I}({i})*
273 WS                      [ \t\n\r\f\v]
274 CNTRL                   [@A-Z\[\\\]^_]
275 NL                      [\n\r]
276
277 %%
278
279 %{
280     /* 
281      * Special GHC pragma rules.  Do we need a start state for interface files,
282      * so these won't be matched in source files? --JSM
283      */
284 %}
285
286 <Code,GlaExt>^"# ".*{NL}    {
287                           char tempf[FILENAME_SIZE];
288                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
289                           new_filename(tempf);
290                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
291                         }
292
293 <Code,GlaExt>^"#line ".*{NL}    {
294                           char tempf[FILENAME_SIZE];
295                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
296                           new_filename(tempf); 
297                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
298                         }
299
300 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
301                           /* partain: pragma-style line directive */
302                           char tempf[FILENAME_SIZE];
303                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
304                           new_filename(tempf);
305                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
306                         }
307 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}"   {
308                           sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
309                         }
310 <Code,GlaExt>"{-# GHC_PRAGMA "   { 
311                           if ( ignorePragmas ||
312                                thisIfacePragmaVersion < minAcceptablePragmaVersion || 
313                                thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
314                              nested_comments = 1;
315                              PUSH_STATE(Comment);
316                           } else {
317                              PUSH_STATE(GhcPragma);
318                              RETURN(GHC_PRAGMA);
319                           }
320                         }
321 <GhcPragma>"_N_"            { RETURN(NO_PRAGMA); }
322 <GhcPragma>"_NI_"           { RETURN(NOINFO_PRAGMA); }
323 <GhcPragma>"_ABSTRACT_"     { RETURN(ABSTRACT_PRAGMA); }
324 <GhcPragma>"_DEFOREST_"     { RETURN(DEFOREST_PRAGMA); }
325 <GhcPragma>"_SPECIALISE_"   { RETURN(SPECIALISE_PRAGMA); }
326 <GhcPragma>"_M_"            { RETURN(MODNAME_PRAGMA); }
327 <GhcPragma>"_A_"            { RETURN(ARITY_PRAGMA); }
328 <GhcPragma>"_U_"            { RETURN(UPDATE_PRAGMA); }
329 <GhcPragma>"_S_"            { RETURN(STRICTNESS_PRAGMA); }
330 <GhcPragma>"_K_"            { RETURN(KIND_PRAGMA); }
331 <GhcPragma>"_MF_"           { RETURN(MAGIC_UNFOLDING_PRAGMA); }
332 <GhcPragma>"_F_"            { RETURN(UNFOLDING_PRAGMA); }
333
334 <GhcPragma>"_!_"            { RETURN(COCON); }
335 <GhcPragma>"_#_"            { RETURN(COPRIM); }
336 <GhcPragma>"_APP_"          { RETURN(COAPP); }
337 <GhcPragma>"_TYAPP_"        { RETURN(COTYAPP); }
338 <GhcPragma>"_ALG_"          { RETURN(CO_ALG_ALTS); }
339 <GhcPragma>"_PRIM_"         { RETURN(CO_PRIM_ALTS); }
340 <GhcPragma>"_NO_DEFLT_"     { RETURN(CO_NO_DEFAULT); }
341 <GhcPragma>"_LETREC_"       { RETURN(CO_LETREC); }
342
343 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
344 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
345 <GhcPragma>"_USER_CC_"      { RETURN(CO_USER_CC); }
346 <GhcPragma>"_AUTO_CC_"      { RETURN(CO_AUTO_CC); }
347 <GhcPragma>"_DICT_CC_"      { RETURN(CO_DICT_CC); }
348
349 <GhcPragma>"_DUPD_CC_"      { RETURN(CO_DUPD_CC); }
350 <GhcPragma>"_CAF_CC_"       { RETURN(CO_CAF_CC); }
351
352 <GhcPragma>"_SDSEL_"        { RETURN(CO_SDSEL_ID); }
353 <GhcPragma>"_METH_"         { RETURN(CO_METH_ID); }
354 <GhcPragma>"_DEFM_"         { RETURN(CO_DEFM_ID); }
355 <GhcPragma>"_DFUN_"         { RETURN(CO_DFUN_ID); }
356 <GhcPragma>"_CONSTM_"       { RETURN(CO_CONSTM_ID); }
357 <GhcPragma>"_SPEC_"         { RETURN(CO_SPEC_ID); }
358 <GhcPragma>"_WRKR_"         { RETURN(CO_WRKR_ID); }
359 <GhcPragma>"_ORIG_"         { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
360
361 <GhcPragma>"_ALWAYS_"       { RETURN(UNFOLD_ALWAYS); }
362 <GhcPragma>"_IF_ARGS_"      { RETURN(UNFOLD_IF_ARGS); }
363
364 <GhcPragma>"_NOREP_I_"      { RETURN(NOREP_INTEGER); }
365 <GhcPragma>"_NOREP_R_"      { RETURN(NOREP_RATIONAL); }
366 <GhcPragma>"_NOREP_S_"      { RETURN(NOREP_STRING); }
367
368 <GhcPragma>" #-}"           { POP_STATE; RETURN(END_PRAGMA); }
369
370 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
371                               PUSH_STATE(UserPragma);
372                               RETURN(SPECIALISE_UPRAGMA);
373                             }
374 <Code,GlaExt>"{-#"{WS}*"INLINE" {
375                               PUSH_STATE(UserPragma);
376                               RETURN(INLINE_UPRAGMA);
377                             }
378 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
379                               PUSH_STATE(UserPragma);
380                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
381                             }
382 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
383                               PUSH_STATE(UserPragma);
384                               RETURN(DEFOREST_UPRAGMA);
385                             }
386 <Code,GlaExt>"{-#"{WS}*"ABSTRACT" {
387                               PUSH_STATE(UserPragma);
388                               RETURN(ABSTRACT_UPRAGMA);
389                             }
390 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
391
392 %{
393     /*
394      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
395      * intentionally accepted as a keyword even for normal <Code>.
396      */
397 %}
398
399 <Code,GlaExt,GhcPragma>"case"   { RETURN(CASE); }
400 <Code,GlaExt>"class"            { RETURN(CLASS); }
401 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
402 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
403 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
404 <Code,GlaExt>"else"             { RETURN(ELSE); }
405 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
406 <Code,GlaExt>"if"               { RETURN(IF); }
407 <Code,GlaExt>"import"           { RETURN(IMPORT); }
408 <Code,GlaExt>"infix"            { RETURN(INFIX); }
409 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
410 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
411 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
412 <Code,GlaExt>"interface"        { RETURN(INTERFACE); }
413 <Code,GlaExt>"module"           { RETURN(MODULE); }
414 <Code,GlaExt,GhcPragma>"of"     { RETURN(OF); }
415 <Code,GlaExt>"renaming"         { RETURN(RENAMING); }
416 <Code,GlaExt>"then"             { RETURN(THEN); }
417 <Code,GlaExt>"to"               { RETURN(TO); }
418 <Code,GlaExt>"type"             { RETURN(TYPE); }
419 <Code,GlaExt>"where"            { RETURN(WHERE); }
420 <Code,GlaExt,GhcPragma>"in"     { RETURN(IN); }
421 <Code,GlaExt,GhcPragma>"let"    { RETURN(LET); }
422 <GlaExt,GhcPragma>"_ccall_"     { RETURN(CCALL); }
423 <GlaExt,GhcPragma>"_ccall_GC_"  { RETURN(CCALL_GC); }
424 <GlaExt,GhcPragma>"_casm_"      { RETURN(CASM); }
425 <GlaExt,GhcPragma>"_casm_GC_"   { RETURN(CASM_GC); }
426 <Code,GlaExt,GhcPragma>"_scc_"  { RETURN(SCC); }
427 <GhcPragma>"_forall_"           { RETURN(FORALL); }
428
429 %{
430     /* 
431      * Haskell operators.  Nothing special about these.
432      */
433 %}
434
435 <Code,GlaExt>".."                       { RETURN(DOTDOT); }
436 <Code,GlaExt,GhcPragma>";"              { RETURN(SEMI); }
437 <Code,GlaExt,GhcPragma,UserPragma>","   { RETURN(COMMA); }
438 <Code,GlaExt,GhcPragma>"|"              { RETURN(VBAR); }
439 <Code,GlaExt,GhcPragma,UserPragma>"="   { RETURN(EQUAL); }
440 <Code,GlaExt>"<-"                       { RETURN(LARROW); }
441 <Code,GlaExt,GhcPragma,UserPragma>"->"  { RETURN(RARROW); }
442 <Code,GlaExt,GhcPragma,UserPragma>"=>"  { RETURN(DARROW); }
443 <Code,GlaExt,GhcPragma,UserPragma>"::"  { RETURN(DCOLON); }
444 <Code,GlaExt,GhcPragma,UserPragma>"("   { RETURN(OPAREN); }
445 <Code,GlaExt,GhcPragma,UserPragma>")"   { RETURN(CPAREN); }
446 <Code,GlaExt,GhcPragma,UserPragma>"["   { RETURN(OBRACK); }
447 <Code,GlaExt,GhcPragma,UserPragma>"]"   { RETURN(CBRACK); }
448 <Code,GlaExt,GhcPragma>"{"              { RETURN(OCURLY); }
449 <Code,GlaExt,GhcPragma>"}"              { RETURN(CCURLY); }
450 <Code,GlaExt>"+"                        { RETURN(PLUS); }
451 <Code,GlaExt>"@"                        { RETURN(AT); }
452 <Code,GlaExt,GhcPragma>"\\"             { RETURN(LAMBDA); }
453 <GhcPragma>"_/\\_"                      { RETURN(TYLAMBDA); }
454 <Code,GlaExt>"_"                        { RETURN(WILDCARD); }
455 <Code,GlaExt,GhcPragma>"`"              { RETURN(BQUOTE); }
456 <Code,GlaExt>"~"                        { RETURN(LAZY); }
457 <Code,GlaExt>"-"                        { RETURN(MINUS); }
458
459 %{
460     /*
461      * Integers and (for Glasgow extensions) primitive integers.  Note that
462      * we pass all of the text on to the parser, because flex/C can't handle
463      * arbitrary precision numbers.
464      */
465 %}
466
467 <GlaExt>("-")?"0o"{O}+"#" { /* octal */
468                          yylval.uid = xstrndup(yytext, yyleng - 1);
469                          RETURN(INTPRIM);
470                         }
471 <Code,GlaExt>"0o"{O}+   { /* octal */
472                          yylval.uid = xstrndup(yytext, yyleng);
473                          RETURN(INTEGER);
474                         }
475 <GlaExt>("-")?"0x"{H}+"#" { /* hexadecimal */
476                          yylval.uid = xstrndup(yytext, yyleng - 1);
477                          RETURN(INTPRIM);
478                         }
479 <Code,GlaExt>"0x"{H}+   { /* hexadecimal */
480                          yylval.uid = xstrndup(yytext, yyleng);
481                          RETURN(INTEGER);
482                         }
483 <GlaExt,GhcPragma>("-")?{N}"#"  {
484                          yylval.uid = xstrndup(yytext, yyleng - 1);
485                          RETURN(INTPRIM);
486                         }
487 <Code,GlaExt,GhcPragma>{N} {
488                          yylval.uid = xstrndup(yytext, yyleng);
489                          RETURN(INTEGER);
490                         }
491
492 %{
493     /*
494      * Floats and (for Glasgow extensions) primitive floats/doubles.
495      */
496 %}
497
498 <GlaExt,GhcPragma>("-")?{F}"##" {
499                          yylval.uid = xstrndup(yytext, yyleng - 2);
500                          RETURN(DOUBLEPRIM);
501                         }
502 <GlaExt,GhcPragma>("-")?{F}"#" {
503                          yylval.uid = xstrndup(yytext, yyleng - 1);
504                          RETURN(FLOATPRIM);
505                         }
506 <Code,GlaExt>{F}        {
507                          yylval.uid = xstrndup(yytext, yyleng);
508                          RETURN(FLOAT);
509                         }
510
511 %{
512     /*
513      * Funky ``foo'' style C literals for Glasgow extensions
514      */
515 %}
516
517 <GlaExt,GhcPragma>"``"[^']+"''" {
518                          hsnewid(yytext + 2, yyleng - 4);
519                          RETURN(CLITLIT);
520                         }
521
522 %{
523     /*
524      * Identifiers, both variables and operators.  The trailing hash is allowed
525      * for Glasgow extensions.
526      */
527 %}
528
529 <GhcPragma>"_NIL_"              { hsnewid(yytext, yyleng); RETURN(CONID); }
530 <GhcPragma>"_TUP_"{D}+          { hsnewid(yytext, yyleng); RETURN(CONID); }
531 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
532
533 <GlaExt,GhcPragma,UserPragma>{Id}"#" { 
534                          hsnewid(yytext, yyleng);
535                          RETURN(_isconstr(yytext) ? CONID : VARID);
536                         }
537 %{
538 /* This SHOULDNAE work in "Code" (sigh) */
539 %}
540 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} { 
541                          if (! (nonstandardFlag || in_interface)) {
542                             char errbuf[ERR_BUF_SIZE];
543                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
544                             hsperror(errbuf);
545                          }
546                          hsnewid(yytext, yyleng);
547                          RETURN(isconstr(yytext) ? CONID : VARID);
548                          /* NB: ^^^^^^^^ : not the macro! */
549                         }
550 <Code,GlaExt,GhcPragma,UserPragma>{Id}  {
551                          hsnewid(yytext, yyleng);
552                          RETURN(_isconstr(yytext) ? CONID : VARID);
553                         }
554 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
555                          hsnewid(yytext, yyleng);
556                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
557                         }
558
559 %{
560     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
561
562     /* Because we can make the former well-behaved (we defined them).
563
564        Sadly, the latter is defined by Haskell, which allows such
565        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
566     */
567 %}
568
569 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`"        {       
570                          hsnewid(yytext + 1, yyleng - 2);
571                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
572                         }
573
574 %{
575     /*
576      * Character literals.  The first form is the quick form, for character
577      * literals that don't contain backslashes.  Literals with backslashes are
578      * lexed through multiple rules.  First, we match the open ' and as many
579      * normal characters as possible.  This puts us into the <Char> state, where
580      * a backslash is legal.  Then, we match the backslash and move into the 
581      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
582      * characters and the close '.  We may end up with too many characters, but
583      * this allows us to easily share the lex rules with strings.  Excess characters
584      * are ignored with a warning.
585      */
586 %}
587
588 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
589                          yylval.uhstring = installHstring(1, yytext+1);
590                          RETURN(CHARPRIM);
591                         }
592 <Code,GlaExt>'({CHAR}|"\"")'    {
593                          yylval.uhstring = installHstring(1, yytext+1);
594                          RETURN(CHAR);
595                         }
596 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
597                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
598                          hsperror(errbuf);
599                         }
600 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
601                          hsmlcolno = hspcolno;
602                          cleartext();
603                          addtext(yytext+1, yyleng-1);
604                          PUSH_STATE(Char);
605                         }
606 <Char>({CHAR}|"\"")*'#  {
607                          unsigned length;
608                          char *text;
609
610                          addtext(yytext, yyleng - 2);
611                          text = fetchtext(&length);
612
613                          if (! (nonstandardFlag || in_interface)) {
614                             char errbuf[ERR_BUF_SIZE];
615                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
616                             hsperror(errbuf);
617                          }
618
619                          if (length > 1) {
620                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
621                               input_filename, hsplineno, hspcolno + 1);
622                             format_string(stderr, (unsigned char *) text, length);
623                             fputs("' too long\n", stderr);
624                             hsperror("");
625                          }
626                          yylval.uhstring = installHstring(1, text);
627                          hspcolno = hsmlcolno;
628                          POP_STATE;
629                          RETURN(CHARPRIM); 
630                         }
631 <Char>({CHAR}|"\"")*'   {
632                          unsigned length;
633                          char *text;
634
635                          addtext(yytext, yyleng - 1);
636                          text = fetchtext(&length);
637
638                          if (length > 1) {
639                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
640                               input_filename, hsplineno, hspcolno + 1);
641                             format_string(stderr, (unsigned char *) text, length);
642                             fputs("' too long\n", stderr);
643                             hsperror("");
644                          }
645                          yylval.uhstring = installHstring(1, text);
646                          hspcolno = hsmlcolno;
647                          POP_STATE;
648                          RETURN(CHAR); 
649                         }
650 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
651
652
653 %{
654     /*
655      * String literals.  The first form is the quick form, for string literals
656      * that don't contain backslashes.  Literals with backslashes are lexed
657      * through multiple rules.  First, we match the open " and as many normal
658      * characters as possible.  This puts us into the <String> state, where
659      * a backslash is legal.  Then, we match the backslash and move into the 
660      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
661      * characters, moving back and forth between <String> and <StringEsc> as more
662      * backslashes are encountered.  (We may even digress into <Comment> mode if we
663      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
664      * of normal characters and the close ".
665      */
666 %}
667
668 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""#  {
669                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
670                             /* the -3 accounts for the " on front, "# on the end */
671                          RETURN(STRINGPRIM); 
672                         }
673 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""  {
674                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
675                          RETURN(STRING); 
676                         }
677 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
678                          hsmlcolno = hspcolno;
679                          cleartext();
680                          addtext(yytext+1, yyleng-1);
681                          PUSH_STATE(String);
682                         }
683 <String>({CHAR}|"'")*"\"#"   {
684                          unsigned length;
685                          char *text;
686
687                          addtext(yytext, yyleng-2);
688                          text = fetchtext(&length);
689
690                          if (! (nonstandardFlag || in_interface)) {
691                             char errbuf[ERR_BUF_SIZE];
692                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
693                             hsperror(errbuf);
694                          }
695
696                          yylval.uhstring = installHstring(length, text);
697                          hspcolno = hsmlcolno;
698                          POP_STATE;
699                          RETURN(STRINGPRIM);
700                         }
701 <String>({CHAR}|"'")*"\""   {
702                          unsigned length;
703                          char *text;
704
705                          addtext(yytext, yyleng-1);
706                          text = fetchtext(&length);
707
708                          yylval.uhstring = installHstring(length, text);
709                          hspcolno = hsmlcolno;
710                          POP_STATE;
711                          RETURN(STRING); 
712                         }
713 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
714
715 %{
716     /*
717      * Character and string escapes are roughly the same, but strings have the
718      * extra `\&' sequence which is not allowed for characters.  Also, comments
719      * are allowed in the <StringEsc> state.  (See the comment section much
720      * further down.)
721      *
722      * NB: Backslashes and tabs are stored in strings as themselves.
723      * But if we print them (in printtree.c), they must go out as
724      * "\\\\" and "\\t" respectively.  (This is because of the bogus
725      * intermediate format that the parser produces.  It uses '\t' fpr end of
726      * string, so it needs to be able to escape tabs, which means that it
727      * also needs to be able to escape the escape character ('\\').  Sigh.
728      */
729 %}
730
731 <Char>\\                { PUSH_STATE(CharEsc); }
732 <String>\\&             /* Ignore */ ;
733 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
734
735 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
736 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
737
738 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
739 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
740 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
741 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
742 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
743 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
744 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
745 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
746 <CharEsc,StringEsc>BEL  |
747 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
748 <CharEsc,StringEsc>BS   |
749 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
750 <CharEsc,StringEsc>HT   |
751 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
752 <CharEsc,StringEsc>LF   |
753 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
754 <CharEsc,StringEsc>VT   |
755 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
756 <CharEsc,StringEsc>FF   |
757 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
758 <CharEsc,StringEsc>CR   |
759 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
760 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
761 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
762 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
763 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
764 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
765 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
766 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
767 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
768 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
769 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
770 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
771 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
772 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
773 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
774 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
775 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
776 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
777 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
778 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
779 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
780 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
781 <CharEsc,StringEsc>{D}+  {
782                           int i = strtol(yytext, NULL, 10);
783                           if (i < NCHARS) {
784                              addchar((char) i);
785                           } else {
786                              char errbuf[ERR_BUF_SIZE];
787                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
788                                 yytext);
789                              hsperror(errbuf);
790                           }
791                           POP_STATE;
792                         }
793 <CharEsc,StringEsc>o{O}+ {
794                           int i = strtol(yytext + 1, NULL, 8);
795                           if (i < NCHARS) {
796                              addchar((char) i);
797                           } else {
798                              char errbuf[ERR_BUF_SIZE];
799                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
800                                 yytext);
801                              hsperror(errbuf);
802                           }
803                           POP_STATE;
804                         }
805 <CharEsc,StringEsc>x{H}+ {
806                           int i = strtol(yytext + 1, NULL, 16);
807                           if (i < NCHARS) {
808                              addchar((char) i);
809                           } else {
810                              char errbuf[ERR_BUF_SIZE];
811                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
812                                 yytext);
813                              hsperror(errbuf);
814                           }
815                           POP_STATE;
816                         }
817
818 %{
819     /*
820      * Simple comments and whitespace.  Normally, we would just ignore these, but
821      * in case we're processing a string escape, we need to note that we've seen
822      * a gap.
823      *
824      * Note that we cater for a comment line that *doesn't* end in a newline.
825      * This is incorrect, strictly speaking, but seems like the right thing
826      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
827      */
828 %}
829
830 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
831 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+       { noGap = FALSE; }
832
833 %{
834     /*
835      * Nested comments.  The major complication here is in trying to match the
836      * longest lexemes possible, for better performance.  (See the flex document.)
837      * That's why the rules look so bizarre.
838      */
839 %}
840
841 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-"        { 
842                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
843                         }
844
845 <Comment>[^-{]*         |
846 <Comment>"-"+[^-{}]+    |
847 <Comment>"{"+[^-{}]+    ;
848 <Comment>"{-"           { nested_comments++; }
849 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
850 <Comment>(.|\n)         ;
851
852 %{
853     /*
854      * Illegal characters.  This used to be a single rule, but we might as well
855      * pass on as much information as we have, so now we indicate our state in
856      * the error message.
857      */
858 %}
859
860 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n)        { 
861                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
862                             input_filename, hsplineno, hspcolno + 1); 
863                          format_string(stderr, (unsigned char *) yytext, 1);
864                          fputs("'\n", stderr);
865                          hsperror("");
866                         }
867 <Char>(.|\n)            { 
868                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
869                             input_filename, hsplineno, hspcolno + 1); 
870                          format_string(stderr, (unsigned char *) yytext, 1);
871                          fputs("' in a character literal\n", stderr);
872                          hsperror("");
873                         }
874 <CharEsc>(.|\n)         {
875                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
876                             input_filename, hsplineno, hspcolno + 1); 
877                          format_string(stderr, (unsigned char *) yytext, 1);
878                          fputs("'\n", stderr);
879                          hsperror("");
880                         }
881 <String>(.|\n)          { if (nonstandardFlag) {
882                              addtext(yytext, yyleng);
883                           } else { 
884                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
885                                 input_filename, hsplineno, hspcolno + 1); 
886                                 format_string(stderr, (unsigned char *) yytext, 1);
887                                 fputs("' in a string literal\n", stderr);
888                                 hsperror("");
889                           }
890                         }
891 <StringEsc>(.|\n)       {
892                          if (noGap) {
893                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
894                                 input_filename, hsplineno, hspcolno + 1); 
895                              format_string(stderr, (unsigned char *) yytext, 1);
896                              fputs("'\n", stderr);
897                              hsperror("");
898                          } else {
899                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
900                                 input_filename, hsplineno, hspcolno + 1);
901                              format_string(stderr, (unsigned char *) yytext, 1);
902                              fputs("' in a string gap\n", stderr);
903                              hsperror("");
904                          }
905                         }
906
907 %{
908     /*
909      * End of file.  In any sub-state, this is an error.  However, for the primary
910      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
911      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
912      * adding virtual close curlies, or closing an interface and returning to the
913      * primary source file.
914      *
915      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
916      * line/column advancement has to be done by hand.
917      */
918 %}
919
920 <Char,CharEsc><<EOF>>   { 
921                           hsplineno = hslineno; hspcolno = hscolno;
922                           hsperror("unterminated character literal");
923                         }
924 <Comment><<EOF>>        { 
925                           hsplineno = hslineno; hspcolno = hscolno;
926                           hsperror("unterminated comment"); 
927                         }
928 <String,StringEsc><<EOF>>   { 
929                           hsplineno = hslineno; hspcolno = hscolno;
930                           hsperror("unterminated string literal"); 
931                         }
932 <GhcPragma><<EOF>>      {
933                           hsplineno = hslineno; hspcolno = hscolno;
934                           hsperror("unterminated interface pragma"); 
935                         }
936 <UserPragma><<EOF>>     {
937                           hsplineno = hslineno; hspcolno = hscolno;
938                           hsperror("unterminated user-specified pragma"); 
939                         }
940 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
941
942 %%
943
944 /**********************************************************************
945 *                                                                     *
946 *                                                                     *
947 *     YACC/LEX Initialisation etc.                                    *
948 *                                                                     *
949 *                                                                     *
950 **********************************************************************/
951
952 /*
953    We initialise input_filename to "<stdin>".
954    This allows unnamed sources to be piped into the parser.
955 */
956
957 extern BOOLEAN acceptPrim;
958
959 void
960 yyinit(void)
961 {
962     input_filename = xstrdup("<stdin>");
963
964     /* We must initialize the input buffer _now_, because we call
965        setyyin _before_ calling yylex for the first time! */
966     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
967
968     if (acceptPrim)
969         PUSH_STATE(GlaExt);
970     else
971         PUSH_STATE(Code);
972 }
973
974 static void
975 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
976 {
977     if (input_filename != NULL)
978         free(input_filename);
979     input_filename = xstrdup(f);
980 }
981
982 /**********************************************************************
983 *                                                                     *
984 *                                                                     *
985 *     Layout Processing                                               *
986 *                                                                     *
987 *                                                                     *
988 **********************************************************************/
989
990 /*
991         The following section deals with Haskell Layout conventions
992         forcing insertion of ; or } as appropriate
993 */
994
995 static BOOLEAN
996 hsshouldindent(void)
997 {
998     return (!forgetindent && INDENTON);
999 }
1000
1001
1002 /* Enter new context and set new indentation level */
1003 void
1004 hssetindent(void)
1005 {
1006 #ifdef HSP_DEBUG
1007     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1008 #endif
1009
1010     /*
1011      * partain: first chk that new indent won't be less than current one; this code
1012      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1013      * current token; what that has to do with indenting, I don't know.
1014      */
1015
1016
1017     if (hscolno - 1 <= INDENTPT) {
1018         if (INDENTPT == -1)
1019             return;             /* Empty input OK for Haskell 1.1 */
1020         else {
1021             char errbuf[ERR_BUF_SIZE];
1022
1023             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1024             hsperror(errbuf);
1025         }
1026     }
1027     hsentercontext((hspcolno << 1) | 1);
1028 }
1029
1030
1031 /* Enter a new context without changing the indentation level */
1032 void
1033 hsincindent(void)
1034 {
1035 #ifdef HSP_DEBUG
1036     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1037 #endif
1038     hsentercontext(indenttab[icontexts] & ~1);
1039 }
1040
1041
1042 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1043 void
1044 hsindentoff(void)
1045 {
1046     forgetindent = TRUE;
1047 }
1048
1049
1050 /* Enter a new layout context. */
1051 static void
1052 hsentercontext(int indent)
1053 {
1054     /* Enter new context and set indentation as specified */
1055     if (++icontexts >= MAX_CONTEXTS) {
1056         char errbuf[ERR_BUF_SIZE];
1057
1058         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1059         hsperror(errbuf);
1060     }
1061     forgetindent = FALSE;
1062     indenttab[icontexts] = indent;
1063 #ifdef HSP_DEBUG
1064     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1065 #endif
1066 }
1067
1068
1069 /* Exit a layout context */
1070 void
1071 hsendindent(void)
1072 {
1073     --icontexts;
1074 #ifdef HSP_DEBUG
1075     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1076 #endif
1077 }
1078
1079 /*
1080  *      Return checks the indentation level and returns ;, } or the specified token.
1081  */
1082
1083 static int
1084 Return(int tok)
1085 {
1086 #ifdef HSP_DEBUG
1087     extern int yyleng;
1088 #endif
1089
1090     if (hsshouldindent()) {
1091         if (hspcolno < INDENTPT) {
1092 #ifdef HSP_DEBUG
1093             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1094 #endif
1095             hssttok = tok;
1096             return (VCCURLY);
1097         } else if (hspcolno == INDENTPT) {
1098 #ifdef HSP_DEBUG
1099             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1100 #endif
1101             hssttok = -tok;
1102             return (SEMI);
1103         }
1104     }
1105     hssttok = -1;
1106 #ifdef HSP_DEBUG
1107     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1108 #endif
1109     return (tok);
1110 }
1111
1112
1113 /*
1114  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1115  */
1116 int
1117 yylex()
1118 {
1119     int tok;
1120     static BOOLEAN eof = FALSE;
1121
1122     if (!eof) {
1123         if (hssttok != -1) {
1124             if (hssttok < 0) {
1125                 tok = -hssttok;
1126                 hssttok = -1;
1127                 return tok;
1128             }
1129             RETURN(hssttok);
1130         } else {
1131             endlineno = hslineno;
1132             if ((tok = yylex1()) != EOF)
1133                 return tok;
1134             else
1135                 eof = TRUE;
1136         }
1137     }
1138     if (icontexts > icontexts_save) {
1139         if (INDENTON) {
1140             eof = TRUE;
1141             indenttab[icontexts] = 0;
1142             return (VCCURLY);
1143         } else
1144             hsperror("missing '}' at end of file");
1145     } else if (hsbuf_save != NULL) {
1146         fclose(yyin);
1147         yy_delete_buffer(YY_CURRENT_BUFFER);
1148         yy_switch_to_buffer(hsbuf_save);
1149         hsbuf_save = NULL;
1150         new_filename(filename_save);
1151         free(filename_save);
1152         hslineno = hslineno_save;
1153         hsplineno = hsplineno_save;
1154         hscolno = hscolno_save;
1155         hspcolno = hspcolno_save;
1156         etags = etags_save;
1157         in_interface = FALSE;
1158         icontexts = icontexts_save - 1;
1159         icontexts_save = 0;
1160 #ifdef HSP_DEBUG
1161         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1162 #endif
1163         eof = FALSE;
1164         RETURN(LEOF);
1165     } else {
1166         yyterminate();
1167     }
1168     abort(); /* should never get here! */
1169     return(0);
1170 }
1171
1172 /**********************************************************************
1173 *                                                                     *
1174 *                                                                     *
1175 *     Input Processing for Interfaces                                 *
1176 *                                                                     *
1177 *                                                                     *
1178 **********************************************************************/
1179
1180 /* setyyin(file)        open file as new lex input buffer */
1181 extern FILE *yyin;
1182
1183 void
1184 setyyin(char *file)
1185 {
1186     hsbuf_save = YY_CURRENT_BUFFER;
1187     if ((yyin = fopen(file, "r")) == NULL) {
1188         char errbuf[ERR_BUF_SIZE];
1189
1190         sprintf(errbuf, "can't read \"%-.50s\"", file);
1191         hsperror(errbuf);
1192     }
1193     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1194
1195     hslineno_save = hslineno;
1196     hsplineno_save = hsplineno;
1197     hslineno = hsplineno = 1;
1198
1199     filename_save = input_filename;
1200     input_filename = NULL;
1201     new_filename(file);
1202     hscolno_save = hscolno;
1203     hspcolno_save = hspcolno;
1204     hscolno = hspcolno = 0;
1205     in_interface = TRUE;
1206     etags_save = etags; /* do not do "etags" stuff in interfaces */
1207     etags = 0;          /* We remember whether we are doing it in
1208                            the module, so we can restore it later [WDP 94/09] */
1209     hsentercontext(-1);         /* partain: changed this from 0 */
1210     icontexts_save = icontexts;
1211 #ifdef HSP_DEBUG
1212     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1213 #endif
1214 }
1215
1216 static void
1217 layout_input(char *text, int len)
1218 {
1219 #ifdef HSP_DEBUG
1220     fprintf(stderr, "Scanning \"%s\"\n", text);
1221 #endif
1222
1223     hsplineno = hslineno;
1224     hspcolno = hscolno;
1225
1226     while (len-- > 0) {
1227         switch (*text++) {
1228         case '\n':
1229         case '\r':
1230         case '\f':
1231             hslineno++;
1232             hscolno = 0;
1233             break;
1234         case '\t':
1235             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1236             break;
1237         case '\v':
1238             break;
1239         default:
1240             ++hscolno;
1241             break;
1242         }
1243     }
1244 }
1245
1246 void
1247 setstartlineno(void)
1248 {
1249     startlineno = hsplineno;
1250 #if 1/*etags*/
1251 #else
1252     if (etags)
1253         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1254 #endif
1255 }
1256
1257 /**********************************************************************
1258 *                                                                     *
1259 *                                                                     *
1260 *                      Text Caching                                   *
1261 *                                                                     *
1262 *                                                                     *
1263 **********************************************************************/
1264
1265 #define CACHE_SIZE YY_BUF_SIZE
1266
1267 static struct {
1268     unsigned allocated;
1269     unsigned next;
1270     char *text;
1271 } textcache = { 0, 0, NULL };
1272
1273 static void
1274 cleartext(void)
1275 {
1276 /*  fprintf(stderr, "cleartext\n"); */
1277     textcache.next = 0;
1278     if (textcache.allocated == 0) {
1279         textcache.allocated = CACHE_SIZE;
1280         textcache.text = xmalloc(CACHE_SIZE);
1281     }
1282 }
1283
1284 static void
1285 addtext(char *text, unsigned length)
1286 {
1287 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1288
1289     if (length == 0)
1290         return;
1291
1292     if (textcache.next + length + 1 >= textcache.allocated) {
1293         textcache.allocated += length + CACHE_SIZE;
1294         textcache.text = xrealloc(textcache.text, textcache.allocated);
1295     }
1296     bcopy(text, textcache.text + textcache.next, length);
1297     textcache.next += length;
1298 }
1299
1300 static void
1301 addchar(char c)
1302 {
1303 /*  fprintf(stderr, "addchar: %c\n", c); */
1304
1305     if (textcache.next + 2 >= textcache.allocated) {
1306         textcache.allocated += CACHE_SIZE;
1307         textcache.text = xrealloc(textcache.text, textcache.allocated);
1308     }
1309     textcache.text[textcache.next++] = c;
1310 }
1311
1312 static char *
1313 fetchtext(unsigned *length)
1314 {
1315 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1316
1317     *length = textcache.next;
1318     textcache.text[textcache.next] = '\0';
1319     return textcache.text;
1320 }
1321
1322 /**********************************************************************
1323 *                                                                     *
1324 *                                                                     *
1325 *    Identifier Processing                                             *
1326 *                                                                     *
1327 *                                                                     *
1328 **********************************************************************/
1329
1330 /*
1331         hsnewid         Enters an id of length n into the symbol table.
1332 */
1333
1334 static void
1335 hsnewid(char *name, int length)
1336 {
1337     char save = name[length];
1338
1339     name[length] = '\0';
1340     yylval.uid = installid(name);
1341     name[length] = save;
1342 }
1343
1344 BOOLEAN 
1345 isconstr(char *s) /* walks past leading underscores before using the macro */
1346 {
1347     char *temp = s;
1348
1349     for ( ; temp != NULL && *temp == '_' ; temp++ );
1350
1351     return _isconstr(temp);
1352 }