[project @ 1996-01-08 20:28:12 by partain]
[ghc-hetmet.git] / ghc / compiler / yaccParser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       LEX grammar for Haskell.                                      *
6 *       ------------------------                                      *
7 *                                                                     *
8 *       (c) Copyright K. Hammond, University of Glasgow,              *
9 *               10th. February 1989                                   *
10 *                                                                     *
11 *       Modification History                                          *
12 *       --------------------                                          *
13 *                                                                     *
14 *       22/08/91 kh             Initial Haskell 1.1 version.          *
15 *       18/10/91 kh             Added 'ccall'.                        *
16 *       19/11/91 kh             Tidied generally.                     *
17 *       04/12/91 kh             Added Int#.                           *
18 *       31/01/92 kh             Haskell 1.2 version.                  *
19 *       24/04/92 ps             Added 'scc'.                          *
20 *       03/06/92 kh             Changed Infix/Prelude Handling.       *
21 *       23/08/93 jsm            Changed to support flex               *
22 *                                                                     *
23 *                                                                     *
24 *       Known Problems:                                               *
25 *                                                                     *
26 *               None, any more.                                       *
27 *                                                                     *
28 **********************************************************************/
29
30 #include "../../includes/config.h"
31
32 #include <stdio.h>
33
34 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
35 #include <string.h>
36 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
37 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
38 #include <memory.h>
39 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
40 #define index strchr
41 #define rindex strrchr
42 #define bcopy(s, d, n) memcpy ((d), (s), (n))
43 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
44 #define bzero(s, n) memset ((s), 0, (n))
45 #else /* not STDC_HEADERS and not HAVE_STRING_H */
46 #include <strings.h>
47 /* memory.h and strings.h conflict on some systems.  */
48 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
49
50 #include "hspincl.h"
51 #include "hsparser.tab.h"
52 #include "constants.h"
53 #include "utils.h"
54
55 /* Our substitute for <ctype.h> */
56
57 #define NCHARS  256     /* one CharTable entry per possible byte value */
58 #define _S      0x1     /* white space: tab, newline, vertical tab, form feed, space */
59 #define _D      0x2     /* decimal digit */
60 #define _H      0x4     /* hexadecimal digit */
61 #define _O      0x8     /* octal digit */
62 #define _C      0x10    /* can start a constructor name: 'A'..'Z' or ':' */
63 /* Non-zero iff the first character of s carries _C.  The argument is parenthesised so that _isconstr(p+1) tests p[1], and the index is made unsigned so it cannot go negative. */
64 #define _isconstr(s)    (CharTable[(unsigned char) *(s)] & (_C))
65 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
66
67 unsigned char CharTable[NCHARS] = {   /* flag bits (_S/_D/_H/_O/_C) per character code; 8 codes per row, each row labelled with its first code */
68 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
69 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
70 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
71 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
72 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
73 /* '(' */       0,      0,      0,      0,      0,      0,      0,      0,
74 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
75 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
76 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
77 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
78 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
79 /* 'X' */       _C,     _C,     _C,     0,      0,      0,      0,      0,
80 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
81 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
82 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
83 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
84
85 /* 0x80 onwards: no flags set */       0,      0,      0,      0,      0,      0,      0,      0,
86 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
87 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
88 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
89 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
90 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
91 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
92 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
93 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
94 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
95 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
96 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
97 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
98 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
99 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
100 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
101 };
102
103 /**********************************************************************
104 *                                                                     *
105 *                                                                     *
106 *      Declarations                                                   *
107 *                                                                     *
108 *                                                                     *
109 **********************************************************************/
110
111 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
112
113 /*
114  * For my own sanity, things that are not part of the flex skeleton
115  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
116  */
117
118 int hslineno = 0;               /* Line number at end of token */
119 int hsplineno = 0;              /* Line number at end of previous token */
120
121 int hscolno = 0;                /* Column number at end of token */
122 int hspcolno = 0;               /* Column number at end of previous token */
123 int hsmlcolno = 0;              /* Column number for multiple-rule lexemes */
124
125 int startlineno = 0;            /* The line number where something starts */
126 int endlineno = 0;              /* The line number where something ends */
127
128 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
129 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
130
131 static int nested_comments;     /* For counting comment nesting depth */
132
133 /* Hacky definition of yywrap: see flex doc.
134
135    If we don't do this, then we'll have to get the default
136    yywrap from the flex library, which is often something
137    we are not good at locating.  This avoids that difficulty.
138    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
139    WDP 94/09/05
140 */
141 #define yywrap() 1
142
143 /* Essential forward declarations */
144
145 static VOID hsnewid      PROTO((char *, int));
146 static VOID layout_input PROTO((char *, int));
147 static VOID cleartext    (NO_ARGS);
148 static VOID addtext      PROTO((char *, unsigned));
149 static VOID addchar      PROTO((char));
150 static char *fetchtext   PROTO((unsigned *));
151
152 /* Special file handling for IMPORTS */
153 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
154
155 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
156 static char *filename_save;             /* File Name                     */
157 static int hslineno_save = 0,           /* Line Number                   */
158  hsplineno_save = 0,                    /* Line Number of Prev. token    */
159  hscolno_save = 0,                      /* Indentation                   */
160  hspcolno_save = 0;                     /* Left Indentation              */
161 static short icontexts_save = 0;        /* Indent Context Level          */
162
163 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
164 extern BOOLEAN etags;      /* that which is saved */
165
166 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
167
168 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
169
170 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
171 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
172 extern int maxAcceptablePragmaVersion;
173 extern int thisIfacePragmaVersion;
174
175 static int hssttok = -1;        /* Stacked token: -1 -- no token stacked;
176                                  * other -ve -- ";" inserted before token;
177                                  * +ve -- "}" inserted before token */
178
179 short icontexts = 0;            /* Which context we're in */
180
181
182
183 /*
184         Table of indentations:  right bit indicates whether to use
185           indentation rules (1 = use rules; 0 = ignore)
186
187     partain:
188     push one of these "contexts" at every "case" or "where"; the right bit says
189     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
190
191     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
192     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
193     push is shown just below.
194
195 */
196
197
198 static short indenttab[MAX_CONTEXTS] = {-1}; /* table of contexts, indexed by icontexts; entry 0 starts out as -1 */
199
200 #define INDENTPT (indenttab[icontexts]>>1)   /* current entry with its right (flag) bit shifted out */
201 #define INDENTON (indenttab[icontexts]&1)    /* right bit of the current entry: 1 = use indentation rules, 0 = ignore */
202
203 #define RETURN(tok) return(Return(tok))      /* leave the scanner with whatever Return() makes of tok */
204
205 #undef YY_DECL
206 #define YY_DECL int yylex1()
207
208 /* We should not peek at yy_act, but flex calls us even for the internal action
209    triggered on 'end-of-buffer'.  (This is not true of flex 2.4.4 and up, but
210    to support older versions of flex, we'll continue to peek for now.)
211  */
212 #define YY_USER_ACTION \
213     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
214
215 #if 0/*debug*/
216 #undef YY_BREAK
217 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
218 #endif
219
220 /* Each time we enter a new start state, we push it onto the state stack.
221    Nothing below checks for overflow or underflow; the rules are expected
222    never to cause either.  The maximum expected depth is 4:
223    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
224    NOTE(review): a pragma state pushed between Code and String would make it 5 -- confirm. */
225 static int StateStack[5];       /* pushed start states; StateStack[StateDepth] is the current one */
226 static int StateDepth = -1;     /* index of the top entry; -1 while nothing has been pushed */
227
228 #ifdef HSP_DEBUG
229 #define PUSH_STATE(n)   do {\
230     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
231     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
232 #define POP_STATE       do {--StateDepth;\
233     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
234     BEGIN(StateStack[StateDepth]);} while(0)
235 #else
236 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0) /* record n on top of the stack and switch to it */
237 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0) /* drop the top entry and resume the one beneath it */
238 #endif
239
240 %}
241
242 /* The start states are:
243    Code -- normal Haskell code (principal lexer)
244    GlaExt -- Haskell code with Glasgow extensions
245    Comment -- Nested comment processing
246    String -- Inside a string literal with backslashes
247    StringEsc -- Immediately following a backslash in a string literal
248    Char -- Inside a character literal with backslashes
249    CharEsc -- Immediately following a backslash in a character literal 
250
251    Note that the INITIAL state is unused.  Also note that these states
252    are _exclusive_.  All rules should be prefixed with an appropriate
253    list of start states.
254  */
255
256 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
257
258 D                       [0-9]
259 O                       [0-7]
260 H                       [0-9A-Fa-f]
261 N                       {D}+
262 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
263 S                       [!#$%&*+./<=>?@\\^|~:]
264 SId                     ({S}|~|-){S}*
265 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
266 L                       [A-Z]
267 I                       [A-Za-z]
268 i                       [A-Za-z0-9'_]
269 Id                      {I}({i})*
270 WS                      [ \t\n\r\f\v]
271 CNTRL                   [@A-Z\[\\\]^_]
272 NL                      [\n\r]
273
274 %%
275
276 %{
277     /* 
278      * Special GHC pragma rules.  Do we need a start state for interface files,
279      * so these won't be matched in source files? --JSM
280      */
281 %}
282
283 <Code,GlaExt>^"# ".*{NL}    { /* cpp line marker:  # <lineno> "<file>" ... */
284                           char tempf[FILENAME_SIZE]; /* the name is part of yytext, so it fits whenever yyleng < FILENAME_SIZE */
285                           int nconv = (yyleng < FILENAME_SIZE) ? sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf) : sscanf(yytext+1, "%d", &hslineno);
286                           if (nconv == 2) new_filename(tempf); /* no name read => tempf is unset, so do not pass it on */
287                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
288                         }
289
290 <Code,GlaExt>^"#line ".*{NL}    { /* #line <lineno> "<file>" */
291                           char tempf[FILENAME_SIZE]; /* the name is part of yytext, so it fits whenever yyleng < FILENAME_SIZE */
292                           int nconv = (yyleng < FILENAME_SIZE) ? sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf) : sscanf(yytext+5, "%d", &hslineno);
293                           if (nconv == 2) new_filename(tempf); /* no name read => tempf is unset, so do not pass it on */
294                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
295                         }
296
297 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
298                           /* partain: pragma-style line directive; yytext+9 skips the fixed "{-# LINE " prefix */
299                           char tempf[FILENAME_SIZE]; /* the name is part of yytext, so it fits whenever yyleng < FILENAME_SIZE */
300                           int nconv = (yyleng < FILENAME_SIZE) ? sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf) : sscanf(yytext+9, "%d", &hslineno);
301                           if (nconv == 2) new_filename(tempf); /* no name read => tempf is unset, so do not pass it on */
302                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
303                         }
304 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}"   {
305                           sscanf(yytext+33,"%d ",&thisIfacePragmaVersion); /* 33 = length of the fixed prefix, so this reads the {D}+ digits */
306                         }
307 <Code,GlaExt>"{-# GHC_PRAGMA "   { 
308                           /* Honour the pragma only when pragmas are not being ignored and this
309                              interface's pragma version lies within the acceptable range. */
310                           if ( !ignorePragmas &&
311                                thisIfacePragmaVersion >= minAcceptablePragmaVersion &&
312                                thisIfacePragmaVersion <= maxAcceptablePragmaVersion) {
313                              PUSH_STATE(GhcPragma);
314                              RETURN(GHC_PRAGMA);
315                           }
316                           nested_comments = 1; PUSH_STATE(Comment); /* otherwise scan it as a comment of nesting depth 1 */
317                         }
318 <GhcPragma>"_N_"            { RETURN(NO_PRAGMA); /* rules tagged <GhcPragma> apply only in the state pushed by "{-# GHC_PRAGMA " */ }
319 <GhcPragma>"_NI_"           { RETURN(NOINFO_PRAGMA); }
320 <GhcPragma>"_ABSTRACT_"     { RETURN(ABSTRACT_PRAGMA); }
321 <GhcPragma>"_DEFOREST_"     { RETURN(DEFOREST_PRAGMA); }
322 <GhcPragma>"_SPECIALISE_"   { RETURN(SPECIALISE_PRAGMA); }
323 <GhcPragma>"_M_"            { RETURN(MODNAME_PRAGMA); }
324 <GhcPragma>"_A_"            { RETURN(ARITY_PRAGMA); }
325 <GhcPragma>"_U_"            { RETURN(UPDATE_PRAGMA); }
326 <GhcPragma>"_S_"            { RETURN(STRICTNESS_PRAGMA); }
327 <GhcPragma>"_K_"            { RETURN(KIND_PRAGMA); }
328 <GhcPragma>"_MF_"           { RETURN(MAGIC_UNFOLDING_PRAGMA); }
329 <GhcPragma>"_F_"            { RETURN(UNFOLDING_PRAGMA); }
330
331 <GhcPragma>"_!_"            { RETURN(COCON); }
332 <GhcPragma>"_#_"            { RETURN(COPRIM); }
333 <GhcPragma>"_APP_"          { RETURN(COAPP); }
334 <GhcPragma>"_TYAPP_"        { RETURN(COTYAPP); }
335 <GhcPragma>"_ALG_"          { RETURN(CO_ALG_ALTS); }
336 <GhcPragma>"_PRIM_"         { RETURN(CO_PRIM_ALTS); }
337 <GhcPragma>"_NO_DEFLT_"     { RETURN(CO_NO_DEFAULT); }
338 <GhcPragma>"_LETREC_"       { RETURN(CO_LETREC); }
339
340 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
341 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
342 <GhcPragma>"_USER_CC_"      { RETURN(CO_USER_CC); }
343 <GhcPragma>"_AUTO_CC_"      { RETURN(CO_AUTO_CC); }
344 <GhcPragma>"_DICT_CC_"      { RETURN(CO_DICT_CC); }
345
346 <GhcPragma>"_DUPD_CC_"      { RETURN(CO_DUPD_CC); }
347 <GhcPragma>"_CAF_CC_"       { RETURN(CO_CAF_CC); }
348
349 <GhcPragma>"_SDSEL_"        { RETURN(CO_SDSEL_ID); }
350 <GhcPragma>"_METH_"         { RETURN(CO_METH_ID); }
351 <GhcPragma>"_DEFM_"         { RETURN(CO_DEFM_ID); }
352 <GhcPragma>"_DFUN_"         { RETURN(CO_DFUN_ID); }
353 <GhcPragma>"_CONSTM_"       { RETURN(CO_CONSTM_ID); }
354 <GhcPragma>"_SPEC_"         { RETURN(CO_SPEC_ID); }
355 <GhcPragma>"_WRKR_"         { RETURN(CO_WRKR_ID); }
356 <GhcPragma>"_ORIG_"         { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
357
358 <GhcPragma>"_ALWAYS_"       { RETURN(UNFOLD_ALWAYS); }
359 <GhcPragma>"_IF_ARGS_"      { RETURN(UNFOLD_IF_ARGS); }
360
361 <GhcPragma>"_NOREP_I_"      { RETURN(NOREP_INTEGER); }
362 <GhcPragma>"_NOREP_R_"      { RETURN(NOREP_RATIONAL); }
363 <GhcPragma>"_NOREP_S_"      { RETURN(NOREP_STRING); }
364
365 <GhcPragma>" #-}"           { POP_STATE; RETURN(END_PRAGMA); /* close of the pragma: go back to the state we came from */ }
366
367 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
368                               PUSH_STATE(UserPragma); /* both spellings, SPECIALISE and SPECIALIZE, are accepted */
369                               RETURN(SPECIALISE_UPRAGMA);
370                             }
371 <Code,GlaExt>"{-#"{WS}*"INLINE" {
372                               PUSH_STATE(UserPragma);
373                               RETURN(INLINE_UPRAGMA);
374                             }
375 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
376                               PUSH_STATE(UserPragma);
377                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
378                             }
379 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
380                               PUSH_STATE(UserPragma);
381                               RETURN(DEFOREST_UPRAGMA);
382                             }
383 <Code,GlaExt>"{-#"{WS}*"ABSTRACT" {
384                               PUSH_STATE(UserPragma);
385                               RETURN(ABSTRACT_UPRAGMA);
386                             }
387 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); /* close of a user pragma: back to the state that opened it */ }
388
389 %{
390     /*
391      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
392      * intentionally accepted as a keyword even for normal <Code>.
393      */
394 %}
395
396 <Code,GlaExt,GhcPragma>"case"   { RETURN(CASE); /* the state list on each rule says where that keyword is recognised */ }
397 <Code,GlaExt>"class"            { RETURN(CLASS); }
398 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
399 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
400 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
401 <Code,GlaExt>"else"             { RETURN(ELSE); }
402 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
403 <Code,GlaExt>"if"               { RETURN(IF); }
404 <Code,GlaExt>"import"           { RETURN(IMPORT); }
405 <Code,GlaExt>"infix"            { RETURN(INFIX); }
406 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
407 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
408 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
409 <Code,GlaExt>"interface"        { RETURN(INTERFACE); }
410 <Code,GlaExt>"module"           { RETURN(MODULE); }
411 <Code,GlaExt,GhcPragma>"of"     { RETURN(OF); }
412 <Code,GlaExt>"renaming"         { RETURN(RENAMING); }
413 <Code,GlaExt>"then"             { RETURN(THEN); }
414 <Code,GlaExt>"to"               { RETURN(TO); }
415 <Code,GlaExt>"type"             { RETURN(TYPE); }
416 <Code,GlaExt>"where"            { RETURN(WHERE); }
417 <Code,GlaExt,GhcPragma>"in"     { RETURN(IN); }
418 <Code,GlaExt,GhcPragma>"let"    { RETURN(LET); }
419 <GlaExt,GhcPragma>"_ccall_"     { RETURN(CCALL); /* the _ccall_/_casm_ family is not a keyword in plain <Code> */ }
420 <GlaExt,GhcPragma>"_ccall_GC_"  { RETURN(CCALL_GC); }
421 <GlaExt,GhcPragma>"_casm_"      { RETURN(CASM); }
422 <GlaExt,GhcPragma>"_casm_GC_"   { RETURN(CASM_GC); }
423 <Code,GlaExt,GhcPragma>"_scc_"  { RETURN(SCC); }
424 <GhcPragma>"_forall_"           { RETURN(FORALL); }
425
426 %{
427     /* 
428      * Haskell operators.  Nothing special about these.
429      */
430 %}
431
432 <Code,GlaExt>".."                       { RETURN(DOTDOT); /* reserved operators and punctuation: one fixed token each */ }
433 <Code,GlaExt,GhcPragma>";"              { RETURN(SEMI); }
434 <Code,GlaExt,GhcPragma,UserPragma>","   { RETURN(COMMA); }
435 <Code,GlaExt,GhcPragma>"|"              { RETURN(VBAR); }
436 <Code,GlaExt,GhcPragma,UserPragma>"="   { RETURN(EQUAL); }
437 <Code,GlaExt>"<-"                       { RETURN(LARROW); }
438 <Code,GlaExt,GhcPragma,UserPragma>"->"  { RETURN(RARROW); }
439 <Code,GlaExt,GhcPragma,UserPragma>"=>"  { RETURN(DARROW); }
440 <Code,GlaExt,GhcPragma,UserPragma>"::"  { RETURN(DCOLON); }
441 <Code,GlaExt,GhcPragma,UserPragma>"("   { RETURN(OPAREN); }
442 <Code,GlaExt,GhcPragma,UserPragma>")"   { RETURN(CPAREN); }
443 <Code,GlaExt,GhcPragma,UserPragma>"["   { RETURN(OBRACK); }
444 <Code,GlaExt,GhcPragma,UserPragma>"]"   { RETURN(CBRACK); }
445 <Code,GlaExt,GhcPragma>"{"              { RETURN(OCURLY); }
446 <Code,GlaExt,GhcPragma>"}"              { RETURN(CCURLY); }
447 <Code,GlaExt>"+"                        { RETURN(PLUS); }
448 <Code,GlaExt>"@"                        { RETURN(AT); }
449 <Code,GlaExt,GhcPragma>"\\"             { RETURN(LAMBDA); /* a single backslash */ }
450 <GhcPragma>"_/\\_"                      { RETURN(TYLAMBDA); /* the four characters _/\_ */ }
451 <Code,GlaExt>"_"                        { RETURN(WILDCARD); }
452 <Code,GlaExt,GhcPragma>"`"              { RETURN(BQUOTE); }
453 <Code,GlaExt>"~"                        { RETURN(LAZY); }
454 <Code,GlaExt>"-"                        { RETURN(MINUS); }
455
456 %{
457     /*
458      * Integers and (for Glasgow extensions) primitive integers.  Note that
459      * we pass all of the text on to the parser, because flex/C can't handle
460      * arbitrary precision numbers.
461      */
462 %}
463
464 <GlaExt>("-")?"0o"{O}+"#" { /* octal */
465                          yylval.uid = xstrndup(yytext, yyleng - 1); /* yyleng - 1 leaves out the trailing '#' */
466                          RETURN(INTPRIM);
467                         }
468 <Code,GlaExt>"0o"{O}+   { /* octal */
469                          yylval.uid = xstrndup(yytext, yyleng); /* the whole lexeme, "0o" prefix included */
470                          RETURN(INTEGER);
471                         }
472 <GlaExt>("-")?"0x"{H}+"#" { /* hexadecimal */
473                          yylval.uid = xstrndup(yytext, yyleng - 1); /* yyleng - 1 leaves out the trailing '#' */
474                          RETURN(INTPRIM);
475                         }
476 <Code,GlaExt>"0x"{H}+   { /* hexadecimal */
477                          yylval.uid = xstrndup(yytext, yyleng); /* the whole lexeme, "0x" prefix included */
478                          RETURN(INTEGER);
479                         }
480 <GlaExt,GhcPragma>("-")?{N}"#"  { /* decimal, optionally negative, with a hash suffix */
481                          yylval.uid = xstrndup(yytext, yyleng - 1); /* yyleng - 1 leaves out the trailing '#' */
482                          RETURN(INTPRIM);
483                         }
484 <Code,GlaExt,GhcPragma>{N} { /* plain decimal */
485                          yylval.uid = xstrndup(yytext, yyleng);
486                          RETURN(INTEGER);
487                         }
488
489 %{
490     /*
491      * Floats and (for Glasgow extensions) primitive floats/doubles.
492      */
493 %}
494
495 <GlaExt,GhcPragma>("-")?{F}"##" { /* two hashes: primitive double */
496                          yylval.uid = xstrndup(yytext, yyleng - 2); /* yyleng - 2 leaves out the "##" */
497                          RETURN(DOUBLEPRIM);
498                         }
499 <GlaExt,GhcPragma>("-")?{F}"#" { /* one hash: primitive float */
500                          yylval.uid = xstrndup(yytext, yyleng - 1); /* yyleng - 1 leaves out the '#' */
501                          RETURN(FLOATPRIM);
502                         }
503 <Code,GlaExt>{F}        { /* ordinary float; the text is passed on unconverted */
504                          yylval.uid = xstrndup(yytext, yyleng);
505                          RETURN(FLOAT);
506                         }
507
508 %{
509     /*
510      * Funky ``foo'' style C literals for Glasgow extensions
511      */
512 %}
513
514 <GlaExt,GhcPragma>"``"[^']+"''" {
515                          hsnewid(yytext + 2, yyleng - 4); /* only the text between the `` and '' delimiters */
516                          RETURN(CLITLIT);
517                         }
518
519 %{
520     /*
521      * Identifiers, both variables and operators.  The trailing hash is allowed
522      * for Glasgow extensions.
523      */
524 %}
525
526 <GhcPragma>"_NIL_"              { hsnewid(yytext, yyleng); RETURN(CONID); /* fixed name, returned as a constructor id */ }
527 <GhcPragma>"_TUP_"{D}+          { hsnewid(yytext, yyleng); RETURN(CONID); /* "_TUP_" followed by digits, also a constructor id */ }
528 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); /* two lower-case-initial names joined by '$' */ }
529
530 <GlaExt,GhcPragma,UserPragma>{Id}"#" { 
531                          hsnewid(yytext, yyleng); /* the trailing '#' is kept as part of the name */
532                          RETURN(_isconstr(yytext) ? CONID : VARID); /* classified by the first character */
533                         }
534 %{
535 /* This SHOULDNAE work in "Code" (sigh) */
536 %}
537 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} { /* identifier with one or more leading underscores */
538                          if (! (nonstandardFlag || in_interface)) {
539                             char errbuf[ERR_BUF_SIZE]; /* the identifier is cut short below so that the message always fits */
540                             sprintf(errbuf, "Non-standard identifier (leading underscore): %.*s\n", (int) (sizeof errbuf - sizeof "Non-standard identifier (leading underscore): \n"), yytext);
541                             hsperror(errbuf);
542                          }
543                          hsnewid(yytext, yyleng);
544                          RETURN(isconstr(yytext) ? CONID : VARID);
545                          /* NB: ^^^^^^^^ : not the macro! */
546                         }
547 <Code,GlaExt,GhcPragma,UserPragma>{Id}  { /* ordinary alphanumeric identifier */
548                          hsnewid(yytext, yyleng);
549                          RETURN(_isconstr(yytext) ? CONID : VARID); /* classified by the first character */
550                         }
551 <Code,GlaExt,GhcPragma,UserPragma>{SId} { /* operator symbol */
552                          hsnewid(yytext, yyleng);
553                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM); /* classified by the first character */
554                         }
555
556 %{
557     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
558
559     /* Because we can make the former well-behaved (we defined them).
560
561        Sadly, the latter is defined by Haskell, which allows such
562        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
563     */
564 %}
565
566 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`"        {       
567                          hsnewid(yytext + 1, yyleng - 2); /* the name without its two backquotes */
568                          RETURN(_isconstr(&yytext[1]) ? CONSYM : VARSYM); /* test the name's first character; &yytext[1] cannot be misparsed inside the macro */
569                         }
570
571 %{
572     /*
573      * Character literals.  The first form is the quick form, for character
574      * literals that don't contain backslashes.  Literals with backslashes are
575      * lexed through multiple rules.  First, we match the open ' and as many
576      * normal characters as possible.  This puts us into the <Char> state, where
577      * a backslash is legal.  Then, we match the backslash and move into the 
578      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
579      * characters and the close '.  We may end up with too many characters, but
580      * this allows us to easily share the lex rules with strings.  Excess characters
581      * are ignored with a warning.
582      */
583 %}
584
585 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" { /* quick form: one plain character, hash suffix */
586                          yylval.uhstring = installHstring(1, yytext+1); /* yytext+1 skips the opening quote */
587                          RETURN(CHARPRIM);
588                         }
589 <Code,GlaExt>'({CHAR}|"\"")'    { /* quick form: one plain character */
590                          yylval.uhstring = installHstring(1, yytext+1); /* yytext+1 skips the opening quote */
591                          RETURN(CHAR);
592                         }
593 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
594                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
595                          hsperror(errbuf);
596                         }
597 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
598                          hsmlcolno = hspcolno;
599                          cleartext();
600                          addtext(yytext+1, yyleng-1);
601                          PUSH_STATE(Char);
602                         }
603 <Char>({CHAR}|"\"")*'#  {
604                          unsigned length;
605                          char *text;
606
607                          addtext(yytext, yyleng - 2);
608                          text = fetchtext(&length);
609
610                          if (! (nonstandardFlag || in_interface)) {
611                             char errbuf[ERR_BUF_SIZE];
612                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
613                             hsperror(errbuf);
614                          }
615
616                          if (length > 1) {
617                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
618                               input_filename, hsplineno, hspcolno + 1);
619                             format_string(stderr, (unsigned char *) text, length);
620                             fputs("' too long\n", stderr);
621                             hsperror("");
622                          }
623                          yylval.uhstring = installHstring(1, text);
624                          hspcolno = hsmlcolno;
625                          POP_STATE;
626                          RETURN(CHARPRIM); 
627                         }
628 <Char>({CHAR}|"\"")*'   { /* closing quote: end of an ordinary character literal */
629                          unsigned length;
630                          char *text;
631
632                          addtext(yytext, yyleng - 1); /* yyleng - 1 leaves out the closing quote */
633                          text = fetchtext(&length);
634
635                          if (length > 1) { /* more than one character was collected: report it */
636                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
637                               input_filename, hsplineno, hspcolno + 1);
638                             format_string(stderr, (unsigned char *) text, length);
639                             fputs("' too long\n", stderr);
640                             hsperror("");
641                          }
642                          yylval.uhstring = installHstring(1, text); /* a length of 1 is passed whatever was collected */
643                          hspcolno = hsmlcolno; /* restore the column saved when the literal was opened */
644                          POP_STATE;
645                          RETURN(CHAR); 
646                         }
647 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); /* plain characters with no closing quote yet: keep collecting */ }
648
649
650 %{
651     /*
652      * String literals.  The first form is the quick form, for string literals
653      * that don't contain backslashes.  Literals with backslashes are lexed
654      * through multiple rules.  First, we match the open " and as many normal
655      * characters as possible.  This puts us into the <String> state, where
656      * a backslash is legal.  Then, we match the backslash and move into the 
657      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
658      * characters, moving back and forth between <String> and <StringEsc> as more
659      * backslashes are encountered.  (We may even digress into <Comment> mode if we
660      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
661      * of normal characters and the close ".
662      */
663 %}
664
665 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""#  { /* quick form with a hash suffix */
666                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
667                             /* the -3 accounts for the " on front, "# on the end */
668                          RETURN(STRINGPRIM); 
669                         }
670 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""  { /* quick form: no backslashes between the quotes */
671                          yylval.uhstring = installHstring(yyleng-2, yytext+1); /* the -2 accounts for the two quotes */
672                          RETURN(STRING); 
673                         }
674 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* { /* opening quote plus plain characters; the rest is lexed in <String> */
675                          hsmlcolno = hspcolno; /* saved here and copied back into hspcolno by the rules that close the literal */
676                          cleartext();
677                          addtext(yytext+1, yyleng-1); /* everything after the opening quote */
678                          PUSH_STATE(String);
679                         }
<String>({CHAR}|"'")*"\"#"   {
                         unsigned length;
                         char *text;

                         /* Append the last chunk, leaving out the closing "# pair. */
                         addtext(yytext, yyleng-2);
                         text = fetchtext(&length);

                         if (! (nonstandardFlag || in_interface)) {
                            char errbuf[ERR_BUF_SIZE];
                            /* The literal can be arbitrarily long, so only echo as
                               much of it as fits: the fixed part of the message
                               (including the newline and the NUL) needs 43 bytes. */
                            int room = (int) sizeof errbuf - 48;

                            if (room < 0)
                               room = 0;
                            sprintf(errbuf, "`String-hash' literals are non-standard: %.*s\n", room, text);
                            hsperror(errbuf);
                         }

                         yylval.uhstring = installHstring(length, text);
                         /* Put the column back to where the literal started. */
                         hspcolno = hsmlcolno;
                         POP_STATE;
                         RETURN(STRINGPRIM);
                        }
<String>({CHAR}|"'")*"\""   {
                         unsigned nchars;
                         char *whole;

                         /* Final chunk of a multi-part string: everything matched
                            here except the closing quote joins the cached text. */
                         addtext(yytext, yyleng - 1);
                         whole = fetchtext(&nchars);

                         yylval.uhstring = installHstring(nchars, whole);
                         /* Put the column back to where the literal started. */
                         hspcolno = hsmlcolno;
                         POP_STATE;
                         RETURN(STRING);
                        }
<String>({CHAR}|"'")+   {
                         /* A run of ordinary characters with no terminator yet. */
                         addtext(yytext, yyleng);
                        }
711
%{
    /*
     * Character and string escapes are roughly the same, but strings have the
     * extra `\&' sequence which is not allowed for characters.  Also, comments
     * are allowed in the <StringEsc> state.  (See the comment section much
     * further down.)
     *
     * NB: Backslashes and tabs are stored in strings as themselves.
     * But if we print them (in printtree.c), they must go out as
     * "\\\\" and "\\t" respectively.  (This is because of the bogus
     * intermediate format that the parser produces.  It uses '\t' for end of
     * string, so it needs to be able to escape tabs, which means that it
     * also needs to be able to escape the escape character ('\\').  Sigh.)
     */
%}
727
<Char>\\                { PUSH_STATE(CharEsc); }
<String>\\&             /* Ignore */ ;
<String>\\              { noGap = TRUE; PUSH_STATE(StringEsc); }

<CharEsc>\\             { addchar(yytext[0]); POP_STATE; }
<StringEsc>\\           {
                          /* With no gap seen this is an escaped backslash;
                             otherwise nothing is added. */
                          if (noGap) {
                             addchar(yytext[0]);
                          }
                          POP_STATE;
                        }

<CharEsc,StringEsc>["'] { addchar(yytext[0]); POP_STATE; }
<CharEsc,StringEsc>NUL  { addchar('\x00'); POP_STATE; }
<CharEsc,StringEsc>SOH  { addchar('\x01'); POP_STATE; }
<CharEsc,StringEsc>STX  { addchar('\x02'); POP_STATE; }
<CharEsc,StringEsc>ETX  { addchar('\x03'); POP_STATE; }
<CharEsc,StringEsc>EOT  { addchar('\x04'); POP_STATE; }
<CharEsc,StringEsc>ENQ  { addchar('\x05'); POP_STATE; }
<CharEsc,StringEsc>ACK  { addchar('\x06'); POP_STATE; }
<CharEsc,StringEsc>(BEL|a)  { addchar('\x07'); POP_STATE; }
<CharEsc,StringEsc>(BS|b)   { addchar('\x08'); POP_STATE; }
<CharEsc,StringEsc>(HT|t)   { addchar('\x09'); POP_STATE; }
<CharEsc,StringEsc>(LF|n)   { addchar('\x0a'); POP_STATE; }
<CharEsc,StringEsc>(VT|v)   { addchar('\x0b'); POP_STATE; }
<CharEsc,StringEsc>(FF|f)   { addchar('\x0c'); POP_STATE; }
<CharEsc,StringEsc>(CR|r)   { addchar('\x0d'); POP_STATE; }
<CharEsc,StringEsc>SO   { addchar('\x0e'); POP_STATE; }
<CharEsc,StringEsc>SI   { addchar('\x0f'); POP_STATE; }
<CharEsc,StringEsc>DLE  { addchar('\x10'); POP_STATE; }
<CharEsc,StringEsc>DC1  { addchar('\x11'); POP_STATE; }
<CharEsc,StringEsc>DC2  { addchar('\x12'); POP_STATE; }
<CharEsc,StringEsc>DC3  { addchar('\x13'); POP_STATE; }
<CharEsc,StringEsc>DC4  { addchar('\x14'); POP_STATE; }
<CharEsc,StringEsc>NAK  { addchar('\x15'); POP_STATE; }
<CharEsc,StringEsc>SYN  { addchar('\x16'); POP_STATE; }
<CharEsc,StringEsc>ETB  { addchar('\x17'); POP_STATE; }
<CharEsc,StringEsc>CAN  { addchar('\x18'); POP_STATE; }
<CharEsc,StringEsc>EM   { addchar('\x19'); POP_STATE; }
<CharEsc,StringEsc>SUB  { addchar('\x1a'); POP_STATE; }
<CharEsc,StringEsc>ESC  { addchar('\x1b'); POP_STATE; }
<CharEsc,StringEsc>FS   { addchar('\x1c'); POP_STATE; }
<CharEsc,StringEsc>GS   { addchar('\x1d'); POP_STATE; }
<CharEsc,StringEsc>RS   { addchar('\x1e'); POP_STATE; }
<CharEsc,StringEsc>US   { addchar('\x1f'); POP_STATE; }
<CharEsc,StringEsc>SP   { addchar('\x20'); POP_STATE; }
<CharEsc,StringEsc>DEL  { addchar('\x7f'); POP_STATE; }
<CharEsc,StringEsc>"^"{CNTRL} {
                          /* \^X: the character after the caret, offset down
                             by '@', is the control code. */
                          addchar((char) (yytext[1] - '@'));
                          POP_STATE;
                        }
<CharEsc,StringEsc>{D}+  {
                          /* Keep strtol's full width: narrowing to int could wrap
                             a huge escape back into the valid range.  On overflow
                             strtol yields LONG_MAX, which the range test rejects. */
                          long i = strtol(yytext, NULL, 10);
                          if (i >= 0 && i < NCHARS) {
                             addchar((char) i);
                          } else {
                             char errbuf[ERR_BUF_SIZE];
                             /* yytext can be an arbitrarily long digit run, so only
                                echo as much as fits: the fixed part of the message
                                (including the NUL) needs 33 bytes. */
                             int room = (int) sizeof errbuf - 40;

                             if (room < 0)
                                room = 0;
                             sprintf(errbuf, "Numeric escape \"\\%.*s\" out of range\n", 
                                room, yytext);
                             hsperror(errbuf);
                          }
                          POP_STATE;
                        }
<CharEsc,StringEsc>o{O}+ {
                          /* Skip the leading 'o'.  Keep strtol's full width:
                             narrowing to int could wrap a huge escape back into the
                             valid range.  On overflow strtol yields LONG_MAX, which
                             the range test rejects. */
                          long i = strtol(yytext + 1, NULL, 8);
                          if (i >= 0 && i < NCHARS) {
                             addchar((char) i);
                          } else {
                             char errbuf[ERR_BUF_SIZE];
                             /* yytext can be arbitrarily long, so only echo as much
                                as fits: the fixed part of the message (including
                                the NUL) needs 33 bytes. */
                             int room = (int) sizeof errbuf - 40;

                             if (room < 0)
                                room = 0;
                             sprintf(errbuf, "Numeric escape \"\\%.*s\" out of range\n", 
                                room, yytext);
                             hsperror(errbuf);
                          }
                          POP_STATE;
                        }
<CharEsc,StringEsc>x{H}+ {
                          /* Skip the leading 'x'.  Keep strtol's full width:
                             narrowing to int could wrap a huge escape back into the
                             valid range.  On overflow strtol yields LONG_MAX, which
                             the range test rejects. */
                          long i = strtol(yytext + 1, NULL, 16);
                          if (i >= 0 && i < NCHARS) {
                             addchar((char) i);
                          } else {
                             char errbuf[ERR_BUF_SIZE];
                             /* yytext can be arbitrarily long, so only echo as much
                                as fits: the fixed part of the message (including
                                the NUL) needs 33 bytes. */
                             int room = (int) sizeof errbuf - 40;

                             if (room < 0)
                                room = 0;
                             sprintf(errbuf, "Numeric escape \"\\%.*s\" out of range\n", 
                                room, yytext);
                             hsperror(errbuf);
                          }
                          POP_STATE;
                        }
814
815 %{
816     /*
817      * Simple comments and whitespace.  Normally, we would just ignore these, but
818      * in case we're processing a string escape, we need to note that we've seen
819      * a gap.
820      */
821 %}
822
823 <Code,GlaExt,StringEsc>"--".*{NL}{WS}* |
824 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+       { noGap = FALSE; }
825
826 %{
827     /*
828      * Nested comments.  The major complication here is in trying to match the
829      * longest lexemes possible, for better performance.  (See the flex document.)
830      * That's why the rules look so bizarre.
831      */
832 %}
833
<Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-"        { 
                          /* Outermost comment opener: start the depth count at one
                             and note that a gap has been seen. */
                          nested_comments = 1;
                          noGap = FALSE;
                          PUSH_STATE(Comment);
                        }

<Comment>[^-{]*         |
<Comment>"-"+[^-{}]+    |
<Comment>"{"+[^-{}]+    ;
<Comment>"{-"           { nested_comments += 1; }
<Comment>"-}"           {
                          /* Leave <Comment> only when the outermost level closes. */
                          nested_comments -= 1;
                          if (nested_comments == 0) {
                             POP_STATE;
                          }
                        }
<Comment>(.|\n)         ;
844
845 %{
846     /*
847      * Illegal characters.  This used to be a single rule, but we might as well
848      * pass on as much information as we have, so now we indicate our state in
849      * the error message.
850      */
851 %}
852
<INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n)        { 
                         unsigned char *bad = (unsigned char *) yytext;

                         fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, bad, 1);
                         fputs("'\n", stderr);
                         hsperror("");
                        }
<Char>(.|\n)            { 
                         unsigned char *bad = (unsigned char *) yytext;

                         fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, bad, 1);
                         fputs("' in a character literal\n", stderr);
                         hsperror("");
                        }
<CharEsc>(.|\n)         {
                         unsigned char *bad = (unsigned char *) yytext;

                         fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, bad, 1);
                         fputs("'\n", stderr);
                         hsperror("");
                        }
<String>(.|\n)          {
                         if (!nonstandardFlag) {
                             unsigned char *bad = (unsigned char *) yytext;

                             fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
                                input_filename, hsplineno, hspcolno + 1); 
                             format_string(stderr, bad, 1);
                             fputs("' in a string literal\n", stderr);
                             hsperror("");
                         } else {
                             /* With the non-standard flag set, the character is
                                simply kept as part of the string. */
                             addtext(yytext, yyleng);
                         }
                        }
<StringEsc>(.|\n)       {
                         unsigned char *bad = (unsigned char *) yytext;

                         /* noGap tells a bad escape (straight after the backslash)
                            from a stray character inside a string gap. */
                         if (noGap) {
                             fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
                                input_filename, hsplineno, hspcolno + 1); 
                         } else {
                             fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
                                input_filename, hsplineno, hspcolno + 1);
                         }
                         format_string(stderr, bad, 1);
                         if (noGap) {
                             fputs("'\n", stderr);
                         } else {
                             fputs("' in a string gap\n", stderr);
                         }
                         hsperror("");
                        }
899
%{
    /*
     * End of file.  In any sub-state, this is an error.  However, for the primary
     * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
     * and let the yylex() wrapper deal with whatever has to be done next (e.g.
     * adding virtual close curlies, or closing an interface and returning to the
     * primary source file).
     *
     * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
     * line/column advancement has to be done by hand.
     */
%}
912
913 <Char,CharEsc><<EOF>>   { 
914                           hsplineno = hslineno; hspcolno = hscolno;
915                           hsperror("unterminated character literal");
916                         }
917 <Comment><<EOF>>        { 
918                           hsplineno = hslineno; hspcolno = hscolno;
919                           hsperror("unterminated comment"); 
920                         }
921 <String,StringEsc><<EOF>>   { 
922                           hsplineno = hslineno; hspcolno = hscolno;
923                           hsperror("unterminated string literal"); 
924                         }
925 <GhcPragma><<EOF>>      {
926                           hsplineno = hslineno; hspcolno = hscolno;
927                           hsperror("unterminated interface pragma"); 
928                         }
929 <UserPragma><<EOF>>     {
930                           hsplineno = hslineno; hspcolno = hscolno;
931                           hsperror("unterminated user-specified pragma"); 
932                         }
933 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
934
935 %%
936
937 /**********************************************************************
938 *                                                                     *
939 *                                                                     *
940 *     YACC/LEX Initialisation etc.                                    *
941 *                                                                     *
942 *                                                                     *
943 **********************************************************************/
944
945 /*
946    We initialise input_filename to "<stdin>".
947    This allows unnamed sources to be piped into the parser.
948 */
949
950 void
951 yyinit()
952 {
953     extern BOOLEAN acceptPrim;
954
955     input_filename = xstrdup("<stdin>");
956
957     /* We must initialize the input buffer _now_, because we call
958        setyyin _before_ calling yylex for the first time! */
959     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
960
961     if (acceptPrim)
962         PUSH_STATE(GlaExt);
963     else
964         PUSH_STATE(Code);
965 }
966
967 void
968 new_filename(f) /* This looks pretty dodgy to me (WDP) */
969   char *f;
970 {
971     if (input_filename != NULL)
972         free(input_filename);
973     input_filename = xstrdup(f);
974 }
975
976 /**********************************************************************
977 *                                                                     *
978 *                                                                     *
979 *     Layout Processing                                               *
980 *                                                                     *
981 *                                                                     *
982 **********************************************************************/
983
984 /*
985         The following section deals with Haskell Layout conventions
986         forcing insertion of ; or } as appropriate
987 */
988
989 BOOLEAN
990 hsshouldindent()
991 {
992     return (!forgetindent && INDENTON);
993 }
994
995
996 /* Enter new context and set new indentation level */
997 void
998 hssetindent()
999 {
1000 #ifdef HSP_DEBUG
1001     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1002 #endif
1003
1004     /*
1005      * partain: first chk that new indent won't be less than current one; this code
1006      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1007      * current token; what that has to do with indenting, I don't know.
1008      */
1009
1010
1011     if (hscolno - 1 <= INDENTPT) {
1012         if (INDENTPT == -1)
1013             return;             /* Empty input OK for Haskell 1.1 */
1014         else {
1015             char errbuf[ERR_BUF_SIZE];
1016
1017             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1018             hsperror(errbuf);
1019         }
1020     }
1021     hsentercontext((hspcolno << 1) | 1);
1022 }
1023
1024
1025 /* Enter a new context without changing the indentation level */
1026 void
1027 hsincindent()
1028 {
1029 #ifdef HSP_DEBUG
1030     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1031 #endif
1032     hsentercontext(indenttab[icontexts] & ~1);
1033 }
1034
1035
1036 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1037 void
1038 hsindentoff()
1039 {
1040     forgetindent = TRUE;
1041 }
1042
1043
1044 /* Enter a new layout context. */
1045 void
1046 hsentercontext(indent)
1047   int indent;
1048 {
1049     /* Enter new context and set indentation as specified */
1050     if (++icontexts >= MAX_CONTEXTS) {
1051         char errbuf[ERR_BUF_SIZE];
1052
1053         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1054         hsperror(errbuf);
1055     }
1056     forgetindent = FALSE;
1057     indenttab[icontexts] = indent;
1058 #ifdef HSP_DEBUG
1059     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1060 #endif
1061 }
1062
1063
1064 /* Exit a layout context */
1065 void
1066 hsendindent()
1067 {
1068     --icontexts;
1069 #ifdef HSP_DEBUG
1070     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1071 #endif
1072 }
1073
1074 /*
1075  *      Return checks the indentation level and returns ;, } or the specified token.
1076  */
1077
1078 int
1079 Return(tok)
1080   int tok;
1081 {
1082 #ifdef HSP_DEBUG
1083     extern int yyleng;
1084 #endif
1085
1086     if (hsshouldindent()) {
1087         if (hspcolno < INDENTPT) {
1088 #ifdef HSP_DEBUG
1089             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1090 #endif
1091             hssttok = tok;
1092             return (VCCURLY);
1093         } else if (hspcolno == INDENTPT) {
1094 #ifdef HSP_DEBUG
1095             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1096 #endif
1097             hssttok = -tok;
1098             return (SEMI);
1099         }
1100     }
1101     hssttok = -1;
1102 #ifdef HSP_DEBUG
1103     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1104 #endif
1105     return (tok);
1106 }
1107
1108
1109 /*
1110  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1111  */
1112 int
1113 yylex()
1114 {
1115     int tok;
1116     static BOOLEAN eof = FALSE;
1117
1118     if (!eof) {
1119         if (hssttok != -1) {
1120             if (hssttok < 0) {
1121                 tok = -hssttok;
1122                 hssttok = -1;
1123                 return tok;
1124             }
1125             RETURN(hssttok);
1126         } else {
1127             endlineno = hslineno;
1128             if ((tok = yylex1()) != EOF)
1129                 return tok;
1130             else
1131                 eof = TRUE;
1132         }
1133     }
1134     if (icontexts > icontexts_save) {
1135         if (INDENTON) {
1136             eof = TRUE;
1137             indenttab[icontexts] = 0;
1138             return (VCCURLY);
1139         } else
1140             hsperror("missing '}' at end of file");
1141     } else if (hsbuf_save != NULL) {
1142         fclose(yyin);
1143         yy_delete_buffer(YY_CURRENT_BUFFER);
1144         yy_switch_to_buffer(hsbuf_save);
1145         hsbuf_save = NULL;
1146         new_filename(filename_save);
1147         free(filename_save);
1148         hslineno = hslineno_save;
1149         hsplineno = hsplineno_save;
1150         hscolno = hscolno_save;
1151         hspcolno = hspcolno_save;
1152         etags = etags_save;
1153         in_interface = FALSE;
1154         icontexts = icontexts_save - 1;
1155         icontexts_save = 0;
1156 #ifdef HSP_DEBUG
1157         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1158 #endif
1159         eof = FALSE;
1160         RETURN(LEOF);
1161     } else {
1162         yyterminate();
1163     }
1164     abort(); /* should never get here! */
1165     return(0);
1166 }
1167
1168 /**********************************************************************
1169 *                                                                     *
1170 *                                                                     *
1171 *     Input Processing for Interfaces                                 *
1172 *                                                                     *
1173 *                                                                     *
1174 **********************************************************************/
1175
1176 /* setyyin(file)        open file as new lex input buffer */
1177 void
1178 setyyin(file)
1179   char *file;
1180 {
1181     extern FILE *yyin;
1182
1183     hsbuf_save = YY_CURRENT_BUFFER;
1184     if ((yyin = fopen(file, "r")) == NULL) {
1185         char errbuf[ERR_BUF_SIZE];
1186
1187         sprintf(errbuf, "can't read \"%-.50s\"", file);
1188         hsperror(errbuf);
1189     }
1190     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1191
1192     hslineno_save = hslineno;
1193     hsplineno_save = hsplineno;
1194     hslineno = hsplineno = 1;
1195
1196     filename_save = input_filename;
1197     input_filename = NULL;
1198     new_filename(file);
1199     hscolno_save = hscolno;
1200     hspcolno_save = hspcolno;
1201     hscolno = hspcolno = 0;
1202     in_interface = TRUE;
1203     etags_save = etags; /* do not do "etags" stuff in interfaces */
1204     etags = 0;          /* We remember whether we are doing it in
1205                            the module, so we can restore it later [WDP 94/09] */
1206     hsentercontext(-1);         /* partain: changed this from 0 */
1207     icontexts_save = icontexts;
1208 #ifdef HSP_DEBUG
1209     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1210 #endif
1211 }
1212
1213 static VOID
1214 layout_input(text, len)
1215 char *text;
1216 int len;
1217 {
1218 #ifdef HSP_DEBUG
1219     fprintf(stderr, "Scanning \"%s\"\n", text);
1220 #endif
1221
1222     hsplineno = hslineno;
1223     hspcolno = hscolno;
1224
1225     while (len-- > 0) {
1226         switch (*text++) {
1227         case '\n':
1228         case '\r':
1229         case '\f':
1230             hslineno++;
1231             hscolno = 0;
1232             break;
1233         case '\t':
1234             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1235             break;
1236         case '\v':
1237             break;
1238         default:
1239             ++hscolno;
1240             break;
1241         }
1242     }
1243 }
1244
1245 void
1246 setstartlineno()
1247 {
1248     startlineno = hsplineno;
1249 #if 1/*etags*/
1250 #else
1251     if (etags)
1252         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1253 #endif
1254 }
1255
1256 /**********************************************************************
1257 *                                                                     *
1258 *                                                                     *
1259 *                      Text Caching                                   *
1260 *                                                                     *
1261 *                                                                     *
1262 **********************************************************************/
1263
1264 #define CACHE_SIZE YY_BUF_SIZE
1265
1266 static struct {
1267     unsigned allocated;
1268     unsigned next;
1269     char *text;
1270 } textcache = { 0, 0, NULL };
1271
1272 static VOID
1273 cleartext()
1274 {
1275 /*  fprintf(stderr, "cleartext\n"); */
1276     textcache.next = 0;
1277     if (textcache.allocated == 0) {
1278         textcache.allocated = CACHE_SIZE;
1279         textcache.text = xmalloc(CACHE_SIZE);
1280     }
1281 }
1282
1283 static VOID
1284 addtext(text, length)
1285 char *text;
1286 unsigned length;
1287 {
1288 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1289
1290     if (length == 0)
1291         return;
1292
1293     if (textcache.next + length + 1 >= textcache.allocated) {
1294         textcache.allocated += length + CACHE_SIZE;
1295         textcache.text = xrealloc(textcache.text, textcache.allocated);
1296     }
1297     bcopy(text, textcache.text + textcache.next, length);
1298     textcache.next += length;
1299 }
1300
1301 static VOID
1302 #ifdef __STDC__
1303 addchar(char c)
1304 #else
1305 addchar(c)
1306   char c;
1307 #endif
1308 {
1309 /*  fprintf(stderr, "addchar: %c\n", c); */
1310
1311     if (textcache.next + 2 >= textcache.allocated) {
1312         textcache.allocated += CACHE_SIZE;
1313         textcache.text = xrealloc(textcache.text, textcache.allocated);
1314     }
1315     textcache.text[textcache.next++] = c;
1316 }
1317
1318 static char *
1319 fetchtext(length)
1320 unsigned *length;
1321 {
1322 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1323
1324     *length = textcache.next;
1325     textcache.text[textcache.next] = '\0';
1326     return textcache.text;
1327 }
1328
1329 /**********************************************************************
1330 *                                                                     *
1331 *                                                                     *
1332 *    Identifier Processing                                             *
1333 *                                                                     *
1334 *                                                                     *
1335 **********************************************************************/
1336
1337 /*
1338         hsnewid         Enters an id of length n into the symbol table.
1339 */
1340
1341 static VOID
1342 hsnewid(name, length)
1343 char *name;
1344 int length;
1345 {
1346     char save = name[length];
1347
1348     name[length] = '\0';
1349     yylval.uid = installid(name);
1350     name[length] = save;
1351 }
1352
1353 BOOLEAN 
1354 isconstr(s) /* walks past leading underscores before using the macro */
1355   char *s;
1356 {
1357     char *temp = s;
1358
1359     for ( ; temp != NULL && *temp == '_' ; temp++ );
1360
1361     return _isconstr(temp);
1362 }