[project @ 1996-03-22 09:24:22 by partain]
[ghc-hetmet.git] / ghc / compiler / yaccParser / hslexer.flex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       LEX grammar for Haskell.                                      *
6 *       ------------------------                                      *
7 *                                                                     *
8 *       (c) Copyright K. Hammond, University of Glasgow,              *
9 *               10th. February 1989                                   *
10 *                                                                     *
11 *       Modification History                                          *
12 *       --------------------                                          *
13 *                                                                     *
14 *       22/08/91 kh             Initial Haskell 1.1 version.          *
15 *       18/10/91 kh             Added 'ccall'.                        *
16 *       19/11/91 kh             Tidied generally.                     *
17 *       04/12/91 kh             Added Int#.                           *
18 *       31/01/92 kh             Haskell 1.2 version.                  *
19 *       24/04/92 ps             Added 'scc'.                          *
20 *       03/06/92 kh             Changed Infix/Prelude Handling.       *
21 *       23/08/93 jsm            Changed to support flex               *
22 *                                                                     *
23 *                                                                     *
24 *       Known Problems:                                               *
25 *                                                                     *
26 *               None, any more.                                       *
27 *                                                                     *
28 **********************************************************************/
29
30 #include "../../includes/config.h"
31
32 #include <stdio.h>
33
34 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
35 #include <string.h>
36 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
37 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
38 #include <memory.h>
39 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
40 #define index strchr
41 #define rindex strrchr
42 #define bcopy(s, d, n) memcpy ((d), (s), (n))
43 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
44 #define bzero(s, n) memset ((s), 0, (n))
45 #else /* not STDC_HEADERS and not HAVE_STRING_H */
46 #include <strings.h>
47 /* memory.h and strings.h conflict on some systems.  */
48 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
49
50 #include "hspincl.h"
51 #include "hsparser.tab.h"
52 #include "constants.h"
53 #include "utils.h"
54
55 /* Our substitute for <ctype.h> */
56
57 #define NCHARS  256
58 #define _S      0x1
59 #define _D      0x2
60 #define _H      0x4
61 #define _O      0x8
62 #define _C      0x10
63
64 #define _isconstr(s)    (CharTable[*s]&(_C))
65 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
66
67 static unsigned char CharTable[NCHARS] = {
68 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
69 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
70 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
71 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
72 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
73 /* '(' */       0,      0,      0,      0,      0,      0,      0,      0,
74 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
75 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
76 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
77 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
78 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
79 /* 'X' */       _C,     _C,     _C,     0,      0,      0,      0,      0,
80 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
81 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
82 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
83 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
84
85 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
86 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
87 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
88 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
89 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
90 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
91 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
92 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
93 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
94 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
95 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
96 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
97 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
98 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
99 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
100 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
101 };
102
103 /**********************************************************************
104 *                                                                     *
105 *                                                                     *
106 *      Declarations                                                   *
107 *                                                                     *
108 *                                                                     *
109 **********************************************************************/
110
111 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
112
113 /*
114  * For my own sanity, things that are not part of the flex skeleton
115  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
116  */
117
118 static int hslineno = 0;        /* Line number at end of token */
119 int hsplineno = 0;              /* Line number at end of previous token */
120
121 static int hscolno = 0;         /* Column number at end of token */
122 int hspcolno = 0;               /* Column number at end of previous token */
123 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
124
125 int startlineno = 0;            /* The line number where something starts */
126 int endlineno = 0;              /* The line number where something ends */
127
128 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
129 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
130
131 static int nested_comments;     /* For counting comment nesting depth */
132
133 /* Hacky definition of yywrap: see flex doc.
134
135    If we don't do this, then we'll have to get the default
136    yywrap from the flex library, which is often something
137    we are not good at locating.  This avoids that difficulty.
138    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
139    WDP 94/09/05
140 */
141 #define yywrap() 1
142
143 /* Essential forward declarations */
144
145 static void hsnewid      PROTO((char *, int));
146 static void layout_input PROTO((char *, int));
147 static void cleartext    (NO_ARGS);
148 static void addtext      PROTO((char *, unsigned));
149 static void addchar      PROTO((char));
150 static char *fetchtext   PROTO((unsigned *));
151 static void new_filename PROTO((char *));
152 static int  Return       PROTO((int));
153 static void hsentercontext PROTO((int));
154
155 /* Special file handling for IMPORTS */
156 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
157
158 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
159 static char *filename_save;             /* File Name                     */
160 static int hslineno_save = 0,           /* Line Number                   */
161  hsplineno_save = 0,                    /* Line Number of Prev. token    */
162  hscolno_save = 0,                      /* Indentation                   */
163  hspcolno_save = 0;                     /* Left Indentation              */
164 static short icontexts_save = 0;        /* Indent Context Level          */
165
166 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
167 extern BOOLEAN etags;      /* that which is saved */
168
169 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
170
171 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
172
173 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
174 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
175 extern int maxAcceptablePragmaVersion;
176 extern int thisIfacePragmaVersion;
177
178 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
179                                  * inserted before token +ve  -- "}" inserted before
180                                  * token */
181
182 short icontexts = 0;            /* Which context we're in */
183
184
185
186 /*
187         Table of indentations:  right bit indicates whether to use
188           indentation rules (1 = use rules; 0 = ignore)
189
190     partain:
191     push one of these "contexts" at every "case" or "where"; the right bit says
192     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
193
194     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
195     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
196     push is shown just below.
197
198 */
199
200
201 static short indenttab[MAX_CONTEXTS] = {-1};
202
203 #define INDENTPT (indenttab[icontexts]>>1)
204 #define INDENTON (indenttab[icontexts]&1)
205
206 #define RETURN(tok) return(Return(tok))
207
208 #undef YY_DECL
209 #define YY_DECL int yylex1()
210
211 /* We should not peek at yy_act, but flex calls us even for the internal action
212    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
213    to support older versions of flex, we'll continue to peek for now.
214  */
215 #define YY_USER_ACTION \
216     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
217
218 #if 0/*debug*/
219 #undef YY_BREAK
220 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
221 #endif
222
223 /* Each time we enter a new start state, we push it onto the state stack.
224    Note that the rules do not allow us to underflow or overflow the stack.
225    (At least, they shouldn't.)  The maximum expected depth is 4:
226    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
227 */
228 static int StateStack[5];
229 static int StateDepth = -1;
230
231 #ifdef HSP_DEBUG
232 #define PUSH_STATE(n)   do {\
233     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
234     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
235 #define POP_STATE       do {--StateDepth;\
236     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
237     BEGIN(StateStack[StateDepth]);} while(0)
238 #else
239 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
240 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
241 #endif
242
243 %}
244
245 /* The start states are:
246    Code -- normal Haskell code (principal lexer)
247    GlaExt -- Haskell code with Glasgow extensions
248    Comment -- Nested comment processing
249    String -- Inside a string literal with backslashes
250    StringEsc -- Immediately following a backslash in a string literal
251    Char -- Inside a character literal with backslashes
252    CharEsc -- Immediately following a backslash in a character literal 
253
254    Note that the INITIAL state is unused.  Also note that these states
255    are _exclusive_.  All rules should be prefixed with an appropriate
256    list of start states.
257  */
258
259 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
260
261 D                       [0-9]
262 O                       [0-7]
263 H                       [0-9A-Fa-f]
264 N                       {D}+
265 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
266 S                       [!#$%&*+./<=>?@\\^|~:]
267 SId                     ({S}|~|-){S}*
268 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
269 L                       [A-Z]
270 I                       [A-Za-z]
271 i                       [A-Za-z0-9'_]
272 Id                      {I}({i})*
273 WS                      [ \t\n\r\f\v]
274 CNTRL                   [@A-Z\[\\\]^_]
275 NL                      [\n\r]
276
277 %%
278
279 %{
280     /* 
281      * Special GHC pragma rules.  Do we need a start state for interface files,
282      * so these won't be matched in source files? --JSM
283      */
284 %}
285
286 <Code,GlaExt>^"# ".*{NL}    {
287                           char tempf[FILENAME_SIZE];
288                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
289                           new_filename(tempf);
290                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
291                         }
292
293 <Code,GlaExt>^"#line ".*{NL}    {
294                           char tempf[FILENAME_SIZE];
295                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf); 
296                           new_filename(tempf); 
297                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
298                         }
299
300 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
301                           /* partain: pragma-style line directive */
302                           char tempf[FILENAME_SIZE];
303                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf); 
304                           new_filename(tempf);
305                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
306                         }
307 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}"   {
308                           sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
309                         }
310 <Code,GlaExt>"{-# GHC_PRAGMA "   { 
311                           if ( ignorePragmas ||
312                                thisIfacePragmaVersion < minAcceptablePragmaVersion || 
313                                thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
314                              nested_comments = 1;
315                              PUSH_STATE(Comment);
316                           } else {
317                              PUSH_STATE(GhcPragma);
318                              RETURN(GHC_PRAGMA);
319                           }
320                         }
321 <GhcPragma>"_N_"            { RETURN(NO_PRAGMA); }
322 <GhcPragma>"_NI_"           { RETURN(NOINFO_PRAGMA); }
323 <GhcPragma>"_ABSTRACT_"     { RETURN(ABSTRACT_PRAGMA); }
324 <GhcPragma>"_DEFOREST_"     { RETURN(DEFOREST_PRAGMA); }
325 <GhcPragma>"_SPECIALISE_"   { RETURN(SPECIALISE_PRAGMA); }
326 <GhcPragma>"_M_"            { RETURN(MODNAME_PRAGMA); }
327 <GhcPragma>"_A_"            { RETURN(ARITY_PRAGMA); }
328 <GhcPragma>"_U_"            { RETURN(UPDATE_PRAGMA); }
329 <GhcPragma>"_S_"            { RETURN(STRICTNESS_PRAGMA); }
330 <GhcPragma>"_K_"            { RETURN(KIND_PRAGMA); }
331 <GhcPragma>"_MF_"           { RETURN(MAGIC_UNFOLDING_PRAGMA); }
332 <GhcPragma>"_F_"            { RETURN(UNFOLDING_PRAGMA); }
333
334 <GhcPragma>"_!_"            { RETURN(COCON); }
335 <GhcPragma>"_#_"            { RETURN(COPRIM); }
336 <GhcPragma>"_APP_"          { RETURN(COAPP); }
337 <GhcPragma>"_TYAPP_"        { RETURN(COTYAPP); }
338 <GhcPragma>"_ALG_"          { RETURN(CO_ALG_ALTS); }
339 <GhcPragma>"_PRIM_"         { RETURN(CO_PRIM_ALTS); }
340 <GhcPragma>"_NO_DEFLT_"     { RETURN(CO_NO_DEFAULT); }
341 <GhcPragma>"_LETREC_"       { RETURN(CO_LETREC); }
342
343 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
344 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
345 <GhcPragma>"_USER_CC_"      { RETURN(CO_USER_CC); }
346 <GhcPragma>"_AUTO_CC_"      { RETURN(CO_AUTO_CC); }
347 <GhcPragma>"_DICT_CC_"      { RETURN(CO_DICT_CC); }
348
349 <GhcPragma>"_DUPD_CC_"      { RETURN(CO_DUPD_CC); }
350 <GhcPragma>"_CAF_CC_"       { RETURN(CO_CAF_CC); }
351
352 <GhcPragma>"_SDSEL_"        { RETURN(CO_SDSEL_ID); }
353 <GhcPragma>"_METH_"         { RETURN(CO_METH_ID); }
354 <GhcPragma>"_DEFM_"         { RETURN(CO_DEFM_ID); }
355 <GhcPragma>"_DFUN_"         { RETURN(CO_DFUN_ID); }
356 <GhcPragma>"_CONSTM_"       { RETURN(CO_CONSTM_ID); }
357 <GhcPragma>"_SPEC_"         { RETURN(CO_SPEC_ID); }
358 <GhcPragma>"_WRKR_"         { RETURN(CO_WRKR_ID); }
359 <GhcPragma>"_ORIG_"         { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
360
361 <GhcPragma>"_ALWAYS_"       { RETURN(UNFOLD_ALWAYS); }
362 <GhcPragma>"_IF_ARGS_"      { RETURN(UNFOLD_IF_ARGS); }
363
364 <GhcPragma>"_NOREP_I_"      { RETURN(NOREP_INTEGER); }
365 <GhcPragma>"_NOREP_R_"      { RETURN(NOREP_RATIONAL); }
366 <GhcPragma>"_NOREP_S_"      { RETURN(NOREP_STRING); }
367
368 <GhcPragma>" #-}"           { POP_STATE; RETURN(END_PRAGMA); }
369
370 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
371                               PUSH_STATE(UserPragma);
372                               RETURN(SPECIALISE_UPRAGMA);
373                             }
374 <Code,GlaExt>"{-#"{WS}*"INLINE" {
375                               PUSH_STATE(UserPragma);
376                               RETURN(INLINE_UPRAGMA);
377                             }
378 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
379                               PUSH_STATE(UserPragma);
380                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
381                             }
382 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
383                               PUSH_STATE(UserPragma);
384                               RETURN(DEFOREST_UPRAGMA);
385                             }
386 <Code,GlaExt>"{-#"{WS}*"ABSTRACT" {
387                               PUSH_STATE(UserPragma);
388                               RETURN(ABSTRACT_UPRAGMA);
389                             }
390 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
391                               fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
392                                 input_filename, hsplineno);
393                               format_string(stderr, (unsigned char *) yytext, yyleng);
394                               fputs("'\n", stderr);
395                               nested_comments = 1;
396                               PUSH_STATE(Comment);
397                             }
398 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
399
400 %{
401     /*
402      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
403      * intentionally accepted as a keyword even for normal <Code>.
404      */
405 %}
406
407 <Code,GlaExt,GhcPragma>"case"   { RETURN(CASE); }
408 <Code,GlaExt>"class"            { RETURN(CLASS); }
409 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
410 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
411 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
412 <Code,GlaExt>"else"             { RETURN(ELSE); }
413 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
414 <Code,GlaExt>"if"               { RETURN(IF); }
415 <Code,GlaExt>"import"           { RETURN(IMPORT); }
416 <Code,GlaExt>"infix"            { RETURN(INFIX); }
417 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
418 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
419 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
420 <Code,GlaExt>"interface"        { RETURN(INTERFACE); }
421 <Code,GlaExt>"module"           { RETURN(MODULE); }
422 <Code,GlaExt,GhcPragma>"of"     { RETURN(OF); }
423 <Code,GlaExt>"renaming"         { RETURN(RENAMING); }
424 <Code,GlaExt>"then"             { RETURN(THEN); }
425 <Code,GlaExt>"to"               { RETURN(TO); }
426 <Code,GlaExt>"type"             { RETURN(TYPE); }
427 <Code,GlaExt>"where"            { RETURN(WHERE); }
428 <Code,GlaExt,GhcPragma>"in"     { RETURN(IN); }
429 <Code,GlaExt,GhcPragma>"let"    { RETURN(LET); }
430 <GlaExt,GhcPragma>"_ccall_"     { RETURN(CCALL); }
431 <GlaExt,GhcPragma>"_ccall_GC_"  { RETURN(CCALL_GC); }
432 <GlaExt,GhcPragma>"_casm_"      { RETURN(CASM); }
433 <GlaExt,GhcPragma>"_casm_GC_"   { RETURN(CASM_GC); }
434 <Code,GlaExt,GhcPragma>"_scc_"  { RETURN(SCC); }
435 <GhcPragma>"_forall_"           { RETURN(FORALL); }
436
437 %{
438     /* 
439      * Haskell operators.  Nothing special about these.
440      */
441 %}
442
443 <Code,GlaExt>".."                       { RETURN(DOTDOT); }
444 <Code,GlaExt,GhcPragma>";"              { RETURN(SEMI); }
445 <Code,GlaExt,GhcPragma,UserPragma>","   { RETURN(COMMA); }
446 <Code,GlaExt,GhcPragma>"|"              { RETURN(VBAR); }
447 <Code,GlaExt,GhcPragma,UserPragma>"="   { RETURN(EQUAL); }
448 <Code,GlaExt>"<-"                       { RETURN(LARROW); }
449 <Code,GlaExt,GhcPragma,UserPragma>"->"  { RETURN(RARROW); }
450 <Code,GlaExt,GhcPragma,UserPragma>"=>"  { RETURN(DARROW); }
451 <Code,GlaExt,GhcPragma,UserPragma>"::"  { RETURN(DCOLON); }
452 <Code,GlaExt,GhcPragma,UserPragma>"("   { RETURN(OPAREN); }
453 <Code,GlaExt,GhcPragma,UserPragma>")"   { RETURN(CPAREN); }
454 <Code,GlaExt,GhcPragma,UserPragma>"["   { RETURN(OBRACK); }
455 <Code,GlaExt,GhcPragma,UserPragma>"]"   { RETURN(CBRACK); }
456 <Code,GlaExt,GhcPragma>"{"              { RETURN(OCURLY); }
457 <Code,GlaExt,GhcPragma>"}"              { RETURN(CCURLY); }
458 <Code,GlaExt>"+"                        { RETURN(PLUS); }
459 <Code,GlaExt>"@"                        { RETURN(AT); }
460 <Code,GlaExt,GhcPragma>"\\"             { RETURN(LAMBDA); }
461 <GhcPragma>"_/\\_"                      { RETURN(TYLAMBDA); }
462 <Code,GlaExt>"_"                        { RETURN(WILDCARD); }
463 <Code,GlaExt,GhcPragma>"`"              { RETURN(BQUOTE); }
464 <Code,GlaExt>"~"                        { RETURN(LAZY); }
465 <Code,GlaExt>"-"                        { RETURN(MINUS); }
466
467 %{
468     /*
469      * Integers and (for Glasgow extensions) primitive integers.  Note that
470      * we pass all of the text on to the parser, because flex/C can't handle
471      * arbitrary precision numbers.
472      */
473 %}
474
475 <GlaExt>("-")?"0o"{O}+"#" { /* octal */
476                          yylval.uid = xstrndup(yytext, yyleng - 1);
477                          RETURN(INTPRIM);
478                         }
479 <Code,GlaExt>"0o"{O}+   { /* octal */
480                          yylval.uid = xstrndup(yytext, yyleng);
481                          RETURN(INTEGER);
482                         }
483 <GlaExt>("-")?"0x"{H}+"#" { /* hexadecimal */
484                          yylval.uid = xstrndup(yytext, yyleng - 1);
485                          RETURN(INTPRIM);
486                         }
487 <Code,GlaExt>"0x"{H}+   { /* hexadecimal */
488                          yylval.uid = xstrndup(yytext, yyleng);
489                          RETURN(INTEGER);
490                         }
491 <GlaExt,GhcPragma>("-")?{N}"#"  {
492                          yylval.uid = xstrndup(yytext, yyleng - 1);
493                          RETURN(INTPRIM);
494                         }
495 <Code,GlaExt,GhcPragma>{N} {
496                          yylval.uid = xstrndup(yytext, yyleng);
497                          RETURN(INTEGER);
498                         }
499
500 %{
501     /*
502      * Floats and (for Glasgow extensions) primitive floats/doubles.
503      */
504 %}
505
506 <GlaExt,GhcPragma>("-")?{F}"##" {
507                          yylval.uid = xstrndup(yytext, yyleng - 2);
508                          RETURN(DOUBLEPRIM);
509                         }
510 <GlaExt,GhcPragma>("-")?{F}"#" {
511                          yylval.uid = xstrndup(yytext, yyleng - 1);
512                          RETURN(FLOATPRIM);
513                         }
514 <Code,GlaExt>{F}        {
515                          yylval.uid = xstrndup(yytext, yyleng);
516                          RETURN(FLOAT);
517                         }
518
519 %{
520     /*
521      * Funky ``foo'' style C literals for Glasgow extensions
522      */
523 %}
524
525 <GlaExt,GhcPragma>"``"[^']+"''" {
526                          hsnewid(yytext + 2, yyleng - 4);
527                          RETURN(CLITLIT);
528                         }
529
530 %{
531     /*
532      * Identifiers, both variables and operators.  The trailing hash is allowed
533      * for Glasgow extensions.
534      */
535 %}
536
537 <GhcPragma>"_NIL_"              { hsnewid(yytext, yyleng); RETURN(CONID); }
538 <GhcPragma>"_TUP_"{D}+          { hsnewid(yytext, yyleng); RETURN(CONID); }
539 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
540
541 %{
542 /* These SHOULDNAE work in "Code" (sigh) */
543 %}
544 <Code,GlaExt,GhcPragma,UserPragma>{Id}"#" { 
545                          if (! (nonstandardFlag || in_interface)) {
546                             char errbuf[ERR_BUF_SIZE];
547                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
548                             hsperror(errbuf);
549                          }
550                          hsnewid(yytext, yyleng);
551                          RETURN(_isconstr(yytext) ? CONID : VARID);
552                         }
553 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} { 
554                          if (! (nonstandardFlag || in_interface)) {
555                             char errbuf[ERR_BUF_SIZE];
556                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
557                             hsperror(errbuf);
558                          }
559                          hsnewid(yytext, yyleng);
560                          RETURN(isconstr(yytext) ? CONID : VARID);
561                          /* NB: ^^^^^^^^ : not the macro! */
562                         }
563 <Code,GlaExt,GhcPragma,UserPragma>{Id}  {
564                          hsnewid(yytext, yyleng);
565                          RETURN(_isconstr(yytext) ? CONID : VARID);
566                         }
567 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
568                          hsnewid(yytext, yyleng);
569                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
570                         }
571
572 %{
573     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
574
575     /* Because we can make the former well-behaved (we defined them).
576
577        Sadly, the latter is defined by Haskell, which allows such
578        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
579     */
580 %}
581
582 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`"        {       
583                          hsnewid(yytext + 1, yyleng - 2);
584                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
585                         }
586
587 %{
588     /*
589      * Character literals.  The first form is the quick form, for character
590      * literals that don't contain backslashes.  Literals with backslashes are
591      * lexed through multiple rules.  First, we match the open ' and as many
592      * normal characters as possible.  This puts us into the <Char> state, where
593      * a backslash is legal.  Then, we match the backslash and move into the 
594      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
595      * characters and the close '.  We may end up with too many characters, but
596      * this allows us to easily share the lex rules with strings.  Excess characters
597      * are ignored with a warning.
598      */
599 %}
600
601 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
602                          yylval.uhstring = installHstring(1, yytext+1);
603                          RETURN(CHARPRIM);
604                         }
605 <Code,GlaExt>'({CHAR}|"\"")'    {
606                          yylval.uhstring = installHstring(1, yytext+1);
607                          RETURN(CHAR);
608                         }
609 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
610                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
611                          hsperror(errbuf);
612                         }
613 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
614                          hsmlcolno = hspcolno;
615                          cleartext();
616                          addtext(yytext+1, yyleng-1);
617                          PUSH_STATE(Char);
618                         }
619 <Char>({CHAR}|"\"")*'#  {
620                          unsigned length;
621                          char *text;
622
623                          addtext(yytext, yyleng - 2);
624                          text = fetchtext(&length);
625
626                          if (! (nonstandardFlag || in_interface)) {
627                             char errbuf[ERR_BUF_SIZE];
628                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
629                             hsperror(errbuf);
630                          }
631
632                          if (length > 1) {
633                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
634                               input_filename, hsplineno, hspcolno + 1);
635                             format_string(stderr, (unsigned char *) text, length);
636                             fputs("' too long\n", stderr);
637                             hsperror("");
638                          }
639                          yylval.uhstring = installHstring(1, text);
640                          hspcolno = hsmlcolno;
641                          POP_STATE;
642                          RETURN(CHARPRIM); 
643                         }
644 <Char>({CHAR}|"\"")*'   {
645                          unsigned length;
646                          char *text;
647
648                          addtext(yytext, yyleng - 1);
649                          text = fetchtext(&length);
650
651                          if (length > 1) {
652                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
653                               input_filename, hsplineno, hspcolno + 1);
654                             format_string(stderr, (unsigned char *) text, length);
655                             fputs("' too long\n", stderr);
656                             hsperror("");
657                          }
658                          yylval.uhstring = installHstring(1, text);
659                          hspcolno = hsmlcolno;
660                          POP_STATE;
661                          RETURN(CHAR); 
662                         }
663 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
664
665
666 %{
667     /*
668      * String literals.  The first form is the quick form, for string literals
669      * that don't contain backslashes.  Literals with backslashes are lexed
670      * through multiple rules.  First, we match the open " and as many normal
671      * characters as possible.  This puts us into the <String> state, where
672      * a backslash is legal.  Then, we match the backslash and move into the 
673      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
674      * characters, moving back and forth between <String> and <StringEsc> as more
675      * backslashes are encountered.  (We may even digress into <Comment> mode if we
676      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
677      * of normal characters and the close ".
678      */
679 %}
680
681 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""#  {
682                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
683                             /* the -3 accounts for the " on front, "# on the end */
684                          RETURN(STRINGPRIM); 
685                         }
686 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""  {
687                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
688                          RETURN(STRING); 
689                         }
690 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
691                          hsmlcolno = hspcolno;
692                          cleartext();
693                          addtext(yytext+1, yyleng-1);
694                          PUSH_STATE(String);
695                         }
696 <String>({CHAR}|"'")*"\"#"   {
697                          unsigned length;
698                          char *text;
699
700                          addtext(yytext, yyleng-2);
701                          text = fetchtext(&length);
702
703                          if (! (nonstandardFlag || in_interface)) {
704                             char errbuf[ERR_BUF_SIZE];
705                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
706                             hsperror(errbuf);
707                          }
708
709                          yylval.uhstring = installHstring(length, text);
710                          hspcolno = hsmlcolno;
711                          POP_STATE;
712                          RETURN(STRINGPRIM);
713                         }
714 <String>({CHAR}|"'")*"\""   {
715                          unsigned length;
716                          char *text;
717
718                          addtext(yytext, yyleng-1);
719                          text = fetchtext(&length);
720
721                          yylval.uhstring = installHstring(length, text);
722                          hspcolno = hsmlcolno;
723                          POP_STATE;
724                          RETURN(STRING); 
725                         }
726 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
727
728 %{
729     /*
730      * Character and string escapes are roughly the same, but strings have the
731      * extra `\&' sequence which is not allowed for characters.  Also, comments
732      * are allowed in the <StringEsc> state.  (See the comment section much
733      * further down.)
734      *
735      * NB: Backslashes and tabs are stored in strings as themselves.
736      * But if we print them (in printtree.c), they must go out as
737      * "\\\\" and "\\t" respectively.  (This is because of the bogus
738      * intermediate format that the parser produces.  It uses '\t' fpr end of
739      * string, so it needs to be able to escape tabs, which means that it
740      * also needs to be able to escape the escape character ('\\').  Sigh.
741      */
742 %}
743
744 <Char>\\                { PUSH_STATE(CharEsc); }
745 <String>\\&             /* Ignore */ ;
746 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
747
748 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
749 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
750
751 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
752 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
753 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
754 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
755 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
756 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
757 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
758 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
759 <CharEsc,StringEsc>BEL  |
760 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
761 <CharEsc,StringEsc>BS   |
762 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
763 <CharEsc,StringEsc>HT   |
764 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
765 <CharEsc,StringEsc>LF   |
766 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
767 <CharEsc,StringEsc>VT   |
768 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
769 <CharEsc,StringEsc>FF   |
770 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
771 <CharEsc,StringEsc>CR   |
772 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
773 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
774 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
775 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
776 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
777 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
778 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
779 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
780 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
781 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
782 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
783 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
784 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
785 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
786 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
787 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
788 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
789 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
790 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
791 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
792 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
793 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
794 <CharEsc,StringEsc>{D}+  {
795                           int i = strtol(yytext, NULL, 10);
796                           if (i < NCHARS) {
797                              addchar((char) i);
798                           } else {
799                              char errbuf[ERR_BUF_SIZE];
800                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
801                                 yytext);
802                              hsperror(errbuf);
803                           }
804                           POP_STATE;
805                         }
806 <CharEsc,StringEsc>o{O}+ {
807                           int i = strtol(yytext + 1, NULL, 8);
808                           if (i < NCHARS) {
809                              addchar((char) i);
810                           } else {
811                              char errbuf[ERR_BUF_SIZE];
812                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
813                                 yytext);
814                              hsperror(errbuf);
815                           }
816                           POP_STATE;
817                         }
818 <CharEsc,StringEsc>x{H}+ {
819                           int i = strtol(yytext + 1, NULL, 16);
820                           if (i < NCHARS) {
821                              addchar((char) i);
822                           } else {
823                              char errbuf[ERR_BUF_SIZE];
824                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n", 
825                                 yytext);
826                              hsperror(errbuf);
827                           }
828                           POP_STATE;
829                         }
830
831 %{
832     /*
833      * Simple comments and whitespace.  Normally, we would just ignore these, but
834      * in case we're processing a string escape, we need to note that we've seen
835      * a gap.
836      *
837      * Note that we cater for a comment line that *doesn't* end in a newline.
838      * This is incorrect, strictly speaking, but seems like the right thing
839      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
840      */
841 %}
842
843 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
844 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+       { noGap = FALSE; }
845
846 %{
847     /*
848      * Nested comments.  The major complication here is in trying to match the
849      * longest lexemes possible, for better performance.  (See the flex document.)
850      * That's why the rules look so bizarre.
851      */
852 %}
853
854 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-"        { 
855                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
856                         }
857
858 <Comment>[^-{]*         |
859 <Comment>"-"+[^-{}]+    |
860 <Comment>"{"+[^-{}]+    ;
861 <Comment>"{-"           { nested_comments++; }
862 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
863 <Comment>(.|\n)         ;
864
865 %{
866     /*
867      * Illegal characters.  This used to be a single rule, but we might as well
868      * pass on as much information as we have, so now we indicate our state in
869      * the error message.
870      */
871 %}
872
873 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n)        { 
874                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
875                             input_filename, hsplineno, hspcolno + 1); 
876                          format_string(stderr, (unsigned char *) yytext, 1);
877                          fputs("'\n", stderr);
878                          hsperror("");
879                         }
880 <Char>(.|\n)            { 
881                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
882                             input_filename, hsplineno, hspcolno + 1); 
883                          format_string(stderr, (unsigned char *) yytext, 1);
884                          fputs("' in a character literal\n", stderr);
885                          hsperror("");
886                         }
887 <CharEsc>(.|\n)         {
888                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
889                             input_filename, hsplineno, hspcolno + 1); 
890                          format_string(stderr, (unsigned char *) yytext, 1);
891                          fputs("'\n", stderr);
892                          hsperror("");
893                         }
894 <String>(.|\n)          { if (nonstandardFlag) {
895                              addtext(yytext, yyleng);
896                           } else { 
897                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
898                                 input_filename, hsplineno, hspcolno + 1); 
899                                 format_string(stderr, (unsigned char *) yytext, 1);
900                                 fputs("' in a string literal\n", stderr);
901                                 hsperror("");
902                           }
903                         }
904 <StringEsc>(.|\n)       {
905                          if (noGap) {
906                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
907                                 input_filename, hsplineno, hspcolno + 1); 
908                              format_string(stderr, (unsigned char *) yytext, 1);
909                              fputs("'\n", stderr);
910                              hsperror("");
911                          } else {
912                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
913                                 input_filename, hsplineno, hspcolno + 1);
914                              format_string(stderr, (unsigned char *) yytext, 1);
915                              fputs("' in a string gap\n", stderr);
916                              hsperror("");
917                          }
918                         }
919
920 %{
921     /*
922      * End of file.  In any sub-state, this is an error.  However, for the primary
923      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
924      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
925      * adding virtual close curlies, or closing an interface and returning to the
926      * primary source file.
927      *
928      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
929      * line/column advancement has to be done by hand.
930      */
931 %}
932
933 <Char,CharEsc><<EOF>>   { 
934                           hsplineno = hslineno; hspcolno = hscolno;
935                           hsperror("unterminated character literal");
936                         }
937 <Comment><<EOF>>        { 
938                           hsplineno = hslineno; hspcolno = hscolno;
939                           hsperror("unterminated comment"); 
940                         }
941 <String,StringEsc><<EOF>>   { 
942                           hsplineno = hslineno; hspcolno = hscolno;
943                           hsperror("unterminated string literal"); 
944                         }
945 <GhcPragma><<EOF>>      {
946                           hsplineno = hslineno; hspcolno = hscolno;
947                           hsperror("unterminated interface pragma"); 
948                         }
949 <UserPragma><<EOF>>     {
950                           hsplineno = hslineno; hspcolno = hscolno;
951                           hsperror("unterminated user-specified pragma"); 
952                         }
953 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
954
955 %%
956
957 /**********************************************************************
958 *                                                                     *
959 *                                                                     *
960 *     YACC/LEX Initialisation etc.                                    *
961 *                                                                     *
962 *                                                                     *
963 **********************************************************************/
964
965 /*
966    We initialise input_filename to "<stdin>".
967    This allows unnamed sources to be piped into the parser.
968 */
969
970 extern BOOLEAN acceptPrim;
971
972 void
973 yyinit(void)
974 {
975     input_filename = xstrdup("<stdin>");
976
977     /* We must initialize the input buffer _now_, because we call
978        setyyin _before_ calling yylex for the first time! */
979     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
980
981     if (acceptPrim)
982         PUSH_STATE(GlaExt);
983     else
984         PUSH_STATE(Code);
985 }
986
987 static void
988 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
989 {
990     if (input_filename != NULL)
991         free(input_filename);
992     input_filename = xstrdup(f);
993 }
994
995 /**********************************************************************
996 *                                                                     *
997 *                                                                     *
998 *     Layout Processing                                               *
999 *                                                                     *
1000 *                                                                     *
1001 **********************************************************************/
1002
1003 /*
1004         The following section deals with Haskell Layout conventions
1005         forcing insertion of ; or } as appropriate
1006 */
1007
1008 static BOOLEAN
1009 hsshouldindent(void)
1010 {
1011     return (!forgetindent && INDENTON);
1012 }
1013
1014
1015 /* Enter new context and set new indentation level */
1016 void
1017 hssetindent(void)
1018 {
1019 #ifdef HSP_DEBUG
1020     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1021 #endif
1022
1023     /*
1024      * partain: first chk that new indent won't be less than current one; this code
1025      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1026      * current token; what that has to do with indenting, I don't know.
1027      */
1028
1029
1030     if (hscolno - 1 <= INDENTPT) {
1031         if (INDENTPT == -1)
1032             return;             /* Empty input OK for Haskell 1.1 */
1033         else {
1034             char errbuf[ERR_BUF_SIZE];
1035
1036             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1037             hsperror(errbuf);
1038         }
1039     }
1040     hsentercontext((hspcolno << 1) | 1);
1041 }
1042
1043
1044 /* Enter a new context without changing the indentation level */
1045 void
1046 hsincindent(void)
1047 {
1048 #ifdef HSP_DEBUG
1049     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1050 #endif
1051     hsentercontext(indenttab[icontexts] & ~1);
1052 }
1053
1054
1055 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1056 void
1057 hsindentoff(void)
1058 {
1059     forgetindent = TRUE;
1060 }
1061
1062
1063 /* Enter a new layout context. */
1064 static void
1065 hsentercontext(int indent)
1066 {
1067     /* Enter new context and set indentation as specified */
1068     if (++icontexts >= MAX_CONTEXTS) {
1069         char errbuf[ERR_BUF_SIZE];
1070
1071         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1072         hsperror(errbuf);
1073     }
1074     forgetindent = FALSE;
1075     indenttab[icontexts] = indent;
1076 #ifdef HSP_DEBUG
1077     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1078 #endif
1079 }
1080
1081
1082 /* Exit a layout context */
1083 void
1084 hsendindent(void)
1085 {
1086     --icontexts;
1087 #ifdef HSP_DEBUG
1088     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1089 #endif
1090 }
1091
1092 /*
1093  *      Return checks the indentation level and returns ;, } or the specified token.
1094  */
1095
1096 static int
1097 Return(int tok)
1098 {
1099 #ifdef HSP_DEBUG
1100     extern int yyleng;
1101 #endif
1102
1103     if (hsshouldindent()) {
1104         if (hspcolno < INDENTPT) {
1105 #ifdef HSP_DEBUG
1106             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1107 #endif
1108             hssttok = tok;
1109             return (VCCURLY);
1110         } else if (hspcolno == INDENTPT) {
1111 #ifdef HSP_DEBUG
1112             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1113 #endif
1114             hssttok = -tok;
1115             return (SEMI);
1116         }
1117     }
1118     hssttok = -1;
1119 #ifdef HSP_DEBUG
1120     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1121 #endif
1122     return (tok);
1123 }
1124
1125
1126 /*
1127  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1128  */
1129 int
1130 yylex()
1131 {
1132     int tok;
1133     static BOOLEAN eof = FALSE;
1134
1135     if (!eof) {
1136         if (hssttok != -1) {
1137             if (hssttok < 0) {
1138                 tok = -hssttok;
1139                 hssttok = -1;
1140                 return tok;
1141             }
1142             RETURN(hssttok);
1143         } else {
1144             endlineno = hslineno;
1145             if ((tok = yylex1()) != EOF)
1146                 return tok;
1147             else
1148                 eof = TRUE;
1149         }
1150     }
1151     if (icontexts > icontexts_save) {
1152         if (INDENTON) {
1153             eof = TRUE;
1154             indenttab[icontexts] = 0;
1155             return (VCCURLY);
1156         } else
1157             hsperror("missing '}' at end of file");
1158     } else if (hsbuf_save != NULL) {
1159         fclose(yyin);
1160         yy_delete_buffer(YY_CURRENT_BUFFER);
1161         yy_switch_to_buffer(hsbuf_save);
1162         hsbuf_save = NULL;
1163         new_filename(filename_save);
1164         free(filename_save);
1165         hslineno = hslineno_save;
1166         hsplineno = hsplineno_save;
1167         hscolno = hscolno_save;
1168         hspcolno = hspcolno_save;
1169         etags = etags_save;
1170         in_interface = FALSE;
1171         icontexts = icontexts_save - 1;
1172         icontexts_save = 0;
1173 #ifdef HSP_DEBUG
1174         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1175 #endif
1176         eof = FALSE;
1177         RETURN(LEOF);
1178     } else {
1179         yyterminate();
1180     }
1181     abort(); /* should never get here! */
1182     return(0);
1183 }
1184
1185 /**********************************************************************
1186 *                                                                     *
1187 *                                                                     *
1188 *     Input Processing for Interfaces                                 *
1189 *                                                                     *
1190 *                                                                     *
1191 **********************************************************************/
1192
1193 /* setyyin(file)        open file as new lex input buffer */
1194 extern FILE *yyin;
1195
1196 void
1197 setyyin(char *file)
1198 {
1199     hsbuf_save = YY_CURRENT_BUFFER;
1200     if ((yyin = fopen(file, "r")) == NULL) {
1201         char errbuf[ERR_BUF_SIZE];
1202
1203         sprintf(errbuf, "can't read \"%-.50s\"", file);
1204         hsperror(errbuf);
1205     }
1206     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1207
1208     hslineno_save = hslineno;
1209     hsplineno_save = hsplineno;
1210     hslineno = hsplineno = 1;
1211
1212     filename_save = input_filename;
1213     input_filename = NULL;
1214     new_filename(file);
1215     hscolno_save = hscolno;
1216     hspcolno_save = hspcolno;
1217     hscolno = hspcolno = 0;
1218     in_interface = TRUE;
1219     etags_save = etags; /* do not do "etags" stuff in interfaces */
1220     etags = 0;          /* We remember whether we are doing it in
1221                            the module, so we can restore it later [WDP 94/09] */
1222     hsentercontext(-1);         /* partain: changed this from 0 */
1223     icontexts_save = icontexts;
1224 #ifdef HSP_DEBUG
1225     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1226 #endif
1227 }
1228
1229 static void
1230 layout_input(char *text, int len)
1231 {
1232 #ifdef HSP_DEBUG
1233     fprintf(stderr, "Scanning \"%s\"\n", text);
1234 #endif
1235
1236     hsplineno = hslineno;
1237     hspcolno = hscolno;
1238
1239     while (len-- > 0) {
1240         switch (*text++) {
1241         case '\n':
1242         case '\r':
1243         case '\f':
1244             hslineno++;
1245             hscolno = 0;
1246             break;
1247         case '\t':
1248             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1249             break;
1250         case '\v':
1251             break;
1252         default:
1253             ++hscolno;
1254             break;
1255         }
1256     }
1257 }
1258
1259 void
1260 setstartlineno(void)
1261 {
1262     startlineno = hsplineno;
1263 #if 1/*etags*/
1264 #else
1265     if (etags)
1266         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1267 #endif
1268 }
1269
1270 /**********************************************************************
1271 *                                                                     *
1272 *                                                                     *
1273 *                      Text Caching                                   *
1274 *                                                                     *
1275 *                                                                     *
1276 **********************************************************************/
1277
1278 #define CACHE_SIZE YY_BUF_SIZE
1279
1280 static struct {
1281     unsigned allocated;
1282     unsigned next;
1283     char *text;
1284 } textcache = { 0, 0, NULL };
1285
1286 static void
1287 cleartext(void)
1288 {
1289 /*  fprintf(stderr, "cleartext\n"); */
1290     textcache.next = 0;
1291     if (textcache.allocated == 0) {
1292         textcache.allocated = CACHE_SIZE;
1293         textcache.text = xmalloc(CACHE_SIZE);
1294     }
1295 }
1296
1297 static void
1298 addtext(char *text, unsigned length)
1299 {
1300 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1301
1302     if (length == 0)
1303         return;
1304
1305     if (textcache.next + length + 1 >= textcache.allocated) {
1306         textcache.allocated += length + CACHE_SIZE;
1307         textcache.text = xrealloc(textcache.text, textcache.allocated);
1308     }
1309     bcopy(text, textcache.text + textcache.next, length);
1310     textcache.next += length;
1311 }
1312
1313 static void
1314 addchar(char c)
1315 {
1316 /*  fprintf(stderr, "addchar: %c\n", c); */
1317
1318     if (textcache.next + 2 >= textcache.allocated) {
1319         textcache.allocated += CACHE_SIZE;
1320         textcache.text = xrealloc(textcache.text, textcache.allocated);
1321     }
1322     textcache.text[textcache.next++] = c;
1323 }
1324
1325 static char *
1326 fetchtext(unsigned *length)
1327 {
1328 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1329
1330     *length = textcache.next;
1331     textcache.text[textcache.next] = '\0';
1332     return textcache.text;
1333 }
1334
1335 /**********************************************************************
1336 *                                                                     *
1337 *                                                                     *
1338 *    Identifier Processing                                             *
1339 *                                                                     *
1340 *                                                                     *
1341 **********************************************************************/
1342
1343 /*
1344         hsnewid         Enters an id of length n into the symbol table.
1345 */
1346
1347 static void
1348 hsnewid(char *name, int length)
1349 {
1350     char save = name[length];
1351
1352     name[length] = '\0';
1353     yylval.uid = installid(name);
1354     name[length] = save;
1355 }
1356
1357 BOOLEAN 
1358 isconstr(char *s) /* walks past leading underscores before using the macro */
1359 {
1360     char *temp = s;
1361
1362     for ( ; temp != NULL && *temp == '_' ; temp++ );
1363
1364     return _isconstr(temp);
1365 }