ghc/compiler/parser/hslexer.flex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       FLEX for Haskell.                                             *
   6 *       -----------------                                             *
   7 *                                                                     *
   8 **********************************************************************/
   9
  10 /* The includes/config.h one */
  11 #include "config.h"
  12
  13 #include <stdio.h>
  14
  15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
  16 #include <string.h>
  17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
  18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
  19 #include <memory.h>
  20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
  21 #define index strchr
  22 #define rindex strrchr
  23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
  24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  25 #define bzero(s, n) memset ((s), 0, (n))
  26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
  27 #include <strings.h>
  28 /* memory.h and strings.h conflict on some systems.  */
  29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
  30
  31 #include "hspincl.h"
  32 #include "hsparser.tab.h"
  33 #include "constants.h"
  34 #include "utils.h"
  35
  36 /* Our substitute for <ctype.h> */
  37
  38 #define NCHARS  256
  39 #define _S      0x1
  40 #define _D      0x2
  41 #define _H      0x4
  42 #define _O      0x8
  43 #define _C      0x10
  44
  45 static unsigned char CharTable[NCHARS] = {
  46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
  47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
  48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
  49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
  50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
  51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
  52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
  53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
  54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
  55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
  58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
  59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
  60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
  61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
  62
  63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  79 };
  80
  81 BOOLEAN
  82 isconstr (char *s)
  83 {
  84     return(CharTable[*s]&(_C));
  85 }
  86
  87 /**********************************************************************
  88 *                                                                     *
  89 *                                                                     *
  90 *      Declarations                                                   *
  91 *                                                                     *
  92 *                                                                     *
  93 **********************************************************************/
  94
  95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
  96
  97 /*
  98  * For my own sanity, things that are not part of the flex skeleton
  99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
 100  */
 101
 102 static int hslineno = 0;        /* Line number at end of token */
 103 int hsplineno = 0;              /* Line number at end of previous token */
 104
 105 static int hscolno = 0;         /* Column number at end of token */
 106 int hspcolno = 0;               /* Column number at end of previous token */
 107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
 108
 109 int modulelineno = -1;          /* The line number where the module starts */
 110 int startlineno = 0;            /* The line number where something starts */
 111 int endlineno = 0;              /* The line number where something ends */
 112
 113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
 114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
 115
 116 static int nested_comments;     /* For counting comment nesting depth */
 117
 118 /* OLD: Hacky definition of yywrap: see flex doc.
 119
 120    If we don't do this, then we'll have to get the default
 121    yywrap from the flex library, which is often something
 122    we are not good at locating.  This avoids that difficulty.
 123    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
 124    WDP 94/09/05
 125 #define yywrap() 1
 126 */
 127
 128 /* Essential forward declarations */
 129
 130 static void hsnewid      PROTO((char *, int));
 131 static void layout_input PROTO((char *, int));
 132 static void cleartext    (NO_ARGS);
 133 static void addtext      PROTO((char *, unsigned));
 134 static void addchar      PROTO((char));
 135 static char *fetchtext   PROTO((unsigned *));
 136 static void new_filename PROTO((char *));
 137 static int  Return       PROTO((int));
 138 static void hsentercontext PROTO((int));
 139
 140 static BOOLEAN is_commment PROTO((char*, int));
 141
 142 /* Special file handling for IMPORTS */
 143 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 144
 145 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
 146 static char *filename_save;             /* File Name                     */
 147 static int hslineno_save = 0,           /* Line Number                   */
 148  hsplineno_save = 0,                    /* Line Number of Prev. token    */
 149  hscolno_save = 0,                      /* Indentation                   */
 150  hspcolno_save = 0;                     /* Left Indentation              */
 151 static short icontexts_save = 0;        /* Indent Context Level          */
 152
 153 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
 154 extern BOOLEAN etags;                   /* that which is saved */
 155
 156 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
 157
 158 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
 159                                  * inserted before token +ve  -- "}" inserted before
 160                                  * token */
 161
 162 short icontexts = 0;            /* Which context we're in */
 163
 164 /*
 165         Table of indentations:  right bit indicates whether to use
 166           indentation rules (1 = use rules; 0 = ignore)
 167
 168     partain:
 169     push one of these "contexts" at every "case" or "where"; the right bit says
 170     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
 171
 172     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 173     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 174     push is shown just below.
 175
 176 */
 177
 178
 179 static short indenttab[MAX_CONTEXTS] = {-1};
 180
 181 #define INDENTPT (indenttab[icontexts]>>1)
 182 #define INDENTON (indenttab[icontexts]&1)
 183
 184 #define RETURN(tok) return(Return(tok))
 185
 186 #undef YY_DECL
 187 #define YY_DECL int yylex1()
 188
 189 /* We should not peek at yy_act, but flex calls us even for the internal action
 190    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
 191    to support older versions of flex, we'll continue to peek for now.
 192  */
 193 #define YY_USER_ACTION \
 194     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
 195
 196 #if 0/*debug*/
 197 #undef YY_BREAK
 198 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
 199 #endif
 200
 201 /* Each time we enter a new start state, we push it onto the state stack.
 202 */
 203 #define PUSH_STATE(n)   yy_push_state(n)
 204 #define POP_STATE       yy_pop_state()
 205
 206 %}
 207 /* Options:
 208     8bit (8-bit input)
 209     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
 210     never-interactive (to go a bit faster)
 211     stack (use a start-condition stack)
 212 */
 213 %option 8bit
 214 %option noyywrap
 215 %option never-interactive
 216 %option stack
 217
 218 /* The start states are:
 219    Code -- normal Haskell code (principal lexer)
 220    GlaExt -- Haskell code with Glasgow extensions
 221    Comment -- Nested comment processing
 222    String -- Inside a string literal with backslashes
 223    StringEsc -- Immediately following a backslash in a string literal
 224    Char -- Inside a character literal with backslashes
 225    CharEsc -- Immediately following a backslash in a character literal
 226
 227    Note that the INITIAL state is unused.  Also note that these states
 228    are _exclusive_.  All rules should be prefixed with an appropriate
 229    list of start states.
 230  */
 231
 232 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
 233
 234 isoS                    [\xa1-\xbf\xd7\xf7]
 235 isoL                    [\xc0-\xd6\xd8-\xde]
 236 isol                    [\xdf-\xf6\xf8-\xff]
 237 isoA                    [\xa1-\xff]
 238
 239 D                       [0-9]
 240 O                       [0-7]
 241 H                       [0-9A-Fa-f]
 242 N                       {D}+
 243 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 244 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
 245 SId                     {S}{S}*
 246 L                       [A-Z\xc0-\xd6\xd8-\xde]
 247 l                       [a-z_\xdf-\xf6\xf8-\xff]
 248 I                       {L}|{l}
 249 i                       {L}|{l}|[0-9'_]
 250 Id                      {I}{i}*
 251 Mod                     {L}{i}*
 252 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
 253 CNTRL                   [@A-Z\[\\\]^_]
 254 WS                      [ \t\n\r\f\v]
 255 NL                      [\n\r]
 256
 257 %%
 258
 259 %{
 260     /*
 261      * Simple comments and whitespace.  Normally, we would just ignore these, but
 262      * in case we're processing a string escape, we need to note that we've seen
 263      * a gap.
 264      *
 265      * Note that we cater for a comment line that *doesn't* end in a newline.
 266      * This is incorrect, strictly speaking, but seems like the right thing
 267      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
 268      *
 269      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
 270      */
 271 %}
 272
 273 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
 274
 275 %{
 276     /*
 277      * Special GHC pragma rules.  Do we need a start state for interface files,
 278      * so these won't be matched in source files? --JSM
 279      */
 280
 281 %}
 282
 283 %{
 284 /* I believe the next rule is not ever matched.
 285
 286    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
 287    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
 288    where possible, so the lexer should now never see cpp directives
 289    like '# ' and '#line'.
 290
 291    -- SOF
 292
 293 <Code,GlaExt>^"# ".*{NL}    {
 294                           char tempf[FILENAME_SIZE];
 295                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
 296                           new_filename(tempf);
 297                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 298                         }
 299
 300 <Code,GlaExt>^"#line ".*{NL}    {
 301                           char tempf[FILENAME_SIZE];
 302                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
 303                           new_filename(tempf);
 304                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 305                         }
 306 */
 307 %}
 308
 309 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
 310                           /* partain: pragma-style line directive */
 311                           char tempf[FILENAME_SIZE];
 312                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
 313                           new_filename(tempf);
 314                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 315                         }
 316
 317 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
 318                               PUSH_STATE(UserPragma);
 319                               RETURN(INTERFACE_UPRAGMA);
 320                             }
 321 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
 322                               PUSH_STATE(UserPragma);
 323                               RETURN(SPECIALISE_UPRAGMA);
 324                             }
 325 <Code,GlaExt>"{-#"{WS}*"INLINE" {
 326                               PUSH_STATE(UserPragma);
 327                               RETURN(INLINE_UPRAGMA);
 328                             }
 329 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
 330                               PUSH_STATE(UserPragma);
 331                               RETURN(NOINLINE_UPRAGMA);
 332                             }
 333 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
 334                               PUSH_STATE(UserPragma);
 335                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
 336                             }
 337 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
 338                               /* these are handled by hscpp */
 339                               nested_comments =1;
 340                               PUSH_STATE(Comment);
 341                             }
 342 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
 343                               /* these are for the driver! */
 344                               nested_comments =1;
 345                               PUSH_STATE(Comment);
 346                             }
 347 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
 348                               /* these are used by `make depend' and the
 349                                  compiler to indicate that a module should
 350                                  be imported from source */
 351                               nested_comments =1;
 352                               RETURN(SOURCE_UPRAGMA);
 353                             }
 354
 355 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
 356                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
 357                                 input_filename, hsplineno);
 358                               format_string(stderr, (unsigned char *) yytext, yyleng);
 359                               fputs("'\n", stderr);
 360                               nested_comments = 1;
 361                               PUSH_STATE(Comment);
 362                             }
 363 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
 364
 365 %{
 366     /*
 367      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
 368      * intentionally accepted as a keyword even for normal <Code>.
 369      */
 370 %}
 371
 372 <Code,GlaExt>"case"             { RETURN(CASE); }
 373 <Code,GlaExt>"class"            { RETURN(CLASS); }
 374 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
 375 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
 376 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
 377 <Code,GlaExt>"do"               { RETURN(DO); }
 378 <Code,GlaExt>"else"             { RETURN(ELSE); }
 379 <Code,GlaExt>"if"               { RETURN(IF); }
 380 <Code,GlaExt>"import"           { RETURN(IMPORT); }
 381 <Code,GlaExt>"in"               { RETURN(IN); }
 382 <Code,GlaExt>"infix"            { RETURN(INFIX); }
 383 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
 384 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
 385 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
 386 <Code,GlaExt>"let"              { RETURN(LET); }
 387 <Code,GlaExt>"module"           { RETURN(MODULE); }
 388 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
 389 <Code,GlaExt>"of"               { RETURN(OF); }
 390 <Code,GlaExt>"then"             { RETURN(THEN); }
 391 <Code,GlaExt>"type"             { RETURN(TYPE); }
 392 <Code,GlaExt>"where"            { RETURN(WHERE); }
 393
 394 <Code,GlaExt>"as"               { RETURN(AS); }
 395 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
 396 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
 397
 398 <Code,GlaExt>"forall"           { RETURN(FORALL); }
 399
 400 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
 401 <GlaExt>"_ccall_"               { RETURN(CCALL); }
 402 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
 403 <GlaExt>"_casm_"                { RETURN(CASM); }
 404 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
 405 <GlaExt>"(#"                    { RETURN(OUNBOXPAREN); }
 406 <GlaExt>"#)"                    { RETURN(CUNBOXPAREN); }
 407 <GlaExt>"foreign"               { RETURN(FOREIGN); }
 408 <GlaExt>"export"                { RETURN(EXPORT); }
 409 <GlaExt>"label"                 { RETURN(LABEL); }
 410 <GlaExt>"unsafe"                { RETURN(UNSAFE); }
 411 <GlaExt>"_stdcall"              { RETURN(STDCALL); }
 412 <GlaExt>"_ccall"                { RETURN(C_CALL); }
 413 <GlaExt>"_pascal"               { RETURN(PASCAL); }
 414 <GlaExt>"stdcall"               { RETURN(STDCALL); }
 415 <GlaExt>"ccall"                 { RETURN(C_CALL); }
 416 <GlaExt>"pascal"                { RETURN(PASCAL); }
 417 <GlaExt>"dynamic"               { RETURN(DYNAMIC); }
 418
 419 %{
 420     /*
 421      * Haskell operators: special, reservedops and useful varsyms
 422      */
 423 %}
 424
 425 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
 426 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
 427 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
 428 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
 429 <Code,GlaExt>"{"                { RETURN(OCURLY); }
 430 <Code,GlaExt>"}"                { RETURN(CCURLY); }
 431 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
 432 <Code,GlaExt>";"                { RETURN(SEMI); }
 433 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
 434
 435 <Code,GlaExt>"."                { RETURN(DOT); }
 436 <Code,GlaExt>".."               { RETURN(DOTDOT); }
 437 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
 438 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
 439 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
 440 <Code,GlaExt>"|"                { RETURN(VBAR); }
 441 <Code,GlaExt>"<-"               { RETURN(LARROW); }
 442 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
 443 <Code,GlaExt>"-"                { RETURN(MINUS); }
 444 <Code,GlaExt>"+"                { RETURN(PLUS); }
 445
 446 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
 447 <Code,GlaExt>"@"                { RETURN(AT); }
 448 <Code,GlaExt>"!"                { RETURN(BANG); }
 449 <Code,GlaExt>"~"                { RETURN(LAZY); }
 450
 451 %{
 452     /*
 453      * Integers and (for Glasgow extensions) primitive integers.  Note that
 454      * we pass all of the text on to the parser, because flex/C can't handle
 455      * arbitrary precision numbers.
 456      */
 457 %}
 458
 459 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
 460                          yylval.uid = xstrndup(yytext, yyleng - 1);
 461                          RETURN(INTPRIM);
 462                         }
 463 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
 464                          yylval.uid = xstrndup(yytext, yyleng);
 465                          RETURN(INTEGER);
 466                         }
 467 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
 468                          yylval.uid = xstrndup(yytext, yyleng - 1);
 469                          RETURN(INTPRIM);
 470                         }
 471 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
 472                          yylval.uid = xstrndup(yytext, yyleng);
 473                          RETURN(INTEGER);
 474                         }
 475 <GlaExt>("-")?{N}"#"    {
 476                          yylval.uid = xstrndup(yytext, yyleng - 1);
 477                          RETURN(INTPRIM);
 478                         }
 479 <Code,GlaExt,UserPragma>{N} {
 480                          yylval.uid = xstrndup(yytext, yyleng);
 481                          RETURN(INTEGER);
 482                         }
 483
 484 %{
 485     /*
 486      * Floats and (for Glasgow extensions) primitive floats/doubles.
 487      */
 488 %}
 489
 490 <GlaExt>("-")?{F}"##"   {
 491                          yylval.uid = xstrndup(yytext, yyleng - 2);
 492                          RETURN(DOUBLEPRIM);
 493                         }
 494 <GlaExt>("-")?{F}"#"    {
 495                          yylval.uid = xstrndup(yytext, yyleng - 1);
 496                          RETURN(FLOATPRIM);
 497                         }
 498 <Code,GlaExt>{F}        {
 499                          yylval.uid = xstrndup(yytext, yyleng);
 500                          RETURN(FLOAT);
 501                         }
 502
 503 %{
 504     /*
 505      * Funky ``foo'' style C literals for Glasgow extensions
 506      */
 507 %}
 508
 509 <GlaExt>"``"[^']+"''"   {
 510                          hsnewid(yytext + 2, yyleng - 4);
 511                          RETURN(CLITLIT);
 512                         }
 513
 514 %{
 515     /*
 516      * Identifiers, both variables and operators.  The trailing hash is allowed
 517      * for Glasgow extensions.
 518      */
 519 %}
 520
 521
 522 %{
 523 /* These SHOULDNAE work in "Code" (sigh) */
 524 %}
 525 <GlaExt,UserPragma>{Id}"#" {
 526                         if (! nonstandardFlag) {
 527                             char errbuf[ERR_BUF_SIZE];
 528                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 529                             hsperror(errbuf);
 530                          }
 531                          hsnewid(yytext, yyleng);
 532                          RETURN(isconstr(yytext) ? CONID : VARID);
 533                         }
 534 <Code,GlaExt,UserPragma>{Id}    {
 535                          hsnewid(yytext, yyleng);
 536                          RETURN(isconstr(yytext) ? CONID : VARID);
 537                         }
 538 <Code,GlaExt,UserPragma>{SId}   {
 539                          if (is_commment(yytext,yyleng)) {
 540                                 int c;
 541                                 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
 542                                         ;
 543                                 if (c != EOF)
 544                                    unput(c);
 545                          } else {
 546                             hsnewid(yytext, yyleng);
 547                             RETURN(isconstr(yytext) ? CONSYM : VARSYM);
 548                          }
 549                         }
 550 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
 551                          BOOLEAN is_constr;
 552                          if (! nonstandardFlag) {
 553                             char errbuf[ERR_BUF_SIZE];
 554                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 555                             hsperror(errbuf);
 556                          }
 557                          is_constr = hsnewqid(yytext, yyleng);
 558                          RETURN(is_constr ? QCONID : QVARID);
 559                         }
 560 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
 561                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
 562                          RETURN(is_constr ? QCONID : QVARID);
 563                         }
 564 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
 565                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
 566                          RETURN(is_constr ? QCONSYM : QVARSYM);
 567                         }
 568
 569 %{
 570     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
 571
 572     /* Because we can make the former well-behaved (we defined them).
 573
 574        Sadly, the latter is defined by Haskell, which allows such
 575        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
 576     */
 577 %}
 578
 579 <GlaExt,UserPragma>"`"{Id}"#`"  {
 580                          hsnewid(yytext + 1, yyleng - 2);
 581                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
 582                         }
 583
 584 %{
 585     /*
 586      * Character literals.  The first form is the quick form, for character
 587      * literals that don't contain backslashes.  Literals with backslashes are
 588      * lexed through multiple rules.  First, we match the open ' and as many
 589      * normal characters as possible.  This puts us into the <Char> state, where
 590      * a backslash is legal.  Then, we match the backslash and move into the
 591      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
 592      * characters and the close '.  We may end up with too many characters, but
 593      * this allows us to easily share the lex rules with strings.  Excess characters
 594      * are ignored with a warning.
 595      */
 596 %}
 597
 598 <GlaExt>'({CHAR}|"\"")"'#" {
 599                          yylval.uhstring = installHstring(1, yytext+1);
 600                          RETURN(CHARPRIM);
 601                         }
 602 <Code,GlaExt>'({CHAR}|"\"")'    {
 603                          yylval.uhstring = installHstring(1, yytext+1);
 604                          RETURN(CHAR);
 605                         }
 606 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
 607                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
 608                          hsperror(errbuf);
 609                         }
 610 <Code,GlaExt>'({CHAR}|"\"")* {
 611                          hsmlcolno = hspcolno;
 612                          cleartext();
 613                          addtext(yytext+1, yyleng-1);
 614                          PUSH_STATE(Char);
 615                         }
 616 <Char>({CHAR}|"\"")*'#  {
 617                          unsigned length;
 618                          char *text;
 619
 620                          addtext(yytext, yyleng - 2);
 621                          text = fetchtext(&length);
 622
 623                          if (! nonstandardFlag) {
 624                             char errbuf[ERR_BUF_SIZE];
 625                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
 626                             hsperror(errbuf);
 627                          }
 628
 629                          if (length > 1) {
 630                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
 631                               input_filename, hsplineno, hspcolno + 1);
 632                             format_string(stderr, (unsigned char *) text, length);
 633                             fputs("' too long\n", stderr);
 634                             hsperror("");
 635                          }
 636                          yylval.uhstring = installHstring(1, text);
 637                          hspcolno = hsmlcolno;
 638                          POP_STATE;
 639                          RETURN(CHARPRIM);
 640                         }
 641 <Char>({CHAR}|"\"")*'   {
 642                          unsigned length;
 643                          char *text;
 644
 645                          addtext(yytext, yyleng - 1);
 646                          text = fetchtext(&length);
 647
 648                          if (length > 1) {
 649                             fprintf(stderr, "%s:%d:%d: Character literal '",
 650                               input_filename, hsplineno, hspcolno + 1);
 651                             format_string(stderr, (unsigned char *) text, length);
 652                             fputs("' too long\n", stderr);
 653                             hsperror("");
 654                          }
 655                          yylval.uhstring = installHstring(1, text);
 656                          hspcolno = hsmlcolno;
 657                          POP_STATE;
 658                          RETURN(CHAR);
 659                         }
 660 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
 661
 662
 663 %{
 664     /*
 665      * String literals.  The first form is the quick form, for string literals
 666      * that don't contain backslashes.  Literals with backslashes are lexed
 667      * through multiple rules.  First, we match the open " and as many normal
 668      * characters as possible.  This puts us into the <String> state, where
 669      * a backslash is legal.  Then, we match the backslash and move into the
 670      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
 671      * characters, moving back and forth between <String> and <StringEsc> as more
 672      * backslashes are encountered.  (We may even digress into <Comment> mode if we
 673      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
 674      * of normal characters and the close ".
 675      */
 676 %}
 677
 678 <GlaExt>"\""({CHAR}|"'")*"\""#  {
 679                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
 680                             /* the -3 accounts for the " on front, "# on the end */
 681                          RETURN(STRINGPRIM);
 682                         }
 683 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
 684                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
 685                          RETURN(STRING);
 686                         }
 687 <Code,GlaExt>"\""({CHAR}|"'")* {
 688                          hsmlcolno = hspcolno;
 689                          cleartext();
 690                          addtext(yytext+1, yyleng-1);
 691                          PUSH_STATE(String);
 692                         }
 693 <String>({CHAR}|"'")*"\"#"   {
 694                          unsigned length;
 695                          char *text;
 696
 697                          addtext(yytext, yyleng-2);
 698                          text = fetchtext(&length);
 699
 700                          if (! nonstandardFlag) {
 701                             char errbuf[ERR_BUF_SIZE];
 702                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
 703                             hsperror(errbuf);
 704                          }
 705
 706                          yylval.uhstring = installHstring(length, text);
 707                          hspcolno = hsmlcolno;
 708                          POP_STATE;
 709                          RETURN(STRINGPRIM);
 710                         }
 711 <String>({CHAR}|"'")*"\""   {
 712                          unsigned length;
 713                          char *text;
 714
 715                          addtext(yytext, yyleng-1);
 716                          text = fetchtext(&length);
 717
 718                          yylval.uhstring = installHstring(length, text);
 719                          hspcolno = hsmlcolno;
 720                          POP_STATE;
 721                          RETURN(STRING);
 722                         }
 723 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
 724
 725 %{
 726     /*
 727      * Character and string escapes are roughly the same, but strings have the
 728      * extra `\&' sequence which is not allowed for characters.  Also, comments
 729      * are allowed in the <StringEsc> state.  (See the comment section much
 730      * further down.)
 731      *
 732      * NB: Backslashes and tabs are stored in strings as themselves.
 733      * But if we print them (in printtree.c), they must go out as
 734      * "\\\\" and "\\t" respectively.  (This is because of the bogus
 735      * intermediate format that the parser produces.  It uses '\t' fpr end of
 736      * string, so it needs to be able to escape tabs, which means that it
 737      * also needs to be able to escape the escape character ('\\').  Sigh.
 738      */
 739 %}
 740
 741 <Char>\\                { PUSH_STATE(CharEsc); }
 742 <String>\\&             /* Ignore */ ;
 743 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
 744
 745 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
 746 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
 747
 748 %{
 749 /*
 750  Not 100% correct, tokenizes "foo \  --<>--
 751                                  \ bar"
 752
 753  as "foo  bar", but this is not correct as per Haskell 98 report and its
 754  maximal munch rule for "--"-style comments.
 755
 756  For the moment, not deemed worthy to fix.
 757 */
 758 %}
 759 <StringEsc>"--"[^\n\r]*{NL}?{WS}*  { noGap=FALSE; }
 760
 761 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
 762 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
 763 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
 764 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
 765 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
 766 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
 767 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
 768 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
 769 <CharEsc,StringEsc>BEL  |
 770 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
 771 <CharEsc,StringEsc>BS   |
 772 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
 773 <CharEsc,StringEsc>HT   |
 774 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
 775 <CharEsc,StringEsc>LF   |
 776 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
 777 <CharEsc,StringEsc>VT   |
 778 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
 779 <CharEsc,StringEsc>FF   |
 780 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
 781 <CharEsc,StringEsc>CR   |
 782 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
 783 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
 784 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
 785 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
 786 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
 787 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
 788 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
 789 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
 790 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
 791 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
 792 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
 793 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
 794 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
 795 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
 796 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
 797 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
 798 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
 799 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
 800 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
 801 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
 802 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
 803 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
 804 <CharEsc,StringEsc>{D}+  {
 805                           int i = strtol(yytext, NULL, 10);
 806                           if (i < NCHARS) {
 807                              addchar((char) i);
 808                           } else {
 809                              char errbuf[ERR_BUF_SIZE];
 810                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 811                                 yytext);
 812                              hsperror(errbuf);
 813                           }
 814                           POP_STATE;
 815                         }
 816 <CharEsc,StringEsc>o{O}+ {
 817                           int i = strtol(yytext + 1, NULL, 8);
 818                           if (i < NCHARS) {
 819                              addchar((char) i);
 820                           } else {
 821                              char errbuf[ERR_BUF_SIZE];
 822                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 823                                 yytext);
 824                              hsperror(errbuf);
 825                           }
 826                           POP_STATE;
 827                         }
 828 <CharEsc,StringEsc>x{H}+ {
 829                           int i = strtol(yytext + 1, NULL, 16);
 830                           if (i < NCHARS) {
 831                              addchar((char) i);
 832                           } else {
 833                              char errbuf[ERR_BUF_SIZE];
 834                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 835                                 yytext);
 836                              hsperror(errbuf);
 837                           }
 838                           POP_STATE;
 839                         }
 840
 841
 842 %{
 843     /*
 844      * Nested comments.  The major complication here is in trying to match the
 845      * longest lexemes possible, for better performance.  (See the flex document.)
 846      * That's why the rules look so bizarre.
 847      */
 848 %}
 849
 850 <Code,GlaExt,UserPragma,StringEsc>"{-"  {
 851                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
 852                         }
 853
 854 <Comment>[^-{]*         |
 855 <Comment>"-"+[^-{}]+    |
 856 <Comment>"{"+[^-{}]+    ;
 857 <Comment>"{-"           { nested_comments++; }
 858 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
 859 <Comment>(.|\n)         ;
 860
 861
 862 %{
 863     /*
 864      * Illegal characters.  This used to be a single rule, but we might as well
 865      * pass on as much information as we have, so now we indicate our state in
 866      * the error message.
 867      */
 868 %}
 869
 870 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  {
 871                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
 872                             input_filename, hsplineno, hspcolno + 1);
 873                          format_string(stderr, (unsigned char *) yytext, 1);
 874                          fputs("'\n", stderr);
 875                          hsperror("");
 876                         }
 877 <Char>(.|\n)            {
 878                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
 879                             input_filename, hsplineno, hspcolno + 1);
 880                          format_string(stderr, (unsigned char *) yytext, 1);
 881                          fputs("' in a character literal\n", stderr);
 882                          hsperror("");
 883                         }
 884 <CharEsc>(.|\n)         {
 885                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
 886                             input_filename, hsplineno, hspcolno + 1);
 887                          format_string(stderr, (unsigned char *) yytext, 1);
 888                          fputs("'\n", stderr);
 889                          hsperror("");
 890                         }
 891 <String>(.|\n)          { if (nonstandardFlag) {
 892                              addtext(yytext, yyleng);
 893                           } else {
 894                                 fprintf(stderr, "%s:%d:%d: Illegal character: `",
 895                                 input_filename, hsplineno, hspcolno + 1);
 896                                 format_string(stderr, (unsigned char *) yytext, 1);
 897                                 fputs("' in a string literal\n", stderr);
 898                                 hsperror("");
 899                           }
 900                         }
 901 <StringEsc>(.|\n)       {
 902                          if (noGap) {
 903                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
 904                                 input_filename, hsplineno, hspcolno + 1);
 905                              format_string(stderr, (unsigned char *) yytext, 1);
 906                              fputs("'\n", stderr);
 907                              hsperror("");
 908                          } else {
 909                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
 910                                 input_filename, hsplineno, hspcolno + 1);
 911                              format_string(stderr, (unsigned char *) yytext, 1);
 912                              fputs("' in a string gap\n", stderr);
 913                              hsperror("");
 914                          }
 915                         }
 916
 917 %{
 918     /*
 919      * End of file.  In any sub-state, this is an error.  However, for the primary
 920      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
 921      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
 922      * adding virtual close curlies, or closing an interface and returning to the
 923      * primary source file.
 924      *
 925      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
 926      * line/column advancement has to be done by hand.
 927      */
 928 %}
 929
 930 <Char,CharEsc><<EOF>>   {
 931                           hsplineno = hslineno; hspcolno = hscolno;
 932                           hsperror("unterminated character literal");
 933                         }
 934 <Comment><<EOF>>        {
 935                           hsplineno = hslineno; hspcolno = hscolno;
 936                           hsperror("unterminated comment");
 937                         }
 938 <String,StringEsc><<EOF>>   {
 939                           hsplineno = hslineno; hspcolno = hscolno;
 940                           hsperror("unterminated string literal");
 941                         }
 942 <UserPragma><<EOF>>     {
 943                           hsplineno = hslineno; hspcolno = hscolno;
 944                           hsperror("unterminated user-specified pragma");
 945                         }
 946 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
 947
 948 %%
 949
 950 /**********************************************************************
 951 *                                                                     *
 952 *                                                                     *
 953 *     YACC/LEX Initialisation etc.                                    *
 954 *                                                                     *
 955 *                                                                     *
 956 **********************************************************************/
 957
 958 /*
 959    We initialise input_filename to "<stdin>".
 960    This allows unnamed sources to be piped into the parser.
 961 */
 962
 963 void
 964 yyinit(void)
 965 {
 966     input_filename = xstrdup("<stdin>");
 967
 968     /* We must initialize the input buffer _now_, because we call
 969        setyyin _before_ calling yylex for the first time! */
 970     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 971
 972     if (nonstandardFlag)
 973         PUSH_STATE(GlaExt);
 974     else
 975         PUSH_STATE(Code);
 976 }
 977
 978 static void
 979 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
 980 {
 981     if (input_filename != NULL)
 982         free(input_filename);
 983     input_filename = xstrdup(f);
 984 }
 985
 986 /**********************************************************************
 987 *                                                                     *
 988 *                                                                     *
 989 *     Layout Processing                                               *
 990 *                                                                     *
 991 *                                                                     *
 992 **********************************************************************/
 993
 994 /*
 995         The following section deals with Haskell Layout conventions
 996         forcing insertion of ; or } as appropriate
 997 */
 998
 999 #ifdef HSP_DEBUG
1000 #define LAYOUT_DEBUG
1001 #endif
1002
1003
1004 static BOOLEAN
1005 hsshouldindent(void)
1006 {
1007     return (!forgetindent && INDENTON);
1008 }
1009
1010
1011 /* Enter new context and set new indentation level */
1012 void
1013 hssetindent(void)
1014 {
1015 #ifdef LAYOUT_DEBUG
1016     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1017 #endif
1018
1019     /*
1020      * partain: first chk that new indent won't be less than current one; this code
1021      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1022      * current token; what that has to do with indenting, I don't know.
1023      */
1024
1025
1026     if (hscolno - 1 <= INDENTPT) {
1027         if (INDENTPT == -1)
1028             return;             /* Empty input OK for Haskell 1.1 */
1029         else {
1030             char errbuf[ERR_BUF_SIZE];
1031
1032             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1033             hsperror(errbuf);
1034         }
1035     }
1036     hsentercontext((hspcolno << 1) | 1);
1037 }
1038
1039
1040 /* Enter a new context without changing the indentation level */
1041 void
1042 hsincindent(void)
1043 {
1044 #ifdef LAYOUT_DEBUG
1045     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1046 #endif
1047     hsentercontext(indenttab[icontexts] & ~1);
1048 }
1049
1050
1051 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1052 void
1053 hsindentoff(void)
1054 {
1055     forgetindent = TRUE;
1056 }
1057
1058
1059 /* Enter a new layout context. */
1060 static void
1061 hsentercontext(int indent)
1062 {
1063     /* Enter new context and set indentation as specified */
1064     if (++icontexts >= MAX_CONTEXTS) {
1065         char errbuf[ERR_BUF_SIZE];
1066
1067         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1068         hsperror(errbuf);
1069     }
1070     forgetindent = FALSE;
1071     indenttab[icontexts] = indent;
1072 #ifdef LAYOUT_DEBUG
1073     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1074 #endif
1075 }
1076
1077
1078 /* Exit a layout context */
1079 void
1080 hsendindent(void)
1081 {
1082     --icontexts;
1083 #ifdef LAYOUT_DEBUG
1084     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1085 #endif
1086 }
1087
1088 /*
1089  *      Return checks the indentation level and returns ;, } or the specified token.
1090  */
1091 static int
1092 Return(int tok)
1093 {
1094 #ifdef HSP_DEBUG
1095     extern int yyleng;
1096 #endif
1097     if (hsshouldindent()) {
1098         if (hspcolno < INDENTPT) {
1099 #ifdef HSP_DEBUG
1100             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1101 #endif
1102             hssttok = tok;
1103             return (VCCURLY);
1104         } else if (hspcolno == INDENTPT) {
1105 #ifdef HSP_DEBUG
1106             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1107 #endif
1108             hssttok = -tok;
1109             return (SEMI);
1110         }
1111     }
1112
1113     hssttok = -1;
1114 #ifdef HSP_DEBUG
1115     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1116 #endif
1117     return (tok);
1118 }
1119
1120
1121 /*
1122  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1123  */
1124 int
1125 yylex()
1126 {
1127     int tok;
1128     static BOOLEAN eof = FALSE;
1129
1130     if (!eof) {
1131         if (hssttok != -1) {
1132             if (hssttok < 0) {
1133                 tok = -hssttok;
1134                 hssttok = -1;
1135                 return tok;
1136             }
1137             RETURN(hssttok);
1138         } else {
1139             endlineno = hslineno;
1140             if ((tok = yylex1()) != EOF)
1141                 return tok;
1142             else
1143                 eof = TRUE;
1144         }
1145     }
1146     if (icontexts > icontexts_save) {
1147         if (INDENTON) {
1148             eof = TRUE;
1149             indenttab[icontexts] = 0;
1150             return (VCCURLY);
1151         } else
1152             hsperror("missing '}' at end of file");
1153     } else if (hsbuf_save != NULL) {
1154         fclose(yyin);
1155         yy_delete_buffer(YY_CURRENT_BUFFER);
1156         yy_switch_to_buffer(hsbuf_save);
1157         hsbuf_save = NULL;
1158         new_filename(filename_save);
1159         free(filename_save);
1160         hslineno = hslineno_save;
1161         hsplineno = hsplineno_save;
1162         hscolno = hscolno_save;
1163         hspcolno = hspcolno_save;
1164         etags = etags_save;
1165         icontexts = icontexts_save - 1;
1166         icontexts_save = 0;
1167 #ifdef HSP_DEBUG
1168         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1169 #endif
1170         eof = FALSE;
1171
1172         /* RETURN(LEOF); */
1173         hsperror("No longer using yacc to parse interface files");
1174
1175     } else {
1176         yyterminate();
1177     }
1178     abort(); /* should never get here! */
1179     return(0);
1180 }
1181
1182 /**********************************************************************
1183 *                                                                     *
1184 *                                                                     *
1185 *     Input Processing for Interfaces -- Not currently used !!!       *
1186 *                                                                     *
1187 *                                                                     *
1188 **********************************************************************/
1189
1190 /* setyyin(file)        open file as new lex input buffer */
1191 extern FILE *yyin;
1192
1193 void
1194 setyyin(char *file)
1195 {
1196     hsbuf_save = YY_CURRENT_BUFFER;
1197     if ((yyin = fopen(file, "r")) == NULL) {
1198         char errbuf[ERR_BUF_SIZE];
1199
1200         sprintf(errbuf, "can't read \"%-.50s\"", file);
1201         hsperror(errbuf);
1202     }
1203     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1204
1205     hslineno_save = hslineno;
1206     hsplineno_save = hsplineno;
1207     hslineno = hsplineno = 1;
1208
1209     filename_save = input_filename;
1210     input_filename = NULL;
1211     new_filename(file);
1212     hscolno_save = hscolno;
1213     hspcolno_save = hspcolno;
1214     hscolno = hspcolno = 0;
1215     etags_save = etags; /* do not do "etags" stuff in interfaces */
1216     etags = 0;          /* We remember whether we are doing it in
1217                            the module, so we can restore it later [WDP 94/09] */
1218     hsentercontext(-1);         /* partain: changed this from 0 */
1219     icontexts_save = icontexts;
1220 #ifdef HSP_DEBUG
1221     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1222 #endif
1223 }
1224
1225 static void
1226 layout_input(char *text, int len)
1227 {
1228 #ifdef HSP_DEBUG
1229     fprintf(stderr, "Scanning \"%s\"\n", text);
1230 #endif
1231
1232     hsplineno = hslineno;
1233     hspcolno = hscolno;
1234
1235     while (len-- > 0) {
1236         switch (*text++) {
1237         case '\n':
1238         case '\r':
1239         case '\f':
1240             hslineno++;
1241             hscolno = 0;
1242             break;
1243         case '\t':
1244             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1245             break;
1246         case '\v':
1247             break;
1248         default:
1249             ++hscolno;
1250             break;
1251         }
1252     }
1253 }
1254
1255 void
1256 setstartlineno(void)
1257 {
1258     startlineno = hsplineno;
1259
1260     if (modulelineno == 0) {
1261         modulelineno = startlineno;
1262     }
1263
1264 #if 1/*etags*/
1265 #else
1266     if (etags)
1267         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1268 #endif
1269 }
1270
1271 /**********************************************************************
1272 *                                                                     *
1273 *                                                                     *
1274 *                      Text Caching                                   *
1275 *                                                                     *
1276 *                                                                     *
1277 **********************************************************************/
1278
1279 #define CACHE_SIZE YY_BUF_SIZE
1280
1281 static struct {
1282     unsigned allocated;
1283     unsigned next;
1284     char *text;
1285 } textcache = { 0, 0, NULL };
1286
1287 static void
1288 cleartext(void)
1289 {
1290 /*  fprintf(stderr, "cleartext\n"); */
1291     textcache.next = 0;
1292     if (textcache.allocated == 0) {
1293         textcache.allocated = CACHE_SIZE;
1294         textcache.text = xmalloc(CACHE_SIZE);
1295     }
1296 }
1297
1298 static void
1299 addtext(char *text, unsigned length)
1300 {
1301 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1302
1303     if (length == 0)
1304         return;
1305
1306     if (textcache.next + length + 1 >= textcache.allocated) {
1307         textcache.allocated += length + CACHE_SIZE;
1308         textcache.text = xrealloc(textcache.text, textcache.allocated);
1309     }
1310     bcopy(text, textcache.text + textcache.next, length);
1311     textcache.next += length;
1312 }
1313
1314 static void
1315 addchar(char c)
1316 {
1317 /*  fprintf(stderr, "addchar: %c\n", c); */
1318
1319     if (textcache.next + 2 >= textcache.allocated) {
1320         textcache.allocated += CACHE_SIZE;
1321         textcache.text = xrealloc(textcache.text, textcache.allocated);
1322     }
1323     textcache.text[textcache.next++] = c;
1324 }
1325
1326 static char *
1327 fetchtext(unsigned *length)
1328 {
1329 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1330
1331     *length = textcache.next;
1332     textcache.text[textcache.next] = '\0';
1333     return textcache.text;
1334 }
1335
1336 /**********************************************************************
1337 *                                                                     *
1338 *                                                                     *
1339 *    Identifier Processing                                             *
1340 *                                                                     *
1341 *                                                                     *
1342 **********************************************************************/
1343
1344 /*
1345         hsnewid         Enters an id of length n into the symbol table.
1346 */
1347
1348 static void
1349 hsnewid(char *name, int length)
1350 {
1351     char save = name[length];
1352
1353     name[length] = '\0';
1354     yylval.uid = installid(name);
1355     name[length] = save;
1356 }
1357
1358 BOOLEAN
1359 hsnewqid(char *name, int length)
1360 {
1361     char* dot;
1362     char save = name[length];
1363     name[length] = '\0';
1364
1365     dot = strchr(name, '.');
1366     *dot = '\0';
1367     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1368     *dot = '.';
1369     name[length] = save;
1370
1371     return isconstr(dot+1);
1372 }
1373
1374 static
1375 BOOLEAN
1376 is_commment(char* lexeme, int len)
1377 {
1378    char* ptr;
1379    int i;
1380
1381    if (len < 2) {
1382       return FALSE;
1383    }
1384
1385    for(i=0;i<len;i++) {
1386      if (lexeme[i] != '-') return FALSE;
1387    }
1388    return TRUE;
1389 }
1390