ghc/compiler/parser/hslexer.flex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       FLEX for Haskell.                                             *
   6 *       -----------------                                             *
   7 *                                                                     *
   8 **********************************************************************/
   9
  10 #include "../../includes/config.h"
  11
  12 #include <stdio.h>
  13
  14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
  15 #include <string.h>
  16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
  17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
  18 #include <memory.h>
  19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
  20 #define index strchr
  21 #define rindex strrchr
  22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
  23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  24 #define bzero(s, n) memset ((s), 0, (n))
  25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
  26 #include <strings.h>
  27 /* memory.h and strings.h conflict on some systems.  */
  28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
  29
  30 #include "hspincl.h"
  31 #include "hsparser.tab.h"
  32 #include "constants.h"
  33 #include "utils.h"
  34
  35 /* Our substitute for <ctype.h> */
  36
  37 #define NCHARS  256
  38 #define _S      0x1
  39 #define _D      0x2
  40 #define _H      0x4
  41 #define _O      0x8
  42 #define _C      0x10
  43
  44 #define _isconstr(s)    (CharTable[*s]&(_C))
  45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
  46
  47 static unsigned char CharTable[NCHARS] = {
  48 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
  49 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
  50 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
  51 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
  52 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
  53 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
  54 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
  55 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
  56 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
  57 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  58 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  59 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
  60 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
  61 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
  62 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
  63 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
  64
  65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  79 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  80 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  81 };
  82
  83 /**********************************************************************
  84 *                                                                     *
  85 *                                                                     *
  86 *      Declarations                                                   *
  87 *                                                                     *
  88 *                                                                     *
  89 **********************************************************************/
  90
  91 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
  92
  93 /*
  94  * For my own sanity, things that are not part of the flex skeleton
  95  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
  96  */
  97
  98 static int hslineno = 0;        /* Line number at end of token */
  99 int hsplineno = 0;              /* Line number at end of previous token */
 100
 101 static int hscolno = 0;         /* Column number at end of token */
 102 int hspcolno = 0;               /* Column number at end of previous token */
 103 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
 104
 105 int modulelineno = -1;          /* The line number where the module starts */
 106 int startlineno = 0;            /* The line number where something starts */
 107 int endlineno = 0;              /* The line number where something ends */
 108
 109 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
 110 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
 111
 112 static int nested_comments;     /* For counting comment nesting depth */
 113
 114 /* Hacky definition of yywrap: see flex doc.
 115
 116    If we don't do this, then we'll have to get the default
 117    yywrap from the flex library, which is often something
 118    we are not good at locating.  This avoids that difficulty.
 119    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
 120    WDP 94/09/05
 121 */
 122 #define yywrap() 1
 123
 124 /* Essential forward declarations */
 125
 126 static void hsnewid      PROTO((char *, int));
 127 static void layout_input PROTO((char *, int));
 128 static void cleartext    (NO_ARGS);
 129 static void addtext      PROTO((char *, unsigned));
 130 static void addchar      PROTO((char));
 131 static char *fetchtext   PROTO((unsigned *));
 132 static void new_filename PROTO((char *));
 133 static int  Return       PROTO((int));
 134 static void hsentercontext PROTO((int));
 135
 136 /* Special file handling for IMPORTS */
 137 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 138
 139 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
 140 static char *filename_save;             /* File Name                     */
 141 static int hslineno_save = 0,           /* Line Number                   */
 142  hsplineno_save = 0,                    /* Line Number of Prev. token    */
 143  hscolno_save = 0,                      /* Indentation                   */
 144  hspcolno_save = 0;                     /* Left Indentation              */
 145 static short icontexts_save = 0;        /* Indent Context Level          */
 146
 147 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
 148 extern BOOLEAN etags;                   /* that which is saved */
 149
 150 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
 151
 152 static BOOLEAN in_interface = FALSE;    /* TRUE if we are reading a .hi file */
 153
 154 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
 155 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
 156 extern int maxAcceptablePragmaVersion;
 157 extern int thisIfacePragmaVersion;
 158
 159 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
 160                                  * inserted before token +ve  -- "}" inserted before
 161                                  * token */
 162
 163 short icontexts = 0;            /* Which context we're in */
 164
 165
 166
 167 /*
 168         Table of indentations:  right bit indicates whether to use
 169           indentation rules (1 = use rules; 0 = ignore)
 170
 171     partain:
 172     push one of these "contexts" at every "case" or "where"; the right bit says
 173     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
 174
 175     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 176     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 177     push is shown just below.
 178
 179 */
 180
 181
 182 static short indenttab[MAX_CONTEXTS] = {-1};
 183
 184 #define INDENTPT (indenttab[icontexts]>>1)
 185 #define INDENTON (indenttab[icontexts]&1)
 186
 187 #define RETURN(tok) return(Return(tok))
 188
 189 #undef YY_DECL
 190 #define YY_DECL int yylex1()
 191
 192 /* We should not peek at yy_act, but flex calls us even for the internal action
 193    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
 194    to support older versions of flex, we'll continue to peek for now.
 195  */
 196 #define YY_USER_ACTION \
 197     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
 198
 199 #if 0/*debug*/
 200 #undef YY_BREAK
 201 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
 202 #endif
 203
 204 /* Each time we enter a new start state, we push it onto the state stack.
 205    Note that the rules do not allow us to underflow or overflow the stack.
 206    (At least, they shouldn't.)  The maximum expected depth is 4:
 207    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
 208 */
 209 static int StateStack[5];
 210 static int StateDepth = -1;
 211
 212 #ifdef HSP_DEBUG
 213 #define PUSH_STATE(n)   do {\
 214     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
 215     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 216 #define POP_STATE       do {--StateDepth;\
 217     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
 218     BEGIN(StateStack[StateDepth]);} while(0)
 219 #else
 220 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 221 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
 222 #endif
 223
 224 %}
 225
 226 /* The start states are:
 227    Code -- normal Haskell code (principal lexer)
 228    GlaExt -- Haskell code with Glasgow extensions
 229    Comment -- Nested comment processing
 230    String -- Inside a string literal with backslashes
 231    StringEsc -- Immediately following a backslash in a string literal
 232    Char -- Inside a character literal with backslashes
 233    CharEsc -- Immediately following a backslash in a character literal
 234
 235    Note that the INITIAL state is unused.  Also note that these states
 236    are _exclusive_.  All rules should be prefixed with an appropriate
 237    list of start states.
 238  */
 239
 240 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
 241
 242 isoS                    [\xa1-\xbf\xd7\xf7]
 243 isoL                    [\xc0-\xd6\xd8-\xde]
 244 isol                    [\xdf-\xf6\xf8-\xff]
 245 isoA                    [\xa1-\xff]
 246
 247 D                       [0-9]
 248 O                       [0-7]
 249 H                       [0-9A-Fa-f]
 250 N                       {D}+
 251 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 252 S                       [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
 253 SId                     {S}{S}*
 254 L                       [A-Z\xc0-\xd6\xd8-\xde]
 255 l                       [a-z\xdf-\xf6\xf8-\xff]
 256 I                       {L}|{l}
 257 i                       {L}|{l}|[0-9'_]
 258 Id                      {I}{i}*
 259 Mod                     {L}{i}*
 260 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
 261 CNTRL                   [@A-Z\[\\\]^_]
 262 WS                      [ \t\n\r\f\v]
 263 NL                      [\n\r]
 264
 265 %%
 266
 267 %{
 268     /*
 269      * Special GHC pragma rules.  Do we need a start state for interface files,
 270      * so these won't be matched in source files? --JSM
 271      */
 272 %}
 273
 274 <Code,GlaExt>^"# ".*{NL}    {
 275                           char tempf[FILENAME_SIZE];
 276                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
 277                           new_filename(tempf);
 278                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 279                         }
 280
 281 <Code,GlaExt>^"#line ".*{NL}    {
 282                           char tempf[FILENAME_SIZE];
 283                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
 284                           new_filename(tempf);
 285                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 286                         }
 287
 288 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
 289                           /* partain: pragma-style line directive */
 290                           char tempf[FILENAME_SIZE];
 291                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
 292                           new_filename(tempf);
 293                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 294                         }
 295
 296 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
 297                               PUSH_STATE(UserPragma);
 298                               RETURN(INTERFACE_UPRAGMA);
 299                             }
 300 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
 301                               PUSH_STATE(UserPragma);
 302                               RETURN(SPECIALISE_UPRAGMA);
 303                             }
 304 <Code,GlaExt>"{-#"{WS}*"INLINE" {
 305                               PUSH_STATE(UserPragma);
 306                               RETURN(INLINE_UPRAGMA);
 307                             }
 308 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
 309                               PUSH_STATE(UserPragma);
 310                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
 311                             }
 312 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
 313                               PUSH_STATE(UserPragma);
 314                               RETURN(DEFOREST_UPRAGMA);
 315                             }
 316 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
 317                               fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
 318                                 input_filename, hsplineno);
 319                               format_string(stderr, (unsigned char *) yytext, yyleng);
 320                               fputs("'\n", stderr);
 321                               nested_comments = 1;
 322                               PUSH_STATE(Comment);
 323                             }
 324 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
 325
 326 %{
 327     /*
 328      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
 329      * intentionally accepted as a keyword even for normal <Code>.
 330      */
 331 %}
 332
 333 <Code,GlaExt>"case"             { RETURN(CASE); }
 334 <Code,GlaExt>"class"            { RETURN(CLASS); }
 335 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
 336 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
 337 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
 338 <Code,GlaExt>"do"               { RETURN(DO); }
 339 <Code,GlaExt>"else"             { RETURN(ELSE); }
 340 <Code,GlaExt>"if"               { RETURN(IF); }
 341 <Code,GlaExt>"import"           { RETURN(IMPORT); }
 342 <Code,GlaExt>"in"               { RETURN(IN); }
 343 <Code,GlaExt>"infix"            { RETURN(INFIX); }
 344 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
 345 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
 346 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
 347 <Code,GlaExt>"let"              { RETURN(LET); }
 348 <Code,GlaExt>"module"           { RETURN(MODULE); }
 349 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
 350 <Code,GlaExt>"of"               { RETURN(OF); }
 351 <Code,GlaExt>"then"             { RETURN(THEN); }
 352 <Code,GlaExt>"type"             { RETURN(TYPE); }
 353 <Code,GlaExt>"where"            { RETURN(WHERE); }
 354
 355 <Code,GlaExt>"as"               { RETURN(AS); }
 356 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
 357 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
 358
 359 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
 360 <GlaExt>"_ccall_"               { RETURN(CCALL); }
 361 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
 362 <GlaExt>"_casm_"                { RETURN(CASM); }
 363 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
 364
 365 %{
 366     /*
 367      * Haskell operators: special, reservedops and useful varsyms
 368      */
 369 %}
 370
 371 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
 372 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
 373 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
 374 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
 375 <Code,GlaExt>"{"                { RETURN(OCURLY); }
 376 <Code,GlaExt>"}"                { RETURN(CCURLY); }
 377 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
 378 <Code,GlaExt>";"                { RETURN(SEMI); }
 379 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
 380 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
 381
 382 <Code,GlaExt>".."               { RETURN(DOTDOT); }
 383 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
 384 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
 385 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
 386 <Code,GlaExt>"|"                { RETURN(VBAR); }
 387 <Code,GlaExt>"<-"               { RETURN(LARROW); }
 388 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
 389 <Code,GlaExt>"-"                { RETURN(MINUS); }
 390
 391 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
 392 <Code,GlaExt>"@"                { RETURN(AT); }
 393 <Code,GlaExt>"!"                { RETURN(BANG); }
 394 <Code,GlaExt>"~"                { RETURN(LAZY); }
 395
 396 %{
 397     /*
 398      * Integers and (for Glasgow extensions) primitive integers.  Note that
 399      * we pass all of the text on to the parser, because flex/C can't handle
 400      * arbitrary precision numbers.
 401      */
 402 %}
 403
 404 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
 405                          yylval.uid = xstrndup(yytext, yyleng - 1);
 406                          RETURN(INTPRIM);
 407                         }
 408 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
 409                          yylval.uid = xstrndup(yytext, yyleng);
 410                          RETURN(INTEGER);
 411                         }
 412 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
 413                          yylval.uid = xstrndup(yytext, yyleng - 1);
 414                          RETURN(INTPRIM);
 415                         }
 416 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
 417                          yylval.uid = xstrndup(yytext, yyleng);
 418                          RETURN(INTEGER);
 419                         }
 420 <GlaExt>("-")?{N}"#"    {
 421                          yylval.uid = xstrndup(yytext, yyleng - 1);
 422                          RETURN(INTPRIM);
 423                         }
 424 <Code,GlaExt,UserPragma>{N} {
 425                          yylval.uid = xstrndup(yytext, yyleng);
 426                          RETURN(INTEGER);
 427                         }
 428
 429 %{
 430     /*
 431      * Floats and (for Glasgow extensions) primitive floats/doubles.
 432      */
 433 %}
 434
 435 <GlaExt>("-")?{F}"##"   {
 436                          yylval.uid = xstrndup(yytext, yyleng - 2);
 437                          RETURN(DOUBLEPRIM);
 438                         }
 439 <GlaExt>("-")?{F}"#"    {
 440                          yylval.uid = xstrndup(yytext, yyleng - 1);
 441                          RETURN(FLOATPRIM);
 442                         }
 443 <Code,GlaExt>{F}        {
 444                          yylval.uid = xstrndup(yytext, yyleng);
 445                          RETURN(FLOAT);
 446                         }
 447
 448 %{
 449     /*
 450      * Funky ``foo'' style C literals for Glasgow extensions
 451      */
 452 %}
 453
 454 <GlaExt>"``"[^']+"''"   {
 455                          hsnewid(yytext + 2, yyleng - 4);
 456                          RETURN(CLITLIT);
 457                         }
 458
 459 %{
 460     /*
 461      * Identifiers, both variables and operators.  The trailing hash is allowed
 462      * for Glasgow extensions.
 463      */
 464 %}
 465
 466
 467 %{
 468 /* These SHOULDNAE work in "Code" (sigh) */
 469 %}
 470 <Code,GlaExt,UserPragma>{Id}"#" {
 471                          if (! (nonstandardFlag || in_interface)) {
 472                             char errbuf[ERR_BUF_SIZE];
 473                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 474                             hsperror(errbuf);
 475                          }
 476                          hsnewid(yytext, yyleng);
 477                          RETURN(_isconstr(yytext) ? CONID : VARID);
 478                         }
 479 <Code,GlaExt,UserPragma>_+{Id} {
 480                          if (! (nonstandardFlag || in_interface)) {
 481                             char errbuf[ERR_BUF_SIZE];
 482                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
 483                             hsperror(errbuf);
 484                          }
 485                          hsnewid(yytext, yyleng);
 486                          RETURN(isconstr(yytext) ? CONID : VARID);
 487                          /* NB: ^^^^^^^^ : not the macro! */
 488                         }
 489 <Code,GlaExt,UserPragma>{Id}    {
 490                          hsnewid(yytext, yyleng);
 491                          RETURN(_isconstr(yytext) ? CONID : VARID);
 492                         }
 493 <Code,GlaExt,UserPragma>{SId}   {
 494                          hsnewid(yytext, yyleng);
 495                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
 496                         }
 497 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
 498                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
 499                          RETURN(isconstr ? QCONID : QVARID);
 500                         }
 501 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
 502                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
 503                          RETURN(isconstr ? QCONSYM : QVARSYM);
 504                         }
 505
 506 %{
 507     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
 508
 509     /* Because we can make the former well-behaved (we defined them).
 510
 511        Sadly, the latter is defined by Haskell, which allows such
 512        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
 513     */
 514 %}
 515
 516 <GlaExt,UserPragma>"`"{Id}"#`"  {
 517                          hsnewid(yytext + 1, yyleng - 2);
 518                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
 519                         }
 520
 521 %{
 522     /*
 523      * Character literals.  The first form is the quick form, for character
 524      * literals that don't contain backslashes.  Literals with backslashes are
 525      * lexed through multiple rules.  First, we match the open ' and as many
 526      * normal characters as possible.  This puts us into the <Char> state, where
 527      * a backslash is legal.  Then, we match the backslash and move into the
 528      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
 529      * characters and the close '.  We may end up with too many characters, but
 530      * this allows us to easily share the lex rules with strings.  Excess characters
 531      * are ignored with a warning.
 532      */
 533 %}
 534
 535 <GlaExt>'({CHAR}|"\"")"'#" {
 536                          yylval.uhstring = installHstring(1, yytext+1);
 537                          RETURN(CHARPRIM);
 538                         }
 539 <Code,GlaExt>'({CHAR}|"\"")'    {
 540                          yylval.uhstring = installHstring(1, yytext+1);
 541                          RETURN(CHAR);
 542                         }
 543 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
 544                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
 545                          hsperror(errbuf);
 546                         }
 547 <Code,GlaExt>'({CHAR}|"\"")* {
 548                          hsmlcolno = hspcolno;
 549                          cleartext();
 550                          addtext(yytext+1, yyleng-1);
 551                          PUSH_STATE(Char);
 552                         }
 553 <Char>({CHAR}|"\"")*'#  {
 554                          unsigned length;
 555                          char *text;
 556
 557                          addtext(yytext, yyleng - 2);
 558                          text = fetchtext(&length);
 559
 560                          if (! (nonstandardFlag || in_interface)) {
 561                             char errbuf[ERR_BUF_SIZE];
 562                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
 563                             hsperror(errbuf);
 564                          }
 565
 566                          if (length > 1) {
 567                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
 568                               input_filename, hsplineno, hspcolno + 1);
 569                             format_string(stderr, (unsigned char *) text, length);
 570                             fputs("' too long\n", stderr);
 571                             hsperror("");
 572                          }
 573                          yylval.uhstring = installHstring(1, text);
 574                          hspcolno = hsmlcolno;
 575                          POP_STATE;
 576                          RETURN(CHARPRIM);
 577                         }
 578 <Char>({CHAR}|"\"")*'   {
 579                          unsigned length;
 580                          char *text;
 581
 582                          addtext(yytext, yyleng - 1);
 583                          text = fetchtext(&length);
 584
 585                          if (length > 1) {
 586                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
 587                               input_filename, hsplineno, hspcolno + 1);
 588                             format_string(stderr, (unsigned char *) text, length);
 589                             fputs("' too long\n", stderr);
 590                             hsperror("");
 591                          }
 592                          yylval.uhstring = installHstring(1, text);
 593                          hspcolno = hsmlcolno;
 594                          POP_STATE;
 595                          RETURN(CHAR);
 596                         }
 597 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
 598
 599
 600 %{
 601     /*
 602      * String literals.  The first form is the quick form, for string literals
 603      * that don't contain backslashes.  Literals with backslashes are lexed
 604      * through multiple rules.  First, we match the open " and as many normal
 605      * characters as possible.  This puts us into the <String> state, where
 606      * a backslash is legal.  Then, we match the backslash and move into the
 607      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
 608      * characters, moving back and forth between <String> and <StringEsc> as more
 609      * backslashes are encountered.  (We may even digress into <Comment> mode if we
 610      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
 611      * of normal characters and the close ".
 612      */
 613 %}
 614
 615 <GlaExt>"\""({CHAR}|"'")*"\""#  {
 616                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
 617                             /* the -3 accounts for the " on front, "# on the end */
 618                          RETURN(STRINGPRIM);
 619                         }
 620 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
 621                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
 622                          RETURN(STRING);
 623                         }
 624 <Code,GlaExt>"\""({CHAR}|"'")* {
 625                          hsmlcolno = hspcolno;
 626                          cleartext();
 627                          addtext(yytext+1, yyleng-1);
 628                          PUSH_STATE(String);
 629                         }
 630 <String>({CHAR}|"'")*"\"#"   {
 631                          unsigned length;
 632                          char *text;
 633
 634                          addtext(yytext, yyleng-2);
 635                          text = fetchtext(&length);
 636
 637                          if (! (nonstandardFlag || in_interface)) {
 638                             char errbuf[ERR_BUF_SIZE];
 639                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
 640                             hsperror(errbuf);
 641                          }
 642
 643                          yylval.uhstring = installHstring(length, text);
 644                          hspcolno = hsmlcolno;
 645                          POP_STATE;
 646                          RETURN(STRINGPRIM);
 647                         }
 648 <String>({CHAR}|"'")*"\""   {
 649                          unsigned length;
 650                          char *text;
 651
 652                          addtext(yytext, yyleng-1);
 653                          text = fetchtext(&length);
 654
 655                          yylval.uhstring = installHstring(length, text);
 656                          hspcolno = hsmlcolno;
 657                          POP_STATE;
 658                          RETURN(STRING);
 659                         }
 660 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
 661
 662 %{
 663     /*
 664      * Character and string escapes are roughly the same, but strings have the
 665      * extra `\&' sequence which is not allowed for characters.  Also, comments
 666      * are allowed in the <StringEsc> state.  (See the comment section much
 667      * further down.)
 668      *
 669      * NB: Backslashes and tabs are stored in strings as themselves.
 670      * But if we print them (in printtree.c), they must go out as
 671      * "\\\\" and "\\t" respectively.  (This is because of the bogus
 672      * intermediate format that the parser produces.  It uses '\t' fpr end of
 673      * string, so it needs to be able to escape tabs, which means that it
 674      * also needs to be able to escape the escape character ('\\').  Sigh.
 675      */
 676 %}
 677
 678 <Char>\\                { PUSH_STATE(CharEsc); }
 679 <String>\\&             /* Ignore */ ;
 680 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
 681
 682 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
 683 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
 684
 685 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
 686 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
 687 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
 688 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
 689 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
 690 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
 691 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
 692 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
 693 <CharEsc,StringEsc>BEL  |
 694 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
 695 <CharEsc,StringEsc>BS   |
 696 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
 697 <CharEsc,StringEsc>HT   |
 698 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
 699 <CharEsc,StringEsc>LF   |
 700 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
 701 <CharEsc,StringEsc>VT   |
 702 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
 703 <CharEsc,StringEsc>FF   |
 704 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
 705 <CharEsc,StringEsc>CR   |
 706 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
 707 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
 708 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
 709 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
 710 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
 711 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
 712 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
 713 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
 714 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
 715 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
 716 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
 717 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
 718 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
 719 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
 720 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
 721 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
 722 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
 723 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
 724 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
 725 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
 726 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
 727 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
 728 <CharEsc,StringEsc>{D}+  {
 729                           int i = strtol(yytext, NULL, 10);
 730                           if (i < NCHARS) {
 731                              addchar((char) i);
 732                           } else {
 733                              char errbuf[ERR_BUF_SIZE];
 734                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 735                                 yytext);
 736                              hsperror(errbuf);
 737                           }
 738                           POP_STATE;
 739                         }
 740 <CharEsc,StringEsc>o{O}+ {
 741                           int i = strtol(yytext + 1, NULL, 8);
 742                           if (i < NCHARS) {
 743                              addchar((char) i);
 744                           } else {
 745                              char errbuf[ERR_BUF_SIZE];
 746                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 747                                 yytext);
 748                              hsperror(errbuf);
 749                           }
 750                           POP_STATE;
 751                         }
 752 <CharEsc,StringEsc>x{H}+ {
 753                           int i = strtol(yytext + 1, NULL, 16);
 754                           if (i < NCHARS) {
 755                              addchar((char) i);
 756                           } else {
 757                              char errbuf[ERR_BUF_SIZE];
 758                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 759                                 yytext);
 760                              hsperror(errbuf);
 761                           }
 762                           POP_STATE;
 763                         }
 764
 765 %{
 766     /*
 767      * Simple comments and whitespace.  Normally, we would just ignore these, but
 768      * in case we're processing a string escape, we need to note that we've seen
 769      * a gap.
 770      *
 771      * Note that we cater for a comment line that *doesn't* end in a newline.
 772      * This is incorrect, strictly speaking, but seems like the right thing
 773      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
 774      */
 775 %}
 776
 777 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
 778 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
 779
 780 %{
 781     /*
 782      * Nested comments.  The major complication here is in trying to match the
 783      * longest lexemes possible, for better performance.  (See the flex document.)
 784      * That's why the rules look so bizarre.
 785      */
 786 %}
 787
 788 <Code,GlaExt,UserPragma,StringEsc>"{-"  {
 789                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
 790                         }
 791
 792 <Comment>[^-{]*         |
 793 <Comment>"-"+[^-{}]+    |
 794 <Comment>"{"+[^-{}]+    ;
 795 <Comment>"{-"           { nested_comments++; }
 796 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
 797 <Comment>(.|\n)         ;
 798
 799 %{
 800     /*
 801      * Illegal characters.  This used to be a single rule, but we might as well
 802      * pass on as much information as we have, so now we indicate our state in
 803      * the error message.
 804      */
 805 %}
 806
 807 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  {
 808                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 809                             input_filename, hsplineno, hspcolno + 1);
 810                          format_string(stderr, (unsigned char *) yytext, 1);
 811                          fputs("'\n", stderr);
 812                          hsperror("");
 813                         }
 814 <Char>(.|\n)            {
 815                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 816                             input_filename, hsplineno, hspcolno + 1);
 817                          format_string(stderr, (unsigned char *) yytext, 1);
 818                          fputs("' in a character literal\n", stderr);
 819                          hsperror("");
 820                         }
 821 <CharEsc>(.|\n)         {
 822                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
 823                             input_filename, hsplineno, hspcolno + 1);
 824                          format_string(stderr, (unsigned char *) yytext, 1);
 825                          fputs("'\n", stderr);
 826                          hsperror("");
 827                         }
 828 <String>(.|\n)          { if (nonstandardFlag) {
 829                              addtext(yytext, yyleng);
 830                           } else {
 831                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 832                                 input_filename, hsplineno, hspcolno + 1);
 833                                 format_string(stderr, (unsigned char *) yytext, 1);
 834                                 fputs("' in a string literal\n", stderr);
 835                                 hsperror("");
 836                           }
 837                         }
 838 <StringEsc>(.|\n)       {
 839                          if (noGap) {
 840                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
 841                                 input_filename, hsplineno, hspcolno + 1);
 842                              format_string(stderr, (unsigned char *) yytext, 1);
 843                              fputs("'\n", stderr);
 844                              hsperror("");
 845                          } else {
 846                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 847                                 input_filename, hsplineno, hspcolno + 1);
 848                              format_string(stderr, (unsigned char *) yytext, 1);
 849                              fputs("' in a string gap\n", stderr);
 850                              hsperror("");
 851                          }
 852                         }
 853
 854 %{
 855     /*
 856      * End of file.  In any sub-state, this is an error.  However, for the primary
 857      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
 858      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
 859      * adding virtual close curlies, or closing an interface and returning to the
 860      * primary source file.
 861      *
 862      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
 863      * line/column advancement has to be done by hand.
 864      */
 865 %}
 866
 867 <Char,CharEsc><<EOF>>   {
 868                           hsplineno = hslineno; hspcolno = hscolno;
 869                           hsperror("unterminated character literal");
 870                         }
 871 <Comment><<EOF>>        {
 872                           hsplineno = hslineno; hspcolno = hscolno;
 873                           hsperror("unterminated comment");
 874                         }
 875 <String,StringEsc><<EOF>>   {
 876                           hsplineno = hslineno; hspcolno = hscolno;
 877                           hsperror("unterminated string literal");
 878                         }
 879 <UserPragma><<EOF>>     {
 880                           hsplineno = hslineno; hspcolno = hscolno;
 881                           hsperror("unterminated user-specified pragma");
 882                         }
 883 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
 884
 885 %%
 886
 887 /**********************************************************************
 888 *                                                                     *
 889 *                                                                     *
 890 *     YACC/LEX Initialisation etc.                                    *
 891 *                                                                     *
 892 *                                                                     *
 893 **********************************************************************/
 894
 895 /*
 896    We initialise input_filename to "<stdin>".
 897    This allows unnamed sources to be piped into the parser.
 898 */
 899
 900 extern BOOLEAN acceptPrim;
 901
 902 void
 903 yyinit(void)
 904 {
 905     input_filename = xstrdup("<stdin>");
 906
 907     /* We must initialize the input buffer _now_, because we call
 908        setyyin _before_ calling yylex for the first time! */
 909     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 910
 911     if (acceptPrim)
 912         PUSH_STATE(GlaExt);
 913     else
 914         PUSH_STATE(Code);
 915 }
 916
 917 static void
 918 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
 919 {
 920     if (input_filename != NULL)
 921         free(input_filename);
 922     input_filename = xstrdup(f);
 923 }
 924
 925 /**********************************************************************
 926 *                                                                     *
 927 *                                                                     *
 928 *     Layout Processing                                               *
 929 *                                                                     *
 930 *                                                                     *
 931 **********************************************************************/
 932
 933 /*
 934         The following section deals with Haskell Layout conventions
 935         forcing insertion of ; or } as appropriate
 936 */
 937
 938 static BOOLEAN
 939 hsshouldindent(void)
 940 {
 941     return (!forgetindent && INDENTON);
 942 }
 943
 944
 945 /* Enter new context and set new indentation level */
 946 void
 947 hssetindent(void)
 948 {
 949 #ifdef HSP_DEBUG
 950     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 951 #endif
 952
 953     /*
 954      * partain: first chk that new indent won't be less than current one; this code
 955      * doesn't make sense to me; hscolno tells the position of the _end_ of the
 956      * current token; what that has to do with indenting, I don't know.
 957      */
 958
 959
 960     if (hscolno - 1 <= INDENTPT) {
 961         if (INDENTPT == -1)
 962             return;             /* Empty input OK for Haskell 1.1 */
 963         else {
 964             char errbuf[ERR_BUF_SIZE];
 965
 966             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
 967             hsperror(errbuf);
 968         }
 969     }
 970     hsentercontext((hspcolno << 1) | 1);
 971 }
 972
 973
 974 /* Enter a new context without changing the indentation level */
 975 void
 976 hsincindent(void)
 977 {
 978 #ifdef HSP_DEBUG
 979     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 980 #endif
 981     hsentercontext(indenttab[icontexts] & ~1);
 982 }
 983
 984
 985 /* Turn off indentation processing, usually because an explicit "{" has been seen */
 986 void
 987 hsindentoff(void)
 988 {
 989     forgetindent = TRUE;
 990 }
 991
 992
 993 /* Enter a new layout context. */
 994 static void
 995 hsentercontext(int indent)
 996 {
 997     /* Enter new context and set indentation as specified */
 998     if (++icontexts >= MAX_CONTEXTS) {
 999         char errbuf[ERR_BUF_SIZE];
1000
1001         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1002         hsperror(errbuf);
1003     }
1004     forgetindent = FALSE;
1005     indenttab[icontexts] = indent;
1006 #ifdef HSP_DEBUG
1007     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1008 #endif
1009 }
1010
1011
1012 /* Exit a layout context */
1013 void
1014 hsendindent(void)
1015 {
1016     --icontexts;
1017 #ifdef HSP_DEBUG
1018     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1019 #endif
1020 }
1021
1022 /*
1023  *      Return checks the indentation level and returns ;, } or the specified token.
1024  */
1025
1026 static int
1027 Return(int tok)
1028 {
1029 #ifdef HSP_DEBUG
1030     extern int yyleng;
1031 #endif
1032
1033     if (hsshouldindent()) {
1034         if (hspcolno < INDENTPT) {
1035 #ifdef HSP_DEBUG
1036             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1037 #endif
1038             hssttok = tok;
1039             return (VCCURLY);
1040         } else if (hspcolno == INDENTPT) {
1041 #ifdef HSP_DEBUG
1042             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1043 #endif
1044             hssttok = -tok;
1045             return (SEMI);
1046         }
1047     }
1048     hssttok = -1;
1049 #ifdef HSP_DEBUG
1050     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1051 #endif
1052     return (tok);
1053 }
1054
1055
1056 /*
1057  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1058  */
1059 int
1060 yylex()
1061 {
1062     int tok;
1063     static BOOLEAN eof = FALSE;
1064
1065     if (!eof) {
1066         if (hssttok != -1) {
1067             if (hssttok < 0) {
1068                 tok = -hssttok;
1069                 hssttok = -1;
1070                 return tok;
1071             }
1072             RETURN(hssttok);
1073         } else {
1074             endlineno = hslineno;
1075             if ((tok = yylex1()) != EOF)
1076                 return tok;
1077             else
1078                 eof = TRUE;
1079         }
1080     }
1081     if (icontexts > icontexts_save) {
1082         if (INDENTON) {
1083             eof = TRUE;
1084             indenttab[icontexts] = 0;
1085             return (VCCURLY);
1086         } else
1087             hsperror("missing '}' at end of file");
1088     } else if (hsbuf_save != NULL) {
1089         fclose(yyin);
1090         yy_delete_buffer(YY_CURRENT_BUFFER);
1091         yy_switch_to_buffer(hsbuf_save);
1092         hsbuf_save = NULL;
1093         new_filename(filename_save);
1094         free(filename_save);
1095         hslineno = hslineno_save;
1096         hsplineno = hsplineno_save;
1097         hscolno = hscolno_save;
1098         hspcolno = hspcolno_save;
1099         etags = etags_save;
1100         in_interface = FALSE;
1101         icontexts = icontexts_save - 1;
1102         icontexts_save = 0;
1103 #ifdef HSP_DEBUG
1104         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1105 #endif
1106         eof = FALSE;
1107
1108         /* RETURN(LEOF); */
1109         hsperror("No longer using yacc to parse interface files");
1110
1111     } else {
1112         yyterminate();
1113     }
1114     abort(); /* should never get here! */
1115     return(0);
1116 }
1117
1118 /**********************************************************************
1119 *                                                                     *
1120 *                                                                     *
1121 *     Input Processing for Interfaces -- Not currently used !!!       *
1122 *                                                                     *
1123 *                                                                     *
1124 **********************************************************************/
1125
1126 /* setyyin(file)        open file as new lex input buffer */
1127 extern FILE *yyin;
1128
1129 void
1130 setyyin(char *file)
1131 {
1132     hsbuf_save = YY_CURRENT_BUFFER;
1133     if ((yyin = fopen(file, "r")) == NULL) {
1134         char errbuf[ERR_BUF_SIZE];
1135
1136         sprintf(errbuf, "can't read \"%-.50s\"", file);
1137         hsperror(errbuf);
1138     }
1139     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1140
1141     hslineno_save = hslineno;
1142     hsplineno_save = hsplineno;
1143     hslineno = hsplineno = 1;
1144
1145     filename_save = input_filename;
1146     input_filename = NULL;
1147     new_filename(file);
1148     hscolno_save = hscolno;
1149     hspcolno_save = hspcolno;
1150     hscolno = hspcolno = 0;
1151     in_interface = TRUE;
1152     etags_save = etags; /* do not do "etags" stuff in interfaces */
1153     etags = 0;          /* We remember whether we are doing it in
1154                            the module, so we can restore it later [WDP 94/09] */
1155     hsentercontext(-1);         /* partain: changed this from 0 */
1156     icontexts_save = icontexts;
1157 #ifdef HSP_DEBUG
1158     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1159 #endif
1160 }
1161
1162 static void
1163 layout_input(char *text, int len)
1164 {
1165 #ifdef HSP_DEBUG
1166     fprintf(stderr, "Scanning \"%s\"\n", text);
1167 #endif
1168
1169     hsplineno = hslineno;
1170     hspcolno = hscolno;
1171
1172     while (len-- > 0) {
1173         switch (*text++) {
1174         case '\n':
1175         case '\r':
1176         case '\f':
1177             hslineno++;
1178             hscolno = 0;
1179             break;
1180         case '\t':
1181             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1182             break;
1183         case '\v':
1184             break;
1185         default:
1186             ++hscolno;
1187             break;
1188         }
1189     }
1190 }
1191
1192 void
1193 setstartlineno(void)
1194 {
1195     startlineno = hsplineno;
1196
1197     if (modulelineno == 0) {
1198         modulelineno = startlineno;
1199     }
1200
1201 #if 1/*etags*/
1202 #else
1203     if (etags)
1204         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1205 #endif
1206 }
1207
1208 /**********************************************************************
1209 *                                                                     *
1210 *                                                                     *
1211 *                      Text Caching                                   *
1212 *                                                                     *
1213 *                                                                     *
1214 **********************************************************************/
1215
1216 #define CACHE_SIZE YY_BUF_SIZE
1217
1218 static struct {
1219     unsigned allocated;
1220     unsigned next;
1221     char *text;
1222 } textcache = { 0, 0, NULL };
1223
1224 static void
1225 cleartext(void)
1226 {
1227 /*  fprintf(stderr, "cleartext\n"); */
1228     textcache.next = 0;
1229     if (textcache.allocated == 0) {
1230         textcache.allocated = CACHE_SIZE;
1231         textcache.text = xmalloc(CACHE_SIZE);
1232     }
1233 }
1234
1235 static void
1236 addtext(char *text, unsigned length)
1237 {
1238 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1239
1240     if (length == 0)
1241         return;
1242
1243     if (textcache.next + length + 1 >= textcache.allocated) {
1244         textcache.allocated += length + CACHE_SIZE;
1245         textcache.text = xrealloc(textcache.text, textcache.allocated);
1246     }
1247     bcopy(text, textcache.text + textcache.next, length);
1248     textcache.next += length;
1249 }
1250
1251 static void
1252 addchar(char c)
1253 {
1254 /*  fprintf(stderr, "addchar: %c\n", c); */
1255
1256     if (textcache.next + 2 >= textcache.allocated) {
1257         textcache.allocated += CACHE_SIZE;
1258         textcache.text = xrealloc(textcache.text, textcache.allocated);
1259     }
1260     textcache.text[textcache.next++] = c;
1261 }
1262
1263 static char *
1264 fetchtext(unsigned *length)
1265 {
1266 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1267
1268     *length = textcache.next;
1269     textcache.text[textcache.next] = '\0';
1270     return textcache.text;
1271 }
1272
1273 /**********************************************************************
1274 *                                                                     *
1275 *                                                                     *
1276 *    Identifier Processing                                             *
1277 *                                                                     *
1278 *                                                                     *
1279 **********************************************************************/
1280
1281 /*
1282         hsnewid         Enters an id of length n into the symbol table.
1283 */
1284
1285 static void
1286 hsnewid(char *name, int length)
1287 {
1288     char save = name[length];
1289
1290     name[length] = '\0';
1291     yylval.uid = installid(name);
1292     name[length] = save;
1293 }
1294
1295 BOOLEAN
1296 hsnewqid(char *name, int length)
1297 {
1298     char* dot;
1299     char save = name[length];
1300     name[length] = '\0';
1301
1302     dot = strchr(name, '.');
1303     *dot = '\0';
1304     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1305     *dot = '.';
1306     name[length] = save;
1307
1308     return _isconstr(dot+1);
1309 }
1310
1311 BOOLEAN
1312 isconstr(char *s) /* walks past leading underscores before using the macro */
1313 {
1314     char *temp = s;
1315
1316     for ( ; temp != NULL && *temp == '_' ; temp++ );
1317
1318     return _isconstr(temp);
1319 }