ghc/compiler/parser/hslexer.flex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       FLEX for Haskell.                                             *
   6 *       -----------------                                             *
   7 *                                                                     *
   8 **********************************************************************/
   9
  10 #include "../../includes/config.h"
  11
  12 #include <stdio.h>
  13
  14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
  15 #include <string.h>
  16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
  17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
  18 #include <memory.h>
  19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
  20 #define index strchr
  21 #define rindex strrchr
  22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
  23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  24 #define bzero(s, n) memset ((s), 0, (n))
  25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
  26 #include <strings.h>
  27 /* memory.h and strings.h conflict on some systems.  */
  28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
  29
  30 #include "hspincl.h"
  31 #include "hsparser.tab.h"
  32 #include "constants.h"
  33 #include "utils.h"
  34
  35 /* Our substitute for <ctype.h> */
  36
  37 #define NCHARS  256
  38 #define _S      0x1
  39 #define _D      0x2
  40 #define _H      0x4
  41 #define _O      0x8
  42 #define _C      0x10
  43
  44 #define _isconstr(s)    (CharTable[*s]&(_C))
  45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
  46
  47 static unsigned char CharTable[NCHARS] = {
  48 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
  49 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
  50 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
  51 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
  52 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
  53 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
  54 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
  55 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
  56 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
  57 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  58 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  59 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
  60 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
  61 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
  62 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
  63 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
  64
  65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  79 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  80 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  81 };
  82
  83 /**********************************************************************
  84 *                                                                     *
  85 *                                                                     *
  86 *      Declarations                                                   *
  87 *                                                                     *
  88 *                                                                     *
  89 **********************************************************************/
  90
  91 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
  92
  93 /*
  94  * For my own sanity, things that are not part of the flex skeleton
  95  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
  96  */
  97
  98 static int hslineno = 0;        /* Line number at end of token */
  99 int hsplineno = 0;              /* Line number at end of previous token */
 100
 101 static int hscolno = 0;         /* Column number at end of token */
 102 int hspcolno = 0;               /* Column number at end of previous token */
 103 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
 104
 105 int modulelineno = -1;          /* The line number where the module starts */
 106 int startlineno = 0;            /* The line number where something starts */
 107 int endlineno = 0;              /* The line number where something ends */
 108
 109 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
 110 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
 111
 112 static int nested_comments;     /* For counting comment nesting depth */
 113
 114 /* Hacky definition of yywrap: see flex doc.
 115
 116    If we don't do this, then we'll have to get the default
 117    yywrap from the flex library, which is often something
 118    we are not good at locating.  This avoids that difficulty.
 119    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
 120    WDP 94/09/05
 121 */
 122 #define yywrap() 1
 123
 124 /* Essential forward declarations */
 125
 126 static void hsnewid      PROTO((char *, int));
 127 static void layout_input PROTO((char *, int));
 128 static void cleartext    (NO_ARGS);
 129 static void addtext      PROTO((char *, unsigned));
 130 static void addchar      PROTO((char));
 131 static char *fetchtext   PROTO((unsigned *));
 132 static void new_filename PROTO((char *));
 133 static int  Return       PROTO((int));
 134 static void hsentercontext PROTO((int));
 135
 136 /* Special file handling for IMPORTS */
 137 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 138
 139 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
 140 static char *filename_save;             /* File Name                     */
 141 static int hslineno_save = 0,           /* Line Number                   */
 142  hsplineno_save = 0,                    /* Line Number of Prev. token    */
 143  hscolno_save = 0,                      /* Indentation                   */
 144  hspcolno_save = 0;                     /* Left Indentation              */
 145 static short icontexts_save = 0;        /* Indent Context Level          */
 146
 147 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
 148 extern BOOLEAN etags;                   /* that which is saved */
 149
 150 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
 151
 152 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
 153                                  * inserted before token +ve  -- "}" inserted before
 154                                  * token */
 155
 156 short icontexts = 0;            /* Which context we're in */
 157
 158 /*
 159         Table of indentations:  right bit indicates whether to use
 160           indentation rules (1 = use rules; 0 = ignore)
 161
 162     partain:
 163     push one of these "contexts" at every "case" or "where"; the right bit says
 164     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
 165
 166     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 167     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 168     push is shown just below.
 169
 170 */
 171
 172
 173 static short indenttab[MAX_CONTEXTS] = {-1};
 174
 175 #define INDENTPT (indenttab[icontexts]>>1)
 176 #define INDENTON (indenttab[icontexts]&1)
 177
 178 #define RETURN(tok) return(Return(tok))
 179
 180 #undef YY_DECL
 181 #define YY_DECL int yylex1()
 182
 183 /* We should not peek at yy_act, but flex calls us even for the internal action
 184    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
 185    to support older versions of flex, we'll continue to peek for now.
 186  */
 187 #define YY_USER_ACTION \
 188     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
 189
 190 #if 0/*debug*/
 191 #undef YY_BREAK
 192 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
 193 #endif
 194
 195 /* Each time we enter a new start state, we push it onto the state stack.
 196    Note that the rules do not allow us to underflow or overflow the stack.
 197    (At least, they shouldn't.)  The maximum expected depth is 4:
 198    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
 199 */
 200 static int StateStack[5];
 201 static int StateDepth = -1;
 202
 203 #ifdef HSP_DEBUG
 204 #define PUSH_STATE(n)   do {\
 205     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
 206     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 207 #define POP_STATE       do {--StateDepth;\
 208     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
 209     BEGIN(StateStack[StateDepth]);} while(0)
 210 #else
 211 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 212 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
 213 #endif
 214
 215 %}
 216
 217 /* The start states are:
 218    Code -- normal Haskell code (principal lexer)
 219    GlaExt -- Haskell code with Glasgow extensions
 220    Comment -- Nested comment processing
 221    String -- Inside a string literal with backslashes
 222    StringEsc -- Immediately following a backslash in a string literal
 223    Char -- Inside a character literal with backslashes
 224    CharEsc -- Immediately following a backslash in a character literal
 225
 226    Note that the INITIAL state is unused.  Also note that these states
 227    are _exclusive_.  All rules should be prefixed with an appropriate
 228    list of start states.
 229  */
 230
 231 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
 232
 233 isoS                    [\xa1-\xbf\xd7\xf7]
 234 isoL                    [\xc0-\xd6\xd8-\xde]
 235 isol                    [\xdf-\xf6\xf8-\xff]
 236 isoA                    [\xa1-\xff]
 237
 238 D                       [0-9]
 239 O                       [0-7]
 240 H                       [0-9A-Fa-f]
 241 N                       {D}+
 242 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 243 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
 244 SId                     {S}{S}*
 245 L                       [A-Z\xc0-\xd6\xd8-\xde]
 246 l                       [a-z\xdf-\xf6\xf8-\xff]
 247 I                       {L}|{l}
 248 i                       {L}|{l}|[0-9'_]
 249 Id                      {I}{i}*
 250 Mod                     {L}{i}*
 251 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
 252 CNTRL                   [@A-Z\[\\\]^_]
 253 WS                      [ \t\n\r\f\v]
 254 NL                      [\n\r]
 255
 256 %%
 257
 258 %{
 259     /*
 260      * Special GHC pragma rules.  Do we need a start state for interface files,
 261      * so these won't be matched in source files? --JSM
 262      */
 263 %}
 264
 265 <Code,GlaExt>^"# ".*{NL}    {
 266                           char tempf[FILENAME_SIZE];
 267                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
 268                           new_filename(tempf);
 269                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 270                         }
 271
 272 <Code,GlaExt>^"#line ".*{NL}    {
 273                           char tempf[FILENAME_SIZE];
 274                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
 275                           new_filename(tempf);
 276                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 277                         }
 278
 279 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
 280                           /* partain: pragma-style line directive */
 281                           char tempf[FILENAME_SIZE];
 282                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
 283                           new_filename(tempf);
 284                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 285                         }
 286
 287 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
 288                               PUSH_STATE(UserPragma);
 289                               RETURN(INTERFACE_UPRAGMA);
 290                             }
 291 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
 292                               PUSH_STATE(UserPragma);
 293                               RETURN(SPECIALISE_UPRAGMA);
 294                             }
 295 <Code,GlaExt>"{-#"{WS}*"INLINE" {
 296                               PUSH_STATE(UserPragma);
 297                               RETURN(INLINE_UPRAGMA);
 298                             }
 299 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
 300                               PUSH_STATE(UserPragma);
 301                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
 302                             }
 303 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
 304                               PUSH_STATE(UserPragma);
 305                               RETURN(DEFOREST_UPRAGMA);
 306                             }
 307 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
 308                               /* these are handled by hscpp */
 309                               nested_comments =1;
 310                               PUSH_STATE(Comment);
 311                             }
 312 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
 313                               fprintf(stderr, "\"%s\", line %d: Warning: Unrecognised pragma '",
 314                                 input_filename, hsplineno);
 315                               format_string(stderr, (unsigned char *) yytext, yyleng);
 316                               fputs("'\n", stderr);
 317                               nested_comments = 1;
 318                               PUSH_STATE(Comment);
 319                             }
 320 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
 321
 322 %{
 323     /*
 324      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
 325      * intentionally accepted as a keyword even for normal <Code>.
 326      */
 327 %}
 328
 329 <Code,GlaExt>"case"             { RETURN(CASE); }
 330 <Code,GlaExt>"class"            { RETURN(CLASS); }
 331 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
 332 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
 333 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
 334 <Code,GlaExt>"do"               { RETURN(DO); }
 335 <Code,GlaExt>"else"             { RETURN(ELSE); }
 336 <Code,GlaExt>"if"               { RETURN(IF); }
 337 <Code,GlaExt>"import"           { RETURN(IMPORT); }
 338 <Code,GlaExt>"in"               { RETURN(IN); }
 339 <Code,GlaExt>"infix"            { RETURN(INFIX); }
 340 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
 341 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
 342 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
 343 <Code,GlaExt>"let"              { RETURN(LET); }
 344 <Code,GlaExt>"module"           { RETURN(MODULE); }
 345 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
 346 <Code,GlaExt>"of"               { RETURN(OF); }
 347 <Code,GlaExt>"then"             { RETURN(THEN); }
 348 <Code,GlaExt>"type"             { RETURN(TYPE); }
 349 <Code,GlaExt>"where"            { RETURN(WHERE); }
 350
 351 <Code,GlaExt>"as"               { RETURN(AS); }
 352 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
 353 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
 354
 355 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
 356 <GlaExt>"_ccall_"               { RETURN(CCALL); }
 357 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
 358 <GlaExt>"_casm_"                { RETURN(CASM); }
 359 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
 360
 361 %{
 362     /*
 363      * Haskell operators: special, reservedops and useful varsyms
 364      */
 365 %}
 366
 367 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
 368 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
 369 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
 370 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
 371 <Code,GlaExt>"{"                { RETURN(OCURLY); }
 372 <Code,GlaExt>"}"                { RETURN(CCURLY); }
 373 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
 374 <Code,GlaExt>";"                { RETURN(SEMI); }
 375 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
 376 <Code,GlaExt>"_"                { RETURN(WILDCARD); }
 377
 378 <Code,GlaExt>".."               { RETURN(DOTDOT); }
 379 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
 380 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
 381 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
 382 <Code,GlaExt>"|"                { RETURN(VBAR); }
 383 <Code,GlaExt>"<-"               { RETURN(LARROW); }
 384 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
 385 <Code,GlaExt>"-"                { RETURN(MINUS); }
 386
 387 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
 388 <Code,GlaExt>"@"                { RETURN(AT); }
 389 <Code,GlaExt>"!"                { RETURN(BANG); }
 390 <Code,GlaExt>"~"                { RETURN(LAZY); }
 391
 392 %{
 393     /*
 394      * Integers and (for Glasgow extensions) primitive integers.  Note that
 395      * we pass all of the text on to the parser, because flex/C can't handle
 396      * arbitrary precision numbers.
 397      */
 398 %}
 399
 400 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
 401                          yylval.uid = xstrndup(yytext, yyleng - 1);
 402                          RETURN(INTPRIM);
 403                         }
 404 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
 405                          yylval.uid = xstrndup(yytext, yyleng);
 406                          RETURN(INTEGER);
 407                         }
 408 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
 409                          yylval.uid = xstrndup(yytext, yyleng - 1);
 410                          RETURN(INTPRIM);
 411                         }
 412 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
 413                          yylval.uid = xstrndup(yytext, yyleng);
 414                          RETURN(INTEGER);
 415                         }
 416 <GlaExt>("-")?{N}"#"    {
 417                          yylval.uid = xstrndup(yytext, yyleng - 1);
 418                          RETURN(INTPRIM);
 419                         }
 420 <Code,GlaExt,UserPragma>{N} {
 421                          yylval.uid = xstrndup(yytext, yyleng);
 422                          RETURN(INTEGER);
 423                         }
 424
 425 %{
 426     /*
 427      * Floats and (for Glasgow extensions) primitive floats/doubles.
 428      */
 429 %}
 430
 431 <GlaExt>("-")?{F}"##"   {
 432                          yylval.uid = xstrndup(yytext, yyleng - 2);
 433                          RETURN(DOUBLEPRIM);
 434                         }
 435 <GlaExt>("-")?{F}"#"    {
 436                          yylval.uid = xstrndup(yytext, yyleng - 1);
 437                          RETURN(FLOATPRIM);
 438                         }
 439 <Code,GlaExt>{F}        {
 440                          yylval.uid = xstrndup(yytext, yyleng);
 441                          RETURN(FLOAT);
 442                         }
 443
 444 %{
 445     /*
 446      * Funky ``foo'' style C literals for Glasgow extensions
 447      */
 448 %}
 449
 450 <GlaExt>"``"[^']+"''"   {
 451                          hsnewid(yytext + 2, yyleng - 4);
 452                          RETURN(CLITLIT);
 453                         }
 454
 455 %{
 456     /*
 457      * Identifiers, both variables and operators.  The trailing hash is allowed
 458      * for Glasgow extensions.
 459      */
 460 %}
 461
 462
 463 %{
 464 /* These SHOULDNAE work in "Code" (sigh) */
 465 %}
 466 <Code,GlaExt,UserPragma>{Id}"#" {
 467                          if (! nonstandardFlag) {
 468                             char errbuf[ERR_BUF_SIZE];
 469                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 470                             hsperror(errbuf);
 471                          }
 472                          hsnewid(yytext, yyleng);
 473                          RETURN(_isconstr(yytext) ? CONID : VARID);
 474                         }
 475 <Code,GlaExt,UserPragma>_+{Id} {
 476                          if (! nonstandardFlag) {
 477                             char errbuf[ERR_BUF_SIZE];
 478                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
 479                             hsperror(errbuf);
 480                          }
 481                          hsnewid(yytext, yyleng);
 482                          RETURN(isconstr(yytext) ? CONID : VARID);
 483                          /* NB: ^^^^^^^^ : not the macro! */
 484                         }
 485 <Code,GlaExt,UserPragma>{Id}    {
 486                          hsnewid(yytext, yyleng);
 487                          RETURN(_isconstr(yytext) ? CONID : VARID);
 488                         }
 489 <Code,GlaExt,UserPragma>{SId}   {
 490                          hsnewid(yytext, yyleng);
 491                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
 492                         }
 493 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
 494                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
 495                          RETURN(isconstr ? QCONID : QVARID);
 496                         }
 497 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
 498                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
 499                          RETURN(isconstr ? QCONSYM : QVARSYM);
 500                         }
 501
 502 %{
 503     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
 504
 505     /* Because we can make the former well-behaved (we defined them).
 506
 507        Sadly, the latter is defined by Haskell, which allows such
 508        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
 509     */
 510 %}
 511
 512 <GlaExt,UserPragma>"`"{Id}"#`"  {
 513                          hsnewid(yytext + 1, yyleng - 2);
 514                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
 515                         }
 516
 517 %{
 518     /*
 519      * Character literals.  The first form is the quick form, for character
 520      * literals that don't contain backslashes.  Literals with backslashes are
 521      * lexed through multiple rules.  First, we match the open ' and as many
 522      * normal characters as possible.  This puts us into the <Char> state, where
 523      * a backslash is legal.  Then, we match the backslash and move into the
 524      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
 525      * characters and the close '.  We may end up with too many characters, but
 526      * this allows us to easily share the lex rules with strings.  Excess characters
 527      * are ignored with a warning.
 528      */
 529 %}
 530
 531 <GlaExt>'({CHAR}|"\"")"'#" {
 532                          yylval.uhstring = installHstring(1, yytext+1);
 533                          RETURN(CHARPRIM);
 534                         }
 535 <Code,GlaExt>'({CHAR}|"\"")'    {
 536                          yylval.uhstring = installHstring(1, yytext+1);
 537                          RETURN(CHAR);
 538                         }
 539 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
 540                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
 541                          hsperror(errbuf);
 542                         }
 543 <Code,GlaExt>'({CHAR}|"\"")* {
 544                          hsmlcolno = hspcolno;
 545                          cleartext();
 546                          addtext(yytext+1, yyleng-1);
 547                          PUSH_STATE(Char);
 548                         }
 549 <Char>({CHAR}|"\"")*'#  {
 550                          unsigned length;
 551                          char *text;
 552
 553                          addtext(yytext, yyleng - 2);
 554                          text = fetchtext(&length);
 555
 556                          if (! nonstandardFlag) {
 557                             char errbuf[ERR_BUF_SIZE];
 558                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
 559                             hsperror(errbuf);
 560                          }
 561
 562                          if (length > 1) {
 563                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
 564                               input_filename, hsplineno, hspcolno + 1);
 565                             format_string(stderr, (unsigned char *) text, length);
 566                             fputs("' too long\n", stderr);
 567                             hsperror("");
 568                          }
 569                          yylval.uhstring = installHstring(1, text);
 570                          hspcolno = hsmlcolno;
 571                          POP_STATE;
 572                          RETURN(CHARPRIM);
 573                         }
 574 <Char>({CHAR}|"\"")*'   {
 575                          unsigned length;
 576                          char *text;
 577
 578                          addtext(yytext, yyleng - 1);
 579                          text = fetchtext(&length);
 580
 581                          if (length > 1) {
 582                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
 583                               input_filename, hsplineno, hspcolno + 1);
 584                             format_string(stderr, (unsigned char *) text, length);
 585                             fputs("' too long\n", stderr);
 586                             hsperror("");
 587                          }
 588                          yylval.uhstring = installHstring(1, text);
 589                          hspcolno = hsmlcolno;
 590                          POP_STATE;
 591                          RETURN(CHAR);
 592                         }
 593 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
 594
 595
 596 %{
 597     /*
 598      * String literals.  The first form is the quick form, for string literals
 599      * that don't contain backslashes.  Literals with backslashes are lexed
 600      * through multiple rules.  First, we match the open " and as many normal
 601      * characters as possible.  This puts us into the <String> state, where
 602      * a backslash is legal.  Then, we match the backslash and move into the
 603      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
 604      * characters, moving back and forth between <String> and <StringEsc> as more
 605      * backslashes are encountered.  (We may even digress into <Comment> mode if we
 606      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
 607      * of normal characters and the close ".
 608      */
 609 %}
 610
 611 <GlaExt>"\""({CHAR}|"'")*"\""#  {
 612                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
 613                             /* the -3 accounts for the " on front, "# on the end */
 614                          RETURN(STRINGPRIM);
 615                         }
 616 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
 617                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
 618                          RETURN(STRING);
 619                         }
 620 <Code,GlaExt>"\""({CHAR}|"'")* {
 621                          hsmlcolno = hspcolno;
 622                          cleartext();
 623                          addtext(yytext+1, yyleng-1);
 624                          PUSH_STATE(String);
 625                         }
 626 <String>({CHAR}|"'")*"\"#"   {
 627                          unsigned length;
 628                          char *text;
 629
 630                          addtext(yytext, yyleng-2);
 631                          text = fetchtext(&length);
 632
 633                          if (! nonstandardFlag) {
 634                             char errbuf[ERR_BUF_SIZE];
 635                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
 636                             hsperror(errbuf);
 637                          }
 638
 639                          yylval.uhstring = installHstring(length, text);
 640                          hspcolno = hsmlcolno;
 641                          POP_STATE;
 642                          RETURN(STRINGPRIM);
 643                         }
 644 <String>({CHAR}|"'")*"\""   {
 645                          unsigned length;
 646                          char *text;
 647
 648                          addtext(yytext, yyleng-1);
 649                          text = fetchtext(&length);
 650
 651                          yylval.uhstring = installHstring(length, text);
 652                          hspcolno = hsmlcolno;
 653                          POP_STATE;
 654                          RETURN(STRING);
 655                         }
 656 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
 657
 658 %{
 659     /*
 660      * Character and string escapes are roughly the same, but strings have the
 661      * extra `\&' sequence which is not allowed for characters.  Also, comments
 662      * are allowed in the <StringEsc> state.  (See the comment section much
 663      * further down.)
 664      *
 665      * NB: Backslashes and tabs are stored in strings as themselves.
 666      * But if we print them (in printtree.c), they must go out as
 667      * "\\\\" and "\\t" respectively.  (This is because of the bogus
 668      * intermediate format that the parser produces.  It uses '\t' fpr end of
 669      * string, so it needs to be able to escape tabs, which means that it
 670      * also needs to be able to escape the escape character ('\\').  Sigh.
 671      */
 672 %}
 673
 674 <Char>\\                { PUSH_STATE(CharEsc); }
 675 <String>\\&             /* Ignore */ ;
 676 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
 677
 678 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
 679 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
 680
 681 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
 682 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
 683 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
 684 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
 685 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
 686 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
 687 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
 688 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
 689 <CharEsc,StringEsc>BEL  |
 690 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
 691 <CharEsc,StringEsc>BS   |
 692 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
 693 <CharEsc,StringEsc>HT   |
 694 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
 695 <CharEsc,StringEsc>LF   |
 696 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
 697 <CharEsc,StringEsc>VT   |
 698 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
 699 <CharEsc,StringEsc>FF   |
 700 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
 701 <CharEsc,StringEsc>CR   |
 702 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
 703 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
 704 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
 705 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
 706 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
 707 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
 708 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
 709 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
 710 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
 711 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
 712 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
 713 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
 714 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
 715 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
 716 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
 717 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
 718 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
 719 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
 720 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
 721 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
 722 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
 723 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
 724 <CharEsc,StringEsc>{D}+  {
 725                           int i = strtol(yytext, NULL, 10);
 726                           if (i < NCHARS) {
 727                              addchar((char) i);
 728                           } else {
 729                              char errbuf[ERR_BUF_SIZE];
 730                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 731                                 yytext);
 732                              hsperror(errbuf);
 733                           }
 734                           POP_STATE;
 735                         }
 736 <CharEsc,StringEsc>o{O}+ {
 737                           int i = strtol(yytext + 1, NULL, 8);
 738                           if (i < NCHARS) {
 739                              addchar((char) i);
 740                           } else {
 741                              char errbuf[ERR_BUF_SIZE];
 742                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 743                                 yytext);
 744                              hsperror(errbuf);
 745                           }
 746                           POP_STATE;
 747                         }
 748 <CharEsc,StringEsc>x{H}+ {
 749                           int i = strtol(yytext + 1, NULL, 16);
 750                           if (i < NCHARS) {
 751                              addchar((char) i);
 752                           } else {
 753                              char errbuf[ERR_BUF_SIZE];
 754                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 755                                 yytext);
 756                              hsperror(errbuf);
 757                           }
 758                           POP_STATE;
 759                         }
 760
 761 %{
 762     /*
 763      * Simple comments and whitespace.  Normally, we would just ignore these, but
 764      * in case we're processing a string escape, we need to note that we've seen
 765      * a gap.
 766      *
 767      * Note that we cater for a comment line that *doesn't* end in a newline.
 768      * This is incorrect, strictly speaking, but seems like the right thing
 769      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
 770      */
 771 %}
 772
 773 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
 774 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
 775
 776 %{
 777     /*
 778      * Nested comments.  The major complication here is in trying to match the
 779      * longest lexemes possible, for better performance.  (See the flex document.)
 780      * That's why the rules look so bizarre.
 781      */
 782 %}
 783
 784 <Code,GlaExt,UserPragma,StringEsc>"{-"  {
 785                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
 786                         }
 787
 788 <Comment>[^-{]*         |
 789 <Comment>"-"+[^-{}]+    |
 790 <Comment>"{"+[^-{}]+    ;
 791 <Comment>"{-"           { nested_comments++; }
 792 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
 793 <Comment>(.|\n)         ;
 794
 795 %{
 796     /*
 797      * Illegal characters.  This used to be a single rule, but we might as well
 798      * pass on as much information as we have, so now we indicate our state in
 799      * the error message.
 800      */
 801 %}
 802
 803 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  {
 804                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 805                             input_filename, hsplineno, hspcolno + 1);
 806                          format_string(stderr, (unsigned char *) yytext, 1);
 807                          fputs("'\n", stderr);
 808                          hsperror("");
 809                         }
 810 <Char>(.|\n)            {
 811                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 812                             input_filename, hsplineno, hspcolno + 1);
 813                          format_string(stderr, (unsigned char *) yytext, 1);
 814                          fputs("' in a character literal\n", stderr);
 815                          hsperror("");
 816                         }
 817 <CharEsc>(.|\n)         {
 818                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
 819                             input_filename, hsplineno, hspcolno + 1);
 820                          format_string(stderr, (unsigned char *) yytext, 1);
 821                          fputs("'\n", stderr);
 822                          hsperror("");
 823                         }
 824 <String>(.|\n)          { if (nonstandardFlag) {
 825                              addtext(yytext, yyleng);
 826                           } else {
 827                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 828                                 input_filename, hsplineno, hspcolno + 1);
 829                                 format_string(stderr, (unsigned char *) yytext, 1);
 830                                 fputs("' in a string literal\n", stderr);
 831                                 hsperror("");
 832                           }
 833                         }
 834 <StringEsc>(.|\n)       {
 835                          if (noGap) {
 836                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
 837                                 input_filename, hsplineno, hspcolno + 1);
 838                              format_string(stderr, (unsigned char *) yytext, 1);
 839                              fputs("'\n", stderr);
 840                              hsperror("");
 841                          } else {
 842                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 843                                 input_filename, hsplineno, hspcolno + 1);
 844                              format_string(stderr, (unsigned char *) yytext, 1);
 845                              fputs("' in a string gap\n", stderr);
 846                              hsperror("");
 847                          }
 848                         }
 849
 850 %{
 851     /*
 852      * End of file.  In any sub-state, this is an error.  However, for the primary
 853      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
 854      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
 855      * adding virtual close curlies, or closing an interface and returning to the
 856      * primary source file.
 857      *
 858      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
 859      * line/column advancement has to be done by hand.
 860      */
 861 %}
 862
 863 <Char,CharEsc><<EOF>>   {
 864                           hsplineno = hslineno; hspcolno = hscolno;
 865                           hsperror("unterminated character literal");
 866                         }
 867 <Comment><<EOF>>        {
 868                           hsplineno = hslineno; hspcolno = hscolno;
 869                           hsperror("unterminated comment");
 870                         }
 871 <String,StringEsc><<EOF>>   {
 872                           hsplineno = hslineno; hspcolno = hscolno;
 873                           hsperror("unterminated string literal");
 874                         }
 875 <UserPragma><<EOF>>     {
 876                           hsplineno = hslineno; hspcolno = hscolno;
 877                           hsperror("unterminated user-specified pragma");
 878                         }
 879 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
 880
 881 %%
 882
 883 /**********************************************************************
 884 *                                                                     *
 885 *                                                                     *
 886 *     YACC/LEX Initialisation etc.                                    *
 887 *                                                                     *
 888 *                                                                     *
 889 **********************************************************************/
 890
 891 /*
 892    We initialise input_filename to "<stdin>".
 893    This allows unnamed sources to be piped into the parser.
 894 */
 895
 896 void
 897 yyinit(void)
 898 {
 899     input_filename = xstrdup("<stdin>");
 900
 901     /* We must initialize the input buffer _now_, because we call
 902        setyyin _before_ calling yylex for the first time! */
 903     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 904
 905     if (nonstandardFlag)
 906         PUSH_STATE(GlaExt);
 907     else
 908         PUSH_STATE(Code);
 909 }
 910
 911 static void
 912 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
 913 {
 914     if (input_filename != NULL)
 915         free(input_filename);
 916     input_filename = xstrdup(f);
 917 }
 918
 919 /**********************************************************************
 920 *                                                                     *
 921 *                                                                     *
 922 *     Layout Processing                                               *
 923 *                                                                     *
 924 *                                                                     *
 925 **********************************************************************/
 926
 927 /*
 928         The following section deals with Haskell Layout conventions
 929         forcing insertion of ; or } as appropriate
 930 */
 931
 932 static BOOLEAN
 933 hsshouldindent(void)
 934 {
 935     return (!forgetindent && INDENTON);
 936 }
 937
 938
 939 /* Enter new context and set new indentation level */
 940 void
 941 hssetindent(void)
 942 {
 943 #ifdef HSP_DEBUG
 944     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 945 #endif
 946
 947     /*
 948      * partain: first chk that new indent won't be less than current one; this code
 949      * doesn't make sense to me; hscolno tells the position of the _end_ of the
 950      * current token; what that has to do with indenting, I don't know.
 951      */
 952
 953
 954     if (hscolno - 1 <= INDENTPT) {
 955         if (INDENTPT == -1)
 956             return;             /* Empty input OK for Haskell 1.1 */
 957         else {
 958             char errbuf[ERR_BUF_SIZE];
 959
 960             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
 961             hsperror(errbuf);
 962         }
 963     }
 964     hsentercontext((hspcolno << 1) | 1);
 965 }
 966
 967
 968 /* Enter a new context without changing the indentation level */
 969 void
 970 hsincindent(void)
 971 {
 972 #ifdef HSP_DEBUG
 973     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 974 #endif
 975     hsentercontext(indenttab[icontexts] & ~1);
 976 }
 977
 978
 979 /* Turn off indentation processing, usually because an explicit "{" has been seen */
 980 void
 981 hsindentoff(void)
 982 {
 983     forgetindent = TRUE;
 984 }
 985
 986
 987 /* Enter a new layout context. */
 988 static void
 989 hsentercontext(int indent)
 990 {
 991     /* Enter new context and set indentation as specified */
 992     if (++icontexts >= MAX_CONTEXTS) {
 993         char errbuf[ERR_BUF_SIZE];
 994
 995         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
 996         hsperror(errbuf);
 997     }
 998     forgetindent = FALSE;
 999     indenttab[icontexts] = indent;
1000 #ifdef HSP_DEBUG
1001     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1002 #endif
1003 }
1004
1005
1006 /* Exit a layout context */
1007 void
1008 hsendindent(void)
1009 {
1010     --icontexts;
1011 #ifdef HSP_DEBUG
1012     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1013 #endif
1014 }
1015
1016 /*
1017  *      Return checks the indentation level and returns ;, } or the specified token.
1018  */
1019
1020 static int
1021 Return(int tok)
1022 {
1023 #ifdef HSP_DEBUG
1024     extern int yyleng;
1025 #endif
1026
1027     if (hsshouldindent()) {
1028         if (hspcolno < INDENTPT) {
1029 #ifdef HSP_DEBUG
1030             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1031 #endif
1032             hssttok = tok;
1033             return (VCCURLY);
1034         } else if (hspcolno == INDENTPT) {
1035 #ifdef HSP_DEBUG
1036             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1037 #endif
1038             hssttok = -tok;
1039             return (SEMI);
1040         }
1041     }
1042     hssttok = -1;
1043 #ifdef HSP_DEBUG
1044     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1045 #endif
1046     return (tok);
1047 }
1048
1049
1050 /*
1051  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1052  */
1053 int
1054 yylex()
1055 {
1056     int tok;
1057     static BOOLEAN eof = FALSE;
1058
1059     if (!eof) {
1060         if (hssttok != -1) {
1061             if (hssttok < 0) {
1062                 tok = -hssttok;
1063                 hssttok = -1;
1064                 return tok;
1065             }
1066             RETURN(hssttok);
1067         } else {
1068             endlineno = hslineno;
1069             if ((tok = yylex1()) != EOF)
1070                 return tok;
1071             else
1072                 eof = TRUE;
1073         }
1074     }
1075     if (icontexts > icontexts_save) {
1076         if (INDENTON) {
1077             eof = TRUE;
1078             indenttab[icontexts] = 0;
1079             return (VCCURLY);
1080         } else
1081             hsperror("missing '}' at end of file");
1082     } else if (hsbuf_save != NULL) {
1083         fclose(yyin);
1084         yy_delete_buffer(YY_CURRENT_BUFFER);
1085         yy_switch_to_buffer(hsbuf_save);
1086         hsbuf_save = NULL;
1087         new_filename(filename_save);
1088         free(filename_save);
1089         hslineno = hslineno_save;
1090         hsplineno = hsplineno_save;
1091         hscolno = hscolno_save;
1092         hspcolno = hspcolno_save;
1093         etags = etags_save;
1094         icontexts = icontexts_save - 1;
1095         icontexts_save = 0;
1096 #ifdef HSP_DEBUG
1097         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1098 #endif
1099         eof = FALSE;
1100
1101         /* RETURN(LEOF); */
1102         hsperror("No longer using yacc to parse interface files");
1103
1104     } else {
1105         yyterminate();
1106     }
1107     abort(); /* should never get here! */
1108     return(0);
1109 }
1110
1111 /**********************************************************************
1112 *                                                                     *
1113 *                                                                     *
1114 *     Input Processing for Interfaces -- Not currently used !!!       *
1115 *                                                                     *
1116 *                                                                     *
1117 **********************************************************************/
1118
1119 /* setyyin(file)        open file as new lex input buffer */
1120 extern FILE *yyin;
1121
1122 void
1123 setyyin(char *file)
1124 {
1125     hsbuf_save = YY_CURRENT_BUFFER;
1126     if ((yyin = fopen(file, "r")) == NULL) {
1127         char errbuf[ERR_BUF_SIZE];
1128
1129         sprintf(errbuf, "can't read \"%-.50s\"", file);
1130         hsperror(errbuf);
1131     }
1132     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1133
1134     hslineno_save = hslineno;
1135     hsplineno_save = hsplineno;
1136     hslineno = hsplineno = 1;
1137
1138     filename_save = input_filename;
1139     input_filename = NULL;
1140     new_filename(file);
1141     hscolno_save = hscolno;
1142     hspcolno_save = hspcolno;
1143     hscolno = hspcolno = 0;
1144     etags_save = etags; /* do not do "etags" stuff in interfaces */
1145     etags = 0;          /* We remember whether we are doing it in
1146                            the module, so we can restore it later [WDP 94/09] */
1147     hsentercontext(-1);         /* partain: changed this from 0 */
1148     icontexts_save = icontexts;
1149 #ifdef HSP_DEBUG
1150     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1151 #endif
1152 }
1153
1154 static void
1155 layout_input(char *text, int len)
1156 {
1157 #ifdef HSP_DEBUG
1158     fprintf(stderr, "Scanning \"%s\"\n", text);
1159 #endif
1160
1161     hsplineno = hslineno;
1162     hspcolno = hscolno;
1163
1164     while (len-- > 0) {
1165         switch (*text++) {
1166         case '\n':
1167         case '\r':
1168         case '\f':
1169             hslineno++;
1170             hscolno = 0;
1171             break;
1172         case '\t':
1173             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1174             break;
1175         case '\v':
1176             break;
1177         default:
1178             ++hscolno;
1179             break;
1180         }
1181     }
1182 }
1183
1184 void
1185 setstartlineno(void)
1186 {
1187     startlineno = hsplineno;
1188
1189     if (modulelineno == 0) {
1190         modulelineno = startlineno;
1191     }
1192
1193 #if 1/*etags*/
1194 #else
1195     if (etags)
1196         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1197 #endif
1198 }
1199
1200 /**********************************************************************
1201 *                                                                     *
1202 *                                                                     *
1203 *                      Text Caching                                   *
1204 *                                                                     *
1205 *                                                                     *
1206 **********************************************************************/
1207
1208 #define CACHE_SIZE YY_BUF_SIZE
1209
1210 static struct {
1211     unsigned allocated;
1212     unsigned next;
1213     char *text;
1214 } textcache = { 0, 0, NULL };
1215
1216 static void
1217 cleartext(void)
1218 {
1219 /*  fprintf(stderr, "cleartext\n"); */
1220     textcache.next = 0;
1221     if (textcache.allocated == 0) {
1222         textcache.allocated = CACHE_SIZE;
1223         textcache.text = xmalloc(CACHE_SIZE);
1224     }
1225 }
1226
1227 static void
1228 addtext(char *text, unsigned length)
1229 {
1230 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1231
1232     if (length == 0)
1233         return;
1234
1235     if (textcache.next + length + 1 >= textcache.allocated) {
1236         textcache.allocated += length + CACHE_SIZE;
1237         textcache.text = xrealloc(textcache.text, textcache.allocated);
1238     }
1239     bcopy(text, textcache.text + textcache.next, length);
1240     textcache.next += length;
1241 }
1242
1243 static void
1244 addchar(char c)
1245 {
1246 /*  fprintf(stderr, "addchar: %c\n", c); */
1247
1248     if (textcache.next + 2 >= textcache.allocated) {
1249         textcache.allocated += CACHE_SIZE;
1250         textcache.text = xrealloc(textcache.text, textcache.allocated);
1251     }
1252     textcache.text[textcache.next++] = c;
1253 }
1254
1255 static char *
1256 fetchtext(unsigned *length)
1257 {
1258 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1259
1260     *length = textcache.next;
1261     textcache.text[textcache.next] = '\0';
1262     return textcache.text;
1263 }
1264
1265 /**********************************************************************
1266 *                                                                     *
1267 *                                                                     *
1268 *    Identifier Processing                                             *
1269 *                                                                     *
1270 *                                                                     *
1271 **********************************************************************/
1272
1273 /*
1274         hsnewid         Enters an id of length n into the symbol table.
1275 */
1276
1277 static void
1278 hsnewid(char *name, int length)
1279 {
1280     char save = name[length];
1281
1282     name[length] = '\0';
1283     yylval.uid = installid(name);
1284     name[length] = save;
1285 }
1286
1287 BOOLEAN
1288 hsnewqid(char *name, int length)
1289 {
1290     char* dot;
1291     char save = name[length];
1292     name[length] = '\0';
1293
1294     dot = strchr(name, '.');
1295     *dot = '\0';
1296     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1297     *dot = '.';
1298     name[length] = save;
1299
1300     return _isconstr(dot+1);
1301 }
1302
1303 BOOLEAN
1304 isconstr(char *s) /* walks past leading underscores before using the macro */
1305 {
1306     char *temp = s;
1307
1308     for ( ; temp != NULL && *temp == '_' ; temp++ );
1309
1310     return _isconstr(temp);
1311 }