ghc/compiler/parser/hslexer.flex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       FLEX for Haskell.                                             *
   6 *       -----------------                                             *
   7 *                                                                     *
   8 **********************************************************************/
   9
  10 /* The includes/config.h one */
  11 #include "config.h"
  12
  13 #include <stdio.h>
  14
  15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
  16 #include <string.h>
  17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
  18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
  19 #include <memory.h>
  20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
  21 #define index strchr
  22 #define rindex strrchr
  23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
  24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  25 #define bzero(s, n) memset ((s), 0, (n))
  26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
  27 #include <strings.h>
  28 /* memory.h and strings.h conflict on some systems.  */
  29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
  30
  31 #include "hspincl.h"
  32 #include "hsparser.tab.h"
  33 #include "constants.h"
  34 #include "utils.h"
  35
  36 /* Our substitute for <ctype.h> */
  37
  38 #define NCHARS  256
  39 #define _S      0x1
  40 #define _D      0x2
  41 #define _H      0x4
  42 #define _O      0x8
  43 #define _C      0x10
  44
  45 static unsigned char CharTable[NCHARS] = {
  46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
  47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
  48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
  49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
  50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
  51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
  52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
  53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
  54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
  55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
  58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
  59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
  60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
  61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
  62
  63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  79 };
  80
  81 BOOLEAN
  82 isconstr (char *s)
  83 {
  84     return(CharTable[*s]&(_C));
  85 }
  86
  87 /**********************************************************************
  88 *                                                                     *
  89 *                                                                     *
  90 *      Declarations                                                   *
  91 *                                                                     *
  92 *                                                                     *
  93 **********************************************************************/
  94
  95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
  96
  97 /*
  98  * For my own sanity, things that are not part of the flex skeleton
  99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
 100  */
 101
 102 static int hslineno = 0;        /* Line number at end of token */
 103 int hsplineno = 0;              /* Line number at end of previous token */
 104
 105 static int hscolno = 0;         /* Column number at end of token */
 106 int hspcolno = 0;               /* Column number at end of previous token */
 107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
 108
 109 int modulelineno = -1;          /* The line number where the module starts */
 110 int startlineno = 0;            /* The line number where something starts */
 111 int endlineno = 0;              /* The line number where something ends */
 112
 113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
 114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
 115
 116 static int nested_comments;     /* For counting comment nesting depth */
 117 static int comment_start;
 118
 119 /* OLD: Hacky definition of yywrap: see flex doc.
 120
 121    If we don't do this, then we'll have to get the default
 122    yywrap from the flex library, which is often something
 123    we are not good at locating.  This avoids that difficulty.
 124    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
 125    WDP 94/09/05
 126 #define yywrap() 1
 127 */
 128
 129 /* Essential forward declarations */
 130
 131 static void hsnewid      PROTO((char *, int));
 132 static void layout_input PROTO((char *, int));
 133 static void cleartext    (NO_ARGS);
 134 static void addtext      PROTO((char *, unsigned));
 135 static void addchar      PROTO((char));
 136 static char *fetchtext   PROTO((unsigned *));
 137 static void new_filename PROTO((char *));
 138 static int  Return       PROTO((int));
 139 static void hsentercontext PROTO((int));
 140
 141 static BOOLEAN is_commment PROTO((char*, int));
 142
 143 /* Special file handling for IMPORTS */
 144 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 145
 146 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
 147 static char *filename_save;             /* File Name                     */
 148 static int hslineno_save = 0,           /* Line Number                   */
 149  hsplineno_save = 0,                    /* Line Number of Prev. token    */
 150  hscolno_save = 0,                      /* Indentation                   */
 151  hspcolno_save = 0;                     /* Left Indentation              */
 152 static short icontexts_save = 0;        /* Indent Context Level          */
 153
 154 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
 155 extern BOOLEAN etags;                   /* that which is saved */
 156
 157 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
 158
 159 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
 160                                  * inserted before token +ve  -- "}" inserted before
 161                                  * token */
 162
 163 short icontexts = 0;            /* Which context we're in */
 164
 165 /*
 166         Table of indentations:  right bit indicates whether to use
 167           indentation rules (1 = use rules; 0 = ignore)
 168
 169     partain:
 170     push one of these "contexts" at every "case" or "where"; the right bit says
 171     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
 172
 173     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 174     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 175     push is shown just below.
 176
 177 */
 178
 179
 180 static short indenttab[MAX_CONTEXTS] = {-1};
 181
 182 #define INDENTPT (indenttab[icontexts]>>1)
 183 #define INDENTON (indenttab[icontexts]&1)
 184
 185 #define RETURN(tok) return(Return(tok))
 186
 187 #undef YY_DECL
 188 #define YY_DECL int yylex1()
 189
 190 /* We should not peek at yy_act, but flex calls us even for the internal action
 191    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
 192    to support older versions of flex, we'll continue to peek for now.
 193  */
 194 #define YY_USER_ACTION \
 195     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
 196
 197 #if 0/*debug*/
 198 #undef YY_BREAK
 199 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
 200 #endif
 201
 202 /* Each time we enter a new start state, we push it onto the state stack.
 203 */
 204 #define PUSH_STATE(n)   yy_push_state(n)
 205 #define POP_STATE       yy_pop_state()
 206
 207 %}
 208 /* Options:
 209     8bit (8-bit input)
 210     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
 211     never-interactive (to go a bit faster)
 212     stack (use a start-condition stack)
 213 */
 214 %option 8bit
 215 %option noyywrap
 216 %option never-interactive
 217 %option stack
 218
 219 /* The start states are:
 220    Code -- normal Haskell code (principal lexer)
 221    GlaExt -- Haskell code with Glasgow extensions
 222    Comment -- Nested comment processing
 223    String -- Inside a string literal with backslashes
 224    StringEsc -- Immediately following a backslash in a string literal
 225    Char -- Inside a character literal with backslashes
 226    CharEsc -- Immediately following a backslash in a character literal
 227
 228    Note that the INITIAL state is unused.  Also note that these states
 229    are _exclusive_.  All rules should be prefixed with an appropriate
 230    list of start states.
 231  */
 232
 233 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
 234
 235 isoS                    [\xa1-\xbf\xd7\xf7]
 236 isoL                    [\xc0-\xd6\xd8-\xde]
 237 isol                    [\xdf-\xf6\xf8-\xff]
 238 isoA                    [\xa1-\xff]
 239
 240 D                       [0-9]
 241 O                       [0-7]
 242 H                       [0-9A-Fa-f]
 243 N                       {D}+
 244 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 245 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
 246 SId                     {S}{S}*
 247 L                       [A-Z\xc0-\xd6\xd8-\xde]
 248 l                       [a-z_\xdf-\xf6\xf8-\xff]
 249 I                       {L}|{l}
 250 i                       {L}|{l}|[0-9'_]
 251 Id                      {I}{i}*
 252 Mod                     {L}{i}*
 253 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
 254 CNTRL                   [@A-Z\[\\\]^_]
 255 WS                      [ \t\n\r\f\v]
 256 NL                      [\n\r]
 257
 258 %%
 259
 260 %{
 261     /*
 262      * Simple comments and whitespace.  Normally, we would just ignore these, but
 263      * in case we're processing a string escape, we need to note that we've seen
 264      * a gap.
 265      *
 266      * Note that we cater for a comment line that *doesn't* end in a newline.
 267      * This is incorrect, strictly speaking, but seems like the right thing
 268      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
 269      *
 270      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
 271      */
 272 %}
 273
 274 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
 275
 276 %{
 277     /*
 278      * Special GHC pragma rules.  Do we need a start state for interface files,
 279      * so these won't be matched in source files? --JSM
 280      */
 281
 282 %}
 283
 284 %{
 285 /* I believe the next rule is not ever matched.
 286
 287    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
 288    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
 289    where possible, so the lexer should now never see cpp directives
 290    like '# ' and '#line'.
 291
 292    -- SOF
 293
 294 <Code,GlaExt>^"# ".*{NL}    {
 295                           char tempf[FILENAME_SIZE];
 296                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
 297                           new_filename(tempf);
 298                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 299                         }
 300
 301 <Code,GlaExt>^"#line ".*{NL}    {
 302                           char tempf[FILENAME_SIZE];
 303                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
 304                           new_filename(tempf);
 305                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 306                         }
 307 */
 308 %}
 309
 310 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
 311                           /* partain: pragma-style line directive */
 312                           char tempf[FILENAME_SIZE];
 313                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
 314                           new_filename(tempf);
 315                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 316                         }
 317
 318 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
 319                               PUSH_STATE(UserPragma);
 320                               RETURN(INTERFACE_UPRAGMA);
 321                             }
 322 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
 323                               PUSH_STATE(UserPragma);
 324                               RETURN(SPECIALISE_UPRAGMA);
 325                             }
 326 <Code,GlaExt>"{-#"{WS}*"INLINE" {
 327                               PUSH_STATE(UserPragma);
 328                               RETURN(INLINE_UPRAGMA);
 329                             }
 330 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
 331                               PUSH_STATE(UserPragma);
 332                               RETURN(NOINLINE_UPRAGMA);
 333                             }
 334 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
 335                               PUSH_STATE(UserPragma);
 336                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
 337                             }
 338 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
 339                               /* these are handled by hscpp */
 340                               nested_comments =1; comment_start = hsplineno;
 341                               PUSH_STATE(Comment);
 342                             }
 343 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
 344                               /* these are for the driver! */
 345                               nested_comments =1; comment_start = hsplineno;
 346                               PUSH_STATE(Comment);
 347                             }
 348 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
 349                               /* these are used by `make depend' and the
 350                                  compiler to indicate that a module should
 351                                  be imported from source */
 352                               nested_comments =1; comment_start = hsplineno;
 353                               RETURN(SOURCE_UPRAGMA);
 354                             }
 355
 356 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
 357                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
 358                                 input_filename, hsplineno);
 359                               format_string(stderr, (unsigned char *) yytext, yyleng);
 360                               fputs("'\n", stderr);
 361                               nested_comments = 1; comment_start = hsplineno;
 362                               PUSH_STATE(Comment);
 363                             }
 364 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
 365
 366 %{
 367     /*
 368      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
 369      * intentionally accepted as a keyword even for normal <Code>.
 370      */
 371 %}
 372
 373 <Code,GlaExt>"case"             { RETURN(CASE); }
 374 <Code,GlaExt>"class"            { RETURN(CLASS); }
 375 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
 376 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
 377 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
 378 <Code,GlaExt>"do"               { RETURN(DO); }
 379 <Code,GlaExt>"else"             { RETURN(ELSE); }
 380 <Code,GlaExt>"if"               { RETURN(IF); }
 381 <Code,GlaExt>"import"           { RETURN(IMPORT); }
 382 <Code,GlaExt>"in"               { RETURN(IN); }
 383 <Code,GlaExt>"infix"            { RETURN(INFIX); }
 384 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
 385 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
 386 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
 387 <Code,GlaExt>"let"              { RETURN(LET); }
 388 <Code,GlaExt>"module"           { RETURN(MODULE); }
 389 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
 390 <Code,GlaExt>"of"               { RETURN(OF); }
 391 <Code,GlaExt>"then"             { RETURN(THEN); }
 392 <Code,GlaExt>"type"             { RETURN(TYPE); }
 393 <Code,GlaExt>"where"            { RETURN(WHERE); }
 394
 395 <Code,GlaExt>"as"               { RETURN(AS); }
 396 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
 397 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
 398
 399 <Code,GlaExt>"forall"           { RETURN(FORALL); }
 400
 401 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
 402 <GlaExt>"_ccall_"               { RETURN(CCALL); }
 403 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
 404 <GlaExt>"_casm_"                { RETURN(CASM); }
 405 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
 406 <GlaExt>"(#"                    { RETURN(OUNBOXPAREN); }
 407 <GlaExt>"#)"                    { RETURN(CUNBOXPAREN); }
 408 <GlaExt>"foreign"               { RETURN(FOREIGN); }
 409 <GlaExt>"export"                { RETURN(EXPORT); }
 410 <GlaExt>"label"                 { RETURN(LABEL); }
 411 <GlaExt>"unsafe"                { RETURN(UNSAFE); }
 412 <GlaExt>"_stdcall"              { RETURN(STDCALL); }
 413 <GlaExt>"_ccall"                { RETURN(C_CALL); }
 414 <GlaExt>"_pascal"               { RETURN(PASCAL); }
 415 <GlaExt>"stdcall"               { RETURN(STDCALL); }
 416 <GlaExt>"ccall"                 { RETURN(C_CALL); }
 417 <GlaExt>"pascal"                { RETURN(PASCAL); }
 418 <GlaExt>"dynamic"               { RETURN(DYNAMIC); }
 419
 420 %{
 421     /*
 422      * Haskell operators: special, reservedops and useful varsyms
 423      */
 424 %}
 425
 426 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
 427 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
 428 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
 429 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
 430 <Code,GlaExt>"{"                { RETURN(OCURLY); }
 431 <Code,GlaExt>"}"                { RETURN(CCURLY); }
 432 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
 433 <Code,GlaExt>";"                { RETURN(SEMI); }
 434 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
 435
 436 <Code,GlaExt>"."                { RETURN(DOT); }
 437 <Code,GlaExt>".."               { RETURN(DOTDOT); }
 438 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
 439 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
 440 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
 441 <Code,GlaExt>"|"                { RETURN(VBAR); }
 442 <Code,GlaExt>"<-"               { RETURN(LARROW); }
 443 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
 444 <Code,GlaExt>"-"                { RETURN(MINUS); }
 445 <Code,GlaExt>"+"                { RETURN(PLUS); }
 446
 447 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
 448 <Code,GlaExt>"@"                { RETURN(AT); }
 449 <Code,GlaExt>"!"                { RETURN(BANG); }
 450 <Code,GlaExt>"~"                { RETURN(LAZY); }
 451
 452 %{
 453     /*
 454      * Integers and (for Glasgow extensions) primitive integers.  Note that
 455      * we pass all of the text on to the parser, because flex/C can't handle
 456      * arbitrary precision numbers.
 457      */
 458 %}
 459
 460 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
 461                          yylval.uid = xstrndup(yytext, yyleng - 1);
 462                          RETURN(INTPRIM);
 463                         }
 464 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
 465                          yylval.uid = xstrndup(yytext, yyleng);
 466                          RETURN(INTEGER);
 467                         }
 468 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
 469                          yylval.uid = xstrndup(yytext, yyleng - 1);
 470                          RETURN(INTPRIM);
 471                         }
 472 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
 473                          yylval.uid = xstrndup(yytext, yyleng);
 474                          RETURN(INTEGER);
 475                         }
 476 <GlaExt>("-")?{N}"#"    {
 477                          yylval.uid = xstrndup(yytext, yyleng - 1);
 478                          RETURN(INTPRIM);
 479                         }
 480 <Code,GlaExt,UserPragma>{N} {
 481                          yylval.uid = xstrndup(yytext, yyleng);
 482                          RETURN(INTEGER);
 483                         }
 484
 485 %{
 486     /*
 487      * Floats and (for Glasgow extensions) primitive floats/doubles.
 488      */
 489 %}
 490
 491 <GlaExt>("-")?{F}"##"   {
 492                          yylval.uid = xstrndup(yytext, yyleng - 2);
 493                          RETURN(DOUBLEPRIM);
 494                         }
 495 <GlaExt>("-")?{F}"#"    {
 496                          yylval.uid = xstrndup(yytext, yyleng - 1);
 497                          RETURN(FLOATPRIM);
 498                         }
 499 <Code,GlaExt>{F}        {
 500                          yylval.uid = xstrndup(yytext, yyleng);
 501                          RETURN(FLOAT);
 502                         }
 503
 504 %{
 505     /*
 506      * Funky ``foo'' style C literals for Glasgow extensions
 507      */
 508 %}
 509
 510 <GlaExt>"``"[^']+"''"   {
 511                          hsnewid(yytext + 2, yyleng - 4);
 512                          RETURN(CLITLIT);
 513                         }
 514
 515 %{
 516     /*
 517      * Identifiers, both variables and operators.  The trailing hash is allowed
 518      * for Glasgow extensions.
 519      */
 520 %}
 521
 522
 523 %{
 524 /* These SHOULDNAE work in "Code" (sigh) */
 525 %}
 526 <GlaExt,UserPragma>{Id}"#" {
 527                         if (! nonstandardFlag) {
 528                             char errbuf[ERR_BUF_SIZE];
 529                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 530                             hsperror(errbuf);
 531                          }
 532                          hsnewid(yytext, yyleng);
 533                          RETURN(isconstr(yytext) ? CONID : VARID);
 534                         }
 535 <Code,GlaExt,UserPragma>{Id}    {
 536                          hsnewid(yytext, yyleng);
 537                          RETURN(isconstr(yytext) ? CONID : VARID);
 538                         }
 539 <Code,GlaExt,UserPragma>{SId}   {
 540                          if (is_commment(yytext,yyleng)) {
 541                                 int c;
 542                                 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
 543                                         ;
 544                                 if (c != EOF)
 545                                    unput(c);
 546                          } else {
 547                             hsnewid(yytext, yyleng);
 548                             RETURN(isconstr(yytext) ? CONSYM : VARSYM);
 549                          }
 550                         }
 551 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
 552                          BOOLEAN is_constr;
 553                          if (! nonstandardFlag) {
 554                             char errbuf[ERR_BUF_SIZE];
 555                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 556                             hsperror(errbuf);
 557                          }
 558                          is_constr = hsnewqid(yytext, yyleng);
 559                          RETURN(is_constr ? QCONID : QVARID);
 560                         }
 561 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
 562                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
 563                          RETURN(is_constr ? QCONID : QVARID);
 564                         }
 565 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
 566                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
 567                          RETURN(is_constr ? QCONSYM : QVARSYM);
 568                         }
 569
 570 %{
 571     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
 572
 573     /* Because we can make the former well-behaved (we defined them).
 574
 575        Sadly, the latter is defined by Haskell, which allows such
 576        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
 577     */
 578 %}
 579
 580 <GlaExt,UserPragma>"`"{Id}"#`"  {
 581                          hsnewid(yytext + 1, yyleng - 2);
 582                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
 583                         }
 584
 585 %{
 586     /*
 587      * Character literals.  The first form is the quick form, for character
 588      * literals that don't contain backslashes.  Literals with backslashes are
 589      * lexed through multiple rules.  First, we match the open ' and as many
 590      * normal characters as possible.  This puts us into the <Char> state, where
 591      * a backslash is legal.  Then, we match the backslash and move into the
 592      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
 593      * characters and the close '.  We may end up with too many characters, but
 594      * this allows us to easily share the lex rules with strings.  Excess characters
 595      * are ignored with a warning.
 596      */
 597 %}
 598
 599 <GlaExt>'({CHAR}|"\"")"'#" {
 600                          yylval.uhstring = installHstring(1, yytext+1);
 601                          RETURN(CHARPRIM);
 602                         }
 603 <Code,GlaExt>'({CHAR}|"\"")'    {
 604                          yylval.uhstring = installHstring(1, yytext+1);
 605                          RETURN(CHAR);
 606                         }
 607 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
 608                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
 609                          hsperror(errbuf);
 610                         }
 611 <Code,GlaExt>'({CHAR}|"\"")* {
 612                          hsmlcolno = hspcolno;
 613                          cleartext();
 614                          addtext(yytext+1, yyleng-1);
 615                          PUSH_STATE(Char);
 616                         }
 617 <Char>({CHAR}|"\"")*'#  {
 618                          unsigned length;
 619                          char *text;
 620
 621                          addtext(yytext, yyleng - 2);
 622                          text = fetchtext(&length);
 623
 624                          if (! nonstandardFlag) {
 625                             char errbuf[ERR_BUF_SIZE];
 626                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
 627                             hsperror(errbuf);
 628                          }
 629
 630                          if (length > 1) {
 631                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
 632                               input_filename, hsplineno, hspcolno + 1);
 633                             format_string(stderr, (unsigned char *) text, length);
 634                             fputs("' too long\n", stderr);
 635                             hsperror("");
 636                          }
 637                          yylval.uhstring = installHstring(1, text);
 638                          hspcolno = hsmlcolno;
 639                          POP_STATE;
 640                          RETURN(CHARPRIM);
 641                         }
 642 <Char>({CHAR}|"\"")*'   {
 643                          unsigned length;
 644                          char *text;
 645
 646                          addtext(yytext, yyleng - 1);
 647                          text = fetchtext(&length);
 648
 649                          if (length > 1) {
 650                             fprintf(stderr, "%s:%d:%d: Character literal '",
 651                               input_filename, hsplineno, hspcolno + 1);
 652                             format_string(stderr, (unsigned char *) text, length);
 653                             fputs("' too long\n", stderr);
 654                             hsperror("");
 655                          }
 656                          yylval.uhstring = installHstring(1, text);
 657                          hspcolno = hsmlcolno;
 658                          POP_STATE;
 659                          RETURN(CHAR);
 660                         }
 661 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
 662
 663
 664 %{
 665     /*
 666      * String literals.  The first form is the quick form, for string literals
 667      * that don't contain backslashes.  Literals with backslashes are lexed
 668      * through multiple rules.  First, we match the open " and as many normal
 669      * characters as possible.  This puts us into the <String> state, where
 670      * a backslash is legal.  Then, we match the backslash and move into the
 671      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
 672      * characters, moving back and forth between <String> and <StringEsc> as more
 673      * backslashes are encountered.  (We may even digress into <Comment> mode if we
 674      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
 675      * of normal characters and the close ".
 676      */
 677 %}
 678
 679 <GlaExt>"\""({CHAR}|"'")*"\""#  {
 680                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
 681                             /* the -3 accounts for the " on front, "# on the end */
 682                          RETURN(STRINGPRIM);
 683                         }
 684 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
 685                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
 686                          RETURN(STRING);
 687                         }
 688 <Code,GlaExt>"\""({CHAR}|"'")* {
 689                          hsmlcolno = hspcolno;
 690                          cleartext();
 691                          addtext(yytext+1, yyleng-1);
 692                          PUSH_STATE(String);
 693                         }
 694 <String>({CHAR}|"'")*"\"#"   {
 695                          unsigned length;
 696                          char *text;
 697
 698                          addtext(yytext, yyleng-2);
 699                          text = fetchtext(&length);
 700
 701                          if (! nonstandardFlag) {
 702                             char errbuf[ERR_BUF_SIZE];
 703                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
 704                             hsperror(errbuf);
 705                          }
 706
 707                          yylval.uhstring = installHstring(length, text);
 708                          hspcolno = hsmlcolno;
 709                          POP_STATE;
 710                          RETURN(STRINGPRIM);
 711                         }
 712 <String>({CHAR}|"'")*"\""   {
 713                          unsigned length;
 714                          char *text;
 715
 716                          addtext(yytext, yyleng-1);
 717                          text = fetchtext(&length);
 718
 719                          yylval.uhstring = installHstring(length, text);
 720                          hspcolno = hsmlcolno;
 721                          POP_STATE;
 722                          RETURN(STRING);
 723                         }
 724 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
 725
 726 %{
 727     /*
 728      * Character and string escapes are roughly the same, but strings have the
 729      * extra `\&' sequence which is not allowed for characters.  Also, comments
 730      * are allowed in the <StringEsc> state.  (See the comment section much
 731      * further down.)
 732      *
 733      * NB: Backslashes and tabs are stored in strings as themselves.
 734      * But if we print them (in printtree.c), they must go out as
 735      * "\\\\" and "\\t" respectively.  (This is because of the bogus
 736      * intermediate format that the parser produces.  It uses '\t' fpr end of
 737      * string, so it needs to be able to escape tabs, which means that it
 738      * also needs to be able to escape the escape character ('\\').  Sigh.
 739      */
 740 %}
 741
 742 <Char>\\                { PUSH_STATE(CharEsc); }
 743 <String>\\&             /* Ignore */ ;
 744 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
 745
 746 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
 747 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
 748
 749 %{
 750 /*
 751  Not 100% correct, tokenizes "foo \  --<>--
 752                                  \ bar"
 753
 754  as "foo  bar", but this is not correct as per Haskell 98 report and its
 755  maximal munch rule for "--"-style comments.
 756
 757  For the moment, not deemed worthy to fix.
 758 */
 759 %}
 760 <StringEsc>"--"[^\n\r]*{NL}?{WS}*  { noGap=FALSE; }
 761
 762 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
 763 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
 764 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
 765 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
 766 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
 767 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
 768 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
 769 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
 770 <CharEsc,StringEsc>BEL  |
 771 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
 772 <CharEsc,StringEsc>BS   |
 773 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
 774 <CharEsc,StringEsc>HT   |
 775 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
 776 <CharEsc,StringEsc>LF   |
 777 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
 778 <CharEsc,StringEsc>VT   |
 779 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
 780 <CharEsc,StringEsc>FF   |
 781 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
 782 <CharEsc,StringEsc>CR   |
 783 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
 784 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
 785 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
 786 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
 787 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
 788 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
 789 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
 790 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
 791 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
 792 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
 793 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
 794 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
 795 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
 796 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
 797 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
 798 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
 799 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
 800 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
 801 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
 802 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
 803 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
 804 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
 805 <CharEsc,StringEsc>{D}+  {
 806                           int i = strtol(yytext, NULL, 10);
 807                           if (i < NCHARS) {
 808                              addchar((char) i);
 809                           } else {
 810                              char errbuf[ERR_BUF_SIZE];
 811                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 812                                 yytext);
 813                              hsperror(errbuf);
 814                           }
 815                           POP_STATE;
 816                         }
 817 <CharEsc,StringEsc>o{O}+ {
 818                           int i = strtol(yytext + 1, NULL, 8);
 819                           if (i < NCHARS) {
 820                              addchar((char) i);
 821                           } else {
 822                              char errbuf[ERR_BUF_SIZE];
 823                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 824                                 yytext);
 825                              hsperror(errbuf);
 826                           }
 827                           POP_STATE;
 828                         }
 829 <CharEsc,StringEsc>x{H}+ {
 830                           int i = strtol(yytext + 1, NULL, 16);
 831                           if (i < NCHARS) {
 832                              addchar((char) i);
 833                           } else {
 834                              char errbuf[ERR_BUF_SIZE];
 835                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 836                                 yytext);
 837                              hsperror(errbuf);
 838                           }
 839                           POP_STATE;
 840                         }
 841
 842
 843 %{
 844     /*
 845      * Nested comments.  The major complication here is in trying to match the
 846      * longest lexemes possible, for better performance.  (See the flex document.)
 847      * That's why the rules look so bizarre.
 848      */
 849 %}
 850
 851 <Code,GlaExt,UserPragma,StringEsc>"{-"  {
 852                           noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment);
 853                         }
 854
 855 <Comment>[^-{]*         |
 856 <Comment>"-"+[^-{}]+    |
 857 <Comment>"{"+[^-{}]+    ;
 858 <Comment>"{-"           { nested_comments++; }
 859 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
 860 <Comment>(.|\n)         ;
 861
 862
 863 %{
 864     /*
 865      * Illegal characters.  This used to be a single rule, but we might as well
 866      * pass on as much information as we have, so now we indicate our state in
 867      * the error message.
 868      */
 869 %}
 870
 871 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  {
 872                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
 873                             input_filename, hsplineno, hspcolno + 1);
 874                          format_string(stderr, (unsigned char *) yytext, 1);
 875                          fputs("'\n", stderr);
 876                          hsperror("");
 877                         }
 878 <Char>(.|\n)            {
 879                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
 880                             input_filename, hsplineno, hspcolno + 1);
 881                          format_string(stderr, (unsigned char *) yytext, 1);
 882                          fputs("' in a character literal\n", stderr);
 883                          hsperror("");
 884                         }
 885 <CharEsc>(.|\n)         {
 886                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
 887                             input_filename, hsplineno, hspcolno + 1);
 888                          format_string(stderr, (unsigned char *) yytext, 1);
 889                          fputs("'\n", stderr);
 890                          hsperror("");
 891                         }
 892 <String>(.|\n)          { if (nonstandardFlag) {
 893                              addtext(yytext, yyleng);
 894                           } else {
 895                                 fprintf(stderr, "%s:%d:%d: Illegal character: `",
 896                                 input_filename, hsplineno, hspcolno + 1);
 897                                 format_string(stderr, (unsigned char *) yytext, 1);
 898                                 fputs("' in a string literal\n", stderr);
 899                                 hsperror("");
 900                           }
 901                         }
 902 <StringEsc>(.|\n)       {
 903                          if (noGap) {
 904                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
 905                                 input_filename, hsplineno, hspcolno + 1);
 906                              format_string(stderr, (unsigned char *) yytext, 1);
 907                              fputs("'\n", stderr);
 908                              hsperror("");
 909                          } else {
 910                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
 911                                 input_filename, hsplineno, hspcolno + 1);
 912                              format_string(stderr, (unsigned char *) yytext, 1);
 913                              fputs("' in a string gap\n", stderr);
 914                              hsperror("");
 915                          }
 916                         }
 917
 918 %{
 919     /*
 920      * End of file.  In any sub-state, this is an error.  However, for the primary
 921      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
 922      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
 923      * adding virtual close curlies, or closing an interface and returning to the
 924      * primary source file.
 925      *
 926      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
 927      * line/column advancement has to be done by hand.
 928      */
 929 %}
 930
 931 <Char,CharEsc><<EOF>>   {
 932                           hsplineno = hslineno; hspcolno = hscolno;
 933                           hsperror("unterminated character literal");
 934                         }
 935 <Comment><<EOF>>        {
 936                           char errbuf[ERR_BUF_SIZE];
 937                           hsplineno = hslineno; hspcolno = hscolno;
 938                           sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
 939                           hsperror(errbuf);
 940                         }
 941 <String,StringEsc><<EOF>>   {
 942                           hsplineno = hslineno; hspcolno = hscolno;
 943                           hsperror("unterminated string literal");
 944                         }
 945 <UserPragma><<EOF>>     {
 946                           hsplineno = hslineno; hspcolno = hscolno;
 947                           hsperror("unterminated user-specified pragma");
 948                         }
 949 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
 950
 951 %%
 952
 953 /**********************************************************************
 954 *                                                                     *
 955 *                                                                     *
 956 *     YACC/LEX Initialisation etc.                                    *
 957 *                                                                     *
 958 *                                                                     *
 959 **********************************************************************/
 960
 961 /*
 962    We initialise input_filename to "<stdin>".
 963    This allows unnamed sources to be piped into the parser.
 964 */
 965
 966 void
 967 yyinit(void)
 968 {
 969     input_filename = xstrdup("<stdin>");
 970
 971     /* We must initialize the input buffer _now_, because we call
 972        setyyin _before_ calling yylex for the first time! */
 973     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 974
 975     if (nonstandardFlag)
 976         PUSH_STATE(GlaExt);
 977     else
 978         PUSH_STATE(Code);
 979 }
 980
 981 static void
 982 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
 983 {
 984     if (input_filename != NULL)
 985         free(input_filename);
 986     input_filename = xstrdup(f);
 987 }
 988
 989 /**********************************************************************
 990 *                                                                     *
 991 *                                                                     *
 992 *     Layout Processing                                               *
 993 *                                                                     *
 994 *                                                                     *
 995 **********************************************************************/
 996
 997 /*
 998         The following section deals with Haskell Layout conventions
 999         forcing insertion of ; or } as appropriate
1000 */
1001
1002 #ifdef HSP_DEBUG
1003 #define LAYOUT_DEBUG
1004 #endif
1005
1006
1007 static BOOLEAN
1008 hsshouldindent(void)
1009 {
1010     return (!forgetindent && INDENTON);
1011 }
1012
1013
1014 /* Enter new context and set new indentation level */
1015 void
1016 hssetindent(void)
1017 {
1018 #ifdef LAYOUT_DEBUG
1019     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1020 #endif
1021
1022     /*
1023      * partain: first chk that new indent won't be less than current one; this code
1024      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1025      * current token; what that has to do with indenting, I don't know.
1026      */
1027
1028
1029     if (hscolno - 1 <= INDENTPT) {
1030         if (INDENTPT == -1)
1031             return;             /* Empty input OK for Haskell 1.1 */
1032         else {
1033             char errbuf[ERR_BUF_SIZE];
1034
1035             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1036             hsperror(errbuf);
1037         }
1038     }
1039     hsentercontext((hspcolno << 1) | 1);
1040 }
1041
1042
1043 /* Enter a new context without changing the indentation level */
1044 void
1045 hsincindent(void)
1046 {
1047 #ifdef LAYOUT_DEBUG
1048     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1049 #endif
1050     hsentercontext(indenttab[icontexts] & ~1);
1051 }
1052
1053
1054 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1055 void
1056 hsindentoff(void)
1057 {
1058     forgetindent = TRUE;
1059 }
1060
1061
1062 /* Enter a new layout context. */
1063 static void
1064 hsentercontext(int indent)
1065 {
1066     /* Enter new context and set indentation as specified */
1067     if (++icontexts >= MAX_CONTEXTS) {
1068         char errbuf[ERR_BUF_SIZE];
1069
1070         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1071         hsperror(errbuf);
1072     }
1073     forgetindent = FALSE;
1074     indenttab[icontexts] = indent;
1075 #ifdef LAYOUT_DEBUG
1076     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1077 #endif
1078 }
1079
1080
1081 /* Exit a layout context */
1082 void
1083 hsendindent(void)
1084 {
1085     --icontexts;
1086 #ifdef LAYOUT_DEBUG
1087     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1088 #endif
1089 }
1090
1091 /*
1092  *      Return checks the indentation level and returns ;, } or the specified token.
1093  */
1094 static int
1095 Return(int tok)
1096 {
1097 #ifdef HSP_DEBUG
1098     extern int yyleng;
1099 #endif
1100     if (hsshouldindent()) {
1101         if (hspcolno < INDENTPT) {
1102 #ifdef HSP_DEBUG
1103             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1104 #endif
1105             hssttok = tok;
1106             return (VCCURLY);
1107         } else if (hspcolno == INDENTPT) {
1108 #ifdef HSP_DEBUG
1109             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1110 #endif
1111             hssttok = -tok;
1112             return (SEMI);
1113         }
1114     }
1115
1116     hssttok = -1;
1117 #ifdef HSP_DEBUG
1118     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1119 #endif
1120     return (tok);
1121 }
1122
1123
1124 /*
1125  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1126  */
1127 int
1128 yylex()
1129 {
1130     int tok;
1131     static BOOLEAN eof = FALSE;
1132
1133     if (!eof) {
1134         if (hssttok != -1) {
1135             if (hssttok < 0) {
1136                 tok = -hssttok;
1137                 hssttok = -1;
1138                 return tok;
1139             }
1140             RETURN(hssttok);
1141         } else {
1142             endlineno = hslineno;
1143             if ((tok = yylex1()) != EOF)
1144                 return tok;
1145             else
1146                 eof = TRUE;
1147         }
1148     }
1149     if (icontexts > icontexts_save) {
1150         if (INDENTON) {
1151             eof = TRUE;
1152             indenttab[icontexts] = 0;
1153             return (VCCURLY);
1154         } else
1155             hsperror("missing '}' at end of file");
1156     } else if (hsbuf_save != NULL) {
1157         fclose(yyin);
1158         yy_delete_buffer(YY_CURRENT_BUFFER);
1159         yy_switch_to_buffer(hsbuf_save);
1160         hsbuf_save = NULL;
1161         new_filename(filename_save);
1162         free(filename_save);
1163         hslineno = hslineno_save;
1164         hsplineno = hsplineno_save;
1165         hscolno = hscolno_save;
1166         hspcolno = hspcolno_save;
1167         etags = etags_save;
1168         icontexts = icontexts_save - 1;
1169         icontexts_save = 0;
1170 #ifdef HSP_DEBUG
1171         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1172 #endif
1173         eof = FALSE;
1174
1175         /* RETURN(LEOF); */
1176         hsperror("No longer using yacc to parse interface files");
1177
1178     } else {
1179         yyterminate();
1180     }
1181     abort(); /* should never get here! */
1182     return(0);
1183 }
1184
1185 /**********************************************************************
1186 *                                                                     *
1187 *                                                                     *
1188 *     Input Processing for Interfaces -- Not currently used !!!       *
1189 *                                                                     *
1190 *                                                                     *
1191 **********************************************************************/
1192
1193 /* setyyin(file)        open file as new lex input buffer */
1194 extern FILE *yyin;
1195
1196 void
1197 setyyin(char *file)
1198 {
1199     hsbuf_save = YY_CURRENT_BUFFER;
1200     if ((yyin = fopen(file, "r")) == NULL) {
1201         char errbuf[ERR_BUF_SIZE];
1202
1203         sprintf(errbuf, "can't read \"%-.50s\"", file);
1204         hsperror(errbuf);
1205     }
1206     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1207
1208     hslineno_save = hslineno;
1209     hsplineno_save = hsplineno;
1210     hslineno = hsplineno = 1;
1211
1212     filename_save = input_filename;
1213     input_filename = NULL;
1214     new_filename(file);
1215     hscolno_save = hscolno;
1216     hspcolno_save = hspcolno;
1217     hscolno = hspcolno = 0;
1218     etags_save = etags; /* do not do "etags" stuff in interfaces */
1219     etags = 0;          /* We remember whether we are doing it in
1220                            the module, so we can restore it later [WDP 94/09] */
1221     hsentercontext(-1);         /* partain: changed this from 0 */
1222     icontexts_save = icontexts;
1223 #ifdef HSP_DEBUG
1224     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1225 #endif
1226 }
1227
1228 static void
1229 layout_input(char *text, int len)
1230 {
1231 #ifdef HSP_DEBUG
1232     fprintf(stderr, "Scanning \"%s\"\n", text);
1233 #endif
1234
1235     hsplineno = hslineno;
1236     hspcolno = hscolno;
1237
1238     while (len-- > 0) {
1239         switch (*text++) {
1240         case '\n':
1241         case '\r':
1242         case '\f':
1243             hslineno++;
1244             hscolno = 0;
1245             break;
1246         case '\t':
1247             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1248             break;
1249         case '\v':
1250             break;
1251         default:
1252             ++hscolno;
1253             break;
1254         }
1255     }
1256 }
1257
1258 void
1259 setstartlineno(void)
1260 {
1261     startlineno = hsplineno;
1262
1263     if (modulelineno == 0) {
1264         modulelineno = startlineno;
1265     }
1266
1267 #if 1/*etags*/
1268 #else
1269     if (etags)
1270         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1271 #endif
1272 }
1273
1274 /**********************************************************************
1275 *                                                                     *
1276 *                                                                     *
1277 *                      Text Caching                                   *
1278 *                                                                     *
1279 *                                                                     *
1280 **********************************************************************/
1281
1282 #define CACHE_SIZE YY_BUF_SIZE
1283
1284 static struct {
1285     unsigned allocated;
1286     unsigned next;
1287     char *text;
1288 } textcache = { 0, 0, NULL };
1289
1290 static void
1291 cleartext(void)
1292 {
1293 /*  fprintf(stderr, "cleartext\n"); */
1294     textcache.next = 0;
1295     if (textcache.allocated == 0) {
1296         textcache.allocated = CACHE_SIZE;
1297         textcache.text = xmalloc(CACHE_SIZE);
1298     }
1299 }
1300
1301 static void
1302 addtext(char *text, unsigned length)
1303 {
1304 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1305
1306     if (length == 0)
1307         return;
1308
1309     if (textcache.next + length + 1 >= textcache.allocated) {
1310         textcache.allocated += length + CACHE_SIZE;
1311         textcache.text = xrealloc(textcache.text, textcache.allocated);
1312     }
1313     bcopy(text, textcache.text + textcache.next, length);
1314     textcache.next += length;
1315 }
1316
1317 static void
1318 addchar(char c)
1319 {
1320 /*  fprintf(stderr, "addchar: %c\n", c); */
1321
1322     if (textcache.next + 2 >= textcache.allocated) {
1323         textcache.allocated += CACHE_SIZE;
1324         textcache.text = xrealloc(textcache.text, textcache.allocated);
1325     }
1326     textcache.text[textcache.next++] = c;
1327 }
1328
1329 static char *
1330 fetchtext(unsigned *length)
1331 {
1332 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1333
1334     *length = textcache.next;
1335     textcache.text[textcache.next] = '\0';
1336     return textcache.text;
1337 }
1338
1339 /**********************************************************************
1340 *                                                                     *
1341 *                                                                     *
1342 *    Identifier Processing                                             *
1343 *                                                                     *
1344 *                                                                     *
1345 **********************************************************************/
1346
1347 /*
1348         hsnewid         Enters an id of length n into the symbol table.
1349 */
1350
1351 static void
1352 hsnewid(char *name, int length)
1353 {
1354     char save = name[length];
1355
1356     name[length] = '\0';
1357     yylval.uid = installid(name);
1358     name[length] = save;
1359 }
1360
1361 BOOLEAN
1362 hsnewqid(char *name, int length)
1363 {
1364     char* dot;
1365     char save = name[length];
1366     name[length] = '\0';
1367
1368     dot = strchr(name, '.');
1369     *dot = '\0';
1370     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1371     *dot = '.';
1372     name[length] = save;
1373
1374     return isconstr(dot+1);
1375 }
1376
1377 static
1378 BOOLEAN
1379 is_commment(char* lexeme, int len)
1380 {
1381    char* ptr;
1382    int i;
1383
1384    if (len < 2) {
1385       return FALSE;
1386    }
1387
1388    for(i=0;i<len;i++) {
1389      if (lexeme[i] != '-') return FALSE;
1390    }
1391    return TRUE;
1392 }
1393