ghc/compiler/parser/hslexer.flex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       FLEX for Haskell.                                             *
   6 *       -----------------                                             *
   7 *                                                                     *
   8 **********************************************************************/
   9
  10 /* The includes/config.h one */
  11 #include "config.h"
  12
  13 #include <stdio.h>
  14
  15 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
  16 #include <string.h>
  17 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
  18 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
  19 #include <memory.h>
  20 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
  21 #define index strchr
  22 #define rindex strrchr
  23 #define bcopy(s, d, n) memcpy ((d), (s), (n))
  24 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  25 #define bzero(s, n) memset ((s), 0, (n))
  26 #else /* not STDC_HEADERS and not HAVE_STRING_H */
  27 #include <strings.h>
  28 /* memory.h and strings.h conflict on some systems.  */
  29 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
  30
  31 #include "hspincl.h"
  32 #include "hsparser.tab.h"
  33 #include "constants.h"
  34 #include "utils.h"
  35
  36 /* Our substitute for <ctype.h> */
  37
  38 #define NCHARS  256
  39 #define _S      0x1
  40 #define _D      0x2
  41 #define _H      0x4
  42 #define _O      0x8
  43 #define _C      0x10
  44
  45 static unsigned char CharTable[NCHARS] = {
  46 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
  47 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
  48 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
  49 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
  50 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
  51 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,
  52 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
  53 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
  54 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
  55 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  56 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  57 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,
  58 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
  59 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
  60 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
  61 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
  62
  63 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  64 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  79 };
  80
  81 BOOLEAN
  82 isconstr (char *s)
  83 {
  84     return(CharTable[*s]&(_C));
  85 }
  86
  87 /**********************************************************************
  88 *                                                                     *
  89 *                                                                     *
  90 *      Declarations                                                   *
  91 *                                                                     *
  92 *                                                                     *
  93 **********************************************************************/
  94
  95 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
  96
  97 /*
  98  * For my own sanity, things that are not part of the flex skeleton
  99  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
 100  */
 101
 102 static int hslineno = 0;        /* Line number at end of token */
 103 int hsplineno = 0;              /* Line number at end of previous token */
 104
 105 static int hscolno = 0;         /* Column number at end of token */
 106 int hspcolno = 0;               /* Column number at end of previous token */
 107 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
 108
 109 int modulelineno = -1;          /* The line number where the module starts */
 110 int startlineno = 0;            /* The line number where something starts */
 111 int endlineno = 0;              /* The line number where something ends */
 112
 113 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
 114 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
 115
 116 static int nested_comments;     /* For counting comment nesting depth */
 117 static int comment_start;
 118
 119 /* OLD: Hacky definition of yywrap: see flex doc.
 120
 121    If we don't do this, then we'll have to get the default
 122    yywrap from the flex library, which is often something
 123    we are not good at locating.  This avoids that difficulty.
 124    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
 125    WDP 94/09/05
 126 #define yywrap() 1
 127 */
 128
 129 /* Essential forward declarations */
 130
 131 static void hsnewid      PROTO((char *, int));
 132 static void layout_input PROTO((char *, int));
 133 static void cleartext    (NO_ARGS);
 134 static void addtext      PROTO((char *, unsigned));
 135 static void addchar      PROTO((char));
 136 static char *fetchtext   PROTO((unsigned *));
 137 static void new_filename PROTO((char *));
 138 static int  Return       PROTO((int));
 139 static void hsentercontext PROTO((int));
 140
 141 static BOOLEAN is_commment PROTO((char*, int));
 142
 143 /* Special file handling for IMPORTS */
 144 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 145
 146 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
 147 static char *filename_save;             /* File Name                     */
 148 static int hslineno_save = 0,           /* Line Number                   */
 149  hsplineno_save = 0,                    /* Line Number of Prev. token    */
 150  hscolno_save = 0,                      /* Indentation                   */
 151  hspcolno_save = 0;                     /* Left Indentation              */
 152 static short icontexts_save = 0;        /* Indent Context Level          */
 153
 154 static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
 155 extern BOOLEAN etags;                   /* that which is saved */
 156
 157 extern BOOLEAN nonstandardFlag;         /* Glasgow extensions allowed */
 158
 159 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
 160                                  * inserted before token +ve  -- "}" inserted before
 161                                  * token */
 162
 163 short icontexts = 0;            /* Which context we're in */
 164
 165 /*
 166         Table of indentations:  right bit indicates whether to use
 167           indentation rules (1 = use rules; 0 = ignore)
 168
 169     partain:
 170     push one of these "contexts" at every "case" or "where"; the right bit says
 171     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
 172
 173     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 174     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 175     push is shown just below.
 176
 177 */
 178
 179
 180 static short indenttab[MAX_CONTEXTS] = {-1};
 181
 182 #define INDENTPT (indenttab[icontexts]>>1)
 183 #define INDENTON (indenttab[icontexts]&1)
 184
 185 #define RETURN(tok) return(Return(tok))
 186
 187 #undef YY_DECL
 188 #define YY_DECL int yylex1()
 189
 190 /* We should not peek at yy_act, but flex calls us even for the internal action
 191    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
 192    to support older versions of flex, we'll continue to peek for now.
 193  */
 194 #define YY_USER_ACTION \
 195     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
 196
 197 #if 0/*debug*/
 198 #undef YY_BREAK
 199 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
 200 #endif
 201
 202 /* Each time we enter a new start state, we push it onto the state stack.
 203 */
 204 #define PUSH_STATE(n)   yy_push_state(n)
 205 #define POP_STATE       yy_pop_state()
 206
 207 %}
 208 /* Options:
 209     8bit (8-bit input)
 210     noyywrap (do not call yywrap on end of file; avoid use of -lfl)
 211     never-interactive (to go a bit faster)
 212     stack (use a start-condition stack)
 213 */
 214 %option 8bit
 215 %option noyywrap
 216 %option never-interactive
 217 %option stack
 218
 219 /* The start states are:
 220    Code -- normal Haskell code (principal lexer)
 221    GlaExt -- Haskell code with Glasgow extensions
 222    Comment -- Nested comment processing
 223    String -- Inside a string literal with backslashes
 224    StringEsc -- Immediately following a backslash in a string literal
 225    Char -- Inside a character literal with backslashes
 226    CharEsc -- Immediately following a backslash in a character literal
 227
 228    Note that the INITIAL state is unused.  Also note that these states
 229    are _exclusive_.  All rules should be prefixed with an appropriate
 230    list of start states.
 231  */
 232
 233 %x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
 234
 235 isoS                    [\xa1-\xbf\xd7\xf7]
 236 isoL                    [\xc0-\xd6\xd8-\xde]
 237 isol                    [\xdf-\xf6\xf8-\xff]
 238 isoA                    [\xa1-\xff]
 239
 240 D                       [0-9]
 241 O                       [0-7]
 242 H                       [0-9A-Fa-f]
 243 N                       {D}+
 244 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 245 S                       [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
 246 SId                     {S}{S}*
 247 L                       [A-Z\xc0-\xd6\xd8-\xde]
 248 l                       [a-z_\xdf-\xf6\xf8-\xff]
 249 I                       {L}|{l}
 250 i                       {L}|{l}|[0-9'_]
 251 Id                      {I}{i}*
 252 Mod                     {L}{i}*
 253 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
 254 CNTRL                   [@A-Z\[\\\]^_]
 255 WS                      [ \t\n\r\f\v]
 256 NL                      [\n\r]
 257
 258 %%
 259
 260 %{
 261     /*
 262      * Simple comments and whitespace.  Normally, we would just ignore these, but
 263      * in case we're processing a string escape, we need to note that we've seen
 264      * a gap.
 265      *
 266      * Note that we cater for a comment line that *doesn't* end in a newline.
 267      * This is incorrect, strictly speaking, but seems like the right thing
 268      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
 269      *
 270      * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
 271      */
 272 %}
 273
 274 <Code,GlaExt,UserPragma,StringEsc>{WS}+ { noGap = FALSE; }
 275
 276 %{
 277     /*
 278      * Special GHC pragma rules.  Do we need a start state for interface files,
 279      * so these won't be matched in source files? --JSM
 280      */
 281
 282 %}
 283
 284 %{
 285 /* I believe the next rule is not ever matched.
 286
 287    The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
 288    Driver has now been modified to output `standard' {-# LINE ..-} pragmas
 289    where possible, so the lexer should now never see cpp directives
 290    like '# ' and '#line'.
 291
 292    -- SOF
 293
 294 <Code,GlaExt>^"# ".*{NL}    {
 295                           char tempf[FILENAME_SIZE];
 296                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
 297                           new_filename(tempf);
 298                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 299                         }
 300
 301 <Code,GlaExt>^"#line ".*{NL}    {
 302                           char tempf[FILENAME_SIZE];
 303                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
 304                           new_filename(tempf);
 305                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 306                         }
 307 */
 308 %}
 309
 310 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
 311                           /* partain: pragma-style line directive */
 312                           char tempf[FILENAME_SIZE];
 313                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
 314                           new_filename(tempf);
 315                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 316                         }
 317
 318 <Code,GlaExt>"{-#"{WS}*"INTERFACE" {
 319                               PUSH_STATE(UserPragma);
 320                               RETURN(INTERFACE_UPRAGMA);
 321                             }
 322 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
 323                               PUSH_STATE(UserPragma);
 324                               RETURN(SPECIALISE_UPRAGMA);
 325                             }
 326 <Code,GlaExt>"{-#"{WS}*"speciali"[sz]e {
 327                               PUSH_STATE(UserPragma);
 328                               RETURN(SPECIALISE_UPRAGMA);
 329                             }
 330 <Code,GlaExt>"{-#"{WS}*"INLINE" {
 331                               PUSH_STATE(UserPragma);
 332                               RETURN(INLINE_UPRAGMA);
 333                             }
 334 <Code,GlaExt>"{-#"{WS}*"inline" {
 335                               PUSH_STATE(UserPragma);
 336                               RETURN(INLINE_UPRAGMA);
 337                             }
 338 <Code,GlaExt>"{-#"{WS}*"NOINLINE" {
 339                               PUSH_STATE(UserPragma);
 340                               RETURN(NOINLINE_UPRAGMA);
 341                             }
 342 <Code,GlaExt>"{-#"{WS}*"notInline" {
 343                               PUSH_STATE(UserPragma);
 344                               RETURN(NOINLINE_UPRAGMA);
 345                             }
 346 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
 347                               PUSH_STATE(UserPragma);
 348                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
 349                             }
 350 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
 351                               /* these are handled by hscpp */
 352                               nested_comments =1; comment_start = hsplineno;
 353                               PUSH_STATE(Comment);
 354                             }
 355 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
 356                               /* these are for the driver! */
 357                               nested_comments =1; comment_start = hsplineno;
 358                               PUSH_STATE(Comment);
 359                             }
 360 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
 361                               /* these are used by `make depend' and the
 362                                  compiler to indicate that a module should
 363                                  be imported from source */
 364                               nested_comments =1; comment_start = hsplineno;
 365                               RETURN(SOURCE_UPRAGMA);
 366                             }
 367
 368 <Code,GlaExt>"{-#"{WS}*[a-zA-Z_]+ {
 369                               fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
 370                                 input_filename, hsplineno);
 371                               format_string(stderr, (unsigned char *) yytext, yyleng);
 372                               fputs("'\n", stderr);
 373                               nested_comments = 1; comment_start = hsplineno;
 374                               PUSH_STATE(Comment);
 375                             }
 376 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
 377
 378 %{
 379     /*
 380      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
 381      * intentionally accepted as a keyword even for normal <Code>.
 382      */
 383 %}
 384
 385 <Code,GlaExt>"case"             { RETURN(CASE); }
 386 <Code,GlaExt>"class"            { RETURN(CLASS); }
 387 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
 388 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
 389 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
 390 <Code,GlaExt>"do"               { RETURN(DO); }
 391 <Code,GlaExt>"else"             { RETURN(ELSE); }
 392 <Code,GlaExt>"if"               { RETURN(IF); }
 393 <Code,GlaExt>"import"           { RETURN(IMPORT); }
 394 <Code,GlaExt>"in"               { RETURN(IN); }
 395 <Code,GlaExt>"infix"            { RETURN(INFIX); }
 396 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
 397 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
 398 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
 399 <Code,GlaExt>"let"              { RETURN(LET); }
 400 <Code,GlaExt>"module"           { RETURN(MODULE); }
 401 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
 402 <Code,GlaExt>"of"               { RETURN(OF); }
 403 <Code,GlaExt>"then"             { RETURN(THEN); }
 404 <Code,GlaExt>"type"             { RETURN(TYPE); }
 405 <Code,GlaExt>"where"            { RETURN(WHERE); }
 406
 407 <Code,GlaExt>"as"               { RETURN(AS); }
 408 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
 409 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
 410
 411 <Code,GlaExt>"forall"           { RETURN(FORALL); }
 412
 413 <Code,GlaExt>"_scc_"            { RETURN(SCC); }
 414 <GlaExt>"_ccall_"               { RETURN(CCALL); }
 415 <GlaExt>"_ccall_GC_"            { RETURN(CCALL_GC); }
 416 <GlaExt>"_casm_"                { RETURN(CASM); }
 417 <GlaExt>"_casm_GC_"             { RETURN(CASM_GC); }
 418 <GlaExt>"(#"                    { RETURN(OUNBOXPAREN); }
 419 <GlaExt>"#)"                    { RETURN(CUNBOXPAREN); }
 420 <GlaExt>"foreign"               { RETURN(FOREIGN); }
 421 <GlaExt>"export"                { RETURN(EXPORT); }
 422 <GlaExt>"label"                 { RETURN(LABEL); }
 423 <GlaExt>"unsafe"                { RETURN(UNSAFE); }
 424 <GlaExt>"_stdcall"              { RETURN(STDCALL); }
 425 <GlaExt>"_ccall"                { RETURN(C_CALL); }
 426 <GlaExt>"_pascal"               { RETURN(PASCAL); }
 427 <GlaExt>"stdcall"               { RETURN(STDCALL); }
 428 <GlaExt>"ccall"                 { RETURN(C_CALL); }
 429 <GlaExt>"pascal"                { RETURN(PASCAL); }
 430 <GlaExt>"dynamic"               { RETURN(DYNAMIC); }
 431
 432 %{
 433     /*
 434      * Haskell operators: special, reservedops and useful varsyms
 435      */
 436 %}
 437
 438 <Code,GlaExt,UserPragma>"("     { RETURN(OPAREN); }
 439 <Code,GlaExt,UserPragma>")"     { RETURN(CPAREN); }
 440 <Code,GlaExt,UserPragma>"["     { RETURN(OBRACK); }
 441 <Code,GlaExt,UserPragma>"]"     { RETURN(CBRACK); }
 442 <Code,GlaExt>"{"                { RETURN(OCURLY); }
 443 <Code,GlaExt>"}"                { RETURN(CCURLY); }
 444 <Code,GlaExt,UserPragma>","     { RETURN(COMMA); }
 445 <Code,GlaExt>";"                { RETURN(SEMI); }
 446 <Code,GlaExt>"`"                { RETURN(BQUOTE); }
 447
 448 <Code,GlaExt>"."                { RETURN(DOT); }
 449 <Code,GlaExt>".."               { RETURN(DOTDOT); }
 450 <Code,GlaExt,UserPragma>"::"    { RETURN(DCOLON); }
 451 <Code,GlaExt,UserPragma>"="     { RETURN(EQUAL); }
 452 <Code,GlaExt>"\\"               { RETURN(LAMBDA); }
 453 <Code,GlaExt>"|"                { RETURN(VBAR); }
 454 <Code,GlaExt>"<-"               { RETURN(LARROW); }
 455 <Code,GlaExt,UserPragma>"->"    { RETURN(RARROW); }
 456 <Code,GlaExt>"-"                { RETURN(MINUS); }
 457 <Code,GlaExt>"+"                { RETURN(PLUS); }
 458
 459 <Code,GlaExt,UserPragma>"=>"    { RETURN(DARROW); }
 460 <Code,GlaExt>"@"                { RETURN(AT); }
 461 <Code,GlaExt>"!"                { RETURN(BANG); }
 462 <Code,GlaExt>"~"                { RETURN(LAZY); }
 463
 464 %{
 465     /*
 466      * Integers and (for Glasgow extensions) primitive integers.  Note that
 467      * we pass all of the text on to the parser, because flex/C can't handle
 468      * arbitrary precision numbers.
 469      */
 470 %}
 471
 472 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
 473                          yylval.uid = xstrndup(yytext, yyleng - 1);
 474                          RETURN(INTPRIM);
 475                         }
 476 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
 477                          yylval.uid = xstrndup(yytext, yyleng);
 478                          RETURN(INTEGER);
 479                         }
 480 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
 481                          yylval.uid = xstrndup(yytext, yyleng - 1);
 482                          RETURN(INTPRIM);
 483                         }
 484 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
 485                          yylval.uid = xstrndup(yytext, yyleng);
 486                          RETURN(INTEGER);
 487                         }
 488 <GlaExt>("-")?{N}"#"    {
 489                          yylval.uid = xstrndup(yytext, yyleng - 1);
 490                          RETURN(INTPRIM);
 491                         }
 492 <Code,GlaExt,UserPragma>{N} {
 493                          yylval.uid = xstrndup(yytext, yyleng);
 494                          RETURN(INTEGER);
 495                         }
 496
 497 %{
 498     /*
 499      * Floats and (for Glasgow extensions) primitive floats/doubles.
 500      */
 501 %}
 502
 503 <GlaExt>("-")?{F}"##"   {
 504                          yylval.uid = xstrndup(yytext, yyleng - 2);
 505                          RETURN(DOUBLEPRIM);
 506                         }
 507 <GlaExt>("-")?{F}"#"    {
 508                          yylval.uid = xstrndup(yytext, yyleng - 1);
 509                          RETURN(FLOATPRIM);
 510                         }
 511 <Code,GlaExt>{F}        {
 512                          yylval.uid = xstrndup(yytext, yyleng);
 513                          RETURN(FLOAT);
 514                         }
 515
 516 %{
 517     /*
 518      * Funky ``foo'' style C literals for Glasgow extensions
 519      */
 520 %}
 521
 522 <GlaExt>"``"[^']+"''"   {
 523                          hsnewid(yytext + 2, yyleng - 4);
 524                          RETURN(CLITLIT);
 525                         }
 526
 527 %{
 528     /*
 529      * Identifiers, both variables and operators.  The trailing hash is allowed
 530      * for Glasgow extensions.
 531      */
 532 %}
 533
 534
 535 %{
 536 /* These SHOULDNAE work in "Code" (sigh) */
 537 %}
 538 <GlaExt,UserPragma>{Id}"#" {
 539                         if (! nonstandardFlag) {
 540                             char errbuf[ERR_BUF_SIZE];
 541                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 542                             hsperror(errbuf);
 543                          }
 544                          hsnewid(yytext, yyleng);
 545                          RETURN(isconstr(yytext) ? CONID : VARID);
 546                         }
 547 <Code,GlaExt,UserPragma>{Id}    {
 548                          hsnewid(yytext, yyleng);
 549                          RETURN(isconstr(yytext) ? CONID : VARID);
 550                         }
 551 <Code,GlaExt,UserPragma>{SId}   {
 552                          if (is_commment(yytext,yyleng)) {
 553                                 int c;
 554                                 while ((c = input()) != '\n' && c != '\r' && c!= EOF )
 555                                         ;
 556                                 if (c != EOF)
 557                                    unput(c);
 558                          } else {
 559                             hsnewid(yytext, yyleng);
 560                             RETURN(isconstr(yytext) ? CONSYM : VARSYM);
 561                          }
 562                         }
 563 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#" {
 564                          BOOLEAN is_constr;
 565                          if (! nonstandardFlag) {
 566                             char errbuf[ERR_BUF_SIZE];
 567                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 568                             hsperror(errbuf);
 569                          }
 570                          is_constr = hsnewqid(yytext, yyleng);
 571                          RETURN(is_constr ? QCONID : QVARID);
 572                         }
 573 <Code,GlaExt,UserPragma>{Mod}"."{Id}    {
 574                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
 575                          RETURN(is_constr ? QCONID : QVARID);
 576                         }
 577 <Code,GlaExt,UserPragma>{Mod}"."{SId}   {
 578                          BOOLEAN is_constr = hsnewqid(yytext, yyleng);
 579                          RETURN(is_constr ? QCONSYM : QVARSYM);
 580                         }
 581
 582 %{
 583     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
 584
 585     /* Because we can make the former well-behaved (we defined them).
 586
 587        Sadly, the latter is defined by Haskell, which allows such
 588        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
 589     */
 590 %}
 591
 592 <GlaExt,UserPragma>"`"{Id}"#`"  {
 593                          hsnewid(yytext + 1, yyleng - 2);
 594                          RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
 595                         }
 596
 597 %{
 598     /*
 599      * Character literals.  The first form is the quick form, for character
 600      * literals that don't contain backslashes.  Literals with backslashes are
 601      * lexed through multiple rules.  First, we match the open ' and as many
 602      * normal characters as possible.  This puts us into the <Char> state, where
 603      * a backslash is legal.  Then, we match the backslash and move into the
 604      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
 605      * characters and the close '.  We may end up with too many characters, but
 606      * this allows us to easily share the lex rules with strings.  Excess characters
 607      * are ignored with a warning.
 608      */
 609 %}
 610
 611 <GlaExt>'({CHAR}|"\"")"'#" {
 612                          yylval.uhstring = installHstring(1, yytext+1);
 613                          RETURN(CHARPRIM);
 614                         }
 615 <Code,GlaExt>'({CHAR}|"\"")'    {
 616                          yylval.uhstring = installHstring(1, yytext+1);
 617                          RETURN(CHAR);
 618                         }
 619 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
 620                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
 621                          hsperror(errbuf);
 622                         }
 623 <Code,GlaExt>'({CHAR}|"\"")* {
 624                          hsmlcolno = hspcolno;
 625                          cleartext();
 626                          addtext(yytext+1, yyleng-1);
 627                          PUSH_STATE(Char);
 628                         }
 629 <Char>({CHAR}|"\"")*'#  {
 630                          unsigned length;
 631                          char *text;
 632
 633                          addtext(yytext, yyleng - 2);
 634                          text = fetchtext(&length);
 635
 636                          if (! nonstandardFlag) {
 637                             char errbuf[ERR_BUF_SIZE];
 638                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
 639                             hsperror(errbuf);
 640                          }
 641
 642                          if (length > 1) {
 643                             fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
 644                               input_filename, hsplineno, hspcolno + 1);
 645                             format_string(stderr, (unsigned char *) text, length);
 646                             fputs("' too long\n", stderr);
 647                             hsperror("");
 648                          }
 649                          yylval.uhstring = installHstring(1, text);
 650                          hspcolno = hsmlcolno;
 651                          POP_STATE;
 652                          RETURN(CHARPRIM);
 653                         }
 654 <Char>({CHAR}|"\"")*'   {
 655                          unsigned length;
 656                          char *text;
 657
 658                          addtext(yytext, yyleng - 1);
 659                          text = fetchtext(&length);
 660
 661                          if (length > 1) {
 662                             fprintf(stderr, "%s:%d:%d: Character literal '",
 663                               input_filename, hsplineno, hspcolno + 1);
 664                             format_string(stderr, (unsigned char *) text, length);
 665                             fputs("' too long\n", stderr);
 666                             hsperror("");
 667                          }
 668                          yylval.uhstring = installHstring(1, text);
 669                          hspcolno = hsmlcolno;
 670                          POP_STATE;
 671                          RETURN(CHAR);
 672                         }
 673 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
 674
 675
 676 %{
 677     /*
 678      * String literals.  The first form is the quick form, for string literals
 679      * that don't contain backslashes.  Literals with backslashes are lexed
 680      * through multiple rules.  First, we match the open " and as many normal
 681      * characters as possible.  This puts us into the <String> state, where
 682      * a backslash is legal.  Then, we match the backslash and move into the
 683      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
 684      * characters, moving back and forth between <String> and <StringEsc> as more
 685      * backslashes are encountered.  (We may even digress into <Comment> mode if we
 686      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
 687      * of normal characters and the close ".
 688      */
 689 %}
 690
 691 <GlaExt>"\""({CHAR}|"'")*"\""#  {
 692                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
 693                             /* the -3 accounts for the " on front, "# on the end */
 694                          RETURN(STRINGPRIM);
 695                         }
 696 <Code,GlaExt>"\""({CHAR}|"'")*"\""  {
 697                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
 698                          RETURN(STRING);
 699                         }
 700 <Code,GlaExt>"\""({CHAR}|"'")* {
 701                          hsmlcolno = hspcolno;
 702                          cleartext();
 703                          addtext(yytext+1, yyleng-1);
 704                          PUSH_STATE(String);
 705                         }
 706 <String>({CHAR}|"'")*"\"#"   {
 707                          unsigned length;
 708                          char *text;
 709
 710                          addtext(yytext, yyleng-2);
 711                          text = fetchtext(&length);
 712
 713                          if (! nonstandardFlag) {
 714                             char errbuf[ERR_BUF_SIZE];
 715                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
 716                             hsperror(errbuf);
 717                          }
 718
 719                          yylval.uhstring = installHstring(length, text);
 720                          hspcolno = hsmlcolno;
 721                          POP_STATE;
 722                          RETURN(STRINGPRIM);
 723                         }
 724 <String>({CHAR}|"'")*"\""   {
 725                          unsigned length;
 726                          char *text;
 727
 728                          addtext(yytext, yyleng-1);
 729                          text = fetchtext(&length);
 730
 731                          yylval.uhstring = installHstring(length, text);
 732                          hspcolno = hsmlcolno;
 733                          POP_STATE;
 734                          RETURN(STRING);
 735                         }
 736 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
 737
 738 %{
 739     /*
 740      * Character and string escapes are roughly the same, but strings have the
 741      * extra `\&' sequence which is not allowed for characters.  Also, comments
 742      * are allowed in the <StringEsc> state.  (See the comment section much
 743      * further down.)
 744      *
 745      * NB: Backslashes and tabs are stored in strings as themselves.
 746      * But if we print them (in printtree.c), they must go out as
 747      * "\\\\" and "\\t" respectively.  (This is because of the bogus
 748      * intermediate format that the parser produces.  It uses '\t' fpr end of
 749      * string, so it needs to be able to escape tabs, which means that it
 750      * also needs to be able to escape the escape character ('\\').  Sigh.
 751      */
 752 %}
 753
 754 <Char>\\                { PUSH_STATE(CharEsc); }
 755 <String>\\&             /* Ignore */ ;
 756 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
 757
 758 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
 759 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
 760
 761 %{
 762 /*
 763  Not 100% correct, tokenizes "foo \  --<>--
 764                                  \ bar"
 765
 766  as "foo  bar", but this is not correct as per Haskell 98 report and its
 767  maximal munch rule for "--"-style comments.
 768
 769  For the moment, not deemed worthy to fix.
 770 */
 771 %}
 772 <StringEsc>"--"[^\n\r]*{NL}?{WS}*  { noGap=FALSE; }
 773
 774 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
 775 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
 776 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
 777 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
 778 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
 779 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
 780 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
 781 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
 782 <CharEsc,StringEsc>BEL  |
 783 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
 784 <CharEsc,StringEsc>BS   |
 785 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
 786 <CharEsc,StringEsc>HT   |
 787 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
 788 <CharEsc,StringEsc>LF   |
 789 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
 790 <CharEsc,StringEsc>VT   |
 791 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
 792 <CharEsc,StringEsc>FF   |
 793 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
 794 <CharEsc,StringEsc>CR   |
 795 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
 796 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
 797 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
 798 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
 799 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
 800 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
 801 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
 802 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
 803 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
 804 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
 805 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
 806 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
 807 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
 808 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
 809 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
 810 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
 811 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
 812 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
 813 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
 814 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
 815 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
 816 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
 817 <CharEsc,StringEsc>{D}+  {
 818                           int i = strtol(yytext, NULL, 10);
 819                           if (i < NCHARS) {
 820                              addchar((char) i);
 821                           } else {
 822                              char errbuf[ERR_BUF_SIZE];
 823                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 824                                 yytext);
 825                              hsperror(errbuf);
 826                           }
 827                           POP_STATE;
 828                         }
 829 <CharEsc,StringEsc>o{O}+ {
 830                           int i = strtol(yytext + 1, NULL, 8);
 831                           if (i < NCHARS) {
 832                              addchar((char) i);
 833                           } else {
 834                              char errbuf[ERR_BUF_SIZE];
 835                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 836                                 yytext);
 837                              hsperror(errbuf);
 838                           }
 839                           POP_STATE;
 840                         }
 841 <CharEsc,StringEsc>x{H}+ {
 842                           int i = strtol(yytext + 1, NULL, 16);
 843                           if (i < NCHARS) {
 844                              addchar((char) i);
 845                           } else {
 846                              char errbuf[ERR_BUF_SIZE];
 847                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 848                                 yytext);
 849                              hsperror(errbuf);
 850                           }
 851                           POP_STATE;
 852                         }
 853
 854
 855 %{
 856     /*
 857      * Nested comments.  The major complication here is in trying to match the
 858      * longest lexemes possible, for better performance.  (See the flex document.)
 859      * That's why the rules look so bizarre.
 860      */
 861 %}
 862
 863 <Code,GlaExt,UserPragma,StringEsc>"{-"  {
 864                           noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment);
 865                         }
 866
 867 <Comment>[^-{]*         |
 868 <Comment>"-"+[^-{}]+    |
 869 <Comment>"{"+[^-{}]+    ;
 870 <Comment>"{-"           { nested_comments++; }
 871 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
 872 <Comment>(.|\n)         ;
 873
 874
 875 %{
 876     /*
 877      * Illegal characters.  This used to be a single rule, but we might as well
 878      * pass on as much information as we have, so now we indicate our state in
 879      * the error message.
 880      */
 881 %}
 882
 883 <INITIAL,Code,GlaExt,UserPragma>(.|\n)  {
 884                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
 885                             input_filename, hsplineno, hspcolno + 1);
 886                          format_string(stderr, (unsigned char *) yytext, 1);
 887                          fputs("'\n", stderr);
 888                          hsperror("");
 889                         }
 890 <Char>(.|\n)            {
 891                          fprintf(stderr, "%s:%d:%d: Illegal character: `",
 892                             input_filename, hsplineno, hspcolno + 1);
 893                          format_string(stderr, (unsigned char *) yytext, 1);
 894                          fputs("' in a character literal\n", stderr);
 895                          hsperror("");
 896                         }
 897 <CharEsc>(.|\n)         {
 898                          fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
 899                             input_filename, hsplineno, hspcolno + 1);
 900                          format_string(stderr, (unsigned char *) yytext, 1);
 901                          fputs("'\n", stderr);
 902                          hsperror("");
 903                         }
 904 <String>(.|\n)          { if (nonstandardFlag) {
 905                              addtext(yytext, yyleng);
 906                           } else {
 907                                 fprintf(stderr, "%s:%d:%d: Illegal character: `",
 908                                 input_filename, hsplineno, hspcolno + 1);
 909                                 format_string(stderr, (unsigned char *) yytext, 1);
 910                                 fputs("' in a string literal\n", stderr);
 911                                 hsperror("");
 912                           }
 913                         }
 914 <StringEsc>(.|\n)       {
 915                          if (noGap) {
 916                              fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\",
 917                                 input_filename, hsplineno, hspcolno + 1);
 918                              format_string(stderr, (unsigned char *) yytext, 1);
 919                              fputs("'\n", stderr);
 920                              hsperror("");
 921                          } else {
 922                              fprintf(stderr, "%s:%d:%d: Illegal character: `",
 923                                 input_filename, hsplineno, hspcolno + 1);
 924                              format_string(stderr, (unsigned char *) yytext, 1);
 925                              fputs("' in a string gap\n", stderr);
 926                              hsperror("");
 927                          }
 928                         }
 929
 930 %{
 931     /*
 932      * End of file.  In any sub-state, this is an error.  However, for the primary
 933      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
 934      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
 935      * adding virtual close curlies, or closing an interface and returning to the
 936      * primary source file.
 937      *
 938      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
 939      * line/column advancement has to be done by hand.
 940      */
 941 %}
 942
 943 <Char,CharEsc><<EOF>>   {
 944                           hsplineno = hslineno; hspcolno = hscolno;
 945                           hsperror("unterminated character literal");
 946                         }
 947 <Comment><<EOF>>        {
 948                           char errbuf[ERR_BUF_SIZE];
 949                           hsplineno = hslineno; hspcolno = hscolno;
 950                           sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
 951                           hsperror(errbuf);
 952                         }
 953 <String,StringEsc><<EOF>>   {
 954                           hsplineno = hslineno; hspcolno = hscolno;
 955                           hsperror("unterminated string literal");
 956                         }
 957 <UserPragma><<EOF>>     {
 958                           hsplineno = hslineno; hspcolno = hscolno;
 959                           hsperror("unterminated user-specified pragma");
 960                         }
 961 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
 962
 963 %%
 964
 965 /**********************************************************************
 966 *                                                                     *
 967 *                                                                     *
 968 *     YACC/LEX Initialisation etc.                                    *
 969 *                                                                     *
 970 *                                                                     *
 971 **********************************************************************/
 972
 973 /*
 974    We initialise input_filename to "<stdin>".
 975    This allows unnamed sources to be piped into the parser.
 976 */
 977
 978 void
 979 yyinit(void)
 980 {
 981     input_filename = xstrdup("<stdin>");
 982
 983     /* We must initialize the input buffer _now_, because we call
 984        setyyin _before_ calling yylex for the first time! */
 985     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 986
 987     if (nonstandardFlag)
 988         PUSH_STATE(GlaExt);
 989     else
 990         PUSH_STATE(Code);
 991 }
 992
 993 static void
 994 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
 995 {
 996     if (input_filename != NULL)
 997         free(input_filename);
 998     input_filename = xstrdup(f);
 999 }
1000
1001 /**********************************************************************
1002 *                                                                     *
1003 *                                                                     *
1004 *     Layout Processing                                               *
1005 *                                                                     *
1006 *                                                                     *
1007 **********************************************************************/
1008
1009 /*
1010         The following section deals with Haskell Layout conventions
1011         forcing insertion of ; or } as appropriate
1012 */
1013
1014 #ifdef HSP_DEBUG
1015 #define LAYOUT_DEBUG
1016 #endif
1017
1018
1019 static BOOLEAN
1020 hsshouldindent(void)
1021 {
1022     return (!forgetindent && INDENTON);
1023 }
1024
1025
1026 /* Enter new context and set new indentation level */
1027 void
1028 hssetindent(void)
1029 {
1030 #ifdef LAYOUT_DEBUG
1031     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1032 #endif
1033
1034     /*
1035      * partain: first chk that new indent won't be less than current one; this code
1036      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1037      * current token; what that has to do with indenting, I don't know.
1038      */
1039
1040
1041     if (hscolno - 1 <= INDENTPT) {
1042         if (INDENTPT == -1)
1043             return;             /* Empty input OK for Haskell 1.1 */
1044         else {
1045             char errbuf[ERR_BUF_SIZE];
1046
1047             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1048             hsperror(errbuf);
1049         }
1050     }
1051     hsentercontext((hspcolno << 1) | 1);
1052 }
1053
1054
1055 /* Enter a new context without changing the indentation level */
1056 void
1057 hsincindent(void)
1058 {
1059 #ifdef LAYOUT_DEBUG
1060     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1061 #endif
1062     hsentercontext(indenttab[icontexts] & ~1);
1063 }
1064
1065
1066 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1067 void
1068 hsindentoff(void)
1069 {
1070     forgetindent = TRUE;
1071 }
1072
1073
1074 /* Enter a new layout context. */
1075 static void
1076 hsentercontext(int indent)
1077 {
1078     /* Enter new context and set indentation as specified */
1079     if (++icontexts >= MAX_CONTEXTS) {
1080         char errbuf[ERR_BUF_SIZE];
1081
1082         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1083         hsperror(errbuf);
1084     }
1085     forgetindent = FALSE;
1086     indenttab[icontexts] = indent;
1087 #ifdef LAYOUT_DEBUG
1088     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1089 #endif
1090 }
1091
1092
1093 /* Exit a layout context */
1094 void
1095 hsendindent(void)
1096 {
1097     --icontexts;
1098 #ifdef LAYOUT_DEBUG
1099     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1100 #endif
1101 }
1102
1103 /*
1104  *      Return checks the indentation level and returns ;, } or the specified token.
1105  */
1106 static int
1107 Return(int tok)
1108 {
1109 #ifdef HSP_DEBUG
1110     extern int yyleng;
1111 #endif
1112     if (hsshouldindent()) {
1113         if (hspcolno < INDENTPT) {
1114 #ifdef HSP_DEBUG
1115             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1116 #endif
1117             hssttok = tok;
1118             return (VCCURLY);
1119         } else if (hspcolno == INDENTPT) {
1120 #ifdef HSP_DEBUG
1121             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1122 #endif
1123             hssttok = -tok;
1124             return (SEMI);
1125         }
1126     }
1127
1128     hssttok = -1;
1129 #ifdef HSP_DEBUG
1130     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1131 #endif
1132     return (tok);
1133 }
1134
1135
1136 /*
1137  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1138  */
1139 int
1140 yylex()
1141 {
1142     int tok;
1143     static BOOLEAN eof = FALSE;
1144
1145     if (!eof) {
1146         if (hssttok != -1) {
1147             if (hssttok < 0) {
1148                 tok = -hssttok;
1149                 hssttok = -1;
1150                 return tok;
1151             }
1152             RETURN(hssttok);
1153         } else {
1154             endlineno = hslineno;
1155             if ((tok = yylex1()) != EOF)
1156                 return tok;
1157             else
1158                 eof = TRUE;
1159         }
1160     }
1161     if (icontexts > icontexts_save) {
1162         if (INDENTON) {
1163             eof = TRUE;
1164             indenttab[icontexts] = 0;
1165             return (VCCURLY);
1166         } else
1167             hsperror("missing '}' at end of file");
1168     } else if (hsbuf_save != NULL) {
1169         fclose(yyin);
1170         yy_delete_buffer(YY_CURRENT_BUFFER);
1171         yy_switch_to_buffer(hsbuf_save);
1172         hsbuf_save = NULL;
1173         new_filename(filename_save);
1174         free(filename_save);
1175         hslineno = hslineno_save;
1176         hsplineno = hsplineno_save;
1177         hscolno = hscolno_save;
1178         hspcolno = hspcolno_save;
1179         etags = etags_save;
1180         icontexts = icontexts_save - 1;
1181         icontexts_save = 0;
1182 #ifdef HSP_DEBUG
1183         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1184 #endif
1185         eof = FALSE;
1186
1187         /* RETURN(LEOF); */
1188         hsperror("No longer using yacc to parse interface files");
1189
1190     } else {
1191         yyterminate();
1192     }
1193     abort(); /* should never get here! */
1194     return(0);
1195 }
1196
1197 /**********************************************************************
1198 *                                                                     *
1199 *                                                                     *
1200 *     Input Processing for Interfaces -- Not currently used !!!       *
1201 *                                                                     *
1202 *                                                                     *
1203 **********************************************************************/
1204
1205 /* setyyin(file)        open file as new lex input buffer */
1206 extern FILE *yyin;
1207
1208 void
1209 setyyin(char *file)
1210 {
1211     hsbuf_save = YY_CURRENT_BUFFER;
1212     if ((yyin = fopen(file, "r")) == NULL) {
1213         char errbuf[ERR_BUF_SIZE];
1214
1215         sprintf(errbuf, "can't read \"%-.50s\"", file);
1216         hsperror(errbuf);
1217     }
1218     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1219
1220     hslineno_save = hslineno;
1221     hsplineno_save = hsplineno;
1222     hslineno = hsplineno = 1;
1223
1224     filename_save = input_filename;
1225     input_filename = NULL;
1226     new_filename(file);
1227     hscolno_save = hscolno;
1228     hspcolno_save = hspcolno;
1229     hscolno = hspcolno = 0;
1230     etags_save = etags; /* do not do "etags" stuff in interfaces */
1231     etags = 0;          /* We remember whether we are doing it in
1232                            the module, so we can restore it later [WDP 94/09] */
1233     hsentercontext(-1);         /* partain: changed this from 0 */
1234     icontexts_save = icontexts;
1235 #ifdef HSP_DEBUG
1236     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1237 #endif
1238 }
1239
1240 static void
1241 layout_input(char *text, int len)
1242 {
1243 #ifdef HSP_DEBUG
1244     fprintf(stderr, "Scanning \"%s\"\n", text);
1245 #endif
1246
1247     hsplineno = hslineno;
1248     hspcolno = hscolno;
1249
1250     while (len-- > 0) {
1251         switch (*text++) {
1252         case '\n':
1253         case '\r':
1254         case '\f':
1255             hslineno++;
1256             hscolno = 0;
1257             break;
1258         case '\t':
1259             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1260             break;
1261         case '\v':
1262             break;
1263         default:
1264             ++hscolno;
1265             break;
1266         }
1267     }
1268 }
1269
1270 void
1271 setstartlineno(void)
1272 {
1273     startlineno = hsplineno;
1274
1275     if (modulelineno == 0) {
1276         modulelineno = startlineno;
1277     }
1278
1279 #if 1/*etags*/
1280 #else
1281     if (etags)
1282         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1283 #endif
1284 }
1285
1286 /**********************************************************************
1287 *                                                                     *
1288 *                                                                     *
1289 *                      Text Caching                                   *
1290 *                                                                     *
1291 *                                                                     *
1292 **********************************************************************/
1293
1294 #define CACHE_SIZE YY_BUF_SIZE
1295
1296 static struct {
1297     unsigned allocated;
1298     unsigned next;
1299     char *text;
1300 } textcache = { 0, 0, NULL };
1301
1302 static void
1303 cleartext(void)
1304 {
1305 /*  fprintf(stderr, "cleartext\n"); */
1306     textcache.next = 0;
1307     if (textcache.allocated == 0) {
1308         textcache.allocated = CACHE_SIZE;
1309         textcache.text = xmalloc(CACHE_SIZE);
1310     }
1311 }
1312
1313 static void
1314 addtext(char *text, unsigned length)
1315 {
1316 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1317
1318     if (length == 0)
1319         return;
1320
1321     if (textcache.next + length + 1 >= textcache.allocated) {
1322         textcache.allocated += length + CACHE_SIZE;
1323         textcache.text = xrealloc(textcache.text, textcache.allocated);
1324     }
1325     bcopy(text, textcache.text + textcache.next, length);
1326     textcache.next += length;
1327 }
1328
1329 static void
1330 addchar(char c)
1331 {
1332 /*  fprintf(stderr, "addchar: %c\n", c); */
1333
1334     if (textcache.next + 2 >= textcache.allocated) {
1335         textcache.allocated += CACHE_SIZE;
1336         textcache.text = xrealloc(textcache.text, textcache.allocated);
1337     }
1338     textcache.text[textcache.next++] = c;
1339 }
1340
1341 static char *
1342 fetchtext(unsigned *length)
1343 {
1344 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1345
1346     *length = textcache.next;
1347     textcache.text[textcache.next] = '\0';
1348     return textcache.text;
1349 }
1350
1351 /**********************************************************************
1352 *                                                                     *
1353 *                                                                     *
1354 *    Identifier Processing                                             *
1355 *                                                                     *
1356 *                                                                     *
1357 **********************************************************************/
1358
1359 /*
1360         hsnewid         Enters an id of length n into the symbol table.
1361 */
1362
1363 static void
1364 hsnewid(char *name, int length)
1365 {
1366     char save = name[length];
1367
1368     name[length] = '\0';
1369     yylval.uid = installid(name);
1370     name[length] = save;
1371 }
1372
1373 BOOLEAN
1374 hsnewqid(char *name, int length)
1375 {
1376     char* dot;
1377     char save = name[length];
1378     name[length] = '\0';
1379
1380     dot = strchr(name, '.');
1381     *dot = '\0';
1382     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1383     *dot = '.';
1384     name[length] = save;
1385
1386     return isconstr(dot+1);
1387 }
1388
1389 static
1390 BOOLEAN
1391 is_commment(char* lexeme, int len)
1392 {
1393    char* ptr;
1394    int i;
1395
1396    if (len < 2) {
1397       return FALSE;
1398    }
1399
1400    for(i=0;i<len;i++) {
1401      if (lexeme[i] != '-') return FALSE;
1402    }
1403    return TRUE;
1404 }
1405