ghc/compiler/yaccParser/hslexer.flex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       LEX grammar for Haskell.                                      *
   6 *       ------------------------                                      *
   7 *                                                                     *
   8 *       (c) Copyright K. Hammond, University of Glasgow,              *
   9 *               10th. February 1989                                   *
  10 *                                                                     *
  11 *       Modification History                                          *
  12 *       --------------------                                          *
  13 *                                                                     *
  14 *       22/08/91 kh             Initial Haskell 1.1 version.          *
  15 *       18/10/91 kh             Added 'ccall'.                        *
  16 *       19/11/91 kh             Tidied generally.                     *
  17 *       04/12/91 kh             Added Int#.                           *
  18 *       31/01/92 kh             Haskell 1.2 version.                  *
  19 *       24/04/92 ps             Added 'scc'.                          *
  20 *       03/06/92 kh             Changed Infix/Prelude Handling.       *
  21 *       23/08/93 jsm            Changed to support flex               *
  22 *                                                                     *
  23 *                                                                     *
  24 *       Known Problems:                                               *
  25 *                                                                     *
  26 *               None, any more.                                       *
  27 *                                                                     *
  28 **********************************************************************/
  29
  30 #include "../../includes/config.h"
  31
  32 #include <stdio.h>
  33
  34 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
  35 #include <string.h>
  36 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
  37 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
  38 #include <memory.h>
  39 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
  40 #define index strchr
  41 #define rindex strrchr
  42 #define bcopy(s, d, n) memcpy ((d), (s), (n))
  43 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  44 #define bzero(s, n) memset ((s), 0, (n))
  45 #else /* not STDC_HEADERS and not HAVE_STRING_H */
  46 #include <strings.h>
  47 /* memory.h and strings.h conflict on some systems.  */
  48 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
  49
  50 #include "hspincl.h"
  51 #include "hsparser.tab.h"
  52 #include "constants.h"
  53 #include "utils.h"
  54
  55 /* Our substitute for <ctype.h> */
  56
  57 #define NCHARS  256
  58 #define _S      0x1
  59 #define _D      0x2
  60 #define _H      0x4
  61 #define _O      0x8
  62 #define _C      0x10
  63
  64 #define _isconstr(s)    (CharTable[*s]&(_C))
  65 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
  66
  67 static unsigned char CharTable[NCHARS] = {
  68 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
  69 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
  70 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
  71 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
  72 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
  73 /* '(' */       0,      0,      0,      0,      0,      0,      0,      0,
  74 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
  75 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
  76 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
  77 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  78 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  79 /* 'X' */       _C,     _C,     _C,     0,      0,      0,      0,      0,
  80 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
  81 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
  82 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
  83 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
  84
  85 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  86 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  87 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  88 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  89 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  90 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  91 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  92 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  93 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  94 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  95 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  96 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  97 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  98 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  99 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
 100 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
 101 };
 102
 103 /**********************************************************************
 104 *                                                                     *
 105 *                                                                     *
 106 *      Declarations                                                   *
 107 *                                                                     *
 108 *                                                                     *
 109 **********************************************************************/
 110
 111 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
 112
 113 /*
 114  * For my own sanity, things that are not part of the flex skeleton
 115  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
 116  */
 117
 118 static int hslineno = 0;        /* Line number at end of token */
 119 int hsplineno = 0;              /* Line number at end of previous token */
 120
 121 static int hscolno = 0;         /* Column number at end of token */
 122 int hspcolno = 0;               /* Column number at end of previous token */
 123 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
 124
 125 int startlineno = 0;            /* The line number where something starts */
 126 int endlineno = 0;              /* The line number where something ends */
 127
 128 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
 129 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
 130
 131 static int nested_comments;     /* For counting comment nesting depth */
 132
 133 /* Hacky definition of yywrap: see flex doc.
 134
 135    If we don't do this, then we'll have to get the default
 136    yywrap from the flex library, which is often something
 137    we are not good at locating.  This avoids that difficulty.
 138    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
 139    WDP 94/09/05
 140 */
 141 #define yywrap() 1
 142
 143 /* Essential forward declarations */
 144
 145 static void hsnewid      PROTO((char *, int));
 146 static void layout_input PROTO((char *, int));
 147 static void cleartext    (NO_ARGS);
 148 static void addtext      PROTO((char *, unsigned));
 149 static void addchar      PROTO((char));
 150 static char *fetchtext   PROTO((unsigned *));
 151 static void new_filename PROTO((char *));
 152 static int  Return       PROTO((int));
 153 static void hsentercontext PROTO((int));
 154
 155 /* Special file handling for IMPORTS */
 156 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 157
 158 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
 159 static char *filename_save;             /* File Name                     */
 160 static int hslineno_save = 0,           /* Line Number                   */
 161  hsplineno_save = 0,                    /* Line Number of Prev. token    */
 162  hscolno_save = 0,                      /* Indentation                   */
 163  hspcolno_save = 0;                     /* Left Indentation              */
 164 static short icontexts_save = 0;        /* Indent Context Level          */
 165
 166 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
 167 extern BOOLEAN etags;      /* that which is saved */
 168
 169 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
 170
 171 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
 172
 173 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
 174 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
 175 extern int maxAcceptablePragmaVersion;
 176 extern int thisIfacePragmaVersion;
 177
 178 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
 179                                  * inserted before token +ve  -- "}" inserted before
 180                                  * token */
 181
 182 short icontexts = 0;            /* Which context we're in */
 183
 184
 185
 186 /*
 187         Table of indentations:  right bit indicates whether to use
 188           indentation rules (1 = use rules; 0 = ignore)
 189
 190     partain:
 191     push one of these "contexts" at every "case" or "where"; the right bit says
 192     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
 193
 194     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 195     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 196     push is shown just below.
 197
 198 */
 199
 200
 201 static short indenttab[MAX_CONTEXTS] = {-1};
 202
 203 #define INDENTPT (indenttab[icontexts]>>1)
 204 #define INDENTON (indenttab[icontexts]&1)
 205
 206 #define RETURN(tok) return(Return(tok))
 207
 208 #undef YY_DECL
 209 #define YY_DECL int yylex1()
 210
 211 /* We should not peek at yy_act, but flex calls us even for the internal action
 212    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
 213    to support older versions of flex, we'll continue to peek for now.
 214  */
 215 #define YY_USER_ACTION \
 216     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
 217
 218 #if 0/*debug*/
 219 #undef YY_BREAK
 220 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
 221 #endif
 222
 223 /* Each time we enter a new start state, we push it onto the state stack.
 224    Note that the rules do not allow us to underflow or overflow the stack.
 225    (At least, they shouldn't.)  The maximum expected depth is 4:
 226    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
 227 */
 228 static int StateStack[5];
 229 static int StateDepth = -1;
 230
 231 #ifdef HSP_DEBUG
 232 #define PUSH_STATE(n)   do {\
 233     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
 234     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 235 #define POP_STATE       do {--StateDepth;\
 236     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
 237     BEGIN(StateStack[StateDepth]);} while(0)
 238 #else
 239 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 240 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
 241 #endif
 242
 243 %}
 244
 245 /* The start states are:
 246    Code -- normal Haskell code (principal lexer)
 247    GlaExt -- Haskell code with Glasgow extensions
 248    Comment -- Nested comment processing
 249    String -- Inside a string literal with backslashes
 250    StringEsc -- Immediately following a backslash in a string literal
 251    Char -- Inside a character literal with backslashes
 252    CharEsc -- Immediately following a backslash in a character literal
 253
 254    Note that the INITIAL state is unused.  Also note that these states
 255    are _exclusive_.  All rules should be prefixed with an appropriate
 256    list of start states.
 257  */
 258
 259 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
 260
 261 D                       [0-9]
 262 O                       [0-7]
 263 H                       [0-9A-Fa-f]
 264 N                       {D}+
 265 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 266 S                       [!#$%&*+./<=>?@\\^|~:]
 267 SId                     ({S}|~|-){S}*
 268 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
 269 L                       [A-Z]
 270 I                       [A-Za-z]
 271 i                       [A-Za-z0-9'_]
 272 Id                      {I}({i})*
 273 WS                      [ \t\n\r\f\v]
 274 CNTRL                   [@A-Z\[\\\]^_]
 275 NL                      [\n\r]
 276
 277 %%
 278
 279 %{
 280     /*
 281      * Special GHC pragma rules.  Do we need a start state for interface files,
 282      * so these won't be matched in source files? --JSM
 283      */
 284 %}
 285
 286 <Code,GlaExt>^"# ".*{NL}    {
 287                           char tempf[FILENAME_SIZE];
 288                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
 289                           new_filename(tempf);
 290                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 291                         }
 292
 293 <Code,GlaExt>^"#line ".*{NL}    {
 294                           char tempf[FILENAME_SIZE];
 295                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
 296                           new_filename(tempf);
 297                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 298                         }
 299
 300 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
 301                           /* partain: pragma-style line directive */
 302                           char tempf[FILENAME_SIZE];
 303                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
 304                           new_filename(tempf);
 305                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 306                         }
 307 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}"   {
 308                           sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
 309                         }
 310 <Code,GlaExt>"{-# GHC_PRAGMA "   {
 311                           if ( ignorePragmas ||
 312                                thisIfacePragmaVersion < minAcceptablePragmaVersion ||
 313                                thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
 314                              nested_comments = 1;
 315                              PUSH_STATE(Comment);
 316                           } else {
 317                              PUSH_STATE(GhcPragma);
 318                              RETURN(GHC_PRAGMA);
 319                           }
 320                         }
 321 <GhcPragma>"_N_"            { RETURN(NO_PRAGMA); }
 322 <GhcPragma>"_NI_"           { RETURN(NOINFO_PRAGMA); }
 323 <GhcPragma>"_ABSTRACT_"     { RETURN(ABSTRACT_PRAGMA); }
 324 <GhcPragma>"_DEFOREST_"     { RETURN(DEFOREST_PRAGMA); }
 325 <GhcPragma>"_SPECIALISE_"   { RETURN(SPECIALISE_PRAGMA); }
 326 <GhcPragma>"_M_"            { RETURN(MODNAME_PRAGMA); }
 327 <GhcPragma>"_A_"            { RETURN(ARITY_PRAGMA); }
 328 <GhcPragma>"_U_"            { RETURN(UPDATE_PRAGMA); }
 329 <GhcPragma>"_S_"            { RETURN(STRICTNESS_PRAGMA); }
 330 <GhcPragma>"_K_"            { RETURN(KIND_PRAGMA); }
 331 <GhcPragma>"_MF_"           { RETURN(MAGIC_UNFOLDING_PRAGMA); }
 332 <GhcPragma>"_F_"            { RETURN(UNFOLDING_PRAGMA); }
 333
 334 <GhcPragma>"_!_"            { RETURN(COCON); }
 335 <GhcPragma>"_#_"            { RETURN(COPRIM); }
 336 <GhcPragma>"_APP_"          { RETURN(COAPP); }
 337 <GhcPragma>"_TYAPP_"        { RETURN(COTYAPP); }
 338 <GhcPragma>"_ALG_"          { RETURN(CO_ALG_ALTS); }
 339 <GhcPragma>"_PRIM_"         { RETURN(CO_PRIM_ALTS); }
 340 <GhcPragma>"_NO_DEFLT_"     { RETURN(CO_NO_DEFAULT); }
 341 <GhcPragma>"_LETREC_"       { RETURN(CO_LETREC); }
 342
 343 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
 344 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
 345 <GhcPragma>"_USER_CC_"      { RETURN(CO_USER_CC); }
 346 <GhcPragma>"_AUTO_CC_"      { RETURN(CO_AUTO_CC); }
 347 <GhcPragma>"_DICT_CC_"      { RETURN(CO_DICT_CC); }
 348
 349 <GhcPragma>"_DUPD_CC_"      { RETURN(CO_DUPD_CC); }
 350 <GhcPragma>"_CAF_CC_"       { RETURN(CO_CAF_CC); }
 351
 352 <GhcPragma>"_SDSEL_"        { RETURN(CO_SDSEL_ID); }
 353 <GhcPragma>"_METH_"         { RETURN(CO_METH_ID); }
 354 <GhcPragma>"_DEFM_"         { RETURN(CO_DEFM_ID); }
 355 <GhcPragma>"_DFUN_"         { RETURN(CO_DFUN_ID); }
 356 <GhcPragma>"_CONSTM_"       { RETURN(CO_CONSTM_ID); }
 357 <GhcPragma>"_SPEC_"         { RETURN(CO_SPEC_ID); }
 358 <GhcPragma>"_WRKR_"         { RETURN(CO_WRKR_ID); }
 359 <GhcPragma>"_ORIG_"         { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
 360
 361 <GhcPragma>"_ALWAYS_"       { RETURN(UNFOLD_ALWAYS); }
 362 <GhcPragma>"_IF_ARGS_"      { RETURN(UNFOLD_IF_ARGS); }
 363
 364 <GhcPragma>"_NOREP_I_"      { RETURN(NOREP_INTEGER); }
 365 <GhcPragma>"_NOREP_R_"      { RETURN(NOREP_RATIONAL); }
 366 <GhcPragma>"_NOREP_S_"      { RETURN(NOREP_STRING); }
 367
 368 <GhcPragma>" #-}"           { POP_STATE; RETURN(END_PRAGMA); }
 369
 370 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
 371                               PUSH_STATE(UserPragma);
 372                               RETURN(SPECIALISE_UPRAGMA);
 373                             }
 374 <Code,GlaExt>"{-#"{WS}*"INLINE" {
 375                               PUSH_STATE(UserPragma);
 376                               RETURN(INLINE_UPRAGMA);
 377                             }
 378 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
 379                               PUSH_STATE(UserPragma);
 380                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
 381                             }
 382 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
 383                               PUSH_STATE(UserPragma);
 384                               RETURN(DEFOREST_UPRAGMA);
 385                             }
 386 <Code,GlaExt>"{-#"{WS}*"ABSTRACT" {
 387                               PUSH_STATE(UserPragma);
 388                               RETURN(ABSTRACT_UPRAGMA);
 389                             }
 390 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
 391                               fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
 392                                 input_filename, hsplineno);
 393                               format_string(stderr, (unsigned char *) yytext, yyleng);
 394                               fputs("'\n", stderr);
 395                               nested_comments = 1;
 396                               PUSH_STATE(Comment);
 397                             }
 398 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
 399
 400 %{
 401     /*
 402      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
 403      * intentionally accepted as a keyword even for normal <Code>.
 404      */
 405 %}
 406
 407 <Code,GlaExt,GhcPragma>"case"   { RETURN(CASE); }
 408 <Code,GlaExt>"class"            { RETURN(CLASS); }
 409 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
 410 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
 411 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
 412 <Code,GlaExt>"else"             { RETURN(ELSE); }
 413 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
 414 <Code,GlaExt>"if"               { RETURN(IF); }
 415 <Code,GlaExt>"import"           { RETURN(IMPORT); }
 416 <Code,GlaExt>"infix"            { RETURN(INFIX); }
 417 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
 418 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
 419 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
 420 <Code,GlaExt>"interface"        { RETURN(INTERFACE); }
 421 <Code,GlaExt>"module"           { RETURN(MODULE); }
 422 <Code,GlaExt,GhcPragma>"of"     { RETURN(OF); }
 423 <Code,GlaExt>"renaming"         { RETURN(RENAMING); }
 424 <Code,GlaExt>"then"             { RETURN(THEN); }
 425 <Code,GlaExt>"to"               { RETURN(TO); }
 426 <Code,GlaExt>"type"             { RETURN(TYPE); }
 427 <Code,GlaExt>"where"            { RETURN(WHERE); }
 428 <Code,GlaExt,GhcPragma>"in"     { RETURN(IN); }
 429 <Code,GlaExt,GhcPragma>"let"    { RETURN(LET); }
 430 <GlaExt,GhcPragma>"_ccall_"     { RETURN(CCALL); }
 431 <GlaExt,GhcPragma>"_ccall_GC_"  { RETURN(CCALL_GC); }
 432 <GlaExt,GhcPragma>"_casm_"      { RETURN(CASM); }
 433 <GlaExt,GhcPragma>"_casm_GC_"   { RETURN(CASM_GC); }
 434 <Code,GlaExt,GhcPragma>"_scc_"  { RETURN(SCC); }
 435 <GhcPragma>"_forall_"           { RETURN(FORALL); }
 436
 437 %{
 438     /*
 439      * Haskell operators.  Nothing special about these.
 440      */
 441 %}
 442
 443 <Code,GlaExt>".."                       { RETURN(DOTDOT); }
 444 <Code,GlaExt,GhcPragma>";"              { RETURN(SEMI); }
 445 <Code,GlaExt,GhcPragma,UserPragma>","   { RETURN(COMMA); }
 446 <Code,GlaExt,GhcPragma>"|"              { RETURN(VBAR); }
 447 <Code,GlaExt,GhcPragma,UserPragma>"="   { RETURN(EQUAL); }
 448 <Code,GlaExt>"<-"                       { RETURN(LARROW); }
 449 <Code,GlaExt,GhcPragma,UserPragma>"->"  { RETURN(RARROW); }
 450 <Code,GlaExt,GhcPragma,UserPragma>"=>"  { RETURN(DARROW); }
 451 <Code,GlaExt,GhcPragma,UserPragma>"::"  { RETURN(DCOLON); }
 452 <Code,GlaExt,GhcPragma,UserPragma>"("   { RETURN(OPAREN); }
 453 <Code,GlaExt,GhcPragma,UserPragma>")"   { RETURN(CPAREN); }
 454 <Code,GlaExt,GhcPragma,UserPragma>"["   { RETURN(OBRACK); }
 455 <Code,GlaExt,GhcPragma,UserPragma>"]"   { RETURN(CBRACK); }
 456 <Code,GlaExt,GhcPragma>"{"              { RETURN(OCURLY); }
 457 <Code,GlaExt,GhcPragma>"}"              { RETURN(CCURLY); }
 458 <Code,GlaExt>"+"                        { RETURN(PLUS); }
 459 <Code,GlaExt>"@"                        { RETURN(AT); }
 460 <Code,GlaExt,GhcPragma>"\\"             { RETURN(LAMBDA); }
 461 <GhcPragma>"_/\\_"                      { RETURN(TYLAMBDA); }
 462 <Code,GlaExt>"_"                        { RETURN(WILDCARD); }
 463 <Code,GlaExt,GhcPragma>"`"              { RETURN(BQUOTE); }
 464 <Code,GlaExt>"~"                        { RETURN(LAZY); }
 465 <Code,GlaExt>"-"                        { RETURN(MINUS); }
 466
 467 %{
 468     /*
 469      * Integers and (for Glasgow extensions) primitive integers.  Note that
 470      * we pass all of the text on to the parser, because flex/C can't handle
 471      * arbitrary precision numbers.
 472      */
 473 %}
 474
 475 <GlaExt>("-")?"0o"{O}+"#" { /* octal */
 476                          yylval.uid = xstrndup(yytext, yyleng - 1);
 477                          RETURN(INTPRIM);
 478                         }
 479 <Code,GlaExt>"0o"{O}+   { /* octal */
 480                          yylval.uid = xstrndup(yytext, yyleng);
 481                          RETURN(INTEGER);
 482                         }
 483 <GlaExt>("-")?"0x"{H}+"#" { /* hexadecimal */
 484                          yylval.uid = xstrndup(yytext, yyleng - 1);
 485                          RETURN(INTPRIM);
 486                         }
 487 <Code,GlaExt>"0x"{H}+   { /* hexadecimal */
 488                          yylval.uid = xstrndup(yytext, yyleng);
 489                          RETURN(INTEGER);
 490                         }
 491 <GlaExt,GhcPragma>("-")?{N}"#"  {
 492                          yylval.uid = xstrndup(yytext, yyleng - 1);
 493                          RETURN(INTPRIM);
 494                         }
 495 <Code,GlaExt,GhcPragma>{N} {
 496                          yylval.uid = xstrndup(yytext, yyleng);
 497                          RETURN(INTEGER);
 498                         }
 499
 500 %{
 501     /*
 502      * Floats and (for Glasgow extensions) primitive floats/doubles.
 503      */
 504 %}
 505
 506 <GlaExt,GhcPragma>("-")?{F}"##" {
 507                          yylval.uid = xstrndup(yytext, yyleng - 2);
 508                          RETURN(DOUBLEPRIM);
 509                         }
 510 <GlaExt,GhcPragma>("-")?{F}"#" {
 511                          yylval.uid = xstrndup(yytext, yyleng - 1);
 512                          RETURN(FLOATPRIM);
 513                         }
 514 <Code,GlaExt>{F}        {
 515                          yylval.uid = xstrndup(yytext, yyleng);
 516                          RETURN(FLOAT);
 517                         }
 518
 519 %{
 520     /*
 521      * Funky ``foo'' style C literals for Glasgow extensions
 522      */
 523 %}
 524
 525 <GlaExt,GhcPragma>"``"[^']+"''" {
 526                          hsnewid(yytext + 2, yyleng - 4);
 527                          RETURN(CLITLIT);
 528                         }
 529
 530 %{
 531     /*
 532      * Identifiers, both variables and operators.  The trailing hash is allowed
 533      * for Glasgow extensions.
 534      */
 535 %}
 536
 537 <GhcPragma>"_NIL_"              { hsnewid(yytext, yyleng); RETURN(CONID); }
 538 <GhcPragma>"_TUP_"{D}+          { hsnewid(yytext, yyleng); RETURN(CONID); }
 539 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
 540
 541 <GlaExt,GhcPragma,UserPragma>{Id}"#" {
 542                          hsnewid(yytext, yyleng);
 543                          RETURN(_isconstr(yytext) ? CONID : VARID);
 544                         }
 545 %{
 546 /* This SHOULDNAE work in "Code" (sigh) */
 547 %}
 548 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} {
 549                          if (! (nonstandardFlag || in_interface)) {
 550                             char errbuf[ERR_BUF_SIZE];
 551                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
 552                             hsperror(errbuf);
 553                          }
 554                          hsnewid(yytext, yyleng);
 555                          RETURN(isconstr(yytext) ? CONID : VARID);
 556                          /* NB: ^^^^^^^^ : not the macro! */
 557                         }
 558 <Code,GlaExt,GhcPragma,UserPragma>{Id}  {
 559                          hsnewid(yytext, yyleng);
 560                          RETURN(_isconstr(yytext) ? CONID : VARID);
 561                         }
 562 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
 563                          hsnewid(yytext, yyleng);
 564                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
 565                         }
 566
 567 %{
 568     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
 569
 570     /* Because we can make the former well-behaved (we defined them).
 571
 572        Sadly, the latter is defined by Haskell, which allows such
 573        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
 574     */
 575 %}
 576
 577 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`"        {
 578                          hsnewid(yytext + 1, yyleng - 2);
 579                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
 580                         }
 581
 582 %{
 583     /*
 584      * Character literals.  The first form is the quick form, for character
 585      * literals that don't contain backslashes.  Literals with backslashes are
 586      * lexed through multiple rules.  First, we match the open ' and as many
 587      * normal characters as possible.  This puts us into the <Char> state, where
 588      * a backslash is legal.  Then, we match the backslash and move into the
 589      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
 590      * characters and the close '.  We may end up with too many characters, but
 591      * this allows us to easily share the lex rules with strings.  Excess characters
 592      * are ignored with a warning.
 593      */
 594 %}
 595
 596 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
 597                          yylval.uhstring = installHstring(1, yytext+1);
 598                          RETURN(CHARPRIM);
 599                         }
 600 <Code,GlaExt>'({CHAR}|"\"")'    {
 601                          yylval.uhstring = installHstring(1, yytext+1);
 602                          RETURN(CHAR);
 603                         }
 604 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
 605                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
 606                          hsperror(errbuf);
 607                         }
 608 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
 609                          hsmlcolno = hspcolno;
 610                          cleartext();
 611                          addtext(yytext+1, yyleng-1);
 612                          PUSH_STATE(Char);
 613                         }
 614 <Char>({CHAR}|"\"")*'#  {
 615                          unsigned length;
 616                          char *text;
 617
 618                          addtext(yytext, yyleng - 2);
 619                          text = fetchtext(&length);
 620
 621                          if (! (nonstandardFlag || in_interface)) {
 622                             char errbuf[ERR_BUF_SIZE];
 623                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
 624                             hsperror(errbuf);
 625                          }
 626
 627                          if (length > 1) {
 628                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
 629                               input_filename, hsplineno, hspcolno + 1);
 630                             format_string(stderr, (unsigned char *) text, length);
 631                             fputs("' too long\n", stderr);
 632                             hsperror("");
 633                          }
 634                          yylval.uhstring = installHstring(1, text);
 635                          hspcolno = hsmlcolno;
 636                          POP_STATE;
 637                          RETURN(CHARPRIM);
 638                         }
 639 <Char>({CHAR}|"\"")*'   {
 640                          unsigned length;
 641                          char *text;
 642
 643                          addtext(yytext, yyleng - 1);
 644                          text = fetchtext(&length);
 645
 646                          if (length > 1) {
 647                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
 648                               input_filename, hsplineno, hspcolno + 1);
 649                             format_string(stderr, (unsigned char *) text, length);
 650                             fputs("' too long\n", stderr);
 651                             hsperror("");
 652                          }
 653                          yylval.uhstring = installHstring(1, text);
 654                          hspcolno = hsmlcolno;
 655                          POP_STATE;
 656                          RETURN(CHAR);
 657                         }
 658 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
 659
 660
 661 %{
 662     /*
 663      * String literals.  The first form is the quick form, for string literals
 664      * that don't contain backslashes.  Literals with backslashes are lexed
 665      * through multiple rules.  First, we match the open " and as many normal
 666      * characters as possible.  This puts us into the <String> state, where
 667      * a backslash is legal.  Then, we match the backslash and move into the
 668      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
 669      * characters, moving back and forth between <String> and <StringEsc> as more
 670      * backslashes are encountered.  (We may even digress into <Comment> mode if we
 671      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
 672      * of normal characters and the close ".
 673      */
 674 %}
 675
 676 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""#  {
 677                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
 678                             /* the -3 accounts for the " on front, "# on the end */
 679                          RETURN(STRINGPRIM);
 680                         }
 681 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""  {
 682                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
 683                          RETURN(STRING);
 684                         }
 685 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
 686                          hsmlcolno = hspcolno;
 687                          cleartext();
 688                          addtext(yytext+1, yyleng-1);
 689                          PUSH_STATE(String);
 690                         }
 691 <String>({CHAR}|"'")*"\"#"   {
 692                          unsigned length;
 693                          char *text;
 694
 695                          addtext(yytext, yyleng-2);
 696                          text = fetchtext(&length);
 697
 698                          if (! (nonstandardFlag || in_interface)) {
 699                             char errbuf[ERR_BUF_SIZE];
 700                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
 701                             hsperror(errbuf);
 702                          }
 703
 704                          yylval.uhstring = installHstring(length, text);
 705                          hspcolno = hsmlcolno;
 706                          POP_STATE;
 707                          RETURN(STRINGPRIM);
 708                         }
 709 <String>({CHAR}|"'")*"\""   {
 710                          unsigned length;
 711                          char *text;
 712
 713                          addtext(yytext, yyleng-1);
 714                          text = fetchtext(&length);
 715
 716                          yylval.uhstring = installHstring(length, text);
 717                          hspcolno = hsmlcolno;
 718                          POP_STATE;
 719                          RETURN(STRING);
 720                         }
 721 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
 722
 723 %{
 724     /*
 725      * Character and string escapes are roughly the same, but strings have the
 726      * extra `\&' sequence which is not allowed for characters.  Also, comments
 727      * are allowed in the <StringEsc> state.  (See the comment section much
 728      * further down.)
 729      *
 730      * NB: Backslashes and tabs are stored in strings as themselves.
 731      * But if we print them (in printtree.c), they must go out as
 732      * "\\\\" and "\\t" respectively.  (This is because of the bogus
 733      * intermediate format that the parser produces.  It uses '\t' fpr end of
 734      * string, so it needs to be able to escape tabs, which means that it
 735      * also needs to be able to escape the escape character ('\\').  Sigh.
 736      */
 737 %}
 738
 739 <Char>\\                { PUSH_STATE(CharEsc); }
 740 <String>\\&             /* Ignore */ ;
 741 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
 742
 743 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
 744 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
 745
 746 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
 747 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
 748 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
 749 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
 750 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
 751 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
 752 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
 753 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
 754 <CharEsc,StringEsc>BEL  |
 755 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
 756 <CharEsc,StringEsc>BS   |
 757 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
 758 <CharEsc,StringEsc>HT   |
 759 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
 760 <CharEsc,StringEsc>LF   |
 761 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
 762 <CharEsc,StringEsc>VT   |
 763 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
 764 <CharEsc,StringEsc>FF   |
 765 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
 766 <CharEsc,StringEsc>CR   |
 767 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
 768 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
 769 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
 770 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
 771 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
 772 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
 773 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
 774 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
 775 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
 776 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
 777 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
 778 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
 779 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
 780 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
 781 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
 782 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
 783 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
 784 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
 785 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
 786 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
 787 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
 788 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
 789 <CharEsc,StringEsc>{D}+  {
 790                           int i = strtol(yytext, NULL, 10);
 791                           if (i < NCHARS) {
 792                              addchar((char) i);
 793                           } else {
 794                              char errbuf[ERR_BUF_SIZE];
 795                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 796                                 yytext);
 797                              hsperror(errbuf);
 798                           }
 799                           POP_STATE;
 800                         }
 801 <CharEsc,StringEsc>o{O}+ {
 802                           int i = strtol(yytext + 1, NULL, 8);
 803                           if (i < NCHARS) {
 804                              addchar((char) i);
 805                           } else {
 806                              char errbuf[ERR_BUF_SIZE];
 807                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 808                                 yytext);
 809                              hsperror(errbuf);
 810                           }
 811                           POP_STATE;
 812                         }
 813 <CharEsc,StringEsc>x{H}+ {
 814                           int i = strtol(yytext + 1, NULL, 16);
 815                           if (i < NCHARS) {
 816                              addchar((char) i);
 817                           } else {
 818                              char errbuf[ERR_BUF_SIZE];
 819                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 820                                 yytext);
 821                              hsperror(errbuf);
 822                           }
 823                           POP_STATE;
 824                         }
 825
 826 %{
 827     /*
 828      * Simple comments and whitespace.  Normally, we would just ignore these, but
 829      * in case we're processing a string escape, we need to note that we've seen
 830      * a gap.
 831      *
 832      * Note that we cater for a comment line that *doesn't* end in a newline.
 833      * This is incorrect, strictly speaking, but seems like the right thing
 834      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
 835      */
 836 %}
 837
 838 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
 839 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+       { noGap = FALSE; }
 840
 841 %{
 842     /*
 843      * Nested comments.  The major complication here is in trying to match the
 844      * longest lexemes possible, for better performance.  (See the flex document.)
 845      * That's why the rules look so bizarre.
 846      */
 847 %}
 848
 849 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-"        {
 850                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
 851                         }
 852
 853 <Comment>[^-{]*         |
 854 <Comment>"-"+[^-{}]+    |
 855 <Comment>"{"+[^-{}]+    ;
 856 <Comment>"{-"           { nested_comments++; }
 857 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
 858 <Comment>(.|\n)         ;
 859
 860 %{
 861     /*
 862      * Illegal characters.  This used to be a single rule, but we might as well
 863      * pass on as much information as we have, so now we indicate our state in
 864      * the error message.
 865      */
 866 %}
 867
 868 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n)        {
 869                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 870                             input_filename, hsplineno, hspcolno + 1);
 871                          format_string(stderr, (unsigned char *) yytext, 1);
 872                          fputs("'\n", stderr);
 873                          hsperror("");
 874                         }
 875 <Char>(.|\n)            {
 876                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 877                             input_filename, hsplineno, hspcolno + 1);
 878                          format_string(stderr, (unsigned char *) yytext, 1);
 879                          fputs("' in a character literal\n", stderr);
 880                          hsperror("");
 881                         }
 882 <CharEsc>(.|\n)         {
 883                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
 884                             input_filename, hsplineno, hspcolno + 1);
 885                          format_string(stderr, (unsigned char *) yytext, 1);
 886                          fputs("'\n", stderr);
 887                          hsperror("");
 888                         }
 889 <String>(.|\n)          { if (nonstandardFlag) {
 890                              addtext(yytext, yyleng);
 891                           } else {
 892                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 893                                 input_filename, hsplineno, hspcolno + 1);
 894                                 format_string(stderr, (unsigned char *) yytext, 1);
 895                                 fputs("' in a string literal\n", stderr);
 896                                 hsperror("");
 897                           }
 898                         }
 899 <StringEsc>(.|\n)       {
 900                          if (noGap) {
 901                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
 902                                 input_filename, hsplineno, hspcolno + 1);
 903                              format_string(stderr, (unsigned char *) yytext, 1);
 904                              fputs("'\n", stderr);
 905                              hsperror("");
 906                          } else {
 907                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 908                                 input_filename, hsplineno, hspcolno + 1);
 909                              format_string(stderr, (unsigned char *) yytext, 1);
 910                              fputs("' in a string gap\n", stderr);
 911                              hsperror("");
 912                          }
 913                         }
 914
 915 %{
 916     /*
 917      * End of file.  In any sub-state, this is an error.  However, for the primary
 918      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
 919      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
 920      * adding virtual close curlies, or closing an interface and returning to the
 921      * primary source file.
 922      *
 923      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
 924      * line/column advancement has to be done by hand.
 925      */
 926 %}
 927
 928 <Char,CharEsc><<EOF>>   {
 929                           hsplineno = hslineno; hspcolno = hscolno;
 930                           hsperror("unterminated character literal");
 931                         }
 932 <Comment><<EOF>>        {
 933                           hsplineno = hslineno; hspcolno = hscolno;
 934                           hsperror("unterminated comment");
 935                         }
 936 <String,StringEsc><<EOF>>   {
 937                           hsplineno = hslineno; hspcolno = hscolno;
 938                           hsperror("unterminated string literal");
 939                         }
 940 <GhcPragma><<EOF>>      {
 941                           hsplineno = hslineno; hspcolno = hscolno;
 942                           hsperror("unterminated interface pragma");
 943                         }
 944 <UserPragma><<EOF>>     {
 945                           hsplineno = hslineno; hspcolno = hscolno;
 946                           hsperror("unterminated user-specified pragma");
 947                         }
 948 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
 949
 950 %%
 951
 952 /**********************************************************************
 953 *                                                                     *
 954 *                                                                     *
 955 *     YACC/LEX Initialisation etc.                                    *
 956 *                                                                     *
 957 *                                                                     *
 958 **********************************************************************/
 959
 960 /*
 961    We initialise input_filename to "<stdin>".
 962    This allows unnamed sources to be piped into the parser.
 963 */
 964
 965 extern BOOLEAN acceptPrim;
 966
 967 void
 968 yyinit(void)
 969 {
 970     input_filename = xstrdup("<stdin>");
 971
 972     /* We must initialize the input buffer _now_, because we call
 973        setyyin _before_ calling yylex for the first time! */
 974     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 975
 976     if (acceptPrim)
 977         PUSH_STATE(GlaExt);
 978     else
 979         PUSH_STATE(Code);
 980 }
 981
 982 static void
 983 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
 984 {
 985     if (input_filename != NULL)
 986         free(input_filename);
 987     input_filename = xstrdup(f);
 988 }
 989
 990 /**********************************************************************
 991 *                                                                     *
 992 *                                                                     *
 993 *     Layout Processing                                               *
 994 *                                                                     *
 995 *                                                                     *
 996 **********************************************************************/
 997
 998 /*
 999         The following section deals with Haskell Layout conventions
1000         forcing insertion of ; or } as appropriate
1001 */
1002
1003 static BOOLEAN
1004 hsshouldindent(void)
1005 {
1006     return (!forgetindent && INDENTON);
1007 }
1008
1009
1010 /* Enter new context and set new indentation level */
1011 void
1012 hssetindent(void)
1013 {
1014 #ifdef HSP_DEBUG
1015     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1016 #endif
1017
1018     /*
1019      * partain: first chk that new indent won't be less than current one; this code
1020      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1021      * current token; what that has to do with indenting, I don't know.
1022      */
1023
1024
1025     if (hscolno - 1 <= INDENTPT) {
1026         if (INDENTPT == -1)
1027             return;             /* Empty input OK for Haskell 1.1 */
1028         else {
1029             char errbuf[ERR_BUF_SIZE];
1030
1031             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1032             hsperror(errbuf);
1033         }
1034     }
1035     hsentercontext((hspcolno << 1) | 1);
1036 }
1037
1038
1039 /* Enter a new context without changing the indentation level */
1040 void
1041 hsincindent(void)
1042 {
1043 #ifdef HSP_DEBUG
1044     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1045 #endif
1046     hsentercontext(indenttab[icontexts] & ~1);
1047 }
1048
1049
1050 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1051 void
1052 hsindentoff(void)
1053 {
1054     forgetindent = TRUE;
1055 }
1056
1057
1058 /* Enter a new layout context. */
1059 static void
1060 hsentercontext(int indent)
1061 {
1062     /* Enter new context and set indentation as specified */
1063     if (++icontexts >= MAX_CONTEXTS) {
1064         char errbuf[ERR_BUF_SIZE];
1065
1066         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1067         hsperror(errbuf);
1068     }
1069     forgetindent = FALSE;
1070     indenttab[icontexts] = indent;
1071 #ifdef HSP_DEBUG
1072     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1073 #endif
1074 }
1075
1076
1077 /* Exit a layout context */
1078 void
1079 hsendindent(void)
1080 {
1081     --icontexts;
1082 #ifdef HSP_DEBUG
1083     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1084 #endif
1085 }
1086
1087 /*
1088  *      Return checks the indentation level and returns ;, } or the specified token.
1089  */
1090
1091 static int
1092 Return(int tok)
1093 {
1094 #ifdef HSP_DEBUG
1095     extern int yyleng;
1096 #endif
1097
1098     if (hsshouldindent()) {
1099         if (hspcolno < INDENTPT) {
1100 #ifdef HSP_DEBUG
1101             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1102 #endif
1103             hssttok = tok;
1104             return (VCCURLY);
1105         } else if (hspcolno == INDENTPT) {
1106 #ifdef HSP_DEBUG
1107             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1108 #endif
1109             hssttok = -tok;
1110             return (SEMI);
1111         }
1112     }
1113     hssttok = -1;
1114 #ifdef HSP_DEBUG
1115     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1116 #endif
1117     return (tok);
1118 }
1119
1120
1121 /*
1122  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1123  */
1124 int
1125 yylex()
1126 {
1127     int tok;
1128     static BOOLEAN eof = FALSE;
1129
1130     if (!eof) {
1131         if (hssttok != -1) {
1132             if (hssttok < 0) {
1133                 tok = -hssttok;
1134                 hssttok = -1;
1135                 return tok;
1136             }
1137             RETURN(hssttok);
1138         } else {
1139             endlineno = hslineno;
1140             if ((tok = yylex1()) != EOF)
1141                 return tok;
1142             else
1143                 eof = TRUE;
1144         }
1145     }
1146     if (icontexts > icontexts_save) {
1147         if (INDENTON) {
1148             eof = TRUE;
1149             indenttab[icontexts] = 0;
1150             return (VCCURLY);
1151         } else
1152             hsperror("missing '}' at end of file");
1153     } else if (hsbuf_save != NULL) {
1154         fclose(yyin);
1155         yy_delete_buffer(YY_CURRENT_BUFFER);
1156         yy_switch_to_buffer(hsbuf_save);
1157         hsbuf_save = NULL;
1158         new_filename(filename_save);
1159         free(filename_save);
1160         hslineno = hslineno_save;
1161         hsplineno = hsplineno_save;
1162         hscolno = hscolno_save;
1163         hspcolno = hspcolno_save;
1164         etags = etags_save;
1165         in_interface = FALSE;
1166         icontexts = icontexts_save - 1;
1167         icontexts_save = 0;
1168 #ifdef HSP_DEBUG
1169         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1170 #endif
1171         eof = FALSE;
1172         RETURN(LEOF);
1173     } else {
1174         yyterminate();
1175     }
1176     abort(); /* should never get here! */
1177     return(0);
1178 }
1179
1180 /**********************************************************************
1181 *                                                                     *
1182 *                                                                     *
1183 *     Input Processing for Interfaces                                 *
1184 *                                                                     *
1185 *                                                                     *
1186 **********************************************************************/
1187
1188 /* setyyin(file)        open file as new lex input buffer */
1189 extern FILE *yyin;
1190
1191 void
1192 setyyin(char *file)
1193 {
1194     hsbuf_save = YY_CURRENT_BUFFER;
1195     if ((yyin = fopen(file, "r")) == NULL) {
1196         char errbuf[ERR_BUF_SIZE];
1197
1198         sprintf(errbuf, "can't read \"%-.50s\"", file);
1199         hsperror(errbuf);
1200     }
1201     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1202
1203     hslineno_save = hslineno;
1204     hsplineno_save = hsplineno;
1205     hslineno = hsplineno = 1;
1206
1207     filename_save = input_filename;
1208     input_filename = NULL;
1209     new_filename(file);
1210     hscolno_save = hscolno;
1211     hspcolno_save = hspcolno;
1212     hscolno = hspcolno = 0;
1213     in_interface = TRUE;
1214     etags_save = etags; /* do not do "etags" stuff in interfaces */
1215     etags = 0;          /* We remember whether we are doing it in
1216                            the module, so we can restore it later [WDP 94/09] */
1217     hsentercontext(-1);         /* partain: changed this from 0 */
1218     icontexts_save = icontexts;
1219 #ifdef HSP_DEBUG
1220     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1221 #endif
1222 }
1223
1224 static void
1225 layout_input(char *text, int len)
1226 {
1227 #ifdef HSP_DEBUG
1228     fprintf(stderr, "Scanning \"%s\"\n", text);
1229 #endif
1230
1231     hsplineno = hslineno;
1232     hspcolno = hscolno;
1233
1234     while (len-- > 0) {
1235         switch (*text++) {
1236         case '\n':
1237         case '\r':
1238         case '\f':
1239             hslineno++;
1240             hscolno = 0;
1241             break;
1242         case '\t':
1243             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1244             break;
1245         case '\v':
1246             break;
1247         default:
1248             ++hscolno;
1249             break;
1250         }
1251     }
1252 }
1253
1254 void
1255 setstartlineno(void)
1256 {
1257     startlineno = hsplineno;
1258 #if 1/*etags*/
1259 #else
1260     if (etags)
1261         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1262 #endif
1263 }
1264
1265 /**********************************************************************
1266 *                                                                     *
1267 *                                                                     *
1268 *                      Text Caching                                   *
1269 *                                                                     *
1270 *                                                                     *
1271 **********************************************************************/
1272
1273 #define CACHE_SIZE YY_BUF_SIZE
1274
1275 static struct {
1276     unsigned allocated;
1277     unsigned next;
1278     char *text;
1279 } textcache = { 0, 0, NULL };
1280
1281 static void
1282 cleartext(void)
1283 {
1284 /*  fprintf(stderr, "cleartext\n"); */
1285     textcache.next = 0;
1286     if (textcache.allocated == 0) {
1287         textcache.allocated = CACHE_SIZE;
1288         textcache.text = xmalloc(CACHE_SIZE);
1289     }
1290 }
1291
1292 static void
1293 addtext(char *text, unsigned length)
1294 {
1295 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1296
1297     if (length == 0)
1298         return;
1299
1300     if (textcache.next + length + 1 >= textcache.allocated) {
1301         textcache.allocated += length + CACHE_SIZE;
1302         textcache.text = xrealloc(textcache.text, textcache.allocated);
1303     }
1304     bcopy(text, textcache.text + textcache.next, length);
1305     textcache.next += length;
1306 }
1307
1308 static void
1309 addchar(char c)
1310 {
1311 /*  fprintf(stderr, "addchar: %c\n", c); */
1312
1313     if (textcache.next + 2 >= textcache.allocated) {
1314         textcache.allocated += CACHE_SIZE;
1315         textcache.text = xrealloc(textcache.text, textcache.allocated);
1316     }
1317     textcache.text[textcache.next++] = c;
1318 }
1319
1320 static char *
1321 fetchtext(unsigned *length)
1322 {
1323 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1324
1325     *length = textcache.next;
1326     textcache.text[textcache.next] = '\0';
1327     return textcache.text;
1328 }
1329
1330 /**********************************************************************
1331 *                                                                     *
1332 *                                                                     *
1333 *    Identifier Processing                                             *
1334 *                                                                     *
1335 *                                                                     *
1336 **********************************************************************/
1337
1338 /*
1339         hsnewid         Enters an id of length n into the symbol table.
1340 */
1341
1342 static void
1343 hsnewid(char *name, int length)
1344 {
1345     char save = name[length];
1346
1347     name[length] = '\0';
1348     yylval.uid = installid(name);
1349     name[length] = save;
1350 }
1351
1352 BOOLEAN
1353 isconstr(char *s) /* walks past leading underscores before using the macro */
1354 {
1355     char *temp = s;
1356
1357     for ( ; temp != NULL && *temp == '_' ; temp++ );
1358
1359     return _isconstr(temp);
1360 }