ghc/compiler/parser/hslexer.flex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       FLEX for Haskell.                                             *
   6 *       -----------------                                             *
   7 *                                                                     *
   8 **********************************************************************/
   9
  10 #include "../../includes/config.h"
  11
  12 #include <stdio.h>
  13
  14 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
  15 #include <string.h>
  16 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
  17 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
  18 #include <memory.h>
  19 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
  20 #define index strchr
  21 #define rindex strrchr
  22 #define bcopy(s, d, n) memcpy ((d), (s), (n))
  23 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  24 #define bzero(s, n) memset ((s), 0, (n))
  25 #else /* not STDC_HEADERS and not HAVE_STRING_H */
  26 #include <strings.h>
  27 /* memory.h and strings.h conflict on some systems.  */
  28 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
  29
  30 #include "hspincl.h"
  31 #include "hsparser.tab.h"
  32 #include "constants.h"
  33 #include "utils.h"
  34
  35 /* Our substitute for <ctype.h> */
  36
  37 #define NCHARS  256
  38 #define _S      0x1
  39 #define _D      0x2
  40 #define _H      0x4
  41 #define _O      0x8
  42 #define _C      0x10
  43
  44 #define _isconstr(s)    (CharTable[*s]&(_C))
  45 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
  46
  47 static unsigned char CharTable[NCHARS] = {
  48 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
  49 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
  50 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
  51 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
  52 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
  53 /* '(' */       _C,     0,      0,      0,      0,      0,      0,      0,      /* ( */
  54 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
  55 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
  56 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
  57 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  58 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  59 /* 'X' */       _C,     _C,     _C,     _C,     0,      0,      0,      0,      /* [ */
  60 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
  61 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
  62 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
  63 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
  64
  65 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  66 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  67 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  68 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  69 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  70 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  71 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  72 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  73 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  74 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  75 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  76 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  77 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  78 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  79 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  80 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  81 };
  82
  83 /**********************************************************************
  84 *                                                                     *
  85 *                                                                     *
  86 *      Declarations                                                   *
  87 *                                                                     *
  88 *                                                                     *
  89 **********************************************************************/
  90
  91 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
  92
  93 /*
  94  * For my own sanity, things that are not part of the flex skeleton
  95  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
  96  */
  97
  98 static int hslineno = 0;        /* Line number at end of token */
  99 int hsplineno = 0;              /* Line number at end of previous token */
 100
 101 static int hscolno = 0;         /* Column number at end of token */
 102 int hspcolno = 0;               /* Column number at end of previous token */
 103 static int hsmlcolno = 0;       /* Column number for multiple-rule lexemes */
 104
 105 int modulelineno = -1;          /* The line number where the module starts */
 106 int startlineno = 0;            /* The line number where something starts */
 107 int endlineno = 0;              /* The line number where something ends */
 108
 109 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
 110 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
 111
 112 static int nested_comments;     /* For counting comment nesting depth */
 113
 114 /* Hacky definition of yywrap: see flex doc.
 115
 116    If we don't do this, then we'll have to get the default
 117    yywrap from the flex library, which is often something
 118    we are not good at locating.  This avoids that difficulty.
 119    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
 120    WDP 94/09/05
 121 */
 122 #define yywrap() 1
 123
 124 /* Essential forward declarations */
 125
 126 static void hsnewid      PROTO((char *, int));
 127 static void layout_input PROTO((char *, int));
 128 static void cleartext    (NO_ARGS);
 129 static void addtext      PROTO((char *, unsigned));
 130 static void addchar      PROTO((char));
 131 static char *fetchtext   PROTO((unsigned *));
 132 static void new_filename PROTO((char *));
 133 static int  Return       PROTO((int));
 134 static void hsentercontext PROTO((int));
 135
 136 /* Special file handling for IMPORTS */
 137 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 138
 139 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
 140 static char *filename_save;             /* File Name                     */
 141 static int hslineno_save = 0,           /* Line Number                   */
 142  hsplineno_save = 0,                    /* Line Number of Prev. token    */
 143  hscolno_save = 0,                      /* Indentation                   */
 144  hspcolno_save = 0;                     /* Left Indentation              */
 145 static short icontexts_save = 0;        /* Indent Context Level          */
 146
 147 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
 148 extern BOOLEAN etags;      /* that which is saved */
 149
 150 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
 151
 152 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
 153
 154 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
 155 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
 156 extern int maxAcceptablePragmaVersion;
 157 extern int thisIfacePragmaVersion;
 158
 159 static int hssttok = -1;        /* Stacked Token: -1   -- no token; -ve  -- ";"
 160                                  * inserted before token +ve  -- "}" inserted before
 161                                  * token */
 162
 163 short icontexts = 0;            /* Which context we're in */
 164
 165
 166
 167 /*
 168         Table of indentations:  right bit indicates whether to use
 169           indentation rules (1 = use rules; 0 = ignore)
 170
 171     partain:
 172     push one of these "contexts" at every "case" or "where"; the right bit says
 173     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
 174
 175     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 176     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 177     push is shown just below.
 178
 179 */
 180
 181
 182 static short indenttab[MAX_CONTEXTS] = {-1};
 183
 184 #define INDENTPT (indenttab[icontexts]>>1)
 185 #define INDENTON (indenttab[icontexts]&1)
 186
 187 #define RETURN(tok) return(Return(tok))
 188
 189 #undef YY_DECL
 190 #define YY_DECL int yylex1()
 191
 192 /* We should not peek at yy_act, but flex calls us even for the internal action
 193    triggered on 'end-of-buffer' (This is not true of flex 2.4.4 and up, but
 194    to support older versions of flex, we'll continue to peek for now.
 195  */
 196 #define YY_USER_ACTION \
 197     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
 198
 199 #if 0/*debug*/
 200 #undef YY_BREAK
 201 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
 202 #endif
 203
 204 /* Each time we enter a new start state, we push it onto the state stack.
 205    Note that the rules do not allow us to underflow or overflow the stack.
 206    (At least, they shouldn't.)  The maximum expected depth is 4:
 207    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
 208 */
 209 static int StateStack[5];
 210 static int StateDepth = -1;
 211
 212 #ifdef HSP_DEBUG
 213 #define PUSH_STATE(n)   do {\
 214     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
 215     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 216 #define POP_STATE       do {--StateDepth;\
 217     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
 218     BEGIN(StateStack[StateDepth]);} while(0)
 219 #else
 220 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 221 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
 222 #endif
 223
 224 %}
 225
 226 /* The start states are:
 227    Code -- normal Haskell code (principal lexer)
 228    GlaExt -- Haskell code with Glasgow extensions
 229    Comment -- Nested comment processing
 230    String -- Inside a string literal with backslashes
 231    StringEsc -- Immediately following a backslash in a string literal
 232    Char -- Inside a character literal with backslashes
 233    CharEsc -- Immediately following a backslash in a character literal
 234
 235    Note that the INITIAL state is unused.  Also note that these states
 236    are _exclusive_.  All rules should be prefixed with an appropriate
 237    list of start states.
 238  */
 239
 240 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
 241
 242 isoS                    [\xa1-\xbf\xd7\xf7]
 243 isoL                    [\xc0-\xd6\xd8-\xde]
 244 isol                    [\xdf-\xf6\xf8-\xff]
 245 isoA                    [\xa1-\xff]
 246
 247 D                       [0-9]
 248 O                       [0-7]
 249 H                       [0-9A-Fa-f]
 250 N                       {D}+
 251 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 252 S                       [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
 253 SId                     {S}{S}*
 254 L                       [A-Z\xc0-\xd6\xd8-\xde]
 255 l                       [a-z\xdf-\xf6\xf8-\xff]
 256 I                       {L}|{l}
 257 i                       {L}|{l}|[0-9'_]
 258 Id                      {I}{i}*
 259 Mod                     {L}{i}*
 260 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~\xa1-\xff]
 261 CNTRL                   [@A-Z\[\\\]^_]
 262 WS                      [ \t\n\r\f\v]
 263 NL                      [\n\r]
 264
 265 %%
 266
 267 %{
 268     /*
 269      * Special GHC pragma rules.  Do we need a start state for interface files,
 270      * so these won't be matched in source files? --JSM
 271      */
 272 %}
 273
 274 <Code,GlaExt>^"# ".*{NL}    {
 275                           char tempf[FILENAME_SIZE];
 276                           sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf);
 277                           new_filename(tempf);
 278                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 279                         }
 280
 281 <Code,GlaExt>^"#line ".*{NL}    {
 282                           char tempf[FILENAME_SIZE];
 283                           sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf);
 284                           new_filename(tempf);
 285                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 286                         }
 287
 288 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
 289                           /* partain: pragma-style line directive */
 290                           char tempf[FILENAME_SIZE];
 291                           sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf);
 292                           new_filename(tempf);
 293                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 294                         }
 295 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}"   {
 296                           sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
 297                         }
 298 <Code,GlaExt>"{-# GHC_PRAGMA "   {
 299                           if ( ignorePragmas ||
 300                                thisIfacePragmaVersion < minAcceptablePragmaVersion ||
 301                                thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
 302                              nested_comments = 1;
 303                              PUSH_STATE(Comment);
 304                           } else {
 305                              PUSH_STATE(GhcPragma);
 306                              RETURN(GHC_PRAGMA);
 307                           }
 308                         }
 309 <GhcPragma>"_N_"            { RETURN(NO_PRAGMA); }
 310 <GhcPragma>"_NI_"           { RETURN(NOINFO_PRAGMA); }
 311 <GhcPragma>"_DEFOREST_"     { RETURN(DEFOREST_PRAGMA); }
 312 <GhcPragma>"_SPECIALISE_"   { RETURN(SPECIALISE_PRAGMA); }
 313 <GhcPragma>"_A_"            { RETURN(ARITY_PRAGMA); }
 314 <GhcPragma>"_U_"            { RETURN(UPDATE_PRAGMA); }
 315 <GhcPragma>"_S_"            { RETURN(STRICTNESS_PRAGMA); }
 316 <GhcPragma>"_K_"            { RETURN(KIND_PRAGMA); }
 317 <GhcPragma>"_MF_"           { RETURN(MAGIC_UNFOLDING_PRAGMA); }
 318 <GhcPragma>"_F_"            { RETURN(UNFOLDING_PRAGMA); }
 319
 320 <GhcPragma>"_!_"            { RETURN(COCON); }
 321 <GhcPragma>"_#_"            { RETURN(COPRIM); }
 322 <GhcPragma>"_APP_"          { RETURN(COAPP); }
 323 <GhcPragma>"_TYAPP_"        { RETURN(COTYAPP); }
 324 <GhcPragma>"_ALG_"          { RETURN(CO_ALG_ALTS); }
 325 <GhcPragma>"_PRIM_"         { RETURN(CO_PRIM_ALTS); }
 326 <GhcPragma>"_NO_DEFLT_"     { RETURN(CO_NO_DEFAULT); }
 327 <GhcPragma>"_LETREC_"       { RETURN(CO_LETREC); }
 328
 329 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
 330 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
 331 <GhcPragma>"_USER_CC_"      { RETURN(CO_USER_CC); }
 332 <GhcPragma>"_AUTO_CC_"      { RETURN(CO_AUTO_CC); }
 333 <GhcPragma>"_DICT_CC_"      { RETURN(CO_DICT_CC); }
 334
 335 <GhcPragma>"_DUPD_CC_"      { RETURN(CO_DUPD_CC); }
 336 <GhcPragma>"_CAF_CC_"       { RETURN(CO_CAF_CC); }
 337
 338 <GhcPragma>"_SDSEL_"        { RETURN(CO_SDSEL_ID); }
 339 <GhcPragma>"_METH_"         { RETURN(CO_METH_ID); }
 340 <GhcPragma>"_DEFM_"         { RETURN(CO_DEFM_ID); }
 341 <GhcPragma>"_DFUN_"         { RETURN(CO_DFUN_ID); }
 342 <GhcPragma>"_CONSTM_"       { RETURN(CO_CONSTM_ID); }
 343 <GhcPragma>"_SPEC_"         { RETURN(CO_SPEC_ID); }
 344 <GhcPragma>"_WRKR_"         { RETURN(CO_WRKR_ID); }
 345 <GhcPragma>"_ORIG_"         { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
 346
 347 <GhcPragma>"_ALWAYS_"       { RETURN(UNFOLD_ALWAYS); }
 348 <GhcPragma>"_IF_ARGS_"      { RETURN(UNFOLD_IF_ARGS); }
 349
 350 <GhcPragma>"_NOREP_I_"      { RETURN(NOREP_INTEGER); }
 351 <GhcPragma>"_NOREP_R_"      { RETURN(NOREP_RATIONAL); }
 352 <GhcPragma>"_NOREP_S_"      { RETURN(NOREP_STRING); }
 353
 354 <GhcPragma>" #-}"           { POP_STATE; RETURN(END_PRAGMA); }
 355
 356 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
 357                               PUSH_STATE(UserPragma);
 358                               RETURN(SPECIALISE_UPRAGMA);
 359                             }
 360 <Code,GlaExt>"{-#"{WS}*"INLINE" {
 361                               PUSH_STATE(UserPragma);
 362                               RETURN(INLINE_UPRAGMA);
 363                             }
 364 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
 365                               PUSH_STATE(UserPragma);
 366                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
 367                             }
 368 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
 369                               PUSH_STATE(UserPragma);
 370                               RETURN(DEFOREST_UPRAGMA);
 371                             }
 372 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
 373                               fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
 374                                 input_filename, hsplineno);
 375                               format_string(stderr, (unsigned char *) yytext, yyleng);
 376                               fputs("'\n", stderr);
 377                               nested_comments = 1;
 378                               PUSH_STATE(Comment);
 379                             }
 380 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
 381
 382 %{
 383     /*
 384      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
 385      * intentionally accepted as a keyword even for normal <Code>.
 386      */
 387 %}
 388
 389 <Code,GlaExt,GhcPragma>"case"   { RETURN(CASE); }
 390 <Code,GlaExt>"class"            { RETURN(CLASS); }
 391 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
 392 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
 393 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
 394 <Code,GlaExt>"do"               { RETURN(DO); }
 395 <Code,GlaExt>"else"             { RETURN(ELSE); }
 396 <Code,GlaExt>"if"               { RETURN(IF); }
 397 <Code,GlaExt>"import"           { RETURN(IMPORT); }
 398 <Code,GlaExt,GhcPragma>"in"     { RETURN(IN); }
 399 <Code,GlaExt>"infix"            { RETURN(INFIX); }
 400 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
 401 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
 402 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
 403 <Code,GlaExt,GhcPragma>"let"    { RETURN(LET); }
 404 <Code,GlaExt>"module"           { RETURN(MODULE); }
 405 <Code,GlaExt>"newtype"          { RETURN(NEWTYPE); }
 406 <Code,GlaExt,GhcPragma>"of"     { RETURN(OF); }
 407 <Code,GlaExt>"then"             { RETURN(THEN); }
 408 <Code,GlaExt>"type"             { RETURN(TYPE); }
 409 <Code,GlaExt>"where"            { RETURN(WHERE); }
 410
 411 <Code,GlaExt>"as"               { RETURN(AS); }
 412 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
 413 <Code,GlaExt>"qualified"        { RETURN(QUALIFIED); }
 414 <Code,GlaExt>"interface"        { RETURN(INTERFACE); }
 415
 416 <Code,GlaExt,GhcPragma>"_scc_"  { RETURN(SCC); }
 417 <GlaExt,GhcPragma>"_ccall_"     { RETURN(CCALL); }
 418 <GlaExt,GhcPragma>"_ccall_GC_"  { RETURN(CCALL_GC); }
 419 <GlaExt,GhcPragma>"_casm_"      { RETURN(CASM); }
 420 <GlaExt,GhcPragma>"_casm_GC_"   { RETURN(CASM_GC); }
 421 <GhcPragma>"_forall_"           { RETURN(FORALL); }
 422
 423 %{
 424     /*
 425      * Haskell operators: special, reservedops and useful varsyms
 426      */
 427 %}
 428
 429 <Code,GlaExt,GhcPragma,UserPragma>"("   { RETURN(OPAREN); }
 430 <Code,GlaExt,GhcPragma,UserPragma>")"   { RETURN(CPAREN); }
 431 <Code,GlaExt,GhcPragma,UserPragma>"["   { RETURN(OBRACK); }
 432 <Code,GlaExt,GhcPragma,UserPragma>"]"   { RETURN(CBRACK); }
 433 <Code,GlaExt,GhcPragma>"{"              { RETURN(OCURLY); }
 434 <Code,GlaExt,GhcPragma>"}"              { RETURN(CCURLY); }
 435 <Code,GlaExt,GhcPragma,UserPragma>","   { RETURN(COMMA); }
 436 <Code,GlaExt,GhcPragma>";"              { RETURN(SEMI); }
 437 <Code,GlaExt,GhcPragma>"`"              { RETURN(BQUOTE); }
 438 <Code,GlaExt>"_"                        { RETURN(WILDCARD); }
 439
 440 <Code,GlaExt>".."                       { RETURN(DOTDOT); }
 441 <Code,GlaExt,GhcPragma,UserPragma>"::"  { RETURN(DCOLON); }
 442 <Code,GlaExt,GhcPragma,UserPragma>"="   { RETURN(EQUAL); }
 443 <Code,GlaExt,GhcPragma>"\\"             { RETURN(LAMBDA); }
 444 <Code,GlaExt,GhcPragma>"|"              { RETURN(VBAR); }
 445 <Code,GlaExt>"<-"                       { RETURN(LARROW); }
 446 <Code,GlaExt,GhcPragma,UserPragma>"->"  { RETURN(RARROW); }
 447 <Code,GlaExt>"-"                        { RETURN(MINUS); }
 448
 449 <Code,GlaExt,GhcPragma,UserPragma>"=>"  { RETURN(DARROW); }
 450 <Code,GlaExt>"@"                        { RETURN(AT); }
 451 <Code,GlaExt>"!"                        { RETURN(BANG); }
 452 <Code,GlaExt>"~"                        { RETURN(LAZY); }
 453
 454 <GhcPragma>"_/\\_"                      { RETURN(TYLAMBDA); }
 455
 456 %{
 457     /*
 458      * Integers and (for Glasgow extensions) primitive integers.  Note that
 459      * we pass all of the text on to the parser, because flex/C can't handle
 460      * arbitrary precision numbers.
 461      */
 462 %}
 463
 464 <GlaExt>("-")?"0"[Oo]{O}+"#"  { /* octal */
 465                          yylval.uid = xstrndup(yytext, yyleng - 1);
 466                          RETURN(INTPRIM);
 467                         }
 468 <Code,GlaExt>"0"[Oo]{O}+  { /* octal */
 469                          yylval.uid = xstrndup(yytext, yyleng);
 470                          RETURN(INTEGER);
 471                         }
 472 <GlaExt>("-")?"0"[Xx]{H}+"#"  { /* hexadecimal */
 473                          yylval.uid = xstrndup(yytext, yyleng - 1);
 474                          RETURN(INTPRIM);
 475                         }
 476 <Code,GlaExt>"0"[Xx]{H}+  { /* hexadecimal */
 477                          yylval.uid = xstrndup(yytext, yyleng);
 478                          RETURN(INTEGER);
 479                         }
 480 <GlaExt,GhcPragma>("-")?{N}"#"  {
 481                          yylval.uid = xstrndup(yytext, yyleng - 1);
 482                          RETURN(INTPRIM);
 483                         }
 484 <Code,GlaExt,GhcPragma>{N} {
 485                          yylval.uid = xstrndup(yytext, yyleng);
 486                          RETURN(INTEGER);
 487                         }
 488
 489 %{
 490     /*
 491      * Floats and (for Glasgow extensions) primitive floats/doubles.
 492      */
 493 %}
 494
 495 <GlaExt,GhcPragma>("-")?{F}"##" {
 496                          yylval.uid = xstrndup(yytext, yyleng - 2);
 497                          RETURN(DOUBLEPRIM);
 498                         }
 499 <GlaExt,GhcPragma>("-")?{F}"#" {
 500                          yylval.uid = xstrndup(yytext, yyleng - 1);
 501                          RETURN(FLOATPRIM);
 502                         }
 503 <Code,GlaExt>{F}        {
 504                          yylval.uid = xstrndup(yytext, yyleng);
 505                          RETURN(FLOAT);
 506                         }
 507
 508 %{
 509     /*
 510      * Funky ``foo'' style C literals for Glasgow extensions
 511      */
 512 %}
 513
 514 <GlaExt,GhcPragma>"``"[^']+"''" {
 515                          hsnewid(yytext + 2, yyleng - 4);
 516                          RETURN(CLITLIT);
 517                         }
 518
 519 %{
 520     /*
 521      * Identifiers, both variables and operators.  The trailing hash is allowed
 522      * for Glasgow extensions.
 523      */
 524 %}
 525
 526 <GhcPragma>"_NIL_"              { hsnewid(yytext, yyleng); RETURN(CONID); }
 527 <GhcPragma>"_TUP_"{D}+          { hsnewid(yytext, yyleng); RETURN(CONID); }
 528 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
 529
 530 %{
 531 /* These SHOULDNAE work in "Code" (sigh) */
 532 %}
 533 <Code,GlaExt,GhcPragma,UserPragma>{Id}"#" {
 534                          if (! (nonstandardFlag || in_interface)) {
 535                             char errbuf[ERR_BUF_SIZE];
 536                             sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
 537                             hsperror(errbuf);
 538                          }
 539                          hsnewid(yytext, yyleng);
 540                          RETURN(_isconstr(yytext) ? CONID : VARID);
 541                         }
 542 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} {
 543                          if (! (nonstandardFlag || in_interface)) {
 544                             char errbuf[ERR_BUF_SIZE];
 545                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
 546                             hsperror(errbuf);
 547                          }
 548                          hsnewid(yytext, yyleng);
 549                          RETURN(isconstr(yytext) ? CONID : VARID);
 550                          /* NB: ^^^^^^^^ : not the macro! */
 551                         }
 552 <Code,GlaExt,GhcPragma,UserPragma>{Id}  {
 553                          hsnewid(yytext, yyleng);
 554                          RETURN(_isconstr(yytext) ? CONID : VARID);
 555                         }
 556 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
 557                          hsnewid(yytext, yyleng);
 558                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
 559                         }
 560 <Code,GlaExt,GhcPragma,UserPragma>{Mod}"."{Id}  {
 561                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
 562                          RETURN(isconstr ? QCONID : QVARID);
 563                         }
 564 <Code,GlaExt,GhcPragma,UserPragma>{Mod}"."{SId} {
 565                          BOOLEAN isconstr = hsnewqid(yytext, yyleng);
 566                          RETURN(isconstr ? QCONSYM : QVARSYM);
 567                         }
 568
 569 %{
 570     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
 571
 572     /* Because we can make the former well-behaved (we defined them).
 573
 574        Sadly, the latter is defined by Haskell, which allows such
 575        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
 576     */
 577 %}
 578
 579 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`"        {
 580                          hsnewid(yytext + 1, yyleng - 2);
 581                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
 582                         }
 583
 584 %{
 585     /*
 586      * Character literals.  The first form is the quick form, for character
 587      * literals that don't contain backslashes.  Literals with backslashes are
 588      * lexed through multiple rules.  First, we match the open ' and as many
 589      * normal characters as possible.  This puts us into the <Char> state, where
 590      * a backslash is legal.  Then, we match the backslash and move into the
 591      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
 592      * characters and the close '.  We may end up with too many characters, but
 593      * this allows us to easily share the lex rules with strings.  Excess characters
 594      * are ignored with a warning.
 595      */
 596 %}
 597
 598 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
 599                          yylval.uhstring = installHstring(1, yytext+1);
 600                          RETURN(CHARPRIM);
 601                         }
 602 <Code,GlaExt>'({CHAR}|"\"")'    {
 603                          yylval.uhstring = installHstring(1, yytext+1);
 604                          RETURN(CHAR);
 605                         }
 606 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
 607                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
 608                          hsperror(errbuf);
 609                         }
 610 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
 611                          hsmlcolno = hspcolno;
 612                          cleartext();
 613                          addtext(yytext+1, yyleng-1);
 614                          PUSH_STATE(Char);
 615                         }
 616 <Char>({CHAR}|"\"")*'#  {
 617                          unsigned length;
 618                          char *text;
 619
 620                          addtext(yytext, yyleng - 2);
 621                          text = fetchtext(&length);
 622
 623                          if (! (nonstandardFlag || in_interface)) {
 624                             char errbuf[ERR_BUF_SIZE];
 625                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
 626                             hsperror(errbuf);
 627                          }
 628
 629                          if (length > 1) {
 630                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
 631                               input_filename, hsplineno, hspcolno + 1);
 632                             format_string(stderr, (unsigned char *) text, length);
 633                             fputs("' too long\n", stderr);
 634                             hsperror("");
 635                          }
 636                          yylval.uhstring = installHstring(1, text);
 637                          hspcolno = hsmlcolno;
 638                          POP_STATE;
 639                          RETURN(CHARPRIM);
 640                         }
 641 <Char>({CHAR}|"\"")*'   {
 642                          unsigned length;
 643                          char *text;
 644
 645                          addtext(yytext, yyleng - 1);
 646                          text = fetchtext(&length);
 647
 648                          if (length > 1) {
 649                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
 650                               input_filename, hsplineno, hspcolno + 1);
 651                             format_string(stderr, (unsigned char *) text, length);
 652                             fputs("' too long\n", stderr);
 653                             hsperror("");
 654                          }
 655                          yylval.uhstring = installHstring(1, text);
 656                          hspcolno = hsmlcolno;
 657                          POP_STATE;
 658                          RETURN(CHAR);
 659                         }
 660 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
 661
 662
 663 %{
 664     /*
 665      * String literals.  The first form is the quick form, for string literals
 666      * that don't contain backslashes.  Literals with backslashes are lexed
 667      * through multiple rules.  First, we match the open " and as many normal
 668      * characters as possible.  This puts us into the <String> state, where
 669      * a backslash is legal.  Then, we match the backslash and move into the
 670      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
 671      * characters, moving back and forth between <String> and <StringEsc> as more
 672      * backslashes are encountered.  (We may even digress into <Comment> mode if we
 673      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
 674      * of normal characters and the close ".
 675      */
 676 %}
 677
 678 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""#  {
 679                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
 680                             /* the -3 accounts for the " on front, "# on the end */
 681                          RETURN(STRINGPRIM);
 682                         }
 683 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""  {
 684                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
 685                          RETURN(STRING);
 686                         }
 687 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
 688                          hsmlcolno = hspcolno;
 689                          cleartext();
 690                          addtext(yytext+1, yyleng-1);
 691                          PUSH_STATE(String);
 692                         }
 693 <String>({CHAR}|"'")*"\"#"   {
 694                          unsigned length;
 695                          char *text;
 696
 697                          addtext(yytext, yyleng-2);
 698                          text = fetchtext(&length);
 699
 700                          if (! (nonstandardFlag || in_interface)) {
 701                             char errbuf[ERR_BUF_SIZE];
 702                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
 703                             hsperror(errbuf);
 704                          }
 705
 706                          yylval.uhstring = installHstring(length, text);
 707                          hspcolno = hsmlcolno;
 708                          POP_STATE;
 709                          RETURN(STRINGPRIM);
 710                         }
 711 <String>({CHAR}|"'")*"\""   {
 712                          unsigned length;
 713                          char *text;
 714
 715                          addtext(yytext, yyleng-1);
 716                          text = fetchtext(&length);
 717
 718                          yylval.uhstring = installHstring(length, text);
 719                          hspcolno = hsmlcolno;
 720                          POP_STATE;
 721                          RETURN(STRING);
 722                         }
 723 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
 724
 725 %{
 726     /*
 727      * Character and string escapes are roughly the same, but strings have the
 728      * extra `\&' sequence which is not allowed for characters.  Also, comments
 729      * are allowed in the <StringEsc> state.  (See the comment section much
 730      * further down.)
 731      *
 732      * NB: Backslashes and tabs are stored in strings as themselves.
 733      * But if we print them (in printtree.c), they must go out as
 734      * "\\\\" and "\\t" respectively.  (This is because of the bogus
 735      * intermediate format that the parser produces.  It uses '\t' fpr end of
 736      * string, so it needs to be able to escape tabs, which means that it
 737      * also needs to be able to escape the escape character ('\\').  Sigh.
 738      */
 739 %}
 740
 741 <Char>\\                { PUSH_STATE(CharEsc); }
 742 <String>\\&             /* Ignore */ ;
 743 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
 744
 745 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
 746 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
 747
 748 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
 749 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
 750 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
 751 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
 752 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
 753 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
 754 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
 755 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
 756 <CharEsc,StringEsc>BEL  |
 757 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
 758 <CharEsc,StringEsc>BS   |
 759 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
 760 <CharEsc,StringEsc>HT   |
 761 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
 762 <CharEsc,StringEsc>LF   |
 763 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
 764 <CharEsc,StringEsc>VT   |
 765 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
 766 <CharEsc,StringEsc>FF   |
 767 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
 768 <CharEsc,StringEsc>CR   |
 769 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
 770 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
 771 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
 772 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
 773 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
 774 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
 775 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
 776 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
 777 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
 778 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
 779 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
 780 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
 781 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
 782 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
 783 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
 784 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
 785 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
 786 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
 787 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
 788 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
 789 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
 790 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
 791 <CharEsc,StringEsc>{D}+  {
 792                           int i = strtol(yytext, NULL, 10);
 793                           if (i < NCHARS) {
 794                              addchar((char) i);
 795                           } else {
 796                              char errbuf[ERR_BUF_SIZE];
 797                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 798                                 yytext);
 799                              hsperror(errbuf);
 800                           }
 801                           POP_STATE;
 802                         }
 803 <CharEsc,StringEsc>o{O}+ {
 804                           int i = strtol(yytext + 1, NULL, 8);
 805                           if (i < NCHARS) {
 806                              addchar((char) i);
 807                           } else {
 808                              char errbuf[ERR_BUF_SIZE];
 809                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 810                                 yytext);
 811                              hsperror(errbuf);
 812                           }
 813                           POP_STATE;
 814                         }
 815 <CharEsc,StringEsc>x{H}+ {
 816                           int i = strtol(yytext + 1, NULL, 16);
 817                           if (i < NCHARS) {
 818                              addchar((char) i);
 819                           } else {
 820                              char errbuf[ERR_BUF_SIZE];
 821                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 822                                 yytext);
 823                              hsperror(errbuf);
 824                           }
 825                           POP_STATE;
 826                         }
 827
 828 %{
 829     /*
 830      * Simple comments and whitespace.  Normally, we would just ignore these, but
 831      * in case we're processing a string escape, we need to note that we've seen
 832      * a gap.
 833      *
 834      * Note that we cater for a comment line that *doesn't* end in a newline.
 835      * This is incorrect, strictly speaking, but seems like the right thing
 836      * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
 837      */
 838 %}
 839
 840 <Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
 841 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+       { noGap = FALSE; }
 842
 843 %{
 844     /*
 845      * Nested comments.  The major complication here is in trying to match the
 846      * longest lexemes possible, for better performance.  (See the flex document.)
 847      * That's why the rules look so bizarre.
 848      */
 849 %}
 850
 851 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-"        {
 852                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
 853                         }
 854
 855 <Comment>[^-{]*         |
 856 <Comment>"-"+[^-{}]+    |
 857 <Comment>"{"+[^-{}]+    ;
 858 <Comment>"{-"           { nested_comments++; }
 859 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
 860 <Comment>(.|\n)         ;
 861
 862 %{
 863     /*
 864      * Illegal characters.  This used to be a single rule, but we might as well
 865      * pass on as much information as we have, so now we indicate our state in
 866      * the error message.
 867      */
 868 %}
 869
 870 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n)        {
 871                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 872                             input_filename, hsplineno, hspcolno + 1);
 873                          format_string(stderr, (unsigned char *) yytext, 1);
 874                          fputs("'\n", stderr);
 875                          hsperror("");
 876                         }
 877 <Char>(.|\n)            {
 878                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 879                             input_filename, hsplineno, hspcolno + 1);
 880                          format_string(stderr, (unsigned char *) yytext, 1);
 881                          fputs("' in a character literal\n", stderr);
 882                          hsperror("");
 883                         }
 884 <CharEsc>(.|\n)         {
 885                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
 886                             input_filename, hsplineno, hspcolno + 1);
 887                          format_string(stderr, (unsigned char *) yytext, 1);
 888                          fputs("'\n", stderr);
 889                          hsperror("");
 890                         }
 891 <String>(.|\n)          { if (nonstandardFlag) {
 892                              addtext(yytext, yyleng);
 893                           } else {
 894                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 895                                 input_filename, hsplineno, hspcolno + 1);
 896                                 format_string(stderr, (unsigned char *) yytext, 1);
 897                                 fputs("' in a string literal\n", stderr);
 898                                 hsperror("");
 899                           }
 900                         }
 901 <StringEsc>(.|\n)       {
 902                          if (noGap) {
 903                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
 904                                 input_filename, hsplineno, hspcolno + 1);
 905                              format_string(stderr, (unsigned char *) yytext, 1);
 906                              fputs("'\n", stderr);
 907                              hsperror("");
 908                          } else {
 909                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 910                                 input_filename, hsplineno, hspcolno + 1);
 911                              format_string(stderr, (unsigned char *) yytext, 1);
 912                              fputs("' in a string gap\n", stderr);
 913                              hsperror("");
 914                          }
 915                         }
 916
 917 %{
 918     /*
 919      * End of file.  In any sub-state, this is an error.  However, for the primary
 920      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
 921      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
 922      * adding virtual close curlies, or closing an interface and returning to the
 923      * primary source file.
 924      *
 925      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
 926      * line/column advancement has to be done by hand.
 927      */
 928 %}
 929
 930 <Char,CharEsc><<EOF>>   {
 931                           hsplineno = hslineno; hspcolno = hscolno;
 932                           hsperror("unterminated character literal");
 933                         }
 934 <Comment><<EOF>>        {
 935                           hsplineno = hslineno; hspcolno = hscolno;
 936                           hsperror("unterminated comment");
 937                         }
 938 <String,StringEsc><<EOF>>   {
 939                           hsplineno = hslineno; hspcolno = hscolno;
 940                           hsperror("unterminated string literal");
 941                         }
 942 <GhcPragma><<EOF>>      {
 943                           hsplineno = hslineno; hspcolno = hscolno;
 944                           hsperror("unterminated interface pragma");
 945                         }
 946 <UserPragma><<EOF>>     {
 947                           hsplineno = hslineno; hspcolno = hscolno;
 948                           hsperror("unterminated user-specified pragma");
 949                         }
 950 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
 951
 952 %%
 953
 954 /**********************************************************************
 955 *                                                                     *
 956 *                                                                     *
 957 *     YACC/LEX Initialisation etc.                                    *
 958 *                                                                     *
 959 *                                                                     *
 960 **********************************************************************/
 961
 962 /*
 963    We initialise input_filename to "<stdin>".
 964    This allows unnamed sources to be piped into the parser.
 965 */
 966
 967 extern BOOLEAN acceptPrim;
 968
 969 void
 970 yyinit(void)
 971 {
 972     input_filename = xstrdup("<stdin>");
 973
 974     /* We must initialize the input buffer _now_, because we call
 975        setyyin _before_ calling yylex for the first time! */
 976     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 977
 978     if (acceptPrim)
 979         PUSH_STATE(GlaExt);
 980     else
 981         PUSH_STATE(Code);
 982 }
 983
 984 static void
 985 new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
 986 {
 987     if (input_filename != NULL)
 988         free(input_filename);
 989     input_filename = xstrdup(f);
 990 }
 991
 992 /**********************************************************************
 993 *                                                                     *
 994 *                                                                     *
 995 *     Layout Processing                                               *
 996 *                                                                     *
 997 *                                                                     *
 998 **********************************************************************/
 999
1000 /*
1001         The following section deals with Haskell Layout conventions
1002         forcing insertion of ; or } as appropriate
1003 */
1004
1005 static BOOLEAN
1006 hsshouldindent(void)
1007 {
1008     return (!forgetindent && INDENTON);
1009 }
1010
1011
1012 /* Enter new context and set new indentation level */
1013 void
1014 hssetindent(void)
1015 {
1016 #ifdef HSP_DEBUG
1017     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1018 #endif
1019
1020     /*
1021      * partain: first chk that new indent won't be less than current one; this code
1022      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1023      * current token; what that has to do with indenting, I don't know.
1024      */
1025
1026
1027     if (hscolno - 1 <= INDENTPT) {
1028         if (INDENTPT == -1)
1029             return;             /* Empty input OK for Haskell 1.1 */
1030         else {
1031             char errbuf[ERR_BUF_SIZE];
1032
1033             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1034             hsperror(errbuf);
1035         }
1036     }
1037     hsentercontext((hspcolno << 1) | 1);
1038 }
1039
1040
1041 /* Enter a new context without changing the indentation level */
1042 void
1043 hsincindent(void)
1044 {
1045 #ifdef HSP_DEBUG
1046     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1047 #endif
1048     hsentercontext(indenttab[icontexts] & ~1);
1049 }
1050
1051
1052 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1053 void
1054 hsindentoff(void)
1055 {
1056     forgetindent = TRUE;
1057 }
1058
1059
1060 /* Enter a new layout context. */
1061 static void
1062 hsentercontext(int indent)
1063 {
1064     /* Enter new context and set indentation as specified */
1065     if (++icontexts >= MAX_CONTEXTS) {
1066         char errbuf[ERR_BUF_SIZE];
1067
1068         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1069         hsperror(errbuf);
1070     }
1071     forgetindent = FALSE;
1072     indenttab[icontexts] = indent;
1073 #ifdef HSP_DEBUG
1074     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1075 #endif
1076 }
1077
1078
1079 /* Exit a layout context */
1080 void
1081 hsendindent(void)
1082 {
1083     --icontexts;
1084 #ifdef HSP_DEBUG
1085     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1086 #endif
1087 }
1088
1089 /*
1090  *      Return checks the indentation level and returns ;, } or the specified token.
1091  */
1092
1093 static int
1094 Return(int tok)
1095 {
1096 #ifdef HSP_DEBUG
1097     extern int yyleng;
1098 #endif
1099
1100     if (hsshouldindent()) {
1101         if (hspcolno < INDENTPT) {
1102 #ifdef HSP_DEBUG
1103             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1104 #endif
1105             hssttok = tok;
1106             return (VCCURLY);
1107         } else if (hspcolno == INDENTPT) {
1108 #ifdef HSP_DEBUG
1109             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1110 #endif
1111             hssttok = -tok;
1112             return (SEMI);
1113         }
1114     }
1115     hssttok = -1;
1116 #ifdef HSP_DEBUG
1117     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1118 #endif
1119     return (tok);
1120 }
1121
1122
1123 /*
1124  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1125  */
1126 int
1127 yylex()
1128 {
1129     int tok;
1130     static BOOLEAN eof = FALSE;
1131
1132     if (!eof) {
1133         if (hssttok != -1) {
1134             if (hssttok < 0) {
1135                 tok = -hssttok;
1136                 hssttok = -1;
1137                 return tok;
1138             }
1139             RETURN(hssttok);
1140         } else {
1141             endlineno = hslineno;
1142             if ((tok = yylex1()) != EOF)
1143                 return tok;
1144             else
1145                 eof = TRUE;
1146         }
1147     }
1148     if (icontexts > icontexts_save) {
1149         if (INDENTON) {
1150             eof = TRUE;
1151             indenttab[icontexts] = 0;
1152             return (VCCURLY);
1153         } else
1154             hsperror("missing '}' at end of file");
1155     } else if (hsbuf_save != NULL) {
1156         fclose(yyin);
1157         yy_delete_buffer(YY_CURRENT_BUFFER);
1158         yy_switch_to_buffer(hsbuf_save);
1159         hsbuf_save = NULL;
1160         new_filename(filename_save);
1161         free(filename_save);
1162         hslineno = hslineno_save;
1163         hsplineno = hsplineno_save;
1164         hscolno = hscolno_save;
1165         hspcolno = hspcolno_save;
1166         etags = etags_save;
1167         in_interface = FALSE;
1168         icontexts = icontexts_save - 1;
1169         icontexts_save = 0;
1170 #ifdef HSP_DEBUG
1171         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1172 #endif
1173         eof = FALSE;
1174         RETURN(LEOF);
1175     } else {
1176         yyterminate();
1177     }
1178     abort(); /* should never get here! */
1179     return(0);
1180 }
1181
1182 /**********************************************************************
1183 *                                                                     *
1184 *                                                                     *
1185 *     Input Processing for Interfaces                                 *
1186 *                                                                     *
1187 *                                                                     *
1188 **********************************************************************/
1189
1190 /* setyyin(file)        open file as new lex input buffer */
1191 extern FILE *yyin;
1192
1193 void
1194 setyyin(char *file)
1195 {
1196     hsbuf_save = YY_CURRENT_BUFFER;
1197     if ((yyin = fopen(file, "r")) == NULL) {
1198         char errbuf[ERR_BUF_SIZE];
1199
1200         sprintf(errbuf, "can't read \"%-.50s\"", file);
1201         hsperror(errbuf);
1202     }
1203     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1204
1205     hslineno_save = hslineno;
1206     hsplineno_save = hsplineno;
1207     hslineno = hsplineno = 1;
1208
1209     filename_save = input_filename;
1210     input_filename = NULL;
1211     new_filename(file);
1212     hscolno_save = hscolno;
1213     hspcolno_save = hspcolno;
1214     hscolno = hspcolno = 0;
1215     in_interface = TRUE;
1216     etags_save = etags; /* do not do "etags" stuff in interfaces */
1217     etags = 0;          /* We remember whether we are doing it in
1218                            the module, so we can restore it later [WDP 94/09] */
1219     hsentercontext(-1);         /* partain: changed this from 0 */
1220     icontexts_save = icontexts;
1221 #ifdef HSP_DEBUG
1222     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1223 #endif
1224 }
1225
1226 static void
1227 layout_input(char *text, int len)
1228 {
1229 #ifdef HSP_DEBUG
1230     fprintf(stderr, "Scanning \"%s\"\n", text);
1231 #endif
1232
1233     hsplineno = hslineno;
1234     hspcolno = hscolno;
1235
1236     while (len-- > 0) {
1237         switch (*text++) {
1238         case '\n':
1239         case '\r':
1240         case '\f':
1241             hslineno++;
1242             hscolno = 0;
1243             break;
1244         case '\t':
1245             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1246             break;
1247         case '\v':
1248             break;
1249         default:
1250             ++hscolno;
1251             break;
1252         }
1253     }
1254 }
1255
1256 void
1257 setstartlineno(void)
1258 {
1259     startlineno = hsplineno;
1260
1261     if (modulelineno == 0) {
1262         modulelineno = startlineno;
1263     }
1264
1265 #if 1/*etags*/
1266 #else
1267     if (etags)
1268         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1269 #endif
1270 }
1271
1272 /**********************************************************************
1273 *                                                                     *
1274 *                                                                     *
1275 *                      Text Caching                                   *
1276 *                                                                     *
1277 *                                                                     *
1278 **********************************************************************/
1279
1280 #define CACHE_SIZE YY_BUF_SIZE
1281
1282 static struct {
1283     unsigned allocated;
1284     unsigned next;
1285     char *text;
1286 } textcache = { 0, 0, NULL };
1287
1288 static void
1289 cleartext(void)
1290 {
1291 /*  fprintf(stderr, "cleartext\n"); */
1292     textcache.next = 0;
1293     if (textcache.allocated == 0) {
1294         textcache.allocated = CACHE_SIZE;
1295         textcache.text = xmalloc(CACHE_SIZE);
1296     }
1297 }
1298
1299 static void
1300 addtext(char *text, unsigned length)
1301 {
1302 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1303
1304     if (length == 0)
1305         return;
1306
1307     if (textcache.next + length + 1 >= textcache.allocated) {
1308         textcache.allocated += length + CACHE_SIZE;
1309         textcache.text = xrealloc(textcache.text, textcache.allocated);
1310     }
1311     bcopy(text, textcache.text + textcache.next, length);
1312     textcache.next += length;
1313 }
1314
1315 static void
1316 addchar(char c)
1317 {
1318 /*  fprintf(stderr, "addchar: %c\n", c); */
1319
1320     if (textcache.next + 2 >= textcache.allocated) {
1321         textcache.allocated += CACHE_SIZE;
1322         textcache.text = xrealloc(textcache.text, textcache.allocated);
1323     }
1324     textcache.text[textcache.next++] = c;
1325 }
1326
1327 static char *
1328 fetchtext(unsigned *length)
1329 {
1330 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1331
1332     *length = textcache.next;
1333     textcache.text[textcache.next] = '\0';
1334     return textcache.text;
1335 }
1336
1337 /**********************************************************************
1338 *                                                                     *
1339 *                                                                     *
1340 *    Identifier Processing                                             *
1341 *                                                                     *
1342 *                                                                     *
1343 **********************************************************************/
1344
1345 /*
1346         hsnewid         Enters an id of length n into the symbol table.
1347 */
1348
1349 static void
1350 hsnewid(char *name, int length)
1351 {
1352     char save = name[length];
1353
1354     name[length] = '\0';
1355     yylval.uid = installid(name);
1356     name[length] = save;
1357 }
1358
1359 BOOLEAN
1360 hsnewqid(char *name, int length)
1361 {
1362     char* dot;
1363     char save = name[length];
1364     name[length] = '\0';
1365
1366     dot = strchr(name, '.');
1367     *dot = '\0';
1368     yylval.uqid = mkaqual(installid(name),installid(dot+1));
1369     *dot = '.';
1370     name[length] = save;
1371
1372     return _isconstr(dot+1);
1373 }
1374
1375 BOOLEAN
1376 isconstr(char *s) /* walks past leading underscores before using the macro */
1377 {
1378     char *temp = s;
1379
1380     for ( ; temp != NULL && *temp == '_' ; temp++ );
1381
1382     return _isconstr(temp);
1383 }