ghc/compiler/yaccParser/hslexer.flex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       LEX grammar for Haskell.                                      *
   6 *       ------------------------                                      *
   7 *                                                                     *
   8 *       (c) Copyright K. Hammond, University of Glasgow,              *
   9 *               10th. February 1989                                   *
  10 *                                                                     *
  11 *       Modification History                                          *
  12 *       --------------------                                          *
  13 *                                                                     *
  14 *       22/08/91 kh             Initial Haskell 1.1 version.          *
  15 *       18/10/91 kh             Added 'ccall'.                        *
  16 *       19/11/91 kh             Tidied generally.                     *
  17 *       04/12/91 kh             Added Int#.                           *
  18 *       31/01/92 kh             Haskell 1.2 version.                  *
  19 *       24/04/92 ps             Added 'scc'.                          *
  20 *       03/06/92 kh             Changed Infix/Prelude Handling.       *
  21 *       23/08/93 jsm            Changed to support flex               *
  22 *                                                                     *
  23 *                                                                     *
  24 *       Known Problems:                                               *
  25 *                                                                     *
  26 *               None, any more.                                       *
  27 *                                                                     *
  28 **********************************************************************/
  29
  30 #include "../../includes/config.h"
  31
  32 #include <stdio.h>
  33
  34 #if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
  35 #include <string.h>
  36 /* An ANSI string.h and pre-ANSI memory.h might conflict.  */
  37 #if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
  38 #include <memory.h>
  39 #endif /* not STDC_HEADERS and HAVE_MEMORY_H */
  40 #define index strchr
  41 #define rindex strrchr
  42 #define bcopy(s, d, n) memcpy ((d), (s), (n))
  43 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  44 #define bzero(s, n) memset ((s), 0, (n))
  45 #else /* not STDC_HEADERS and not HAVE_STRING_H */
  46 #include <strings.h>
  47 /* memory.h and strings.h conflict on some systems.  */
  48 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
  49
  50 #include "hspincl.h"
  51 #include "hsparser.tab.h"
  52 #include "constants.h"
  53 #include "utils.h"
  54
  55 /* Our substitute for <ctype.h> */
  56
  57 #define NCHARS  256
  58 #define _S      0x1
  59 #define _D      0x2
  60 #define _H      0x4
  61 #define _O      0x8
  62 #define _C      0x10
  63 /* Nonzero iff the first character of s has the _C bit in CharTable.  The argument is parenthesised so that _isconstr(p+1) tests p[1], and the index is cast so it stays within 0..NCHARS-1 even where plain char is signed. */
  64 #define _isconstr(s)    (CharTable[(unsigned char)*(s)]&(_C))
  65 BOOLEAN isconstr PROTO((char *)); /* fwd decl */
  66
  67 unsigned char CharTable[NCHARS] = {
  68 /* nul */       0,      0,      0,      0,      0,      0,      0,      0,
  69 /* bs  */       0,      _S,     _S,     _S,     _S,     0,      0,      0,
  70 /* dle */       0,      0,      0,      0,      0,      0,      0,      0,
  71 /* can */       0,      0,      0,      0,      0,      0,      0,      0,
  72 /* sp  */       _S,     0,      0,      0,      0,      0,      0,      0,
  73 /* '(' */       0,      0,      0,      0,      0,      0,      0,      0,
  74 /* '0' */       _D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,_D|_H|_O,
  75 /* '8' */       _D|_H,  _D|_H,  _C,     0,      0,      0,      0,      0,
  76 /* '@' */       0,      _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _H|_C,  _C,
  77 /* 'H' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  78 /* 'P' */       _C,     _C,     _C,     _C,     _C,     _C,     _C,     _C,
  79 /* 'X' */       _C,     _C,     _C,     0,      0,      0,      0,      0,
  80 /* '`' */       0,      _H,     _H,     _H,     _H,     _H,     _H,     0,
  81 /* 'h' */       0,      0,      0,      0,      0,      0,      0,      0,
  82 /* 'p' */       0,      0,      0,      0,      0,      0,      0,      0,
  83 /* 'x' */       0,      0,      0,      0,      0,      0,      0,      0,
  84
  85 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  86 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  87 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  88 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  89 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  90 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  91 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  92 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  93 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  94 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  95 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  96 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  97 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  98 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
  99 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
 100 /*     */       0,      0,      0,      0,      0,      0,      0,      0,
 101 };
 102
 103 /**********************************************************************
 104 *                                                                     *
 105 *                                                                     *
 106 *      Declarations                                                   *
 107 *                                                                     *
 108 *                                                                     *
 109 **********************************************************************/
 110
 111 char *input_filename = NULL;    /* Always points to a dynamically allocated string */
 112
 113 /*
 114  * For my own sanity, things that are not part of the flex skeleton
 115  * have been renamed as hsXXXXX rather than yyXXXXX.  --JSM
 116  */
 117
 118 int hslineno = 0;               /* Line number at end of token */
 119 int hsplineno = 0;              /* Line number at end of previous token */
 120
 121 int hscolno = 0;                /* Column number at end of token */
 122 int hspcolno = 0;               /* Column number at end of previous token */
 123 int hsmlcolno = 0;              /* Column number for multiple-rule lexemes */
 124
 125 int startlineno = 0;            /* The line number where something starts */
 126 int endlineno = 0;              /* The line number where something ends */
 127
 128 static BOOLEAN noGap = TRUE;    /* For checking string gaps */
 129 static BOOLEAN forgetindent = FALSE;    /* Don't bother applying indentation rules */
 130
 131 static int nested_comments;     /* For counting comment nesting depth */
 132
 133 /* Hacky definition of yywrap: see flex doc.
 134
 135    If we don't do this, then we'll have to get the default
 136    yywrap from the flex library, which is often something
 137    we are not good at locating.  This avoids that difficulty.
 138    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
 139    WDP 94/09/05
 140 */
 141 #define yywrap() 1
 142
 143 /* Essential forward declarations */
 144
 145 static VOID hsnewid      PROTO((char *, int));
 146 static VOID layout_input PROTO((char *, int));
 147 static VOID cleartext    (NO_ARGS);
 148 static VOID addtext      PROTO((char *, unsigned));
 149 static VOID addchar      PROTO((char));
 150 static char *fetchtext   PROTO((unsigned *));
 151
 152 /* Special file handling for IMPORTS */
 153 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 154
 155 static YY_BUFFER_STATE hsbuf_save = NULL;       /* Saved input buffer    */
 156 static char *filename_save;             /* File Name                     */
 157 static int hslineno_save = 0,           /* Line Number                   */
 158  hsplineno_save = 0,                    /* Line Number of Prev. token    */
 159  hscolno_save = 0,                      /* Indentation                   */
 160  hspcolno_save = 0;                     /* Left Indentation              */
 161 static short icontexts_save = 0;        /* Indent Context Level          */
 162
 163 static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
 164 extern BOOLEAN etags;      /* that which is saved */
 165
 166 extern BOOLEAN nonstandardFlag; /* Glasgow extensions allowed */
 167
 168 static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
 169
 170 extern BOOLEAN ignorePragmas;           /* True when we should ignore pragmas */
 171 extern int minAcceptablePragmaVersion;  /* see documentation in main.c */
 172 extern int maxAcceptablePragmaVersion;
 173 extern int thisIfacePragmaVersion;
 174
 175 static int hssttok = -1;        /* Stacked token:  -1  -- no token;
 176                                  * -ve -- ";" inserted before token;
 177                                  * +ve -- "}" inserted before token */
 178
 179 short icontexts = 0;            /* Which context we're in */
 180
 181
 182
 183 /*
 184         Table of indentations:  right bit indicates whether to use
 185           indentation rules (1 = use rules; 0 = ignore)
 186
 187     partain:
 188     push one of these "contexts" at every "case" or "where"; the right bit says
 189     whether user supplied braces, etc., or not.  pop appropriately (hsendindent).
 190
 191     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 192     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 193     push is shown just below.
 194
 195 */
 196
 197
 198 static short indenttab[MAX_CONTEXTS] = {-1};
 199
 200 #define INDENTPT (indenttab[icontexts]>>1)
 201 #define INDENTON (indenttab[icontexts]&1)
 202
 203 #define RETURN(tok) return(Return(tok))
 204
 205 #undef YY_DECL
 206 #define YY_DECL int yylex1()
 207
 208 /* We should not peek at yy_act, but flex calls us even for the internal action
 209    triggered on 'end-of-buffer'.  (This is not true of flex 2.4.4 and up, but
 210    to support older versions of flex, we'll continue to peek for now.)
 211  */
 212 #define YY_USER_ACTION \
 213     if (yy_act != YY_END_OF_BUFFER) layout_input(yytext, yyleng);
 214
 215 #if 0/*debug*/
 216 #undef YY_BREAK
 217 #define YY_BREAK if (etags) fprintf(stderr,"%d %d / %d %d / %d\n",hsplineno,hspcolno,hslineno,hscolno,startlineno); break;
 218 #endif
 219
 220 /* Each time we enter a new start state, we push it onto the state stack.
 221    Note that the rules do not allow us to underflow or overflow the stack.
 222    (At least, they shouldn't.)  The maximum expected depth is 4:
 223    0: Code -> 1: String -> 2: StringEsc -> 3: Comment
 224    Neither PUSH_STATE nor POP_STATE checks the bounds itself. */
 225 static int StateStack[5];       /* start states entered so far; StateStack[StateDepth] is the current one */
 226 static int StateDepth = -1;     /* index of the top entry; -1 until the first PUSH_STATE */
 227
 228 #ifdef HSP_DEBUG
 229 #define PUSH_STATE(n)   do {\
 230     fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
 231     StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 232 #define POP_STATE       do {--StateDepth;\
 233     fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
 234     BEGIN(StateStack[StateDepth]);} while(0)
 235 #else
 236 #define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
 237 #define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
 238 #endif
 239
 240 %}
 241
 242 /* The start states are:
 243    Code -- normal Haskell code (principal lexer)
 244    GlaExt -- Haskell code with Glasgow extensions
 245    Comment -- Nested comment processing
 246    String -- Inside a string literal with backslashes
 247    StringEsc -- Immediately following a backslash in a string literal
 248    Char -- Inside a character literal with backslashes
 249    CharEsc -- Immediately following a backslash in a character literal
 250
 251    Note that the INITIAL state is unused.  Also note that these states
 252    are _exclusive_.  All rules should be prefixed with an appropriate
 253    list of start states.
 254  */
 255
 256 %x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
 257
 258 D                       [0-9]
 259 O                       [0-7]
 260 H                       [0-9A-Fa-f]
 261 N                       {D}+
 262 F                       {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 263 S                       [!#$%&*+./<=>?@\\^|~:]
 264 SId                     ({S}|~|-){S}*
 265 CHAR                    [ !#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
 266 L                       [A-Z]
 267 I                       [A-Za-z]
 268 i                       [A-Za-z0-9'_]
 269 Id                      {I}({i})*
 270 WS                      [ \t\n\r\f\v]
 271 CNTRL                   [@A-Z\[\\\]^_]
 272 NL                      [\n\r]
 273
 274 %%
 275
 276 %{
 277     /*
 278      * Special GHC pragma rules.  Do we need a start state for interface files,
 279      * so these won't be matched in source files? --JSM
 280      */
 281 %}
 282
 283 <Code,GlaExt>^"# ".*{NL}    {
 284                           char tempf[FILENAME_SIZE]; /* cpp line marker: # <line> "<file>"; the name is a substring of yytext, so it fits when yyleng < FILENAME_SIZE */
 285                           if (yyleng >= FILENAME_SIZE) sscanf(yytext+1, "%d", &hslineno); /* name might not fit in tempf: take the line number only */
 286                           else if (sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf) == 2) new_filename(tempf); /* no name scanned => tempf is unset, keep the old name */
 287                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 288                         }
 289
 290 <Code,GlaExt>^"#line ".*{NL}    {
 291                           char tempf[FILENAME_SIZE]; /* #line <line> "<file>"; the name is a substring of yytext, so it fits when yyleng < FILENAME_SIZE */
 292                           if (yyleng >= FILENAME_SIZE) sscanf(yytext+5, "%d", &hslineno); /* name might not fit in tempf: take the line number only */
 293                           else if (sscanf(yytext+5, "%d \"%[^\"]", &hslineno, tempf) == 2) new_filename(tempf); /* no name scanned => tempf is unset, keep the old name */
 294                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 295                         }
 296
 297 <Code,GlaExt>"{-# LINE ".*"-}"{NL} {
 298                           /* partain: pragma-style line directive, {-# LINE <line> "<file>" -} */
 299                           char tempf[FILENAME_SIZE]; /* the name is a substring of yytext, so it fits when yyleng < FILENAME_SIZE */
 300                           if (yyleng >= FILENAME_SIZE) sscanf(yytext+9, "%d", &hslineno); /* name might not fit in tempf: take the line number only */
 301                           else if (sscanf(yytext+9, "%d \"%[^\"]", &hslineno, tempf) == 2) new_filename(tempf); /* no name scanned => tempf is unset, keep the old name */
 302                           hsplineno = hslineno; hscolno = 0; hspcolno = 0;
 303                         }
 304 <Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}"   {
 305                           sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
 306                         }
 307 <Code,GlaExt>"{-# GHC_PRAGMA "   {
 308                           if ( ignorePragmas ||
 309                                thisIfacePragmaVersion < minAcceptablePragmaVersion ||
 310                                thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
 311                              nested_comments = 1;
 312                              PUSH_STATE(Comment);
 313                           } else {
 314                              PUSH_STATE(GhcPragma);
 315                              RETURN(GHC_PRAGMA);
 316                           }
 317                         }
 318 <GhcPragma>"_N_"            { RETURN(NO_PRAGMA); }
 319 <GhcPragma>"_NI_"           { RETURN(NOINFO_PRAGMA); }
 320 <GhcPragma>"_ABSTRACT_"     { RETURN(ABSTRACT_PRAGMA); }
 321 <GhcPragma>"_DEFOREST_"     { RETURN(DEFOREST_PRAGMA); }
 322 <GhcPragma>"_SPECIALISE_"   { RETURN(SPECIALISE_PRAGMA); }
 323 <GhcPragma>"_M_"            { RETURN(MODNAME_PRAGMA); }
 324 <GhcPragma>"_A_"            { RETURN(ARITY_PRAGMA); }
 325 <GhcPragma>"_U_"            { RETURN(UPDATE_PRAGMA); }
 326 <GhcPragma>"_S_"            { RETURN(STRICTNESS_PRAGMA); }
 327 <GhcPragma>"_K_"            { RETURN(KIND_PRAGMA); }
 328 <GhcPragma>"_MF_"           { RETURN(MAGIC_UNFOLDING_PRAGMA); }
 329 <GhcPragma>"_F_"            { RETURN(UNFOLDING_PRAGMA); }
 330
 331 <GhcPragma>"_!_"            { RETURN(COCON); }
 332 <GhcPragma>"_#_"            { RETURN(COPRIM); }
 333 <GhcPragma>"_APP_"          { RETURN(COAPP); }
 334 <GhcPragma>"_TYAPP_"        { RETURN(COTYAPP); }
 335 <GhcPragma>"_ALG_"          { RETURN(CO_ALG_ALTS); }
 336 <GhcPragma>"_PRIM_"         { RETURN(CO_PRIM_ALTS); }
 337 <GhcPragma>"_NO_DEFLT_"     { RETURN(CO_NO_DEFAULT); }
 338 <GhcPragma>"_LETREC_"       { RETURN(CO_LETREC); }
 339
 340 <GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
 341 <GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
 342 <GhcPragma>"_USER_CC_"      { RETURN(CO_USER_CC); }
 343 <GhcPragma>"_AUTO_CC_"      { RETURN(CO_AUTO_CC); }
 344 <GhcPragma>"_DICT_CC_"      { RETURN(CO_DICT_CC); }
 345
 346 <GhcPragma>"_DUPD_CC_"      { RETURN(CO_DUPD_CC); }
 347 <GhcPragma>"_CAF_CC_"       { RETURN(CO_CAF_CC); }
 348
 349 <GhcPragma>"_SDSEL_"        { RETURN(CO_SDSEL_ID); }
 350 <GhcPragma>"_METH_"         { RETURN(CO_METH_ID); }
 351 <GhcPragma>"_DEFM_"         { RETURN(CO_DEFM_ID); }
 352 <GhcPragma>"_DFUN_"         { RETURN(CO_DFUN_ID); }
 353 <GhcPragma>"_CONSTM_"       { RETURN(CO_CONSTM_ID); }
 354 <GhcPragma>"_SPEC_"         { RETURN(CO_SPEC_ID); }
 355 <GhcPragma>"_WRKR_"         { RETURN(CO_WRKR_ID); }
 356 <GhcPragma>"_ORIG_"         { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
 357
 358 <GhcPragma>"_ALWAYS_"       { RETURN(UNFOLD_ALWAYS); }
 359 <GhcPragma>"_IF_ARGS_"      { RETURN(UNFOLD_IF_ARGS); }
 360
 361 <GhcPragma>"_NOREP_I_"      { RETURN(NOREP_INTEGER); }
 362 <GhcPragma>"_NOREP_R_"      { RETURN(NOREP_RATIONAL); }
 363 <GhcPragma>"_NOREP_S_"      { RETURN(NOREP_STRING); }
 364
 365 <GhcPragma>" #-}"           { POP_STATE; RETURN(END_PRAGMA); }
 366
 367 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
 368                               PUSH_STATE(UserPragma);
 369                               RETURN(SPECIALISE_UPRAGMA);
 370                             }
 371 <Code,GlaExt>"{-#"{WS}*"INLINE" {
 372                               PUSH_STATE(UserPragma);
 373                               RETURN(INLINE_UPRAGMA);
 374                             }
 375 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
 376                               PUSH_STATE(UserPragma);
 377                               RETURN(MAGIC_UNFOLDING_UPRAGMA);
 378                             }
 379 <Code,GlaExt>"{-#"{WS}*"DEFOREST" {
 380                               PUSH_STATE(UserPragma);
 381                               RETURN(DEFOREST_UPRAGMA);
 382                             }
 383 <Code,GlaExt>"{-#"{WS}*"ABSTRACT" {
 384                               PUSH_STATE(UserPragma);
 385                               RETURN(ABSTRACT_UPRAGMA);
 386                             }
 387 <UserPragma>"#-}"           { POP_STATE; RETURN(END_UPRAGMA); }
 388
 389 %{
 390     /*
 391      * Haskell keywords.  `scc' is actually a Glasgow extension, but it is
 392      * intentionally accepted as a keyword even for normal <Code>.
 393      */
 394 %}
 395
 396 <Code,GlaExt,GhcPragma>"case"   { RETURN(CASE); }
 397 <Code,GlaExt>"class"            { RETURN(CLASS); }
 398 <Code,GlaExt,UserPragma>"data"  { RETURN(DATA); }
 399 <Code,GlaExt>"default"          { RETURN(DEFAULT); }
 400 <Code,GlaExt>"deriving"         { RETURN(DERIVING); }
 401 <Code,GlaExt>"else"             { RETURN(ELSE); }
 402 <Code,GlaExt>"hiding"           { RETURN(HIDING); }
 403 <Code,GlaExt>"if"               { RETURN(IF); }
 404 <Code,GlaExt>"import"           { RETURN(IMPORT); }
 405 <Code,GlaExt>"infix"            { RETURN(INFIX); }
 406 <Code,GlaExt>"infixl"           { RETURN(INFIXL); }
 407 <Code,GlaExt>"infixr"           { RETURN(INFIXR); }
 408 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
 409 <Code,GlaExt>"interface"        { RETURN(INTERFACE); }
 410 <Code,GlaExt>"module"           { RETURN(MODULE); }
 411 <Code,GlaExt,GhcPragma>"of"     { RETURN(OF); }
 412 <Code,GlaExt>"renaming"         { RETURN(RENAMING); }
 413 <Code,GlaExt>"then"             { RETURN(THEN); }
 414 <Code,GlaExt>"to"               { RETURN(TO); }
 415 <Code,GlaExt>"type"             { RETURN(TYPE); }
 416 <Code,GlaExt>"where"            { RETURN(WHERE); }
 417 <Code,GlaExt,GhcPragma>"in"     { RETURN(IN); }
 418 <Code,GlaExt,GhcPragma>"let"    { RETURN(LET); }
 419 <GlaExt,GhcPragma>"_ccall_"     { RETURN(CCALL); }
 420 <GlaExt,GhcPragma>"_ccall_GC_"  { RETURN(CCALL_GC); }
 421 <GlaExt,GhcPragma>"_casm_"      { RETURN(CASM); }
 422 <GlaExt,GhcPragma>"_casm_GC_"   { RETURN(CASM_GC); }
 423 <Code,GlaExt,GhcPragma>"_scc_"  { RETURN(SCC); }
 424 <GhcPragma>"_forall_"           { RETURN(FORALL); }
 425
 426 %{
 427     /*
 428      * Haskell operators.  Nothing special about these.
 429      */
 430 %}
 431
 432 <Code,GlaExt>".."                       { RETURN(DOTDOT); }
 433 <Code,GlaExt,GhcPragma>";"              { RETURN(SEMI); }
 434 <Code,GlaExt,GhcPragma,UserPragma>","   { RETURN(COMMA); }
 435 <Code,GlaExt,GhcPragma>"|"              { RETURN(VBAR); }
 436 <Code,GlaExt,GhcPragma,UserPragma>"="   { RETURN(EQUAL); }
 437 <Code,GlaExt>"<-"                       { RETURN(LARROW); }
 438 <Code,GlaExt,GhcPragma,UserPragma>"->"  { RETURN(RARROW); }
 439 <Code,GlaExt,GhcPragma,UserPragma>"=>"  { RETURN(DARROW); }
 440 <Code,GlaExt,GhcPragma,UserPragma>"::"  { RETURN(DCOLON); }
 441 <Code,GlaExt,GhcPragma,UserPragma>"("   { RETURN(OPAREN); }
 442 <Code,GlaExt,GhcPragma,UserPragma>")"   { RETURN(CPAREN); }
 443 <Code,GlaExt,GhcPragma,UserPragma>"["   { RETURN(OBRACK); }
 444 <Code,GlaExt,GhcPragma,UserPragma>"]"   { RETURN(CBRACK); }
 445 <Code,GlaExt,GhcPragma>"{"              { RETURN(OCURLY); }
 446 <Code,GlaExt,GhcPragma>"}"              { RETURN(CCURLY); }
 447 <Code,GlaExt>"+"                        { RETURN(PLUS); }
 448 <Code,GlaExt>"@"                        { RETURN(AT); }
 449 <Code,GlaExt,GhcPragma>"\\"             { RETURN(LAMBDA); }
 450 <GhcPragma>"_/\\_"                      { RETURN(TYLAMBDA); }
 451 <Code,GlaExt>"_"                        { RETURN(WILDCARD); }
 452 <Code,GlaExt,GhcPragma>"`"              { RETURN(BQUOTE); }
 453 <Code,GlaExt>"~"                        { RETURN(LAZY); }
 454 <Code,GlaExt>"-"                        { RETURN(MINUS); }
 455
 456 %{
 457     /*
 458      * Integers and (for Glasgow extensions) primitive integers.  Note that
 459      * we pass all of the text on to the parser, because flex/C can't handle
 460      * arbitrary precision numbers.
 461      */
 462 %}
 463
 464 <GlaExt>("-")?"0o"{O}+"#" { /* octal */
 465                          yylval.uid = xstrndup(yytext, yyleng - 1);
 466                          RETURN(INTPRIM);
 467                         }
 468 <Code,GlaExt>"0o"{O}+   { /* octal */
 469                          yylval.uid = xstrndup(yytext, yyleng);
 470                          RETURN(INTEGER);
 471                         }
 472 <GlaExt>("-")?"0x"{H}+"#" { /* hexadecimal */
 473                          yylval.uid = xstrndup(yytext, yyleng - 1);
 474                          RETURN(INTPRIM);
 475                         }
 476 <Code,GlaExt>"0x"{H}+   { /* hexadecimal */
 477                          yylval.uid = xstrndup(yytext, yyleng);
 478                          RETURN(INTEGER);
 479                         }
 480 <GlaExt,GhcPragma>("-")?{N}"#"  {
 481                          yylval.uid = xstrndup(yytext, yyleng - 1);
 482                          RETURN(INTPRIM);
 483                         }
 484 <Code,GlaExt,GhcPragma>{N} {
 485                          yylval.uid = xstrndup(yytext, yyleng);
 486                          RETURN(INTEGER);
 487                         }
 488
 489 %{
 490     /*
 491      * Floats and (for Glasgow extensions) primitive floats/doubles.
 492      */
 493 %}
 494
 495 <GlaExt,GhcPragma>("-")?{F}"##" {
 496                          yylval.uid = xstrndup(yytext, yyleng - 2);
 497                          RETURN(DOUBLEPRIM);
 498                         }
 499 <GlaExt,GhcPragma>("-")?{F}"#" {
 500                          yylval.uid = xstrndup(yytext, yyleng - 1);
 501                          RETURN(FLOATPRIM);
 502                         }
 503 <Code,GlaExt>{F}        {
 504                          yylval.uid = xstrndup(yytext, yyleng);
 505                          RETURN(FLOAT);
 506                         }
 507
 508 %{
 509     /*
 510      * Funky ``foo'' style C literals for Glasgow extensions
 511      */
 512 %}
 513
 514 <GlaExt,GhcPragma>"``"[^']+"''" {
 515                          hsnewid(yytext + 2, yyleng - 4);
 516                          RETURN(CLITLIT);
 517                         }
 518
 519 %{
 520     /*
 521      * Identifiers, both variables and operators.  The trailing hash is allowed
 522      * for Glasgow extensions.
 523      */
 524 %}
 525
 526 <GhcPragma>"_NIL_"              { hsnewid(yytext, yyleng); RETURN(CONID); }
 527 <GhcPragma>"_TUP_"{D}+          { hsnewid(yytext, yyleng); RETURN(CONID); }
 528 <GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
 529
 530 <GlaExt,GhcPragma,UserPragma>{Id}"#" {
 531                          hsnewid(yytext, yyleng);
 532                          RETURN(_isconstr(yytext) ? CONID : VARID);
 533                         }
 534 %{
 535 /* This SHOULDNAE work in "Code" (sigh) */
 536 %}
 537 <Code,GlaExt,GhcPragma,UserPragma>_+{Id} {
 538                          if (! (nonstandardFlag || in_interface)) { /* complain unless Glasgow extensions are on or we are reading an interface */
 539                             char errbuf[ERR_BUF_SIZE]; /* NOTE(review): the sprintf below is unbounded; a very long identifier could overrun errbuf -- confirm against ERR_BUF_SIZE */
 540                             sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
 541                             hsperror(errbuf);
 542                          }
 543                          hsnewid(yytext, yyleng);
 544                          RETURN(isconstr(yytext) ? CONID : VARID);
 545                          /* NB: ^^^^^^^^ : not the macro! (the _isconstr macro only inspects the first character, which is '_' here) */
 546                         }
 547 <Code,GlaExt,GhcPragma,UserPragma>{Id}  {
 548                          hsnewid(yytext, yyleng);
 549                          RETURN(_isconstr(yytext) ? CONID : VARID);
 550                         }
 551 <Code,GlaExt,GhcPragma,UserPragma>{SId} {
 552                          hsnewid(yytext, yyleng);
 553                          RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
 554                         }
 555
 556 %{
 557     /* Why is `{Id}#` matched this way, and `{Id}` lexed as three tokens? --JSM */
 558
 559     /* Because we can make the former well-behaved (we defined them).
 560
 561        Sadly, the latter is defined by Haskell, which allows such
 562        la-la land constructs as `{-a 900-line comment-} foo`.  (WDP 94/12)
 563     */
 564 %}
 565
 566 <GlaExt,GhcPragma,UserPragma>"`"{Id}"#`"        {
 567                          hsnewid(yytext + 1, yyleng - 2);
 568                          RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
 569                         }
 570
 571 %{
 572     /*
 573      * Character literals.  The first form is the quick form, for character
 574      * literals that don't contain backslashes.  Literals with backslashes are
 575      * lexed through multiple rules.  First, we match the open ' and as many
 576      * normal characters as possible.  This puts us into the <Char> state, where
 577      * a backslash is legal.  Then, we match the backslash and move into the
 578      * <CharEsc> state.  When we drop out of <CharEsc>, we collect more normal
 579      * characters and the close '.  We may end up with too many characters, but
 580      * this allows us to easily share the lex rules with strings.  Excess characters
 581      * are ignored with a warning.
 582      */
 583 %}
 584
 585 <GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
 586                          yylval.uhstring = installHstring(1, yytext+1);
 587                          RETURN(CHARPRIM);
 588                         }
 589 <Code,GlaExt>'({CHAR}|"\"")'    {
 590                          yylval.uhstring = installHstring(1, yytext+1);
 591                          RETURN(CHAR);
 592                         }
 593 <Code,GlaExt>''         {char errbuf[ERR_BUF_SIZE];
 594                          sprintf(errbuf, "'' is not a valid character (or string) literal\n");
 595                          hsperror(errbuf);
 596                         }
 597 <Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
 598                          hsmlcolno = hspcolno;
 599                          cleartext();
 600                          addtext(yytext+1, yyleng-1);
 601                          PUSH_STATE(Char);
 602                         }
 603 <Char>({CHAR}|"\"")*'#  {
 604                          unsigned length;
 605                          char *text;
 606
 607                          addtext(yytext, yyleng - 2);
 608                          text = fetchtext(&length);
 609
 610                          if (! (nonstandardFlag || in_interface)) {
 611                             char errbuf[ERR_BUF_SIZE];
 612                             sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
 613                             hsperror(errbuf);
 614                          }
 615
 616                          if (length > 1) {
 617                             fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
 618                               input_filename, hsplineno, hspcolno + 1);
 619                             format_string(stderr, (unsigned char *) text, length);
 620                             fputs("' too long\n", stderr);
 621                             hsperror("");
 622                          }
 623                          yylval.uhstring = installHstring(1, text);
 624                          hspcolno = hsmlcolno;
 625                          POP_STATE;
 626                          RETURN(CHARPRIM);
 627                         }
 628 <Char>({CHAR}|"\"")*'   {
 629                          unsigned length;
 630                          char *text;
 631
 632                          addtext(yytext, yyleng - 1);
 633                          text = fetchtext(&length);
 634
 635                          if (length > 1) {
 636                             fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
 637                               input_filename, hsplineno, hspcolno + 1);
 638                             format_string(stderr, (unsigned char *) text, length);
 639                             fputs("' too long\n", stderr);
 640                             hsperror("");
 641                          }
 642                          yylval.uhstring = installHstring(1, text);
 643                          hspcolno = hsmlcolno;
 644                          POP_STATE;
 645                          RETURN(CHAR);
 646                         }
 647 <Char>({CHAR}|"\"")+    { addtext(yytext, yyleng); }
 648
 649
 650 %{
 651     /*
 652      * String literals.  The first form is the quick form, for string literals
 653      * that don't contain backslashes.  Literals with backslashes are lexed
 654      * through multiple rules.  First, we match the open " and as many normal
 655      * characters as possible.  This puts us into the <String> state, where
 656      * a backslash is legal.  Then, we match the backslash and move into the
 657      * <StringEsc> state.  When we drop out of <StringEsc>, we collect more normal
 658      * characters, moving back and forth between <String> and <StringEsc> as more
 659      * backslashes are encountered.  (We may even digress into <Comment> mode if we
 660      * find a comment in a gap between backslashes.)  Finally, we read the last chunk
 661      * of normal characters and the close ".
 662      */
 663 %}
 664
 665 <GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""#  {
 666                          yylval.uhstring = installHstring(yyleng-3, yytext+1);
 667                             /* the -3 accounts for the " on front, "# on the end */
 668                          RETURN(STRINGPRIM);
 669                         }
 670 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""  {
 671                          yylval.uhstring = installHstring(yyleng-2, yytext+1);
 672                          RETURN(STRING);
 673                         }
 674 <Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
 675                          hsmlcolno = hspcolno;
 676                          cleartext();
 677                          addtext(yytext+1, yyleng-1);
 678                          PUSH_STATE(String);
 679                         }
 680 <String>({CHAR}|"'")*"\"#"   {
 681                          unsigned length;
 682                          char *text;
 683
 684                          addtext(yytext, yyleng-2);
 685                          text = fetchtext(&length);
 686
 687                          if (! (nonstandardFlag || in_interface)) {
 688                             char errbuf[ERR_BUF_SIZE];
 689                             sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
 690                             hsperror(errbuf);
 691                          }
 692
 693                          yylval.uhstring = installHstring(length, text);
 694                          hspcolno = hsmlcolno;
 695                          POP_STATE;
 696                          RETURN(STRINGPRIM);
 697                         }
 698 <String>({CHAR}|"'")*"\""   {
 699                          unsigned length;
 700                          char *text;
 701
 702                          addtext(yytext, yyleng-1);
 703                          text = fetchtext(&length);
 704
 705                          yylval.uhstring = installHstring(length, text);
 706                          hspcolno = hsmlcolno;
 707                          POP_STATE;
 708                          RETURN(STRING);
 709                         }
 710 <String>({CHAR}|"'")+   { addtext(yytext, yyleng); }
 711
 712 %{
 713     /*
 714      * Character and string escapes are roughly the same, but strings have the
 715      * extra `\&' sequence which is not allowed for characters.  Also, comments
 716      * are allowed in the <StringEsc> state.  (See the comment section much
 717      * further down.)
 718      *
 719      * NB: Backslashes and tabs are stored in strings as themselves.
 720      * But if we print them (in printtree.c), they must go out as
 721      * "\\\\" and "\\t" respectively.  (This is because of the bogus
 722      * intermediate format that the parser produces.  It uses '\t' for end of
 723      * string, so it needs to be able to escape tabs, which means that it
 724      * also needs to be able to escape the escape character ('\\').  Sigh.
 725      */
 726 %}
 727
 728 <Char>\\                { PUSH_STATE(CharEsc); }
 729 <String>\\&             /* Ignore */ ;
 730 <String>\\              { PUSH_STATE(StringEsc); noGap = TRUE; }
 731
 732 <CharEsc>\\             { addchar(*yytext); POP_STATE; }
 733 <StringEsc>\\           { if (noGap) { addchar(*yytext); } POP_STATE; }
 734
 735 <CharEsc,StringEsc>["'] { addchar(*yytext); POP_STATE; }
 736 <CharEsc,StringEsc>NUL  { addchar('\000'); POP_STATE; }
 737 <CharEsc,StringEsc>SOH  { addchar('\001'); POP_STATE; }
 738 <CharEsc,StringEsc>STX  { addchar('\002'); POP_STATE; }
 739 <CharEsc,StringEsc>ETX  { addchar('\003'); POP_STATE; }
 740 <CharEsc,StringEsc>EOT  { addchar('\004'); POP_STATE; }
 741 <CharEsc,StringEsc>ENQ  { addchar('\005'); POP_STATE; }
 742 <CharEsc,StringEsc>ACK  { addchar('\006'); POP_STATE; }
 743 <CharEsc,StringEsc>BEL  |
 744 <CharEsc,StringEsc>a    { addchar('\007'); POP_STATE; }
 745 <CharEsc,StringEsc>BS   |
 746 <CharEsc,StringEsc>b    { addchar('\010'); POP_STATE; }
 747 <CharEsc,StringEsc>HT   |
 748 <CharEsc,StringEsc>t    { addchar('\011'); POP_STATE; }
 749 <CharEsc,StringEsc>LF   |
 750 <CharEsc,StringEsc>n    { addchar('\012'); POP_STATE; }
 751 <CharEsc,StringEsc>VT   |
 752 <CharEsc,StringEsc>v    { addchar('\013'); POP_STATE; }
 753 <CharEsc,StringEsc>FF   |
 754 <CharEsc,StringEsc>f    { addchar('\014'); POP_STATE; }
 755 <CharEsc,StringEsc>CR   |
 756 <CharEsc,StringEsc>r    { addchar('\015'); POP_STATE; }
 757 <CharEsc,StringEsc>SO   { addchar('\016'); POP_STATE; }
 758 <CharEsc,StringEsc>SI   { addchar('\017'); POP_STATE; }
 759 <CharEsc,StringEsc>DLE  { addchar('\020'); POP_STATE; }
 760 <CharEsc,StringEsc>DC1  { addchar('\021'); POP_STATE; }
 761 <CharEsc,StringEsc>DC2  { addchar('\022'); POP_STATE; }
 762 <CharEsc,StringEsc>DC3  { addchar('\023'); POP_STATE; }
 763 <CharEsc,StringEsc>DC4  { addchar('\024'); POP_STATE; }
 764 <CharEsc,StringEsc>NAK  { addchar('\025'); POP_STATE; }
 765 <CharEsc,StringEsc>SYN  { addchar('\026'); POP_STATE; }
 766 <CharEsc,StringEsc>ETB  { addchar('\027'); POP_STATE; }
 767 <CharEsc,StringEsc>CAN  { addchar('\030'); POP_STATE; }
 768 <CharEsc,StringEsc>EM   { addchar('\031'); POP_STATE; }
 769 <CharEsc,StringEsc>SUB  { addchar('\032'); POP_STATE; }
 770 <CharEsc,StringEsc>ESC  { addchar('\033'); POP_STATE; }
 771 <CharEsc,StringEsc>FS   { addchar('\034'); POP_STATE; }
 772 <CharEsc,StringEsc>GS   { addchar('\035'); POP_STATE; }
 773 <CharEsc,StringEsc>RS   { addchar('\036'); POP_STATE; }
 774 <CharEsc,StringEsc>US   { addchar('\037'); POP_STATE; }
 775 <CharEsc,StringEsc>SP   { addchar('\040'); POP_STATE; }
 776 <CharEsc,StringEsc>DEL  { addchar('\177'); POP_STATE; }
 777 <CharEsc,StringEsc>"^"{CNTRL} { char c = yytext[1] - '@'; addchar(c); POP_STATE; }
 778 <CharEsc,StringEsc>{D}+  {
 779                           int i = strtol(yytext, NULL, 10);
 780                           if (i < NCHARS) {
 781                              addchar((char) i);
 782                           } else {
 783                              char errbuf[ERR_BUF_SIZE];
 784                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 785                                 yytext);
 786                              hsperror(errbuf);
 787                           }
 788                           POP_STATE;
 789                         }
 790 <CharEsc,StringEsc>o{O}+ {
 791                           int i = strtol(yytext + 1, NULL, 8);
 792                           if (i < NCHARS) {
 793                              addchar((char) i);
 794                           } else {
 795                              char errbuf[ERR_BUF_SIZE];
 796                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 797                                 yytext);
 798                              hsperror(errbuf);
 799                           }
 800                           POP_STATE;
 801                         }
 802 <CharEsc,StringEsc>x{H}+ {
 803                           int i = strtol(yytext + 1, NULL, 16);
 804                           if (i < NCHARS) {
 805                              addchar((char) i);
 806                           } else {
 807                              char errbuf[ERR_BUF_SIZE];
 808                              sprintf(errbuf, "Numeric escape \"\\%s\" out of range\n",
 809                                 yytext);
 810                              hsperror(errbuf);
 811                           }
 812                           POP_STATE;
 813                         }
 814
 815 %{
 816     /*
 817      * Simple comments and whitespace.  Normally, we would just ignore these, but
 818      * in case we're processing a string escape, we need to note that we've seen
 819      * a gap.
 820      */
 821 %}
 822
 823 <Code,GlaExt,StringEsc>"--".*{NL}{WS}* |
 824 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+       { noGap = FALSE; }
 825
 826 %{
 827     /*
 828      * Nested comments.  The major complication here is in trying to match the
 829      * longest lexemes possible, for better performance.  (See the flex document.)
 830      * That's why the rules look so bizarre.
 831      */
 832 %}
 833
 834 <Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-"        {
 835                           noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment);
 836                         }
 837
 838 <Comment>[^-{]*         |
 839 <Comment>"-"+[^-{}]+    |
 840 <Comment>"{"+[^-{}]+    ;
 841 <Comment>"{-"           { nested_comments++; }
 842 <Comment>"-}"           { if (--nested_comments == 0) POP_STATE; }
 843 <Comment>(.|\n)         ;
 844
 845 %{
 846     /*
 847      * Illegal characters.  This used to be a single rule, but we might as well
 848      * pass on as much information as we have, so now we indicate our state in
 849      * the error message.
 850      */
 851 %}
 852
 853 <INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n)        {
 854                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 855                             input_filename, hsplineno, hspcolno + 1);
 856                          format_string(stderr, (unsigned char *) yytext, 1);
 857                          fputs("'\n", stderr);
 858                          hsperror("");
 859                         }
 860 <Char>(.|\n)            {
 861                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 862                             input_filename, hsplineno, hspcolno + 1);
 863                          format_string(stderr, (unsigned char *) yytext, 1);
 864                          fputs("' in a character literal\n", stderr);
 865                          hsperror("");
 866                         }
 867 <CharEsc>(.|\n)         {
 868                          fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
 869                             input_filename, hsplineno, hspcolno + 1);
 870                          format_string(stderr, (unsigned char *) yytext, 1);
 871                          fputs("'\n", stderr);
 872                          hsperror("");
 873                         }
 874 <String>(.|\n)          { if (nonstandardFlag) {
 875                              addtext(yytext, yyleng);
 876                           } else {
 877                                 fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 878                                 input_filename, hsplineno, hspcolno + 1);
 879                                 format_string(stderr, (unsigned char *) yytext, 1);
 880                                 fputs("' in a string literal\n", stderr);
 881                                 hsperror("");
 882                           }
 883                         }
 884 <StringEsc>(.|\n)       {
 885                          if (noGap) {
 886                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\",
 887                                 input_filename, hsplineno, hspcolno + 1);
 888                              format_string(stderr, (unsigned char *) yytext, 1);
 889                              fputs("'\n", stderr);
 890                              hsperror("");
 891                          } else {
 892                              fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
 893                                 input_filename, hsplineno, hspcolno + 1);
 894                              format_string(stderr, (unsigned char *) yytext, 1);
 895                              fputs("' in a string gap\n", stderr);
 896                              hsperror("");
 897                          }
 898                         }
 899
 900 %{
 901     /*
 902      * End of file.  In any sub-state, this is an error.  However, for the primary
 903      * <Code> and <GlaExt> states, this is perfectly normal.  We just return an EOF
 904      * and let the yylex() wrapper deal with whatever has to be done next (e.g.
 905      * adding virtual close curlies, or closing an interface and returning to the
 906      * primary source file).
 907      *
 908      * Note that flex does not call YY_USER_ACTION for <<EOF>> rules.  Hence the
 909      * line/column advancement has to be done by hand.
 910      */
 911 %}
 912
 913 <Char,CharEsc><<EOF>>   {
 914                           hsplineno = hslineno; hspcolno = hscolno;
 915                           hsperror("unterminated character literal");
 916                         }
 917 <Comment><<EOF>>        {
 918                           hsplineno = hslineno; hspcolno = hscolno;
 919                           hsperror("unterminated comment");
 920                         }
 921 <String,StringEsc><<EOF>>   {
 922                           hsplineno = hslineno; hspcolno = hscolno;
 923                           hsperror("unterminated string literal");
 924                         }
 925 <GhcPragma><<EOF>>      {
 926                           hsplineno = hslineno; hspcolno = hscolno;
 927                           hsperror("unterminated interface pragma");
 928                         }
 929 <UserPragma><<EOF>>     {
 930                           hsplineno = hslineno; hspcolno = hscolno;
 931                           hsperror("unterminated user-specified pragma");
 932                         }
 933 <Code,GlaExt><<EOF>>    { hsplineno = hslineno; hspcolno = hscolno; return(EOF); }
 934
 935 %%
 936
 937 /**********************************************************************
 938 *                                                                     *
 939 *                                                                     *
 940 *     YACC/LEX Initialisation etc.                                    *
 941 *                                                                     *
 942 *                                                                     *
 943 **********************************************************************/
 944
 945 /*
 946    We initialise input_filename to "<stdin>".
 947    This allows unnamed sources to be piped into the parser.
 948 */
 949
 950 void
 951 yyinit()
 952 {
 953     extern BOOLEAN acceptPrim;
 954
 955     input_filename = xstrdup("<stdin>");
 956
 957     /* We must initialize the input buffer _now_, because we call
 958        setyyin _before_ calling yylex for the first time! */
 959     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 960
 961     if (acceptPrim)
 962         PUSH_STATE(GlaExt);
 963     else
 964         PUSH_STATE(Code);
 965 }
 966
 967 void
 968 new_filename(f) /* This looks pretty dodgy to me (WDP) */
 969   char *f;
 970 {
 971     if (input_filename != NULL)
 972         free(input_filename);
 973     input_filename = xstrdup(f);
 974 }
 975
 976 /**********************************************************************
 977 *                                                                     *
 978 *                                                                     *
 979 *     Layout Processing                                               *
 980 *                                                                     *
 981 *                                                                     *
 982 **********************************************************************/
 983
 984 /*
 985         The following section deals with Haskell Layout conventions
 986         forcing insertion of ; or } as appropriate
 987 */
 988
 989 BOOLEAN
 990 hsshouldindent()
 991 {
 992     return (!forgetindent && INDENTON);
 993 }
 994
 995
 996 /* Enter new context and set new indentation level */
 997 void
 998 hssetindent()
 999 {
1000 #ifdef HSP_DEBUG
1001     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1002 #endif
1003
1004     /*
1005      * partain: first chk that new indent won't be less than current one; this code
1006      * doesn't make sense to me; hscolno tells the position of the _end_ of the
1007      * current token; what that has to do with indenting, I don't know.
1008      */
1009
1010
1011     if (hscolno - 1 <= INDENTPT) {
1012         if (INDENTPT == -1)
1013             return;             /* Empty input OK for Haskell 1.1 */
1014         else {
1015             char errbuf[ERR_BUF_SIZE];
1016
1017             sprintf(errbuf, "Layout error -- indentation should be > %d cols", INDENTPT);
1018             hsperror(errbuf);
1019         }
1020     }
1021     hsentercontext((hspcolno << 1) | 1);
1022 }
1023
1024
1025 /* Enter a new context without changing the indentation level */
1026 void
1027 hsincindent()
1028 {
1029 #ifdef HSP_DEBUG
1030     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1031 #endif
1032     hsentercontext(indenttab[icontexts] & ~1);
1033 }
1034
1035
1036 /* Turn off indentation processing, usually because an explicit "{" has been seen */
1037 void
1038 hsindentoff()
1039 {
1040     forgetindent = TRUE;
1041 }
1042
1043
1044 /* Enter a new layout context. */
1045 void
1046 hsentercontext(indent)
1047   int indent;
1048 {
1049     /* Enter new context and set indentation as specified */
1050     if (++icontexts >= MAX_CONTEXTS) {
1051         char errbuf[ERR_BUF_SIZE];
1052
1053         sprintf(errbuf, "`wheres' and `cases' nested too deeply (>%d)", MAX_CONTEXTS - 1);
1054         hsperror(errbuf);
1055     }
1056     forgetindent = FALSE;
1057     indenttab[icontexts] = indent;
1058 #ifdef HSP_DEBUG
1059     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
1060 #endif
1061 }
1062
1063
1064 /* Exit a layout context */
1065 void
1066 hsendindent()
1067 {
1068     --icontexts;
1069 #ifdef HSP_DEBUG
1070     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
1071 #endif
1072 }
1073
1074 /*
1075  *      Return checks the indentation level and returns ;, } or the specified token.
1076  */
1077
1078 int
1079 Return(tok)
1080   int tok;
1081 {
1082 #ifdef HSP_DEBUG
1083     extern int yyleng;
1084 #endif
1085
1086     if (hsshouldindent()) {
1087         if (hspcolno < INDENTPT) {
1088 #ifdef HSP_DEBUG
1089             fprintf(stderr, "inserted '}' before %d (%d:%d:%d:%d)\n", tok, hspcolno, hscolno, yyleng, INDENTPT);
1090 #endif
1091             hssttok = tok;
1092             return (VCCURLY);
1093         } else if (hspcolno == INDENTPT) {
1094 #ifdef HSP_DEBUG
1095             fprintf(stderr, "inserted ';' before %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1096 #endif
1097             hssttok = -tok;
1098             return (SEMI);
1099         }
1100     }
1101     hssttok = -1;
1102 #ifdef HSP_DEBUG
1103     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
1104 #endif
1105     return (tok);
1106 }
1107
1108
1109 /*
1110  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
1111  */
1112 int
1113 yylex()
1114 {
1115     int tok;
1116     static BOOLEAN eof = FALSE;
1117
1118     if (!eof) {
1119         if (hssttok != -1) {
1120             if (hssttok < 0) {
1121                 tok = -hssttok;
1122                 hssttok = -1;
1123                 return tok;
1124             }
1125             RETURN(hssttok);
1126         } else {
1127             endlineno = hslineno;
1128             if ((tok = yylex1()) != EOF)
1129                 return tok;
1130             else
1131                 eof = TRUE;
1132         }
1133     }
1134     if (icontexts > icontexts_save) {
1135         if (INDENTON) {
1136             eof = TRUE;
1137             indenttab[icontexts] = 0;
1138             return (VCCURLY);
1139         } else
1140             hsperror("missing '}' at end of file");
1141     } else if (hsbuf_save != NULL) {
1142         fclose(yyin);
1143         yy_delete_buffer(YY_CURRENT_BUFFER);
1144         yy_switch_to_buffer(hsbuf_save);
1145         hsbuf_save = NULL;
1146         new_filename(filename_save);
1147         free(filename_save);
1148         hslineno = hslineno_save;
1149         hsplineno = hsplineno_save;
1150         hscolno = hscolno_save;
1151         hspcolno = hspcolno_save;
1152         etags = etags_save;
1153         in_interface = FALSE;
1154         icontexts = icontexts_save - 1;
1155         icontexts_save = 0;
1156 #ifdef HSP_DEBUG
1157         fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
1158 #endif
1159         eof = FALSE;
1160         RETURN(LEOF);
1161     } else {
1162         yyterminate();
1163     }
1164     abort(); /* should never get here! */
1165     return(0);
1166 }
1167
1168 /**********************************************************************
1169 *                                                                     *
1170 *                                                                     *
1171 *     Input Processing for Interfaces                                 *
1172 *                                                                     *
1173 *                                                                     *
1174 **********************************************************************/
1175
1176 /* setyyin(file)        open file as new lex input buffer */
1177 void
1178 setyyin(file)
1179   char *file;
1180 {
1181     extern FILE *yyin;
1182
1183     hsbuf_save = YY_CURRENT_BUFFER;
1184     if ((yyin = fopen(file, "r")) == NULL) {
1185         char errbuf[ERR_BUF_SIZE];
1186
1187         sprintf(errbuf, "can't read \"%-.50s\"", file);
1188         hsperror(errbuf);
1189     }
1190     yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
1191
1192     hslineno_save = hslineno;
1193     hsplineno_save = hsplineno;
1194     hslineno = hsplineno = 1;
1195
1196     filename_save = input_filename;
1197     input_filename = NULL;
1198     new_filename(file);
1199     hscolno_save = hscolno;
1200     hspcolno_save = hspcolno;
1201     hscolno = hspcolno = 0;
1202     in_interface = TRUE;
1203     etags_save = etags; /* do not do "etags" stuff in interfaces */
1204     etags = 0;          /* We remember whether we are doing it in
1205                            the module, so we can restore it later [WDP 94/09] */
1206     hsentercontext(-1);         /* partain: changed this from 0 */
1207     icontexts_save = icontexts;
1208 #ifdef HSP_DEBUG
1209     fprintf(stderr, "reading %s (%d:%d:%d)\n", input_filename, hscolno_save, hspcolno_save, INDENTPT);
1210 #endif
1211 }
1212
1213 static VOID
1214 layout_input(text, len)
1215 char *text;
1216 int len;
1217 {
1218 #ifdef HSP_DEBUG
1219     fprintf(stderr, "Scanning \"%s\"\n", text);
1220 #endif
1221
1222     hsplineno = hslineno;
1223     hspcolno = hscolno;
1224
1225     while (len-- > 0) {
1226         switch (*text++) {
1227         case '\n':
1228         case '\r':
1229         case '\f':
1230             hslineno++;
1231             hscolno = 0;
1232             break;
1233         case '\t':
1234             hscolno += 8 - (hscolno % 8);       /* Tabs stops are 8 columns apart */
1235             break;
1236         case '\v':
1237             break;
1238         default:
1239             ++hscolno;
1240             break;
1241         }
1242     }
1243 }
1244
1245 void
1246 setstartlineno()
1247 {
1248     startlineno = hsplineno;
1249 #if 1/*etags*/
1250 #else
1251     if (etags)
1252         fprintf(stderr,"%u\tsetstartlineno (col %u)\n",startlineno,hscolno);
1253 #endif
1254 }
1255
1256 /**********************************************************************
1257 *                                                                     *
1258 *                                                                     *
1259 *                      Text Caching                                   *
1260 *                                                                     *
1261 *                                                                     *
1262 **********************************************************************/
1263
1264 #define CACHE_SIZE YY_BUF_SIZE
1265
/* Growable buffer in which multi-part literals are assembled. */
static struct {
    unsigned allocated;         /* size of text in bytes; 0 until first allocated */
    unsigned next;              /* number of bytes stored so far */
    char *text;                 /* the buffer */
} textcache = {
    0,                          /* allocated */
    0,                          /* next */
    NULL                        /* text */
};
1271
1272 static VOID
1273 cleartext()
1274 {
1275 /*  fprintf(stderr, "cleartext\n"); */
1276     textcache.next = 0;
1277     if (textcache.allocated == 0) {
1278         textcache.allocated = CACHE_SIZE;
1279         textcache.text = xmalloc(CACHE_SIZE);
1280     }
1281 }
1282
1283 static VOID
1284 addtext(text, length)
1285 char *text;
1286 unsigned length;
1287 {
1288 /*  fprintf(stderr, "addtext: %d %s\n", length, text); */
1289
1290     if (length == 0)
1291         return;
1292
1293     if (textcache.next + length + 1 >= textcache.allocated) {
1294         textcache.allocated += length + CACHE_SIZE;
1295         textcache.text = xrealloc(textcache.text, textcache.allocated);
1296     }
1297     bcopy(text, textcache.text + textcache.next, length);
1298     textcache.next += length;
1299 }
1300
1301 static VOID
1302 #ifdef __STDC__
1303 addchar(char c)
1304 #else
1305 addchar(c)
1306   char c;
1307 #endif
1308 {
1309 /*  fprintf(stderr, "addchar: %c\n", c); */
1310
1311     if (textcache.next + 2 >= textcache.allocated) {
1312         textcache.allocated += CACHE_SIZE;
1313         textcache.text = xrealloc(textcache.text, textcache.allocated);
1314     }
1315     textcache.text[textcache.next++] = c;
1316 }
1317
1318 static char *
1319 fetchtext(length)
1320 unsigned *length;
1321 {
1322 /*  fprintf(stderr, "fetchtext: %d\n", textcache.next); */
1323
1324     *length = textcache.next;
1325     textcache.text[textcache.next] = '\0';
1326     return textcache.text;
1327 }
1328
1329 /**********************************************************************
1330 *                                                                     *
1331 *                                                                     *
1332 *    Identifier Processing                                             *
1333 *                                                                     *
1334 *                                                                     *
1335 **********************************************************************/
1336
1337 /*
1338         hsnewid         Enters an id of length n into the symbol table.
1339 */
1340
1341 static VOID
1342 hsnewid(name, length)
1343 char *name;
1344 int length;
1345 {
1346     char save = name[length];
1347
1348     name[length] = '\0';
1349     yylval.uid = installid(name);
1350     name[length] = save;
1351 }
1352
1353 BOOLEAN
1354 isconstr(s) /* walks past leading underscores before using the macro */
1355   char *s;
1356 {
1357     char *temp = s;
1358
1359     for ( ; temp != NULL && *temp == '_' ; temp++ );
1360
1361     return _isconstr(temp);
1362 }