ghc/interpreter/input.c

   1
   2 /* --------------------------------------------------------------------------
   3  * Input functions, lexical analysis parsing etc...
   4  *
   5  * Hugs 98 is Copyright (c) Mark P Jones, Alastair Reid and the Yale
   6  * Haskell Group 1994-99, and is distributed as Open Source software
   7  * under the Artistic License; see the file "Artistic" that is included
   8  * in the distribution for details.
   9  *
  10  * $RCSfile: input.c,v $
  11  * $Revision: 1.3 $
  12  * $Date: 1999/02/03 17:08:30 $
  13  * ------------------------------------------------------------------------*/
  14
  15 #include "prelude.h"
  16 #include "storage.h"
  17 #include "backend.h"
  18 #include "connect.h"
  19 #include "command.h"
  20 #include "errors.h"
  21 #include <ctype.h>
  22 #if HAVE_GETDELIM_H
  23 #include "getdelim.h"
  24 #endif
  25
  26 #if HUGS_FOR_WINDOWS
  27 #undef IN
  28 #endif
  29
  30 /* --------------------------------------------------------------------------
  31  * Global data:
  32  * ------------------------------------------------------------------------*/
  33
  34 List tyconDefns      = NIL;             /* type constructor definitions    */
  35 List typeInDefns     = NIL;             /* type synonym restrictions       */
  36 List valDefns        = NIL;             /* value definitions in script     */
  37 List classDefns      = NIL;             /* class defns in script           */
  38 List instDefns       = NIL;             /* instance defns in script        */
  39 List selDefns        = NIL;             /* list of selector lists          */
  40 List genDefns        = NIL;             /* list of generated names         */
  41 List unqualImports   = NIL;             /* unqualified import list         */
  42 List foreignImports  = NIL;             /* foreign imports                 */
  43 List foreignExports  = NIL;             /* foreign exportsd                */
  44 List defaultDefns    = NIL;             /* default definitions (if any)    */
  45 Int  defaultLine     = 0;               /* line in which default defs occur*/
  46 List evalDefaults    = NIL;             /* defaults for evaluator          */
  47
  48 Cell inputExpr       = NIL;             /* input expression                */
  49 Bool literateScripts = FALSE;           /* TRUE => default to lit scripts  */
  50 Bool literateErrors  = TRUE;            /* TRUE => report errs in lit scrs */
  51
  52 String repeatStr     = 0;               /* Repeat last expr                */
  53
  54 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
  55 String preprocessor  = 0;
  56 #endif
  57
  58 /* --------------------------------------------------------------------------
  59  * Local function prototypes:
  60  * ------------------------------------------------------------------------*/
  61
  62 static Void local initCharTab     Args((Void));
  63 static Void local fileInput       Args((String,Long));
  64 static Bool local literateMode    Args((String));
  65 static Bool local linecmp         Args((String,String));
  66 static Int  local nextLine        Args((Void));
  67 static Void local skip            Args((Void));
  68 static Void local thisLineIs      Args((Int));
  69 static Void local newlineSkip     Args((Void));
  70 static Void local closeAnyInput   Args((Void));
  71
  72        Int  yyparse         Args((Void)); /* can't stop yacc making this   */
  73                                           /* public, but don't advertise   */
  74                                           /* it in a header file.          */
  75
  76 static Void local endToken        Args((Void));
  77 static Text local readOperator    Args((Void));
  78 static Text local readIdent       Args((Void));
  79 static Cell local readRadixNumber Args((Int));
  80 static Cell local readNumber      Args((Void));
  81 static Cell local readChar        Args((Void));
  82 static Cell local readString      Args((Void));
  83 static Void local saveStrChr      Args((Char));
  84 static Cell local readAChar       Args((Bool));
  85
  86 static Bool local lazyReadMatches Args((String));
  87 static Cell local readEscapeChar  Args((Bool));
  88 static Void local skipGap         Args((Void));
  89 static Cell local readCtrlChar    Args((Void));
  90 static Cell local readOctChar     Args((Void));
  91 static Cell local readHexChar     Args((Void));
  92 static Int  local readHexDigit    Args((Char));
  93 static Cell local readDecChar     Args((Void));
  94
  95 static Void local goOffside       Args((Int));
  96 static Void local unOffside       Args((Void));
  97 static Bool local canUnOffside    Args((Void));
  98
  99 static Void local skipWhitespace  Args((Void));
 100 static Int  local yylex           Args((Void));
 101 static Int  local repeatLast      Args((Void));
 102
 103 static Void local parseInput      Args((Int));
 104
 105 /* --------------------------------------------------------------------------
 106  * Text values for reserved words and special symbols:
 107  * ------------------------------------------------------------------------*/
 108
 109 static Text textCase,    textOfK,      textData,   textType,   textIf;
 110 static Text textThen,    textElse,     textWhere,  textLet,    textIn;
 111 static Text textInfix,   textInfixl,   textInfixr, textForeign, textNewtype;
 112 static Text textDefault, textDeriving, textDo,     textClass,  textInstance;
 113
 114 static Text textCoco,    textEq,       textUpto,   textAs,     textLambda;
 115 static Text textBar,     textMinus,    textFrom,   textArrow,  textLazy;
 116 static Text textBang,    textDot,      textAll,    textImplies;
 117 static Text textWildcard;
 118
 119 static Text textModule,  textImport;
 120 static Text textHiding,  textQualified, textAsMod;
 121 static Text textExport,  textInterface, textRequires, textUnsafe;
 122
 123 Text   textNum;                         /* Num                             */
 124 Text   textPrelude;                     /* Prelude                         */
 125 Text   textPlus;                        /* (+)                             */
 126
 127 static Cell conMain;                    /* Main                            */
 128 static Cell varMain;                    /* main                            */
 129
 130 static Cell varMinus;                   /* (-)                             */
 131 static Cell varPlus;                    /* (+)                             */
 132 static Cell varBang;                    /* (!)                             */
 133 static Cell varDot;                     /* (.)                             */
 134 static Cell varHiding;                  /* hiding                          */
 135 static Cell varQualified;               /* qualified                       */
 136 static Cell varAsMod;                   /* as                              */
 137
 138 static List imps;                       /* List of imports to be chased    */
 139
 140
 141 /* --------------------------------------------------------------------------
 142  * Character set handling:
 143  *
 144  * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
 145  * character set.  The following code provides methods for classifying
 146  * input characters according to the lexical structure specified by the
 147  * report.  Hugs should still accept older programs because ASCII is
 148  * essentially just a subset of the ISO character set.
 149  *
 150  * Notes: If you want to port Hugs to a machine that uses something
 151  * substantially different from the ISO character set, then you will need
 152  * to insert additional code to map between character sets.
 153  *
 154  * At some point, the following data structures may be exported in a .h
 155  * file to allow the information contained here to be picked up in the
 156  * implementation of LibChar is* primitives.
 157  *
 158  * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
 159  * ------------------------------------------------------------------------*/
 160
 161 static  Bool            charTabBuilt;
 162 static  unsigned char   ctable[NUM_CHARS];
 163 #define isIn(c,x)       (ctable[(unsigned char)(c)]&(x))
 164 #define isISO(c)        (0<=(c) && (c)<NUM_CHARS)
 165
 166 #define DIGIT           0x01
 167 #define SMALL           0x02
 168 #define LARGE           0x04
 169 #define SYMBOL          0x08
 170 #define IDAFTER         0x10
 171 #define SPACE           0x20
 172 #define PRINT           0x40
 173
 174 static Void local initCharTab() {       /* Initialize char decode table    */
 175 #define setRange(x,f,t) {Int i=f;   while (i<=t) ctable[i++] |=x;}
 176 #define setChar(x,c)    ctable[c] |= (x)
 177 #define setChars(x,s)   {char *p=s; while (*p)   ctable[(Int)*p++]|=x;}
 178 #define setCopy(x,c)    {Int i;                         \
 179                          for (i=0; i<NUM_CHARS; ++i)    \
 180                              if (isIn(i,c))             \
 181                                  ctable[i]|=x;          \
 182                         }
 183
 184     setRange(DIGIT,     '0','9');       /* ASCII decimal digits            */
 185
 186     setRange(SMALL,     'a','z');       /* ASCII lower case letters        */
 187     setRange(SMALL,     223,246);       /* ISO lower case letters          */
 188     setRange(SMALL,     248,255);       /* (omits division symbol, 247)    */
 189     setChar (SMALL,     '_');
 190
 191     setRange(LARGE,     'A','Z');       /* ASCII upper case letters        */
 192     setRange(LARGE,     192,214);       /* ISO upper case letters          */
 193     setRange(LARGE,     216,222);       /* (omits multiplication, 215)     */
 194
 195     setRange(SYMBOL,    161,191);       /* Symbol characters + ':'         */
 196     setRange(SYMBOL,    215,215);
 197     setChar (SYMBOL,    247);
 198     setChars(SYMBOL,    ":!#$%&*+./<=>?@\\^|-~");
 199
 200     setChar (IDAFTER,   '\'');          /* Characters in identifier        */
 201     setCopy (IDAFTER,   (DIGIT|SMALL|LARGE));
 202
 203     setChar (SPACE,     ' ');           /* ASCII space character           */
 204     setChar (SPACE,     160);           /* ISO non breaking space          */
 205     setRange(SPACE,     9,13);          /* special whitespace: \t\n\v\f\r  */
 206
 207     setChars(PRINT,     "(),;[]_`{}");  /* Special characters              */
 208     setChars(PRINT,     " '\"");        /* Space and quotes                */
 209     setCopy (PRINT,     (DIGIT|SMALL|LARGE|SYMBOL));
 210
 211     charTabBuilt = TRUE;
 212 #undef setRange
 213 #undef setChar
 214 #undef setChars
 215 #undef setCopy
 216 }
 217
 218
 219 /* --------------------------------------------------------------------------
 220  * Single character input routines:
 221  *
 222  * At the lowest level of input, characters are read one at a time, with the
 223  * current character held in c0 and the following (lookahead) character in
 224  * c1.  The corrdinates of c0 within the file are held in (column,row).
 225  * The input stream is advanced by one character using the skip() function.
 226  * ------------------------------------------------------------------------*/
 227
 228 #define TABSIZE    8                   /* spacing between tabstops         */
 229
 230 #define NOTHING    0                   /* what kind of input is being read?*/
 231 #define KEYBOARD   1                   /* - keyboard/console?              */
 232 #define SCRIPTFILE 2                   /* - script file                    */
 233 #define PROJFILE   3                   /* - project file                   */
 234 #define STRING     4                   /* - string buffer?                 */
 235
 236 static Int    reading   = NOTHING;
 237
 238 static Target readSoFar;
 239 static Int    row, column, startColumn;
 240 static int    c0, c1;
 241 static FILE   *inputStream = 0;
 242 static Bool   thisLiterate;
 243 static String nextStringChar;          /* next char in string buffer       */
 244
 245 #if     USE_READLINE                   /* for command line editors         */
 246 static  String currentLine;            /* editline or GNU readline         */
 247 static  String nextChar;
 248 #define nextConsoleChar() (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
 249 extern  Void add_history  Args((String));
 250 extern  String readline   Args((String));
 251 #else
 252 #define nextConsoleChar() getc(stdin)
 253 #endif
 254
 255 static  Int litLines;                  /* count defn lines in lit script   */
 256 #define DEFNCHAR  '>'                  /* definition lines begin with this */
 257 static  Int lastLine;                  /* records type of last line read:  */
 258 #define STARTLINE 0                    /* - at start of file, none read    */
 259 #define BLANKLINE 1                    /* - blank (may preceed definition) */
 260 #define TEXTLINE  2                    /* - text comment                   */
 261 #define DEFNLINE  3                    /* - line containing definition     */
 262 #define CODELINE  4                    /* - line inside code block         */
 263
 264 #define BEGINCODE "\\begin{code}"
 265 #define ENDCODE   "\\end{code}"
 266
 267 #if HAVE_GETDELIM_H
 268 static char *lineBuffer = NULL;   /* getline() does the initial allocation */
 269 #else
 270 #define LINEBUFFER_SIZE 1000
 271 static char lineBuffer[LINEBUFFER_SIZE];
 272 #endif
 273 static int lineLength = 0;
 274 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
 275 static int linePtr = 0;
 276
 277 Void consoleInput(prompt)              /* prepare to input characters from */
 278 String prompt; {                       /* standard in (i.e. console/kbd)   */
 279     reading     = KEYBOARD;            /* keyboard input is Line oriented, */
 280     c0          =                      /* i.e. input terminated by '\n'    */
 281     c1          = ' ';
 282     column      = (-1);
 283     row         = 0;
 284
 285 #if USE_READLINE
 286     /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
 287      * avoids accidentally freeing currentLine twice.
 288      */
 289     if (currentLine) {
 290         String oldCurrentLine = currentLine;
 291         currentLine = 0;           /* We may lose the space of currentLine */
 292         free(oldCurrentLine);      /* if interrupted here - unlikely       */
 293     }
 294     currentLine = readline(prompt);
 295     nextChar    = currentLine;
 296     if (currentLine) {
 297         if (*currentLine)
 298             add_history(currentLine);
 299     }
 300     else
 301         c0 = c1 = EOF;
 302 #else
 303     Printf("%s",prompt);
 304     FlushStdout();
 305 #endif
 306 }
 307
 308 Void projInput(nm)                     /* prepare to input characters from */
 309 String nm; {                           /* from named project file          */
 310     if ((inputStream = fopen(nm,"r"))!=0) {
 311         reading = PROJFILE;
 312         c0      = ' ';
 313         c1      = '\n';
 314         column  = 1;
 315         row     = 0;
 316     }
 317     else {
 318         ERRMSG(0) "Unable to open project file \"%s\"", nm
 319         EEND;
 320     }
 321 }
 322
 323 static Void local fileInput(nm,len)     /* prepare to input characters from*/
 324 String nm;                              /* named file (specified length is */
 325 Long   len; {                           /* used to set target for reading) */
 326 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 327     if (preprocessor) {
 328         Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
 329         char *cmd = malloc(reallen);
 330         if (cmd == NULL) {
 331             ERRMSG(0) "Unable to allocate memory for filter command."
 332             EEND;
 333         }
 334         strcpy(cmd,preprocessor);
 335         strcat(cmd," ");
 336         strcat(cmd,nm);
 337         inputStream = popen(cmd,"r");
 338         free(cmd);
 339     } else {
 340         inputStream = fopen(nm,"r");
 341     }
 342 #else
 343     inputStream = fopen(nm,"r");
 344 #endif
 345     if (inputStream) {
 346         reading      = SCRIPTFILE;
 347         c0           = ' ';
 348         c1           = '\n';
 349         column       = 1;
 350         row          = 0;
 351
 352         lastLine     = STARTLINE;       /* literate file processing */
 353         litLines     = 0;
 354         linePtr      = 0;
 355         lineLength   = 0;
 356         thisLiterate = literateMode(nm);
 357         inCodeBlock  = FALSE;
 358
 359         readSoFar    = 0;
 360         setGoal("Parsing", (Target)len);
 361     }
 362     else {
 363         ERRMSG(0) "Unable to open file \"%s\"", nm
 364         EEND;
 365     }
 366 }
 367
 368 Void stringInput(s)             /* prepare to input characters from string */
 369 String s; {
 370     reading      = STRING;
 371     c0           = EOF;
 372     c1           = EOF;
 373     if (*s) c0 = *s++;
 374     if (*s) c1 = *s++;
 375     column       = 1;
 376     row          = 1;
 377
 378     nextStringChar = s;
 379     if (!charTabBuilt)
 380         initCharTab();
 381 }
 382
 383 static Bool local literateMode(nm)      /* Select literate mode for file   */
 384 String nm; {
 385     char *dot = strrchr(nm,'.');        /* look for last dot in file name  */
 386     if (dot) {
 387         if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate    */
 388             return FALSE;
 389         if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
 390             filenamecmp(dot+1,"verb")==0) /* literate scripts              */
 391             return TRUE;
 392     }
 393     return literateScripts;             /* otherwise, use the default      */
 394 }
 395
 396
 397 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
 398  * I've removed the loop (since newLineSkip contains a loop too) and
 399  * replaced the warnings with errors. ADR
 400  */
 401 /*
 402  * To deal with literate \begin{code}...\end{code} blocks,
 403  * add a line buffer that rooms the current line. The old c0 and c1
 404  * stream pointers are used as before within that buffer -- sof
 405  *
 406  * Upon reading a new line into the line buffer, we check to see if
 407  * we're reading in a line containing \begin{code} or \end{code} and
 408  * take appropriate action.
 409  */
 410
 411 static Bool local linecmp(s,line)       /* compare string with line        */
 412 String s;                               /* line may end in whitespace      */
 413 String line; {
 414     Int i=0;
 415     while (s[i] != '\0' && s[i] == line[i]) {
 416         ++i;
 417     }
 418     /* s[0..i-1] == line[0..i-1] */
 419     if (s[i] != '\0') {                 /* check s `isPrefixOf` line       */
 420         return FALSE;
 421     }
 422     while (isIn(line[i], SPACE)) {      /* allow whitespace at end of line */
 423         ++i;
 424     }
 425     return (line[i] == '\0');
 426 }
 427
 428 /* Returns line length (including \n) or 0 upon EOF. */
 429 static Int local nextLine()
 430 {
 431 #if HAVE_GETDELIM_H
 432     /*
 433        Forget about fgets(), it is utterly braindead.
 434        (Assumes \NUL free streams and does not gracefully deal
 435        with overflow.) Instead, use GNU libc's getline().
 436        */
 437     lineLength = getline(&lineBuffer, &lineLength, inputStream);
 438 #else
 439     if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
 440         lineLength = strlen(lineBuffer);
 441     else
 442         lineLength = 0;
 443 #endif
 444     /* printf("Read: \"%s\"", lineBuffer); */
 445     if (lineLength <= 0) { /* EOF / IO error, who knows.. */
 446         return lineLength;
 447     }
 448     else if (lineLength >= 2 && lineBuffer[0] == '#' && lineBuffer[1] == '!') {
 449         lineBuffer[0]='\n'; /* pretend it's a blank line */
 450         lineBuffer[1]='\0';
 451         lineLength=1;
 452     } else if (thisLiterate) {
 453         if (linecmp(BEGINCODE, lineBuffer)) {
 454             if (!inCodeBlock) {             /* Entered a code block        */
 455                 inCodeBlock = TRUE;
 456                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 457                 lineBuffer[1]='\0';
 458                 lineLength=1;
 459             }
 460             else {
 461                 ERRMSG(row) "\\begin{code} encountered inside code block"
 462                 EEND;
 463             }
 464         }
 465         else if (linecmp(ENDCODE, lineBuffer)) {
 466             if (inCodeBlock) {              /* Finished code block         */
 467                 inCodeBlock = FALSE;
 468                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 469                 lineBuffer[1]='\0';
 470                 lineLength=1;
 471             }
 472             else {
 473                 ERRMSG(row) "\\end{code} encountered outside code block"
 474                 EEND;
 475             }
 476         }
 477     }
 478     /* printf("Read: \"%s\"", lineBuffer); */
 479     return lineLength;
 480 }
 481
 482 static Void local skip() {              /* move forward one char in input  */
 483     if (c0!=EOF) {                      /* stream, updating c0, c1, ...    */
 484         if (c0=='\n') {                 /* Adjusting cursor coords as nec. */
 485             row++;
 486             column=1;
 487             if (reading==SCRIPTFILE)
 488                 soFar(readSoFar);
 489         }
 490         else if (c0=='\t')
 491             column += TABSIZE - ((column-1)%TABSIZE);
 492         else
 493             column++;
 494
 495         c0 = c1;
 496         readSoFar++;
 497
 498         if (c0==EOF) {
 499             column = 0;
 500             if (reading==SCRIPTFILE)
 501                 done();
 502             closeAnyInput();
 503         }
 504         else if (reading==KEYBOARD) {
 505             allowBreak();
 506             if (c0=='\n')
 507                 c1 = EOF;
 508             else {
 509                 c1 = nextConsoleChar();
 510                 /* On Win32, hitting ctrl-C causes the next getchar to
 511                  * fail - returning "-1" to indicate an error.
 512                  * This is one of the rare cases where "-1" does not mean EOF.
 513                  */
 514                 if (EOF == c1 && !feof(stdin)) {
 515                     c1 = ' ';
 516                 }
 517             }
 518         }
 519         else if (reading==STRING) {
 520             c1 = (unsigned char) *nextStringChar++;
 521             if (c1 == '\0')
 522                 c1 = EOF;
 523         }
 524         else {
 525             if (lineLength <=0 || linePtr == lineLength) {
 526                 /* Current line, exhausted - get new one */
 527                 if (nextLine() <= 0) { /* EOF */
 528                     c1 = EOF;
 529                 }
 530                 else {
 531                     linePtr = 0;
 532                     c1 = (unsigned char)lineBuffer[linePtr++];
 533                 }
 534             }
 535             else {
 536                 c1 = (unsigned char)lineBuffer[linePtr++];
 537             }
 538         }
 539
 540     }
 541 }
 542
 543 static Void local thisLineIs(kind)     /* register kind of current line    */
 544 Int kind; {                            /* & check for literate script errs */
 545     if (literateErrors) {
 546         if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
 547             (kind==TEXTLINE && lastLine==DEFNLINE)) {
 548             ERRMSG(row) "Program line next to comment"
 549             EEND;
 550         }
 551         lastLine = kind;
 552     }
 553 }
 554
 555 static Void local newlineSkip() {      /* skip `\n' (supports lit scripts) */
 556     /* assert(c0=='\n'); */
 557     if (reading==SCRIPTFILE && thisLiterate) {
 558         do {
 559             skip();
 560             if (inCodeBlock) {         /* pass chars on definition lines   */
 561                 thisLineIs(CODELINE);  /* to lexer (w/o leading DEFNCHAR)  */
 562                 litLines++;
 563                 return;
 564             }
 565             if (c0==DEFNCHAR) {        /* pass chars on definition lines   */
 566                 thisLineIs(DEFNLINE);  /* to lexer (w/o leading DEFNCHAR)  */
 567                 skip();
 568                 litLines++;
 569                 return;
 570             }
 571             while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank?   */
 572                 skip();
 573             if (c0=='\n' || c0==EOF)
 574                 thisLineIs(BLANKLINE);
 575             else {
 576                 thisLineIs(TEXTLINE);  /* otherwise it must be a comment   */
 577                 while (c0!='\n' && c0!=EOF)
 578                     skip();
 579             }                          /* by now, c0=='\n' or c0==EOF      */
 580         } while (c0!=EOF);             /* if new line, start again         */
 581
 582         if (litLines==0 && literateErrors) {
 583             ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
 584                         DEFNCHAR
 585             EEND;
 586         }
 587         return;
 588     }
 589     skip();
 590 }
 591
 592 static Void local closeAnyInput() {    /* Close input stream, if open,     */
 593     switch (reading) {                 /* or skip to end of console line   */
 594         case PROJFILE   :
 595         case SCRIPTFILE : if (inputStream) {
 596 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 597                               if (preprocessor) {
 598                                   pclose(inputStream);
 599                               } else {
 600                                   fclose(inputStream);
 601                               }
 602 #else
 603                               fclose(inputStream);
 604 #endif
 605                               inputStream = 0;
 606                           }
 607                           break;
 608         case KEYBOARD   : while (c0!=EOF)
 609                               skip();
 610                           break;
 611     }
 612     reading=NOTHING;
 613 }
 614
 615 /* --------------------------------------------------------------------------
 616  * Parser: Uses table driven parser generated from parser.y using yacc
 617  * ------------------------------------------------------------------------*/
 618
 619 #include "parser.c"
 620
 621 /* --------------------------------------------------------------------------
 622  * Single token input routines:
 623  *
 624  * The following routines read the values of particular kinds of token given
 625  * that the first character of the token has already been located in c0 on
 626  * entry to the routine.
 627  * ------------------------------------------------------------------------*/
 628
 629 #define MAX_TOKEN           4000
 630 #define startToken()        tokPos = 0
 631 #define saveTokenChar(c)    if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
 632 #define saveChar(c)         tokenStr[tokPos++]=(char)(c)
 633 #define overflows(n,b,d,m)  (n > ((m)-(d))/(b))
 634
 635 static char tokenStr[MAX_TOKEN+1];     /* token buffer                     */
 636 static Int  tokPos;                    /* input position in buffer         */
 637 static Int  identType;                 /* identifier type: CONID / VARID   */
 638 static Int  opType;                    /* operator type  : CONOP / VAROP   */
 639
 640 static Void local endToken() {         /* check for token overflow         */
 641     if (tokPos>MAX_TOKEN) {
 642         ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
 643         EEND;
 644     }
 645     tokenStr[tokPos] = '\0';
 646 }
 647
 648 static Text local readOperator() {     /* read operator symbol             */
 649     startToken();
 650     do {
 651         saveTokenChar(c0);
 652         skip();
 653     } while (isISO(c0) && isIn(c0,SYMBOL));
 654     opType = (tokenStr[0]==':' ? CONOP : VAROP);
 655     endToken();
 656     return findText(tokenStr);
 657 }
 658
 659 static Text local readIdent() {        /* read identifier                  */
 660     startToken();
 661     do {
 662         saveTokenChar(c0);
 663         skip();
 664     } while (isISO(c0) && isIn(c0,IDAFTER));
 665     endToken();
 666     identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
 667     return findText(tokenStr);
 668 }
 669
 670 static Cell local readRadixNumber(r)   /* Read literal in specified radix  */
 671 Int r; {                               /* from input of the form 0c{digs}  */
 672     Int d;
 673     skip();                            /* skip leading zero                */
 674     if ((d=readHexDigit(c1))<0 || d>=r)/* Special case; no digits, lex as  */
 675         return mkInt(0);               /* if it had been written "0 c..."  */
 676     else {
 677         Int  n = 0;
 678 #if BIGNUMS
 679         Cell big = NIL;
 680 #endif
 681         skip();
 682         do {
 683 #if BIGNUMS
 684             if (nonNull(big))
 685                 big = bigShift(big,d,r);
 686             else if (overflows(n,r,d,MAXPOSINT))
 687                 big = bigShift(bigInt(n),d,r);
 688             else
 689 #else
 690             if (overflows(n,r,d,MAXPOSINT)) {
 691                 ERRMSG(row) "Integer literal out of range"
 692                 EEND;
 693             }
 694             else
 695 #endif
 696                 n = r*n + d;
 697             skip();
 698             d = readHexDigit(c0);
 699         } while (d>=0 && d<r);
 700 #if BIGNUMS
 701         return nonNull(big) ? big : mkInt(n);
 702 #else
 703         return mkInt(n);
 704 #endif
 705     }
 706 }
 707
 708 static Cell local readNumber() {        /* read numeric constant           */
 709     Int   n           = 0;
 710     Bool  intTooLarge = FALSE;
 711
 712     if (c0=='0') {
 713         if (c1=='x' || c1=='X')         /* Maybe a hexadecimal literal?    */
 714             return readRadixNumber(16);
 715         if (c1=='o' || c1=='O')         /* Maybe an octal literal?         */
 716             return readRadixNumber(8);
 717     }
 718
 719     startToken();
 720     do {
 721         if (overflows(n,10,(c0-'0'),MAXPOSINT))
 722             intTooLarge = TRUE;
 723         n  = 10*n  + (c0-'0');
 724         saveTokenChar(c0);
 725         skip();
 726     } while (isISO(c0) && isIn(c0,DIGIT));
 727
 728     if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
 729         endToken();
 730         if (!intTooLarge)
 731             return mkInt(n);
 732 #if BIGNUMS
 733         return bigStr(tokenStr);
 734 #else
 735         ERRMSG(row) "Integer literal out of range"
 736         EEND;
 737 #endif
 738     }
 739
 740     saveTokenChar(c0);                  /* save decimal point              */
 741     skip();
 742     do {                                /* process fractional part ...     */
 743         saveTokenChar(c0);
 744         skip();
 745     } while (isISO(c0) && isIn(c0,DIGIT));
 746
 747     if (c0=='e' || c0=='E') {           /* look for exponent part...       */
 748         saveTokenChar('e');
 749         skip();
 750         if (c0=='-') {
 751             saveTokenChar('-');
 752             skip();
 753         }
 754         else if (c0=='+')
 755             skip();
 756
 757         if (!isISO(c0) || !isIn(c0,DIGIT)) {
 758             ERRMSG(row) "Missing digits in exponent"
 759             EEND;
 760         }
 761         else {
 762             do {
 763                 saveTokenChar(c0);
 764                 skip();
 765             } while (isISO(c0) && isIn(c0,DIGIT));
 766         }
 767     }
 768
 769     endToken();
 770 #ifndef HAVE_LIBM
 771     ERRMSG(row) "No floating point numbers in this implementation"
 772     EEND;
 773 #endif
 774
 775     return mkFloat(stringToFloat(tokenStr));
 776 }
 777
 778 static Cell local readChar() {         /* read character constant          */
 779     Cell charRead;
 780
 781     skip(/* '\'' */);
 782     if (c0=='\'' || c0=='\n' || c0==EOF) {
 783         ERRMSG(row) "Illegal character constant"
 784         EEND;
 785     }
 786
 787     charRead = readAChar(FALSE);
 788
 789     if (c0=='\'')
 790         skip(/* '\'' */);
 791     else {
 792         ERRMSG(row) "Improperly terminated character constant"
 793         EEND;
 794     }
 795     return charRead;
 796 }
 797
 798 static Cell local readString() {       /* read string literal              */
 799     Cell c;
 800
 801     startToken();
 802     skip(/* '\"' */);
 803     while (c0!='\"' && c0!='\n' && c0!=EOF) {
 804         c = readAChar(TRUE);
 805         if (nonNull(c))
 806             saveStrChr(charOf(c));
 807     }
 808
 809     if (c0=='\"')
 810         skip(/* '\"' */);
 811     else {
 812         ERRMSG(row) "Improperly terminated string"
 813         EEND;
 814     }
 815     endToken();
 816     return mkStr(findText(tokenStr));
 817 }
 818
 819 static Void local saveStrChr(c)        /* save character in string         */
 820 Char c; {
 821     if (c!='\0' && c!='\\') {          /* save non null char as single char*/
 822         saveTokenChar(c);
 823     }
 824     else {                             /* save null char as TWO null chars */
 825         if (tokPos+1<MAX_TOKEN) {
 826             saveChar('\\');
 827             if (c=='\\')
 828                 saveChar('\\');
 829             else
 830                 saveChar('0');
 831         }
 832     }
 833 }
 834
 835 static Cell local readAChar(isStrLit)  /* read single char constant        */
 836 Bool isStrLit; {                       /* TRUE => enable \& and gaps       */
 837     Cell c = mkChar(c0);
 838
 839     if (c0=='\\')                      /* escape character?                */
 840         return readEscapeChar(isStrLit);
 841     if (!isISO(c0)) {
 842         ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
 843         EEND;
 844     }
 845     skip();                            /* normal character?                */
 846     return c;
 847 }
 848
 849 /* --------------------------------------------------------------------------
 850  * Character escape code sequences:
 851  * ------------------------------------------------------------------------*/
 852
 853 static struct {                        /* table of special escape codes    */
 854     char *codename;
 855     int  codenumber;
 856 } escapes[] = {
 857    {"a",    7}, {"b",    8}, {"f",   12}, {"n",   10},  /* common escapes  */
 858    {"r",   13}, {"t",    9}, {"\\",'\\'}, {"\"",'\"'},
 859    {"\'",'\''}, {"v",   11},
 860    {"NUL",  0}, {"SOH",  1}, {"STX",  2}, {"ETX",  3},  /* ascii codenames */
 861    {"EOT",  4}, {"ENQ",  5}, {"ACK",  6}, {"BEL",  7},
 862    {"BS",   8}, {"HT",   9}, {"LF",  10}, {"VT",  11},
 863    {"FF",  12}, {"CR",  13}, {"SO",  14}, {"SI",  15},
 864    {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
 865    {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
 866    {"CAN", 24}, {"EM",  25}, {"SUB", 26}, {"ESC", 27},
 867    {"FS",  28}, {"GS",  29}, {"RS",  30}, {"US",  31},
 868    {"SP",  32}, {"DEL", 127},
 869    {0,0}
 870 };
 871
 872 static Int  alreadyMatched;            /* Record portion of input stream   */
 873 static char alreadyRead[10];           /* that has been read w/o a match   */
 874
 875 static Bool local lazyReadMatches(s)   /* compare input stream with string */
 876 String s; {                            /* possibly using characters that   */
 877     int i;                             /* have already been read           */
 878
 879     for (i=0; i<alreadyMatched; ++i)
 880         if (alreadyRead[i]!=s[i])
 881             return FALSE;
 882
 883     while (s[i] && s[i]==c0) {
 884         alreadyRead[alreadyMatched++]=(char)c0;
 885         skip();
 886         i++;
 887     }
 888
 889     return s[i]=='\0';
 890 }
 891
 892 static Cell local readEscapeChar(isStrLit)/* read escape character         */
 893 Bool isStrLit; {
 894     int i=0;
 895
 896     skip(/* '\\' */);
 897     switch (c0) {
 898         case '&'  : if (isStrLit) {
 899                         skip();
 900                         return NIL;
 901                     }
 902                     ERRMSG(row) "Illegal use of `\\&' in character constant"
 903                     EEND;
 904                     break;/*NOTREACHED*/
 905
 906         case '^'  : return readCtrlChar();
 907
 908         case 'o'  : return readOctChar();
 909         case 'x'  : return readHexChar();
 910
 911         default   : if (!isISO(c0)) {
 912                         ERRMSG(row) "Illegal escape sequence"
 913                         EEND;
 914                     }
 915                     else if (isIn(c0,SPACE)) {
 916                         if (isStrLit) {
 917                             skipGap();
 918                             return NIL;
 919                         }
 920                         ERRMSG(row) "Illegal use of gap in character constant"
 921                         EEND;
 922                         break;
 923                     }
 924                     else if (isIn(c0,DIGIT))
 925                         return readDecChar();
 926     }
 927
 928     for (alreadyMatched=0; escapes[i].codename; i++)
 929         if (lazyReadMatches(escapes[i].codename))
 930             return mkChar(escapes[i].codenumber);
 931
 932     alreadyRead[alreadyMatched++] = (char)c0;
 933     alreadyRead[alreadyMatched++] = '\0';
 934     ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
 935                 alreadyRead
 936     EEND;
 937     return NIL;/*NOTREACHED*/
 938 }
 939
 940 static Void local skipGap() {          /* skip over gap in string literal  */
 941     do                                 /* (simplified in Haskell 1.1)      */
 942         if (c0=='\n')
 943             newlineSkip();
 944         else
 945             skip();
 946     while (isISO(c0) && isIn(c0,SPACE));
 947     if (c0!='\\') {
 948         ERRMSG(row) "Missing `\\' terminating string literal gap"
 949         EEND;
 950     }
 951     skip(/* '\\' */);
 952 }
 953
 954 static Cell local readCtrlChar() {     /* read escape sequence \^x         */
 955     static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
 956     String which;
 957
 958     skip(/* '^' */);
 959     if ((which = strchr(controls,c0))==NULL) {
 960         ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
 961         EEND;
 962     }
 963     skip();
 964     return mkChar(which-controls);
 965 }
 966
 967 static Cell local readOctChar() {      /* read octal character constant    */
 968     Int n = 0;
 969     Int d;
 970
 971     skip(/* 'o' */);
 972     if ((d = readHexDigit(c0))<0 || d>=8) {
 973         ERRMSG(row) "Empty octal character escape"
 974         EEND;
 975     }
 976     do {
 977         if (overflows(n,8,d,MAXCHARVAL)) {
 978             ERRMSG(row) "Octal character escape out of range"
 979             EEND;
 980         }
 981         n = 8*n + d;
 982         skip();
 983     } while ((d = readHexDigit(c0))>=0 && d<8);
 984
 985     return mkChar(n);
 986 }
 987
 988 static Cell local readHexChar() {      /* read hex character constant      */
 989     Int n = 0;
 990     Int d;
 991
 992     skip(/* 'x' */);
 993     if ((d = readHexDigit(c0))<0) {
 994         ERRMSG(row) "Empty hexadecimal character escape"
 995         EEND;
 996     }
 997     do {
 998         if (overflows(n,16,d,MAXCHARVAL)) {
 999             ERRMSG(row) "Hexadecimal character escape out of range"
1000             EEND;
1001         }
1002         n = 16*n + d;
1003         skip();
1004     } while ((d = readHexDigit(c0))>=0);
1005
1006     return mkChar(n);
1007 }
1008
1009 static Int local readHexDigit(c)       /* read single hex digit            */
1010 Char c; {
1011     if ('0'<=c && c<='9')
1012         return c-'0';
1013     if ('A'<=c && c<='F')
1014         return 10 + (c-'A');
1015     if ('a'<=c && c<='f')
1016         return 10 + (c-'a');
1017     return -1;
1018 }
1019
1020 static Cell local readDecChar() {      /* read decimal character constant  */
1021     Int n = 0;
1022
1023     do {
1024         if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1025             ERRMSG(row) "Decimal character escape out of range"
1026             EEND;
1027         }
1028         n = 10*n + (c0-'0');
1029         skip();
1030     } while (c0!=EOF && isIn(c0,DIGIT));
1031
1032     return mkChar(n);
1033 }
1034
1035 /* --------------------------------------------------------------------------
1036  * Produce printable representation of character:
1037  * ------------------------------------------------------------------------*/
1038
1039 String unlexChar(c,quote)              /* return string representation of  */
1040 Char c;                                /* character...                     */
1041 Char quote; {                          /* protect quote character          */
1042     static char buffer[12];
1043
1044     if (c<0)                           /* deal with sign extended chars..  */
1045         c += NUM_CHARS;
1046
1047     if (isISO(c) && isIn(c,PRINT)) {   /* normal printable character       */
1048         if (c==quote || c=='\\') {     /* look for quote of approp. kind   */
1049             buffer[0] = '\\';
1050             buffer[1] = (char)c;
1051             buffer[2] = '\0';
1052         }
1053         else {
1054             buffer[0] = (char)c;
1055             buffer[1] = '\0';
1056         }
1057     }
1058     else {                             /* look for escape code             */
1059         Int escs;
1060         for (escs=0; escapes[escs].codename; escs++)
1061             if (escapes[escs].codenumber==c) {
1062                 sprintf(buffer,"\\%s",escapes[escs].codename);
1063                 return buffer;
1064             }
1065         sprintf(buffer,"\\%d",c);      /* otherwise use numeric escape     */
1066     }
1067     return buffer;
1068 }
1069
1070 Void printString(s)                    /* print string s, using quotes and */
1071 String s; {                            /* escapes if any parts need them   */
1072     if (s) {
1073         String t = s;
1074         Char   c;
1075         while ((c = *t)!=0 && isISO(c)
1076                            && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1077             t++;
1078         }
1079         if (*t) {
1080             Putchar('"');
1081             for (t=s; *t; t++)
1082                 Printf("%s",unlexChar(*t,'"'));
1083             Putchar('"');
1084         }
1085         else
1086             Printf("%s",s);
1087     }
1088 }
1089
1090 /* -------------------------------------------------------------------------
1091  * Handle special types of input for use in interpreter:
1092  * -----------------------------------------------------------------------*/
1093
1094 Command readCommand(cmds,start,sys)    /* read command at start of input   */
1095 struct cmd *cmds;                      /* line in interpreter              */
1096 Char   start;                          /* characters introducing a cmd     */
1097 Char   sys; {                          /* character for shell escape       */
1098     while (c0==' ' || c0 =='\t')
1099         skip();
1100
1101     if (c0=='\n')                      /* look for blank command lines     */
1102         return NOCMD;
1103     if (c0==EOF)                       /* look for end of input stream     */
1104         return QUIT;
1105     if (c0==sys) {                     /* single character system escape   */
1106         skip();
1107         return SYSTEM;
1108     }
1109     if (c0==start && c1==sys) {        /* two character system escape      */
1110         skip();
1111         skip();
1112         return SYSTEM;
1113     }
1114
1115     startToken();                      /* All cmds start with start        */
1116     if (c0==start)                     /* except default (usually EVAL)    */
1117         do {                           /* which is empty                   */
1118             saveTokenChar(c0);
1119             skip();
1120         } while (c0!=EOF && !isIn(c0,SPACE));
1121     endToken();
1122
1123     for (; cmds->cmdString; ++cmds)
1124         if (strcmp((cmds->cmdString),tokenStr)==0 ||
1125             (tokenStr[0]==start &&
1126              tokenStr[1]==(cmds->cmdString)[1] &&
1127              tokenStr[2]=='\0'))
1128             return (cmds->cmdCode);
1129     return BADCMD;
1130 }
1131
1132 String readFilename() {                /* Read filename from input (if any)*/
1133     if (reading==PROJFILE)
1134         skipWhitespace();
1135     else
1136         while (c0==' ' || c0=='\t')
1137             skip();
1138
1139     if (c0=='\n' || c0==EOF)           /* return null string at end of line*/
1140         return 0;
1141
1142     startToken();
1143     while (c0!=EOF && !isIn(c0,SPACE)) {
1144         if (c0=='"') {
1145             skip();
1146             while (c0!=EOF && c0!='\"') {
1147                 Cell c = readAChar(TRUE);
1148                 if (nonNull(c)) {
1149                     saveTokenChar(charOf(c));
1150                 }
1151             }
1152             if (c0=='"')
1153                 skip();
1154             else {
1155                 ERRMSG(row) "a closing quote, '\"', was expected"
1156                 EEND;
1157             }
1158         }
1159         else {
1160             saveTokenChar(c0);
1161             skip();
1162         }
1163     }
1164     endToken();
1165     return tokenStr;
1166 }
1167
1168 String readLine() {                    /* Read command line from input     */
1169     while (c0==' ' || c0=='\t')        /* skip leading whitespace          */
1170         skip();
1171
1172     startToken();
1173     while (c0!='\n' && c0!=EOF) {
1174         saveTokenChar(c0);
1175         skip();
1176     }
1177     endToken();
1178
1179     return tokenStr;
1180 }
1181
1182 /* --------------------------------------------------------------------------
1183  * This lexer supports the Haskell layout rule:
1184  *
1185  * - Layout area bounded by { ... }, with `;'s in between.
1186  * - A `{' is a HARD indentation and can only be matched by a corresponding
1187  *   HARD '}'
1188  * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1189  *   is inserted with the column number of the first token after the
1190  *   WHERE/LET/OF keyword.
1191  * - When a soft indentation is uppermost on the indetation stack with
1192  *   column col' we insert:
1193  *    `}'  in front of token with column<col' and pop indentation off stack,
1194  *    `;'  in front of token with column==col'.
1195  * ------------------------------------------------------------------------*/
1196
1197 #define MAXINDENT  100                 /* maximum nesting of layout rule   */
1198 static  Int        layout[MAXINDENT+1];/* indentation stack                */
1199 #define HARD       (-1)                /* indicates hard indentation       */
1200 static  Int        indentDepth = (-1); /* current indentation nesting      */
1201
1202 static Void local goOffside(col)       /* insert offside marker            */
1203 Int col; {                             /* for specified column             */
1204     if (indentDepth>=MAXINDENT) {
1205         ERRMSG(row) "Too many levels of program nesting"
1206         EEND;
1207     }
1208     layout[++indentDepth] = col;
1209 }
1210
1211 static Void local unOffside() {        /* leave layout rule area           */
1212     indentDepth--;
1213 }
1214
1215 static Bool local canUnOffside() {     /* Decide if unoffside permitted    */
1216     return indentDepth>=0 && layout[indentDepth]!=HARD;
1217 }
1218
1219 /* --------------------------------------------------------------------------
1220  * Main tokeniser:
1221  * ------------------------------------------------------------------------*/
1222
1223 static Void local skipWhitespace() {   /* Skip over whitespace/comments    */
1224     for (;;)                           /* Strictly speaking, this code is  */
1225         if (c0==EOF)                   /* a little more liberal than the   */
1226             return;                    /* report allows ...                */
1227         else if (c0=='\n')
1228             newlineSkip();
1229         else if (isIn(c0,SPACE))
1230             skip();
1231         else if (c0=='{' && c1=='-') { /* (potentially) nested comment     */
1232             Int nesting = 1;
1233             Int origRow = row;         /* Save original row number         */
1234             skip();
1235             skip();
1236             while (nesting>0 && c0!=EOF)
1237                 if (c0=='{' && c1=='-') {
1238                     skip();
1239                     skip();
1240                     nesting++;
1241                 }
1242                 else if (c0=='-' && c1=='}') {
1243                     skip();
1244                     skip();
1245                     nesting--;
1246                 }
1247                 else if (c0=='\n')
1248                     newlineSkip();
1249                 else
1250                     skip();
1251             if (nesting>0) {
1252                 ERRMSG(origRow) "Unterminated nested comment {- ..."
1253                 EEND;
1254             }
1255         }
1256         else if (c0=='-' && c1=='-') {  /* One line comment                */
1257             do
1258                 skip();
1259             while (c0!='\n' && c0!=EOF);
1260             if (c0=='\n')
1261                 newlineSkip();
1262         }
1263         else
1264             return;
1265 }
1266
1267 static Bool firstToken;                /* Set to TRUE for first token      */
1268 static Int  firstTokenIs;              /* ... with token value stored here */
1269
1270 static Int local yylex() {             /* Read next input token ...        */
1271     static Bool insertOpen    = FALSE;
1272     static Bool insertedToken = FALSE;
1273     static Text textRepeat;
1274
1275 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1276
1277     if (firstToken) {                  /* Special case for first token     */
1278         indentDepth   = (-1);
1279         firstToken    = FALSE;
1280         insertOpen    = FALSE;
1281         insertedToken = FALSE;
1282         if (reading==KEYBOARD)
1283             textRepeat = findText(repeatStr);
1284         return firstTokenIs;
1285     }
1286
1287     if (insertOpen) {                  /* insert `soft' opening brace      */
1288         insertOpen    = FALSE;
1289         insertedToken = TRUE;
1290         goOffside(column);
1291         push(yylval = mkInt(row));
1292         return '{';
1293     }
1294
1295     /* ----------------------------------------------------------------------
1296      * Skip white space, and insert tokens to support layout rules as reqd.
1297      * --------------------------------------------------------------------*/
1298
1299     skipWhitespace();
1300     startColumn = column;
1301     push(yylval = mkInt(row));         /* default token value is line no.  */
1302     /* subsequent changes to yylval must also set top() to the same value  */
1303
1304     if (indentDepth>=0) {              /* layout rule(s) active ?          */
1305         if (insertedToken)             /* avoid inserting multiple `;'s    */
1306             insertedToken = FALSE;     /* or putting `;' after `{'         */
1307         else
1308         if (layout[indentDepth]!=HARD) {
1309             if (column<layout[indentDepth]) {
1310                 unOffside();
1311                 return '}';
1312             }
1313             else if (column==layout[indentDepth] && c0!=EOF) {
1314                 insertedToken = TRUE;
1315                 return ';';
1316             }
1317         }
1318     }
1319
1320     /* ----------------------------------------------------------------------
1321      * Now try to identify token type:
1322      * --------------------------------------------------------------------*/
1323
1324     switch (c0) {
1325         case EOF  : return 0;                   /* End of file/input       */
1326
1327         /* The next 10 characters make up the `special' category in 1.3    */
1328         case '('  : skip(); return '(';
1329         case ')'  : skip(); return ')';
1330         case ','  : skip(); return ',';
1331         case ';'  : skip(); return ';';
1332         case '['  : skip(); return '[';
1333         case ']'  : skip(); return ']';
1334         case '`'  : skip(); return '`';
1335         case '{'  : goOffside(HARD);
1336                     skip();
1337                     return '{';
1338         case '}'  : if (indentDepth<0) {
1339                         ERRMSG(row) "Misplaced `}'"
1340                         EEND;
1341                     }
1342                     if (layout[indentDepth]==HARD)      /* skip over hard }*/
1343                         skip();
1344                     unOffside();        /* otherwise, we have to insert a }*/
1345                     return '}';         /* to (try to) avoid an error...   */
1346
1347         /* Character and string literals                                   */
1348         case '\'' : top() = yylval = readChar();
1349                     return CHARLIT;
1350
1351         case '\"' : top() = yylval = readString();
1352                     return STRINGLIT;
1353     }
1354
1355 #if TREX
1356     if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1357         Text it;                        /* Look for record selector name   */
1358         skip();
1359         it    = readIdent();
1360         top() = yylval = ap(RECSEL,mkExt(it));
1361         return identType=RECSELID;
1362     }
1363 #endif
1364     if (isIn(c0,LARGE)) {               /* Look for qualified name         */
1365         Text it = readIdent();          /* No keyword begins with LARGE ...*/
1366         if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1367             Text it2 = NIL;
1368             skip();                     /* Skip qualifying dot             */
1369             if (isIn(c0,SYMBOL)) { /* Qualified operator */
1370                 it2 = readOperator();
1371                 if (opType==CONOP) {
1372                     top() = yylval = mkQConOp(it,it2);
1373                     return QCONOP;
1374                 } else {
1375                     top() = yylval = mkQVarOp(it,it2);
1376                     return QVAROP;
1377                 }
1378             } else {               /* Qualified identifier */
1379                 it2 = readIdent();
1380                 if (identType==CONID) {
1381                     top() = yylval = mkQCon(it,it2);
1382                     return QCONID;
1383                 } else {
1384                     top() = yylval = mkQVar(it,it2);
1385                     return QVARID;
1386                 }
1387             }
1388         } else {
1389             top() = yylval = mkCon(it);
1390             return identType;
1391         }
1392     }
1393     if (isIn(c0,(SMALL|LARGE))) {
1394         Text it = readIdent();
1395
1396         if (it==textCase)              return CASEXP;
1397         if (it==textOfK)               lookAhead(OF);
1398         if (it==textData)              return DATA;
1399         if (it==textType)              return TYPE;
1400         if (it==textIf)                return IF;
1401         if (it==textThen)              return THEN;
1402         if (it==textElse)              return ELSE;
1403         if (it==textWhere)             lookAhead(WHERE);
1404         if (it==textLet)               lookAhead(LET);
1405         if (it==textIn)                return IN;
1406         if (it==textInfix)             return INFIXN;
1407         if (it==textInfixl)            return INFIXL;
1408         if (it==textInfixr)            return INFIXR;
1409         if (it==textForeign)           return FOREIGN;
1410         if (it==textUnsafe)            return UNSAFE;
1411         if (it==textNewtype)           return TNEWTYPE;
1412         if (it==textDefault)           return DEFAULT;
1413         if (it==textDeriving)          return DERIVING;
1414         if (it==textDo)                lookAhead(DO);
1415         if (it==textClass)             return TCLASS;
1416         if (it==textInstance)          return TINSTANCE;
1417         if (it==textModule)            return TMODULE;
1418         if (it==textImport)            return IMPORT;
1419         if (it==textExport)            return EXPORT;
1420         if (it==textHiding)            return HIDING;
1421         if (it==textQualified)         return QUALIFIED;
1422         if (it==textAsMod)             return ASMOD;
1423         if (it==textWildcard)          return '_';
1424         if (it==textAll && !haskell98) return ALL;
1425         if (it==textRepeat && reading==KEYBOARD)
1426             return repeatLast();
1427
1428         top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1429         return identType;
1430     }
1431
1432     if (isIn(c0,SYMBOL)) {
1433         Text it = readOperator();
1434
1435         if (it==textCoco)    return COCO;
1436         if (it==textEq)      return '=';
1437         if (it==textUpto)    return UPTO;
1438         if (it==textAs)      return '@';
1439         if (it==textLambda)  return '\\';
1440         if (it==textBar)     return '|';
1441         if (it==textFrom)    return FROM;
1442         if (it==textMinus)   return '-';
1443         if (it==textPlus)    return '+';
1444         if (it==textBang)    return '!';
1445         if (it==textDot)     return '.';
1446         if (it==textArrow)   return ARROW;
1447         if (it==textLazy)    return '~';
1448         if (it==textImplies) return IMPLIES;
1449         if (it==textRepeat && reading==KEYBOARD)
1450             return repeatLast();
1451
1452         top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1453         return opType;
1454     }
1455
1456     if (isIn(c0,DIGIT)) {
1457         top() = yylval = readNumber();
1458         return NUMLIT;
1459     }
1460
1461     ERRMSG(row) "Unrecognised character `\\%d' in column %d", ((int)c0), column
1462     EEND;
1463     return 0; /*NOTREACHED*/
1464 }
1465
1466 static Int local repeatLast() {         /* Obtain last expression entered  */
1467     if (isNull(yylval=getLastExpr())) {
1468         ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1469         EEND;
1470     }
1471     return REPEAT;
1472 }
1473
1474 Syntax defaultSyntax(t)                 /* Find default syntax of var named*/
1475 Text t; {                               /* by t ...                        */
1476     String s = textToStr(t);
1477     return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1478 }
1479
1480 Syntax syntaxOf(n)                      /* Find syntax for name            */
1481 Name n; {
1482     if (name(n).syntax==NO_SYNTAX)      /* Return default if no syntax set */
1483         return defaultSyntax(name(n).text);
1484     return name(n).syntax;
1485 }
1486
1487 /* --------------------------------------------------------------------------
1488  * main entry points to parser/lexer:
1489  * ------------------------------------------------------------------------*/
1490
1491 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1492 Int startWith; {                       /* determining whether to read a    */
1493     firstToken   = TRUE;               /* script or an expression          */
1494     firstTokenIs = startWith;
1495
1496     clearStack();
1497     if (yyparse()) {                   /* This can only be parser overflow */
1498         ERRMSG(row) "Parser overflow"  /* as all syntax errors are caught  */
1499         EEND;                          /* in the parser...                 */
1500     }
1501     drop();
1502     if (!stackEmpty())                 /* stack should now be empty        */
1503         internal("parseInput");
1504 }
1505
1506 #ifdef HSCRIPT
1507 static String memPrefix = "@mem@";
1508 static Int lenMemPrefix = 5;   /* strlen(memPrefix)*/
1509
1510 Void makeMemScript(mem,fname)
1511 String mem;
1512 String fname; {
1513    strcat(fname,memPrefix);
1514    itoa((int)mem, fname+strlen(fname), 10);
1515 }
1516
1517 Bool isMemScript(fname)
1518 String fname; {
1519    return (strstr(fname,memPrefix) != NULL);
1520 }
1521
1522 String memScriptString(fname)
1523 String fname; {
1524     String p = strstr(fname,memPrefix);
1525     if (p) {
1526         return (String)atoi(p+lenMemPrefix);
1527     } else {
1528         return NULL;
1529     }
1530 }
1531
1532 Void parseScript(fname,len)             /* Read a script, possibly from mem */
1533 String fname;
1534 Long len; {
1535     input(RESET);
1536     if (isMemScript(fname)) {
1537         char* s = memScriptString(fname);
1538         stringInput(s);
1539     } else {
1540         fileInput(fname,len);
1541     }
1542     parseInput(SCRIPT);
1543 }
1544 #else
1545 Void parseScript(nm,len)               /* Read a script                    */
1546 String nm;
1547 Long   len; {                          /* Used to set a target for reading */
1548     input(RESET);
1549     fileInput(nm,len);
1550     parseInput(SCRIPT);
1551 }
1552 #endif
1553
1554 Void parseExp() {                      /* Read an expression to evaluate   */
1555     parseInput(EXPR);
1556     setLastExpr(inputExpr);
1557 }
1558
1559 /* --------------------------------------------------------------------------
1560  * Input control:
1561  * ------------------------------------------------------------------------*/
1562
1563 Void input(what)
1564 Int what; {
1565     switch (what) {
1566         case INSTALL : initCharTab();
1567                        textCase       = findText("case");
1568                        textOfK        = findText("of");
1569                        textData       = findText("data");
1570                        textType       = findText("type");
1571                        textIf         = findText("if");
1572                        textThen       = findText("then");
1573                        textElse       = findText("else");
1574                        textWhere      = findText("where");
1575                        textLet        = findText("let");
1576                        textIn         = findText("in");
1577                        textInfix      = findText("infix");
1578                        textInfixl     = findText("infixl");
1579                        textInfixr     = findText("infixr");
1580                        textForeign    = findText("foreign");
1581                        textUnsafe     = findText("unsafe");
1582                        textNewtype    = findText("newtype");
1583                        textDefault    = findText("default");
1584                        textDeriving   = findText("deriving");
1585                        textDo         = findText("do");
1586                        textClass      = findText("class");
1587                        textInstance   = findText("instance");
1588                        textCoco       = findText("::");
1589                        textEq         = findText("=");
1590                        textUpto       = findText("..");
1591                        textAs         = findText("@");
1592                        textLambda     = findText("\\");
1593                        textBar        = findText("|");
1594                        textMinus      = findText("-");
1595                        textPlus       = findText("+");
1596                        textFrom       = findText("<-");
1597                        textArrow      = findText("->");
1598                        textLazy       = findText("~");
1599                        textBang       = findText("!");
1600                        textDot        = findText(".");
1601                        textImplies    = findText("=>");
1602                        textPrelude    = findText("Prelude");
1603                        textNum        = findText("Num");
1604                        textModule     = findText("module");
1605                        textImport     = findText("import");
1606                        textHiding     = findText("hiding");
1607                        textQualified  = findText("qualified");
1608                        textAsMod      = findText("as");
1609                        textWildcard   = findText("_");
1610                        textAll        = findText("forall");
1611                        varMinus       = mkVar(textMinus);
1612                        varPlus        = mkVar(textPlus);
1613                        varBang        = mkVar(textBang);
1614                        varDot         = mkVar(textDot);
1615                        varHiding      = mkVar(textHiding);
1616                        varQualified   = mkVar(textQualified);
1617                        varAsMod       = mkVar(textAsMod);
1618                        conMain        = mkCon(findText("Main"));
1619                        varMain        = mkVar(findText("main"));
1620                        evalDefaults   = NIL;
1621
1622                        input(RESET);
1623                        break;
1624
1625         case RESET   : tyconDefns   = NIL;
1626                        typeInDefns  = NIL;
1627                        valDefns     = NIL;
1628                        classDefns   = NIL;
1629                        instDefns    = NIL;
1630                        selDefns     = NIL;
1631                        genDefns     = NIL;
1632                        //primDefns    = NIL;
1633                        unqualImports= NIL;
1634                        foreignImports= NIL;
1635                        foreignExports= NIL;
1636                        defaultDefns = NIL;
1637                        defaultLine  = 0;
1638                        inputExpr    = NIL;
1639                        imps         = NIL;
1640                        closeAnyInput();
1641                        break;
1642
1643         case BREAK   : if (reading==KEYBOARD)
1644                            c0 = EOF;
1645                        break;
1646
1647         case MARK    : mark(tyconDefns);
1648                        mark(typeInDefns);
1649                        mark(valDefns);
1650                        mark(classDefns);
1651                        mark(instDefns);
1652                        mark(selDefns);
1653                        mark(genDefns);
1654                        //mark(primDefns);
1655                        mark(unqualImports);
1656                        mark(foreignImports);
1657                        mark(foreignExports);
1658                        mark(defaultDefns);
1659                        mark(evalDefaults);
1660                        mark(inputExpr);
1661                        mark(varMinus);
1662                        mark(varPlus);
1663                        mark(varBang);
1664                        mark(varDot);
1665                        mark(varHiding);
1666                        mark(varQualified);
1667                        mark(varAsMod);
1668                        mark(varMain);
1669                        mark(conMain);
1670                        mark(imps);
1671                        break;
1672     }
1673 }
1674
1675 /*-------------------------------------------------------------------------*/