ghc/interpreter/input.c

   1
   2 /* --------------------------------------------------------------------------
   3  * Input functions, lexical analysis parsing etc...
   4  *
   5  * Hugs 98 is Copyright (c) Mark P Jones, Alastair Reid and the Yale
   6  * Haskell Group 1994-99, and is distributed as Open Source software
   7  * under the Artistic License; see the file "Artistic" that is included
   8  * in the distribution for details.
   9  *
  10  * $RCSfile: input.c,v $
  11  * $Revision: 1.5 $
  12  * $Date: 1999/04/27 10:06:53 $
  13  * ------------------------------------------------------------------------*/
  14
  15 #include "prelude.h"
  16 #include "storage.h"
  17 #include "backend.h"
  18 #include "connect.h"
  19 #include "command.h"
  20 #include "errors.h"
  21 #include <ctype.h>
  22 #if HAVE_GETDELIM_H
  23 #include "getdelim.h"
  24 #endif
  25
  26 #if HUGS_FOR_WINDOWS
  27 #undef IN
  28 #endif
  29
  30 /* --------------------------------------------------------------------------
  31  * Global data:
  32  * ------------------------------------------------------------------------*/
  33
  34 List tyconDefns      = NIL;             /* type constructor definitions    */
  35 List typeInDefns     = NIL;             /* type synonym restrictions       */
  36 List valDefns        = NIL;             /* value definitions in script     */
  37 List classDefns      = NIL;             /* class defns in script           */
  38 List instDefns       = NIL;             /* instance defns in script        */
  39 List selDefns        = NIL;             /* list of selector lists          */
  40 List genDefns        = NIL;             /* list of generated names         */
  41 List unqualImports   = NIL;             /* unqualified import list         */
  42 List foreignImports  = NIL;             /* foreign imports                 */
  43 List foreignExports  = NIL;             /* foreign exportsd                */
  44 List defaultDefns    = NIL;             /* default definitions (if any)    */
  45 Int  defaultLine     = 0;               /* line in which default defs occur*/
  46 List evalDefaults    = NIL;             /* defaults for evaluator          */
  47
  48 Cell inputExpr       = NIL;             /* input expression                */
  49 Bool literateScripts = FALSE;           /* TRUE => default to lit scripts  */
  50 Bool literateErrors  = TRUE;            /* TRUE => report errs in lit scrs */
  51
  52 String repeatStr     = 0;               /* Repeat last expr                */
  53
  54 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
  55 String preprocessor  = 0;
  56 #endif
  57
  58 /* --------------------------------------------------------------------------
  59  * Local function prototypes:
  60  * ------------------------------------------------------------------------*/
  61
  62 static Void local initCharTab     Args((Void));
  63 static Void local fileInput       Args((String,Long));
  64 static Bool local literateMode    Args((String));
  65 static Bool local linecmp         Args((String,String));
  66 static Int  local nextLine        Args((Void));
  67 static Void local skip            Args((Void));
  68 static Void local thisLineIs      Args((Int));
  69 static Void local newlineSkip     Args((Void));
  70 static Void local closeAnyInput   Args((Void));
  71
  72        Int  yyparse         Args((Void)); /* can't stop yacc making this   */
  73                                           /* public, but don't advertise   */
  74                                           /* it in a header file.          */
  75
  76 static Void local endToken        Args((Void));
  77 static Text local readOperator    Args((Void));
  78 static Text local readIdent       Args((Void));
  79 static Cell local readRadixNumber Args((Int));
  80 static Cell local readNumber      Args((Void));
  81 static Cell local readChar        Args((Void));
  82 static Cell local readString      Args((Void));
  83 static Void local saveStrChr      Args((Char));
  84 static Cell local readAChar       Args((Bool));
  85
  86 static Bool local lazyReadMatches Args((String));
  87 static Cell local readEscapeChar  Args((Bool));
  88 static Void local skipGap         Args((Void));
  89 static Cell local readCtrlChar    Args((Void));
  90 static Cell local readOctChar     Args((Void));
  91 static Cell local readHexChar     Args((Void));
  92 static Int  local readHexDigit    Args((Char));
  93 static Cell local readDecChar     Args((Void));
  94
  95 static Void local goOffside       Args((Int));
  96 static Void local unOffside       Args((Void));
  97 static Bool local canUnOffside    Args((Void));
  98
  99 static Void local skipWhitespace  Args((Void));
 100 static Int  local yylex           Args((Void));
 101 static Int  local repeatLast      Args((Void));
 102
 103 static Void local parseInput      Args((Int));
 104
 105 static Bool local doesNotExceed   Args((String,Int,Int));
 106 static Int  local stringToInt     Args((String,Int));
 107
 108
 109 /* --------------------------------------------------------------------------
 110  * Text values for reserved words and special symbols:
 111  * ------------------------------------------------------------------------*/
 112
 113 static Text textCase,    textOfK,      textData,   textType,   textIf;
 114 static Text textThen,    textElse,     textWhere,  textLet,    textIn;
 115 static Text textInfix,   textInfixl,   textInfixr, textForeign, textNewtype;
 116 static Text textDefault, textDeriving, textDo,     textClass,  textInstance;
 117
 118 static Text textCoco,    textEq,       textUpto,   textAs,     textLambda;
 119 static Text textBar,     textMinus,    textFrom,   textArrow,  textLazy;
 120 static Text textBang,    textDot,      textAll,    textImplies;
 121 static Text textWildcard;
 122
 123 static Text textModule,  textImport;
 124 static Text textHiding,  textQualified, textAsMod;
 125 static Text textExport,  textUnsafe;
 126
 127 Text   textNum;                         /* Num                             */
 128 Text   textPrelude;                     /* Prelude                         */
 129 Text   textPlus;                        /* (+)                             */
 130
 131 static Cell conMain;                    /* Main                            */
 132 static Cell varMain;                    /* main                            */
 133
 134 static Cell varMinus;                   /* (-)                             */
 135 static Cell varPlus;                    /* (+)                             */
 136 static Cell varBang;                    /* (!)                             */
 137 static Cell varDot;                     /* (.)                             */
 138 static Cell varHiding;                  /* hiding                          */
 139 static Cell varQualified;               /* qualified                       */
 140 static Cell varAsMod;                   /* as                              */
 141
 142 static List imps;                       /* List of imports to be chased    */
 143
 144
 145 /* --------------------------------------------------------------------------
 146  * Character set handling:
 147  *
 148  * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
 149  * character set.  The following code provides methods for classifying
 150  * input characters according to the lexical structure specified by the
 151  * report.  Hugs should still accept older programs because ASCII is
 152  * essentially just a subset of the ISO character set.
 153  *
 154  * Notes: If you want to port Hugs to a machine that uses something
 155  * substantially different from the ISO character set, then you will need
 156  * to insert additional code to map between character sets.
 157  *
 158  * At some point, the following data structures may be exported in a .h
 159  * file to allow the information contained here to be picked up in the
 160  * implementation of LibChar is* primitives.
 161  *
 162  * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
 163  * ------------------------------------------------------------------------*/
 164
 165 static  Bool            charTabBuilt;
 166 static  unsigned char   ctable[NUM_CHARS];
 167 #define isIn(c,x)       (ctable[(unsigned char)(c)]&(x))
 168 #define isISO(c)        (0<=(c) && (c)<NUM_CHARS)
 169
 170 #define DIGIT           0x01
 171 #define SMALL           0x02
 172 #define LARGE           0x04
 173 #define SYMBOL          0x08
 174 #define IDAFTER         0x10
 175 #define SPACE           0x20
 176 #define PRINT           0x40
 177
 178 static Void local initCharTab() {       /* Initialize char decode table    */
 179 #define setRange(x,f,t) {Int i=f;   while (i<=t) ctable[i++] |=x;}
 180 #define setChar(x,c)    ctable[c] |= (x)
 181 #define setChars(x,s)   {char *p=s; while (*p)   ctable[(Int)*p++]|=x;}
 182 #define setCopy(x,c)    {Int i;                         \
 183                          for (i=0; i<NUM_CHARS; ++i)    \
 184                              if (isIn(i,c))             \
 185                                  ctable[i]|=x;          \
 186                         }
 187
 188     setRange(DIGIT,     '0','9');       /* ASCII decimal digits            */
 189
 190     setRange(SMALL,     'a','z');       /* ASCII lower case letters        */
 191     setRange(SMALL,     223,246);       /* ISO lower case letters          */
 192     setRange(SMALL,     248,255);       /* (omits division symbol, 247)    */
 193     setChar (SMALL,     '_');
 194
 195     setRange(LARGE,     'A','Z');       /* ASCII upper case letters        */
 196     setRange(LARGE,     192,214);       /* ISO upper case letters          */
 197     setRange(LARGE,     216,222);       /* (omits multiplication, 215)     */
 198
 199     setRange(SYMBOL,    161,191);       /* Symbol characters + ':'         */
 200     setRange(SYMBOL,    215,215);
 201     setChar (SYMBOL,    247);
 202     setChars(SYMBOL,    ":!#$%&*+./<=>?@\\^|-~");
 203
 204     setChar (IDAFTER,   '\'');          /* Characters in identifier        */
 205     setCopy (IDAFTER,   (DIGIT|SMALL|LARGE));
 206
 207     setChar (SPACE,     ' ');           /* ASCII space character           */
 208     setChar (SPACE,     160);           /* ISO non breaking space          */
 209     setRange(SPACE,     9,13);          /* special whitespace: \t\n\v\f\r  */
 210
 211     setChars(PRINT,     "(),;[]_`{}");  /* Special characters              */
 212     setChars(PRINT,     " '\"");        /* Space and quotes                */
 213     setCopy (PRINT,     (DIGIT|SMALL|LARGE|SYMBOL));
 214
 215     charTabBuilt = TRUE;
 216 #undef setRange
 217 #undef setChar
 218 #undef setChars
 219 #undef setCopy
 220 }
 221
 222
 223 /* --------------------------------------------------------------------------
 224  * Single character input routines:
 225  *
 226  * At the lowest level of input, characters are read one at a time, with the
 227  * current character held in c0 and the following (lookahead) character in
 228  * c1.  The corrdinates of c0 within the file are held in (column,row).
 229  * The input stream is advanced by one character using the skip() function.
 230  * ------------------------------------------------------------------------*/
 231
 232 #define TABSIZE    8                   /* spacing between tabstops         */
 233
 234 #define NOTHING    0                   /* what kind of input is being read?*/
 235 #define KEYBOARD   1                   /* - keyboard/console?              */
 236 #define SCRIPTFILE 2                   /* - script file                    */
 237 #define PROJFILE   3                   /* - project file                   */
 238 #define STRING     4                   /* - string buffer?                 */
 239
 240 static Int    reading   = NOTHING;
 241
 242 static Target readSoFar;
 243 static Int    row, column, startColumn;
 244 static int    c0, c1;
 245 static FILE   *inputStream = 0;
 246 static Bool   thisLiterate;
 247 static String nextStringChar;          /* next char in string buffer       */
 248
 249 #if     USE_READLINE                   /* for command line editors         */
 250 static  String currentLine;            /* editline or GNU readline         */
 251 static  String nextChar;
 252 #define nextConsoleChar() (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
 253 extern  Void add_history  Args((String));
 254 extern  String readline   Args((String));
 255 #else
 256 #define nextConsoleChar() getc(stdin)
 257 #endif
 258
 259 static  Int litLines;                  /* count defn lines in lit script   */
 260 #define DEFNCHAR  '>'                  /* definition lines begin with this */
 261 static  Int lastLine;                  /* records type of last line read:  */
 262 #define STARTLINE 0                    /* - at start of file, none read    */
 263 #define BLANKLINE 1                    /* - blank (may preceed definition) */
 264 #define TEXTLINE  2                    /* - text comment                   */
 265 #define DEFNLINE  3                    /* - line containing definition     */
 266 #define CODELINE  4                    /* - line inside code block         */
 267
 268 #define BEGINCODE "\\begin{code}"
 269 #define ENDCODE   "\\end{code}"
 270
 271 #if HAVE_GETDELIM_H
 272 static char *lineBuffer = NULL;   /* getline() does the initial allocation */
 273 #else
 274 #define LINEBUFFER_SIZE 1000
 275 static char lineBuffer[LINEBUFFER_SIZE];
 276 #endif
 277 static int lineLength = 0;
 278 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
 279 static int linePtr = 0;
 280
 281 Void consoleInput(prompt)              /* prepare to input characters from */
 282 String prompt; {                       /* standard in (i.e. console/kbd)   */
 283     reading     = KEYBOARD;            /* keyboard input is Line oriented, */
 284     c0          =                      /* i.e. input terminated by '\n'    */
 285     c1          = ' ';
 286     column      = (-1);
 287     row         = 0;
 288
 289 #if USE_READLINE
 290     /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
 291      * avoids accidentally freeing currentLine twice.
 292      */
 293     if (currentLine) {
 294         String oldCurrentLine = currentLine;
 295         currentLine = 0;           /* We may lose the space of currentLine */
 296         free(oldCurrentLine);      /* if interrupted here - unlikely       */
 297     }
 298     currentLine = readline(prompt);
 299     nextChar    = currentLine;
 300     if (currentLine) {
 301         if (*currentLine)
 302             add_history(currentLine);
 303     }
 304     else
 305         c0 = c1 = EOF;
 306 #else
 307     Printf("%s",prompt);
 308     FlushStdout();
 309 #endif
 310 }
 311
 312 Void projInput(nm)                     /* prepare to input characters from */
 313 String nm; {                           /* from named project file          */
 314     if ((inputStream = fopen(nm,"r"))!=0) {
 315         reading = PROJFILE;
 316         c0      = ' ';
 317         c1      = '\n';
 318         column  = 1;
 319         row     = 0;
 320     }
 321     else {
 322         ERRMSG(0) "Unable to open project file \"%s\"", nm
 323         EEND;
 324     }
 325 }
 326
 327 static Void local fileInput(nm,len)     /* prepare to input characters from*/
 328 String nm;                              /* named file (specified length is */
 329 Long   len; {                           /* used to set target for reading) */
 330 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 331     if (preprocessor) {
 332         Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
 333         char *cmd = malloc(reallen);
 334         if (cmd == NULL) {
 335             ERRMSG(0) "Unable to allocate memory for filter command."
 336             EEND;
 337         }
 338         strcpy(cmd,preprocessor);
 339         strcat(cmd," ");
 340         strcat(cmd,nm);
 341         inputStream = popen(cmd,"r");
 342         free(cmd);
 343     } else {
 344         inputStream = fopen(nm,"r");
 345     }
 346 #else
 347     inputStream = fopen(nm,"r");
 348 #endif
 349     if (inputStream) {
 350         reading      = SCRIPTFILE;
 351         c0           = ' ';
 352         c1           = '\n';
 353         column       = 1;
 354         row          = 0;
 355
 356         lastLine     = STARTLINE;       /* literate file processing */
 357         litLines     = 0;
 358         linePtr      = 0;
 359         lineLength   = 0;
 360         thisLiterate = literateMode(nm);
 361         inCodeBlock  = FALSE;
 362
 363         readSoFar    = 0;
 364         setGoal("Parsing", (Target)len);
 365     }
 366     else {
 367         ERRMSG(0) "Unable to open file \"%s\"", nm
 368         EEND;
 369     }
 370 }
 371
 372 Void stringInput(s)             /* prepare to input characters from string */
 373 String s; {
 374     reading      = STRING;
 375     c0           = EOF;
 376     c1           = EOF;
 377     if (*s) c0 = *s++;
 378     if (*s) c1 = *s++;
 379     column       = 1;
 380     row          = 1;
 381
 382     nextStringChar = s;
 383     if (!charTabBuilt)
 384         initCharTab();
 385 }
 386
 387 static Bool local literateMode(nm)      /* Select literate mode for file   */
 388 String nm; {
 389     char *dot = strrchr(nm,'.');        /* look for last dot in file name  */
 390     if (dot) {
 391         if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate    */
 392             return FALSE;
 393         if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
 394             filenamecmp(dot+1,"verb")==0) /* literate scripts              */
 395             return TRUE;
 396     }
 397     return literateScripts;             /* otherwise, use the default      */
 398 }
 399
 400
 401 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
 402  * I've removed the loop (since newLineSkip contains a loop too) and
 403  * replaced the warnings with errors. ADR
 404  */
 405 /*
 406  * To deal with literate \begin{code}...\end{code} blocks,
 407  * add a line buffer that rooms the current line. The old c0 and c1
 408  * stream pointers are used as before within that buffer -- sof
 409  *
 410  * Upon reading a new line into the line buffer, we check to see if
 411  * we're reading in a line containing \begin{code} or \end{code} and
 412  * take appropriate action.
 413  */
 414
 415 static Bool local linecmp(s,line)       /* compare string with line        */
 416 String s;                               /* line may end in whitespace      */
 417 String line; {
 418     Int i=0;
 419     while (s[i] != '\0' && s[i] == line[i]) {
 420         ++i;
 421     }
 422     /* s[0..i-1] == line[0..i-1] */
 423     if (s[i] != '\0') {                 /* check s `isPrefixOf` line       */
 424         return FALSE;
 425     }
 426     while (isIn(line[i], SPACE)) {      /* allow whitespace at end of line */
 427         ++i;
 428     }
 429     return (line[i] == '\0');
 430 }
 431
 432 /* Returns line length (including \n) or 0 upon EOF. */
 433 static Int local nextLine()
 434 {
 435 #if HAVE_GETDELIM_H
 436     /*
 437        Forget about fgets(), it is utterly braindead.
 438        (Assumes \NUL free streams and does not gracefully deal
 439        with overflow.) Instead, use GNU libc's getline().
 440        */
 441     lineLength = getline(&lineBuffer, &lineLength, inputStream);
 442 #else
 443     if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
 444         lineLength = strlen(lineBuffer);
 445     else
 446         lineLength = 0;
 447 #endif
 448     /* printf("Read: \"%s\"", lineBuffer); */
 449     if (lineLength <= 0) { /* EOF / IO error, who knows.. */
 450         return lineLength;
 451     }
 452     else if (lineLength >= 2 && lineBuffer[0] == '#' && lineBuffer[1] == '!') {
 453         lineBuffer[0]='\n'; /* pretend it's a blank line */
 454         lineBuffer[1]='\0';
 455         lineLength=1;
 456     } else if (thisLiterate) {
 457         if (linecmp(BEGINCODE, lineBuffer)) {
 458             if (!inCodeBlock) {             /* Entered a code block        */
 459                 inCodeBlock = TRUE;
 460                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 461                 lineBuffer[1]='\0';
 462                 lineLength=1;
 463             }
 464             else {
 465                 ERRMSG(row) "\\begin{code} encountered inside code block"
 466                 EEND;
 467             }
 468         }
 469         else if (linecmp(ENDCODE, lineBuffer)) {
 470             if (inCodeBlock) {              /* Finished code block         */
 471                 inCodeBlock = FALSE;
 472                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 473                 lineBuffer[1]='\0';
 474                 lineLength=1;
 475             }
 476             else {
 477                 ERRMSG(row) "\\end{code} encountered outside code block"
 478                 EEND;
 479             }
 480         }
 481     }
 482     /* printf("Read: \"%s\"", lineBuffer); */
 483     return lineLength;
 484 }
 485
 486 static Void local skip() {              /* move forward one char in input  */
 487     if (c0!=EOF) {                      /* stream, updating c0, c1, ...    */
 488         if (c0=='\n') {                 /* Adjusting cursor coords as nec. */
 489             row++;
 490             column=1;
 491             if (reading==SCRIPTFILE)
 492                 soFar(readSoFar);
 493         }
 494         else if (c0=='\t')
 495             column += TABSIZE - ((column-1)%TABSIZE);
 496         else
 497             column++;
 498
 499         c0 = c1;
 500         readSoFar++;
 501
 502         if (c0==EOF) {
 503             column = 0;
 504             if (reading==SCRIPTFILE)
 505                 done();
 506             closeAnyInput();
 507         }
 508         else if (reading==KEYBOARD) {
 509             allowBreak();
 510             if (c0=='\n')
 511                 c1 = EOF;
 512             else {
 513                 c1 = nextConsoleChar();
 514                 /* On Win32, hitting ctrl-C causes the next getchar to
 515                  * fail - returning "-1" to indicate an error.
 516                  * This is one of the rare cases where "-1" does not mean EOF.
 517                  */
 518                 if (EOF == c1 && !feof(stdin)) {
 519                     c1 = ' ';
 520                 }
 521             }
 522         }
 523         else if (reading==STRING) {
 524             c1 = (unsigned char) *nextStringChar++;
 525             if (c1 == '\0')
 526                 c1 = EOF;
 527         }
 528         else {
 529             if (lineLength <=0 || linePtr == lineLength) {
 530                 /* Current line, exhausted - get new one */
 531                 if (nextLine() <= 0) { /* EOF */
 532                     c1 = EOF;
 533                 }
 534                 else {
 535                     linePtr = 0;
 536                     c1 = (unsigned char)lineBuffer[linePtr++];
 537                 }
 538             }
 539             else {
 540                 c1 = (unsigned char)lineBuffer[linePtr++];
 541             }
 542         }
 543
 544     }
 545 }
 546
 547 static Void local thisLineIs(kind)     /* register kind of current line    */
 548 Int kind; {                            /* & check for literate script errs */
 549     if (literateErrors) {
 550         if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
 551             (kind==TEXTLINE && lastLine==DEFNLINE)) {
 552             ERRMSG(row) "Program line next to comment"
 553             EEND;
 554         }
 555         lastLine = kind;
 556     }
 557 }
 558
 559 static Void local newlineSkip() {      /* skip `\n' (supports lit scripts) */
 560     /* assert(c0=='\n'); */
 561     if (reading==SCRIPTFILE && thisLiterate) {
 562         do {
 563             skip();
 564             if (inCodeBlock) {         /* pass chars on definition lines   */
 565                 thisLineIs(CODELINE);  /* to lexer (w/o leading DEFNCHAR)  */
 566                 litLines++;
 567                 return;
 568             }
 569             if (c0==DEFNCHAR) {        /* pass chars on definition lines   */
 570                 thisLineIs(DEFNLINE);  /* to lexer (w/o leading DEFNCHAR)  */
 571                 skip();
 572                 litLines++;
 573                 return;
 574             }
 575             while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank?   */
 576                 skip();
 577             if (c0=='\n' || c0==EOF)
 578                 thisLineIs(BLANKLINE);
 579             else {
 580                 thisLineIs(TEXTLINE);  /* otherwise it must be a comment   */
 581                 while (c0!='\n' && c0!=EOF)
 582                     skip();
 583             }                          /* by now, c0=='\n' or c0==EOF      */
 584         } while (c0!=EOF);             /* if new line, start again         */
 585
 586         if (litLines==0 && literateErrors) {
 587             ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
 588                         DEFNCHAR
 589             EEND;
 590         }
 591         return;
 592     }
 593     skip();
 594 }
 595
 596 static Void local closeAnyInput() {    /* Close input stream, if open,     */
 597     switch (reading) {                 /* or skip to end of console line   */
 598         case PROJFILE   :
 599         case SCRIPTFILE : if (inputStream) {
 600 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 601                               if (preprocessor) {
 602                                   pclose(inputStream);
 603                               } else {
 604                                   fclose(inputStream);
 605                               }
 606 #else
 607                               fclose(inputStream);
 608 #endif
 609                               inputStream = 0;
 610                           }
 611                           break;
 612         case KEYBOARD   : while (c0!=EOF)
 613                               skip();
 614                           break;
 615     }
 616     reading=NOTHING;
 617 }
 618
 619 /* --------------------------------------------------------------------------
 620  * Parser: Uses table driven parser generated from parser.y using yacc
 621  * ------------------------------------------------------------------------*/
 622
 623 #include "parser.c"
 624
 625 /* --------------------------------------------------------------------------
 626  * Single token input routines:
 627  *
 628  * The following routines read the values of particular kinds of token given
 629  * that the first character of the token has already been located in c0 on
 630  * entry to the routine.
 631  * ------------------------------------------------------------------------*/
 632
 633 #define MAX_TOKEN           4000
 634 #define startToken()        tokPos = 0
 635 #define saveTokenChar(c)    if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
 636 #define saveChar(c)         tokenStr[tokPos++]=(char)(c)
 637 #define overflows(n,b,d,m)  (n > ((m)-(d))/(b))
 638
 639 static char tokenStr[MAX_TOKEN+1];     /* token buffer                     */
 640 static Int  tokPos;                    /* input position in buffer         */
 641 static Int  identType;                 /* identifier type: CONID / VARID   */
 642 static Int  opType;                    /* operator type  : CONOP / VAROP   */
 643
 644 static Void local endToken() {         /* check for token overflow         */
 645     if (tokPos>MAX_TOKEN) {
 646         ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
 647         EEND;
 648     }
 649     tokenStr[tokPos] = '\0';
 650 }
 651
 652 static Text local readOperator() {     /* read operator symbol             */
 653     startToken();
 654     do {
 655         saveTokenChar(c0);
 656         skip();
 657     } while (isISO(c0) && isIn(c0,SYMBOL));
 658     opType = (tokenStr[0]==':' ? CONOP : VAROP);
 659     endToken();
 660     return findText(tokenStr);
 661 }
 662
 663 static Text local readIdent() {        /* read identifier                  */
 664     startToken();
 665     do {
 666         saveTokenChar(c0);
 667         skip();
 668     } while (isISO(c0) && isIn(c0,IDAFTER));
 669     endToken();
 670     identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
 671     return findText(tokenStr);
 672 }
 673
 674
 675 static Bool local doesNotExceed(s,radix,limit)
 676 String s;
 677 Int    radix;
 678 Int    limit; {
 679     Int n = 0;
 680     Int p = 0;
 681     while (TRUE) {
 682         if (s[p] == 0) return TRUE;
 683         if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
 684         n = radix*n + (s[p]-'0');
 685         p++;
 686     }
 687 }
 688
 689 static Int local stringToInt(s,radix)
 690 String s;
 691 Int    radix; {
 692     Int n = 0;
 693     Int p = 0;
 694     while (TRUE) {
 695         if (s[p] == 0) return n;
 696         n = radix*n + (s[p]-'0');
 697         p++;
 698     }
 699 }
 700
 701 static Cell local readRadixNumber(r)   /* Read literal in specified radix  */
 702 Int r; {                               /* from input of the form 0c{digs}  */
 703     Int d;
 704     startToken();
 705     skip();                            /* skip leading zero                */
 706     if ((d=readHexDigit(c1))<0 || d>=r) {
 707         /* Special case; no digits, lex as  */
 708         /* if it had been written "0 c..."  */
 709         saveTokenChar('0');
 710     } else {
 711         skip();
 712         do {
 713             saveTokenChar('0'+readHexDigit(c0));
 714             skip();
 715             d = readHexDigit(c0);
 716         } while (d>=0 && d<r);
 717     }
 718     endToken();
 719
 720     if (doesNotExceed(tokenStr,r,MAXPOSINT))
 721         return mkInt(stringToInt(tokenStr,r));
 722     else
 723     if (r == 10)
 724         return stringToBignum(tokenStr);
 725     else {
 726         ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
 727         EEND;
 728     }
 729 }
 730
 731 static Cell local readNumber() {        /* read numeric constant           */
 732
 733     if (c0=='0') {
 734         if (c1=='x' || c1=='X')         /* Maybe a hexadecimal literal?    */
 735             return readRadixNumber(16);
 736         if (c1=='o' || c1=='O')         /* Maybe an octal literal?         */
 737             return readRadixNumber(8);
 738     }
 739
 740     startToken();
 741     do {
 742         saveTokenChar(c0);
 743         skip();
 744     } while (isISO(c0) && isIn(c0,DIGIT));
 745
 746     if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
 747         endToken();
 748         if (doesNotExceed(tokenStr,10,MAXPOSINT))
 749             return mkInt(stringToInt(tokenStr,10)); else
 750             return stringToBignum(tokenStr);
 751     }
 752
 753     saveTokenChar(c0);                  /* save decimal point              */
 754     skip();
 755     do {                                /* process fractional part ...     */
 756         saveTokenChar(c0);
 757         skip();
 758     } while (isISO(c0) && isIn(c0,DIGIT));
 759
 760     if (c0=='e' || c0=='E') {           /* look for exponent part...       */
 761         saveTokenChar('e');
 762         skip();
 763         if (c0=='-') {
 764             saveTokenChar('-');
 765             skip();
 766         }
 767         else if (c0=='+')
 768             skip();
 769
 770         if (!isISO(c0) || !isIn(c0,DIGIT)) {
 771             ERRMSG(row) "Missing digits in exponent"
 772             EEND;
 773         }
 774         else {
 775             do {
 776                 saveTokenChar(c0);
 777                 skip();
 778             } while (isISO(c0) && isIn(c0,DIGIT));
 779         }
 780     }
 781
 782     endToken();
 783     return mkFloat(stringToFloat(tokenStr));
 784 }
 785
 786
 787
 788
 789
 790
 791
 792 static Cell local readChar() {         /* read character constant          */
 793     Cell charRead;
 794
 795     skip(/* '\'' */);
 796     if (c0=='\'' || c0=='\n' || c0==EOF) {
 797         ERRMSG(row) "Illegal character constant"
 798         EEND;
 799     }
 800
 801     charRead = readAChar(FALSE);
 802
 803     if (c0=='\'')
 804         skip(/* '\'' */);
 805     else {
 806         ERRMSG(row) "Improperly terminated character constant"
 807         EEND;
 808     }
 809     return charRead;
 810 }
 811
 812 static Cell local readString() {       /* read string literal              */
 813     Cell c;
 814
 815     startToken();
 816     skip(/* '\"' */);
 817     while (c0!='\"' && c0!='\n' && c0!=EOF) {
 818         c = readAChar(TRUE);
 819         if (nonNull(c))
 820             saveStrChr(charOf(c));
 821     }
 822
 823     if (c0=='\"')
 824         skip(/* '\"' */);
 825     else {
 826         ERRMSG(row) "Improperly terminated string"
 827         EEND;
 828     }
 829     endToken();
 830     return mkStr(findText(tokenStr));
 831 }
 832
 833 static Void local saveStrChr(c)        /* save character in string         */
 834 Char c; {
 835     if (c!='\0' && c!='\\') {          /* save non null char as single char*/
 836         saveTokenChar(c);
 837     }
 838     else {                             /* save null char as TWO null chars */
 839         if (tokPos+1<MAX_TOKEN) {
 840             saveChar('\\');
 841             if (c=='\\')
 842                 saveChar('\\');
 843             else
 844                 saveChar('0');
 845         }
 846     }
 847 }
 848
 849 static Cell local readAChar(isStrLit)  /* read single char constant        */
 850 Bool isStrLit; {                       /* TRUE => enable \& and gaps       */
 851     Cell c = mkChar(c0);
 852
 853     if (c0=='\\')                      /* escape character?                */
 854         return readEscapeChar(isStrLit);
 855     if (!isISO(c0)) {
 856         ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
 857         EEND;
 858     }
 859     skip();                            /* normal character?                */
 860     return c;
 861 }
 862
 863 /* --------------------------------------------------------------------------
 864  * Character escape code sequences:
 865  * ------------------------------------------------------------------------*/
 866
 867 static struct {                        /* table of special escape codes    */
 868     char *codename;
 869     int  codenumber;
 870 } escapes[] = {
 871    {"a",    7}, {"b",    8}, {"f",   12}, {"n",   10},  /* common escapes  */
 872    {"r",   13}, {"t",    9}, {"\\",'\\'}, {"\"",'\"'},
 873    {"\'",'\''}, {"v",   11},
 874    {"NUL",  0}, {"SOH",  1}, {"STX",  2}, {"ETX",  3},  /* ascii codenames */
 875    {"EOT",  4}, {"ENQ",  5}, {"ACK",  6}, {"BEL",  7},
 876    {"BS",   8}, {"HT",   9}, {"LF",  10}, {"VT",  11},
 877    {"FF",  12}, {"CR",  13}, {"SO",  14}, {"SI",  15},
 878    {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
 879    {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
 880    {"CAN", 24}, {"EM",  25}, {"SUB", 26}, {"ESC", 27},
 881    {"FS",  28}, {"GS",  29}, {"RS",  30}, {"US",  31},
 882    {"SP",  32}, {"DEL", 127},
 883    {0,0}
 884 };
 885
 886 static Int  alreadyMatched;            /* Record portion of input stream   */
 887 static char alreadyRead[10];           /* that has been read w/o a match   */
 888
 889 static Bool local lazyReadMatches(s)   /* compare input stream with string */
 890 String s; {                            /* possibly using characters that   */
 891     int i;                             /* have already been read           */
 892
 893     for (i=0; i<alreadyMatched; ++i)
 894         if (alreadyRead[i]!=s[i])
 895             return FALSE;
 896
 897     while (s[i] && s[i]==c0) {
 898         alreadyRead[alreadyMatched++]=(char)c0;
 899         skip();
 900         i++;
 901     }
 902
 903     return s[i]=='\0';
 904 }
 905
 906 static Cell local readEscapeChar(isStrLit)/* read escape character         */
 907 Bool isStrLit; {
 908     int i=0;
 909
 910     skip(/* '\\' */);
 911     switch (c0) {
 912         case '&'  : if (isStrLit) {
 913                         skip();
 914                         return NIL;
 915                     }
 916                     ERRMSG(row) "Illegal use of `\\&' in character constant"
 917                     EEND;
 918                     break;/*NOTREACHED*/
 919
 920         case '^'  : return readCtrlChar();
 921
 922         case 'o'  : return readOctChar();
 923         case 'x'  : return readHexChar();
 924
 925         default   : if (!isISO(c0)) {
 926                         ERRMSG(row) "Illegal escape sequence"
 927                         EEND;
 928                     }
 929                     else if (isIn(c0,SPACE)) {
 930                         if (isStrLit) {
 931                             skipGap();
 932                             return NIL;
 933                         }
 934                         ERRMSG(row) "Illegal use of gap in character constant"
 935                         EEND;
 936                         break;
 937                     }
 938                     else if (isIn(c0,DIGIT))
 939                         return readDecChar();
 940     }
 941
 942     for (alreadyMatched=0; escapes[i].codename; i++)
 943         if (lazyReadMatches(escapes[i].codename))
 944             return mkChar(escapes[i].codenumber);
 945
 946     alreadyRead[alreadyMatched++] = (char)c0;
 947     alreadyRead[alreadyMatched++] = '\0';
 948     ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
 949                 alreadyRead
 950     EEND;
 951     return NIL;/*NOTREACHED*/
 952 }
 953
 954 static Void local skipGap() {          /* skip over gap in string literal  */
 955     do                                 /* (simplified in Haskell 1.1)      */
 956         if (c0=='\n')
 957             newlineSkip();
 958         else
 959             skip();
 960     while (isISO(c0) && isIn(c0,SPACE));
 961     if (c0!='\\') {
 962         ERRMSG(row) "Missing `\\' terminating string literal gap"
 963         EEND;
 964     }
 965     skip(/* '\\' */);
 966 }
 967
 968 static Cell local readCtrlChar() {     /* read escape sequence \^x         */
 969     static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
 970     String which;
 971
 972     skip(/* '^' */);
 973     if ((which = strchr(controls,c0))==NULL) {
 974         ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
 975         EEND;
 976     }
 977     skip();
 978     return mkChar(which-controls);
 979 }
 980
 981 static Cell local readOctChar() {      /* read octal character constant    */
 982     Int n = 0;
 983     Int d;
 984
 985     skip(/* 'o' */);
 986     if ((d = readHexDigit(c0))<0 || d>=8) {
 987         ERRMSG(row) "Empty octal character escape"
 988         EEND;
 989     }
 990     do {
 991         if (overflows(n,8,d,MAXCHARVAL)) {
 992             ERRMSG(row) "Octal character escape out of range"
 993             EEND;
 994         }
 995         n = 8*n + d;
 996         skip();
 997     } while ((d = readHexDigit(c0))>=0 && d<8);
 998
 999     return mkChar(n);
1000 }
1001
1002 static Cell local readHexChar() {      /* read hex character constant      */
1003     Int n = 0;
1004     Int d;
1005
1006     skip(/* 'x' */);
1007     if ((d = readHexDigit(c0))<0) {
1008         ERRMSG(row) "Empty hexadecimal character escape"
1009         EEND;
1010     }
1011     do {
1012         if (overflows(n,16,d,MAXCHARVAL)) {
1013             ERRMSG(row) "Hexadecimal character escape out of range"
1014             EEND;
1015         }
1016         n = 16*n + d;
1017         skip();
1018     } while ((d = readHexDigit(c0))>=0);
1019
1020     return mkChar(n);
1021 }
1022
1023 static Int local readHexDigit(c)       /* read single hex digit            */
1024 Char c; {
1025     if ('0'<=c && c<='9')
1026         return c-'0';
1027     if ('A'<=c && c<='F')
1028         return 10 + (c-'A');
1029     if ('a'<=c && c<='f')
1030         return 10 + (c-'a');
1031     return -1;
1032 }
1033
1034 static Cell local readDecChar() {      /* read decimal character constant  */
1035     Int n = 0;
1036
1037     do {
1038         if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1039             ERRMSG(row) "Decimal character escape out of range"
1040             EEND;
1041         }
1042         n = 10*n + (c0-'0');
1043         skip();
1044     } while (c0!=EOF && isIn(c0,DIGIT));
1045
1046     return mkChar(n);
1047 }
1048
1049 /* --------------------------------------------------------------------------
1050  * Produce printable representation of character:
1051  * ------------------------------------------------------------------------*/
1052
1053 String unlexChar(c,quote)              /* return string representation of  */
1054 Char c;                                /* character...                     */
1055 Char quote; {                          /* protect quote character          */
1056     static char buffer[12];
1057
1058     if (c<0)                           /* deal with sign extended chars..  */
1059         c += NUM_CHARS;
1060
1061     if (isISO(c) && isIn(c,PRINT)) {   /* normal printable character       */
1062         if (c==quote || c=='\\') {     /* look for quote of approp. kind   */
1063             buffer[0] = '\\';
1064             buffer[1] = (char)c;
1065             buffer[2] = '\0';
1066         }
1067         else {
1068             buffer[0] = (char)c;
1069             buffer[1] = '\0';
1070         }
1071     }
1072     else {                             /* look for escape code             */
1073         Int escs;
1074         for (escs=0; escapes[escs].codename; escs++)
1075             if (escapes[escs].codenumber==c) {
1076                 sprintf(buffer,"\\%s",escapes[escs].codename);
1077                 return buffer;
1078             }
1079         sprintf(buffer,"\\%d",c);      /* otherwise use numeric escape     */
1080     }
1081     return buffer;
1082 }
1083
1084 Void printString(s)                    /* print string s, using quotes and */
1085 String s; {                            /* escapes if any parts need them   */
1086     if (s) {
1087         String t = s;
1088         Char   c;
1089         while ((c = *t)!=0 && isISO(c)
1090                            && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1091             t++;
1092         }
1093         if (*t) {
1094             Putchar('"');
1095             for (t=s; *t; t++)
1096                 Printf("%s",unlexChar(*t,'"'));
1097             Putchar('"');
1098         }
1099         else
1100             Printf("%s",s);
1101     }
1102 }
1103
1104 /* -------------------------------------------------------------------------
1105  * Handle special types of input for use in interpreter:
1106  * -----------------------------------------------------------------------*/
1107
1108 Command readCommand(cmds,start,sys)    /* read command at start of input   */
1109 struct cmd *cmds;                      /* line in interpreter              */
1110 Char   start;                          /* characters introducing a cmd     */
1111 Char   sys; {                          /* character for shell escape       */
1112     while (c0==' ' || c0 =='\t')
1113         skip();
1114
1115     if (c0=='\n')                      /* look for blank command lines     */
1116         return NOCMD;
1117     if (c0==EOF)                       /* look for end of input stream     */
1118         return QUIT;
1119     if (c0==sys) {                     /* single character system escape   */
1120         skip();
1121         return SYSTEM;
1122     }
1123     if (c0==start && c1==sys) {        /* two character system escape      */
1124         skip();
1125         skip();
1126         return SYSTEM;
1127     }
1128
1129     startToken();                      /* All cmds start with start        */
1130     if (c0==start)                     /* except default (usually EVAL)    */
1131         do {                           /* which is empty                   */
1132             saveTokenChar(c0);
1133             skip();
1134         } while (c0!=EOF && !isIn(c0,SPACE));
1135     endToken();
1136
1137     for (; cmds->cmdString; ++cmds)
1138         if (strcmp((cmds->cmdString),tokenStr)==0 ||
1139             (tokenStr[0]==start &&
1140              tokenStr[1]==(cmds->cmdString)[1] &&
1141              tokenStr[2]=='\0'))
1142             return (cmds->cmdCode);
1143     return BADCMD;
1144 }
1145
1146 String readFilename() {                /* Read filename from input (if any)*/
1147     if (reading==PROJFILE)
1148         skipWhitespace();
1149     else
1150         while (c0==' ' || c0=='\t')
1151             skip();
1152
1153     if (c0=='\n' || c0==EOF)           /* return null string at end of line*/
1154         return 0;
1155
1156     startToken();
1157     while (c0!=EOF && !isIn(c0,SPACE)) {
1158         if (c0=='"') {
1159             skip();
1160             while (c0!=EOF && c0!='\"') {
1161                 Cell c = readAChar(TRUE);
1162                 if (nonNull(c)) {
1163                     saveTokenChar(charOf(c));
1164                 }
1165             }
1166             if (c0=='"')
1167                 skip();
1168             else {
1169                 ERRMSG(row) "a closing quote, '\"', was expected"
1170                 EEND;
1171             }
1172         }
1173         else {
1174             saveTokenChar(c0);
1175             skip();
1176         }
1177     }
1178     endToken();
1179     return tokenStr;
1180 }
1181
1182 String readLine() {                    /* Read command line from input     */
1183     while (c0==' ' || c0=='\t')        /* skip leading whitespace          */
1184         skip();
1185
1186     startToken();
1187     while (c0!='\n' && c0!=EOF) {
1188         saveTokenChar(c0);
1189         skip();
1190     }
1191     endToken();
1192
1193     return tokenStr;
1194 }
1195
1196 /* --------------------------------------------------------------------------
1197  * This lexer supports the Haskell layout rule:
1198  *
1199  * - Layout area bounded by { ... }, with `;'s in between.
1200  * - A `{' is a HARD indentation and can only be matched by a corresponding
1201  *   HARD '}'
1202  * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1203  *   is inserted with the column number of the first token after the
1204  *   WHERE/LET/OF keyword.
1205  * - When a soft indentation is uppermost on the indetation stack with
1206  *   column col' we insert:
1207  *    `}'  in front of token with column<col' and pop indentation off stack,
1208  *    `;'  in front of token with column==col'.
1209  * ------------------------------------------------------------------------*/
1210
1211 #define MAXINDENT  100                 /* maximum nesting of layout rule   */
1212 static  Int        layout[MAXINDENT+1];/* indentation stack                */
1213 #define HARD       (-1)                /* indicates hard indentation       */
1214 static  Int        indentDepth = (-1); /* current indentation nesting      */
1215
1216 static Void local goOffside(col)       /* insert offside marker            */
1217 Int col; {                             /* for specified column             */
1218     if (indentDepth>=MAXINDENT) {
1219         ERRMSG(row) "Too many levels of program nesting"
1220         EEND;
1221     }
1222     layout[++indentDepth] = col;
1223 }
1224
1225 static Void local unOffside() {        /* leave layout rule area           */
1226     indentDepth--;
1227 }
1228
1229 static Bool local canUnOffside() {     /* Decide if unoffside permitted    */
1230     return indentDepth>=0 && layout[indentDepth]!=HARD;
1231 }
1232
1233 /* --------------------------------------------------------------------------
1234  * Main tokeniser:
1235  * ------------------------------------------------------------------------*/
1236
1237 static Void local skipWhitespace() {   /* Skip over whitespace/comments    */
1238     for (;;)                           /* Strictly speaking, this code is  */
1239         if (c0==EOF)                   /* a little more liberal than the   */
1240             return;                    /* report allows ...                */
1241         else if (c0=='\n')
1242             newlineSkip();
1243         else if (isIn(c0,SPACE))
1244             skip();
1245         else if (c0=='{' && c1=='-') { /* (potentially) nested comment     */
1246             Int nesting = 1;
1247             Int origRow = row;         /* Save original row number         */
1248             skip();
1249             skip();
1250             while (nesting>0 && c0!=EOF)
1251                 if (c0=='{' && c1=='-') {
1252                     skip();
1253                     skip();
1254                     nesting++;
1255                 }
1256                 else if (c0=='-' && c1=='}') {
1257                     skip();
1258                     skip();
1259                     nesting--;
1260                 }
1261                 else if (c0=='\n')
1262                     newlineSkip();
1263                 else
1264                     skip();
1265             if (nesting>0) {
1266                 ERRMSG(origRow) "Unterminated nested comment {- ..."
1267                 EEND;
1268             }
1269         }
1270         else if (c0=='-' && c1=='-') {  /* One line comment                */
1271             do
1272                 skip();
1273             while (c0!='\n' && c0!=EOF);
1274             if (c0=='\n')
1275                 newlineSkip();
1276         }
1277         else
1278             return;
1279 }
1280
1281 static Bool firstToken;                /* Set to TRUE for first token      */
1282 static Int  firstTokenIs;              /* ... with token value stored here */
1283
1284 static Int local yylex() {             /* Read next input token ...        */
1285     static Bool insertOpen    = FALSE;
1286     static Bool insertedToken = FALSE;
1287     static Text textRepeat;
1288
1289 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1290
1291     if (firstToken) {                  /* Special case for first token     */
1292         indentDepth   = (-1);
1293         firstToken    = FALSE;
1294         insertOpen    = FALSE;
1295         insertedToken = FALSE;
1296         if (reading==KEYBOARD)
1297             textRepeat = findText(repeatStr);
1298         return firstTokenIs;
1299     }
1300
1301     if (insertOpen) {                  /* insert `soft' opening brace      */
1302         insertOpen    = FALSE;
1303         insertedToken = TRUE;
1304         goOffside(column);
1305         push(yylval = mkInt(row));
1306         return '{';
1307     }
1308
1309     /* ----------------------------------------------------------------------
1310      * Skip white space, and insert tokens to support layout rules as reqd.
1311      * --------------------------------------------------------------------*/
1312
1313     skipWhitespace();
1314     startColumn = column;
1315     push(yylval = mkInt(row));         /* default token value is line no.  */
1316     /* subsequent changes to yylval must also set top() to the same value  */
1317
1318     if (indentDepth>=0) {              /* layout rule(s) active ?          */
1319         if (insertedToken)             /* avoid inserting multiple `;'s    */
1320             insertedToken = FALSE;     /* or putting `;' after `{'         */
1321         else
1322         if (layout[indentDepth]!=HARD) {
1323             if (column<layout[indentDepth]) {
1324                 unOffside();
1325                 return '}';
1326             }
1327             else if (column==layout[indentDepth] && c0!=EOF) {
1328                 insertedToken = TRUE;
1329                 return ';';
1330             }
1331         }
1332     }
1333
1334     /* ----------------------------------------------------------------------
1335      * Now try to identify token type:
1336      * --------------------------------------------------------------------*/
1337
1338     switch (c0) {
1339         case EOF  : return 0;                   /* End of file/input       */
1340
1341         /* The next 10 characters make up the `special' category in 1.3    */
1342         case '('  : skip(); return '(';
1343         case ')'  : skip(); return ')';
1344         case ','  : skip(); return ',';
1345         case ';'  : skip(); return ';';
1346         case '['  : skip(); return '[';
1347         case ']'  : skip(); return ']';
1348         case '`'  : skip(); return '`';
1349         case '{'  : goOffside(HARD);
1350                     skip();
1351                     return '{';
1352         case '}'  : if (indentDepth<0) {
1353                         ERRMSG(row) "Misplaced `}'"
1354                         EEND;
1355                     }
1356                     if (layout[indentDepth]==HARD)      /* skip over hard }*/
1357                         skip();
1358                     unOffside();        /* otherwise, we have to insert a }*/
1359                     return '}';         /* to (try to) avoid an error...   */
1360
1361         /* Character and string literals                                   */
1362         case '\'' : top() = yylval = readChar();
1363                     return CHARLIT;
1364
1365         case '\"' : top() = yylval = readString();
1366                     return STRINGLIT;
1367     }
1368
1369 #if TREX
1370     if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1371         Text it;                        /* Look for record selector name   */
1372         skip();
1373         it    = readIdent();
1374         top() = yylval = ap(RECSEL,mkExt(it));
1375         return identType=RECSELID;
1376     }
1377 #endif
1378     if (isIn(c0,LARGE)) {               /* Look for qualified name         */
1379         Text it = readIdent();          /* No keyword begins with LARGE ...*/
1380         if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1381             Text it2 = NIL;
1382             skip();                     /* Skip qualifying dot             */
1383             if (isIn(c0,SYMBOL)) { /* Qualified operator */
1384                 it2 = readOperator();
1385                 if (opType==CONOP) {
1386                     top() = yylval = mkQConOp(it,it2);
1387                     return QCONOP;
1388                 } else {
1389                     top() = yylval = mkQVarOp(it,it2);
1390                     return QVAROP;
1391                 }
1392             } else {               /* Qualified identifier */
1393                 it2 = readIdent();
1394                 if (identType==CONID) {
1395                     top() = yylval = mkQCon(it,it2);
1396                     return QCONID;
1397                 } else {
1398                     top() = yylval = mkQVar(it,it2);
1399                     return QVARID;
1400                 }
1401             }
1402         } else {
1403             top() = yylval = mkCon(it);
1404             return identType;
1405         }
1406     }
1407     if (isIn(c0,(SMALL|LARGE))) {
1408         Text it = readIdent();
1409
1410         if (it==textCase)              return CASEXP;
1411         if (it==textOfK)               lookAhead(OF);
1412         if (it==textData)              return DATA;
1413         if (it==textType)              return TYPE;
1414         if (it==textIf)                return IF;
1415         if (it==textThen)              return THEN;
1416         if (it==textElse)              return ELSE;
1417         if (it==textWhere)             lookAhead(WHERE);
1418         if (it==textLet)               lookAhead(LET);
1419         if (it==textIn)                return IN;
1420         if (it==textInfix)             return INFIXN;
1421         if (it==textInfixl)            return INFIXL;
1422         if (it==textInfixr)            return INFIXR;
1423         if (it==textForeign)           return FOREIGN;
1424         if (it==textUnsafe)            return UNSAFE;
1425         if (it==textNewtype)           return TNEWTYPE;
1426         if (it==textDefault)           return DEFAULT;
1427         if (it==textDeriving)          return DERIVING;
1428         if (it==textDo)                lookAhead(DO);
1429         if (it==textClass)             return TCLASS;
1430         if (it==textInstance)          return TINSTANCE;
1431         if (it==textModule)            return TMODULE;
1432         if (it==textImport)            return IMPORT;
1433         if (it==textExport)            return EXPORT;
1434         if (it==textHiding)            return HIDING;
1435         if (it==textQualified)         return QUALIFIED;
1436         if (it==textAsMod)             return ASMOD;
1437         if (it==textWildcard)          return '_';
1438         if (it==textAll && !haskell98) return ALL;
1439         if (it==textRepeat && reading==KEYBOARD)
1440             return repeatLast();
1441
1442         top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1443         return identType;
1444     }
1445
1446     if (isIn(c0,SYMBOL)) {
1447         Text it = readOperator();
1448
1449         if (it==textCoco)    return COCO;
1450         if (it==textEq)      return '=';
1451         if (it==textUpto)    return UPTO;
1452         if (it==textAs)      return '@';
1453         if (it==textLambda)  return '\\';
1454         if (it==textBar)     return '|';
1455         if (it==textFrom)    return FROM;
1456         if (it==textMinus)   return '-';
1457         if (it==textPlus)    return '+';
1458         if (it==textBang)    return '!';
1459         if (it==textDot)     return '.';
1460         if (it==textArrow)   return ARROW;
1461         if (it==textLazy)    return '~';
1462         if (it==textImplies) return IMPLIES;
1463         if (it==textRepeat && reading==KEYBOARD)
1464             return repeatLast();
1465
1466         top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1467         return opType;
1468     }
1469
1470     if (isIn(c0,DIGIT)) {
1471         top() = yylval = readNumber();
1472         return NUMLIT;
1473     }
1474
1475     ERRMSG(row) "Unrecognised character `\\%d' in column %d", ((int)c0), column
1476     EEND;
1477     return 0; /*NOTREACHED*/
1478 }
1479
1480 static Int local repeatLast() {         /* Obtain last expression entered  */
1481     if (isNull(yylval=getLastExpr())) {
1482         ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1483         EEND;
1484     }
1485     return REPEAT;
1486 }
1487
1488 Syntax defaultSyntax(t)                 /* Find default syntax of var named*/
1489 Text t; {                               /* by t ...                        */
1490     String s = textToStr(t);
1491     return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1492 }
1493
1494 Syntax syntaxOf(n)                      /* Find syntax for name            */
1495 Name n; {
1496     if (name(n).syntax==NO_SYNTAX)      /* Return default if no syntax set */
1497         return defaultSyntax(name(n).text);
1498     return name(n).syntax;
1499 }
1500
1501 /* --------------------------------------------------------------------------
1502  * main entry points to parser/lexer:
1503  * ------------------------------------------------------------------------*/
1504
1505 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1506 Int startWith; {                       /* determining whether to read a    */
1507     firstToken   = TRUE;               /* script or an expression          */
1508     firstTokenIs = startWith;
1509
1510     clearStack();
1511     if (yyparse()) {                   /* This can only be parser overflow */
1512         ERRMSG(row) "Parser overflow"  /* as all syntax errors are caught  */
1513         EEND;                          /* in the parser...                 */
1514     }
1515     drop();
1516     if (!stackEmpty())                 /* stack should now be empty        */
1517         internal("parseInput");
1518 }
1519
1520 #ifdef HSCRIPT
1521 static String memPrefix = "@mem@";
1522 static Int lenMemPrefix = 5;   /* strlen(memPrefix)*/
1523
1524 Void makeMemScript(mem,fname)
1525 String mem;
1526 String fname; {
1527    strcat(fname,memPrefix);
1528    itoa((int)mem, fname+strlen(fname), 10);
1529 }
1530
1531 Bool isMemScript(fname)
1532 String fname; {
1533    return (strstr(fname,memPrefix) != NULL);
1534 }
1535
1536 String memScriptString(fname)
1537 String fname; {
1538     String p = strstr(fname,memPrefix);
1539     if (p) {
1540         return (String)atoi(p+lenMemPrefix);
1541     } else {
1542         return NULL;
1543     }
1544 }
1545
1546 Void parseScript(fname,len)             /* Read a script, possibly from mem */
1547 String fname;
1548 Long len; {
1549     input(RESET);
1550     if (isMemScript(fname)) {
1551         char* s = memScriptString(fname);
1552         stringInput(s);
1553     } else {
1554         fileInput(fname,len);
1555     }
1556     parseInput(SCRIPT);
1557 }
1558 #else
1559 Void parseScript(nm,len)               /* Read a script                    */
1560 String nm;
1561 Long   len; {                          /* Used to set a target for reading */
1562     input(RESET);
1563     fileInput(nm,len);
1564     parseInput(SCRIPT);
1565 }
1566 #endif
1567
1568 Void parseExp() {                      /* Read an expression to evaluate   */
1569     parseInput(EXPR);
1570     setLastExpr(inputExpr);
1571 }
1572
1573 /* --------------------------------------------------------------------------
1574  * Input control:
1575  * ------------------------------------------------------------------------*/
1576
1577 Void input(what)
1578 Int what; {
1579     switch (what) {
1580         case INSTALL : initCharTab();
1581                        textCase       = findText("case");
1582                        textOfK        = findText("of");
1583                        textData       = findText("data");
1584                        textType       = findText("type");
1585                        textIf         = findText("if");
1586                        textThen       = findText("then");
1587                        textElse       = findText("else");
1588                        textWhere      = findText("where");
1589                        textLet        = findText("let");
1590                        textIn         = findText("in");
1591                        textInfix      = findText("infix");
1592                        textInfixl     = findText("infixl");
1593                        textInfixr     = findText("infixr");
1594                        textForeign    = findText("foreign");
1595                        textUnsafe     = findText("unsafe");
1596                        textNewtype    = findText("newtype");
1597                        textDefault    = findText("default");
1598                        textDeriving   = findText("deriving");
1599                        textDo         = findText("do");
1600                        textClass      = findText("class");
1601                        textInstance   = findText("instance");
1602                        textCoco       = findText("::");
1603                        textEq         = findText("=");
1604                        textUpto       = findText("..");
1605                        textAs         = findText("@");
1606                        textLambda     = findText("\\");
1607                        textBar        = findText("|");
1608                        textMinus      = findText("-");
1609                        textPlus       = findText("+");
1610                        textFrom       = findText("<-");
1611                        textArrow      = findText("->");
1612                        textLazy       = findText("~");
1613                        textBang       = findText("!");
1614                        textDot        = findText(".");
1615                        textImplies    = findText("=>");
1616                        textPrelude    = findText("Prelude");
1617                        textNum        = findText("Num");
1618                        textModule     = findText("module");
1619                        textImport     = findText("import");
1620                        textHiding     = findText("hiding");
1621                        textQualified  = findText("qualified");
1622                        textAsMod      = findText("as");
1623                        textWildcard   = findText("_");
1624                        textAll        = findText("forall");
1625                        varMinus       = mkVar(textMinus);
1626                        varPlus        = mkVar(textPlus);
1627                        varBang        = mkVar(textBang);
1628                        varDot         = mkVar(textDot);
1629                        varHiding      = mkVar(textHiding);
1630                        varQualified   = mkVar(textQualified);
1631                        varAsMod       = mkVar(textAsMod);
1632                        conMain        = mkCon(findText("Main"));
1633                        varMain        = mkVar(findText("main"));
1634                        evalDefaults   = NIL;
1635
1636                        input(RESET);
1637                        break;
1638
1639         case RESET   : tyconDefns   = NIL;
1640                        typeInDefns  = NIL;
1641                        valDefns     = NIL;
1642                        classDefns   = NIL;
1643                        instDefns    = NIL;
1644                        selDefns     = NIL;
1645                        genDefns     = NIL;
1646                        //primDefns    = NIL;
1647                        unqualImports= NIL;
1648                        foreignImports= NIL;
1649                        foreignExports= NIL;
1650                        defaultDefns = NIL;
1651                        defaultLine  = 0;
1652                        inputExpr    = NIL;
1653                        imps         = NIL;
1654                        closeAnyInput();
1655                        break;
1656
1657         case BREAK   : if (reading==KEYBOARD)
1658                            c0 = EOF;
1659                        break;
1660
1661         case MARK    : mark(tyconDefns);
1662                        mark(typeInDefns);
1663                        mark(valDefns);
1664                        mark(classDefns);
1665                        mark(instDefns);
1666                        mark(selDefns);
1667                        mark(genDefns);
1668                        //mark(primDefns);
1669                        mark(unqualImports);
1670                        mark(foreignImports);
1671                        mark(foreignExports);
1672                        mark(defaultDefns);
1673                        mark(evalDefaults);
1674                        mark(inputExpr);
1675                        mark(varMinus);
1676                        mark(varPlus);
1677                        mark(varBang);
1678                        mark(varDot);
1679                        mark(varHiding);
1680                        mark(varQualified);
1681                        mark(varAsMod);
1682                        mark(varMain);
1683                        mark(conMain);
1684                        mark(imps);
1685                        break;
1686     }
1687 }
1688
1689 /*-------------------------------------------------------------------------*/