ghc/interpreter/input.c

   1
   2 /* --------------------------------------------------------------------------
   3  * Input functions, lexical analysis parsing etc...
   4  *
   5  * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
   6  * Yale Haskell Group, and the Oregon Graduate Institute of Science and
   7  * Technology, 1994-1999, All rights reserved.  It is distributed as
   8  * free software under the license in the file "License", which is
   9  * included in the distribution.
  10  *
  11  * $RCSfile: input.c,v $
  12  * $Revision: 1.19 $
  13  * $Date: 2000/03/09 02:47:13 $
  14  * ------------------------------------------------------------------------*/
  15
  16 #include "prelude.h"
  17 #include "storage.h"
  18 #include "backend.h"
  19 #include "connect.h"
  20 #include "command.h"
  21 #include "errors.h"
  22 #include "link.h"
  23 #include <ctype.h>
  24 #if HAVE_GETDELIM_H
  25 #include "getdelim.h"
  26 #endif
  27
  28 #if IS_WIN32
  29 #include <windows.h>
  30 #endif
  31
  32 #if IS_WIN32 || HUGS_FOR_WINDOWS
  33 #undef IN
  34 #endif
  35
  36 #if HAVE_LIBREADLINE && HAVE_READLINE_READLINE_H && HAVE_READLINE_HISTORY_H
  37 #define USE_READLINE 1
  38 #else
  39 #define USE_READLINE 0
  40 #endif
  41
  42 #if USE_READLINE
  43 #include <readline/readline.h>
  44 #include <readline/history.h>
  45 #endif
  46
  47
  48 /* --------------------------------------------------------------------------
  49  * Global data:
  50  * ------------------------------------------------------------------------*/
  51
  52 List tyconDefns       = NIL;            /* type constructor definitions    */
  53 List typeInDefns      = NIL;            /* type synonym restrictions       */
  54 List valDefns         = NIL;            /* value definitions in script     */
  55 List classDefns       = NIL;            /* class defns in script           */
  56 List instDefns        = NIL;            /* instance defns in script        */
  57 List selDefns         = NIL;            /* list of selector lists          */
  58 List genDefns         = NIL;            /* list of generated names         */
  59 List unqualImports    = NIL;            /* unqualified import list         */
  60 List foreignImports   = NIL;            /* foreign imports                 */
  61 List foreignExports   = NIL;            /* foreign exportsd                */
  62 List defaultDefns     = NIL;            /* default definitions (if any)    */
  63 Int  defaultLine      = 0;              /* line in which default defs occur*/
  64 List evalDefaults     = NIL;            /* defaults for evaluator          */
  65
  66 Cell inputExpr        = NIL;            /* input expression                */
  67 Cell inputContext     = NIL;            /* input context                   */
  68 Bool literateScripts  = FALSE;          /* TRUE => default to lit scripts  */
  69 Bool literateErrors   = TRUE;           /* TRUE => report errs in lit scrs */
  70 Bool offsideON        = TRUE;           /* TRUE => implement offside rule  */
  71 Bool readingInterface = FALSE;
  72
  73 String repeatStr     = 0;               /* Repeat last expr                */
  74
  75 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
  76 String preprocessor  = 0;
  77 #endif
  78
  79 /* --------------------------------------------------------------------------
  80  * Local function prototypes:
  81  * ------------------------------------------------------------------------*/
  82
  83 static Void local initCharTab     Args((Void));
  84 static Void local fileInput       Args((String,Long));
  85 static Bool local literateMode    Args((String));
  86 static Bool local linecmp         Args((String,String));
  87 static Int  local nextLine        Args((Void));
  88 static Void local skip            Args((Void));
  89 static Void local thisLineIs      Args((Int));
  90 static Void local newlineSkip     Args((Void));
  91 static Void local closeAnyInput   Args((Void));
  92
  93        Int  yyparse         Args((Void)); /* can't stop yacc making this   */
  94                                           /* public, but don't advertise   */
  95                                           /* it in a header file.          */
  96
  97 static Void local endToken        Args((Void));
  98 static Text local readOperator    Args((Void));
  99 static Text local readIdent       Args((Void));
 100 static Cell local readRadixNumber Args((Int));
 101 static Cell local readNumber      Args((Void));
 102 static Cell local readChar        Args((Void));
 103 static Cell local readString      Args((Void));
 104 static Void local saveStrChr      Args((Char));
 105 static Cell local readAChar       Args((Bool));
 106
 107 static Bool local lazyReadMatches Args((String));
 108 static Cell local readEscapeChar  Args((Bool));
 109 static Void local skipGap         Args((Void));
 110 static Cell local readCtrlChar    Args((Void));
 111 static Cell local readOctChar     Args((Void));
 112 static Cell local readHexChar     Args((Void));
 113 static Int  local readHexDigit    Args((Char));
 114 static Cell local readDecChar     Args((Void));
 115
 116 static Void local goOffside       Args((Int));
 117 static Void local unOffside       Args((Void));
 118 static Bool local canUnOffside    Args((Void));
 119
 120 static Void local skipWhitespace  Args((Void));
 121 static Int  local yylex           Args((Void));
 122 static Int  local repeatLast      Args((Void));
 123
 124 static Cell local parseInput      Args((Int));
 125
 126 static Bool local doesNotExceed   Args((String,Int,Int));
 127 static Int  local stringToInt     Args((String,Int));
 128
 129
 130 /* --------------------------------------------------------------------------
 131  * Text values for reserved words and special symbols:
 132  * ------------------------------------------------------------------------*/
 133
 134 static Text textCase,    textOfK,      textData,   textType,   textIf;
 135 static Text textThen,    textElse,     textWhere,  textLet,    textIn;
 136 static Text textInfix,   textInfixl,   textInfixr, textForeign, textNewtype;
 137 static Text textDefault, textDeriving, textDo,     textClass,  textInstance;
 138 #if IPARAM
 139 static Text textWith,  textDlet;
 140 #endif
 141
 142 static Text textCoco,    textEq,       textUpto,   textAs,     textLambda;
 143 static Text textBar,     textMinus,    textFrom,   textArrow,  textLazy;
 144 static Text textBang,    textDot,      textAll,    textImplies;
 145 static Text textWildcard;
 146
 147 static Text textModule,  textImport,    textInterface,  textInstImport;
 148 static Text textHiding,  textQualified, textAsMod,      textPrivileged;
 149 static Text textExport,  textDynamic,   textUUExport;
 150 static Text textUnsafe,  textUUAll,     textUUUsage;
 151
 152 Text   textCcall;                       /* ccall                           */
 153 Text   textStdcall;                     /* stdcall                         */
 154
 155 Text   textNum;                         /* Num                             */
 156 Text   textPrelude;                     /* Prelude                         */
 157 Text   textPlus;                        /* (+)                             */
 158
 159 static Cell conMain;                    /* Main                            */
 160 static Cell varMain;                    /* main                            */
 161
 162 static Cell varMinus;                   /* (-)                             */
 163 static Cell varPlus;                    /* (+)                             */
 164 static Cell varBang;                    /* (!)                             */
 165 static Cell varDot;                     /* (.)                             */
 166 static Cell varHiding;                  /* hiding                          */
 167 static Cell varQualified;               /* qualified                       */
 168 static Cell varAsMod;                   /* as                              */
 169 static Cell varPrivileged;              /* privileged                      */
 170
 171 static List imps;                       /* List of imports to be chased    */
 172
 173
 174 /* --------------------------------------------------------------------------
 175  * Character set handling:
 176  *
 177  * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
 178  * character set.  The following code provides methods for classifying
 179  * input characters according to the lexical structure specified by the
 180  * report.  Hugs should still accept older programs because ASCII is
 181  * essentially just a subset of the ISO character set.
 182  *
 183  * Notes: If you want to port Hugs to a machine that uses something
 184  * substantially different from the ISO character set, then you will need
 185  * to insert additional code to map between character sets.
 186  *
 187  * At some point, the following data structures may be exported in a .h
 188  * file to allow the information contained here to be picked up in the
 189  * implementation of LibChar is* primitives.
 190  *
 191  * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
 192  * ------------------------------------------------------------------------*/
 193
 194 static  Bool            charTabBuilt;
 195 static  unsigned char   ctable[NUM_CHARS];
 196 #define isIn(c,x)       (ctable[(unsigned char)(c)]&(x))
 197 #define isISO(c)        (0<=(c) && (c)<NUM_CHARS)
 198
 199 #define DIGIT           0x01
 200 #define SMALL           0x02
 201 #define LARGE           0x04
 202 #define SYMBOL          0x08
 203 #define IDAFTER         0x10
 204 #define ZPACE           0x20
 205 #define PRINT           0x40
 206
 207 static Void local initCharTab() {       /* Initialize char decode table    */
 208 #define setRange(x,f,t) {Int i=f;   while (i<=t) ctable[i++] |=x;}
 209 #define setChar(x,c)    ctable[c] |= (x)
 210 #define setChars(x,s)   {char *p=s; while (*p)   ctable[(Int)*p++]|=x;}
 211 #define setCopy(x,c)    {Int i;                         \
 212                          for (i=0; i<NUM_CHARS; ++i)    \
 213                              if (isIn(i,c))             \
 214                                  ctable[i]|=x;          \
 215                         }
 216
 217     setRange(DIGIT,     '0','9');       /* ASCII decimal digits            */
 218
 219     setRange(SMALL,     'a','z');       /* ASCII lower case letters        */
 220     setRange(SMALL,     223,246);       /* ISO lower case letters          */
 221     setRange(SMALL,     248,255);       /* (omits division symbol, 247)    */
 222     setChar (SMALL,     '_');
 223
 224     setRange(LARGE,     'A','Z');       /* ASCII upper case letters        */
 225     setRange(LARGE,     192,214);       /* ISO upper case letters          */
 226     setRange(LARGE,     216,222);       /* (omits multiplication, 215)     */
 227
 228     setRange(SYMBOL,    161,191);       /* Symbol characters + ':'         */
 229     setRange(SYMBOL,    215,215);
 230     setChar (SYMBOL,    247);
 231     setChars(SYMBOL,    ":!#$%&*+./<=>?@\\^|-~");
 232
 233     setChar (IDAFTER,   '\'');          /* Characters in identifier        */
 234     setCopy (IDAFTER,   (DIGIT|SMALL|LARGE));
 235
 236     setChar (ZPACE,     ' ');           /* ASCII space character           */
 237     setChar (ZPACE,     160);           /* ISO non breaking space          */
 238     setRange(ZPACE,     9,13);          /* special whitespace: \t\n\v\f\r  */
 239
 240     setChars(PRINT,     "(),;[]_`{}");  /* Special characters              */
 241     setChars(PRINT,     " '\"");        /* Space and quotes                */
 242     setCopy (PRINT,     (DIGIT|SMALL|LARGE|SYMBOL));
 243
 244     charTabBuilt = TRUE;
 245 #undef setRange
 246 #undef setChar
 247 #undef setChars
 248 #undef setCopy
 249 }
 250
 251
 252 /* --------------------------------------------------------------------------
 253  * Single character input routines:
 254  *
 255  * At the lowest level of input, characters are read one at a time, with the
 256  * current character held in c0 and the following (lookahead) character in
 257  * c1.  The coordinates of c0 within the file are held in (column,row).
 258  * The input stream is advanced by one character using the skip() function.
 259  * ------------------------------------------------------------------------*/
 260
 261 #define TABSIZE    8                   /* spacing between tabstops         */
 262
 263 #define NOTHING    0                   /* what kind of input is being read?*/
 264 #define KEYBOARD   1                   /* - keyboard/console?              */
 265 #define SCRIPTFILE 2                   /* - script file                    */
 266 #define PROJFILE   3                   /* - project file                   */
 267 #define STRING     4                   /* - string buffer?                 */
 268
 269 static Int    reading   = NOTHING;
 270
 271 static Target readSoFar;
 272 static Int    row, column, startColumn;
 273 static int    c0, c1;
 274 static FILE   *inputStream = 0;
 275 static Bool   thisLiterate;
 276 static String nextStringChar;          /* next char in string buffer       */
 277
 278 #if     USE_READLINE                   /* for command line editors         */
 279 static  String currentLine;            /* editline or GNU readline         */
 280 static  String nextChar;
 281 #define nextConsoleChar() \
 282            (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
 283 #else
 284 #define nextConsoleChar() getc(stdin)
 285 #endif
 286
 287 static  Int litLines;                  /* count defn lines in lit script   */
 288 #define DEFNCHAR  '>'                  /* definition lines begin with this */
 289 static  Int lastLine;                  /* records type of last line read:  */
 290 #define STARTLINE 0                    /* - at start of file, none read    */
 291 #define BLANKLINE 1                    /* - blank (may preceed definition) */
 292 #define TEXTLINE  2                    /* - text comment                   */
 293 #define DEFNLINE  3                    /* - line containing definition     */
 294 #define CODELINE  4                    /* - line inside code block         */
 295
 296 #define BEGINCODE "\\begin{code}"
 297 #define ENDCODE   "\\end{code}"
 298
 299 #if HAVE_GETDELIM_H
 300 static char *lineBuffer = NULL;   /* getline() does the initial allocation */
 301 #else
 302 #define LINEBUFFER_SIZE 1000
 303 static char lineBuffer[LINEBUFFER_SIZE];
 304 #endif
 305 static int lineLength = 0;
 306 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
 307 static int linePtr = 0;
 308
 309 Void consoleInput(prompt)              /* prepare to input characters from */
 310 String prompt; {                       /* standard in (i.e. console/kbd)   */
 311     reading     = KEYBOARD;            /* keyboard input is Line oriented, */
 312     c0          =                      /* i.e. input terminated by '\n'    */
 313     c1          = ' ';
 314     column      = (-1);
 315     row         = 0;
 316
 317 #if USE_READLINE
 318     /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
 319      * avoids accidentally freeing currentLine twice.
 320      */
 321     if (currentLine) {
 322         String oldCurrentLine = currentLine;
 323         currentLine = 0;           /* We may lose the space of currentLine */
 324         free(oldCurrentLine);      /* if interrupted here - unlikely       */
 325     }
 326     currentLine = readline(prompt);
 327     nextChar    = currentLine;
 328     if (currentLine) {
 329         if (*currentLine)
 330             add_history(currentLine);
 331     }
 332     else
 333         c0 = c1 = EOF;
 334 #else
 335     Printf("%s",prompt);
 336     FlushStdout();
 337 #endif
 338 }
 339
 340 Void projInput(nm)                     /* prepare to input characters from */
 341 String nm; {                           /* from named project file          */
 342     if ((inputStream = fopen(nm,"r"))!=0) {
 343         reading = PROJFILE;
 344         c0      = ' ';
 345         c1      = '\n';
 346         column  = 1;
 347         row     = 0;
 348     }
 349     else {
 350         ERRMSG(0) "Unable to open project file \"%s\"", nm
 351         EEND;
 352     }
 353 }
 354
 355 static Void local fileInput(nm,len)     /* prepare to input characters from*/
 356 String nm;                              /* named file (specified length is */
 357 Long   len; {                           /* used to set target for reading) */
 358 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 359     if (preprocessor) {
 360         Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
 361         char *cmd = malloc(reallen);
 362         if (cmd == NULL) {
 363             ERRMSG(0) "Unable to allocate memory for filter command."
 364             EEND;
 365         }
 366         strcpy(cmd,preprocessor);
 367         strcat(cmd," ");
 368         strcat(cmd,nm);
 369         inputStream = popen(cmd,"r");
 370         free(cmd);
 371     } else {
 372         inputStream = fopen(nm,"r");
 373     }
 374 #else
 375     inputStream = fopen(nm,"r");
 376 #endif
 377     if (inputStream) {
 378         reading      = SCRIPTFILE;
 379         c0           = ' ';
 380         c1           = '\n';
 381         column       = 1;
 382         row          = 0;
 383
 384         lastLine     = STARTLINE;       /* literate file processing */
 385         litLines     = 0;
 386         linePtr      = 0;
 387         lineLength   = 0;
 388         thisLiterate = literateMode(nm);
 389         inCodeBlock  = FALSE;
 390
 391         readSoFar    = 0;
 392         setGoal("Parsing", (Target)len);
 393     }
 394     else {
 395         ERRMSG(0) "Unable to open file \"%s\"", nm
 396         EEND;
 397     }
 398 }
 399
 400 Void stringInput(s)             /* prepare to input characters from string */
 401 String s; {
 402     reading      = STRING;
 403     c0           = EOF;
 404     c1           = EOF;
 405     if (*s) c0 = *s++;
 406     if (*s) c1 = *s++;
 407     column       = 1;
 408     row          = 1;
 409
 410     nextStringChar = s;
 411     if (!charTabBuilt)
 412         initCharTab();
 413 }
 414
 415 static Bool local literateMode(nm)      /* Select literate mode for file   */
 416 String nm; {
 417     char *dot = strrchr(nm,'.');        /* look for last dot in file name  */
 418     if (dot) {
 419         if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate    */
 420             return FALSE;
 421         if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
 422             filenamecmp(dot+1,"verb")==0) /* literate scripts              */
 423             return TRUE;
 424     }
 425     return literateScripts;             /* otherwise, use the default      */
 426 }
 427
 428
 429 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
 430 {
 431    Int len;
 432    String dot;
 433    len = 1 + strlen ( srcName );
 434    *hiName = malloc(len);
 435    *oName  = malloc(len);
 436    if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
 437    (*hiName)[0] = (*oName)[0] = 0;
 438    dot = strrchr(srcName, '.');
 439    if (!dot) return;
 440    if (filenamecmp(dot+1, "hs")==0 &&
 441        filenamecmp(dot+1, "lhs")==0 &&
 442        filenamecmp(dot+1, "verb")==0) return;
 443
 444    strcpy(*hiName, srcName);
 445    dot = strrchr(*hiName, '.');
 446    dot[1] = 'h';
 447    dot[2] = 'i';
 448    dot[3] = 0;
 449
 450    strcpy(*oName, srcName);
 451    dot = strrchr(*oName, '.');
 452    dot[1] = 'o';
 453    dot[2] = 0;
 454 }
 455
 456
 457
 458 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
 459  * I've removed the loop (since newLineSkip contains a loop too) and
 460  * replaced the warnings with errors. ADR
 461  */
 462 /*
 463  * To deal with literate \begin{code}...\end{code} blocks,
 464  * add a line buffer that rooms the current line. The old c0 and c1
 465  * stream pointers are used as before within that buffer -- sof
 466  *
 467  * Upon reading a new line into the line buffer, we check to see if
 468  * we're reading in a line containing \begin{code} or \end{code} and
 469  * take appropriate action.
 470  */
 471
 472 static Bool local linecmp(s,line)       /* compare string with line        */
 473 String s;                               /* line may end in whitespace      */
 474 String line; {
 475     Int i=0;
 476     while (s[i] != '\0' && s[i] == line[i]) {
 477         ++i;
 478     }
 479     /* s[0..i-1] == line[0..i-1] */
 480     if (s[i] != '\0') {                 /* check s `isPrefixOf` line       */
 481         return FALSE;
 482     }
 483     while (isIn(line[i], ZPACE)) {      /* allow whitespace at end of line */
 484         ++i;
 485     }
 486     return (line[i] == '\0');
 487 }
 488
 489 /* Returns line length (including \n) or 0 upon EOF. */
 490 static Int local nextLine()
 491 {
 492 #if HAVE_GETDELIM_H
 493     /*
 494        Forget about fgets(), it is utterly braindead.
 495        (Assumes \NUL free streams and does not gracefully deal
 496        with overflow.) Instead, use GNU libc's getline().
 497        */
 498     lineLength = getline(&lineBuffer, &lineLength, inputStream);
 499 #else
 500     if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
 501         lineLength = strlen(lineBuffer);
 502     else
 503         lineLength = 0;
 504 #endif
 505     /* printf("Read: \"%s\"", lineBuffer); */
 506     if (lineLength <= 0) { /* EOF / IO error, who knows.. */
 507         return lineLength;
 508     }
 509     else if (lineLength >= 2 && lineBuffer[0] == '#' &&
 510              lineBuffer[1] == '!') {
 511         lineBuffer[0]='\n'; /* pretend it's a blank line */
 512         lineBuffer[1]='\0';
 513         lineLength=1;
 514     } else if (thisLiterate) {
 515         if (linecmp(BEGINCODE, lineBuffer)) {
 516             if (!inCodeBlock) {             /* Entered a code block        */
 517                 inCodeBlock = TRUE;
 518                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 519                 lineBuffer[1]='\0';
 520                 lineLength=1;
 521             }
 522             else {
 523                 ERRMSG(row) "\\begin{code} encountered inside code block"
 524                 EEND;
 525             }
 526         }
 527         else if (linecmp(ENDCODE, lineBuffer)) {
 528             if (inCodeBlock) {              /* Finished code block         */
 529                 inCodeBlock = FALSE;
 530                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 531                 lineBuffer[1]='\0';
 532                 lineLength=1;
 533             }
 534             else {
 535                 ERRMSG(row) "\\end{code} encountered outside code block"
 536                 EEND;
 537             }
 538         }
 539     }
 540     /* printf("Read: \"%s\"", lineBuffer); */
 541     return lineLength;
 542 }
 543
 544 static Void local skip() {              /* move forward one char in input  */
 545     if (c0!=EOF) {                      /* stream, updating c0, c1, ...    */
 546         if (c0=='\n') {                 /* Adjusting cursor coords as nec. */
 547             row++;
 548             column=1;
 549             if (reading==SCRIPTFILE)
 550                 soFar(readSoFar);
 551         }
 552         else if (c0=='\t')
 553             column += TABSIZE - ((column-1)%TABSIZE);
 554         else
 555             column++;
 556
 557         c0 = c1;
 558         readSoFar++;
 559
 560         if (c0==EOF) {
 561             column = 0;
 562             if (reading==SCRIPTFILE)
 563                 done();
 564             closeAnyInput();
 565         }
 566         else if (reading==KEYBOARD) {
 567             allowBreak();
 568             if (c0=='\n')
 569                 c1 = EOF;
 570             else {
 571                 c1 = nextConsoleChar();
 572 #if IS_WIN32 && !HUGS_FOR_WINDOWS
 573                 Sleep(0);
 574 #endif
 575                 /* On Win32, hitting ctrl-C causes the next getchar to
 576                  * fail - returning "-1" to indicate an error.
 577                  * This is one of the rare cases where "-1" does not mean EOF.
 578                  */
 579                 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
 580                     c1 = ' ';
 581                 }
 582             }
 583         }
 584         else if (reading==STRING) {
 585             c1 = (unsigned char) *nextStringChar++;
 586             if (c1 == '\0')
 587                 c1 = EOF;
 588         }
 589         else {
 590             if (lineLength <=0 || linePtr == lineLength) {
 591                 /* Current line, exhausted - get new one */
 592                 if (nextLine() <= 0) { /* EOF */
 593                     c1 = EOF;
 594                 }
 595                 else {
 596                     linePtr = 0;
 597                     c1 = (unsigned char)lineBuffer[linePtr++];
 598                 }
 599             }
 600             else {
 601                 c1 = (unsigned char)lineBuffer[linePtr++];
 602             }
 603         }
 604
 605     }
 606 }
 607
 608 static Void local thisLineIs(kind)     /* register kind of current line    */
 609 Int kind; {                            /* & check for literate script errs */
 610     if (literateErrors) {
 611         if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
 612             (kind==TEXTLINE && lastLine==DEFNLINE)) {
 613             ERRMSG(row) "Program line next to comment"
 614             EEND;
 615         }
 616         lastLine = kind;
 617     }
 618 }
 619
 620 static Void local newlineSkip() {      /* skip `\n' (supports lit scripts) */
 621     /* assert(c0=='\n'); */
 622     if (reading==SCRIPTFILE && thisLiterate) {
 623         do {
 624             skip();
 625             if (inCodeBlock) {         /* pass chars on definition lines   */
 626                 thisLineIs(CODELINE);  /* to lexer (w/o leading DEFNCHAR)  */
 627                 litLines++;
 628                 return;
 629             }
 630             if (c0==DEFNCHAR) {        /* pass chars on definition lines   */
 631                 thisLineIs(DEFNLINE);  /* to lexer (w/o leading DEFNCHAR)  */
 632                 skip();
 633                 litLines++;
 634                 return;
 635             }
 636             while (c0 != '\n' && isIn(c0,ZPACE)) /* maybe line is blank?   */
 637                 skip();
 638             if (c0=='\n' || c0==EOF)
 639                 thisLineIs(BLANKLINE);
 640             else {
 641                 thisLineIs(TEXTLINE);  /* otherwise it must be a comment   */
 642                 while (c0!='\n' && c0!=EOF)
 643                     skip();
 644             }                          /* by now, c0=='\n' or c0==EOF      */
 645         } while (c0!=EOF);             /* if new line, start again         */
 646
 647         if (litLines==0 && literateErrors) {
 648             ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
 649                         DEFNCHAR
 650             EEND;
 651         }
 652         return;
 653     }
 654     skip();
 655 }
 656
 657 static Void local closeAnyInput() {    /* Close input stream, if open,     */
 658     switch (reading) {                 /* or skip to end of console line   */
 659         case PROJFILE   :
 660         case SCRIPTFILE : if (inputStream) {
 661 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 662                               if (preprocessor) {
 663                                   pclose(inputStream);
 664                               } else {
 665                                   fclose(inputStream);
 666                               }
 667 #else
 668                               fclose(inputStream);
 669 #endif
 670                               inputStream = 0;
 671                           }
 672                           break;
 673         case KEYBOARD   : while (c0!=EOF)
 674                               skip();
 675                           break;
 676     }
 677     reading=NOTHING;
 678 }
 679
 680 /* --------------------------------------------------------------------------
 681  * Parser: Uses table driven parser generated from parser.y using yacc
 682  * ------------------------------------------------------------------------*/
 683
 684 #include "parser.c"
 685
 686 /* --------------------------------------------------------------------------
 687  * Single token input routines:
 688  *
 689  * The following routines read the values of particular kinds of token given
 690  * that the first character of the token has already been located in c0 on
 691  * entry to the routine.
 692  * ------------------------------------------------------------------------*/
 693
 694 #define MAX_TOKEN           4000
 695 #define startToken()        tokPos = 0
 696 #define saveTokenChar(c)    if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
 697 #define saveChar(c)         tokenStr[tokPos++]=(char)(c)
 698 #define overflows(n,b,d,m)  (n > ((m)-(d))/(b))
 699
 700 static char tokenStr[MAX_TOKEN+1];     /* token buffer                     */
 701 static Int  tokPos;                    /* input position in buffer         */
 702 static Int  identType;                 /* identifier type: CONID / VARID   */
 703 static Int  opType;                    /* operator type  : CONOP / VAROP   */
 704
 705 static Void local endToken() {         /* check for token overflow         */
 706     if (tokPos>MAX_TOKEN) {
 707         ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
 708         EEND;
 709     }
 710     tokenStr[tokPos] = '\0';
 711 }
 712
 713 static Text local readOperator() {     /* read operator symbol             */
 714     startToken();
 715     do {
 716         saveTokenChar(c0);
 717         skip();
 718     } while (isISO(c0) && isIn(c0,SYMBOL));
 719     opType = (tokenStr[0]==':' ? CONOP : VAROP);
 720     endToken();
 721     return findText(tokenStr);
 722 }
 723
 724 static Text local readIdent() {        /* read identifier                  */
 725     startToken();
 726     do {
 727         saveTokenChar(c0);
 728         skip();
 729     } while (isISO(c0) && isIn(c0,IDAFTER));
 730     endToken();
 731     identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
 732     if (readingInterface)
 733        return unZcodeThenFindText(tokenStr); else
 734        return findText(tokenStr);
 735 }
 736
 737
 738 static Bool local doesNotExceed(s,radix,limit)
 739 String s;
 740 Int    radix;
 741 Int    limit; {
 742     Int n = 0;
 743     Int p = 0;
 744     while (TRUE) {
 745         if (s[p] == 0) return TRUE;
 746         if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
 747         n = radix*n + (s[p]-'0');
 748         p++;
 749     }
 750 }
 751
 752 static Int local stringToInt(s,radix)
 753 String s;
 754 Int    radix; {
 755     Int n = 0;
 756     Int p = 0;
 757     while (TRUE) {
 758         if (s[p] == 0) return n;
 759         n = radix*n + (s[p]-'0');
 760         p++;
 761     }
 762 }
 763
 764 static Cell local readRadixNumber(r)   /* Read literal in specified radix  */
 765 Int r; {                               /* from input of the form 0c{digs}  */
 766     Int d;
 767     startToken();
 768     skip();                            /* skip leading zero                */
 769     if ((d=readHexDigit(c1))<0 || d>=r) {
 770         /* Special case; no digits, lex as  */
 771         /* if it had been written "0 c..."  */
 772         saveTokenChar('0');
 773     } else {
 774         skip();
 775         do {
 776             saveTokenChar('0'+readHexDigit(c0));
 777             skip();
 778             d = readHexDigit(c0);
 779         } while (d>=0 && d<r);
 780     }
 781     endToken();
 782
 783     if (doesNotExceed(tokenStr,r,MAXPOSINT))
 784         return mkInt(stringToInt(tokenStr,r));
 785     else
 786     if (r == 10)
 787         return stringToBignum(tokenStr);
 788     else {
 789         ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
 790         EEND;
 791     }
 792 }
 793
 794 static Cell local readNumber() {        /* read numeric constant           */
 795
 796     if (c0=='0') {
 797         if (c1=='x' || c1=='X')         /* Maybe a hexadecimal literal?    */
 798             return readRadixNumber(16);
 799         if (c1=='o' || c1=='O')         /* Maybe an octal literal?         */
 800             return readRadixNumber(8);
 801     }
 802
 803     startToken();
 804     do {
 805         saveTokenChar(c0);
 806         skip();
 807     } while (isISO(c0) && isIn(c0,DIGIT));
 808
 809     if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
 810         endToken();
 811         if (doesNotExceed(tokenStr,10,MAXPOSINT))
 812             return mkInt(stringToInt(tokenStr,10)); else
 813             return stringToBignum(tokenStr);
 814     }
 815
 816     saveTokenChar(c0);                  /* save decimal point              */
 817     skip();
 818     do {                                /* process fractional part ...     */
 819         saveTokenChar(c0);
 820         skip();
 821     } while (isISO(c0) && isIn(c0,DIGIT));
 822
 823     if (c0=='e' || c0=='E') {           /* look for exponent part...       */
 824         saveTokenChar('e');
 825         skip();
 826         if (c0=='-') {
 827             saveTokenChar('-');
 828             skip();
 829         }
 830         else if (c0=='+')
 831             skip();
 832
 833         if (!isISO(c0) || !isIn(c0,DIGIT)) {
 834             ERRMSG(row) "Missing digits in exponent"
 835             EEND;
 836         }
 837         else {
 838             do {
 839                 saveTokenChar(c0);
 840                 skip();
 841             } while (isISO(c0) && isIn(c0,DIGIT));
 842         }
 843     }
 844
 845     endToken();
 846     return mkFloat(stringToFloat(tokenStr));
 847 }
 848
 849
 850
 851
 852
 853
 854
 855 static Cell local readChar() {         /* read character constant          */
 856     Cell charRead;
 857
 858     skip(/* '\'' */);
 859     if (c0=='\'' || c0=='\n' || c0==EOF) {
 860         ERRMSG(row) "Illegal character constant"
 861         EEND;
 862     }
 863
 864     charRead = readAChar(FALSE);
 865
 866     if (c0=='\'')
 867         skip(/* '\'' */);
 868     else {
 869         ERRMSG(row) "Improperly terminated character constant"
 870         EEND;
 871     }
 872     return charRead;
 873 }
 874
 875 static Cell local readString() {       /* read string literal              */
 876     Cell c;
 877
 878     startToken();
 879     skip(/* '\"' */);
 880     while (c0!='\"' && c0!='\n' && c0!=EOF) {
 881         c = readAChar(TRUE);
 882         if (nonNull(c))
 883             saveStrChr(charOf(c));
 884     }
 885
 886     if (c0=='\"')
 887         skip(/* '\"' */);
 888     else {
 889         ERRMSG(row) "Improperly terminated string"
 890         EEND;
 891     }
 892     endToken();
 893     return mkStr(findText(tokenStr));
 894 }
 895
 896 static Void local saveStrChr(c)        /* save character in string         */
 897 Char c; {
 898     if (c!='\0' && c!='\\') {          /* save non null char as single char*/
 899         saveTokenChar(c);
 900     }
 901     else {                             /* save null char as TWO null chars */
 902         if (tokPos+1<MAX_TOKEN) {
 903             saveChar('\\');
 904             if (c=='\\')
 905                 saveChar('\\');
 906             else
 907                 saveChar('0');
 908         }
 909     }
 910 }
 911
 912 static Cell local readAChar(isStrLit)  /* read single char constant        */
 913 Bool isStrLit; {                       /* TRUE => enable \& and gaps       */
 914     Cell c = mkChar(c0);
 915
 916     if (c0=='\\')                      /* escape character?                */
 917         return readEscapeChar(isStrLit);
 918     if (!isISO(c0)) {
 919         ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
 920         EEND;
 921     }
 922     skip();                            /* normal character?                */
 923     return c;
 924 }
 925
 926 /* --------------------------------------------------------------------------
 927  * Character escape code sequences:
 928  * ------------------------------------------------------------------------*/
 929
 930 static struct {                        /* table of special escape codes    */
 931     char *codename;
 932     int  codenumber;
 933 } escapes[] = {
 934    {"a",    7}, {"b",    8}, {"f",   12}, {"n",   10},  /* common escapes  */
 935    {"r",   13}, {"t",    9}, {"\\",'\\'}, {"\"",'\"'},
 936    {"\'",'\''}, {"v",   11},
 937    {"NUL",  0}, {"SOH",  1}, {"STX",  2}, {"ETX",  3},  /* ascii codenames */
 938    {"EOT",  4}, {"ENQ",  5}, {"ACK",  6}, {"BEL",  7},
 939    {"BS",   8}, {"HT",   9}, {"LF",  10}, {"VT",  11},
 940    {"FF",  12}, {"CR",  13}, {"SO",  14}, {"SI",  15},
 941    {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
 942    {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
 943    {"CAN", 24}, {"EM",  25}, {"SUB", 26}, {"ESC", 27},
 944    {"FS",  28}, {"GS",  29}, {"RS",  30}, {"US",  31},
 945    {"SP",  32}, {"DEL", 127},
 946    {0,0}
 947 };
 948
 949 static Int  alreadyMatched;            /* Record portion of input stream   */
 950 static char alreadyRead[10];           /* that has been read w/o a match   */
 951
 952 static Bool local lazyReadMatches(s)   /* compare input stream with string */
 953 String s; {                            /* possibly using characters that   */
 954     int i;                             /* have already been read           */
 955
 956     for (i=0; i<alreadyMatched; ++i)
 957         if (alreadyRead[i]!=s[i])
 958             return FALSE;
 959
 960     while (s[i] && s[i]==c0) {
 961         alreadyRead[alreadyMatched++]=(char)c0;
 962         skip();
 963         i++;
 964     }
 965
 966     return s[i]=='\0';
 967 }
 968
 969 static Cell local readEscapeChar(isStrLit)/* read escape character         */
 970 Bool isStrLit; {
 971     int i=0;
 972
 973     skip(/* '\\' */);
 974     switch (c0) {
 975         case '&'  : if (isStrLit) {
 976                         skip();
 977                         return NIL;
 978                     }
 979                     ERRMSG(row) "Illegal use of `\\&' in character constant"
 980                     EEND;
 981                     break;/*NOTREACHED*/
 982
 983         case '^'  : return readCtrlChar();
 984
 985         case 'o'  : return readOctChar();
 986         case 'x'  : return readHexChar();
 987
 988         default   : if (!isISO(c0)) {
 989                         ERRMSG(row) "Illegal escape sequence"
 990                         EEND;
 991                     }
 992                     else if (isIn(c0,ZPACE)) {
 993                         if (isStrLit) {
 994                             skipGap();
 995                             return NIL;
 996                         }
 997                         ERRMSG(row) "Illegal use of gap in character constant"
 998                         EEND;
 999                         break;
1000                     }
1001                     else if (isIn(c0,DIGIT))
1002                         return readDecChar();
1003     }
1004
1005     for (alreadyMatched=0; escapes[i].codename; i++)
1006         if (lazyReadMatches(escapes[i].codename))
1007             return mkChar(escapes[i].codenumber);
1008
1009     alreadyRead[alreadyMatched++] = (char)c0;
1010     alreadyRead[alreadyMatched++] = '\0';
1011     ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
1012                 alreadyRead
1013     EEND;
1014     return NIL;/*NOTREACHED*/
1015 }
1016
1017 static Void local skipGap() {          /* skip over gap in string literal  */
1018     do                                 /* (simplified in Haskell 1.1)      */
1019         if (c0=='\n')
1020             newlineSkip();
1021         else
1022             skip();
1023     while (isISO(c0) && isIn(c0,ZPACE));
1024     if (c0!='\\') {
1025         ERRMSG(row) "Missing `\\' terminating string literal gap"
1026         EEND;
1027     }
1028     skip(/* '\\' */);
1029 }
1030
1031 static Cell local readCtrlChar() {     /* read escape sequence \^x         */
1032     static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1033     String which;
1034
1035     skip(/* '^' */);
1036     if ((which = strchr(controls,c0))==NULL) {
1037         ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1038         EEND;
1039     }
1040     skip();
1041     return mkChar(which-controls);
1042 }
1043
1044 static Cell local readOctChar() {      /* read octal character constant    */
1045     Int n = 0;
1046     Int d;
1047
1048     skip(/* 'o' */);
1049     if ((d = readHexDigit(c0))<0 || d>=8) {
1050         ERRMSG(row) "Empty octal character escape"
1051         EEND;
1052     }
1053     do {
1054         if (overflows(n,8,d,MAXCHARVAL)) {
1055             ERRMSG(row) "Octal character escape out of range"
1056             EEND;
1057         }
1058         n = 8*n + d;
1059         skip();
1060     } while ((d = readHexDigit(c0))>=0 && d<8);
1061
1062     return mkChar(n);
1063 }
1064
1065 static Cell local readHexChar() {      /* read hex character constant      */
1066     Int n = 0;
1067     Int d;
1068
1069     skip(/* 'x' */);
1070     if ((d = readHexDigit(c0))<0) {
1071         ERRMSG(row) "Empty hexadecimal character escape"
1072         EEND;
1073     }
1074     do {
1075         if (overflows(n,16,d,MAXCHARVAL)) {
1076             ERRMSG(row) "Hexadecimal character escape out of range"
1077             EEND;
1078         }
1079         n = 16*n + d;
1080         skip();
1081     } while ((d = readHexDigit(c0))>=0);
1082
1083     return mkChar(n);
1084 }
1085
1086 static Int local readHexDigit(c)       /* read single hex digit            */
1087 Char c; {
1088     if ('0'<=c && c<='9')
1089         return c-'0';
1090     if ('A'<=c && c<='F')
1091         return 10 + (c-'A');
1092     if ('a'<=c && c<='f')
1093         return 10 + (c-'a');
1094     return -1;
1095 }
1096
1097 static Cell local readDecChar() {      /* read decimal character constant  */
1098     Int n = 0;
1099
1100     do {
1101         if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1102             ERRMSG(row) "Decimal character escape out of range"
1103             EEND;
1104         }
1105         n = 10*n + (c0-'0');
1106         skip();
1107     } while (c0!=EOF && isIn(c0,DIGIT));
1108
1109     return mkChar(n);
1110 }
1111
1112 /* --------------------------------------------------------------------------
1113  * Produce printable representation of character:
1114  * ------------------------------------------------------------------------*/
1115
1116 String unlexChar(c,quote)              /* return string representation of  */
1117 Char c;                                /* character...                     */
1118 Char quote; {                          /* protect quote character          */
1119     static char buffer[12];
1120
1121     if (c<0)                           /* deal with sign extended chars..  */
1122         c += NUM_CHARS;
1123
1124     if (isISO(c) && isIn(c,PRINT)) {   /* normal printable character       */
1125         if (c==quote || c=='\\') {     /* look for quote of approp. kind   */
1126             buffer[0] = '\\';
1127             buffer[1] = (char)c;
1128             buffer[2] = '\0';
1129         }
1130         else {
1131             buffer[0] = (char)c;
1132             buffer[1] = '\0';
1133         }
1134     }
1135     else {                             /* look for escape code             */
1136         Int escs;
1137         for (escs=0; escapes[escs].codename; escs++)
1138             if (escapes[escs].codenumber==c) {
1139                 sprintf(buffer,"\\%s",escapes[escs].codename);
1140                 return buffer;
1141             }
1142         sprintf(buffer,"\\%d",c);      /* otherwise use numeric escape     */
1143     }
1144     return buffer;
1145 }
1146
1147 Void printString(s)                    /* print string s, using quotes and */
1148 String s; {                            /* escapes if any parts need them   */
1149     if (s) {
1150         String t = s;
1151         Char   c;
1152         while ((c = *t)!=0 && isISO(c)
1153                            && isIn(c,PRINT) && c!='"' && !isIn(c,ZPACE)) {
1154             t++;
1155         }
1156         if (*t) {
1157             Putchar('"');
1158             for (t=s; *t; t++)
1159                 Printf("%s",unlexChar(*t,'"'));
1160             Putchar('"');
1161         }
1162         else
1163             Printf("%s",s);
1164     }
1165 }
1166
1167 /* -------------------------------------------------------------------------
1168  * Handle special types of input for use in interpreter:
1169  * -----------------------------------------------------------------------*/
1170
1171 Command readCommand(cmds,start,sys)    /* read command at start of input   */
1172 struct cmd *cmds;                      /* line in interpreter              */
1173 Char   start;                          /* characters introducing a cmd     */
1174 Char   sys; {                          /* character for shell escape       */
1175     while (c0==' ' || c0 =='\t')
1176         skip();
1177
1178     if (c0=='\n')                      /* look for blank command lines     */
1179         return NOCMD;
1180     if (c0==EOF)                       /* look for end of input stream     */
1181         return QUIT;
1182     if (c0==sys) {                     /* single character system escape   */
1183         skip();
1184         return SYSTEM;
1185     }
1186     if (c0==start && c1==sys) {        /* two character system escape      */
1187         skip();
1188         skip();
1189         return SYSTEM;
1190     }
1191
1192     startToken();                      /* All cmds start with start        */
1193     if (c0==start)                     /* except default (usually EVAL)    */
1194         do {                           /* which is empty                   */
1195             saveTokenChar(c0);
1196             skip();
1197         } while (c0!=EOF && !isIn(c0,ZPACE));
1198     endToken();
1199
1200     for (; cmds->cmdString; ++cmds)
1201         if (strcmp((cmds->cmdString),tokenStr)==0 ||
1202             (tokenStr[0]==start &&
1203              tokenStr[1]==(cmds->cmdString)[1] &&
1204              tokenStr[2]=='\0'))
1205             return (cmds->cmdCode);
1206     return BADCMD;
1207 }
1208
1209 String readFilename() {                /* Read filename from input (if any)*/
1210     if (reading==PROJFILE)
1211         skipWhitespace();
1212     else
1213         while (c0==' ' || c0=='\t')
1214             skip();
1215
1216     if (c0=='\n' || c0==EOF)           /* return null string at end of line*/
1217         return 0;
1218
1219     startToken();
1220     while (c0!=EOF && !isIn(c0,ZPACE)) {
1221         if (c0=='"') {
1222             skip();
1223             while (c0!=EOF && c0!='\"') {
1224                 Cell c = readAChar(TRUE);
1225                 if (nonNull(c)) {
1226                     saveTokenChar(charOf(c));
1227                 }
1228             }
1229             if (c0=='"')
1230                 skip();
1231             else {
1232                 ERRMSG(row) "a closing quote, '\"', was expected"
1233                 EEND;
1234             }
1235         }
1236         else {
1237             saveTokenChar(c0);
1238             skip();
1239         }
1240     }
1241     endToken();
1242     return tokenStr;
1243 }
1244
1245 String readLine() {                    /* Read command line from input     */
1246     while (c0==' ' || c0=='\t')        /* skip leading whitespace          */
1247         skip();
1248
1249     startToken();
1250     while (c0!='\n' && c0!=EOF) {
1251         saveTokenChar(c0);
1252         skip();
1253     }
1254     endToken();
1255
1256     return tokenStr;
1257 }
1258
1259 /* --------------------------------------------------------------------------
1260  * This lexer supports the Haskell layout rule:
1261  *
1262  * - Layout area bounded by { ... }, with `;'s in between.
1263  * - A `{' is a HARD indentation and can only be matched by a corresponding
1264  *   HARD '}'
1265  * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1266  *   is inserted with the column number of the first token after the
1267  *   WHERE/LET/OF keyword.
1268  * - When a soft indentation is uppermost on the indetation stack with
1269  *   column col' we insert:
1270  *    `}'  in front of token with column<col' and pop indentation off stack,
1271  *    `;'  in front of token with column==col'.
1272  * ------------------------------------------------------------------------*/
1273
1274 #define MAXINDENT  100                 /* maximum nesting of layout rule   */
1275 static  Int        layout[MAXINDENT+1];/* indentation stack                */
1276 #define HARD       (-1)                /* indicates hard indentation       */
1277 static  Int        indentDepth = (-1); /* current indentation nesting      */
1278
1279 static Void local goOffside(col)       /* insert offside marker            */
1280 Int col; {                             /* for specified column             */
1281     assert(offsideON);
1282     if (indentDepth>=MAXINDENT) {
1283         ERRMSG(row) "Too many levels of program nesting"
1284         EEND;
1285     }
1286     layout[++indentDepth] = col;
1287 }
1288
1289 static Void local unOffside() {        /* leave layout rule area           */
1290     assert(offsideON);
1291     indentDepth--;
1292 }
1293
1294 static Bool local canUnOffside() {     /* Decide if unoffside permitted    */
1295     assert(offsideON);
1296     return indentDepth>=0 && layout[indentDepth]!=HARD;
1297 }
1298
1299 /* --------------------------------------------------------------------------
1300  * Main tokeniser:
1301  * ------------------------------------------------------------------------*/
1302
1303 static Void local skipWhitespace() {   /* Skip over whitespace/comments    */
1304     for (;;)                           /* Strictly speaking, this code is  */
1305         if (c0==EOF)                   /* a little more liberal than the   */
1306             return;                    /* report allows ...                */
1307         else if (c0=='\n')
1308             newlineSkip();
1309         else if (isIn(c0,ZPACE))
1310             skip();
1311         else if (c0=='{' && c1=='-') { /* (potentially) nested comment     */
1312             Int nesting = 1;
1313             Int origRow = row;         /* Save original row number         */
1314             skip();
1315             skip();
1316             while (nesting>0 && c0!=EOF)
1317                 if (c0=='{' && c1=='-') {
1318                     skip();
1319                     skip();
1320                     nesting++;
1321                 }
1322                 else if (c0=='-' && c1=='}') {
1323                     skip();
1324                     skip();
1325                     nesting--;
1326                 }
1327                 else if (c0=='\n')
1328                     newlineSkip();
1329                 else
1330                     skip();
1331             if (nesting>0) {
1332                 ERRMSG(origRow) "Unterminated nested comment {- ..."
1333                 EEND;
1334             }
1335         }
1336         else if (c0=='-' && c1=='-') {  /* One line comment                */
1337             do
1338                 skip();
1339             while (c0!='\n' && c0!=EOF);
1340             if (c0=='\n')
1341                 newlineSkip();
1342         }
1343         else
1344             return;
1345 }
1346
1347 static Bool firstToken;                /* Set to TRUE for first token      */
1348 static Int  firstTokenIs;              /* ... with token value stored here */
1349
1350 static Int local yylex() {             /* Read next input token ...        */
1351     static Bool insertOpen    = FALSE;
1352     static Bool insertedToken = FALSE;
1353     static Text textRepeat;
1354
1355 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1356
1357     if (firstToken) {                  /* Special case for first token     */
1358         indentDepth   = (-1);
1359         firstToken    = FALSE;
1360         insertOpen    = FALSE;
1361         insertedToken = FALSE;
1362         if (reading==KEYBOARD)
1363             textRepeat = findText(repeatStr);
1364         return firstTokenIs;
1365     }
1366
1367     if (offsideON && insertOpen) {     /* insert `soft' opening brace      */
1368         insertOpen    = FALSE;
1369         insertedToken = TRUE;
1370         goOffside(column);
1371         push(yylval = mkInt(row));
1372         return '{';
1373     }
1374
1375     /* ----------------------------------------------------------------------
1376      * Skip white space, and insert tokens to support layout rules as reqd.
1377      * --------------------------------------------------------------------*/
1378
1379     skipWhitespace();
1380     startColumn = column;
1381     push(yylval = mkInt(row));         /* default token value is line no.  */
1382     /* subsequent changes to yylval must also set top() to the same value  */
1383
1384     if (indentDepth>=0) {              /* layout rule(s) active ?          */
1385         if (insertedToken)             /* avoid inserting multiple `;'s    */
1386             insertedToken = FALSE;     /* or putting `;' after `{'         */
1387         else
1388         if (offsideON && layout[indentDepth]!=HARD) {
1389             if (column<layout[indentDepth]) {
1390                 unOffside();
1391                 return '}';
1392             }
1393             else if (column==layout[indentDepth] && c0!=EOF) {
1394                 insertedToken = TRUE;
1395                 return ';';
1396             }
1397         }
1398     }
1399
1400     /* ----------------------------------------------------------------------
1401      * Now try to identify token type:
1402      * --------------------------------------------------------------------*/
1403
1404     if (readingInterface) {
1405        if (c0 == '(' && c1 == '#') { skip(); skip(); return UTL; };
1406        if (c0 == '#' && c1 == ')') { skip(); skip(); return UTR; };
1407     }
1408
1409     switch (c0) {
1410         case EOF  : return 0;                   /* End of file/input       */
1411
1412         /* The next 10 characters make up the `special' category in 1.3    */
1413         case '('  : skip(); return '(';
1414         case ')'  : skip(); return ')';
1415         case ','  : skip(); return ',';
1416         case ';'  : skip(); return ';';
1417         case '['  : skip(); return '[';
1418         case ']'  : skip(); return ']';
1419         case '`'  : skip(); return '`';
1420         case '{'  : if (offsideON) goOffside(HARD);
1421                     skip();
1422                     return '{';
1423         case '}'  : if (offsideON && indentDepth<0) {
1424                         ERRMSG(row) "Misplaced `}'"
1425                         EEND;
1426                     }
1427                     if (!(offsideON && layout[indentDepth]!=HARD))
1428                         skip();                         /* skip over hard }*/
1429                     if (offsideON)
1430                         unOffside();    /* otherwise, we have to insert a }*/
1431                     return '}';         /* to (try to) avoid an error...   */
1432
1433         /* Character and string literals                                   */
1434         case '\'' : top() = yylval = readChar();
1435                     return CHARLIT;
1436
1437         case '\"' : top() = yylval = readString();
1438                     return STRINGLIT;
1439     }
1440
1441 #if IPARAM
1442     if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1443         Text it;                        /* Look for implicit param name    */
1444         skip();
1445         it    = readIdent();
1446         top() = yylval = ap(IPVAR,it);
1447         return identType=IPVARID;
1448     }
1449 #endif
1450 #if TREX
1451     if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1452         Text it;                        /* Look for record selector name   */
1453         skip();
1454         it    = readIdent();
1455         top() = yylval = ap(RECSEL,mkExt(it));
1456         return identType=RECSELID;
1457     }
1458 #endif
1459     if (isIn(c0,LARGE)) {               /* Look for qualified name         */
1460         Text it = readIdent();          /* No keyword begins with LARGE ...*/
1461         if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1462             Text it2 = NIL;
1463             skip();                     /* Skip qualifying dot             */
1464             if (isIn(c0,SYMBOL)) { /* Qualified operator */
1465                 it2 = readOperator();
1466                 if (opType==CONOP) {
1467                     top() = yylval = mkQConOp(it,it2);
1468                     return QCONOP;
1469                 } else {
1470                     top() = yylval = mkQVarOp(it,it2);
1471                     return QVAROP;
1472                 }
1473             } else {               /* Qualified identifier */
1474                 it2 = readIdent();
1475                 if (identType==CONID) {
1476                     top() = yylval = mkQCon(it,it2);
1477                     return QCONID;
1478                 } else {
1479                     top() = yylval = mkQVar(it,it2);
1480                     return QVARID;
1481                 }
1482             }
1483         } else {
1484             top() = yylval = mkCon(it);
1485             return identType;
1486         }
1487     }
1488     if (isIn(c0,(SMALL|LARGE))) {
1489         Text it = readIdent();
1490
1491         if (it==textCase)              return CASEXP;
1492         if (it==textOfK)               lookAhead(OF);
1493         if (it==textData)              return DATA;
1494         if (it==textType)              return TYPE;
1495         if (it==textIf)                return IF;
1496         if (it==textThen)              return THEN;
1497         if (it==textElse)              return ELSE;
1498         if (it==textWhere)             lookAhead(WHERE);
1499         if (it==textLet)               lookAhead(LET);
1500         if (it==textIn)                return IN;
1501         if (it==textInfix)             return INFIXN;
1502         if (it==textInfixl)            return INFIXL;
1503         if (it==textInfixr)            return INFIXR;
1504         if (it==textForeign)           return FOREIGN;
1505         if (it==textUnsafe)            return UNSAFE;
1506         if (it==textNewtype)           return TNEWTYPE;
1507         if (it==textDefault)           return DEFAULT;
1508         if (it==textDeriving)          return DERIVING;
1509         if (it==textDo)                lookAhead(DO);
1510         if (it==textClass)             return TCLASS;
1511         if (it==textInstance)          return TINSTANCE;
1512         if (it==textModule)            return TMODULE;
1513         if (it==textInterface)         return INTERFACE;
1514         if (it==textInstImport)        return INSTIMPORT;
1515         if (it==textImport)            return IMPORT;
1516         if (it==textExport)            return EXPORT;
1517         if (it==textDynamic)           return DYNAMIC;
1518         if (it==textCcall)             return CCALL;
1519         if (it==textStdcall)           return STDKALL;
1520         if (it==textUUExport)          return UUEXPORT;
1521         if (it==textHiding)            return HIDING;
1522         if (it==textQualified)         return QUALIFIED;
1523         if (it==textAsMod)             return ASMOD;
1524         if (it==textPrivileged)        return PRIVILEGED;
1525         if (it==textWildcard)          return '_';
1526         if (it==textAll && !haskell98) return ALL;
1527 #if IPARAM
1528         if (it==textWith && !haskell98) lookAhead(WITH);
1529         if (it==textDlet && !haskell98) lookAhead(DLET);
1530 #endif
1531         if (it==textUUAll)             return ALL;
1532         if (it==textUUUsage)           return UUUSAGE;
1533         if (it==textRepeat && reading==KEYBOARD)
1534             return repeatLast();
1535
1536         top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1537         return identType;
1538     }
1539
1540     if (isIn(c0,SYMBOL)) {
1541         Text it = readOperator();
1542
1543         if (it==textCoco)    return COCO;
1544         if (it==textEq)      return '=';
1545         if (it==textUpto)    return UPTO;
1546         if (it==textAs)      return '@';
1547         if (it==textLambda)  return '\\';
1548         if (it==textBar)     return '|';
1549         if (it==textFrom)    return FROM;
1550         if (it==textMinus)   return '-';
1551         if (it==textPlus)    return '+';
1552         if (it==textBang)    return '!';
1553         if (it==textDot)     return '.';
1554         if (it==textArrow)   return ARROW;
1555         if (it==textLazy)    return '~';
1556         if (it==textImplies) return IMPLIES;
1557         if (it==textRepeat && reading==KEYBOARD)
1558             return repeatLast();
1559
1560         top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1561         return opType;
1562     }
1563
1564     if (isIn(c0,DIGIT)) {
1565         top() = yylval = readNumber();
1566         return NUMLIT;
1567     }
1568
1569     ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1570                 ((int)c0), column
1571     EEND;
1572     return 0; /*NOTREACHED*/
1573 }
1574
1575 static Int local repeatLast() {         /* Obtain last expression entered  */
1576     if (isNull(yylval=getLastExpr())) {
1577         ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1578         EEND;
1579     }
1580     return REPEAT;
1581 }
1582
1583 Syntax defaultSyntax(t)                 /* Find default syntax of var named*/
1584 Text t; {                               /* by t ...                        */
1585     String s = textToStr(t);
1586     return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1587 }
1588
1589 Syntax syntaxOf(n)                      /* Find syntax for name            */
1590 Name n; {
1591     if (name(n).syntax==NO_SYNTAX)      /* Return default if no syntax set */
1592         return defaultSyntax(name(n).text);
1593     return name(n).syntax;
1594 }
1595
1596 /* --------------------------------------------------------------------------
1597  * main entry points to parser/lexer:
1598  * ------------------------------------------------------------------------*/
1599
1600 static Cell local parseInput(startWith)/* Parse input with given first tok,*/
1601 Int startWith; {                       /* determining whether to read a    */
1602     Cell final   = NIL;                /* script or an expression          */
1603     firstToken   = TRUE;
1604     firstTokenIs = startWith;
1605     if (startWith==INTERFACE) {
1606        offsideON = FALSE; readingInterface = TRUE;
1607     } else {
1608        offsideON = TRUE; readingInterface = FALSE;
1609     }
1610
1611     clearStack();
1612     if (yyparse()) {                   /* This can only be parser overflow */
1613         ERRMSG(row) "Parser overflow"  /* as all syntax errors are caught  */
1614         EEND;                          /* in the parser...                 */
1615     }
1616     final = pop();
1617     if (!stackEmpty())                 /* stack should now be empty        */
1618         internal("parseInput");
1619     return final;
1620 }
1621
1622 #ifdef HSCRIPT
1623 static String memPrefix = "@mem@";
1624 static Int lenMemPrefix = 5;   /* strlen(memPrefix)*/
1625
1626 Void makeMemScript(mem,fname)
1627 String mem;
1628 String fname; {
1629    strcat(fname,memPrefix);
1630    itoa((int)mem, fname+strlen(fname), 10);
1631 }
1632
1633 Bool isMemScript(fname)
1634 String fname; {
1635    return (strstr(fname,memPrefix) != NULL);
1636 }
1637
1638 String memScriptString(fname)
1639 String fname; {
1640     String p = strstr(fname,memPrefix);
1641     if (p) {
1642         return (String)atoi(p+lenMemPrefix);
1643     } else {
1644         return NULL;
1645     }
1646 }
1647
1648 Void parseScript(fname,len)             /* Read a script, possibly from mem */
1649 String fname;
1650 Long len; {
1651     input(RESET);
1652     if (isMemScript(fname)) {
1653         char* s = memScriptString(fname);
1654         stringInput(s);
1655     } else {
1656         fileInput(fname,len);
1657     }
1658     parseInput(SCRIPT);
1659 }
1660 #else
1661 Void parseScript(nm,len)               /* Read a script                    */
1662 String nm;
1663 Long   len; {                          /* Used to set a target for reading */
1664     input(RESET);
1665     fileInput(nm,len);
1666     parseInput(SCRIPT);
1667 }
1668 #endif
1669
1670 Void parseExp() {                      /* Read an expression to evaluate   */
1671     parseInput(EXPR);
1672     setLastExpr(inputExpr);
1673 }
1674
1675
1676 #if EXPLAIN_INSTANCE_RESOLUTION
1677 Void parseContext() {                  /* Read a context to prove   */
1678     parseInput(CONTEXT);
1679 }
1680 #endif
1681
1682 Cell parseInterface(nm,len)            /* Read a GHC interface file        */
1683 String nm;
1684 Long   len; {                          /* Used to set a target for reading */
1685    input(RESET);
1686    fileInput(nm,len);
1687    return parseInput(INTERFACE);
1688 }
1689
1690
1691 /* --------------------------------------------------------------------------
1692  * Input control:
1693  * ------------------------------------------------------------------------*/
1694
1695 Void input(what)
1696 Int what; {
1697     switch (what) {
1698         case POSTPREL: break;
1699
1700         case PREPREL : initCharTab();
1701                        textCase       = findText("case");
1702                        textOfK        = findText("of");
1703                        textData       = findText("data");
1704                        textType       = findText("type");
1705                        textIf         = findText("if");
1706                        textThen       = findText("then");
1707                        textElse       = findText("else");
1708                        textWhere      = findText("where");
1709                        textLet        = findText("let");
1710                        textIn         = findText("in");
1711                        textInfix      = findText("infix");
1712                        textInfixl     = findText("infixl");
1713                        textInfixr     = findText("infixr");
1714                        textForeign    = findText("foreign");
1715                        textUnsafe     = findText("unsafe");
1716                        textNewtype    = findText("newtype");
1717                        textDefault    = findText("default");
1718                        textDeriving   = findText("deriving");
1719                        textDo         = findText("do");
1720                        textClass      = findText("class");
1721 #if IPARAM
1722                        textWith       = findText("with");
1723                        textDlet       = findText("dlet");
1724 #endif
1725                        textInstance   = findText("instance");
1726                        textCoco       = findText("::");
1727                        textEq         = findText("=");
1728                        textUpto       = findText("..");
1729                        textAs         = findText("@");
1730                        textLambda     = findText("\\");
1731                        textBar        = findText("|");
1732                        textMinus      = findText("-");
1733                        textPlus       = findText("+");
1734                        textFrom       = findText("<-");
1735                        textArrow      = findText("->");
1736                        textLazy       = findText("~");
1737                        textBang       = findText("!");
1738                        textDot        = findText(".");
1739                        textImplies    = findText("=>");
1740                        textPrelude    = findText("Prelude");
1741                        textNum        = findText("Num");
1742                        textModule     = findText("module");
1743                        textInterface  = findText("__interface");
1744                        textInstImport = findText("__instimport");
1745                        textExport     = findText("export");
1746                        textDynamic    = findText("dynamic");
1747                        textCcall      = findText("ccall");
1748                        textStdcall    = findText("stdcall");
1749                        textUUExport   = findText("__export");
1750                        textImport     = findText("import");
1751                        textHiding     = findText("hiding");
1752                        textQualified  = findText("qualified");
1753                        textAsMod      = findText("as");
1754                        textPrivileged = findText("privileged");
1755                        textWildcard   = findText("_");
1756                        textAll        = findText("forall");
1757                        textUUAll      = findText("__forall");
1758                        textUUUsage    = findText("__u");
1759                        varMinus       = mkVar(textMinus);
1760                        varPlus        = mkVar(textPlus);
1761                        varBang        = mkVar(textBang);
1762                        varDot         = mkVar(textDot);
1763                        varHiding      = mkVar(textHiding);
1764                        varQualified   = mkVar(textQualified);
1765                        varAsMod       = mkVar(textAsMod);
1766                        varPrivileged  = mkVar(textPrivileged);
1767                        conMain        = mkCon(findText("Main"));
1768                        varMain        = mkVar(findText("main"));
1769                        evalDefaults   = NIL;
1770
1771                        input(RESET);
1772                        break;
1773
1774         case RESET   : tyconDefns   = NIL;
1775                        typeInDefns  = NIL;
1776                        valDefns     = NIL;
1777                        classDefns   = NIL;
1778                        instDefns    = NIL;
1779                        selDefns     = NIL;
1780                        genDefns     = NIL;
1781                        unqualImports= NIL;
1782                        foreignImports= NIL;
1783                        foreignExports= NIL;
1784                        defaultDefns = NIL;
1785                        defaultLine  = 0;
1786                        inputExpr    = NIL;
1787                        imps         = NIL;
1788                        closeAnyInput();
1789                        break;
1790
1791         case BREAK   : if (reading==KEYBOARD)
1792                            c0 = EOF;
1793                        break;
1794
1795         case MARK    : mark(tyconDefns);
1796                        mark(typeInDefns);
1797                        mark(valDefns);
1798                        mark(classDefns);
1799                        mark(instDefns);
1800                        mark(selDefns);
1801                        mark(genDefns);
1802                        mark(unqualImports);
1803                        mark(foreignImports);
1804                        mark(foreignExports);
1805                        mark(defaultDefns);
1806                        mark(evalDefaults);
1807                        mark(inputExpr);
1808                        mark(varMinus);
1809                        mark(varPlus);
1810                        mark(varBang);
1811                        mark(varDot);
1812                        mark(varHiding);
1813                        mark(varQualified);
1814                        mark(varAsMod);
1815                        mark(varPrivileged);
1816                        mark(varMain);
1817                        mark(conMain);
1818                        mark(imps);
1819                        break;
1820     }
1821 }
1822
1823 /*-------------------------------------------------------------------------*/