ghc/interpreter/input.c

   1
   2 /* --------------------------------------------------------------------------
   3  * Input functions, lexical analysis parsing etc...
   4  *
   5  * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
   6  * Yale Haskell Group, and the Oregon Graduate Institute of Science and
   7  * Technology, 1994-1999, All rights reserved.  It is distributed as
   8  * free software under the license in the file "License", which is
   9  * included in the distribution.
  10  *
  11  * $RCSfile: input.c,v $
  12  * $Revision: 1.21 $
  13  * $Date: 2000/03/13 11:37:16 $
  14  * ------------------------------------------------------------------------*/
  15
  16 #include "prelude.h"
  17 #include "storage.h"
  18 #include "connect.h"
  19 #include "errors.h"
  20
  21 #include <ctype.h>
  22 #if HAVE_GETDELIM_H
  23 #include "getdelim.h"
  24 #endif
  25
  26 #if IS_WIN32
  27 #include <windows.h>
  28 #endif
  29
  30 #if IS_WIN32 || HUGS_FOR_WINDOWS
  31 #undef IN
  32 #endif
  33
  34 #if HAVE_LIBREADLINE && HAVE_READLINE_READLINE_H && HAVE_READLINE_HISTORY_H
  35 #define USE_READLINE 1
  36 #else
  37 #define USE_READLINE 0
  38 #endif
  39
  40 #if USE_READLINE
  41 #include <readline/readline.h>
  42 #include <readline/history.h>
  43 #endif
  44
  45
  46 /* --------------------------------------------------------------------------
  47  * Global data:
  48  * ------------------------------------------------------------------------*/
  49
  50 List tyconDefns       = NIL;            /* type constructor definitions    */
  51 List typeInDefns      = NIL;            /* type synonym restrictions       */
  52 List valDefns         = NIL;            /* value definitions in script     */
  53 List classDefns       = NIL;            /* class defns in script           */
  54 List instDefns        = NIL;            /* instance defns in script        */
  55 List selDefns         = NIL;            /* list of selector lists          */
  56 List genDefns         = NIL;            /* list of generated names         */
  57 List unqualImports    = NIL;            /* unqualified import list         */
  58 List foreignImports   = NIL;            /* foreign imports                 */
  59 List foreignExports   = NIL;            /* foreign exportsd                */
  60 List defaultDefns     = NIL;            /* default definitions (if any)    */
  61 Int  defaultLine      = 0;              /* line in which default defs occur*/
  62 List evalDefaults     = NIL;            /* defaults for evaluator          */
  63
  64 Cell inputExpr        = NIL;            /* input expression                */
  65 Cell inputContext     = NIL;            /* input context                   */
  66 Bool literateScripts  = FALSE;          /* TRUE => default to lit scripts  */
  67 Bool literateErrors   = TRUE;           /* TRUE => report errs in lit scrs */
  68 Bool offsideON        = TRUE;           /* TRUE => implement offside rule  */
  69 Bool readingInterface = FALSE;
  70
  71 String repeatStr     = 0;               /* Repeat last expr                */
  72
  73 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
  74 String preprocessor  = 0;
  75 #endif
  76
  77 /* --------------------------------------------------------------------------
  78  * Local function prototypes:
  79  * ------------------------------------------------------------------------*/
  80
  81 static Void local initCharTab     ( Void );
  82 static Void local fileInput       ( String,Long );
  83 static Bool local literateMode    ( String );
  84 static Bool local linecmp         ( String,String );
  85 static Int  local nextLine        ( Void );
  86 static Void local skip            ( Void );
  87 static Void local thisLineIs      ( Int );
  88 static Void local newlineSkip     ( Void );
  89 static Void local closeAnyInput   ( Void );
  90
  91        Int  yyparse               ( Void ); /* can't stop yacc making this   */
  92                                           /* public, but don't advertise   */
  93                                           /* it in a header file.          */
  94
  95 static Void local endToken        ( Void );
  96 static Text local readOperator    ( Void );
  97 static Text local readIdent       ( Void );
  98 static Cell local readRadixNumber ( Int );
  99 static Cell local readNumber      ( Void );
 100 static Cell local readChar        ( Void );
 101 static Cell local readString      ( Void );
 102 static Void local saveStrChr      ( Char );
 103 static Cell local readAChar       ( Bool );
 104
 105 static Bool local lazyReadMatches ( String );
 106 static Cell local readEscapeChar  ( Bool );
 107 static Void local skipGap         ( Void );
 108 static Cell local readCtrlChar    ( Void );
 109 static Cell local readOctChar     ( Void );
 110 static Cell local readHexChar     ( Void );
 111 static Int  local readHexDigit    ( Char );
 112 static Cell local readDecChar     ( Void );
 113
 114 static Void local goOffside       ( Int );
 115 static Void local unOffside       ( Void );
 116 static Bool local canUnOffside    ( Void );
 117
 118 static Void local skipWhitespace  ( Void );
 119 static Int  local yylex           ( Void );
 120 static Int  local repeatLast      ( Void );
 121
 122 static Cell local parseInput      ( Int );
 123
 124 static Bool local doesNotExceed   ( String,Int,Int );
 125 static Int  local stringToInt     ( String,Int );
 126
 127
 128 /* --------------------------------------------------------------------------
 129  * Text values for reserved words and special symbols:
 130  * ------------------------------------------------------------------------*/
 131
 132 static Text textCase,    textOfK,      textData,   textType,   textIf;
 133 static Text textThen,    textElse,     textWhere,  textLet,    textIn;
 134 static Text textInfix,   textInfixl,   textInfixr, textForeign, textNewtype;
 135 static Text textDefault, textDeriving, textDo,     textClass,  textInstance;
 136 #if IPARAM
 137 static Text textWith,  textDlet;
 138 #endif
 139
 140 static Text textCoco,    textEq,       textUpto,   textAs,     textLambda;
 141 static Text textBar,     textMinus,    textFrom,   textArrow,  textLazy;
 142 static Text textBang,    textDot,      textAll,    textImplies;
 143 static Text textWildcard;
 144
 145 static Text textModule,  textImport,    textInterface,  textInstImport;
 146 static Text textHiding,  textQualified, textAsMod,      textPrivileged;
 147 static Text textExport,  textDynamic,   textUUExport;
 148 static Text textUnsafe,  textUUAll,     textUUUsage;
 149
 150 Text   textCcall;                       /* ccall                           */
 151 Text   textStdcall;                     /* stdcall                         */
 152
 153 Text   textNum;                         /* Num                             */
 154 Text   textPrelude;                     /* Prelude                         */
 155 Text   textPlus;                        /* (+)                             */
 156
 157 static Cell conMain;                    /* Main                            */
 158 static Cell varMain;                    /* main                            */
 159
 160 static Cell varMinus;                   /* (-)                             */
 161 static Cell varPlus;                    /* (+)                             */
 162 static Cell varBang;                    /* (!)                             */
 163 static Cell varDot;                     /* (.)                             */
 164 static Cell varHiding;                  /* hiding                          */
 165 static Cell varQualified;               /* qualified                       */
 166 static Cell varAsMod;                   /* as                              */
 167 static Cell varPrivileged;              /* privileged                      */
 168
 169 static List imps;                       /* List of imports to be chased    */
 170
 171
 172 /* --------------------------------------------------------------------------
 173  * Character set handling:
 174  *
 175  * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
 176  * character set.  The following code provides methods for classifying
 177  * input characters according to the lexical structure specified by the
 178  * report.  Hugs should still accept older programs because ASCII is
 179  * essentially just a subset of the ISO character set.
 180  *
 181  * Notes: If you want to port Hugs to a machine that uses something
 182  * substantially different from the ISO character set, then you will need
 183  * to insert additional code to map between character sets.
 184  *
 185  * At some point, the following data structures may be exported in a .h
 186  * file to allow the information contained here to be picked up in the
 187  * implementation of LibChar is* primitives.
 188  *
 189  * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
 190  * ------------------------------------------------------------------------*/
 191
 192 static  Bool            charTabBuilt;
 193 static  unsigned char   ctable[NUM_CHARS];
 194 #define isIn(c,x)       (ctable[(unsigned char)(c)]&(x))
 195 #define isISO(c)        (0<=(c) && (c)<NUM_CHARS)
 196
 197 #define DIGIT           0x01
 198 #define SMALL           0x02
 199 #define LARGE           0x04
 200 #define SYMBOL          0x08
 201 #define IDAFTER         0x10
 202 #define ZPACE           0x20
 203 #define PRINT           0x40
 204
 205 static Void local initCharTab() {       /* Initialize char decode table    */
 206 #define setRange(x,f,t) {Int i=f;   while (i<=t) ctable[i++] |=x;}
 207 #define setChar(x,c)    ctable[c] |= (x)
 208 #define setChars(x,s)   {char *p=s; while (*p)   ctable[(Int)*p++]|=x;}
 209 #define setCopy(x,c)    {Int i;                         \
 210                          for (i=0; i<NUM_CHARS; ++i)    \
 211                              if (isIn(i,c))             \
 212                                  ctable[i]|=x;          \
 213                         }
 214
 215     setRange(DIGIT,     '0','9');       /* ASCII decimal digits            */
 216
 217     setRange(SMALL,     'a','z');       /* ASCII lower case letters        */
 218     setRange(SMALL,     223,246);       /* ISO lower case letters          */
 219     setRange(SMALL,     248,255);       /* (omits division symbol, 247)    */
 220     setChar (SMALL,     '_');
 221
 222     setRange(LARGE,     'A','Z');       /* ASCII upper case letters        */
 223     setRange(LARGE,     192,214);       /* ISO upper case letters          */
 224     setRange(LARGE,     216,222);       /* (omits multiplication, 215)     */
 225
 226     setRange(SYMBOL,    161,191);       /* Symbol characters + ':'         */
 227     setRange(SYMBOL,    215,215);
 228     setChar (SYMBOL,    247);
 229     setChars(SYMBOL,    ":!#$%&*+./<=>?@\\^|-~");
 230
 231     setChar (IDAFTER,   '\'');          /* Characters in identifier        */
 232     setCopy (IDAFTER,   (DIGIT|SMALL|LARGE));
 233
 234     setChar (ZPACE,     ' ');           /* ASCII space character           */
 235     setChar (ZPACE,     160);           /* ISO non breaking space          */
 236     setRange(ZPACE,     9,13);          /* special whitespace: \t\n\v\f\r  */
 237
 238     setChars(PRINT,     "(),;[]_`{}");  /* Special characters              */
 239     setChars(PRINT,     " '\"");        /* Space and quotes                */
 240     setCopy (PRINT,     (DIGIT|SMALL|LARGE|SYMBOL));
 241
 242     charTabBuilt = TRUE;
 243 #undef setRange
 244 #undef setChar
 245 #undef setChars
 246 #undef setCopy
 247 }
 248
 249
 250 /* --------------------------------------------------------------------------
 251  * Single character input routines:
 252  *
 253  * At the lowest level of input, characters are read one at a time, with the
 254  * current character held in c0 and the following (lookahead) character in
 255  * c1.  The coordinates of c0 within the file are held in (column,row).
 256  * The input stream is advanced by one character using the skip() function.
 257  * ------------------------------------------------------------------------*/
 258
 259 #define TABSIZE    8                   /* spacing between tabstops         */
 260
 261 #define NOTHING    0                   /* what kind of input is being read?*/
 262 #define KEYBOARD   1                   /* - keyboard/console?              */
 263 #define SCRIPTFILE 2                   /* - script file                    */
 264 #define PROJFILE   3                   /* - project file                   */
 265 #define STRING     4                   /* - string buffer?                 */
 266
 267 static Int    reading   = NOTHING;
 268
 269 static Target readSoFar;
 270 static Int    row, column, startColumn;
 271 static int    c0, c1;
 272 static FILE   *inputStream = 0;
 273 static Bool   thisLiterate;
 274 static String nextStringChar;          /* next char in string buffer       */
 275
 276 #if     USE_READLINE                   /* for command line editors         */
 277 static  String currentLine;            /* editline or GNU readline         */
 278 static  String nextChar;
 279 #define nextConsoleChar() \
 280            (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
 281 #else
 282 #define nextConsoleChar() getc(stdin)
 283 #endif
 284
 285 static  Int litLines;                  /* count defn lines in lit script   */
 286 #define DEFNCHAR  '>'                  /* definition lines begin with this */
 287 static  Int lastLine;                  /* records type of last line read:  */
 288 #define STARTLINE 0                    /* - at start of file, none read    */
 289 #define BLANKLINE 1                    /* - blank (may preceed definition) */
 290 #define TEXTLINE  2                    /* - text comment                   */
 291 #define DEFNLINE  3                    /* - line containing definition     */
 292 #define CODELINE  4                    /* - line inside code block         */
 293
 294 #define BEGINCODE "\\begin{code}"
 295 #define ENDCODE   "\\end{code}"
 296
 297 #if HAVE_GETDELIM_H
 298 static char *lineBuffer = NULL;   /* getline() does the initial allocation */
 299 #else
 300 #define LINEBUFFER_SIZE 1000
 301 static char lineBuffer[LINEBUFFER_SIZE];
 302 #endif
 303 static int lineLength = 0;
 304 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
 305 static int linePtr = 0;
 306
 307 Void consoleInput(prompt)              /* prepare to input characters from */
 308 String prompt; {                       /* standard in (i.e. console/kbd)   */
 309     reading     = KEYBOARD;            /* keyboard input is Line oriented, */
 310     c0          =                      /* i.e. input terminated by '\n'    */
 311     c1          = ' ';
 312     column      = (-1);
 313     row         = 0;
 314
 315 #if USE_READLINE
 316     /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
 317      * avoids accidentally freeing currentLine twice.
 318      */
 319     if (currentLine) {
 320         String oldCurrentLine = currentLine;
 321         currentLine = 0;           /* We may lose the space of currentLine */
 322         free(oldCurrentLine);      /* if interrupted here - unlikely       */
 323     }
 324     currentLine = readline(prompt);
 325     nextChar    = currentLine;
 326     if (currentLine) {
 327         if (*currentLine)
 328             add_history(currentLine);
 329     }
 330     else
 331         c0 = c1 = EOF;
 332 #else
 333     Printf("%s",prompt);
 334     FlushStdout();
 335 #endif
 336 }
 337
 338 Void projInput(nm)                     /* prepare to input characters from */
 339 String nm; {                           /* from named project file          */
 340     if ((inputStream = fopen(nm,"r"))!=0) {
 341         reading = PROJFILE;
 342         c0      = ' ';
 343         c1      = '\n';
 344         column  = 1;
 345         row     = 0;
 346     }
 347     else {
 348         ERRMSG(0) "Unable to open project file \"%s\"", nm
 349         EEND;
 350     }
 351 }
 352
 353 static Void local fileInput(nm,len)     /* prepare to input characters from*/
 354 String nm;                              /* named file (specified length is */
 355 Long   len; {                           /* used to set target for reading) */
 356 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 357     if (preprocessor) {
 358         Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
 359         char *cmd = malloc(reallen);
 360         if (cmd == NULL) {
 361             ERRMSG(0) "Unable to allocate memory for filter command."
 362             EEND;
 363         }
 364         strcpy(cmd,preprocessor);
 365         strcat(cmd," ");
 366         strcat(cmd,nm);
 367         inputStream = popen(cmd,"r");
 368         free(cmd);
 369     } else {
 370         inputStream = fopen(nm,"r");
 371     }
 372 #else
 373     inputStream = fopen(nm,"r");
 374 #endif
 375     if (inputStream) {
 376         reading      = SCRIPTFILE;
 377         c0           = ' ';
 378         c1           = '\n';
 379         column       = 1;
 380         row          = 0;
 381
 382         lastLine     = STARTLINE;       /* literate file processing */
 383         litLines     = 0;
 384         linePtr      = 0;
 385         lineLength   = 0;
 386         thisLiterate = literateMode(nm);
 387         inCodeBlock  = FALSE;
 388
 389         readSoFar    = 0;
 390         setGoal("Parsing", (Target)len);
 391     }
 392     else {
 393         ERRMSG(0) "Unable to open file \"%s\"", nm
 394         EEND;
 395     }
 396 }
 397
 398 Void stringInput(s)             /* prepare to input characters from string */
 399 String s; {
 400     reading      = STRING;
 401     c0           = EOF;
 402     c1           = EOF;
 403     if (*s) c0 = *s++;
 404     if (*s) c1 = *s++;
 405     column       = 1;
 406     row          = 1;
 407
 408     nextStringChar = s;
 409     if (!charTabBuilt)
 410         initCharTab();
 411 }
 412
 413 static Bool local literateMode(nm)      /* Select literate mode for file   */
 414 String nm; {
 415     char *dot = strrchr(nm,'.');        /* look for last dot in file name  */
 416     if (dot) {
 417         if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate    */
 418             return FALSE;
 419         if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
 420             filenamecmp(dot+1,"verb")==0) /* literate scripts              */
 421             return TRUE;
 422     }
 423     return literateScripts;             /* otherwise, use the default      */
 424 }
 425
 426
 427 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
 428 {
 429    Int len;
 430    String dot;
 431    len = 1 + strlen ( srcName );
 432    *hiName = malloc(len);
 433    *oName  = malloc(len);
 434    if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
 435    (*hiName)[0] = (*oName)[0] = 0;
 436    dot = strrchr(srcName, '.');
 437    if (!dot) return;
 438    if (filenamecmp(dot+1, "hs")==0 &&
 439        filenamecmp(dot+1, "lhs")==0 &&
 440        filenamecmp(dot+1, "verb")==0) return;
 441
 442    strcpy(*hiName, srcName);
 443    dot = strrchr(*hiName, '.');
 444    dot[1] = 'h';
 445    dot[2] = 'i';
 446    dot[3] = 0;
 447
 448    strcpy(*oName, srcName);
 449    dot = strrchr(*oName, '.');
 450    dot[1] = 'o';
 451    dot[2] = 0;
 452 }
 453
 454
 455
 456 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
 457  * I've removed the loop (since newLineSkip contains a loop too) and
 458  * replaced the warnings with errors. ADR
 459  */
 460 /*
 461  * To deal with literate \begin{code}...\end{code} blocks,
 462  * add a line buffer that rooms the current line. The old c0 and c1
 463  * stream pointers are used as before within that buffer -- sof
 464  *
 465  * Upon reading a new line into the line buffer, we check to see if
 466  * we're reading in a line containing \begin{code} or \end{code} and
 467  * take appropriate action.
 468  */
 469
 470 static Bool local linecmp(s,line)       /* compare string with line        */
 471 String s;                               /* line may end in whitespace      */
 472 String line; {
 473     Int i=0;
 474     while (s[i] != '\0' && s[i] == line[i]) {
 475         ++i;
 476     }
 477     /* s[0..i-1] == line[0..i-1] */
 478     if (s[i] != '\0') {                 /* check s `isPrefixOf` line       */
 479         return FALSE;
 480     }
 481     while (isIn(line[i], ZPACE)) {      /* allow whitespace at end of line */
 482         ++i;
 483     }
 484     return (line[i] == '\0');
 485 }
 486
 487 /* Returns line length (including \n) or 0 upon EOF. */
 488 static Int local nextLine()
 489 {
 490 #if HAVE_GETDELIM_H
 491     /*
 492        Forget about fgets(), it is utterly braindead.
 493        (Assumes \NUL free streams and does not gracefully deal
 494        with overflow.) Instead, use GNU libc's getline().
 495        */
 496     lineLength = getline(&lineBuffer, &lineLength, inputStream);
 497 #else
 498     if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
 499         lineLength = strlen(lineBuffer);
 500     else
 501         lineLength = 0;
 502 #endif
 503     /* printf("Read: \"%s\"", lineBuffer); */
 504     if (lineLength <= 0) { /* EOF / IO error, who knows.. */
 505         return lineLength;
 506     }
 507     else if (lineLength >= 2 && lineBuffer[0] == '#' &&
 508              lineBuffer[1] == '!') {
 509         lineBuffer[0]='\n'; /* pretend it's a blank line */
 510         lineBuffer[1]='\0';
 511         lineLength=1;
 512     } else if (thisLiterate) {
 513         if (linecmp(BEGINCODE, lineBuffer)) {
 514             if (!inCodeBlock) {             /* Entered a code block        */
 515                 inCodeBlock = TRUE;
 516                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 517                 lineBuffer[1]='\0';
 518                 lineLength=1;
 519             }
 520             else {
 521                 ERRMSG(row) "\\begin{code} encountered inside code block"
 522                 EEND;
 523             }
 524         }
 525         else if (linecmp(ENDCODE, lineBuffer)) {
 526             if (inCodeBlock) {              /* Finished code block         */
 527                 inCodeBlock = FALSE;
 528                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 529                 lineBuffer[1]='\0';
 530                 lineLength=1;
 531             }
 532             else {
 533                 ERRMSG(row) "\\end{code} encountered outside code block"
 534                 EEND;
 535             }
 536         }
 537     }
 538     /* printf("Read: \"%s\"", lineBuffer); */
 539     return lineLength;
 540 }
 541
 542 static Void local skip() {              /* move forward one char in input  */
 543     if (c0!=EOF) {                      /* stream, updating c0, c1, ...    */
 544         if (c0=='\n') {                 /* Adjusting cursor coords as nec. */
 545             row++;
 546             column=1;
 547             if (reading==SCRIPTFILE)
 548                 soFar(readSoFar);
 549         }
 550         else if (c0=='\t')
 551             column += TABSIZE - ((column-1)%TABSIZE);
 552         else
 553             column++;
 554
 555         c0 = c1;
 556         readSoFar++;
 557
 558         if (c0==EOF) {
 559             column = 0;
 560             if (reading==SCRIPTFILE)
 561                 done();
 562             closeAnyInput();
 563         }
 564         else if (reading==KEYBOARD) {
 565             allowBreak();
 566             if (c0=='\n')
 567                 c1 = EOF;
 568             else {
 569                 c1 = nextConsoleChar();
 570 #if IS_WIN32 && !HUGS_FOR_WINDOWS
 571                 Sleep(0);
 572 #endif
 573                 /* On Win32, hitting ctrl-C causes the next getchar to
 574                  * fail - returning "-1" to indicate an error.
 575                  * This is one of the rare cases where "-1" does not mean EOF.
 576                  */
 577                 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
 578                     c1 = ' ';
 579                 }
 580             }
 581         }
 582         else if (reading==STRING) {
 583             c1 = (unsigned char) *nextStringChar++;
 584             if (c1 == '\0')
 585                 c1 = EOF;
 586         }
 587         else {
 588             if (lineLength <=0 || linePtr == lineLength) {
 589                 /* Current line, exhausted - get new one */
 590                 if (nextLine() <= 0) { /* EOF */
 591                     c1 = EOF;
 592                 }
 593                 else {
 594                     linePtr = 0;
 595                     c1 = (unsigned char)lineBuffer[linePtr++];
 596                 }
 597             }
 598             else {
 599                 c1 = (unsigned char)lineBuffer[linePtr++];
 600             }
 601         }
 602
 603     }
 604 }
 605
 606 static Void local thisLineIs(kind)     /* register kind of current line    */
 607 Int kind; {                            /* & check for literate script errs */
 608     if (literateErrors) {
 609         if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
 610             (kind==TEXTLINE && lastLine==DEFNLINE)) {
 611             ERRMSG(row) "Program line next to comment"
 612             EEND;
 613         }
 614         lastLine = kind;
 615     }
 616 }
 617
 618 static Void local newlineSkip() {      /* skip `\n' (supports lit scripts) */
 619     /* assert(c0=='\n'); */
 620     if (reading==SCRIPTFILE && thisLiterate) {
 621         do {
 622             skip();
 623             if (inCodeBlock) {         /* pass chars on definition lines   */
 624                 thisLineIs(CODELINE);  /* to lexer (w/o leading DEFNCHAR)  */
 625                 litLines++;
 626                 return;
 627             }
 628             if (c0==DEFNCHAR) {        /* pass chars on definition lines   */
 629                 thisLineIs(DEFNLINE);  /* to lexer (w/o leading DEFNCHAR)  */
 630                 skip();
 631                 litLines++;
 632                 return;
 633             }
 634             while (c0 != '\n' && isIn(c0,ZPACE)) /* maybe line is blank?   */
 635                 skip();
 636             if (c0=='\n' || c0==EOF)
 637                 thisLineIs(BLANKLINE);
 638             else {
 639                 thisLineIs(TEXTLINE);  /* otherwise it must be a comment   */
 640                 while (c0!='\n' && c0!=EOF)
 641                     skip();
 642             }                          /* by now, c0=='\n' or c0==EOF      */
 643         } while (c0!=EOF);             /* if new line, start again         */
 644
 645         if (litLines==0 && literateErrors) {
 646             ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
 647                         DEFNCHAR
 648             EEND;
 649         }
 650         return;
 651     }
 652     skip();
 653 }
 654
 655 static Void local closeAnyInput() {    /* Close input stream, if open,     */
 656     switch (reading) {                 /* or skip to end of console line   */
 657         case PROJFILE   :
 658         case SCRIPTFILE : if (inputStream) {
 659 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 660                               if (preprocessor) {
 661                                   pclose(inputStream);
 662                               } else {
 663                                   fclose(inputStream);
 664                               }
 665 #else
 666                               fclose(inputStream);
 667 #endif
 668                               inputStream = 0;
 669                           }
 670                           break;
 671         case KEYBOARD   : while (c0!=EOF)
 672                               skip();
 673                           break;
 674     }
 675     reading=NOTHING;
 676 }
 677
 678 /* --------------------------------------------------------------------------
 679  * Parser: Uses table driven parser generated from parser.y using yacc
 680  * ------------------------------------------------------------------------*/
 681
 682 #include "parser.c"
 683
 684 /* --------------------------------------------------------------------------
 685  * Single token input routines:
 686  *
 687  * The following routines read the values of particular kinds of token given
 688  * that the first character of the token has already been located in c0 on
 689  * entry to the routine.
 690  * ------------------------------------------------------------------------*/
 691
 692 #define MAX_TOKEN           4000
 693 #define startToken()        tokPos = 0
 694 #define saveTokenChar(c)    if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
 695 #define saveChar(c)         tokenStr[tokPos++]=(char)(c)
 696 #define overflows(n,b,d,m)  (n > ((m)-(d))/(b))
 697
 698 static char tokenStr[MAX_TOKEN+1];     /* token buffer                     */
 699 static Int  tokPos;                    /* input position in buffer         */
 700 static Int  identType;                 /* identifier type: CONID / VARID   */
 701 static Int  opType;                    /* operator type  : CONOP / VAROP   */
 702
 703 static Void local endToken() {         /* check for token overflow         */
 704     if (tokPos>MAX_TOKEN) {
 705         ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
 706         EEND;
 707     }
 708     tokenStr[tokPos] = '\0';
 709 }
 710
 711 static Text local readOperator() {     /* read operator symbol             */
 712     startToken();
 713     do {
 714         saveTokenChar(c0);
 715         skip();
 716     } while (isISO(c0) && isIn(c0,SYMBOL));
 717     opType = (tokenStr[0]==':' ? CONOP : VAROP);
 718     endToken();
 719     return findText(tokenStr);
 720 }
 721
 722 static Text local readIdent() {        /* read identifier                  */
 723     startToken();
 724     do {
 725         saveTokenChar(c0);
 726         skip();
 727     } while (isISO(c0) && isIn(c0,IDAFTER));
 728     endToken();
 729     identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
 730     if (readingInterface)
 731        return unZcodeThenFindText(tokenStr); else
 732        return findText(tokenStr);
 733 }
 734
 735
 736 static Bool local doesNotExceed(s,radix,limit)
 737 String s;
 738 Int    radix;
 739 Int    limit; {
 740     Int n = 0;
 741     Int p = 0;
 742     while (TRUE) {
 743         if (s[p] == 0) return TRUE;
 744         if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
 745         n = radix*n + (s[p]-'0');
 746         p++;
 747     }
 748 }
 749
 750 static Int local stringToInt(s,radix)
 751 String s;
 752 Int    radix; {
 753     Int n = 0;
 754     Int p = 0;
 755     while (TRUE) {
 756         if (s[p] == 0) return n;
 757         n = radix*n + (s[p]-'0');
 758         p++;
 759     }
 760 }
 761
 762 static Cell local readRadixNumber(r)   /* Read literal in specified radix  */
 763 Int r; {                               /* from input of the form 0c{digs}  */
 764     Int d;
 765     startToken();
 766     skip();                            /* skip leading zero                */
 767     if ((d=readHexDigit(c1))<0 || d>=r) {
 768         /* Special case; no digits, lex as  */
 769         /* if it had been written "0 c..."  */
 770         saveTokenChar('0');
 771     } else {
 772         skip();
 773         do {
 774             saveTokenChar('0'+readHexDigit(c0));
 775             skip();
 776             d = readHexDigit(c0);
 777         } while (d>=0 && d<r);
 778     }
 779     endToken();
 780
 781     if (doesNotExceed(tokenStr,r,MAXPOSINT))
 782         return mkInt(stringToInt(tokenStr,r));
 783     else
 784     if (r == 10)
 785         return stringToBignum(tokenStr);
 786     else {
 787         ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
 788         EEND;
 789     }
 790 }
 791
 792 static Cell local readNumber() {        /* read numeric constant           */
 793
 794     if (c0=='0') {
 795         if (c1=='x' || c1=='X')         /* Maybe a hexadecimal literal?    */
 796             return readRadixNumber(16);
 797         if (c1=='o' || c1=='O')         /* Maybe an octal literal?         */
 798             return readRadixNumber(8);
 799     }
 800
 801     startToken();
 802     do {
 803         saveTokenChar(c0);
 804         skip();
 805     } while (isISO(c0) && isIn(c0,DIGIT));
 806
 807     if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
 808         endToken();
 809         if (doesNotExceed(tokenStr,10,MAXPOSINT))
 810             return mkInt(stringToInt(tokenStr,10)); else
 811             return stringToBignum(tokenStr);
 812     }
 813
 814     saveTokenChar(c0);                  /* save decimal point              */
 815     skip();
 816     do {                                /* process fractional part ...     */
 817         saveTokenChar(c0);
 818         skip();
 819     } while (isISO(c0) && isIn(c0,DIGIT));
 820
 821     if (c0=='e' || c0=='E') {           /* look for exponent part...       */
 822         saveTokenChar('e');
 823         skip();
 824         if (c0=='-') {
 825             saveTokenChar('-');
 826             skip();
 827         }
 828         else if (c0=='+')
 829             skip();
 830
 831         if (!isISO(c0) || !isIn(c0,DIGIT)) {
 832             ERRMSG(row) "Missing digits in exponent"
 833             EEND;
 834         }
 835         else {
 836             do {
 837                 saveTokenChar(c0);
 838                 skip();
 839             } while (isISO(c0) && isIn(c0,DIGIT));
 840         }
 841     }
 842
 843     endToken();
 844     return mkFloat(stringToFloat(tokenStr));
 845 }
 846
 847
 848
 849
 850
 851
 852
 853 static Cell local readChar() {         /* read character constant          */
 854     Cell charRead;
 855
 856     skip(/* '\'' */);
 857     if (c0=='\'' || c0=='\n' || c0==EOF) {
 858         ERRMSG(row) "Illegal character constant"
 859         EEND;
 860     }
 861
 862     charRead = readAChar(FALSE);
 863
 864     if (c0=='\'')
 865         skip(/* '\'' */);
 866     else {
 867         ERRMSG(row) "Improperly terminated character constant"
 868         EEND;
 869     }
 870     return charRead;
 871 }
 872
 873 static Cell local readString() {       /* read string literal              */
 874     Cell c;
 875
 876     startToken();
 877     skip(/* '\"' */);
 878     while (c0!='\"' && c0!='\n' && c0!=EOF) {
 879         c = readAChar(TRUE);
 880         if (nonNull(c))
 881             saveStrChr(charOf(c));
 882     }
 883
 884     if (c0=='\"')
 885         skip(/* '\"' */);
 886     else {
 887         ERRMSG(row) "Improperly terminated string"
 888         EEND;
 889     }
 890     endToken();
 891     return mkStr(findText(tokenStr));
 892 }
 893
 894 static Void local saveStrChr(c)        /* save character in string         */
 895 Char c; {
 896     if (c!='\0' && c!='\\') {          /* save non null char as single char*/
 897         saveTokenChar(c);
 898     }
 899     else {                             /* save null char as TWO null chars */
 900         if (tokPos+1<MAX_TOKEN) {
 901             saveChar('\\');
 902             if (c=='\\')
 903                 saveChar('\\');
 904             else
 905                 saveChar('0');
 906         }
 907     }
 908 }
 909
 910 static Cell local readAChar(isStrLit)  /* read single char constant        */
 911 Bool isStrLit; {                       /* TRUE => enable \& and gaps       */
 912     Cell c = mkChar(c0);
 913
 914     if (c0=='\\')                      /* escape character?                */
 915         return readEscapeChar(isStrLit);
 916     if (!isISO(c0)) {
 917         ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
 918         EEND;
 919     }
 920     skip();                            /* normal character?                */
 921     return c;
 922 }
 923
 924 /* --------------------------------------------------------------------------
 925  * Character escape code sequences:
 926  * ------------------------------------------------------------------------*/
 927
 928 static struct {                        /* table of special escape codes    */
 929     char *codename;
 930     int  codenumber;
 931 } escapes[] = {
 932    {"a",    7}, {"b",    8}, {"f",   12}, {"n",   10},  /* common escapes  */
 933    {"r",   13}, {"t",    9}, {"\\",'\\'}, {"\"",'\"'},
 934    {"\'",'\''}, {"v",   11},
 935    {"NUL",  0}, {"SOH",  1}, {"STX",  2}, {"ETX",  3},  /* ascii codenames */
 936    {"EOT",  4}, {"ENQ",  5}, {"ACK",  6}, {"BEL",  7},
 937    {"BS",   8}, {"HT",   9}, {"LF",  10}, {"VT",  11},
 938    {"FF",  12}, {"CR",  13}, {"SO",  14}, {"SI",  15},
 939    {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
 940    {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
 941    {"CAN", 24}, {"EM",  25}, {"SUB", 26}, {"ESC", 27},
 942    {"FS",  28}, {"GS",  29}, {"RS",  30}, {"US",  31},
 943    {"SP",  32}, {"DEL", 127},
 944    {0,0}
 945 };
 946
 947 static Int  alreadyMatched;            /* Record portion of input stream   */
 948 static char alreadyRead[10];           /* that has been read w/o a match   */
 949
 950 static Bool local lazyReadMatches(s)   /* compare input stream with string */
 951 String s; {                            /* possibly using characters that   */
 952     int i;                             /* have already been read           */
 953
 954     for (i=0; i<alreadyMatched; ++i)
 955         if (alreadyRead[i]!=s[i])
 956             return FALSE;
 957
 958     while (s[i] && s[i]==c0) {
 959         alreadyRead[alreadyMatched++]=(char)c0;
 960         skip();
 961         i++;
 962     }
 963
 964     return s[i]=='\0';
 965 }
 966
 967 static Cell local readEscapeChar(isStrLit)/* read escape character         */
 968 Bool isStrLit; {
 969     int i=0;
 970
 971     skip(/* '\\' */);
 972     switch (c0) {
 973         case '&'  : if (isStrLit) {
 974                         skip();
 975                         return NIL;
 976                     }
 977                     ERRMSG(row) "Illegal use of `\\&' in character constant"
 978                     EEND;
 979                     break;/*NOTREACHED*/
 980
 981         case '^'  : return readCtrlChar();
 982
 983         case 'o'  : return readOctChar();
 984         case 'x'  : return readHexChar();
 985
 986         default   : if (!isISO(c0)) {
 987                         ERRMSG(row) "Illegal escape sequence"
 988                         EEND;
 989                     }
 990                     else if (isIn(c0,ZPACE)) {
 991                         if (isStrLit) {
 992                             skipGap();
 993                             return NIL;
 994                         }
 995                         ERRMSG(row) "Illegal use of gap in character constant"
 996                         EEND;
 997                         break;
 998                     }
 999                     else if (isIn(c0,DIGIT))
1000                         return readDecChar();
1001     }
1002
1003     for (alreadyMatched=0; escapes[i].codename; i++)
1004         if (lazyReadMatches(escapes[i].codename))
1005             return mkChar(escapes[i].codenumber);
1006
1007     alreadyRead[alreadyMatched++] = (char)c0;
1008     alreadyRead[alreadyMatched++] = '\0';
1009     ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
1010                 alreadyRead
1011     EEND;
1012     return NIL;/*NOTREACHED*/
1013 }
1014
1015 static Void local skipGap() {          /* skip over gap in string literal  */
1016     do                                 /* (simplified in Haskell 1.1)      */
1017         if (c0=='\n')
1018             newlineSkip();
1019         else
1020             skip();
1021     while (isISO(c0) && isIn(c0,ZPACE));
1022     if (c0!='\\') {
1023         ERRMSG(row) "Missing `\\' terminating string literal gap"
1024         EEND;
1025     }
1026     skip(/* '\\' */);
1027 }
1028
1029 static Cell local readCtrlChar() {     /* read escape sequence \^x         */
1030     static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1031     String which;
1032
1033     skip(/* '^' */);
1034     if ((which = strchr(controls,c0))==NULL) {
1035         ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1036         EEND;
1037     }
1038     skip();
1039     return mkChar(which-controls);
1040 }
1041
1042 static Cell local readOctChar() {      /* read octal character constant    */
1043     Int n = 0;
1044     Int d;
1045
1046     skip(/* 'o' */);
1047     if ((d = readHexDigit(c0))<0 || d>=8) {
1048         ERRMSG(row) "Empty octal character escape"
1049         EEND;
1050     }
1051     do {
1052         if (overflows(n,8,d,MAXCHARVAL)) {
1053             ERRMSG(row) "Octal character escape out of range"
1054             EEND;
1055         }
1056         n = 8*n + d;
1057         skip();
1058     } while ((d = readHexDigit(c0))>=0 && d<8);
1059
1060     return mkChar(n);
1061 }
1062
1063 static Cell local readHexChar() {      /* read hex character constant      */
1064     Int n = 0;
1065     Int d;
1066
1067     skip(/* 'x' */);
1068     if ((d = readHexDigit(c0))<0) {
1069         ERRMSG(row) "Empty hexadecimal character escape"
1070         EEND;
1071     }
1072     do {
1073         if (overflows(n,16,d,MAXCHARVAL)) {
1074             ERRMSG(row) "Hexadecimal character escape out of range"
1075             EEND;
1076         }
1077         n = 16*n + d;
1078         skip();
1079     } while ((d = readHexDigit(c0))>=0);
1080
1081     return mkChar(n);
1082 }
1083
1084 static Int local readHexDigit(c)       /* read single hex digit            */
1085 Char c; {
1086     if ('0'<=c && c<='9')
1087         return c-'0';
1088     if ('A'<=c && c<='F')
1089         return 10 + (c-'A');
1090     if ('a'<=c && c<='f')
1091         return 10 + (c-'a');
1092     return -1;
1093 }
1094
1095 static Cell local readDecChar() {      /* read decimal character constant  */
1096     Int n = 0;
1097
1098     do {
1099         if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1100             ERRMSG(row) "Decimal character escape out of range"
1101             EEND;
1102         }
1103         n = 10*n + (c0-'0');
1104         skip();
1105     } while (c0!=EOF && isIn(c0,DIGIT));
1106
1107     return mkChar(n);
1108 }
1109
1110 /* --------------------------------------------------------------------------
1111  * Produce printable representation of character:
1112  * ------------------------------------------------------------------------*/
1113
1114 String unlexChar(c,quote)              /* return string representation of  */
1115 Char c;                                /* character...                     */
1116 Char quote; {                          /* protect quote character          */
1117     static char buffer[12];
1118
1119     if (c<0)                           /* deal with sign extended chars..  */
1120         c += NUM_CHARS;
1121
1122     if (isISO(c) && isIn(c,PRINT)) {   /* normal printable character       */
1123         if (c==quote || c=='\\') {     /* look for quote of approp. kind   */
1124             buffer[0] = '\\';
1125             buffer[1] = (char)c;
1126             buffer[2] = '\0';
1127         }
1128         else {
1129             buffer[0] = (char)c;
1130             buffer[1] = '\0';
1131         }
1132     }
1133     else {                             /* look for escape code             */
1134         Int escs;
1135         for (escs=0; escapes[escs].codename; escs++)
1136             if (escapes[escs].codenumber==c) {
1137                 sprintf(buffer,"\\%s",escapes[escs].codename);
1138                 return buffer;
1139             }
1140         sprintf(buffer,"\\%d",c);      /* otherwise use numeric escape     */
1141     }
1142     return buffer;
1143 }
1144
1145 Void printString(s)                    /* print string s, using quotes and */
1146 String s; {                            /* escapes if any parts need them   */
1147     if (s) {
1148         String t = s;
1149         Char   c;
1150         while ((c = *t)!=0 && isISO(c)
1151                            && isIn(c,PRINT) && c!='"' && !isIn(c,ZPACE)) {
1152             t++;
1153         }
1154         if (*t) {
1155             Putchar('"');
1156             for (t=s; *t; t++)
1157                 Printf("%s",unlexChar(*t,'"'));
1158             Putchar('"');
1159         }
1160         else
1161             Printf("%s",s);
1162     }
1163 }
1164
1165 /* -------------------------------------------------------------------------
1166  * Handle special types of input for use in interpreter:
1167  * -----------------------------------------------------------------------*/
1168
1169 Command readCommand(cmds,start,sys)    /* read command at start of input   */
1170 struct cmd *cmds;                      /* line in interpreter              */
1171 Char   start;                          /* characters introducing a cmd     */
1172 Char   sys; {                          /* character for shell escape       */
1173     while (c0==' ' || c0 =='\t')
1174         skip();
1175
1176     if (c0=='\n')                      /* look for blank command lines     */
1177         return NOCMD;
1178     if (c0==EOF)                       /* look for end of input stream     */
1179         return QUIT;
1180     if (c0==sys) {                     /* single character system escape   */
1181         skip();
1182         return SYSTEM;
1183     }
1184     if (c0==start && c1==sys) {        /* two character system escape      */
1185         skip();
1186         skip();
1187         return SYSTEM;
1188     }
1189
1190     startToken();                      /* All cmds start with start        */
1191     if (c0==start)                     /* except default (usually EVAL)    */
1192         do {                           /* which is empty                   */
1193             saveTokenChar(c0);
1194             skip();
1195         } while (c0!=EOF && !isIn(c0,ZPACE));
1196     endToken();
1197
1198     for (; cmds->cmdString; ++cmds)
1199         if (strcmp((cmds->cmdString),tokenStr)==0 ||
1200             (tokenStr[0]==start &&
1201              tokenStr[1]==(cmds->cmdString)[1] &&
1202              tokenStr[2]=='\0'))
1203             return (cmds->cmdCode);
1204     return BADCMD;
1205 }
1206
1207 String readFilename() {                /* Read filename from input (if any)*/
1208     if (reading==PROJFILE)
1209         skipWhitespace();
1210     else
1211         while (c0==' ' || c0=='\t')
1212             skip();
1213
1214     if (c0=='\n' || c0==EOF)           /* return null string at end of line*/
1215         return 0;
1216
1217     startToken();
1218     while (c0!=EOF && !isIn(c0,ZPACE)) {
1219         if (c0=='"') {
1220             skip();
1221             while (c0!=EOF && c0!='\"') {
1222                 Cell c = readAChar(TRUE);
1223                 if (nonNull(c)) {
1224                     saveTokenChar(charOf(c));
1225                 }
1226             }
1227             if (c0=='"')
1228                 skip();
1229             else {
1230                 ERRMSG(row) "a closing quote, '\"', was expected"
1231                 EEND;
1232             }
1233         }
1234         else {
1235             saveTokenChar(c0);
1236             skip();
1237         }
1238     }
1239     endToken();
1240     return tokenStr;
1241 }
1242
1243 String readLine() {                    /* Read command line from input     */
1244     while (c0==' ' || c0=='\t')        /* skip leading whitespace          */
1245         skip();
1246
1247     startToken();
1248     while (c0!='\n' && c0!=EOF) {
1249         saveTokenChar(c0);
1250         skip();
1251     }
1252     endToken();
1253
1254     return tokenStr;
1255 }
1256
1257 /* --------------------------------------------------------------------------
1258  * This lexer supports the Haskell layout rule:
1259  *
1260  * - Layout area bounded by { ... }, with `;'s in between.
1261  * - A `{' is a HARD indentation and can only be matched by a corresponding
1262  *   HARD '}'
1263  * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1264  *   is inserted with the column number of the first token after the
1265  *   WHERE/LET/OF keyword.
1266  * - When a soft indentation is uppermost on the indetation stack with
1267  *   column col' we insert:
1268  *    `}'  in front of token with column<col' and pop indentation off stack,
1269  *    `;'  in front of token with column==col'.
1270  * ------------------------------------------------------------------------*/
1271
1272 #define MAXINDENT  100                 /* maximum nesting of layout rule   */
1273 static  Int        layout[MAXINDENT+1];/* indentation stack                */
1274 #define HARD       (-1)                /* indicates hard indentation       */
1275 static  Int        indentDepth = (-1); /* current indentation nesting      */
1276
1277 static Void local goOffside(col)       /* insert offside marker            */
1278 Int col; {                             /* for specified column             */
1279     assert(offsideON);
1280     if (indentDepth>=MAXINDENT) {
1281         ERRMSG(row) "Too many levels of program nesting"
1282         EEND;
1283     }
1284     layout[++indentDepth] = col;
1285 }
1286
1287 static Void local unOffside() {        /* leave layout rule area           */
1288     assert(offsideON);
1289     indentDepth--;
1290 }
1291
1292 static Bool local canUnOffside() {     /* Decide if unoffside permitted    */
1293     assert(offsideON);
1294     return indentDepth>=0 && layout[indentDepth]!=HARD;
1295 }
1296
1297 /* --------------------------------------------------------------------------
1298  * Main tokeniser:
1299  * ------------------------------------------------------------------------*/
1300
1301 static Void local skipWhitespace() {   /* Skip over whitespace/comments    */
1302     for (;;)                           /* Strictly speaking, this code is  */
1303         if (c0==EOF)                   /* a little more liberal than the   */
1304             return;                    /* report allows ...                */
1305         else if (c0=='\n')
1306             newlineSkip();
1307         else if (isIn(c0,ZPACE))
1308             skip();
1309         else if (c0=='{' && c1=='-') { /* (potentially) nested comment     */
1310             Int nesting = 1;
1311             Int origRow = row;         /* Save original row number         */
1312             skip();
1313             skip();
1314             while (nesting>0 && c0!=EOF)
1315                 if (c0=='{' && c1=='-') {
1316                     skip();
1317                     skip();
1318                     nesting++;
1319                 }
1320                 else if (c0=='-' && c1=='}') {
1321                     skip();
1322                     skip();
1323                     nesting--;
1324                 }
1325                 else if (c0=='\n')
1326                     newlineSkip();
1327                 else
1328                     skip();
1329             if (nesting>0) {
1330                 ERRMSG(origRow) "Unterminated nested comment {- ..."
1331                 EEND;
1332             }
1333         }
1334         else if (c0=='-' && c1=='-') {  /* One line comment                */
1335             do
1336                 skip();
1337             while (c0!='\n' && c0!=EOF);
1338             if (c0=='\n')
1339                 newlineSkip();
1340         }
1341         else
1342             return;
1343 }
1344
1345 static Bool firstToken;                /* Set to TRUE for first token      */
1346 static Int  firstTokenIs;              /* ... with token value stored here */
1347
1348 static Int local yylex() {             /* Read next input token ...        */
1349     static Bool insertOpen    = FALSE;
1350     static Bool insertedToken = FALSE;
1351     static Text textRepeat;
1352
1353 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1354
1355     if (firstToken) {                  /* Special case for first token     */
1356         indentDepth   = (-1);
1357         firstToken    = FALSE;
1358         insertOpen    = FALSE;
1359         insertedToken = FALSE;
1360         if (reading==KEYBOARD)
1361             textRepeat = findText(repeatStr);
1362         return firstTokenIs;
1363     }
1364
1365     if (offsideON && insertOpen) {     /* insert `soft' opening brace      */
1366         insertOpen    = FALSE;
1367         insertedToken = TRUE;
1368         goOffside(column);
1369         push(yylval = mkInt(row));
1370         return '{';
1371     }
1372
1373     /* ----------------------------------------------------------------------
1374      * Skip white space, and insert tokens to support layout rules as reqd.
1375      * --------------------------------------------------------------------*/
1376
1377     skipWhitespace();
1378     startColumn = column;
1379     push(yylval = mkInt(row));         /* default token value is line no.  */
1380     /* subsequent changes to yylval must also set top() to the same value  */
1381
1382     if (indentDepth>=0) {              /* layout rule(s) active ?          */
1383         if (insertedToken)             /* avoid inserting multiple `;'s    */
1384             insertedToken = FALSE;     /* or putting `;' after `{'         */
1385         else
1386         if (offsideON && layout[indentDepth]!=HARD) {
1387             if (column<layout[indentDepth]) {
1388                 unOffside();
1389                 return '}';
1390             }
1391             else if (column==layout[indentDepth] && c0!=EOF) {
1392                 insertedToken = TRUE;
1393                 return ';';
1394             }
1395         }
1396     }
1397
1398     /* ----------------------------------------------------------------------
1399      * Now try to identify token type:
1400      * --------------------------------------------------------------------*/
1401
1402     if (readingInterface) {
1403        if (c0 == '(' && c1 == '#') { skip(); skip(); return UTL; };
1404        if (c0 == '#' && c1 == ')') { skip(); skip(); return UTR; };
1405     }
1406
1407     switch (c0) {
1408         case EOF  : return 0;                   /* End of file/input       */
1409
1410         /* The next 10 characters make up the `special' category in 1.3    */
1411         case '('  : skip(); return '(';
1412         case ')'  : skip(); return ')';
1413         case ','  : skip(); return ',';
1414         case ';'  : skip(); return ';';
1415         case '['  : skip(); return '[';
1416         case ']'  : skip(); return ']';
1417         case '`'  : skip(); return '`';
1418         case '{'  : if (offsideON) goOffside(HARD);
1419                     skip();
1420                     return '{';
1421         case '}'  : if (offsideON && indentDepth<0) {
1422                         ERRMSG(row) "Misplaced `}'"
1423                         EEND;
1424                     }
1425                     if (!(offsideON && layout[indentDepth]!=HARD))
1426                         skip();                         /* skip over hard }*/
1427                     if (offsideON)
1428                         unOffside();    /* otherwise, we have to insert a }*/
1429                     return '}';         /* to (try to) avoid an error...   */
1430
1431         /* Character and string literals                                   */
1432         case '\'' : top() = yylval = readChar();
1433                     return CHARLIT;
1434
1435         case '\"' : top() = yylval = readString();
1436                     return STRINGLIT;
1437     }
1438
1439 #if IPARAM
1440     if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1441         Text it;                        /* Look for implicit param name    */
1442         skip();
1443         it    = readIdent();
1444         top() = yylval = ap(IPVAR,it);
1445         return identType=IPVARID;
1446     }
1447 #endif
1448 #if TREX
1449     if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1450         Text it;                        /* Look for record selector name   */
1451         skip();
1452         it    = readIdent();
1453         top() = yylval = ap(RECSEL,mkExt(it));
1454         return identType=RECSELID;
1455     }
1456 #endif
1457     if (isIn(c0,LARGE)) {               /* Look for qualified name         */
1458         Text it = readIdent();          /* No keyword begins with LARGE ...*/
1459         if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1460             Text it2 = NIL;
1461             skip();                     /* Skip qualifying dot             */
1462             if (isIn(c0,SYMBOL)) { /* Qualified operator */
1463                 it2 = readOperator();
1464                 if (opType==CONOP) {
1465                     top() = yylval = mkQConOp(it,it2);
1466                     return QCONOP;
1467                 } else {
1468                     top() = yylval = mkQVarOp(it,it2);
1469                     return QVAROP;
1470                 }
1471             } else {               /* Qualified identifier */
1472                 it2 = readIdent();
1473                 if (identType==CONID) {
1474                     top() = yylval = mkQCon(it,it2);
1475                     return QCONID;
1476                 } else {
1477                     top() = yylval = mkQVar(it,it2);
1478                     return QVARID;
1479                 }
1480             }
1481         } else {
1482             top() = yylval = mkCon(it);
1483             return identType;
1484         }
1485     }
1486     if (isIn(c0,(SMALL|LARGE))) {
1487         Text it = readIdent();
1488
1489         if (it==textCase)              return CASEXP;
1490         if (it==textOfK)               lookAhead(OF);
1491         if (it==textData)              return DATA;
1492         if (it==textType)              return TYPE;
1493         if (it==textIf)                return IF;
1494         if (it==textThen)              return THEN;
1495         if (it==textElse)              return ELSE;
1496         if (it==textWhere)             lookAhead(WHERE);
1497         if (it==textLet)               lookAhead(LET);
1498         if (it==textIn)                return IN;
1499         if (it==textInfix)             return INFIXN;
1500         if (it==textInfixl)            return INFIXL;
1501         if (it==textInfixr)            return INFIXR;
1502         if (it==textForeign)           return FOREIGN;
1503         if (it==textUnsafe)            return UNSAFE;
1504         if (it==textNewtype)           return TNEWTYPE;
1505         if (it==textDefault)           return DEFAULT;
1506         if (it==textDeriving)          return DERIVING;
1507         if (it==textDo)                lookAhead(DO);
1508         if (it==textClass)             return TCLASS;
1509         if (it==textInstance)          return TINSTANCE;
1510         if (it==textModule)            return TMODULE;
1511         if (it==textInterface)         return INTERFACE;
1512         if (it==textInstImport)        return INSTIMPORT;
1513         if (it==textImport)            return IMPORT;
1514         if (it==textExport)            return EXPORT;
1515         if (it==textDynamic)           return DYNAMIC;
1516         if (it==textCcall)             return CCALL;
1517         if (it==textStdcall)           return STDKALL;
1518         if (it==textUUExport)          return UUEXPORT;
1519         if (it==textHiding)            return HIDING;
1520         if (it==textQualified)         return QUALIFIED;
1521         if (it==textAsMod)             return ASMOD;
1522         if (it==textPrivileged)        return PRIVILEGED;
1523         if (it==textWildcard)          return '_';
1524         if (it==textAll && !haskell98) return ALL;
1525 #if IPARAM
1526         if (it==textWith && !haskell98) lookAhead(WITH);
1527         if (it==textDlet && !haskell98) lookAhead(DLET);
1528 #endif
1529         if (it==textUUAll)             return ALL;
1530         if (it==textUUUsage)           return UUUSAGE;
1531         if (it==textRepeat && reading==KEYBOARD)
1532             return repeatLast();
1533
1534         top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1535         return identType;
1536     }
1537
1538     if (isIn(c0,SYMBOL)) {
1539         Text it = readOperator();
1540
1541         if (it==textCoco)    return COCO;
1542         if (it==textEq)      return '=';
1543         if (it==textUpto)    return UPTO;
1544         if (it==textAs)      return '@';
1545         if (it==textLambda)  return '\\';
1546         if (it==textBar)     return '|';
1547         if (it==textFrom)    return FROM;
1548         if (it==textMinus)   return '-';
1549         if (it==textPlus)    return '+';
1550         if (it==textBang)    return '!';
1551         if (it==textDot)     return '.';
1552         if (it==textArrow)   return ARROW;
1553         if (it==textLazy)    return '~';
1554         if (it==textImplies) return IMPLIES;
1555         if (it==textRepeat && reading==KEYBOARD)
1556             return repeatLast();
1557
1558         top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1559         return opType;
1560     }
1561
1562     if (isIn(c0,DIGIT)) {
1563         top() = yylval = readNumber();
1564         return NUMLIT;
1565     }
1566
1567     ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1568                 ((int)c0), column
1569     EEND;
1570     return 0; /*NOTREACHED*/
1571 }
1572
1573 static Int local repeatLast() {         /* Obtain last expression entered  */
1574     if (isNull(yylval=getLastExpr())) {
1575         ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1576         EEND;
1577     }
1578     return REPEAT;
1579 }
1580
1581 Syntax defaultSyntax(t)                 /* Find default syntax of var named*/
1582 Text t; {                               /* by t ...                        */
1583     String s = textToStr(t);
1584     return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1585 }
1586
1587 Syntax syntaxOf(n)                      /* Find syntax for name            */
1588 Name n; {
1589     if (name(n).syntax==NO_SYNTAX)      /* Return default if no syntax set */
1590         return defaultSyntax(name(n).text);
1591     return name(n).syntax;
1592 }
1593
1594 /* --------------------------------------------------------------------------
1595  * main entry points to parser/lexer:
1596  * ------------------------------------------------------------------------*/
1597
1598 static Cell local parseInput(startWith)/* Parse input with given first tok,*/
1599 Int startWith; {                       /* determining whether to read a    */
1600     Cell final   = NIL;                /* script or an expression          */
1601     firstToken   = TRUE;
1602     firstTokenIs = startWith;
1603     if (startWith==INTERFACE) {
1604        offsideON = FALSE; readingInterface = TRUE;
1605     } else {
1606        offsideON = TRUE; readingInterface = FALSE;
1607     }
1608
1609     clearStack();
1610     if (yyparse()) {                   /* This can only be parser overflow */
1611         ERRMSG(row) "Parser overflow"  /* as all syntax errors are caught  */
1612         EEND;                          /* in the parser...                 */
1613     }
1614     final = pop();
1615     if (!stackEmpty())                 /* stack should now be empty        */
1616         internal("parseInput");
1617     return final;
1618 }
1619
1620 #ifdef HSCRIPT
1621 static String memPrefix = "@mem@";
1622 static Int lenMemPrefix = 5;   /* strlen(memPrefix)*/
1623
1624 Void makeMemScript(mem,fname)
1625 String mem;
1626 String fname; {
1627    strcat(fname,memPrefix);
1628    itoa((int)mem, fname+strlen(fname), 10);
1629 }
1630
1631 Bool isMemScript(fname)
1632 String fname; {
1633    return (strstr(fname,memPrefix) != NULL);
1634 }
1635
1636 String memScriptString(fname)
1637 String fname; {
1638     String p = strstr(fname,memPrefix);
1639     if (p) {
1640         return (String)atoi(p+lenMemPrefix);
1641     } else {
1642         return NULL;
1643     }
1644 }
1645
1646 Void parseScript(fname,len)             /* Read a script, possibly from mem */
1647 String fname;
1648 Long len; {
1649     input(RESET);
1650     if (isMemScript(fname)) {
1651         char* s = memScriptString(fname);
1652         stringInput(s);
1653     } else {
1654         fileInput(fname,len);
1655     }
1656     parseInput(SCRIPT);
1657 }
1658 #else
1659 Void parseScript(nm,len)               /* Read a script                    */
1660 String nm;
1661 Long   len; {                          /* Used to set a target for reading */
1662     input(RESET);
1663     fileInput(nm,len);
1664     parseInput(SCRIPT);
1665 }
1666 #endif
1667
1668 Void parseExp() {                      /* Read an expression to evaluate   */
1669     parseInput(EXPR);
1670     setLastExpr(inputExpr);
1671 }
1672
1673
1674 #if EXPLAIN_INSTANCE_RESOLUTION
1675 Void parseContext() {                  /* Read a context to prove   */
1676     parseInput(CONTEXT);
1677 }
1678 #endif
1679
1680 Cell parseInterface(nm,len)            /* Read a GHC interface file        */
1681 String nm;
1682 Long   len; {                          /* Used to set a target for reading */
1683    input(RESET);
1684    fileInput(nm,len);
1685    return parseInput(INTERFACE);
1686 }
1687
1688
1689 /* --------------------------------------------------------------------------
1690  * Input control:
1691  * ------------------------------------------------------------------------*/
1692
1693 Void input(what)
1694 Int what; {
1695     switch (what) {
1696         case POSTPREL: break;
1697
1698         case PREPREL : initCharTab();
1699                        textCase       = findText("case");
1700                        textOfK        = findText("of");
1701                        textData       = findText("data");
1702                        textType       = findText("type");
1703                        textIf         = findText("if");
1704                        textThen       = findText("then");
1705                        textElse       = findText("else");
1706                        textWhere      = findText("where");
1707                        textLet        = findText("let");
1708                        textIn         = findText("in");
1709                        textInfix      = findText("infix");
1710                        textInfixl     = findText("infixl");
1711                        textInfixr     = findText("infixr");
1712                        textForeign    = findText("foreign");
1713                        textUnsafe     = findText("unsafe");
1714                        textNewtype    = findText("newtype");
1715                        textDefault    = findText("default");
1716                        textDeriving   = findText("deriving");
1717                        textDo         = findText("do");
1718                        textClass      = findText("class");
1719 #if IPARAM
1720                        textWith       = findText("with");
1721                        textDlet       = findText("dlet");
1722 #endif
1723                        textInstance   = findText("instance");
1724                        textCoco       = findText("::");
1725                        textEq         = findText("=");
1726                        textUpto       = findText("..");
1727                        textAs         = findText("@");
1728                        textLambda     = findText("\\");
1729                        textBar        = findText("|");
1730                        textMinus      = findText("-");
1731                        textPlus       = findText("+");
1732                        textFrom       = findText("<-");
1733                        textArrow      = findText("->");
1734                        textLazy       = findText("~");
1735                        textBang       = findText("!");
1736                        textDot        = findText(".");
1737                        textImplies    = findText("=>");
1738                        textPrelude    = findText("Prelude");
1739                        textNum        = findText("Num");
1740                        textModule     = findText("module");
1741                        textInterface  = findText("__interface");
1742                        textInstImport = findText("__instimport");
1743                        textExport     = findText("export");
1744                        textDynamic    = findText("dynamic");
1745                        textCcall      = findText("ccall");
1746                        textStdcall    = findText("stdcall");
1747                        textUUExport   = findText("__export");
1748                        textImport     = findText("import");
1749                        textHiding     = findText("hiding");
1750                        textQualified  = findText("qualified");
1751                        textAsMod      = findText("as");
1752                        textPrivileged = findText("privileged");
1753                        textWildcard   = findText("_");
1754                        textAll        = findText("forall");
1755                        textUUAll      = findText("__forall");
1756                        textUUUsage    = findText("__u");
1757                        varMinus       = mkVar(textMinus);
1758                        varPlus        = mkVar(textPlus);
1759                        varBang        = mkVar(textBang);
1760                        varDot         = mkVar(textDot);
1761                        varHiding      = mkVar(textHiding);
1762                        varQualified   = mkVar(textQualified);
1763                        varAsMod       = mkVar(textAsMod);
1764                        varPrivileged  = mkVar(textPrivileged);
1765                        conMain        = mkCon(findText("Main"));
1766                        varMain        = mkVar(findText("main"));
1767                        evalDefaults   = NIL;
1768
1769                        input(RESET);
1770                        break;
1771
1772         case RESET   : tyconDefns   = NIL;
1773                        typeInDefns  = NIL;
1774                        valDefns     = NIL;
1775                        classDefns   = NIL;
1776                        instDefns    = NIL;
1777                        selDefns     = NIL;
1778                        genDefns     = NIL;
1779                        unqualImports= NIL;
1780                        foreignImports= NIL;
1781                        foreignExports= NIL;
1782                        defaultDefns = NIL;
1783                        defaultLine  = 0;
1784                        inputExpr    = NIL;
1785                        imps         = NIL;
1786                        closeAnyInput();
1787                        break;
1788
1789         case BREAK   : if (reading==KEYBOARD)
1790                            c0 = EOF;
1791                        break;
1792
1793         case MARK    : mark(tyconDefns);
1794                        mark(typeInDefns);
1795                        mark(valDefns);
1796                        mark(classDefns);
1797                        mark(instDefns);
1798                        mark(selDefns);
1799                        mark(genDefns);
1800                        mark(unqualImports);
1801                        mark(foreignImports);
1802                        mark(foreignExports);
1803                        mark(defaultDefns);
1804                        mark(evalDefaults);
1805                        mark(inputExpr);
1806                        mark(varMinus);
1807                        mark(varPlus);
1808                        mark(varBang);
1809                        mark(varDot);
1810                        mark(varHiding);
1811                        mark(varQualified);
1812                        mark(varAsMod);
1813                        mark(varPrivileged);
1814                        mark(varMain);
1815                        mark(conMain);
1816                        mark(imps);
1817                        break;
1818     }
1819 }
1820
1821 /*-------------------------------------------------------------------------*/