ghc/interpreter/input.c

   1
   2 /* --------------------------------------------------------------------------
   3  * Input functions, lexical analysis parsing etc...
   4  *
   5  * Hugs 98 is Copyright (c) Mark P Jones, Alastair Reid and the Yale
   6  * Haskell Group 1994-99, and is distributed as Open Source software
   7  * under the Artistic License; see the file "Artistic" that is included
   8  * in the distribution for details.
   9  *
  10  * $RCSfile: input.c,v $
  11  * $Revision: 1.7 $
  12  * $Date: 1999/10/15 11:02:12 $
  13  * ------------------------------------------------------------------------*/
  14
  15 #include "prelude.h"
  16 #include "storage.h"
  17 #include "backend.h"
  18 #include "connect.h"
  19 #include "command.h"
  20 #include "errors.h"
  21 #include "link.h"
  22 #include <ctype.h>
  23 #if HAVE_GETDELIM_H
  24 #include "getdelim.h"
  25 #endif
  26
  27 #if HUGS_FOR_WINDOWS
  28 #undef IN
  29 #endif
  30
  31 /* --------------------------------------------------------------------------
  32  * Global data:
  33  * ------------------------------------------------------------------------*/
  34
  35 List tyconDefns      = NIL;             /* type constructor definitions    */
  36 List typeInDefns     = NIL;             /* type synonym restrictions       */
  37 List valDefns        = NIL;             /* value definitions in script     */
  38 List classDefns      = NIL;             /* class defns in script           */
  39 List instDefns       = NIL;             /* instance defns in script        */
  40 List selDefns        = NIL;             /* list of selector lists          */
  41 List genDefns        = NIL;             /* list of generated names         */
  42 List unqualImports   = NIL;             /* unqualified import list         */
  43 List foreignImports  = NIL;             /* foreign imports                 */
  44 List foreignExports  = NIL;             /* foreign exportsd                */
  45 List defaultDefns    = NIL;             /* default definitions (if any)    */
  46 Int  defaultLine     = 0;               /* line in which default defs occur*/
  47 List evalDefaults    = NIL;             /* defaults for evaluator          */
  48
  49 Cell inputExpr       = NIL;             /* input expression                */
  50 Bool literateScripts = FALSE;           /* TRUE => default to lit scripts  */
  51 Bool literateErrors  = TRUE;            /* TRUE => report errs in lit scrs */
  52 Bool offsideON       = TRUE;            /* TRUE => implement offside rule  */
  53
  54 String repeatStr     = 0;               /* Repeat last expr                */
  55
  56 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
  57 String preprocessor  = 0;
  58 #endif
  59
  60 /* --------------------------------------------------------------------------
  61  * Local function prototypes:
  62  * ------------------------------------------------------------------------*/
  63
  64 static Void local initCharTab     Args((Void));
  65 static Void local fileInput       Args((String,Long));
  66 static Bool local literateMode    Args((String));
  67 static Bool local linecmp         Args((String,String));
  68 static Int  local nextLine        Args((Void));
  69 static Void local skip            Args((Void));
  70 static Void local thisLineIs      Args((Int));
  71 static Void local newlineSkip     Args((Void));
  72 static Void local closeAnyInput   Args((Void));
  73
  74        Int  yyparse         Args((Void)); /* can't stop yacc making this   */
  75                                           /* public, but don't advertise   */
  76                                           /* it in a header file.          */
  77
  78 static Void local endToken        Args((Void));
  79 static Text local readOperator    Args((Void));
  80 static Text local readIdent       Args((Void));
  81 static Cell local readRadixNumber Args((Int));
  82 static Cell local readNumber      Args((Void));
  83 static Cell local readChar        Args((Void));
  84 static Cell local readString      Args((Void));
  85 static Void local saveStrChr      Args((Char));
  86 static Cell local readAChar       Args((Bool));
  87
  88 static Bool local lazyReadMatches Args((String));
  89 static Cell local readEscapeChar  Args((Bool));
  90 static Void local skipGap         Args((Void));
  91 static Cell local readCtrlChar    Args((Void));
  92 static Cell local readOctChar     Args((Void));
  93 static Cell local readHexChar     Args((Void));
  94 static Int  local readHexDigit    Args((Char));
  95 static Cell local readDecChar     Args((Void));
  96
  97 static Void local goOffside       Args((Int));
  98 static Void local unOffside       Args((Void));
  99 static Bool local canUnOffside    Args((Void));
 100
 101 static Void local skipWhitespace  Args((Void));
 102 static Int  local yylex           Args((Void));
 103 static Int  local repeatLast      Args((Void));
 104
 105 static Void local parseInput      Args((Int));
 106
 107 static Bool local doesNotExceed   Args((String,Int,Int));
 108 static Int  local stringToInt     Args((String,Int));
 109
 110
 111 /* --------------------------------------------------------------------------
 112  * Text values for reserved words and special symbols:
 113  * ------------------------------------------------------------------------*/
 114
 115 static Text textCase,    textOfK,      textData,   textType,   textIf;
 116 static Text textThen,    textElse,     textWhere,  textLet,    textIn;
 117 static Text textInfix,   textInfixl,   textInfixr, textForeign, textNewtype;
 118 static Text textDefault, textDeriving, textDo,     textClass,  textInstance;
 119
 120 static Text textCoco,    textEq,       textUpto,   textAs,     textLambda;
 121 static Text textBar,     textMinus,    textFrom,   textArrow,  textLazy;
 122 static Text textBang,    textDot,      textAll,    textImplies;
 123 static Text textWildcard;
 124
 125 static Text textModule,  textImport,    textInterface,  textInstImport;
 126 static Text textHiding,  textQualified, textAsMod;
 127 static Text textExport,  textDynamic,   textUUExport;
 128 static Text textUnsafe,  textUUAll;
 129
 130 Text   textNum;                         /* Num                             */
 131 Text   textPrelude;                     /* Prelude                         */
 132 Text   textPlus;                        /* (+)                             */
 133
 134 static Cell conMain;                    /* Main                            */
 135 static Cell varMain;                    /* main                            */
 136
 137 static Cell varMinus;                   /* (-)                             */
 138 static Cell varPlus;                    /* (+)                             */
 139 static Cell varBang;                    /* (!)                             */
 140 static Cell varDot;                     /* (.)                             */
 141 static Cell varHiding;                  /* hiding                          */
 142 static Cell varQualified;               /* qualified                       */
 143 static Cell varAsMod;                   /* as                              */
 144
 145 static List imps;                       /* List of imports to be chased    */
 146
 147
 148 /* --------------------------------------------------------------------------
 149  * Character set handling:
 150  *
 151  * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
 152  * character set.  The following code provides methods for classifying
 153  * input characters according to the lexical structure specified by the
 154  * report.  Hugs should still accept older programs because ASCII is
 155  * essentially just a subset of the ISO character set.
 156  *
 157  * Notes: If you want to port Hugs to a machine that uses something
 158  * substantially different from the ISO character set, then you will need
 159  * to insert additional code to map between character sets.
 160  *
 161  * At some point, the following data structures may be exported in a .h
 162  * file to allow the information contained here to be picked up in the
 163  * implementation of LibChar is* primitives.
 164  *
 165  * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
 166  * ------------------------------------------------------------------------*/
 167
 168 static  Bool            charTabBuilt;
 169 static  unsigned char   ctable[NUM_CHARS];
 170 #define isIn(c,x)       (ctable[(unsigned char)(c)]&(x))
 171 #define isISO(c)        (0<=(c) && (c)<NUM_CHARS)
 172
 173 #define DIGIT           0x01
 174 #define SMALL           0x02
 175 #define LARGE           0x04
 176 #define SYMBOL          0x08
 177 #define IDAFTER         0x10
 178 #define SPACE           0x20
 179 #define PRINT           0x40
 180
 181 static Void local initCharTab() {       /* Initialize char decode table    */
 182 #define setRange(x,f,t) {Int i=f;   while (i<=t) ctable[i++] |=x;}
 183 #define setChar(x,c)    ctable[c] |= (x)
 184 #define setChars(x,s)   {char *p=s; while (*p)   ctable[(Int)*p++]|=x;}
 185 #define setCopy(x,c)    {Int i;                         \
 186                          for (i=0; i<NUM_CHARS; ++i)    \
 187                              if (isIn(i,c))             \
 188                                  ctable[i]|=x;          \
 189                         }
 190
 191     setRange(DIGIT,     '0','9');       /* ASCII decimal digits            */
 192
 193     setRange(SMALL,     'a','z');       /* ASCII lower case letters        */
 194     setRange(SMALL,     223,246);       /* ISO lower case letters          */
 195     setRange(SMALL,     248,255);       /* (omits division symbol, 247)    */
 196     setChar (SMALL,     '_');
 197
 198     setRange(LARGE,     'A','Z');       /* ASCII upper case letters        */
 199     setRange(LARGE,     192,214);       /* ISO upper case letters          */
 200     setRange(LARGE,     216,222);       /* (omits multiplication, 215)     */
 201
 202     setRange(SYMBOL,    161,191);       /* Symbol characters + ':'         */
 203     setRange(SYMBOL,    215,215);
 204     setChar (SYMBOL,    247);
 205     setChars(SYMBOL,    ":!#$%&*+./<=>?@\\^|-~");
 206
 207     setChar (IDAFTER,   '\'');          /* Characters in identifier        */
 208     setCopy (IDAFTER,   (DIGIT|SMALL|LARGE));
 209
 210     setChar (SPACE,     ' ');           /* ASCII space character           */
 211     setChar (SPACE,     160);           /* ISO non breaking space          */
 212     setRange(SPACE,     9,13);          /* special whitespace: \t\n\v\f\r  */
 213
 214     setChars(PRINT,     "(),;[]_`{}");  /* Special characters              */
 215     setChars(PRINT,     " '\"");        /* Space and quotes                */
 216     setCopy (PRINT,     (DIGIT|SMALL|LARGE|SYMBOL));
 217
 218     charTabBuilt = TRUE;
 219 #undef setRange
 220 #undef setChar
 221 #undef setChars
 222 #undef setCopy
 223 }
 224
 225
 226 /* --------------------------------------------------------------------------
 227  * Single character input routines:
 228  *
 229  * At the lowest level of input, characters are read one at a time, with the
 230  * current character held in c0 and the following (lookahead) character in
 231  * c1.  The corrdinates of c0 within the file are held in (column,row).
 232  * The input stream is advanced by one character using the skip() function.
 233  * ------------------------------------------------------------------------*/
 234
 235 #define TABSIZE    8                   /* spacing between tabstops         */
 236
 237 #define NOTHING    0                   /* what kind of input is being read?*/
 238 #define KEYBOARD   1                   /* - keyboard/console?              */
 239 #define SCRIPTFILE 2                   /* - script file                    */
 240 #define PROJFILE   3                   /* - project file                   */
 241 #define STRING     4                   /* - string buffer?                 */
 242
 243 static Int    reading   = NOTHING;
 244
 245 static Target readSoFar;
 246 static Int    row, column, startColumn;
 247 static int    c0, c1;
 248 static FILE   *inputStream = 0;
 249 static Bool   thisLiterate;
 250 static String nextStringChar;          /* next char in string buffer       */
 251
 252 #if     USE_READLINE                   /* for command line editors         */
 253 static  String currentLine;            /* editline or GNU readline         */
 254 static  String nextChar;
 255 #define nextConsoleChar() \
 256            (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
 257 extern  Void add_history  Args((String));
 258 extern  String readline   Args((String));
 259 #else
 260 #define nextConsoleChar() getc(stdin)
 261 #endif
 262
 263 static  Int litLines;                  /* count defn lines in lit script   */
 264 #define DEFNCHAR  '>'                  /* definition lines begin with this */
 265 static  Int lastLine;                  /* records type of last line read:  */
 266 #define STARTLINE 0                    /* - at start of file, none read    */
 267 #define BLANKLINE 1                    /* - blank (may preceed definition) */
 268 #define TEXTLINE  2                    /* - text comment                   */
 269 #define DEFNLINE  3                    /* - line containing definition     */
 270 #define CODELINE  4                    /* - line inside code block         */
 271
 272 #define BEGINCODE "\\begin{code}"
 273 #define ENDCODE   "\\end{code}"
 274
 275 #if HAVE_GETDELIM_H
 276 static char *lineBuffer = NULL;   /* getline() does the initial allocation */
 277 #else
 278 #define LINEBUFFER_SIZE 1000
 279 static char lineBuffer[LINEBUFFER_SIZE];
 280 #endif
 281 static int lineLength = 0;
 282 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
 283 static int linePtr = 0;
 284
 285 Void consoleInput(prompt)              /* prepare to input characters from */
 286 String prompt; {                       /* standard in (i.e. console/kbd)   */
 287     reading     = KEYBOARD;            /* keyboard input is Line oriented, */
 288     c0          =                      /* i.e. input terminated by '\n'    */
 289     c1          = ' ';
 290     column      = (-1);
 291     row         = 0;
 292
 293 #if USE_READLINE
 294     /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
 295      * avoids accidentally freeing currentLine twice.
 296      */
 297     if (currentLine) {
 298         String oldCurrentLine = currentLine;
 299         currentLine = 0;           /* We may lose the space of currentLine */
 300         free(oldCurrentLine);      /* if interrupted here - unlikely       */
 301     }
 302     currentLine = readline(prompt);
 303     nextChar    = currentLine;
 304     if (currentLine) {
 305         if (*currentLine)
 306             add_history(currentLine);
 307     }
 308     else
 309         c0 = c1 = EOF;
 310 #else
 311     Printf("%s",prompt);
 312     FlushStdout();
 313 #endif
 314 }
 315
 316 Void projInput(nm)                     /* prepare to input characters from */
 317 String nm; {                           /* from named project file          */
 318     if ((inputStream = fopen(nm,"r"))!=0) {
 319         reading = PROJFILE;
 320         c0      = ' ';
 321         c1      = '\n';
 322         column  = 1;
 323         row     = 0;
 324     }
 325     else {
 326         ERRMSG(0) "Unable to open project file \"%s\"", nm
 327         EEND;
 328     }
 329 }
 330
 331 static Void local fileInput(nm,len)     /* prepare to input characters from*/
 332 String nm;                              /* named file (specified length is */
 333 Long   len; {                           /* used to set target for reading) */
 334 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 335     if (preprocessor) {
 336         Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
 337         char *cmd = malloc(reallen);
 338         if (cmd == NULL) {
 339             ERRMSG(0) "Unable to allocate memory for filter command."
 340             EEND;
 341         }
 342         strcpy(cmd,preprocessor);
 343         strcat(cmd," ");
 344         strcat(cmd,nm);
 345         inputStream = popen(cmd,"r");
 346         free(cmd);
 347     } else {
 348         inputStream = fopen(nm,"r");
 349     }
 350 #else
 351     inputStream = fopen(nm,"r");
 352 #endif
 353     if (inputStream) {
 354         reading      = SCRIPTFILE;
 355         c0           = ' ';
 356         c1           = '\n';
 357         column       = 1;
 358         row          = 0;
 359
 360         lastLine     = STARTLINE;       /* literate file processing */
 361         litLines     = 0;
 362         linePtr      = 0;
 363         lineLength   = 0;
 364         thisLiterate = literateMode(nm);
 365         inCodeBlock  = FALSE;
 366
 367         readSoFar    = 0;
 368         setGoal("Parsing", (Target)len);
 369     }
 370     else {
 371         ERRMSG(0) "Unable to open file \"%s\"", nm
 372         EEND;
 373     }
 374 }
 375
 376 Void stringInput(s)             /* prepare to input characters from string */
 377 String s; {
 378     reading      = STRING;
 379     c0           = EOF;
 380     c1           = EOF;
 381     if (*s) c0 = *s++;
 382     if (*s) c1 = *s++;
 383     column       = 1;
 384     row          = 1;
 385
 386     nextStringChar = s;
 387     if (!charTabBuilt)
 388         initCharTab();
 389 }
 390
 391 static Bool local literateMode(nm)      /* Select literate mode for file   */
 392 String nm; {
 393     char *dot = strrchr(nm,'.');        /* look for last dot in file name  */
 394     if (dot) {
 395         if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate    */
 396             return FALSE;
 397         if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
 398             filenamecmp(dot+1,"verb")==0) /* literate scripts              */
 399             return TRUE;
 400     }
 401     return literateScripts;             /* otherwise, use the default      */
 402 }
 403
 404
 405 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
 406 {
 407    Int len;
 408    String dot;
 409    len = 1 + strlen ( srcName );
 410    *hiName = malloc(len);
 411    *oName  = malloc(len);
 412    if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
 413    (*hiName)[0] = (*oName)[0] = 0;
 414    dot = strrchr(srcName, '.');
 415    if (!dot) return;
 416    if (filenamecmp(dot+1, "hs")==0 &&
 417        filenamecmp(dot+1, "lhs")==0 &&
 418        filenamecmp(dot+1, "verb")==0) return;
 419
 420    strcpy(*hiName, srcName);
 421    dot = strrchr(*hiName, '.');
 422    dot[1] = 'h';
 423    dot[2] = 'i';
 424    dot[3] = 0;
 425
 426    strcpy(*oName, srcName);
 427    dot = strrchr(*oName, '.');
 428    dot[1] = 'o';
 429    dot[2] = 0;
 430 }
 431
 432
 433
 434 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
 435  * I've removed the loop (since newLineSkip contains a loop too) and
 436  * replaced the warnings with errors. ADR
 437  */
 438 /*
 439  * To deal with literate \begin{code}...\end{code} blocks,
 440  * add a line buffer that rooms the current line. The old c0 and c1
 441  * stream pointers are used as before within that buffer -- sof
 442  *
 443  * Upon reading a new line into the line buffer, we check to see if
 444  * we're reading in a line containing \begin{code} or \end{code} and
 445  * take appropriate action.
 446  */
 447
 448 static Bool local linecmp(s,line)       /* compare string with line        */
 449 String s;                               /* line may end in whitespace      */
 450 String line; {
 451     Int i=0;
 452     while (s[i] != '\0' && s[i] == line[i]) {
 453         ++i;
 454     }
 455     /* s[0..i-1] == line[0..i-1] */
 456     if (s[i] != '\0') {                 /* check s `isPrefixOf` line       */
 457         return FALSE;
 458     }
 459     while (isIn(line[i], SPACE)) {      /* allow whitespace at end of line */
 460         ++i;
 461     }
 462     return (line[i] == '\0');
 463 }
 464
 465 /* Returns line length (including \n) or 0 upon EOF. */
 466 static Int local nextLine()
 467 {
 468 #if HAVE_GETDELIM_H
 469     /*
 470        Forget about fgets(), it is utterly braindead.
 471        (Assumes \NUL free streams and does not gracefully deal
 472        with overflow.) Instead, use GNU libc's getline().
 473        */
 474     lineLength = getline(&lineBuffer, &lineLength, inputStream);
 475 #else
 476     if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
 477         lineLength = strlen(lineBuffer);
 478     else
 479         lineLength = 0;
 480 #endif
 481     /* printf("Read: \"%s\"", lineBuffer); */
 482     if (lineLength <= 0) { /* EOF / IO error, who knows.. */
 483         return lineLength;
 484     }
 485     else if (lineLength >= 2 && lineBuffer[0] == '#' &&
 486              lineBuffer[1] == '!') {
 487         lineBuffer[0]='\n'; /* pretend it's a blank line */
 488         lineBuffer[1]='\0';
 489         lineLength=1;
 490     } else if (thisLiterate) {
 491         if (linecmp(BEGINCODE, lineBuffer)) {
 492             if (!inCodeBlock) {             /* Entered a code block        */
 493                 inCodeBlock = TRUE;
 494                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 495                 lineBuffer[1]='\0';
 496                 lineLength=1;
 497             }
 498             else {
 499                 ERRMSG(row) "\\begin{code} encountered inside code block"
 500                 EEND;
 501             }
 502         }
 503         else if (linecmp(ENDCODE, lineBuffer)) {
 504             if (inCodeBlock) {              /* Finished code block         */
 505                 inCodeBlock = FALSE;
 506                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 507                 lineBuffer[1]='\0';
 508                 lineLength=1;
 509             }
 510             else {
 511                 ERRMSG(row) "\\end{code} encountered outside code block"
 512                 EEND;
 513             }
 514         }
 515     }
 516     /* printf("Read: \"%s\"", lineBuffer); */
 517     return lineLength;
 518 }
 519
 520 static Void local skip() {              /* move forward one char in input  */
 521     if (c0!=EOF) {                      /* stream, updating c0, c1, ...    */
 522         if (c0=='\n') {                 /* Adjusting cursor coords as nec. */
 523             row++;
 524             column=1;
 525             if (reading==SCRIPTFILE)
 526                 soFar(readSoFar);
 527         }
 528         else if (c0=='\t')
 529             column += TABSIZE - ((column-1)%TABSIZE);
 530         else
 531             column++;
 532
 533         c0 = c1;
 534         readSoFar++;
 535
 536         if (c0==EOF) {
 537             column = 0;
 538             if (reading==SCRIPTFILE)
 539                 done();
 540             closeAnyInput();
 541         }
 542         else if (reading==KEYBOARD) {
 543             allowBreak();
 544             if (c0=='\n')
 545                 c1 = EOF;
 546             else {
 547                 c1 = nextConsoleChar();
 548                 /* On Win32, hitting ctrl-C causes the next getchar to
 549                  * fail - returning "-1" to indicate an error.
 550                  * This is one of the rare cases where "-1" does not mean EOF.
 551                  */
 552                 if (EOF == c1 && !feof(stdin)) {
 553                     c1 = ' ';
 554                 }
 555             }
 556         }
 557         else if (reading==STRING) {
 558             c1 = (unsigned char) *nextStringChar++;
 559             if (c1 == '\0')
 560                 c1 = EOF;
 561         }
 562         else {
 563             if (lineLength <=0 || linePtr == lineLength) {
 564                 /* Current line, exhausted - get new one */
 565                 if (nextLine() <= 0) { /* EOF */
 566                     c1 = EOF;
 567                 }
 568                 else {
 569                     linePtr = 0;
 570                     c1 = (unsigned char)lineBuffer[linePtr++];
 571                 }
 572             }
 573             else {
 574                 c1 = (unsigned char)lineBuffer[linePtr++];
 575             }
 576         }
 577
 578     }
 579 }
 580
 581 static Void local thisLineIs(kind)     /* register kind of current line    */
 582 Int kind; {                            /* & check for literate script errs */
 583     if (literateErrors) {
 584         if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
 585             (kind==TEXTLINE && lastLine==DEFNLINE)) {
 586             ERRMSG(row) "Program line next to comment"
 587             EEND;
 588         }
 589         lastLine = kind;
 590     }
 591 }
 592
 593 static Void local newlineSkip() {      /* skip `\n' (supports lit scripts) */
 594     /* assert(c0=='\n'); */
 595     if (reading==SCRIPTFILE && thisLiterate) {
 596         do {
 597             skip();
 598             if (inCodeBlock) {         /* pass chars on definition lines   */
 599                 thisLineIs(CODELINE);  /* to lexer (w/o leading DEFNCHAR)  */
 600                 litLines++;
 601                 return;
 602             }
 603             if (c0==DEFNCHAR) {        /* pass chars on definition lines   */
 604                 thisLineIs(DEFNLINE);  /* to lexer (w/o leading DEFNCHAR)  */
 605                 skip();
 606                 litLines++;
 607                 return;
 608             }
 609             while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank?   */
 610                 skip();
 611             if (c0=='\n' || c0==EOF)
 612                 thisLineIs(BLANKLINE);
 613             else {
 614                 thisLineIs(TEXTLINE);  /* otherwise it must be a comment   */
 615                 while (c0!='\n' && c0!=EOF)
 616                     skip();
 617             }                          /* by now, c0=='\n' or c0==EOF      */
 618         } while (c0!=EOF);             /* if new line, start again         */
 619
 620         if (litLines==0 && literateErrors) {
 621             ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
 622                         DEFNCHAR
 623             EEND;
 624         }
 625         return;
 626     }
 627     skip();
 628 }
 629
 630 static Void local closeAnyInput() {    /* Close input stream, if open,     */
 631     switch (reading) {                 /* or skip to end of console line   */
 632         case PROJFILE   :
 633         case SCRIPTFILE : if (inputStream) {
 634 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 635                               if (preprocessor) {
 636                                   pclose(inputStream);
 637                               } else {
 638                                   fclose(inputStream);
 639                               }
 640 #else
 641                               fclose(inputStream);
 642 #endif
 643                               inputStream = 0;
 644                           }
 645                           break;
 646         case KEYBOARD   : while (c0!=EOF)
 647                               skip();
 648                           break;
 649     }
 650     reading=NOTHING;
 651 }
 652
 653 /* --------------------------------------------------------------------------
 654  * Parser: Uses table driven parser generated from parser.y using yacc
 655  * ------------------------------------------------------------------------*/
 656
 657 #include "parser.c"
 658
 659 /* --------------------------------------------------------------------------
 660  * Single token input routines:
 661  *
 662  * The following routines read the values of particular kinds of token given
 663  * that the first character of the token has already been located in c0 on
 664  * entry to the routine.
 665  * ------------------------------------------------------------------------*/
 666
 667 #define MAX_TOKEN           4000
 668 #define startToken()        tokPos = 0
 669 #define saveTokenChar(c)    if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
 670 #define saveChar(c)         tokenStr[tokPos++]=(char)(c)
 671 #define overflows(n,b,d,m)  (n > ((m)-(d))/(b))
 672
 673 static char tokenStr[MAX_TOKEN+1];     /* token buffer                     */
 674 static Int  tokPos;                    /* input position in buffer         */
 675 static Int  identType;                 /* identifier type: CONID / VARID   */
 676 static Int  opType;                    /* operator type  : CONOP / VAROP   */
 677
 678 static Void local endToken() {         /* check for token overflow         */
 679     if (tokPos>MAX_TOKEN) {
 680         ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
 681         EEND;
 682     }
 683     tokenStr[tokPos] = '\0';
 684 }
 685
 686 static Text local readOperator() {     /* read operator symbol             */
 687     startToken();
 688     do {
 689         saveTokenChar(c0);
 690         skip();
 691     } while (isISO(c0) && isIn(c0,SYMBOL));
 692     opType = (tokenStr[0]==':' ? CONOP : VAROP);
 693     endToken();
 694     return findText(tokenStr);
 695 }
 696
 697 static Text local readIdent() {        /* read identifier                  */
 698     startToken();
 699     do {
 700         saveTokenChar(c0);
 701         skip();
 702     } while (isISO(c0) && isIn(c0,IDAFTER));
 703     endToken();
 704     identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
 705     return findText(tokenStr);
 706 }
 707
 708
 709 static Bool local doesNotExceed(s,radix,limit)
 710 String s;
 711 Int    radix;
 712 Int    limit; {
 713     Int n = 0;
 714     Int p = 0;
 715     while (TRUE) {
 716         if (s[p] == 0) return TRUE;
 717         if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
 718         n = radix*n + (s[p]-'0');
 719         p++;
 720     }
 721 }
 722
 723 static Int local stringToInt(s,radix)
 724 String s;
 725 Int    radix; {
 726     Int n = 0;
 727     Int p = 0;
 728     while (TRUE) {
 729         if (s[p] == 0) return n;
 730         n = radix*n + (s[p]-'0');
 731         p++;
 732     }
 733 }
 734
 735 static Cell local readRadixNumber(r)   /* Read literal in specified radix  */
 736 Int r; {                               /* from input of the form 0c{digs}  */
 737     Int d;
 738     startToken();
 739     skip();                            /* skip leading zero                */
 740     if ((d=readHexDigit(c1))<0 || d>=r) {
 741         /* Special case; no digits, lex as  */
 742         /* if it had been written "0 c..."  */
 743         saveTokenChar('0');
 744     } else {
 745         skip();
 746         do {
 747             saveTokenChar('0'+readHexDigit(c0));
 748             skip();
 749             d = readHexDigit(c0);
 750         } while (d>=0 && d<r);
 751     }
 752     endToken();
 753
 754     if (doesNotExceed(tokenStr,r,MAXPOSINT))
 755         return mkInt(stringToInt(tokenStr,r));
 756     else
 757     if (r == 10)
 758         return stringToBignum(tokenStr);
 759     else {
 760         ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
 761         EEND;
 762     }
 763 }
 764
 765 static Cell local readNumber() {        /* read numeric constant           */
 766
 767     if (c0=='0') {
 768         if (c1=='x' || c1=='X')         /* Maybe a hexadecimal literal?    */
 769             return readRadixNumber(16);
 770         if (c1=='o' || c1=='O')         /* Maybe an octal literal?         */
 771             return readRadixNumber(8);
 772     }
 773
 774     startToken();
 775     do {
 776         saveTokenChar(c0);
 777         skip();
 778     } while (isISO(c0) && isIn(c0,DIGIT));
 779
 780     if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
 781         endToken();
 782         if (doesNotExceed(tokenStr,10,MAXPOSINT))
 783             return mkInt(stringToInt(tokenStr,10)); else
 784             return stringToBignum(tokenStr);
 785     }
 786
 787     saveTokenChar(c0);                  /* save decimal point              */
 788     skip();
 789     do {                                /* process fractional part ...     */
 790         saveTokenChar(c0);
 791         skip();
 792     } while (isISO(c0) && isIn(c0,DIGIT));
 793
 794     if (c0=='e' || c0=='E') {           /* look for exponent part...       */
 795         saveTokenChar('e');
 796         skip();
 797         if (c0=='-') {
 798             saveTokenChar('-');
 799             skip();
 800         }
 801         else if (c0=='+')
 802             skip();
 803
 804         if (!isISO(c0) || !isIn(c0,DIGIT)) {
 805             ERRMSG(row) "Missing digits in exponent"
 806             EEND;
 807         }
 808         else {
 809             do {
 810                 saveTokenChar(c0);
 811                 skip();
 812             } while (isISO(c0) && isIn(c0,DIGIT));
 813         }
 814     }
 815
 816     endToken();
 817     return mkFloat(stringToFloat(tokenStr));
 818 }
 819
 820
 821
 822
 823
 824
 825
 826 static Cell local readChar() {         /* read character constant          */
 827     Cell charRead;
 828
 829     skip(/* '\'' */);
 830     if (c0=='\'' || c0=='\n' || c0==EOF) {
 831         ERRMSG(row) "Illegal character constant"
 832         EEND;
 833     }
 834
 835     charRead = readAChar(FALSE);
 836
 837     if (c0=='\'')
 838         skip(/* '\'' */);
 839     else {
 840         ERRMSG(row) "Improperly terminated character constant"
 841         EEND;
 842     }
 843     return charRead;
 844 }
 845
 846 static Cell local readString() {       /* read string literal              */
 847     Cell c;
 848
 849     startToken();
 850     skip(/* '\"' */);
 851     while (c0!='\"' && c0!='\n' && c0!=EOF) {
 852         c = readAChar(TRUE);
 853         if (nonNull(c))
 854             saveStrChr(charOf(c));
 855     }
 856
 857     if (c0=='\"')
 858         skip(/* '\"' */);
 859     else {
 860         ERRMSG(row) "Improperly terminated string"
 861         EEND;
 862     }
 863     endToken();
 864     return mkStr(findText(tokenStr));
 865 }
 866
 867 static Void local saveStrChr(c)        /* save character in string         */
 868 Char c; {
 869     if (c!='\0' && c!='\\') {          /* save non null char as single char*/
 870         saveTokenChar(c);
 871     }
 872     else {                             /* save null char as TWO null chars */
 873         if (tokPos+1<MAX_TOKEN) {
 874             saveChar('\\');
 875             if (c=='\\')
 876                 saveChar('\\');
 877             else
 878                 saveChar('0');
 879         }
 880     }
 881 }
 882
 883 static Cell local readAChar(isStrLit)  /* read single char constant        */
 884 Bool isStrLit; {                       /* TRUE => enable \& and gaps       */
 885     Cell c = mkChar(c0);
 886
 887     if (c0=='\\')                      /* escape character?                */
 888         return readEscapeChar(isStrLit);
 889     if (!isISO(c0)) {
 890         ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
 891         EEND;
 892     }
 893     skip();                            /* normal character?                */
 894     return c;
 895 }
 896
 897 /* --------------------------------------------------------------------------
 898  * Character escape code sequences:
 899  * ------------------------------------------------------------------------*/
 900
 901 static struct {                        /* table of special escape codes    */
 902     char *codename;
 903     int  codenumber;
 904 } escapes[] = {
 905    {"a",    7}, {"b",    8}, {"f",   12}, {"n",   10},  /* common escapes  */
 906    {"r",   13}, {"t",    9}, {"\\",'\\'}, {"\"",'\"'},
 907    {"\'",'\''}, {"v",   11},
 908    {"NUL",  0}, {"SOH",  1}, {"STX",  2}, {"ETX",  3},  /* ascii codenames */
 909    {"EOT",  4}, {"ENQ",  5}, {"ACK",  6}, {"BEL",  7},
 910    {"BS",   8}, {"HT",   9}, {"LF",  10}, {"VT",  11},
 911    {"FF",  12}, {"CR",  13}, {"SO",  14}, {"SI",  15},
 912    {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
 913    {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
 914    {"CAN", 24}, {"EM",  25}, {"SUB", 26}, {"ESC", 27},
 915    {"FS",  28}, {"GS",  29}, {"RS",  30}, {"US",  31},
 916    {"SP",  32}, {"DEL", 127},
 917    {0,0}
 918 };
 919
 920 static Int  alreadyMatched;            /* Record portion of input stream   */
 921 static char alreadyRead[10];           /* that has been read w/o a match   */
 922
 923 static Bool local lazyReadMatches(s)   /* compare input stream with string */
 924 String s; {                            /* possibly using characters that   */
 925     int i;                             /* have already been read           */
 926
 927     for (i=0; i<alreadyMatched; ++i)
 928         if (alreadyRead[i]!=s[i])
 929             return FALSE;
 930
 931     while (s[i] && s[i]==c0) {
 932         alreadyRead[alreadyMatched++]=(char)c0;
 933         skip();
 934         i++;
 935     }
 936
 937     return s[i]=='\0';
 938 }
 939
 940 static Cell local readEscapeChar(isStrLit)/* read escape character         */
 941 Bool isStrLit; {
 942     int i=0;
 943
 944     skip(/* '\\' */);
 945     switch (c0) {
 946         case '&'  : if (isStrLit) {
 947                         skip();
 948                         return NIL;
 949                     }
 950                     ERRMSG(row) "Illegal use of `\\&' in character constant"
 951                     EEND;
 952                     break;/*NOTREACHED*/
 953
 954         case '^'  : return readCtrlChar();
 955
 956         case 'o'  : return readOctChar();
 957         case 'x'  : return readHexChar();
 958
 959         default   : if (!isISO(c0)) {
 960                         ERRMSG(row) "Illegal escape sequence"
 961                         EEND;
 962                     }
 963                     else if (isIn(c0,SPACE)) {
 964                         if (isStrLit) {
 965                             skipGap();
 966                             return NIL;
 967                         }
 968                         ERRMSG(row) "Illegal use of gap in character constant"
 969                         EEND;
 970                         break;
 971                     }
 972                     else if (isIn(c0,DIGIT))
 973                         return readDecChar();
 974     }
 975
 976     for (alreadyMatched=0; escapes[i].codename; i++)
 977         if (lazyReadMatches(escapes[i].codename))
 978             return mkChar(escapes[i].codenumber);
 979
 980     alreadyRead[alreadyMatched++] = (char)c0;
 981     alreadyRead[alreadyMatched++] = '\0';
 982     ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
 983                 alreadyRead
 984     EEND;
 985     return NIL;/*NOTREACHED*/
 986 }
 987
 988 static Void local skipGap() {          /* skip over gap in string literal  */
 989     do                                 /* (simplified in Haskell 1.1)      */
 990         if (c0=='\n')
 991             newlineSkip();
 992         else
 993             skip();
 994     while (isISO(c0) && isIn(c0,SPACE));
 995     if (c0!='\\') {
 996         ERRMSG(row) "Missing `\\' terminating string literal gap"
 997         EEND;
 998     }
 999     skip(/* '\\' */);
1000 }
1001
1002 static Cell local readCtrlChar() {     /* read escape sequence \^x         */
1003     static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1004     String which;
1005
1006     skip(/* '^' */);
1007     if ((which = strchr(controls,c0))==NULL) {
1008         ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1009         EEND;
1010     }
1011     skip();
1012     return mkChar(which-controls);
1013 }
1014
1015 static Cell local readOctChar() {      /* read octal character constant    */
1016     Int n = 0;
1017     Int d;
1018
1019     skip(/* 'o' */);
1020     if ((d = readHexDigit(c0))<0 || d>=8) {
1021         ERRMSG(row) "Empty octal character escape"
1022         EEND;
1023     }
1024     do {
1025         if (overflows(n,8,d,MAXCHARVAL)) {
1026             ERRMSG(row) "Octal character escape out of range"
1027             EEND;
1028         }
1029         n = 8*n + d;
1030         skip();
1031     } while ((d = readHexDigit(c0))>=0 && d<8);
1032
1033     return mkChar(n);
1034 }
1035
1036 static Cell local readHexChar() {      /* read hex character constant      */
1037     Int n = 0;
1038     Int d;
1039
1040     skip(/* 'x' */);
1041     if ((d = readHexDigit(c0))<0) {
1042         ERRMSG(row) "Empty hexadecimal character escape"
1043         EEND;
1044     }
1045     do {
1046         if (overflows(n,16,d,MAXCHARVAL)) {
1047             ERRMSG(row) "Hexadecimal character escape out of range"
1048             EEND;
1049         }
1050         n = 16*n + d;
1051         skip();
1052     } while ((d = readHexDigit(c0))>=0);
1053
1054     return mkChar(n);
1055 }
1056
1057 static Int local readHexDigit(c)       /* read single hex digit            */
1058 Char c; {
1059     if ('0'<=c && c<='9')
1060         return c-'0';
1061     if ('A'<=c && c<='F')
1062         return 10 + (c-'A');
1063     if ('a'<=c && c<='f')
1064         return 10 + (c-'a');
1065     return -1;
1066 }
1067
1068 static Cell local readDecChar() {      /* read decimal character constant  */
1069     Int n = 0;
1070
1071     do {
1072         if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1073             ERRMSG(row) "Decimal character escape out of range"
1074             EEND;
1075         }
1076         n = 10*n + (c0-'0');
1077         skip();
1078     } while (c0!=EOF && isIn(c0,DIGIT));
1079
1080     return mkChar(n);
1081 }
1082
1083 /* --------------------------------------------------------------------------
1084  * Produce printable representation of character:
1085  * ------------------------------------------------------------------------*/
1086
1087 String unlexChar(c,quote)              /* return string representation of  */
1088 Char c;                                /* character...                     */
1089 Char quote; {                          /* protect quote character          */
1090     static char buffer[12];
1091
1092     if (c<0)                           /* deal with sign extended chars..  */
1093         c += NUM_CHARS;
1094
1095     if (isISO(c) && isIn(c,PRINT)) {   /* normal printable character       */
1096         if (c==quote || c=='\\') {     /* look for quote of approp. kind   */
1097             buffer[0] = '\\';
1098             buffer[1] = (char)c;
1099             buffer[2] = '\0';
1100         }
1101         else {
1102             buffer[0] = (char)c;
1103             buffer[1] = '\0';
1104         }
1105     }
1106     else {                             /* look for escape code             */
1107         Int escs;
1108         for (escs=0; escapes[escs].codename; escs++)
1109             if (escapes[escs].codenumber==c) {
1110                 sprintf(buffer,"\\%s",escapes[escs].codename);
1111                 return buffer;
1112             }
1113         sprintf(buffer,"\\%d",c);      /* otherwise use numeric escape     */
1114     }
1115     return buffer;
1116 }
1117
1118 Void printString(s)                    /* print string s, using quotes and */
1119 String s; {                            /* escapes if any parts need them   */
1120     if (s) {
1121         String t = s;
1122         Char   c;
1123         while ((c = *t)!=0 && isISO(c)
1124                            && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1125             t++;
1126         }
1127         if (*t) {
1128             Putchar('"');
1129             for (t=s; *t; t++)
1130                 Printf("%s",unlexChar(*t,'"'));
1131             Putchar('"');
1132         }
1133         else
1134             Printf("%s",s);
1135     }
1136 }
1137
1138 /* -------------------------------------------------------------------------
1139  * Handle special types of input for use in interpreter:
1140  * -----------------------------------------------------------------------*/
1141
1142 Command readCommand(cmds,start,sys)    /* read command at start of input   */
1143 struct cmd *cmds;                      /* line in interpreter              */
1144 Char   start;                          /* characters introducing a cmd     */
1145 Char   sys; {                          /* character for shell escape       */
1146     while (c0==' ' || c0 =='\t')
1147         skip();
1148
1149     if (c0=='\n')                      /* look for blank command lines     */
1150         return NOCMD;
1151     if (c0==EOF)                       /* look for end of input stream     */
1152         return QUIT;
1153     if (c0==sys) {                     /* single character system escape   */
1154         skip();
1155         return SYSTEM;
1156     }
1157     if (c0==start && c1==sys) {        /* two character system escape      */
1158         skip();
1159         skip();
1160         return SYSTEM;
1161     }
1162
1163     startToken();                      /* All cmds start with start        */
1164     if (c0==start)                     /* except default (usually EVAL)    */
1165         do {                           /* which is empty                   */
1166             saveTokenChar(c0);
1167             skip();
1168         } while (c0!=EOF && !isIn(c0,SPACE));
1169     endToken();
1170
1171     for (; cmds->cmdString; ++cmds)
1172         if (strcmp((cmds->cmdString),tokenStr)==0 ||
1173             (tokenStr[0]==start &&
1174              tokenStr[1]==(cmds->cmdString)[1] &&
1175              tokenStr[2]=='\0'))
1176             return (cmds->cmdCode);
1177     return BADCMD;
1178 }
1179
1180 String readFilename() {                /* Read filename from input (if any)*/
1181     if (reading==PROJFILE)
1182         skipWhitespace();
1183     else
1184         while (c0==' ' || c0=='\t')
1185             skip();
1186
1187     if (c0=='\n' || c0==EOF)           /* return null string at end of line*/
1188         return 0;
1189
1190     startToken();
1191     while (c0!=EOF && !isIn(c0,SPACE)) {
1192         if (c0=='"') {
1193             skip();
1194             while (c0!=EOF && c0!='\"') {
1195                 Cell c = readAChar(TRUE);
1196                 if (nonNull(c)) {
1197                     saveTokenChar(charOf(c));
1198                 }
1199             }
1200             if (c0=='"')
1201                 skip();
1202             else {
1203                 ERRMSG(row) "a closing quote, '\"', was expected"
1204                 EEND;
1205             }
1206         }
1207         else {
1208             saveTokenChar(c0);
1209             skip();
1210         }
1211     }
1212     endToken();
1213     return tokenStr;
1214 }
1215
1216 String readLine() {                    /* Read command line from input     */
1217     while (c0==' ' || c0=='\t')        /* skip leading whitespace          */
1218         skip();
1219
1220     startToken();
1221     while (c0!='\n' && c0!=EOF) {
1222         saveTokenChar(c0);
1223         skip();
1224     }
1225     endToken();
1226
1227     return tokenStr;
1228 }
1229
1230 /* --------------------------------------------------------------------------
1231  * This lexer supports the Haskell layout rule:
1232  *
1233  * - Layout area bounded by { ... }, with `;'s in between.
1234  * - A `{' is a HARD indentation and can only be matched by a corresponding
1235  *   HARD '}'
1236  * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1237  *   is inserted with the column number of the first token after the
1238  *   WHERE/LET/OF keyword.
1239  * - When a soft indentation is uppermost on the indetation stack with
1240  *   column col' we insert:
1241  *    `}'  in front of token with column<col' and pop indentation off stack,
1242  *    `;'  in front of token with column==col'.
1243  * ------------------------------------------------------------------------*/
1244
1245 #define MAXINDENT  100                 /* maximum nesting of layout rule   */
1246 static  Int        layout[MAXINDENT+1];/* indentation stack                */
1247 #define HARD       (-1)                /* indicates hard indentation       */
1248 static  Int        indentDepth = (-1); /* current indentation nesting      */
1249
1250 static Void local goOffside(col)       /* insert offside marker            */
1251 Int col; {                             /* for specified column             */
1252 assert(offsideON);
1253     if (indentDepth>=MAXINDENT) {
1254         ERRMSG(row) "Too many levels of program nesting"
1255         EEND;
1256     }
1257     layout[++indentDepth] = col;
1258 }
1259
1260 static Void local unOffside() {        /* leave layout rule area           */
1261 assert(offsideON);
1262     indentDepth--;
1263 }
1264
1265 static Bool local canUnOffside() {     /* Decide if unoffside permitted    */
1266 assert(offsideON);
1267     return indentDepth>=0 && layout[indentDepth]!=HARD;
1268 }
1269
1270 /* --------------------------------------------------------------------------
1271  * Main tokeniser:
1272  * ------------------------------------------------------------------------*/
1273
1274 static Void local skipWhitespace() {   /* Skip over whitespace/comments    */
1275     for (;;)                           /* Strictly speaking, this code is  */
1276         if (c0==EOF)                   /* a little more liberal than the   */
1277             return;                    /* report allows ...                */
1278         else if (c0=='\n')
1279             newlineSkip();
1280         else if (isIn(c0,SPACE))
1281             skip();
1282         else if (c0=='{' && c1=='-') { /* (potentially) nested comment     */
1283             Int nesting = 1;
1284             Int origRow = row;         /* Save original row number         */
1285             skip();
1286             skip();
1287             while (nesting>0 && c0!=EOF)
1288                 if (c0=='{' && c1=='-') {
1289                     skip();
1290                     skip();
1291                     nesting++;
1292                 }
1293                 else if (c0=='-' && c1=='}') {
1294                     skip();
1295                     skip();
1296                     nesting--;
1297                 }
1298                 else if (c0=='\n')
1299                     newlineSkip();
1300                 else
1301                     skip();
1302             if (nesting>0) {
1303                 ERRMSG(origRow) "Unterminated nested comment {- ..."
1304                 EEND;
1305             }
1306         }
1307         else if (c0=='-' && c1=='-') {  /* One line comment                */
1308             do
1309                 skip();
1310             while (c0!='\n' && c0!=EOF);
1311             if (c0=='\n')
1312                 newlineSkip();
1313         }
1314         else
1315             return;
1316 }
1317
1318 static Bool firstToken;                /* Set to TRUE for first token      */
1319 static Int  firstTokenIs;              /* ... with token value stored here */
1320
1321 static Int local yylex() {             /* Read next input token ...        */
1322     static Bool insertOpen    = FALSE;
1323     static Bool insertedToken = FALSE;
1324     static Text textRepeat;
1325
1326 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1327
1328     if (firstToken) {                  /* Special case for first token     */
1329         indentDepth   = (-1);
1330         firstToken    = FALSE;
1331         insertOpen    = FALSE;
1332         insertedToken = FALSE;
1333         if (reading==KEYBOARD)
1334             textRepeat = findText(repeatStr);
1335         return firstTokenIs;
1336     }
1337
1338     if (offsideON && insertOpen) {     /* insert `soft' opening brace      */
1339         insertOpen    = FALSE;
1340         insertedToken = TRUE;
1341         goOffside(column);
1342         push(yylval = mkInt(row));
1343         return '{';
1344     }
1345
1346     /* ----------------------------------------------------------------------
1347      * Skip white space, and insert tokens to support layout rules as reqd.
1348      * --------------------------------------------------------------------*/
1349
1350     skipWhitespace();
1351     startColumn = column;
1352     push(yylval = mkInt(row));         /* default token value is line no.  */
1353     /* subsequent changes to yylval must also set top() to the same value  */
1354
1355     if (indentDepth>=0) {              /* layout rule(s) active ?          */
1356         if (insertedToken)             /* avoid inserting multiple `;'s    */
1357             insertedToken = FALSE;     /* or putting `;' after `{'         */
1358         else
1359         if (offsideON && layout[indentDepth]!=HARD) {
1360             if (column<layout[indentDepth]) {
1361                 unOffside();
1362                 return '}';
1363             }
1364             else if (column==layout[indentDepth] && c0!=EOF) {
1365                 insertedToken = TRUE;
1366                 return ';';
1367             }
1368         }
1369     }
1370
1371     /* ----------------------------------------------------------------------
1372      * Now try to identify token type:
1373      * --------------------------------------------------------------------*/
1374
1375     switch (c0) {
1376         case EOF  : return 0;                   /* End of file/input       */
1377
1378         /* The next 10 characters make up the `special' category in 1.3    */
1379         case '('  : skip(); return '(';
1380         case ')'  : skip(); return ')';
1381         case ','  : skip(); return ',';
1382         case ';'  : skip(); return ';';
1383         case '['  : skip(); return '[';
1384         case ']'  : skip(); return ']';
1385         case '`'  : skip(); return '`';
1386         case '{'  : if (offsideON) goOffside(HARD);
1387                     skip();
1388                     return '{';
1389         case '}'  : if (offsideON && indentDepth<0) {
1390                         ERRMSG(row) "Misplaced `}'"
1391                         EEND;
1392                     }
1393                     if (!(offsideON && layout[indentDepth]!=HARD))
1394                         skip();                         /* skip over hard }*/
1395                     if (offsideON)
1396                         unOffside();    /* otherwise, we have to insert a }*/
1397                     return '}';         /* to (try to) avoid an error...   */
1398
1399         /* Character and string literals                                   */
1400         case '\'' : top() = yylval = readChar();
1401                     return CHARLIT;
1402
1403         case '\"' : top() = yylval = readString();
1404                     return STRINGLIT;
1405     }
1406
1407 #if TREX
1408     if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1409         Text it;                        /* Look for record selector name   */
1410         skip();
1411         it    = readIdent();
1412         top() = yylval = ap(RECSEL,mkExt(it));
1413         return identType=RECSELID;
1414     }
1415 #endif
1416     if (isIn(c0,LARGE)) {               /* Look for qualified name         */
1417         Text it = readIdent();          /* No keyword begins with LARGE ...*/
1418         if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1419             Text it2 = NIL;
1420             skip();                     /* Skip qualifying dot             */
1421             if (isIn(c0,SYMBOL)) { /* Qualified operator */
1422                 it2 = readOperator();
1423                 if (opType==CONOP) {
1424                     top() = yylval = mkQConOp(it,it2);
1425                     return QCONOP;
1426                 } else {
1427                     top() = yylval = mkQVarOp(it,it2);
1428                     return QVAROP;
1429                 }
1430             } else {               /* Qualified identifier */
1431                 it2 = readIdent();
1432                 if (identType==CONID) {
1433                     top() = yylval = mkQCon(it,it2);
1434                     return QCONID;
1435                 } else {
1436                     top() = yylval = mkQVar(it,it2);
1437                     return QVARID;
1438                 }
1439             }
1440         } else {
1441             top() = yylval = mkCon(it);
1442             return identType;
1443         }
1444     }
1445     if (isIn(c0,(SMALL|LARGE))) {
1446         Text it = readIdent();
1447
1448         if (it==textCase)              return CASEXP;
1449         if (it==textOfK)               lookAhead(OF);
1450         if (it==textData)              return DATA;
1451         if (it==textType)              return TYPE;
1452         if (it==textIf)                return IF;
1453         if (it==textThen)              return THEN;
1454         if (it==textElse)              return ELSE;
1455         if (it==textWhere)             lookAhead(WHERE);
1456         if (it==textLet)               lookAhead(LET);
1457         if (it==textIn)                return IN;
1458         if (it==textInfix)             return INFIXN;
1459         if (it==textInfixl)            return INFIXL;
1460         if (it==textInfixr)            return INFIXR;
1461         if (it==textForeign)           return FOREIGN;
1462         if (it==textUnsafe)            return UNSAFE;
1463         if (it==textNewtype)           return TNEWTYPE;
1464         if (it==textDefault)           return DEFAULT;
1465         if (it==textDeriving)          return DERIVING;
1466         if (it==textDo)                lookAhead(DO);
1467         if (it==textClass)             return TCLASS;
1468         if (it==textInstance)          return TINSTANCE;
1469         if (it==textModule)            return TMODULE;
1470         if (it==textInterface)         return INTERFACE;
1471         if (it==textInstImport)        return INSTIMPORT;
1472         if (it==textImport)            return IMPORT;
1473         if (it==textExport)            return EXPORT;
1474         if (it==textDynamic)           return DYNAMIC;
1475         if (it==textUUExport)          return UUEXPORT;
1476         if (it==textHiding)            return HIDING;
1477         if (it==textQualified)         return QUALIFIED;
1478         if (it==textAsMod)             return ASMOD;
1479         if (it==textWildcard)          return '_';
1480         if (it==textAll && !haskell98) return ALL;
1481         if (it==textUUAll)             return ALL;
1482         if (it==textRepeat && reading==KEYBOARD)
1483             return repeatLast();
1484
1485         top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1486         return identType;
1487     }
1488
1489     if (isIn(c0,SYMBOL)) {
1490         Text it = readOperator();
1491
1492         if (it==textCoco)    return COCO;
1493         if (it==textEq)      return '=';
1494         if (it==textUpto)    return UPTO;
1495         if (it==textAs)      return '@';
1496         if (it==textLambda)  return '\\';
1497         if (it==textBar)     return '|';
1498         if (it==textFrom)    return FROM;
1499         if (it==textMinus)   return '-';
1500         if (it==textPlus)    return '+';
1501         if (it==textBang)    return '!';
1502         if (it==textDot)     return '.';
1503         if (it==textArrow)   return ARROW;
1504         if (it==textLazy)    return '~';
1505         if (it==textImplies) return IMPLIES;
1506         if (it==textRepeat && reading==KEYBOARD)
1507             return repeatLast();
1508
1509         top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1510         return opType;
1511     }
1512
1513     if (isIn(c0,DIGIT)) {
1514         top() = yylval = readNumber();
1515         return NUMLIT;
1516     }
1517
1518     ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1519                 ((int)c0), column
1520     EEND;
1521     return 0; /*NOTREACHED*/
1522 }
1523
1524 static Int local repeatLast() {         /* Obtain last expression entered  */
1525     if (isNull(yylval=getLastExpr())) {
1526         ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1527         EEND;
1528     }
1529     return REPEAT;
1530 }
1531
1532 Syntax defaultSyntax(t)                 /* Find default syntax of var named*/
1533 Text t; {                               /* by t ...                        */
1534     String s = textToStr(t);
1535     return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1536 }
1537
1538 Syntax syntaxOf(n)                      /* Find syntax for name            */
1539 Name n; {
1540     if (name(n).syntax==NO_SYNTAX)      /* Return default if no syntax set */
1541         return defaultSyntax(name(n).text);
1542     return name(n).syntax;
1543 }
1544
1545 /* --------------------------------------------------------------------------
1546  * main entry points to parser/lexer:
1547  * ------------------------------------------------------------------------*/
1548
1549 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1550 Int startWith; {                       /* determining whether to read a    */
1551     firstToken   = TRUE;               /* script or an expression          */
1552     firstTokenIs = startWith;
1553     if (startWith==INTERFACE)
1554        offsideON = FALSE; else
1555        offsideON = TRUE;
1556
1557     clearStack();
1558     if (yyparse()) {                   /* This can only be parser overflow */
1559         ERRMSG(row) "Parser overflow"  /* as all syntax errors are caught  */
1560         EEND;                          /* in the parser...                 */
1561     }
1562     drop();
1563     if (!stackEmpty())                 /* stack should now be empty        */
1564         internal("parseInput");
1565 }
1566
1567 #ifdef HSCRIPT
1568 static String memPrefix = "@mem@";
1569 static Int lenMemPrefix = 5;   /* strlen(memPrefix)*/
1570
1571 Void makeMemScript(mem,fname)
1572 String mem;
1573 String fname; {
1574    strcat(fname,memPrefix);
1575    itoa((int)mem, fname+strlen(fname), 10);
1576 }
1577
1578 Bool isMemScript(fname)
1579 String fname; {
1580    return (strstr(fname,memPrefix) != NULL);
1581 }
1582
1583 String memScriptString(fname)
1584 String fname; {
1585     String p = strstr(fname,memPrefix);
1586     if (p) {
1587         return (String)atoi(p+lenMemPrefix);
1588     } else {
1589         return NULL;
1590     }
1591 }
1592
1593 Void parseScript(fname,len)             /* Read a script, possibly from mem */
1594 String fname;
1595 Long len; {
1596     input(RESET);
1597     if (isMemScript(fname)) {
1598         char* s = memScriptString(fname);
1599         stringInput(s);
1600     } else {
1601         fileInput(fname,len);
1602     }
1603     parseInput(SCRIPT);
1604 }
1605 #else
1606 Void parseScript(nm,len)               /* Read a script                    */
1607 String nm;
1608 Long   len; {                          /* Used to set a target for reading */
1609     input(RESET);
1610     fileInput(nm,len);
1611     parseInput(SCRIPT);
1612 }
1613 #endif
1614
1615 Void parseExp() {                      /* Read an expression to evaluate   */
1616     parseInput(EXPR);
1617     setLastExpr(inputExpr);
1618 }
1619
1620 Void parseInterface(nm,len)            /* Read a GHC interface file        */
1621 String nm;
1622 Long   len; {                          /* Used to set a target for reading */
1623     input(RESET);
1624     fileInput(nm,len);
1625     parseInput(INTERFACE);
1626 }
1627
1628
1629 /* --------------------------------------------------------------------------
1630  * Input control:
1631  * ------------------------------------------------------------------------*/
1632
1633 Void input(what)
1634 Int what; {
1635     switch (what) {
1636         case INSTALL : initCharTab();
1637                        textCase       = findText("case");
1638                        textOfK        = findText("of");
1639                        textData       = findText("data");
1640                        textType       = findText("type");
1641                        textIf         = findText("if");
1642                        textThen       = findText("then");
1643                        textElse       = findText("else");
1644                        textWhere      = findText("where");
1645                        textLet        = findText("let");
1646                        textIn         = findText("in");
1647                        textInfix      = findText("infix");
1648                        textInfixl     = findText("infixl");
1649                        textInfixr     = findText("infixr");
1650                        textForeign    = findText("foreign");
1651                        textUnsafe     = findText("unsafe");
1652                        textNewtype    = findText("newtype");
1653                        textDefault    = findText("default");
1654                        textDeriving   = findText("deriving");
1655                        textDo         = findText("do");
1656                        textClass      = findText("class");
1657                        textInstance   = findText("instance");
1658                        textCoco       = findText("::");
1659                        textEq         = findText("=");
1660                        textUpto       = findText("..");
1661                        textAs         = findText("@");
1662                        textLambda     = findText("\\");
1663                        textBar        = findText("|");
1664                        textMinus      = findText("-");
1665                        textPlus       = findText("+");
1666                        textFrom       = findText("<-");
1667                        textArrow      = findText("->");
1668                        textLazy       = findText("~");
1669                        textBang       = findText("!");
1670                        textDot        = findText(".");
1671                        textImplies    = findText("=>");
1672                        textPrelude    = findText("Prelude");
1673                        textNum        = findText("Num");
1674                        textModule     = findText("module");
1675                        textInterface  = findText("__interface");
1676                        textInstImport = findText("__instimport");
1677                        textExport     = findText("export");
1678                        textDynamic    = findText("dynamic");
1679                        textUUExport   = findText("__export");
1680                        textImport     = findText("import");
1681                        textHiding     = findText("hiding");
1682                        textQualified  = findText("qualified");
1683                        textAsMod      = findText("as");
1684                        textWildcard   = findText("_");
1685                        textAll        = findText("forall");
1686                        textUUAll      = findText("__forall");
1687                        varMinus       = mkVar(textMinus);
1688                        varPlus        = mkVar(textPlus);
1689                        varBang        = mkVar(textBang);
1690                        varDot         = mkVar(textDot);
1691                        varHiding      = mkVar(textHiding);
1692                        varQualified   = mkVar(textQualified);
1693                        varAsMod       = mkVar(textAsMod);
1694                        conMain        = mkCon(findText("Main"));
1695                        varMain        = mkVar(findText("main"));
1696                        evalDefaults   = NIL;
1697
1698                        input(RESET);
1699                        break;
1700
1701         case RESET   : tyconDefns   = NIL;
1702                        typeInDefns  = NIL;
1703                        valDefns     = NIL;
1704                        classDefns   = NIL;
1705                        instDefns    = NIL;
1706                        selDefns     = NIL;
1707                        genDefns     = NIL;
1708                        //primDefns    = NIL;
1709                        unqualImports= NIL;
1710                        foreignImports= NIL;
1711                        foreignExports= NIL;
1712                        defaultDefns = NIL;
1713                        defaultLine  = 0;
1714                        inputExpr    = NIL;
1715                        imps         = NIL;
1716                        closeAnyInput();
1717                        break;
1718
1719         case BREAK   : if (reading==KEYBOARD)
1720                            c0 = EOF;
1721                        break;
1722
1723         case MARK    : mark(tyconDefns);
1724                        mark(typeInDefns);
1725                        mark(valDefns);
1726                        mark(classDefns);
1727                        mark(instDefns);
1728                        mark(selDefns);
1729                        mark(genDefns);
1730                        //mark(primDefns);
1731                        mark(unqualImports);
1732                        mark(foreignImports);
1733                        mark(foreignExports);
1734                        mark(defaultDefns);
1735                        mark(evalDefaults);
1736                        mark(inputExpr);
1737                        mark(varMinus);
1738                        mark(varPlus);
1739                        mark(varBang);
1740                        mark(varDot);
1741                        mark(varHiding);
1742                        mark(varQualified);
1743                        mark(varAsMod);
1744                        mark(varMain);
1745                        mark(conMain);
1746                        mark(imps);
1747                        break;
1748     }
1749 }
1750
1751 /*-------------------------------------------------------------------------*/