ghc/interpreter/input.c

   1
   2 /* --------------------------------------------------------------------------
   3  * Input functions, lexical analysis parsing etc...
   4  *
   5  * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
   6  * Yale Haskell Group, and the Oregon Graduate Institute of Science and
   7  * Technology, 1994-1999, All rights reserved.  It is distributed as
   8  * free software under the license in the file "License", which is
   9  * included in the distribution.
  10  *
  11  * $RCSfile: input.c,v $
  12  * $Revision: 1.10 $
  13  * $Date: 1999/10/26 17:27:39 $
  14  * ------------------------------------------------------------------------*/
  15
  16 #include "prelude.h"
  17 #include "storage.h"
  18 #include "backend.h"
  19 #include "connect.h"
  20 #include "command.h"
  21 #include "errors.h"
  22 #include "link.h"
  23 #include <ctype.h>
  24 #if HAVE_GETDELIM_H
  25 #include "getdelim.h"
  26 #endif
  27
  28 #if IS_WIN32
  29 #include <windows.h>
  30 #endif
  31
  32 #if IS_WIN32 | HUGS_FOR_WINDOWS
  33 #undef IN
  34 #endif
  35
  36 /* --------------------------------------------------------------------------
  37  * Global data:
  38  * ------------------------------------------------------------------------*/
  39
  40 List tyconDefns      = NIL;             /* type constructor definitions    */
  41 List typeInDefns     = NIL;             /* type synonym restrictions       */
  42 List valDefns        = NIL;             /* value definitions in script     */
  43 List classDefns      = NIL;             /* class defns in script           */
  44 List instDefns       = NIL;             /* instance defns in script        */
  45 List selDefns        = NIL;             /* list of selector lists          */
  46 List genDefns        = NIL;             /* list of generated names         */
  47 List unqualImports   = NIL;             /* unqualified import list         */
  48 List foreignImports  = NIL;             /* foreign imports                 */
  49 List foreignExports  = NIL;             /* foreign exportsd                */
  50 List defaultDefns    = NIL;             /* default definitions (if any)    */
  51 Int  defaultLine     = 0;               /* line in which default defs occur*/
  52 List evalDefaults    = NIL;             /* defaults for evaluator          */
  53
  54 Cell inputExpr       = NIL;             /* input expression                */
  55 Cell inputContext    = NIL;             /* input context                   */
  56 Bool literateScripts = FALSE;           /* TRUE => default to lit scripts  */
  57 Bool literateErrors  = TRUE;            /* TRUE => report errs in lit scrs */
  58 Bool offsideON       = TRUE;            /* TRUE => implement offside rule  */
  59
  60 String repeatStr     = 0;               /* Repeat last expr                */
  61
  62 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
  63 String preprocessor  = 0;
  64 #endif
  65
  66 /* --------------------------------------------------------------------------
  67  * Local function prototypes:
  68  * ------------------------------------------------------------------------*/
  69
  70 static Void local initCharTab     Args((Void));
  71 static Void local fileInput       Args((String,Long));
  72 static Bool local literateMode    Args((String));
  73 static Bool local linecmp         Args((String,String));
  74 static Int  local nextLine        Args((Void));
  75 static Void local skip            Args((Void));
  76 static Void local thisLineIs      Args((Int));
  77 static Void local newlineSkip     Args((Void));
  78 static Void local closeAnyInput   Args((Void));
  79
  80        Int  yyparse         Args((Void)); /* can't stop yacc making this   */
  81                                           /* public, but don't advertise   */
  82                                           /* it in a header file.          */
  83
  84 static Void local endToken        Args((Void));
  85 static Text local readOperator    Args((Void));
  86 static Text local readIdent       Args((Void));
  87 static Cell local readRadixNumber Args((Int));
  88 static Cell local readNumber      Args((Void));
  89 static Cell local readChar        Args((Void));
  90 static Cell local readString      Args((Void));
  91 static Void local saveStrChr      Args((Char));
  92 static Cell local readAChar       Args((Bool));
  93
  94 static Bool local lazyReadMatches Args((String));
  95 static Cell local readEscapeChar  Args((Bool));
  96 static Void local skipGap         Args((Void));
  97 static Cell local readCtrlChar    Args((Void));
  98 static Cell local readOctChar     Args((Void));
  99 static Cell local readHexChar     Args((Void));
 100 static Int  local readHexDigit    Args((Char));
 101 static Cell local readDecChar     Args((Void));
 102
 103 static Void local goOffside       Args((Int));
 104 static Void local unOffside       Args((Void));
 105 static Bool local canUnOffside    Args((Void));
 106
 107 static Void local skipWhitespace  Args((Void));
 108 static Int  local yylex           Args((Void));
 109 static Int  local repeatLast      Args((Void));
 110
 111 static Void local parseInput      Args((Int));
 112
 113 static Bool local doesNotExceed   Args((String,Int,Int));
 114 static Int  local stringToInt     Args((String,Int));
 115
 116
 117 /* --------------------------------------------------------------------------
 118  * Text values for reserved words and special symbols:
 119  * ------------------------------------------------------------------------*/
 120
 121 static Text textCase,    textOfK,      textData,   textType,   textIf;
 122 static Text textThen,    textElse,     textWhere,  textLet,    textIn;
 123 static Text textInfix,   textInfixl,   textInfixr, textForeign, textNewtype;
 124 static Text textDefault, textDeriving, textDo,     textClass,  textInstance;
 125 #if IPARAM
 126 static Text textWith,  textDlet;
 127 #endif
 128
 129 static Text textCoco,    textEq,       textUpto,   textAs,     textLambda;
 130 static Text textBar,     textMinus,    textFrom,   textArrow,  textLazy;
 131 static Text textBang,    textDot,      textAll,    textImplies;
 132 static Text textWildcard;
 133
 134 static Text textModule,  textImport,    textInterface,  textInstImport;
 135 static Text textHiding,  textQualified, textAsMod;
 136 static Text textExport,  textDynamic,   textUUExport;
 137 static Text textUnsafe,  textUUAll;
 138
 139 Text   textCcall;                       /* ccall                           */
 140 Text   textStdcall;                     /* stdcall                         */
 141
 142 Text   textNum;                         /* Num                             */
 143 Text   textPrelude;                     /* Prelude                         */
 144 Text   textPlus;                        /* (+)                             */
 145
 146 static Cell conMain;                    /* Main                            */
 147 static Cell varMain;                    /* main                            */
 148
 149 static Cell varMinus;                   /* (-)                             */
 150 static Cell varPlus;                    /* (+)                             */
 151 static Cell varBang;                    /* (!)                             */
 152 static Cell varDot;                     /* (.)                             */
 153 static Cell varHiding;                  /* hiding                          */
 154 static Cell varQualified;               /* qualified                       */
 155 static Cell varAsMod;                   /* as                              */
 156
 157 static List imps;                       /* List of imports to be chased    */
 158
 159
 160 /* --------------------------------------------------------------------------
 161  * Character set handling:
 162  *
 163  * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
 164  * character set.  The following code provides methods for classifying
 165  * input characters according to the lexical structure specified by the
 166  * report.  Hugs should still accept older programs because ASCII is
 167  * essentially just a subset of the ISO character set.
 168  *
 169  * Notes: If you want to port Hugs to a machine that uses something
 170  * substantially different from the ISO character set, then you will need
 171  * to insert additional code to map between character sets.
 172  *
 173  * At some point, the following data structures may be exported in a .h
 174  * file to allow the information contained here to be picked up in the
 175  * implementation of LibChar is* primitives.
 176  *
 177  * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
 178  * ------------------------------------------------------------------------*/
 179
 180 static  Bool            charTabBuilt;
 181 static  unsigned char   ctable[NUM_CHARS];
 182 #define isIn(c,x)       (ctable[(unsigned char)(c)]&(x))
 183 #define isISO(c)        (0<=(c) && (c)<NUM_CHARS)
 184
 185 #define DIGIT           0x01
 186 #define SMALL           0x02
 187 #define LARGE           0x04
 188 #define SYMBOL          0x08
 189 #define IDAFTER         0x10
 190 #define SPACE           0x20
 191 #define PRINT           0x40
 192
 193 static Void local initCharTab() {       /* Initialize char decode table    */
 194 #define setRange(x,f,t) {Int i=f;   while (i<=t) ctable[i++] |=x;}
 195 #define setChar(x,c)    ctable[c] |= (x)
 196 #define setChars(x,s)   {char *p=s; while (*p)   ctable[(Int)*p++]|=x;}
 197 #define setCopy(x,c)    {Int i;                         \
 198                          for (i=0; i<NUM_CHARS; ++i)    \
 199                              if (isIn(i,c))             \
 200                                  ctable[i]|=x;          \
 201                         }
 202
 203     setRange(DIGIT,     '0','9');       /* ASCII decimal digits            */
 204
 205     setRange(SMALL,     'a','z');       /* ASCII lower case letters        */
 206     setRange(SMALL,     223,246);       /* ISO lower case letters          */
 207     setRange(SMALL,     248,255);       /* (omits division symbol, 247)    */
 208     setChar (SMALL,     '_');
 209
 210     setRange(LARGE,     'A','Z');       /* ASCII upper case letters        */
 211     setRange(LARGE,     192,214);       /* ISO upper case letters          */
 212     setRange(LARGE,     216,222);       /* (omits multiplication, 215)     */
 213
 214     setRange(SYMBOL,    161,191);       /* Symbol characters + ':'         */
 215     setRange(SYMBOL,    215,215);
 216     setChar (SYMBOL,    247);
 217     setChars(SYMBOL,    ":!#$%&*+./<=>?@\\^|-~");
 218
 219     setChar (IDAFTER,   '\'');          /* Characters in identifier        */
 220     setCopy (IDAFTER,   (DIGIT|SMALL|LARGE));
 221
 222     setChar (SPACE,     ' ');           /* ASCII space character           */
 223     setChar (SPACE,     160);           /* ISO non breaking space          */
 224     setRange(SPACE,     9,13);          /* special whitespace: \t\n\v\f\r  */
 225
 226     setChars(PRINT,     "(),;[]_`{}");  /* Special characters              */
 227     setChars(PRINT,     " '\"");        /* Space and quotes                */
 228     setCopy (PRINT,     (DIGIT|SMALL|LARGE|SYMBOL));
 229
 230     charTabBuilt = TRUE;
 231 #undef setRange
 232 #undef setChar
 233 #undef setChars
 234 #undef setCopy
 235 }
 236
 237
 238 /* --------------------------------------------------------------------------
 239  * Single character input routines:
 240  *
 241  * At the lowest level of input, characters are read one at a time, with the
 242  * current character held in c0 and the following (lookahead) character in
 243  * c1.  The corrdinates of c0 within the file are held in (column,row).
 244  * The input stream is advanced by one character using the skip() function.
 245  * ------------------------------------------------------------------------*/
 246
 247 #define TABSIZE    8                   /* spacing between tabstops         */
 248
 249 #define NOTHING    0                   /* what kind of input is being read?*/
 250 #define KEYBOARD   1                   /* - keyboard/console?              */
 251 #define SCRIPTFILE 2                   /* - script file                    */
 252 #define PROJFILE   3                   /* - project file                   */
 253 #define STRING     4                   /* - string buffer?                 */
 254
 255 static Int    reading   = NOTHING;
 256
 257 static Target readSoFar;
 258 static Int    row, column, startColumn;
 259 static int    c0, c1;
 260 static FILE   *inputStream = 0;
 261 static Bool   thisLiterate;
 262 static String nextStringChar;          /* next char in string buffer       */
 263
 264 #if     USE_READLINE                   /* for command line editors         */
 265 static  String currentLine;            /* editline or GNU readline         */
 266 static  String nextChar;
 267 #define nextConsoleChar() \
 268            (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
 269 extern  Void add_history  Args((String));
 270 extern  String readline   Args((String));
 271 #else
 272 #define nextConsoleChar() getc(stdin)
 273 #endif
 274
 275 static  Int litLines;                  /* count defn lines in lit script   */
 276 #define DEFNCHAR  '>'                  /* definition lines begin with this */
 277 static  Int lastLine;                  /* records type of last line read:  */
 278 #define STARTLINE 0                    /* - at start of file, none read    */
 279 #define BLANKLINE 1                    /* - blank (may preceed definition) */
 280 #define TEXTLINE  2                    /* - text comment                   */
 281 #define DEFNLINE  3                    /* - line containing definition     */
 282 #define CODELINE  4                    /* - line inside code block         */
 283
 284 #define BEGINCODE "\\begin{code}"
 285 #define ENDCODE   "\\end{code}"
 286
 287 #if HAVE_GETDELIM_H
 288 static char *lineBuffer = NULL;   /* getline() does the initial allocation */
 289 #else
 290 #define LINEBUFFER_SIZE 1000
 291 static char lineBuffer[LINEBUFFER_SIZE];
 292 #endif
 293 static int lineLength = 0;
 294 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
 295 static int linePtr = 0;
 296
 297 Void consoleInput(prompt)              /* prepare to input characters from */
 298 String prompt; {                       /* standard in (i.e. console/kbd)   */
 299     reading     = KEYBOARD;            /* keyboard input is Line oriented, */
 300     c0          =                      /* i.e. input terminated by '\n'    */
 301     c1          = ' ';
 302     column      = (-1);
 303     row         = 0;
 304
 305 #if USE_READLINE
 306     /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
 307      * avoids accidentally freeing currentLine twice.
 308      */
 309     if (currentLine) {
 310         String oldCurrentLine = currentLine;
 311         currentLine = 0;           /* We may lose the space of currentLine */
 312         free(oldCurrentLine);      /* if interrupted here - unlikely       */
 313     }
 314     currentLine = readline(prompt);
 315     nextChar    = currentLine;
 316     if (currentLine) {
 317         if (*currentLine)
 318             add_history(currentLine);
 319     }
 320     else
 321         c0 = c1 = EOF;
 322 #else
 323     Printf("%s",prompt);
 324     FlushStdout();
 325 #endif
 326 }
 327
 328 Void projInput(nm)                     /* prepare to input characters from */
 329 String nm; {                           /* from named project file          */
 330     if ((inputStream = fopen(nm,"r"))!=0) {
 331         reading = PROJFILE;
 332         c0      = ' ';
 333         c1      = '\n';
 334         column  = 1;
 335         row     = 0;
 336     }
 337     else {
 338         ERRMSG(0) "Unable to open project file \"%s\"", nm
 339         EEND;
 340     }
 341 }
 342
 343 static Void local fileInput(nm,len)     /* prepare to input characters from*/
 344 String nm;                              /* named file (specified length is */
 345 Long   len; {                           /* used to set target for reading) */
 346 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 347     if (preprocessor) {
 348         Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
 349         char *cmd = malloc(reallen);
 350         if (cmd == NULL) {
 351             ERRMSG(0) "Unable to allocate memory for filter command."
 352             EEND;
 353         }
 354         strcpy(cmd,preprocessor);
 355         strcat(cmd," ");
 356         strcat(cmd,nm);
 357         inputStream = popen(cmd,"r");
 358         free(cmd);
 359     } else {
 360         inputStream = fopen(nm,"r");
 361     }
 362 #else
 363     inputStream = fopen(nm,"r");
 364 #endif
 365     if (inputStream) {
 366         reading      = SCRIPTFILE;
 367         c0           = ' ';
 368         c1           = '\n';
 369         column       = 1;
 370         row          = 0;
 371
 372         lastLine     = STARTLINE;       /* literate file processing */
 373         litLines     = 0;
 374         linePtr      = 0;
 375         lineLength   = 0;
 376         thisLiterate = literateMode(nm);
 377         inCodeBlock  = FALSE;
 378
 379         readSoFar    = 0;
 380         setGoal("Parsing", (Target)len);
 381     }
 382     else {
 383         ERRMSG(0) "Unable to open file \"%s\"", nm
 384         EEND;
 385     }
 386 }
 387
 388 Void stringInput(s)             /* prepare to input characters from string */
 389 String s; {
 390     reading      = STRING;
 391     c0           = EOF;
 392     c1           = EOF;
 393     if (*s) c0 = *s++;
 394     if (*s) c1 = *s++;
 395     column       = 1;
 396     row          = 1;
 397
 398     nextStringChar = s;
 399     if (!charTabBuilt)
 400         initCharTab();
 401 }
 402
 403 static Bool local literateMode(nm)      /* Select literate mode for file   */
 404 String nm; {
 405     char *dot = strrchr(nm,'.');        /* look for last dot in file name  */
 406     if (dot) {
 407         if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate    */
 408             return FALSE;
 409         if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
 410             filenamecmp(dot+1,"verb")==0) /* literate scripts              */
 411             return TRUE;
 412     }
 413     return literateScripts;             /* otherwise, use the default      */
 414 }
 415
 416
 417 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
 418 {
 419    Int len;
 420    String dot;
 421    len = 1 + strlen ( srcName );
 422    *hiName = malloc(len);
 423    *oName  = malloc(len);
 424    if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
 425    (*hiName)[0] = (*oName)[0] = 0;
 426    dot = strrchr(srcName, '.');
 427    if (!dot) return;
 428    if (filenamecmp(dot+1, "hs")==0 &&
 429        filenamecmp(dot+1, "lhs")==0 &&
 430        filenamecmp(dot+1, "verb")==0) return;
 431
 432    strcpy(*hiName, srcName);
 433    dot = strrchr(*hiName, '.');
 434    dot[1] = 'h';
 435    dot[2] = 'i';
 436    dot[3] = 0;
 437
 438    strcpy(*oName, srcName);
 439    dot = strrchr(*oName, '.');
 440    dot[1] = 'o';
 441    dot[2] = 0;
 442 }
 443
 444
 445
 446 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
 447  * I've removed the loop (since newLineSkip contains a loop too) and
 448  * replaced the warnings with errors. ADR
 449  */
 450 /*
 451  * To deal with literate \begin{code}...\end{code} blocks,
 452  * add a line buffer that rooms the current line. The old c0 and c1
 453  * stream pointers are used as before within that buffer -- sof
 454  *
 455  * Upon reading a new line into the line buffer, we check to see if
 456  * we're reading in a line containing \begin{code} or \end{code} and
 457  * take appropriate action.
 458  */
 459
 460 static Bool local linecmp(s,line)       /* compare string with line        */
 461 String s;                               /* line may end in whitespace      */
 462 String line; {
 463     Int i=0;
 464     while (s[i] != '\0' && s[i] == line[i]) {
 465         ++i;
 466     }
 467     /* s[0..i-1] == line[0..i-1] */
 468     if (s[i] != '\0') {                 /* check s `isPrefixOf` line       */
 469         return FALSE;
 470     }
 471     while (isIn(line[i], SPACE)) {      /* allow whitespace at end of line */
 472         ++i;
 473     }
 474     return (line[i] == '\0');
 475 }
 476
 477 /* Returns line length (including \n) or 0 upon EOF. */
 478 static Int local nextLine()
 479 {
 480 #if HAVE_GETDELIM_H
 481     /*
 482        Forget about fgets(), it is utterly braindead.
 483        (Assumes \NUL free streams and does not gracefully deal
 484        with overflow.) Instead, use GNU libc's getline().
 485        */
 486     lineLength = getline(&lineBuffer, &lineLength, inputStream);
 487 #else
 488     if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
 489         lineLength = strlen(lineBuffer);
 490     else
 491         lineLength = 0;
 492 #endif
 493     /* printf("Read: \"%s\"", lineBuffer); */
 494     if (lineLength <= 0) { /* EOF / IO error, who knows.. */
 495         return lineLength;
 496     }
 497     else if (lineLength >= 2 && lineBuffer[0] == '#' &&
 498              lineBuffer[1] == '!') {
 499         lineBuffer[0]='\n'; /* pretend it's a blank line */
 500         lineBuffer[1]='\0';
 501         lineLength=1;
 502     } else if (thisLiterate) {
 503         if (linecmp(BEGINCODE, lineBuffer)) {
 504             if (!inCodeBlock) {             /* Entered a code block        */
 505                 inCodeBlock = TRUE;
 506                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 507                 lineBuffer[1]='\0';
 508                 lineLength=1;
 509             }
 510             else {
 511                 ERRMSG(row) "\\begin{code} encountered inside code block"
 512                 EEND;
 513             }
 514         }
 515         else if (linecmp(ENDCODE, lineBuffer)) {
 516             if (inCodeBlock) {              /* Finished code block         */
 517                 inCodeBlock = FALSE;
 518                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 519                 lineBuffer[1]='\0';
 520                 lineLength=1;
 521             }
 522             else {
 523                 ERRMSG(row) "\\end{code} encountered outside code block"
 524                 EEND;
 525             }
 526         }
 527     }
 528     /* printf("Read: \"%s\"", lineBuffer); */
 529     return lineLength;
 530 }
 531
 532 static Void local skip() {              /* move forward one char in input  */
 533     if (c0!=EOF) {                      /* stream, updating c0, c1, ...    */
 534         if (c0=='\n') {                 /* Adjusting cursor coords as nec. */
 535             row++;
 536             column=1;
 537             if (reading==SCRIPTFILE)
 538                 soFar(readSoFar);
 539         }
 540         else if (c0=='\t')
 541             column += TABSIZE - ((column-1)%TABSIZE);
 542         else
 543             column++;
 544
 545         c0 = c1;
 546         readSoFar++;
 547
 548         if (c0==EOF) {
 549             column = 0;
 550             if (reading==SCRIPTFILE)
 551                 done();
 552             closeAnyInput();
 553         }
 554         else if (reading==KEYBOARD) {
 555             allowBreak();
 556             if (c0=='\n')
 557                 c1 = EOF;
 558             else {
 559                 c1 = nextConsoleChar();
 560 #if IS_WIN32 && !HUGS_FOR_WINDOWS
 561                 Sleep(0);
 562 #endif
 563                 /* On Win32, hitting ctrl-C causes the next getchar to
 564                  * fail - returning "-1" to indicate an error.
 565                  * This is one of the rare cases where "-1" does not mean EOF.
 566                  */
 567                 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
 568                     c1 = ' ';
 569                 }
 570             }
 571         }
 572         else if (reading==STRING) {
 573             c1 = (unsigned char) *nextStringChar++;
 574             if (c1 == '\0')
 575                 c1 = EOF;
 576         }
 577         else {
 578             if (lineLength <=0 || linePtr == lineLength) {
 579                 /* Current line, exhausted - get new one */
 580                 if (nextLine() <= 0) { /* EOF */
 581                     c1 = EOF;
 582                 }
 583                 else {
 584                     linePtr = 0;
 585                     c1 = (unsigned char)lineBuffer[linePtr++];
 586                 }
 587             }
 588             else {
 589                 c1 = (unsigned char)lineBuffer[linePtr++];
 590             }
 591         }
 592
 593     }
 594 }
 595
 596 static Void local thisLineIs(kind)     /* register kind of current line    */
 597 Int kind; {                            /* & check for literate script errs */
 598     if (literateErrors) {
 599         if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
 600             (kind==TEXTLINE && lastLine==DEFNLINE)) {
 601             ERRMSG(row) "Program line next to comment"
 602             EEND;
 603         }
 604         lastLine = kind;
 605     }
 606 }
 607
 608 static Void local newlineSkip() {      /* skip `\n' (supports lit scripts) */
 609     /* assert(c0=='\n'); */
 610     if (reading==SCRIPTFILE && thisLiterate) {
 611         do {
 612             skip();
 613             if (inCodeBlock) {         /* pass chars on definition lines   */
 614                 thisLineIs(CODELINE);  /* to lexer (w/o leading DEFNCHAR)  */
 615                 litLines++;
 616                 return;
 617             }
 618             if (c0==DEFNCHAR) {        /* pass chars on definition lines   */
 619                 thisLineIs(DEFNLINE);  /* to lexer (w/o leading DEFNCHAR)  */
 620                 skip();
 621                 litLines++;
 622                 return;
 623             }
 624             while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank?   */
 625                 skip();
 626             if (c0=='\n' || c0==EOF)
 627                 thisLineIs(BLANKLINE);
 628             else {
 629                 thisLineIs(TEXTLINE);  /* otherwise it must be a comment   */
 630                 while (c0!='\n' && c0!=EOF)
 631                     skip();
 632             }                          /* by now, c0=='\n' or c0==EOF      */
 633         } while (c0!=EOF);             /* if new line, start again         */
 634
 635         if (litLines==0 && literateErrors) {
 636             ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
 637                         DEFNCHAR
 638             EEND;
 639         }
 640         return;
 641     }
 642     skip();
 643 }
 644
 645 static Void local closeAnyInput() {    /* Close input stream, if open,     */
 646     switch (reading) {                 /* or skip to end of console line   */
 647         case PROJFILE   :
 648         case SCRIPTFILE : if (inputStream) {
 649 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 650                               if (preprocessor) {
 651                                   pclose(inputStream);
 652                               } else {
 653                                   fclose(inputStream);
 654                               }
 655 #else
 656                               fclose(inputStream);
 657 #endif
 658                               inputStream = 0;
 659                           }
 660                           break;
 661         case KEYBOARD   : while (c0!=EOF)
 662                               skip();
 663                           break;
 664     }
 665     reading=NOTHING;
 666 }
 667
 668 /* --------------------------------------------------------------------------
 669  * Parser: Uses table driven parser generated from parser.y using yacc
 670  * ------------------------------------------------------------------------*/
 671
 672 #include "parser.c"
 673
 674 /* --------------------------------------------------------------------------
 675  * Single token input routines:
 676  *
 677  * The following routines read the values of particular kinds of token given
 678  * that the first character of the token has already been located in c0 on
 679  * entry to the routine.
 680  * ------------------------------------------------------------------------*/
 681
 682 #define MAX_TOKEN           4000
 683 #define startToken()        tokPos = 0
 684 #define saveTokenChar(c)    if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
 685 #define saveChar(c)         tokenStr[tokPos++]=(char)(c)
 686 #define overflows(n,b,d,m)  (n > ((m)-(d))/(b))
 687
 688 static char tokenStr[MAX_TOKEN+1];     /* token buffer                     */
 689 static Int  tokPos;                    /* input position in buffer         */
 690 static Int  identType;                 /* identifier type: CONID / VARID   */
 691 static Int  opType;                    /* operator type  : CONOP / VAROP   */
 692
 693 static Void local endToken() {         /* check for token overflow         */
 694     if (tokPos>MAX_TOKEN) {
 695         ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
 696         EEND;
 697     }
 698     tokenStr[tokPos] = '\0';
 699 }
 700
 701 static Text local readOperator() {     /* read operator symbol             */
 702     startToken();
 703     do {
 704         saveTokenChar(c0);
 705         skip();
 706     } while (isISO(c0) && isIn(c0,SYMBOL));
 707     opType = (tokenStr[0]==':' ? CONOP : VAROP);
 708     endToken();
 709     return findText(tokenStr);
 710 }
 711
 712 static Text local readIdent() {        /* read identifier                  */
 713     startToken();
 714     do {
 715         saveTokenChar(c0);
 716         skip();
 717     } while (isISO(c0) && isIn(c0,IDAFTER));
 718     endToken();
 719     identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
 720     return findText(tokenStr);
 721 }
 722
 723
 724 static Bool local doesNotExceed(s,radix,limit)
 725 String s;
 726 Int    radix;
 727 Int    limit; {
 728     Int n = 0;
 729     Int p = 0;
 730     while (TRUE) {
 731         if (s[p] == 0) return TRUE;
 732         if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
 733         n = radix*n + (s[p]-'0');
 734         p++;
 735     }
 736 }
 737
 738 static Int local stringToInt(s,radix)
 739 String s;
 740 Int    radix; {
 741     Int n = 0;
 742     Int p = 0;
 743     while (TRUE) {
 744         if (s[p] == 0) return n;
 745         n = radix*n + (s[p]-'0');
 746         p++;
 747     }
 748 }
 749
 750 static Cell local readRadixNumber(r)   /* Read literal in specified radix  */
 751 Int r; {                               /* from input of the form 0c{digs}  */
 752     Int d;
 753     startToken();
 754     skip();                            /* skip leading zero                */
 755     if ((d=readHexDigit(c1))<0 || d>=r) {
 756         /* Special case; no digits, lex as  */
 757         /* if it had been written "0 c..."  */
 758         saveTokenChar('0');
 759     } else {
 760         skip();
 761         do {
 762             saveTokenChar('0'+readHexDigit(c0));
 763             skip();
 764             d = readHexDigit(c0);
 765         } while (d>=0 && d<r);
 766     }
 767     endToken();
 768
 769     if (doesNotExceed(tokenStr,r,MAXPOSINT))
 770         return mkInt(stringToInt(tokenStr,r));
 771     else
 772     if (r == 10)
 773         return stringToBignum(tokenStr);
 774     else {
 775         ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
 776         EEND;
 777     }
 778 }
 779
 780 static Cell local readNumber() {        /* read numeric constant           */
 781
 782     if (c0=='0') {
 783         if (c1=='x' || c1=='X')         /* Maybe a hexadecimal literal?    */
 784             return readRadixNumber(16);
 785         if (c1=='o' || c1=='O')         /* Maybe an octal literal?         */
 786             return readRadixNumber(8);
 787     }
 788
 789     startToken();
 790     do {
 791         saveTokenChar(c0);
 792         skip();
 793     } while (isISO(c0) && isIn(c0,DIGIT));
 794
 795     if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
 796         endToken();
 797         if (doesNotExceed(tokenStr,10,MAXPOSINT))
 798             return mkInt(stringToInt(tokenStr,10)); else
 799             return stringToBignum(tokenStr);
 800     }
 801
 802     saveTokenChar(c0);                  /* save decimal point              */
 803     skip();
 804     do {                                /* process fractional part ...     */
 805         saveTokenChar(c0);
 806         skip();
 807     } while (isISO(c0) && isIn(c0,DIGIT));
 808
 809     if (c0=='e' || c0=='E') {           /* look for exponent part...       */
 810         saveTokenChar('e');
 811         skip();
 812         if (c0=='-') {
 813             saveTokenChar('-');
 814             skip();
 815         }
 816         else if (c0=='+')
 817             skip();
 818
 819         if (!isISO(c0) || !isIn(c0,DIGIT)) {
 820             ERRMSG(row) "Missing digits in exponent"
 821             EEND;
 822         }
 823         else {
 824             do {
 825                 saveTokenChar(c0);
 826                 skip();
 827             } while (isISO(c0) && isIn(c0,DIGIT));
 828         }
 829     }
 830
 831     endToken();
 832     return mkFloat(stringToFloat(tokenStr));
 833 }
 834
 835
 836
 837
 838
 839
 840
 841 static Cell local readChar() {         /* read character constant          */
 842     Cell charRead;
 843
 844     skip(/* '\'' */);
 845     if (c0=='\'' || c0=='\n' || c0==EOF) {
 846         ERRMSG(row) "Illegal character constant"
 847         EEND;
 848     }
 849
 850     charRead = readAChar(FALSE);
 851
 852     if (c0=='\'')
 853         skip(/* '\'' */);
 854     else {
 855         ERRMSG(row) "Improperly terminated character constant"
 856         EEND;
 857     }
 858     return charRead;
 859 }
 860
 861 static Cell local readString() {       /* read string literal              */
 862     Cell c;
 863
 864     startToken();
 865     skip(/* '\"' */);
 866     while (c0!='\"' && c0!='\n' && c0!=EOF) {
 867         c = readAChar(TRUE);
 868         if (nonNull(c))
 869             saveStrChr(charOf(c));
 870     }
 871
 872     if (c0=='\"')
 873         skip(/* '\"' */);
 874     else {
 875         ERRMSG(row) "Improperly terminated string"
 876         EEND;
 877     }
 878     endToken();
 879     return mkStr(findText(tokenStr));
 880 }
 881
 882 static Void local saveStrChr(c)        /* save character in string         */
 883 Char c; {
 884     if (c!='\0' && c!='\\') {          /* save non null char as single char*/
 885         saveTokenChar(c);
 886     }
 887     else {                             /* save null char as TWO null chars */
 888         if (tokPos+1<MAX_TOKEN) {
 889             saveChar('\\');
 890             if (c=='\\')
 891                 saveChar('\\');
 892             else
 893                 saveChar('0');
 894         }
 895     }
 896 }
 897
 898 static Cell local readAChar(isStrLit)  /* read single char constant        */
 899 Bool isStrLit; {                       /* TRUE => enable \& and gaps       */
 900     Cell c = mkChar(c0);
 901
 902     if (c0=='\\')                      /* escape character?                */
 903         return readEscapeChar(isStrLit);
 904     if (!isISO(c0)) {
 905         ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
 906         EEND;
 907     }
 908     skip();                            /* normal character?                */
 909     return c;
 910 }
 911
 912 /* --------------------------------------------------------------------------
 913  * Character escape code sequences:
 914  * ------------------------------------------------------------------------*/
 915
 916 static struct {                        /* table of special escape codes    */
 917     char *codename;
 918     int  codenumber;
 919 } escapes[] = {
 920    {"a",    7}, {"b",    8}, {"f",   12}, {"n",   10},  /* common escapes  */
 921    {"r",   13}, {"t",    9}, {"\\",'\\'}, {"\"",'\"'},
 922    {"\'",'\''}, {"v",   11},
 923    {"NUL",  0}, {"SOH",  1}, {"STX",  2}, {"ETX",  3},  /* ascii codenames */
 924    {"EOT",  4}, {"ENQ",  5}, {"ACK",  6}, {"BEL",  7},
 925    {"BS",   8}, {"HT",   9}, {"LF",  10}, {"VT",  11},
 926    {"FF",  12}, {"CR",  13}, {"SO",  14}, {"SI",  15},
 927    {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
 928    {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
 929    {"CAN", 24}, {"EM",  25}, {"SUB", 26}, {"ESC", 27},
 930    {"FS",  28}, {"GS",  29}, {"RS",  30}, {"US",  31},
 931    {"SP",  32}, {"DEL", 127},
 932    {0,0}
 933 };
 934
 935 static Int  alreadyMatched;            /* Record portion of input stream   */
 936 static char alreadyRead[10];           /* that has been read w/o a match   */
 937
 938 static Bool local lazyReadMatches(s)   /* compare input stream with string */
 939 String s; {                            /* possibly using characters that   */
 940     int i;                             /* have already been read           */
 941
 942     for (i=0; i<alreadyMatched; ++i)
 943         if (alreadyRead[i]!=s[i])
 944             return FALSE;
 945
 946     while (s[i] && s[i]==c0) {
 947         alreadyRead[alreadyMatched++]=(char)c0;
 948         skip();
 949         i++;
 950     }
 951
 952     return s[i]=='\0';
 953 }
 954
 955 static Cell local readEscapeChar(isStrLit)/* read escape character         */
 956 Bool isStrLit; {
 957     int i=0;
 958
 959     skip(/* '\\' */);
 960     switch (c0) {
 961         case '&'  : if (isStrLit) {
 962                         skip();
 963                         return NIL;
 964                     }
 965                     ERRMSG(row) "Illegal use of `\\&' in character constant"
 966                     EEND;
 967                     break;/*NOTREACHED*/
 968
 969         case '^'  : return readCtrlChar();
 970
 971         case 'o'  : return readOctChar();
 972         case 'x'  : return readHexChar();
 973
 974         default   : if (!isISO(c0)) {
 975                         ERRMSG(row) "Illegal escape sequence"
 976                         EEND;
 977                     }
 978                     else if (isIn(c0,SPACE)) {
 979                         if (isStrLit) {
 980                             skipGap();
 981                             return NIL;
 982                         }
 983                         ERRMSG(row) "Illegal use of gap in character constant"
 984                         EEND;
 985                         break;
 986                     }
 987                     else if (isIn(c0,DIGIT))
 988                         return readDecChar();
 989     }
 990
 991     for (alreadyMatched=0; escapes[i].codename; i++)
 992         if (lazyReadMatches(escapes[i].codename))
 993             return mkChar(escapes[i].codenumber);
 994
 995     alreadyRead[alreadyMatched++] = (char)c0;
 996     alreadyRead[alreadyMatched++] = '\0';
 997     ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
 998                 alreadyRead
 999     EEND;
1000     return NIL;/*NOTREACHED*/
1001 }
1002
1003 static Void local skipGap() {          /* skip over gap in string literal  */
1004     do                                 /* (simplified in Haskell 1.1)      */
1005         if (c0=='\n')
1006             newlineSkip();
1007         else
1008             skip();
1009     while (isISO(c0) && isIn(c0,SPACE));
1010     if (c0!='\\') {
1011         ERRMSG(row) "Missing `\\' terminating string literal gap"
1012         EEND;
1013     }
1014     skip(/* '\\' */);
1015 }
1016
1017 static Cell local readCtrlChar() {     /* read escape sequence \^x         */
1018     static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1019     String which;
1020
1021     skip(/* '^' */);
1022     if ((which = strchr(controls,c0))==NULL) {
1023         ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1024         EEND;
1025     }
1026     skip();
1027     return mkChar(which-controls);
1028 }
1029
1030 static Cell local readOctChar() {      /* read octal character constant    */
1031     Int n = 0;
1032     Int d;
1033
1034     skip(/* 'o' */);
1035     if ((d = readHexDigit(c0))<0 || d>=8) {
1036         ERRMSG(row) "Empty octal character escape"
1037         EEND;
1038     }
1039     do {
1040         if (overflows(n,8,d,MAXCHARVAL)) {
1041             ERRMSG(row) "Octal character escape out of range"
1042             EEND;
1043         }
1044         n = 8*n + d;
1045         skip();
1046     } while ((d = readHexDigit(c0))>=0 && d<8);
1047
1048     return mkChar(n);
1049 }
1050
1051 static Cell local readHexChar() {      /* read hex character constant      */
1052     Int n = 0;
1053     Int d;
1054
1055     skip(/* 'x' */);
1056     if ((d = readHexDigit(c0))<0) {
1057         ERRMSG(row) "Empty hexadecimal character escape"
1058         EEND;
1059     }
1060     do {
1061         if (overflows(n,16,d,MAXCHARVAL)) {
1062             ERRMSG(row) "Hexadecimal character escape out of range"
1063             EEND;
1064         }
1065         n = 16*n + d;
1066         skip();
1067     } while ((d = readHexDigit(c0))>=0);
1068
1069     return mkChar(n);
1070 }
1071
1072 static Int local readHexDigit(c)       /* read single hex digit            */
1073 Char c; {
1074     if ('0'<=c && c<='9')
1075         return c-'0';
1076     if ('A'<=c && c<='F')
1077         return 10 + (c-'A');
1078     if ('a'<=c && c<='f')
1079         return 10 + (c-'a');
1080     return -1;
1081 }
1082
1083 static Cell local readDecChar() {      /* read decimal character constant  */
1084     Int n = 0;
1085
1086     do {
1087         if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1088             ERRMSG(row) "Decimal character escape out of range"
1089             EEND;
1090         }
1091         n = 10*n + (c0-'0');
1092         skip();
1093     } while (c0!=EOF && isIn(c0,DIGIT));
1094
1095     return mkChar(n);
1096 }
1097
1098 /* --------------------------------------------------------------------------
1099  * Produce printable representation of character:
1100  * ------------------------------------------------------------------------*/
1101
1102 String unlexChar(c,quote)              /* return string representation of  */
1103 Char c;                                /* character...                     */
1104 Char quote; {                          /* protect quote character          */
1105     static char buffer[12];
1106
1107     if (c<0)                           /* deal with sign extended chars..  */
1108         c += NUM_CHARS;
1109
1110     if (isISO(c) && isIn(c,PRINT)) {   /* normal printable character       */
1111         if (c==quote || c=='\\') {     /* look for quote of approp. kind   */
1112             buffer[0] = '\\';
1113             buffer[1] = (char)c;
1114             buffer[2] = '\0';
1115         }
1116         else {
1117             buffer[0] = (char)c;
1118             buffer[1] = '\0';
1119         }
1120     }
1121     else {                             /* look for escape code             */
1122         Int escs;
1123         for (escs=0; escapes[escs].codename; escs++)
1124             if (escapes[escs].codenumber==c) {
1125                 sprintf(buffer,"\\%s",escapes[escs].codename);
1126                 return buffer;
1127             }
1128         sprintf(buffer,"\\%d",c);      /* otherwise use numeric escape     */
1129     }
1130     return buffer;
1131 }
1132
1133 Void printString(s)                    /* print string s, using quotes and */
1134 String s; {                            /* escapes if any parts need them   */
1135     if (s) {
1136         String t = s;
1137         Char   c;
1138         while ((c = *t)!=0 && isISO(c)
1139                            && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1140             t++;
1141         }
1142         if (*t) {
1143             Putchar('"');
1144             for (t=s; *t; t++)
1145                 Printf("%s",unlexChar(*t,'"'));
1146             Putchar('"');
1147         }
1148         else
1149             Printf("%s",s);
1150     }
1151 }
1152
1153 /* -------------------------------------------------------------------------
1154  * Handle special types of input for use in interpreter:
1155  * -----------------------------------------------------------------------*/
1156
1157 Command readCommand(cmds,start,sys)    /* read command at start of input   */
1158 struct cmd *cmds;                      /* line in interpreter              */
1159 Char   start;                          /* characters introducing a cmd     */
1160 Char   sys; {                          /* character for shell escape       */
1161     while (c0==' ' || c0 =='\t')
1162         skip();
1163
1164     if (c0=='\n')                      /* look for blank command lines     */
1165         return NOCMD;
1166     if (c0==EOF)                       /* look for end of input stream     */
1167         return QUIT;
1168     if (c0==sys) {                     /* single character system escape   */
1169         skip();
1170         return SYSTEM;
1171     }
1172     if (c0==start && c1==sys) {        /* two character system escape      */
1173         skip();
1174         skip();
1175         return SYSTEM;
1176     }
1177
1178     startToken();                      /* All cmds start with start        */
1179     if (c0==start)                     /* except default (usually EVAL)    */
1180         do {                           /* which is empty                   */
1181             saveTokenChar(c0);
1182             skip();
1183         } while (c0!=EOF && !isIn(c0,SPACE));
1184     endToken();
1185
1186     for (; cmds->cmdString; ++cmds)
1187         if (strcmp((cmds->cmdString),tokenStr)==0 ||
1188             (tokenStr[0]==start &&
1189              tokenStr[1]==(cmds->cmdString)[1] &&
1190              tokenStr[2]=='\0'))
1191             return (cmds->cmdCode);
1192     return BADCMD;
1193 }
1194
1195 String readFilename() {                /* Read filename from input (if any)*/
1196     if (reading==PROJFILE)
1197         skipWhitespace();
1198     else
1199         while (c0==' ' || c0=='\t')
1200             skip();
1201
1202     if (c0=='\n' || c0==EOF)           /* return null string at end of line*/
1203         return 0;
1204
1205     startToken();
1206     while (c0!=EOF && !isIn(c0,SPACE)) {
1207         if (c0=='"') {
1208             skip();
1209             while (c0!=EOF && c0!='\"') {
1210                 Cell c = readAChar(TRUE);
1211                 if (nonNull(c)) {
1212                     saveTokenChar(charOf(c));
1213                 }
1214             }
1215             if (c0=='"')
1216                 skip();
1217             else {
1218                 ERRMSG(row) "a closing quote, '\"', was expected"
1219                 EEND;
1220             }
1221         }
1222         else {
1223             saveTokenChar(c0);
1224             skip();
1225         }
1226     }
1227     endToken();
1228     return tokenStr;
1229 }
1230
1231 String readLine() {                    /* Read command line from input     */
1232     while (c0==' ' || c0=='\t')        /* skip leading whitespace          */
1233         skip();
1234
1235     startToken();
1236     while (c0!='\n' && c0!=EOF) {
1237         saveTokenChar(c0);
1238         skip();
1239     }
1240     endToken();
1241
1242     return tokenStr;
1243 }
1244
1245 /* --------------------------------------------------------------------------
1246  * This lexer supports the Haskell layout rule:
1247  *
1248  * - Layout area bounded by { ... }, with `;'s in between.
1249  * - A `{' is a HARD indentation and can only be matched by a corresponding
1250  *   HARD '}'
1251  * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1252  *   is inserted with the column number of the first token after the
1253  *   WHERE/LET/OF keyword.
1254  * - When a soft indentation is uppermost on the indetation stack with
1255  *   column col' we insert:
1256  *    `}'  in front of token with column<col' and pop indentation off stack,
1257  *    `;'  in front of token with column==col'.
1258  * ------------------------------------------------------------------------*/
1259
1260 #define MAXINDENT  100                 /* maximum nesting of layout rule   */
1261 static  Int        layout[MAXINDENT+1];/* indentation stack                */
1262 #define HARD       (-1)                /* indicates hard indentation       */
1263 static  Int        indentDepth = (-1); /* current indentation nesting      */
1264
1265 static Void local goOffside(col)       /* insert offside marker            */
1266 Int col; {                             /* for specified column             */
1267 assert(offsideON);
1268     if (indentDepth>=MAXINDENT) {
1269         ERRMSG(row) "Too many levels of program nesting"
1270         EEND;
1271     }
1272     layout[++indentDepth] = col;
1273 }
1274
1275 static Void local unOffside() {        /* leave layout rule area           */
1276 assert(offsideON);
1277     indentDepth--;
1278 }
1279
1280 static Bool local canUnOffside() {     /* Decide if unoffside permitted    */
1281 assert(offsideON);
1282     return indentDepth>=0 && layout[indentDepth]!=HARD;
1283 }
1284
1285 /* --------------------------------------------------------------------------
1286  * Main tokeniser:
1287  * ------------------------------------------------------------------------*/
1288
1289 static Void local skipWhitespace() {   /* Skip over whitespace/comments    */
1290     for (;;)                           /* Strictly speaking, this code is  */
1291         if (c0==EOF)                   /* a little more liberal than the   */
1292             return;                    /* report allows ...                */
1293         else if (c0=='\n')
1294             newlineSkip();
1295         else if (isIn(c0,SPACE))
1296             skip();
1297         else if (c0=='{' && c1=='-') { /* (potentially) nested comment     */
1298             Int nesting = 1;
1299             Int origRow = row;         /* Save original row number         */
1300             skip();
1301             skip();
1302             while (nesting>0 && c0!=EOF)
1303                 if (c0=='{' && c1=='-') {
1304                     skip();
1305                     skip();
1306                     nesting++;
1307                 }
1308                 else if (c0=='-' && c1=='}') {
1309                     skip();
1310                     skip();
1311                     nesting--;
1312                 }
1313                 else if (c0=='\n')
1314                     newlineSkip();
1315                 else
1316                     skip();
1317             if (nesting>0) {
1318                 ERRMSG(origRow) "Unterminated nested comment {- ..."
1319                 EEND;
1320             }
1321         }
1322         else if (c0=='-' && c1=='-') {  /* One line comment                */
1323             do
1324                 skip();
1325             while (c0!='\n' && c0!=EOF);
1326             if (c0=='\n')
1327                 newlineSkip();
1328         }
1329         else
1330             return;
1331 }
1332
1333 static Bool firstToken;                /* Set to TRUE for first token      */
1334 static Int  firstTokenIs;              /* ... with token value stored here */
1335
1336 static Int local yylex() {             /* Read next input token ...        */
1337     static Bool insertOpen    = FALSE;
1338     static Bool insertedToken = FALSE;
1339     static Text textRepeat;
1340
1341 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1342
1343     if (firstToken) {                  /* Special case for first token     */
1344         indentDepth   = (-1);
1345         firstToken    = FALSE;
1346         insertOpen    = FALSE;
1347         insertedToken = FALSE;
1348         if (reading==KEYBOARD)
1349             textRepeat = findText(repeatStr);
1350         return firstTokenIs;
1351     }
1352
1353     if (offsideON && insertOpen) {     /* insert `soft' opening brace      */
1354         insertOpen    = FALSE;
1355         insertedToken = TRUE;
1356         goOffside(column);
1357         push(yylval = mkInt(row));
1358         return '{';
1359     }
1360
1361     /* ----------------------------------------------------------------------
1362      * Skip white space, and insert tokens to support layout rules as reqd.
1363      * --------------------------------------------------------------------*/
1364
1365     skipWhitespace();
1366     startColumn = column;
1367     push(yylval = mkInt(row));         /* default token value is line no.  */
1368     /* subsequent changes to yylval must also set top() to the same value  */
1369
1370     if (indentDepth>=0) {              /* layout rule(s) active ?          */
1371         if (insertedToken)             /* avoid inserting multiple `;'s    */
1372             insertedToken = FALSE;     /* or putting `;' after `{'         */
1373         else
1374         if (offsideON && layout[indentDepth]!=HARD) {
1375             if (column<layout[indentDepth]) {
1376                 unOffside();
1377                 return '}';
1378             }
1379             else if (column==layout[indentDepth] && c0!=EOF) {
1380                 insertedToken = TRUE;
1381                 return ';';
1382             }
1383         }
1384     }
1385
1386     /* ----------------------------------------------------------------------
1387      * Now try to identify token type:
1388      * --------------------------------------------------------------------*/
1389
1390     switch (c0) {
1391         case EOF  : return 0;                   /* End of file/input       */
1392
1393         /* The next 10 characters make up the `special' category in 1.3    */
1394         case '('  : skip(); return '(';
1395         case ')'  : skip(); return ')';
1396         case ','  : skip(); return ',';
1397         case ';'  : skip(); return ';';
1398         case '['  : skip(); return '[';
1399         case ']'  : skip(); return ']';
1400         case '`'  : skip(); return '`';
1401         case '{'  : if (offsideON) goOffside(HARD);
1402                     skip();
1403                     return '{';
1404         case '}'  : if (offsideON && indentDepth<0) {
1405                         ERRMSG(row) "Misplaced `}'"
1406                         EEND;
1407                     }
1408                     if (!(offsideON && layout[indentDepth]!=HARD))
1409                         skip();                         /* skip over hard }*/
1410                     if (offsideON)
1411                         unOffside();    /* otherwise, we have to insert a }*/
1412                     return '}';         /* to (try to) avoid an error...   */
1413
1414         /* Character and string literals                                   */
1415         case '\'' : top() = yylval = readChar();
1416                     return CHARLIT;
1417
1418         case '\"' : top() = yylval = readString();
1419                     return STRINGLIT;
1420     }
1421
1422 #if IPARAM
1423     if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1424         Text it;                        /* Look for implicit param name    */
1425         skip();
1426         it    = readIdent();
1427         top() = yylval = ap(IPVAR,it);
1428         return identType=IPVARID;
1429     }
1430 #endif
1431 #if TREX
1432     if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1433         Text it;                        /* Look for record selector name   */
1434         skip();
1435         it    = readIdent();
1436         top() = yylval = ap(RECSEL,mkExt(it));
1437         return identType=RECSELID;
1438     }
1439 #endif
1440     if (isIn(c0,LARGE)) {               /* Look for qualified name         */
1441         Text it = readIdent();          /* No keyword begins with LARGE ...*/
1442         if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1443             Text it2 = NIL;
1444             skip();                     /* Skip qualifying dot             */
1445             if (isIn(c0,SYMBOL)) { /* Qualified operator */
1446                 it2 = readOperator();
1447                 if (opType==CONOP) {
1448                     top() = yylval = mkQConOp(it,it2);
1449                     return QCONOP;
1450                 } else {
1451                     top() = yylval = mkQVarOp(it,it2);
1452                     return QVAROP;
1453                 }
1454             } else {               /* Qualified identifier */
1455                 it2 = readIdent();
1456                 if (identType==CONID) {
1457                     top() = yylval = mkQCon(it,it2);
1458                     return QCONID;
1459                 } else {
1460                     top() = yylval = mkQVar(it,it2);
1461                     return QVARID;
1462                 }
1463             }
1464         } else {
1465             top() = yylval = mkCon(it);
1466             return identType;
1467         }
1468     }
1469     if (isIn(c0,(SMALL|LARGE))) {
1470         Text it = readIdent();
1471
1472         if (it==textCase)              return CASEXP;
1473         if (it==textOfK)               lookAhead(OF);
1474         if (it==textData)              return DATA;
1475         if (it==textType)              return TYPE;
1476         if (it==textIf)                return IF;
1477         if (it==textThen)              return THEN;
1478         if (it==textElse)              return ELSE;
1479         if (it==textWhere)             lookAhead(WHERE);
1480         if (it==textLet)               lookAhead(LET);
1481         if (it==textIn)                return IN;
1482         if (it==textInfix)             return INFIXN;
1483         if (it==textInfixl)            return INFIXL;
1484         if (it==textInfixr)            return INFIXR;
1485         if (it==textForeign)           return FOREIGN;
1486         if (it==textUnsafe)            return UNSAFE;
1487         if (it==textNewtype)           return TNEWTYPE;
1488         if (it==textDefault)           return DEFAULT;
1489         if (it==textDeriving)          return DERIVING;
1490         if (it==textDo)                lookAhead(DO);
1491         if (it==textClass)             return TCLASS;
1492         if (it==textInstance)          return TINSTANCE;
1493         if (it==textModule)            return TMODULE;
1494         if (it==textInterface)         return INTERFACE;
1495         if (it==textInstImport)        return INSTIMPORT;
1496         if (it==textImport)            return IMPORT;
1497         if (it==textExport)            return EXPORT;
1498         if (it==textDynamic)           return DYNAMIC;
1499         if (it==textCcall)             return CCALL;
1500         if (it==textStdcall)           return STDCALL;
1501         if (it==textUUExport)          return UUEXPORT;
1502         if (it==textHiding)            return HIDING;
1503         if (it==textQualified)         return QUALIFIED;
1504         if (it==textAsMod)             return ASMOD;
1505         if (it==textWildcard)          return '_';
1506         if (it==textAll && !haskell98) return ALL;
1507 #if IPARAM
1508         if (it==textWith && !haskell98) lookAhead(WITH);
1509         if (it==textDlet && !haskell98) lookAhead(DLET);
1510 #endif
1511         if (it==textUUAll)             return ALL;
1512         if (it==textRepeat && reading==KEYBOARD)
1513             return repeatLast();
1514
1515         top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1516         return identType;
1517     }
1518
1519     if (isIn(c0,SYMBOL)) {
1520         Text it = readOperator();
1521
1522         if (it==textCoco)    return COCO;
1523         if (it==textEq)      return '=';
1524         if (it==textUpto)    return UPTO;
1525         if (it==textAs)      return '@';
1526         if (it==textLambda)  return '\\';
1527         if (it==textBar)     return '|';
1528         if (it==textFrom)    return FROM;
1529         if (it==textMinus)   return '-';
1530         if (it==textPlus)    return '+';
1531         if (it==textBang)    return '!';
1532         if (it==textDot)     return '.';
1533         if (it==textArrow)   return ARROW;
1534         if (it==textLazy)    return '~';
1535         if (it==textImplies) return IMPLIES;
1536         if (it==textRepeat && reading==KEYBOARD)
1537             return repeatLast();
1538
1539         top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1540         return opType;
1541     }
1542
1543     if (isIn(c0,DIGIT)) {
1544         top() = yylval = readNumber();
1545         return NUMLIT;
1546     }
1547
1548     ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1549                 ((int)c0), column
1550     EEND;
1551     return 0; /*NOTREACHED*/
1552 }
1553
1554 static Int local repeatLast() {         /* Obtain last expression entered  */
1555     if (isNull(yylval=getLastExpr())) {
1556         ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1557         EEND;
1558     }
1559     return REPEAT;
1560 }
1561
1562 Syntax defaultSyntax(t)                 /* Find default syntax of var named*/
1563 Text t; {                               /* by t ...                        */
1564     String s = textToStr(t);
1565     return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1566 }
1567
1568 Syntax syntaxOf(n)                      /* Find syntax for name            */
1569 Name n; {
1570     if (name(n).syntax==NO_SYNTAX)      /* Return default if no syntax set */
1571         return defaultSyntax(name(n).text);
1572     return name(n).syntax;
1573 }
1574
1575 /* --------------------------------------------------------------------------
1576  * main entry points to parser/lexer:
1577  * ------------------------------------------------------------------------*/
1578
1579 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1580 Int startWith; {                       /* determining whether to read a    */
1581     firstToken   = TRUE;               /* script or an expression          */
1582     firstTokenIs = startWith;
1583     if (startWith==INTERFACE)
1584        offsideON = FALSE; else
1585        offsideON = TRUE;
1586
1587     clearStack();
1588     if (yyparse()) {                   /* This can only be parser overflow */
1589         ERRMSG(row) "Parser overflow"  /* as all syntax errors are caught  */
1590         EEND;                          /* in the parser...                 */
1591     }
1592     drop();
1593     if (!stackEmpty())                 /* stack should now be empty        */
1594         internal("parseInput");
1595 }
1596
1597 #ifdef HSCRIPT
1598 static String memPrefix = "@mem@";
1599 static Int lenMemPrefix = 5;   /* strlen(memPrefix)*/
1600
1601 Void makeMemScript(mem,fname)
1602 String mem;
1603 String fname; {
1604    strcat(fname,memPrefix);
1605    itoa((int)mem, fname+strlen(fname), 10);
1606 }
1607
1608 Bool isMemScript(fname)
1609 String fname; {
1610    return (strstr(fname,memPrefix) != NULL);
1611 }
1612
1613 String memScriptString(fname)
1614 String fname; {
1615     String p = strstr(fname,memPrefix);
1616     if (p) {
1617         return (String)atoi(p+lenMemPrefix);
1618     } else {
1619         return NULL;
1620     }
1621 }
1622
1623 Void parseScript(fname,len)             /* Read a script, possibly from mem */
1624 String fname;
1625 Long len; {
1626     input(RESET);
1627     if (isMemScript(fname)) {
1628         char* s = memScriptString(fname);
1629         stringInput(s);
1630     } else {
1631         fileInput(fname,len);
1632     }
1633     parseInput(SCRIPT);
1634 }
1635 #else
1636 Void parseScript(nm,len)               /* Read a script                    */
1637 String nm;
1638 Long   len; {                          /* Used to set a target for reading */
1639     input(RESET);
1640     fileInput(nm,len);
1641     parseInput(SCRIPT);
1642 }
1643 #endif
1644
1645 Void parseExp() {                      /* Read an expression to evaluate   */
1646     parseInput(EXPR);
1647     setLastExpr(inputExpr);
1648 }
1649
1650 Void parseContext() {                  /* Read a context to prove   */
1651     parseInput(CONTEXT);
1652 }
1653
1654 Void parseInterface(nm,len)            /* Read a GHC interface file        */
1655 String nm;
1656 Long   len; {                          /* Used to set a target for reading */
1657     input(RESET);
1658     fileInput(nm,len);
1659     parseInput(INTERFACE);
1660 }
1661
1662
1663 /* --------------------------------------------------------------------------
1664  * Input control:
1665  * ------------------------------------------------------------------------*/
1666
1667 Void input(what)
1668 Int what; {
1669     switch (what) {
1670         case INSTALL : initCharTab();
1671                        textCase       = findText("case");
1672                        textOfK        = findText("of");
1673                        textData       = findText("data");
1674                        textType       = findText("type");
1675                        textIf         = findText("if");
1676                        textThen       = findText("then");
1677                        textElse       = findText("else");
1678                        textWhere      = findText("where");
1679                        textLet        = findText("let");
1680                        textIn         = findText("in");
1681                        textInfix      = findText("infix");
1682                        textInfixl     = findText("infixl");
1683                        textInfixr     = findText("infixr");
1684                        textForeign    = findText("foreign");
1685                        textUnsafe     = findText("unsafe");
1686                        textNewtype    = findText("newtype");
1687                        textDefault    = findText("default");
1688                        textDeriving   = findText("deriving");
1689                        textDo         = findText("do");
1690                        textClass      = findText("class");
1691 #if IPARAM
1692                        textWith       = findText("with");
1693                        textDlet       = findText("dlet");
1694 #endif
1695                        textInstance   = findText("instance");
1696                        textCoco       = findText("::");
1697                        textEq         = findText("=");
1698                        textUpto       = findText("..");
1699                        textAs         = findText("@");
1700                        textLambda     = findText("\\");
1701                        textBar        = findText("|");
1702                        textMinus      = findText("-");
1703                        textPlus       = findText("+");
1704                        textFrom       = findText("<-");
1705                        textArrow      = findText("->");
1706                        textLazy       = findText("~");
1707                        textBang       = findText("!");
1708                        textDot        = findText(".");
1709                        textImplies    = findText("=>");
1710                        textPrelude    = findText("Prelude");
1711                        textNum        = findText("Num");
1712                        textModule     = findText("module");
1713                        textInterface  = findText("__interface");
1714                        textInstImport = findText("__instimport");
1715                        textExport     = findText("export");
1716                        textDynamic    = findText("dynamic");
1717                        textCcall      = findText("ccall");
1718                        textStdcall    = findText("stdcall");
1719                        textUUExport   = findText("__export");
1720                        textImport     = findText("import");
1721                        textHiding     = findText("hiding");
1722                        textQualified  = findText("qualified");
1723                        textAsMod      = findText("as");
1724                        textWildcard   = findText("_");
1725                        textAll        = findText("forall");
1726                        textUUAll      = findText("__forall");
1727                        varMinus       = mkVar(textMinus);
1728                        varPlus        = mkVar(textPlus);
1729                        varBang        = mkVar(textBang);
1730                        varDot         = mkVar(textDot);
1731                        varHiding      = mkVar(textHiding);
1732                        varQualified   = mkVar(textQualified);
1733                        varAsMod       = mkVar(textAsMod);
1734                        conMain        = mkCon(findText("Main"));
1735                        varMain        = mkVar(findText("main"));
1736                        evalDefaults   = NIL;
1737
1738                        input(RESET);
1739                        break;
1740
1741         case RESET   : tyconDefns   = NIL;
1742                        typeInDefns  = NIL;
1743                        valDefns     = NIL;
1744                        classDefns   = NIL;
1745                        instDefns    = NIL;
1746                        selDefns     = NIL;
1747                        genDefns     = NIL;
1748                        //primDefns    = NIL;
1749                        unqualImports= NIL;
1750                        foreignImports= NIL;
1751                        foreignExports= NIL;
1752                        defaultDefns = NIL;
1753                        defaultLine  = 0;
1754                        inputExpr    = NIL;
1755                        imps         = NIL;
1756                        closeAnyInput();
1757                        break;
1758
1759         case BREAK   : if (reading==KEYBOARD)
1760                            c0 = EOF;
1761                        break;
1762
1763         case MARK    : mark(tyconDefns);
1764                        mark(typeInDefns);
1765                        mark(valDefns);
1766                        mark(classDefns);
1767                        mark(instDefns);
1768                        mark(selDefns);
1769                        mark(genDefns);
1770                        //mark(primDefns);
1771                        mark(unqualImports);
1772                        mark(foreignImports);
1773                        mark(foreignExports);
1774                        mark(defaultDefns);
1775                        mark(evalDefaults);
1776                        mark(inputExpr);
1777                        mark(varMinus);
1778                        mark(varPlus);
1779                        mark(varBang);
1780                        mark(varDot);
1781                        mark(varHiding);
1782                        mark(varQualified);
1783                        mark(varAsMod);
1784                        mark(varMain);
1785                        mark(conMain);
1786                        mark(imps);
1787                        break;
1788     }
1789 }
1790
1791 /*-------------------------------------------------------------------------*/