ghc/interpreter/input.c

   1
   2 /* --------------------------------------------------------------------------
   3  * Input functions, lexical analysis parsing etc...
   4  *
   5  * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
   6  * Yale Haskell Group, and the Oregon Graduate Institute of Science and
   7  * Technology, 1994-1999, All rights reserved.  It is distributed as
   8  * free software under the license in the file "License", which is
   9  * included in the distribution.
  10  *
  11  * $RCSfile: input.c,v $
  12  * $Revision: 1.9 $
  13  * $Date: 1999/10/15 23:52:00 $
  14  * ------------------------------------------------------------------------*/
  15
  16 #include "prelude.h"
  17 #include "storage.h"
  18 #include "backend.h"
  19 #include "connect.h"
  20 #include "command.h"
  21 #include "errors.h"
  22 #include "link.h"
  23 #include <ctype.h>
  24 #if HAVE_GETDELIM_H
  25 #include "getdelim.h"
  26 #endif
  27
  28 #if IS_WIN32
  29 #include <windows.h>
  30 #endif
  31
  32 #if IS_WIN32 | HUGS_FOR_WINDOWS
  33 #undef IN
  34 #endif
  35
  36 /* --------------------------------------------------------------------------
  37  * Global data:
  38  * ------------------------------------------------------------------------*/
  39
  40 List tyconDefns      = NIL;             /* type constructor definitions    */
  41 List typeInDefns     = NIL;             /* type synonym restrictions       */
  42 List valDefns        = NIL;             /* value definitions in script     */
  43 List classDefns      = NIL;             /* class defns in script           */
  44 List instDefns       = NIL;             /* instance defns in script        */
  45 List selDefns        = NIL;             /* list of selector lists          */
  46 List genDefns        = NIL;             /* list of generated names         */
  47 List unqualImports   = NIL;             /* unqualified import list         */
  48 List foreignImports  = NIL;             /* foreign imports                 */
  49 List foreignExports  = NIL;             /* foreign exportsd                */
  50 List defaultDefns    = NIL;             /* default definitions (if any)    */
  51 Int  defaultLine     = 0;               /* line in which default defs occur*/
  52 List evalDefaults    = NIL;             /* defaults for evaluator          */
  53
  54 Cell inputExpr       = NIL;             /* input expression                */
  55 Cell inputContext    = NIL;             /* input context                   */
  56 Bool literateScripts = FALSE;           /* TRUE => default to lit scripts  */
  57 Bool literateErrors  = TRUE;            /* TRUE => report errs in lit scrs */
  58 Bool offsideON       = TRUE;            /* TRUE => implement offside rule  */
  59
  60 String repeatStr     = 0;               /* Repeat last expr                */
  61
  62 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
  63 String preprocessor  = 0;
  64 #endif
  65
  66 /* --------------------------------------------------------------------------
  67  * Local function prototypes:
  68  * ------------------------------------------------------------------------*/
  69
  70 static Void local initCharTab     Args((Void));
  71 static Void local fileInput       Args((String,Long));
  72 static Bool local literateMode    Args((String));
  73 static Bool local linecmp         Args((String,String));
  74 static Int  local nextLine        Args((Void));
  75 static Void local skip            Args((Void));
  76 static Void local thisLineIs      Args((Int));
  77 static Void local newlineSkip     Args((Void));
  78 static Void local closeAnyInput   Args((Void));
  79
  80        Int  yyparse         Args((Void)); /* can't stop yacc making this   */
  81                                           /* public, but don't advertise   */
  82                                           /* it in a header file.          */
  83
  84 static Void local endToken        Args((Void));
  85 static Text local readOperator    Args((Void));
  86 static Text local readIdent       Args((Void));
  87 static Cell local readRadixNumber Args((Int));
  88 static Cell local readNumber      Args((Void));
  89 static Cell local readChar        Args((Void));
  90 static Cell local readString      Args((Void));
  91 static Void local saveStrChr      Args((Char));
  92 static Cell local readAChar       Args((Bool));
  93
  94 static Bool local lazyReadMatches Args((String));
  95 static Cell local readEscapeChar  Args((Bool));
  96 static Void local skipGap         Args((Void));
  97 static Cell local readCtrlChar    Args((Void));
  98 static Cell local readOctChar     Args((Void));
  99 static Cell local readHexChar     Args((Void));
 100 static Int  local readHexDigit    Args((Char));
 101 static Cell local readDecChar     Args((Void));
 102
 103 static Void local goOffside       Args((Int));
 104 static Void local unOffside       Args((Void));
 105 static Bool local canUnOffside    Args((Void));
 106
 107 static Void local skipWhitespace  Args((Void));
 108 static Int  local yylex           Args((Void));
 109 static Int  local repeatLast      Args((Void));
 110
 111 static Void local parseInput      Args((Int));
 112
 113 static Bool local doesNotExceed   Args((String,Int,Int));
 114 static Int  local stringToInt     Args((String,Int));
 115
 116
 117 /* --------------------------------------------------------------------------
 118  * Text values for reserved words and special symbols:
 119  * ------------------------------------------------------------------------*/
 120
 121 static Text textCase,    textOfK,      textData,   textType,   textIf;
 122 static Text textThen,    textElse,     textWhere,  textLet,    textIn;
 123 static Text textInfix,   textInfixl,   textInfixr, textForeign, textNewtype;
 124 static Text textDefault, textDeriving, textDo,     textClass,  textInstance;
 125 #if IPARAM
 126 static Text textWith,  textDlet;
 127 #endif
 128
 129 static Text textCoco,    textEq,       textUpto,   textAs,     textLambda;
 130 static Text textBar,     textMinus,    textFrom,   textArrow,  textLazy;
 131 static Text textBang,    textDot,      textAll,    textImplies;
 132 static Text textWildcard;
 133
 134 static Text textModule,  textImport,    textInterface,  textInstImport;
 135 static Text textHiding,  textQualified, textAsMod;
 136 static Text textExport,  textDynamic,   textUUExport;
 137 static Text textUnsafe,  textUUAll;
 138
 139 Text   textNum;                         /* Num                             */
 140 Text   textPrelude;                     /* Prelude                         */
 141 Text   textPlus;                        /* (+)                             */
 142
 143 static Cell conMain;                    /* Main                            */
 144 static Cell varMain;                    /* main                            */
 145
 146 static Cell varMinus;                   /* (-)                             */
 147 static Cell varPlus;                    /* (+)                             */
 148 static Cell varBang;                    /* (!)                             */
 149 static Cell varDot;                     /* (.)                             */
 150 static Cell varHiding;                  /* hiding                          */
 151 static Cell varQualified;               /* qualified                       */
 152 static Cell varAsMod;                   /* as                              */
 153
 154 static List imps;                       /* List of imports to be chased    */
 155
 156
 157 /* --------------------------------------------------------------------------
 158  * Character set handling:
 159  *
 160  * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
 161  * character set.  The following code provides methods for classifying
 162  * input characters according to the lexical structure specified by the
 163  * report.  Hugs should still accept older programs because ASCII is
 164  * essentially just a subset of the ISO character set.
 165  *
 166  * Notes: If you want to port Hugs to a machine that uses something
 167  * substantially different from the ISO character set, then you will need
 168  * to insert additional code to map between character sets.
 169  *
 170  * At some point, the following data structures may be exported in a .h
 171  * file to allow the information contained here to be picked up in the
 172  * implementation of LibChar is* primitives.
 173  *
 174  * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
 175  * ------------------------------------------------------------------------*/
 176
 177 static  Bool            charTabBuilt;
 178 static  unsigned char   ctable[NUM_CHARS];
 179 #define isIn(c,x)       (ctable[(unsigned char)(c)]&(x))
 180 #define isISO(c)        (0<=(c) && (c)<NUM_CHARS)
 181
 182 #define DIGIT           0x01
 183 #define SMALL           0x02
 184 #define LARGE           0x04
 185 #define SYMBOL          0x08
 186 #define IDAFTER         0x10
 187 #define SPACE           0x20
 188 #define PRINT           0x40
 189
 190 static Void local initCharTab() {       /* Initialize char decode table    */
 191 #define setRange(x,f,t) {Int i=f;   while (i<=t) ctable[i++] |=x;}
 192 #define setChar(x,c)    ctable[c] |= (x)
 193 #define setChars(x,s)   {char *p=s; while (*p)   ctable[(Int)*p++]|=x;}
 194 #define setCopy(x,c)    {Int i;                         \
 195                          for (i=0; i<NUM_CHARS; ++i)    \
 196                              if (isIn(i,c))             \
 197                                  ctable[i]|=x;          \
 198                         }
 199
 200     setRange(DIGIT,     '0','9');       /* ASCII decimal digits            */
 201
 202     setRange(SMALL,     'a','z');       /* ASCII lower case letters        */
 203     setRange(SMALL,     223,246);       /* ISO lower case letters          */
 204     setRange(SMALL,     248,255);       /* (omits division symbol, 247)    */
 205     setChar (SMALL,     '_');
 206
 207     setRange(LARGE,     'A','Z');       /* ASCII upper case letters        */
 208     setRange(LARGE,     192,214);       /* ISO upper case letters          */
 209     setRange(LARGE,     216,222);       /* (omits multiplication, 215)     */
 210
 211     setRange(SYMBOL,    161,191);       /* Symbol characters + ':'         */
 212     setRange(SYMBOL,    215,215);
 213     setChar (SYMBOL,    247);
 214     setChars(SYMBOL,    ":!#$%&*+./<=>?@\\^|-~");
 215
 216     setChar (IDAFTER,   '\'');          /* Characters in identifier        */
 217     setCopy (IDAFTER,   (DIGIT|SMALL|LARGE));
 218
 219     setChar (SPACE,     ' ');           /* ASCII space character           */
 220     setChar (SPACE,     160);           /* ISO non breaking space          */
 221     setRange(SPACE,     9,13);          /* special whitespace: \t\n\v\f\r  */
 222
 223     setChars(PRINT,     "(),;[]_`{}");  /* Special characters              */
 224     setChars(PRINT,     " '\"");        /* Space and quotes                */
 225     setCopy (PRINT,     (DIGIT|SMALL|LARGE|SYMBOL));
 226
 227     charTabBuilt = TRUE;
 228 #undef setRange
 229 #undef setChar
 230 #undef setChars
 231 #undef setCopy
 232 }
 233
 234
 235 /* --------------------------------------------------------------------------
 236  * Single character input routines:
 237  *
 238  * At the lowest level of input, characters are read one at a time, with the
 239  * current character held in c0 and the following (lookahead) character in
 240  * c1.  The corrdinates of c0 within the file are held in (column,row).
 241  * The input stream is advanced by one character using the skip() function.
 242  * ------------------------------------------------------------------------*/
 243
 244 #define TABSIZE    8                   /* spacing between tabstops         */
 245
 246 #define NOTHING    0                   /* what kind of input is being read?*/
 247 #define KEYBOARD   1                   /* - keyboard/console?              */
 248 #define SCRIPTFILE 2                   /* - script file                    */
 249 #define PROJFILE   3                   /* - project file                   */
 250 #define STRING     4                   /* - string buffer?                 */
 251
 252 static Int    reading   = NOTHING;
 253
 254 static Target readSoFar;
 255 static Int    row, column, startColumn;
 256 static int    c0, c1;
 257 static FILE   *inputStream = 0;
 258 static Bool   thisLiterate;
 259 static String nextStringChar;          /* next char in string buffer       */
 260
 261 #if     USE_READLINE                   /* for command line editors         */
 262 static  String currentLine;            /* editline or GNU readline         */
 263 static  String nextChar;
 264 #define nextConsoleChar() \
 265            (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
 266 extern  Void add_history  Args((String));
 267 extern  String readline   Args((String));
 268 #else
 269 #define nextConsoleChar() getc(stdin)
 270 #endif
 271
 272 static  Int litLines;                  /* count defn lines in lit script   */
 273 #define DEFNCHAR  '>'                  /* definition lines begin with this */
 274 static  Int lastLine;                  /* records type of last line read:  */
 275 #define STARTLINE 0                    /* - at start of file, none read    */
 276 #define BLANKLINE 1                    /* - blank (may preceed definition) */
 277 #define TEXTLINE  2                    /* - text comment                   */
 278 #define DEFNLINE  3                    /* - line containing definition     */
 279 #define CODELINE  4                    /* - line inside code block         */
 280
 281 #define BEGINCODE "\\begin{code}"
 282 #define ENDCODE   "\\end{code}"
 283
 284 #if HAVE_GETDELIM_H
 285 static char *lineBuffer = NULL;   /* getline() does the initial allocation */
 286 #else
 287 #define LINEBUFFER_SIZE 1000
 288 static char lineBuffer[LINEBUFFER_SIZE];
 289 #endif
 290 static int lineLength = 0;
 291 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
 292 static int linePtr = 0;
 293
 294 Void consoleInput(prompt)              /* prepare to input characters from */
 295 String prompt; {                       /* standard in (i.e. console/kbd)   */
 296     reading     = KEYBOARD;            /* keyboard input is Line oriented, */
 297     c0          =                      /* i.e. input terminated by '\n'    */
 298     c1          = ' ';
 299     column      = (-1);
 300     row         = 0;
 301
 302 #if USE_READLINE
 303     /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
 304      * avoids accidentally freeing currentLine twice.
 305      */
 306     if (currentLine) {
 307         String oldCurrentLine = currentLine;
 308         currentLine = 0;           /* We may lose the space of currentLine */
 309         free(oldCurrentLine);      /* if interrupted here - unlikely       */
 310     }
 311     currentLine = readline(prompt);
 312     nextChar    = currentLine;
 313     if (currentLine) {
 314         if (*currentLine)
 315             add_history(currentLine);
 316     }
 317     else
 318         c0 = c1 = EOF;
 319 #else
 320     Printf("%s",prompt);
 321     FlushStdout();
 322 #endif
 323 }
 324
 325 Void projInput(nm)                     /* prepare to input characters from */
 326 String nm; {                           /* from named project file          */
 327     if ((inputStream = fopen(nm,"r"))!=0) {
 328         reading = PROJFILE;
 329         c0      = ' ';
 330         c1      = '\n';
 331         column  = 1;
 332         row     = 0;
 333     }
 334     else {
 335         ERRMSG(0) "Unable to open project file \"%s\"", nm
 336         EEND;
 337     }
 338 }
 339
 340 static Void local fileInput(nm,len)     /* prepare to input characters from*/
 341 String nm;                              /* named file (specified length is */
 342 Long   len; {                           /* used to set target for reading) */
 343 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 344     if (preprocessor) {
 345         Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
 346         char *cmd = malloc(reallen);
 347         if (cmd == NULL) {
 348             ERRMSG(0) "Unable to allocate memory for filter command."
 349             EEND;
 350         }
 351         strcpy(cmd,preprocessor);
 352         strcat(cmd," ");
 353         strcat(cmd,nm);
 354         inputStream = popen(cmd,"r");
 355         free(cmd);
 356     } else {
 357         inputStream = fopen(nm,"r");
 358     }
 359 #else
 360     inputStream = fopen(nm,"r");
 361 #endif
 362     if (inputStream) {
 363         reading      = SCRIPTFILE;
 364         c0           = ' ';
 365         c1           = '\n';
 366         column       = 1;
 367         row          = 0;
 368
 369         lastLine     = STARTLINE;       /* literate file processing */
 370         litLines     = 0;
 371         linePtr      = 0;
 372         lineLength   = 0;
 373         thisLiterate = literateMode(nm);
 374         inCodeBlock  = FALSE;
 375
 376         readSoFar    = 0;
 377         setGoal("Parsing", (Target)len);
 378     }
 379     else {
 380         ERRMSG(0) "Unable to open file \"%s\"", nm
 381         EEND;
 382     }
 383 }
 384
 385 Void stringInput(s)             /* prepare to input characters from string */
 386 String s; {
 387     reading      = STRING;
 388     c0           = EOF;
 389     c1           = EOF;
 390     if (*s) c0 = *s++;
 391     if (*s) c1 = *s++;
 392     column       = 1;
 393     row          = 1;
 394
 395     nextStringChar = s;
 396     if (!charTabBuilt)
 397         initCharTab();
 398 }
 399
 400 static Bool local literateMode(nm)      /* Select literate mode for file   */
 401 String nm; {
 402     char *dot = strrchr(nm,'.');        /* look for last dot in file name  */
 403     if (dot) {
 404         if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate    */
 405             return FALSE;
 406         if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
 407             filenamecmp(dot+1,"verb")==0) /* literate scripts              */
 408             return TRUE;
 409     }
 410     return literateScripts;             /* otherwise, use the default      */
 411 }
 412
 413
 414 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
 415 {
 416    Int len;
 417    String dot;
 418    len = 1 + strlen ( srcName );
 419    *hiName = malloc(len);
 420    *oName  = malloc(len);
 421    if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
 422    (*hiName)[0] = (*oName)[0] = 0;
 423    dot = strrchr(srcName, '.');
 424    if (!dot) return;
 425    if (filenamecmp(dot+1, "hs")==0 &&
 426        filenamecmp(dot+1, "lhs")==0 &&
 427        filenamecmp(dot+1, "verb")==0) return;
 428
 429    strcpy(*hiName, srcName);
 430    dot = strrchr(*hiName, '.');
 431    dot[1] = 'h';
 432    dot[2] = 'i';
 433    dot[3] = 0;
 434
 435    strcpy(*oName, srcName);
 436    dot = strrchr(*oName, '.');
 437    dot[1] = 'o';
 438    dot[2] = 0;
 439 }
 440
 441
 442
 443 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
 444  * I've removed the loop (since newLineSkip contains a loop too) and
 445  * replaced the warnings with errors. ADR
 446  */
 447 /*
 448  * To deal with literate \begin{code}...\end{code} blocks,
 449  * add a line buffer that rooms the current line. The old c0 and c1
 450  * stream pointers are used as before within that buffer -- sof
 451  *
 452  * Upon reading a new line into the line buffer, we check to see if
 453  * we're reading in a line containing \begin{code} or \end{code} and
 454  * take appropriate action.
 455  */
 456
 457 static Bool local linecmp(s,line)       /* compare string with line        */
 458 String s;                               /* line may end in whitespace      */
 459 String line; {
 460     Int i=0;
 461     while (s[i] != '\0' && s[i] == line[i]) {
 462         ++i;
 463     }
 464     /* s[0..i-1] == line[0..i-1] */
 465     if (s[i] != '\0') {                 /* check s `isPrefixOf` line       */
 466         return FALSE;
 467     }
 468     while (isIn(line[i], SPACE)) {      /* allow whitespace at end of line */
 469         ++i;
 470     }
 471     return (line[i] == '\0');
 472 }
 473
 474 /* Returns line length (including \n) or 0 upon EOF. */
 475 static Int local nextLine()
 476 {
 477 #if HAVE_GETDELIM_H
 478     /*
 479        Forget about fgets(), it is utterly braindead.
 480        (Assumes \NUL free streams and does not gracefully deal
 481        with overflow.) Instead, use GNU libc's getline().
 482        */
 483     lineLength = getline(&lineBuffer, &lineLength, inputStream);
 484 #else
 485     if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
 486         lineLength = strlen(lineBuffer);
 487     else
 488         lineLength = 0;
 489 #endif
 490     /* printf("Read: \"%s\"", lineBuffer); */
 491     if (lineLength <= 0) { /* EOF / IO error, who knows.. */
 492         return lineLength;
 493     }
 494     else if (lineLength >= 2 && lineBuffer[0] == '#' &&
 495              lineBuffer[1] == '!') {
 496         lineBuffer[0]='\n'; /* pretend it's a blank line */
 497         lineBuffer[1]='\0';
 498         lineLength=1;
 499     } else if (thisLiterate) {
 500         if (linecmp(BEGINCODE, lineBuffer)) {
 501             if (!inCodeBlock) {             /* Entered a code block        */
 502                 inCodeBlock = TRUE;
 503                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 504                 lineBuffer[1]='\0';
 505                 lineLength=1;
 506             }
 507             else {
 508                 ERRMSG(row) "\\begin{code} encountered inside code block"
 509                 EEND;
 510             }
 511         }
 512         else if (linecmp(ENDCODE, lineBuffer)) {
 513             if (inCodeBlock) {              /* Finished code block         */
 514                 inCodeBlock = FALSE;
 515                 lineBuffer[0]='\n'; /* pretend it's a blank line */
 516                 lineBuffer[1]='\0';
 517                 lineLength=1;
 518             }
 519             else {
 520                 ERRMSG(row) "\\end{code} encountered outside code block"
 521                 EEND;
 522             }
 523         }
 524     }
 525     /* printf("Read: \"%s\"", lineBuffer); */
 526     return lineLength;
 527 }
 528
 529 static Void local skip() {              /* move forward one char in input  */
 530     if (c0!=EOF) {                      /* stream, updating c0, c1, ...    */
 531         if (c0=='\n') {                 /* Adjusting cursor coords as nec. */
 532             row++;
 533             column=1;
 534             if (reading==SCRIPTFILE)
 535                 soFar(readSoFar);
 536         }
 537         else if (c0=='\t')
 538             column += TABSIZE - ((column-1)%TABSIZE);
 539         else
 540             column++;
 541
 542         c0 = c1;
 543         readSoFar++;
 544
 545         if (c0==EOF) {
 546             column = 0;
 547             if (reading==SCRIPTFILE)
 548                 done();
 549             closeAnyInput();
 550         }
 551         else if (reading==KEYBOARD) {
 552             allowBreak();
 553             if (c0=='\n')
 554                 c1 = EOF;
 555             else {
 556                 c1 = nextConsoleChar();
 557 #if IS_WIN32 && !HUGS_FOR_WINDOWS
 558                 Sleep(0);
 559 #endif
 560                 /* On Win32, hitting ctrl-C causes the next getchar to
 561                  * fail - returning "-1" to indicate an error.
 562                  * This is one of the rare cases where "-1" does not mean EOF.
 563                  */
 564                 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
 565                     c1 = ' ';
 566                 }
 567             }
 568         }
 569         else if (reading==STRING) {
 570             c1 = (unsigned char) *nextStringChar++;
 571             if (c1 == '\0')
 572                 c1 = EOF;
 573         }
 574         else {
 575             if (lineLength <=0 || linePtr == lineLength) {
 576                 /* Current line, exhausted - get new one */
 577                 if (nextLine() <= 0) { /* EOF */
 578                     c1 = EOF;
 579                 }
 580                 else {
 581                     linePtr = 0;
 582                     c1 = (unsigned char)lineBuffer[linePtr++];
 583                 }
 584             }
 585             else {
 586                 c1 = (unsigned char)lineBuffer[linePtr++];
 587             }
 588         }
 589
 590     }
 591 }
 592
 593 static Void local thisLineIs(kind)     /* register kind of current line    */
 594 Int kind; {                            /* & check for literate script errs */
 595     if (literateErrors) {
 596         if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
 597             (kind==TEXTLINE && lastLine==DEFNLINE)) {
 598             ERRMSG(row) "Program line next to comment"
 599             EEND;
 600         }
 601         lastLine = kind;
 602     }
 603 }
 604
 605 static Void local newlineSkip() {      /* skip `\n' (supports lit scripts) */
 606     /* assert(c0=='\n'); */
 607     if (reading==SCRIPTFILE && thisLiterate) {
 608         do {
 609             skip();
 610             if (inCodeBlock) {         /* pass chars on definition lines   */
 611                 thisLineIs(CODELINE);  /* to lexer (w/o leading DEFNCHAR)  */
 612                 litLines++;
 613                 return;
 614             }
 615             if (c0==DEFNCHAR) {        /* pass chars on definition lines   */
 616                 thisLineIs(DEFNLINE);  /* to lexer (w/o leading DEFNCHAR)  */
 617                 skip();
 618                 litLines++;
 619                 return;
 620             }
 621             while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank?   */
 622                 skip();
 623             if (c0=='\n' || c0==EOF)
 624                 thisLineIs(BLANKLINE);
 625             else {
 626                 thisLineIs(TEXTLINE);  /* otherwise it must be a comment   */
 627                 while (c0!='\n' && c0!=EOF)
 628                     skip();
 629             }                          /* by now, c0=='\n' or c0==EOF      */
 630         } while (c0!=EOF);             /* if new line, start again         */
 631
 632         if (litLines==0 && literateErrors) {
 633             ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
 634                         DEFNCHAR
 635             EEND;
 636         }
 637         return;
 638     }
 639     skip();
 640 }
 641
 642 static Void local closeAnyInput() {    /* Close input stream, if open,     */
 643     switch (reading) {                 /* or skip to end of console line   */
 644         case PROJFILE   :
 645         case SCRIPTFILE : if (inputStream) {
 646 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
 647                               if (preprocessor) {
 648                                   pclose(inputStream);
 649                               } else {
 650                                   fclose(inputStream);
 651                               }
 652 #else
 653                               fclose(inputStream);
 654 #endif
 655                               inputStream = 0;
 656                           }
 657                           break;
 658         case KEYBOARD   : while (c0!=EOF)
 659                               skip();
 660                           break;
 661     }
 662     reading=NOTHING;
 663 }
 664
 665 /* --------------------------------------------------------------------------
 666  * Parser: Uses table driven parser generated from parser.y using yacc
 667  * ------------------------------------------------------------------------*/
 668
 669 #include "parser.c"
 670
 671 /* --------------------------------------------------------------------------
 672  * Single token input routines:
 673  *
 674  * The following routines read the values of particular kinds of token given
 675  * that the first character of the token has already been located in c0 on
 676  * entry to the routine.
 677  * ------------------------------------------------------------------------*/
 678
 679 #define MAX_TOKEN           4000
 680 #define startToken()        tokPos = 0
 681 #define saveTokenChar(c)    if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
 682 #define saveChar(c)         tokenStr[tokPos++]=(char)(c)
 683 #define overflows(n,b,d,m)  (n > ((m)-(d))/(b))
 684
 685 static char tokenStr[MAX_TOKEN+1];     /* token buffer                     */
 686 static Int  tokPos;                    /* input position in buffer         */
 687 static Int  identType;                 /* identifier type: CONID / VARID   */
 688 static Int  opType;                    /* operator type  : CONOP / VAROP   */
 689
 690 static Void local endToken() {         /* check for token overflow         */
 691     if (tokPos>MAX_TOKEN) {
 692         ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
 693         EEND;
 694     }
 695     tokenStr[tokPos] = '\0';
 696 }
 697
 698 static Text local readOperator() {     /* read operator symbol             */
 699     startToken();
 700     do {
 701         saveTokenChar(c0);
 702         skip();
 703     } while (isISO(c0) && isIn(c0,SYMBOL));
 704     opType = (tokenStr[0]==':' ? CONOP : VAROP);
 705     endToken();
 706     return findText(tokenStr);
 707 }
 708
 709 static Text local readIdent() {        /* read identifier                  */
 710     startToken();
 711     do {
 712         saveTokenChar(c0);
 713         skip();
 714     } while (isISO(c0) && isIn(c0,IDAFTER));
 715     endToken();
 716     identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
 717     return findText(tokenStr);
 718 }
 719
 720
 721 static Bool local doesNotExceed(s,radix,limit)
 722 String s;
 723 Int    radix;
 724 Int    limit; {
 725     Int n = 0;
 726     Int p = 0;
 727     while (TRUE) {
 728         if (s[p] == 0) return TRUE;
 729         if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
 730         n = radix*n + (s[p]-'0');
 731         p++;
 732     }
 733 }
 734
 735 static Int local stringToInt(s,radix)
 736 String s;
 737 Int    radix; {
 738     Int n = 0;
 739     Int p = 0;
 740     while (TRUE) {
 741         if (s[p] == 0) return n;
 742         n = radix*n + (s[p]-'0');
 743         p++;
 744     }
 745 }
 746
 747 static Cell local readRadixNumber(r)   /* Read literal in specified radix  */
 748 Int r; {                               /* from input of the form 0c{digs}  */
 749     Int d;
 750     startToken();
 751     skip();                            /* skip leading zero                */
 752     if ((d=readHexDigit(c1))<0 || d>=r) {
 753         /* Special case; no digits, lex as  */
 754         /* if it had been written "0 c..."  */
 755         saveTokenChar('0');
 756     } else {
 757         skip();
 758         do {
 759             saveTokenChar('0'+readHexDigit(c0));
 760             skip();
 761             d = readHexDigit(c0);
 762         } while (d>=0 && d<r);
 763     }
 764     endToken();
 765
 766     if (doesNotExceed(tokenStr,r,MAXPOSINT))
 767         return mkInt(stringToInt(tokenStr,r));
 768     else
 769     if (r == 10)
 770         return stringToBignum(tokenStr);
 771     else {
 772         ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
 773         EEND;
 774     }
 775 }
 776
 777 static Cell local readNumber() {        /* read numeric constant           */
 778
 779     if (c0=='0') {
 780         if (c1=='x' || c1=='X')         /* Maybe a hexadecimal literal?    */
 781             return readRadixNumber(16);
 782         if (c1=='o' || c1=='O')         /* Maybe an octal literal?         */
 783             return readRadixNumber(8);
 784     }
 785
 786     startToken();
 787     do {
 788         saveTokenChar(c0);
 789         skip();
 790     } while (isISO(c0) && isIn(c0,DIGIT));
 791
 792     if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
 793         endToken();
 794         if (doesNotExceed(tokenStr,10,MAXPOSINT))
 795             return mkInt(stringToInt(tokenStr,10)); else
 796             return stringToBignum(tokenStr);
 797     }
 798
 799     saveTokenChar(c0);                  /* save decimal point              */
 800     skip();
 801     do {                                /* process fractional part ...     */
 802         saveTokenChar(c0);
 803         skip();
 804     } while (isISO(c0) && isIn(c0,DIGIT));
 805
 806     if (c0=='e' || c0=='E') {           /* look for exponent part...       */
 807         saveTokenChar('e');
 808         skip();
 809         if (c0=='-') {
 810             saveTokenChar('-');
 811             skip();
 812         }
 813         else if (c0=='+')
 814             skip();
 815
 816         if (!isISO(c0) || !isIn(c0,DIGIT)) {
 817             ERRMSG(row) "Missing digits in exponent"
 818             EEND;
 819         }
 820         else {
 821             do {
 822                 saveTokenChar(c0);
 823                 skip();
 824             } while (isISO(c0) && isIn(c0,DIGIT));
 825         }
 826     }
 827
 828     endToken();
 829     return mkFloat(stringToFloat(tokenStr));
 830 }
 831
 832
 833
 834
 835
 836
 837
 838 static Cell local readChar() {         /* read character constant          */
 839     Cell charRead;
 840
 841     skip(/* '\'' */);
 842     if (c0=='\'' || c0=='\n' || c0==EOF) {
 843         ERRMSG(row) "Illegal character constant"
 844         EEND;
 845     }
 846
 847     charRead = readAChar(FALSE);
 848
 849     if (c0=='\'')
 850         skip(/* '\'' */);
 851     else {
 852         ERRMSG(row) "Improperly terminated character constant"
 853         EEND;
 854     }
 855     return charRead;
 856 }
 857
 858 static Cell local readString() {       /* read string literal              */
 859     Cell c;
 860
 861     startToken();
 862     skip(/* '\"' */);
 863     while (c0!='\"' && c0!='\n' && c0!=EOF) {
 864         c = readAChar(TRUE);
 865         if (nonNull(c))
 866             saveStrChr(charOf(c));
 867     }
 868
 869     if (c0=='\"')
 870         skip(/* '\"' */);
 871     else {
 872         ERRMSG(row) "Improperly terminated string"
 873         EEND;
 874     }
 875     endToken();
 876     return mkStr(findText(tokenStr));
 877 }
 878
 879 static Void local saveStrChr(c)        /* save character in string         */
 880 Char c; {
 881     if (c!='\0' && c!='\\') {          /* save non null char as single char*/
 882         saveTokenChar(c);
 883     }
 884     else {                             /* save null char as TWO null chars */
 885         if (tokPos+1<MAX_TOKEN) {
 886             saveChar('\\');
 887             if (c=='\\')
 888                 saveChar('\\');
 889             else
 890                 saveChar('0');
 891         }
 892     }
 893 }
 894
 895 static Cell local readAChar(isStrLit)  /* read single char constant        */
 896 Bool isStrLit; {                       /* TRUE => enable \& and gaps       */
 897     Cell c = mkChar(c0);
 898
 899     if (c0=='\\')                      /* escape character?                */
 900         return readEscapeChar(isStrLit);
 901     if (!isISO(c0)) {
 902         ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
 903         EEND;
 904     }
 905     skip();                            /* normal character?                */
 906     return c;
 907 }
 908
 909 /* --------------------------------------------------------------------------
 910  * Character escape code sequences:
 911  * ------------------------------------------------------------------------*/
 912
 913 static struct {                        /* table of special escape codes    */
 914     char *codename;
 915     int  codenumber;
 916 } escapes[] = {
 917    {"a",    7}, {"b",    8}, {"f",   12}, {"n",   10},  /* common escapes  */
 918    {"r",   13}, {"t",    9}, {"\\",'\\'}, {"\"",'\"'},
 919    {"\'",'\''}, {"v",   11},
 920    {"NUL",  0}, {"SOH",  1}, {"STX",  2}, {"ETX",  3},  /* ascii codenames */
 921    {"EOT",  4}, {"ENQ",  5}, {"ACK",  6}, {"BEL",  7},
 922    {"BS",   8}, {"HT",   9}, {"LF",  10}, {"VT",  11},
 923    {"FF",  12}, {"CR",  13}, {"SO",  14}, {"SI",  15},
 924    {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
 925    {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
 926    {"CAN", 24}, {"EM",  25}, {"SUB", 26}, {"ESC", 27},
 927    {"FS",  28}, {"GS",  29}, {"RS",  30}, {"US",  31},
 928    {"SP",  32}, {"DEL", 127},
 929    {0,0}
 930 };
 931
 932 static Int  alreadyMatched;            /* Record portion of input stream   */
 933 static char alreadyRead[10];           /* that has been read w/o a match   */
 934
 935 static Bool local lazyReadMatches(s)   /* compare input stream with string */
 936 String s; {                            /* possibly using characters that   */
 937     int i;                             /* have already been read           */
 938
 939     for (i=0; i<alreadyMatched; ++i)
 940         if (alreadyRead[i]!=s[i])
 941             return FALSE;
 942
 943     while (s[i] && s[i]==c0) {
 944         alreadyRead[alreadyMatched++]=(char)c0;
 945         skip();
 946         i++;
 947     }
 948
 949     return s[i]=='\0';
 950 }
 951
 952 static Cell local readEscapeChar(isStrLit)/* read escape character         */
 953 Bool isStrLit; {
 954     int i=0;
 955
 956     skip(/* '\\' */);
 957     switch (c0) {
 958         case '&'  : if (isStrLit) {
 959                         skip();
 960                         return NIL;
 961                     }
 962                     ERRMSG(row) "Illegal use of `\\&' in character constant"
 963                     EEND;
 964                     break;/*NOTREACHED*/
 965
 966         case '^'  : return readCtrlChar();
 967
 968         case 'o'  : return readOctChar();
 969         case 'x'  : return readHexChar();
 970
 971         default   : if (!isISO(c0)) {
 972                         ERRMSG(row) "Illegal escape sequence"
 973                         EEND;
 974                     }
 975                     else if (isIn(c0,SPACE)) {
 976                         if (isStrLit) {
 977                             skipGap();
 978                             return NIL;
 979                         }
 980                         ERRMSG(row) "Illegal use of gap in character constant"
 981                         EEND;
 982                         break;
 983                     }
 984                     else if (isIn(c0,DIGIT))
 985                         return readDecChar();
 986     }
 987
 988     for (alreadyMatched=0; escapes[i].codename; i++)
 989         if (lazyReadMatches(escapes[i].codename))
 990             return mkChar(escapes[i].codenumber);
 991
 992     alreadyRead[alreadyMatched++] = (char)c0;
 993     alreadyRead[alreadyMatched++] = '\0';
 994     ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
 995                 alreadyRead
 996     EEND;
 997     return NIL;/*NOTREACHED*/
 998 }
 999
1000 static Void local skipGap() {          /* skip over gap in string literal  */
1001     do                                 /* (simplified in Haskell 1.1)      */
1002         if (c0=='\n')
1003             newlineSkip();
1004         else
1005             skip();
1006     while (isISO(c0) && isIn(c0,SPACE));
1007     if (c0!='\\') {
1008         ERRMSG(row) "Missing `\\' terminating string literal gap"
1009         EEND;
1010     }
1011     skip(/* '\\' */);
1012 }
1013
1014 static Cell local readCtrlChar() {     /* read escape sequence \^x         */
1015     static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1016     String which;
1017
1018     skip(/* '^' */);
1019     if ((which = strchr(controls,c0))==NULL) {
1020         ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1021         EEND;
1022     }
1023     skip();
1024     return mkChar(which-controls);
1025 }
1026
1027 static Cell local readOctChar() {      /* read octal character constant    */
1028     Int n = 0;
1029     Int d;
1030
1031     skip(/* 'o' */);
1032     if ((d = readHexDigit(c0))<0 || d>=8) {
1033         ERRMSG(row) "Empty octal character escape"
1034         EEND;
1035     }
1036     do {
1037         if (overflows(n,8,d,MAXCHARVAL)) {
1038             ERRMSG(row) "Octal character escape out of range"
1039             EEND;
1040         }
1041         n = 8*n + d;
1042         skip();
1043     } while ((d = readHexDigit(c0))>=0 && d<8);
1044
1045     return mkChar(n);
1046 }
1047
1048 static Cell local readHexChar() {      /* read hex character constant      */
1049     Int n = 0;
1050     Int d;
1051
1052     skip(/* 'x' */);
1053     if ((d = readHexDigit(c0))<0) {
1054         ERRMSG(row) "Empty hexadecimal character escape"
1055         EEND;
1056     }
1057     do {
1058         if (overflows(n,16,d,MAXCHARVAL)) {
1059             ERRMSG(row) "Hexadecimal character escape out of range"
1060             EEND;
1061         }
1062         n = 16*n + d;
1063         skip();
1064     } while ((d = readHexDigit(c0))>=0);
1065
1066     return mkChar(n);
1067 }
1068
1069 static Int local readHexDigit(c)       /* read single hex digit            */
1070 Char c; {
1071     if ('0'<=c && c<='9')
1072         return c-'0';
1073     if ('A'<=c && c<='F')
1074         return 10 + (c-'A');
1075     if ('a'<=c && c<='f')
1076         return 10 + (c-'a');
1077     return -1;
1078 }
1079
1080 static Cell local readDecChar() {      /* read decimal character constant  */
1081     Int n = 0;
1082
1083     do {
1084         if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1085             ERRMSG(row) "Decimal character escape out of range"
1086             EEND;
1087         }
1088         n = 10*n + (c0-'0');
1089         skip();
1090     } while (c0!=EOF && isIn(c0,DIGIT));
1091
1092     return mkChar(n);
1093 }
1094
1095 /* --------------------------------------------------------------------------
1096  * Produce printable representation of character:
1097  * ------------------------------------------------------------------------*/
1098
1099 String unlexChar(c,quote)              /* return string representation of  */
1100 Char c;                                /* character...                     */
1101 Char quote; {                          /* protect quote character          */
1102     static char buffer[12];
1103
1104     if (c<0)                           /* deal with sign extended chars..  */
1105         c += NUM_CHARS;
1106
1107     if (isISO(c) && isIn(c,PRINT)) {   /* normal printable character       */
1108         if (c==quote || c=='\\') {     /* look for quote of approp. kind   */
1109             buffer[0] = '\\';
1110             buffer[1] = (char)c;
1111             buffer[2] = '\0';
1112         }
1113         else {
1114             buffer[0] = (char)c;
1115             buffer[1] = '\0';
1116         }
1117     }
1118     else {                             /* look for escape code             */
1119         Int escs;
1120         for (escs=0; escapes[escs].codename; escs++)
1121             if (escapes[escs].codenumber==c) {
1122                 sprintf(buffer,"\\%s",escapes[escs].codename);
1123                 return buffer;
1124             }
1125         sprintf(buffer,"\\%d",c);      /* otherwise use numeric escape     */
1126     }
1127     return buffer;
1128 }
1129
1130 Void printString(s)                    /* print string s, using quotes and */
1131 String s; {                            /* escapes if any parts need them   */
1132     if (s) {
1133         String t = s;
1134         Char   c;
1135         while ((c = *t)!=0 && isISO(c)
1136                            && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1137             t++;
1138         }
1139         if (*t) {
1140             Putchar('"');
1141             for (t=s; *t; t++)
1142                 Printf("%s",unlexChar(*t,'"'));
1143             Putchar('"');
1144         }
1145         else
1146             Printf("%s",s);
1147     }
1148 }
1149
1150 /* -------------------------------------------------------------------------
1151  * Handle special types of input for use in interpreter:
1152  * -----------------------------------------------------------------------*/
1153
1154 Command readCommand(cmds,start,sys)    /* read command at start of input   */
1155 struct cmd *cmds;                      /* line in interpreter              */
1156 Char   start;                          /* characters introducing a cmd     */
1157 Char   sys; {                          /* character for shell escape       */
1158     while (c0==' ' || c0 =='\t')
1159         skip();
1160
1161     if (c0=='\n')                      /* look for blank command lines     */
1162         return NOCMD;
1163     if (c0==EOF)                       /* look for end of input stream     */
1164         return QUIT;
1165     if (c0==sys) {                     /* single character system escape   */
1166         skip();
1167         return SYSTEM;
1168     }
1169     if (c0==start && c1==sys) {        /* two character system escape      */
1170         skip();
1171         skip();
1172         return SYSTEM;
1173     }
1174
1175     startToken();                      /* All cmds start with start        */
1176     if (c0==start)                     /* except default (usually EVAL)    */
1177         do {                           /* which is empty                   */
1178             saveTokenChar(c0);
1179             skip();
1180         } while (c0!=EOF && !isIn(c0,SPACE));
1181     endToken();
1182
1183     for (; cmds->cmdString; ++cmds)
1184         if (strcmp((cmds->cmdString),tokenStr)==0 ||
1185             (tokenStr[0]==start &&
1186              tokenStr[1]==(cmds->cmdString)[1] &&
1187              tokenStr[2]=='\0'))
1188             return (cmds->cmdCode);
1189     return BADCMD;
1190 }
1191
1192 String readFilename() {                /* Read filename from input (if any)*/
1193     if (reading==PROJFILE)
1194         skipWhitespace();
1195     else
1196         while (c0==' ' || c0=='\t')
1197             skip();
1198
1199     if (c0=='\n' || c0==EOF)           /* return null string at end of line*/
1200         return 0;
1201
1202     startToken();
1203     while (c0!=EOF && !isIn(c0,SPACE)) {
1204         if (c0=='"') {
1205             skip();
1206             while (c0!=EOF && c0!='\"') {
1207                 Cell c = readAChar(TRUE);
1208                 if (nonNull(c)) {
1209                     saveTokenChar(charOf(c));
1210                 }
1211             }
1212             if (c0=='"')
1213                 skip();
1214             else {
1215                 ERRMSG(row) "a closing quote, '\"', was expected"
1216                 EEND;
1217             }
1218         }
1219         else {
1220             saveTokenChar(c0);
1221             skip();
1222         }
1223     }
1224     endToken();
1225     return tokenStr;
1226 }
1227
1228 String readLine() {                    /* Read command line from input     */
1229     while (c0==' ' || c0=='\t')        /* skip leading whitespace          */
1230         skip();
1231
1232     startToken();
1233     while (c0!='\n' && c0!=EOF) {
1234         saveTokenChar(c0);
1235         skip();
1236     }
1237     endToken();
1238
1239     return tokenStr;
1240 }
1241
1242 /* --------------------------------------------------------------------------
1243  * This lexer supports the Haskell layout rule:
1244  *
1245  * - Layout area bounded by { ... }, with `;'s in between.
1246  * - A `{' is a HARD indentation and can only be matched by a corresponding
1247  *   HARD '}'
1248  * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1249  *   is inserted with the column number of the first token after the
1250  *   WHERE/LET/OF keyword.
1251  * - When a soft indentation is uppermost on the indetation stack with
1252  *   column col' we insert:
1253  *    `}'  in front of token with column<col' and pop indentation off stack,
1254  *    `;'  in front of token with column==col'.
1255  * ------------------------------------------------------------------------*/
1256
1257 #define MAXINDENT  100                 /* maximum nesting of layout rule   */
1258 static  Int        layout[MAXINDENT+1];/* indentation stack                */
1259 #define HARD       (-1)                /* indicates hard indentation       */
1260 static  Int        indentDepth = (-1); /* current indentation nesting      */
1261
1262 static Void local goOffside(col)       /* insert offside marker            */
1263 Int col; {                             /* for specified column             */
1264 assert(offsideON);
1265     if (indentDepth>=MAXINDENT) {
1266         ERRMSG(row) "Too many levels of program nesting"
1267         EEND;
1268     }
1269     layout[++indentDepth] = col;
1270 }
1271
1272 static Void local unOffside() {        /* leave layout rule area           */
1273 assert(offsideON);
1274     indentDepth--;
1275 }
1276
1277 static Bool local canUnOffside() {     /* Decide if unoffside permitted    */
1278 assert(offsideON);
1279     return indentDepth>=0 && layout[indentDepth]!=HARD;
1280 }
1281
1282 /* --------------------------------------------------------------------------
1283  * Main tokeniser:
1284  * ------------------------------------------------------------------------*/
1285
1286 static Void local skipWhitespace() {   /* Skip over whitespace/comments    */
1287     for (;;)                           /* Strictly speaking, this code is  */
1288         if (c0==EOF)                   /* a little more liberal than the   */
1289             return;                    /* report allows ...                */
1290         else if (c0=='\n')
1291             newlineSkip();
1292         else if (isIn(c0,SPACE))
1293             skip();
1294         else if (c0=='{' && c1=='-') { /* (potentially) nested comment     */
1295             Int nesting = 1;
1296             Int origRow = row;         /* Save original row number         */
1297             skip();
1298             skip();
1299             while (nesting>0 && c0!=EOF)
1300                 if (c0=='{' && c1=='-') {
1301                     skip();
1302                     skip();
1303                     nesting++;
1304                 }
1305                 else if (c0=='-' && c1=='}') {
1306                     skip();
1307                     skip();
1308                     nesting--;
1309                 }
1310                 else if (c0=='\n')
1311                     newlineSkip();
1312                 else
1313                     skip();
1314             if (nesting>0) {
1315                 ERRMSG(origRow) "Unterminated nested comment {- ..."
1316                 EEND;
1317             }
1318         }
1319         else if (c0=='-' && c1=='-') {  /* One line comment                */
1320             do
1321                 skip();
1322             while (c0!='\n' && c0!=EOF);
1323             if (c0=='\n')
1324                 newlineSkip();
1325         }
1326         else
1327             return;
1328 }
1329
1330 static Bool firstToken;                /* Set to TRUE for first token      */
1331 static Int  firstTokenIs;              /* ... with token value stored here */
1332
1333 static Int local yylex() {             /* Read next input token ...        */
1334     static Bool insertOpen    = FALSE;
1335     static Bool insertedToken = FALSE;
1336     static Text textRepeat;
1337
1338 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1339
1340     if (firstToken) {                  /* Special case for first token     */
1341         indentDepth   = (-1);
1342         firstToken    = FALSE;
1343         insertOpen    = FALSE;
1344         insertedToken = FALSE;
1345         if (reading==KEYBOARD)
1346             textRepeat = findText(repeatStr);
1347         return firstTokenIs;
1348     }
1349
1350     if (offsideON && insertOpen) {     /* insert `soft' opening brace      */
1351         insertOpen    = FALSE;
1352         insertedToken = TRUE;
1353         goOffside(column);
1354         push(yylval = mkInt(row));
1355         return '{';
1356     }
1357
1358     /* ----------------------------------------------------------------------
1359      * Skip white space, and insert tokens to support layout rules as reqd.
1360      * --------------------------------------------------------------------*/
1361
1362     skipWhitespace();
1363     startColumn = column;
1364     push(yylval = mkInt(row));         /* default token value is line no.  */
1365     /* subsequent changes to yylval must also set top() to the same value  */
1366
1367     if (indentDepth>=0) {              /* layout rule(s) active ?          */
1368         if (insertedToken)             /* avoid inserting multiple `;'s    */
1369             insertedToken = FALSE;     /* or putting `;' after `{'         */
1370         else
1371         if (offsideON && layout[indentDepth]!=HARD) {
1372             if (column<layout[indentDepth]) {
1373                 unOffside();
1374                 return '}';
1375             }
1376             else if (column==layout[indentDepth] && c0!=EOF) {
1377                 insertedToken = TRUE;
1378                 return ';';
1379             }
1380         }
1381     }
1382
1383     /* ----------------------------------------------------------------------
1384      * Now try to identify token type:
1385      * --------------------------------------------------------------------*/
1386
1387     switch (c0) {
1388         case EOF  : return 0;                   /* End of file/input       */
1389
1390         /* The next 10 characters make up the `special' category in 1.3    */
1391         case '('  : skip(); return '(';
1392         case ')'  : skip(); return ')';
1393         case ','  : skip(); return ',';
1394         case ';'  : skip(); return ';';
1395         case '['  : skip(); return '[';
1396         case ']'  : skip(); return ']';
1397         case '`'  : skip(); return '`';
1398         case '{'  : if (offsideON) goOffside(HARD);
1399                     skip();
1400                     return '{';
1401         case '}'  : if (offsideON && indentDepth<0) {
1402                         ERRMSG(row) "Misplaced `}'"
1403                         EEND;
1404                     }
1405                     if (!(offsideON && layout[indentDepth]!=HARD))
1406                         skip();                         /* skip over hard }*/
1407                     if (offsideON)
1408                         unOffside();    /* otherwise, we have to insert a }*/
1409                     return '}';         /* to (try to) avoid an error...   */
1410
1411         /* Character and string literals                                   */
1412         case '\'' : top() = yylval = readChar();
1413                     return CHARLIT;
1414
1415         case '\"' : top() = yylval = readString();
1416                     return STRINGLIT;
1417     }
1418
1419 #if IPARAM
1420     if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1421         Text it;                        /* Look for implicit param name    */
1422         skip();
1423         it    = readIdent();
1424         top() = yylval = ap(IPVAR,it);
1425         return identType=IPVARID;
1426     }
1427 #endif
1428 #if TREX
1429     if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1430         Text it;                        /* Look for record selector name   */
1431         skip();
1432         it    = readIdent();
1433         top() = yylval = ap(RECSEL,mkExt(it));
1434         return identType=RECSELID;
1435     }
1436 #endif
1437     if (isIn(c0,LARGE)) {               /* Look for qualified name         */
1438         Text it = readIdent();          /* No keyword begins with LARGE ...*/
1439         if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1440             Text it2 = NIL;
1441             skip();                     /* Skip qualifying dot             */
1442             if (isIn(c0,SYMBOL)) { /* Qualified operator */
1443                 it2 = readOperator();
1444                 if (opType==CONOP) {
1445                     top() = yylval = mkQConOp(it,it2);
1446                     return QCONOP;
1447                 } else {
1448                     top() = yylval = mkQVarOp(it,it2);
1449                     return QVAROP;
1450                 }
1451             } else {               /* Qualified identifier */
1452                 it2 = readIdent();
1453                 if (identType==CONID) {
1454                     top() = yylval = mkQCon(it,it2);
1455                     return QCONID;
1456                 } else {
1457                     top() = yylval = mkQVar(it,it2);
1458                     return QVARID;
1459                 }
1460             }
1461         } else {
1462             top() = yylval = mkCon(it);
1463             return identType;
1464         }
1465     }
1466     if (isIn(c0,(SMALL|LARGE))) {
1467         Text it = readIdent();
1468
1469         if (it==textCase)              return CASEXP;
1470         if (it==textOfK)               lookAhead(OF);
1471         if (it==textData)              return DATA;
1472         if (it==textType)              return TYPE;
1473         if (it==textIf)                return IF;
1474         if (it==textThen)              return THEN;
1475         if (it==textElse)              return ELSE;
1476         if (it==textWhere)             lookAhead(WHERE);
1477         if (it==textLet)               lookAhead(LET);
1478         if (it==textIn)                return IN;
1479         if (it==textInfix)             return INFIXN;
1480         if (it==textInfixl)            return INFIXL;
1481         if (it==textInfixr)            return INFIXR;
1482         if (it==textForeign)           return FOREIGN;
1483         if (it==textUnsafe)            return UNSAFE;
1484         if (it==textNewtype)           return TNEWTYPE;
1485         if (it==textDefault)           return DEFAULT;
1486         if (it==textDeriving)          return DERIVING;
1487         if (it==textDo)                lookAhead(DO);
1488         if (it==textClass)             return TCLASS;
1489         if (it==textInstance)          return TINSTANCE;
1490         if (it==textModule)            return TMODULE;
1491         if (it==textInterface)         return INTERFACE;
1492         if (it==textInstImport)        return INSTIMPORT;
1493         if (it==textImport)            return IMPORT;
1494         if (it==textExport)            return EXPORT;
1495         if (it==textDynamic)           return DYNAMIC;
1496         if (it==textUUExport)          return UUEXPORT;
1497         if (it==textHiding)            return HIDING;
1498         if (it==textQualified)         return QUALIFIED;
1499         if (it==textAsMod)             return ASMOD;
1500         if (it==textWildcard)          return '_';
1501         if (it==textAll && !haskell98) return ALL;
1502 #if IPARAM
1503         if (it==textWith && !haskell98) lookAhead(WITH);
1504         if (it==textDlet && !haskell98) lookAhead(DLET);
1505 #endif
1506         if (it==textUUAll)             return ALL;
1507         if (it==textRepeat && reading==KEYBOARD)
1508             return repeatLast();
1509
1510         top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1511         return identType;
1512     }
1513
1514     if (isIn(c0,SYMBOL)) {
1515         Text it = readOperator();
1516
1517         if (it==textCoco)    return COCO;
1518         if (it==textEq)      return '=';
1519         if (it==textUpto)    return UPTO;
1520         if (it==textAs)      return '@';
1521         if (it==textLambda)  return '\\';
1522         if (it==textBar)     return '|';
1523         if (it==textFrom)    return FROM;
1524         if (it==textMinus)   return '-';
1525         if (it==textPlus)    return '+';
1526         if (it==textBang)    return '!';
1527         if (it==textDot)     return '.';
1528         if (it==textArrow)   return ARROW;
1529         if (it==textLazy)    return '~';
1530         if (it==textImplies) return IMPLIES;
1531         if (it==textRepeat && reading==KEYBOARD)
1532             return repeatLast();
1533
1534         top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1535         return opType;
1536     }
1537
1538     if (isIn(c0,DIGIT)) {
1539         top() = yylval = readNumber();
1540         return NUMLIT;
1541     }
1542
1543     ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1544                 ((int)c0), column
1545     EEND;
1546     return 0; /*NOTREACHED*/
1547 }
1548
1549 static Int local repeatLast() {         /* Obtain last expression entered  */
1550     if (isNull(yylval=getLastExpr())) {
1551         ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1552         EEND;
1553     }
1554     return REPEAT;
1555 }
1556
1557 Syntax defaultSyntax(t)                 /* Find default syntax of var named*/
1558 Text t; {                               /* by t ...                        */
1559     String s = textToStr(t);
1560     return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1561 }
1562
1563 Syntax syntaxOf(n)                      /* Find syntax for name            */
1564 Name n; {
1565     if (name(n).syntax==NO_SYNTAX)      /* Return default if no syntax set */
1566         return defaultSyntax(name(n).text);
1567     return name(n).syntax;
1568 }
1569
1570 /* --------------------------------------------------------------------------
1571  * main entry points to parser/lexer:
1572  * ------------------------------------------------------------------------*/
1573
1574 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1575 Int startWith; {                       /* determining whether to read a    */
1576     firstToken   = TRUE;               /* script or an expression          */
1577     firstTokenIs = startWith;
1578     if (startWith==INTERFACE)
1579        offsideON = FALSE; else
1580        offsideON = TRUE;
1581
1582     clearStack();
1583     if (yyparse()) {                   /* This can only be parser overflow */
1584         ERRMSG(row) "Parser overflow"  /* as all syntax errors are caught  */
1585         EEND;                          /* in the parser...                 */
1586     }
1587     drop();
1588     if (!stackEmpty())                 /* stack should now be empty        */
1589         internal("parseInput");
1590 }
1591
1592 #ifdef HSCRIPT
1593 static String memPrefix = "@mem@";
1594 static Int lenMemPrefix = 5;   /* strlen(memPrefix)*/
1595
1596 Void makeMemScript(mem,fname)
1597 String mem;
1598 String fname; {
1599    strcat(fname,memPrefix);
1600    itoa((int)mem, fname+strlen(fname), 10);
1601 }
1602
1603 Bool isMemScript(fname)
1604 String fname; {
1605    return (strstr(fname,memPrefix) != NULL);
1606 }
1607
1608 String memScriptString(fname)
1609 String fname; {
1610     String p = strstr(fname,memPrefix);
1611     if (p) {
1612         return (String)atoi(p+lenMemPrefix);
1613     } else {
1614         return NULL;
1615     }
1616 }
1617
1618 Void parseScript(fname,len)             /* Read a script, possibly from mem */
1619 String fname;
1620 Long len; {
1621     input(RESET);
1622     if (isMemScript(fname)) {
1623         char* s = memScriptString(fname);
1624         stringInput(s);
1625     } else {
1626         fileInput(fname,len);
1627     }
1628     parseInput(SCRIPT);
1629 }
1630 #else
1631 Void parseScript(nm,len)               /* Read a script                    */
1632 String nm;
1633 Long   len; {                          /* Used to set a target for reading */
1634     input(RESET);
1635     fileInput(nm,len);
1636     parseInput(SCRIPT);
1637 }
1638 #endif
1639
1640 Void parseExp() {                      /* Read an expression to evaluate   */
1641     parseInput(EXPR);
1642     setLastExpr(inputExpr);
1643 }
1644
1645 Void parseContext() {                  /* Read a context to prove   */
1646     parseInput(CONTEXT);
1647 }
1648
1649 Void parseInterface(nm,len)            /* Read a GHC interface file        */
1650 String nm;
1651 Long   len; {                          /* Used to set a target for reading */
1652     input(RESET);
1653     fileInput(nm,len);
1654     parseInput(INTERFACE);
1655 }
1656
1657
1658 /* --------------------------------------------------------------------------
1659  * Input control:
1660  * ------------------------------------------------------------------------*/
1661
1662 Void input(what)
1663 Int what; {
1664     switch (what) {
1665         case INSTALL : initCharTab();
1666                        textCase       = findText("case");
1667                        textOfK        = findText("of");
1668                        textData       = findText("data");
1669                        textType       = findText("type");
1670                        textIf         = findText("if");
1671                        textThen       = findText("then");
1672                        textElse       = findText("else");
1673                        textWhere      = findText("where");
1674                        textLet        = findText("let");
1675                        textIn         = findText("in");
1676                        textInfix      = findText("infix");
1677                        textInfixl     = findText("infixl");
1678                        textInfixr     = findText("infixr");
1679                        textForeign    = findText("foreign");
1680                        textUnsafe     = findText("unsafe");
1681                        textNewtype    = findText("newtype");
1682                        textDefault    = findText("default");
1683                        textDeriving   = findText("deriving");
1684                        textDo         = findText("do");
1685                        textClass      = findText("class");
1686 #if IPARAM
1687                        textWith       = findText("with");
1688                        textDlet       = findText("dlet");
1689 #endif
1690                        textInstance   = findText("instance");
1691                        textCoco       = findText("::");
1692                        textEq         = findText("=");
1693                        textUpto       = findText("..");
1694                        textAs         = findText("@");
1695                        textLambda     = findText("\\");
1696                        textBar        = findText("|");
1697                        textMinus      = findText("-");
1698                        textPlus       = findText("+");
1699                        textFrom       = findText("<-");
1700                        textArrow      = findText("->");
1701                        textLazy       = findText("~");
1702                        textBang       = findText("!");
1703                        textDot        = findText(".");
1704                        textImplies    = findText("=>");
1705                        textPrelude    = findText("Prelude");
1706                        textNum        = findText("Num");
1707                        textModule     = findText("module");
1708                        textInterface  = findText("__interface");
1709                        textInstImport = findText("__instimport");
1710                        textExport     = findText("export");
1711                        textDynamic    = findText("dynamic");
1712                        textUUExport   = findText("__export");
1713                        textImport     = findText("import");
1714                        textHiding     = findText("hiding");
1715                        textQualified  = findText("qualified");
1716                        textAsMod      = findText("as");
1717                        textWildcard   = findText("_");
1718                        textAll        = findText("forall");
1719                        textUUAll      = findText("__forall");
1720                        varMinus       = mkVar(textMinus);
1721                        varPlus        = mkVar(textPlus);
1722                        varBang        = mkVar(textBang);
1723                        varDot         = mkVar(textDot);
1724                        varHiding      = mkVar(textHiding);
1725                        varQualified   = mkVar(textQualified);
1726                        varAsMod       = mkVar(textAsMod);
1727                        conMain        = mkCon(findText("Main"));
1728                        varMain        = mkVar(findText("main"));
1729                        evalDefaults   = NIL;
1730
1731                        input(RESET);
1732                        break;
1733
1734         case RESET   : tyconDefns   = NIL;
1735                        typeInDefns  = NIL;
1736                        valDefns     = NIL;
1737                        classDefns   = NIL;
1738                        instDefns    = NIL;
1739                        selDefns     = NIL;
1740                        genDefns     = NIL;
1741                        //primDefns    = NIL;
1742                        unqualImports= NIL;
1743                        foreignImports= NIL;
1744                        foreignExports= NIL;
1745                        defaultDefns = NIL;
1746                        defaultLine  = 0;
1747                        inputExpr    = NIL;
1748                        imps         = NIL;
1749                        closeAnyInput();
1750                        break;
1751
1752         case BREAK   : if (reading==KEYBOARD)
1753                            c0 = EOF;
1754                        break;
1755
1756         case MARK    : mark(tyconDefns);
1757                        mark(typeInDefns);
1758                        mark(valDefns);
1759                        mark(classDefns);
1760                        mark(instDefns);
1761                        mark(selDefns);
1762                        mark(genDefns);
1763                        //mark(primDefns);
1764                        mark(unqualImports);
1765                        mark(foreignImports);
1766                        mark(foreignExports);
1767                        mark(defaultDefns);
1768                        mark(evalDefaults);
1769                        mark(inputExpr);
1770                        mark(varMinus);
1771                        mark(varPlus);
1772                        mark(varBang);
1773                        mark(varDot);
1774                        mark(varHiding);
1775                        mark(varQualified);
1776                        mark(varAsMod);
1777                        mark(varMain);
1778                        mark(conMain);
1779                        mark(imps);
1780                        break;
1781     }
1782 }
1783
1784 /*-------------------------------------------------------------------------*/