2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
6 * Yale Haskell Group, and the Oregon Graduate Institute of Science and
7 * Technology, 1994-1999, All rights reserved. It is distributed as
8 * free software under the license in the file "License", which is
9 * included in the distribution.
11 * $RCSfile: input.c,v $
13 * $Date: 1999/11/25 11:10:16 $
14 * ------------------------------------------------------------------------*/
32 #if IS_WIN32 || HUGS_FOR_WINDOWS
36 #if HAVE_LIBREADLINE && HAVE_READLINE_READLINE_H
37 #define USE_READLINE 1
39 #define USE_READLINE 0
43 #include <readline/readline.h>
44 #include <readline/history.h>
48 /* --------------------------------------------------------------------------
50 * ------------------------------------------------------------------------*/
52 List tyconDefns = NIL; /* type constructor definitions */
53 List typeInDefns = NIL; /* type synonym restrictions */
54 List valDefns = NIL; /* value definitions in script */
55 List classDefns = NIL; /* class defns in script */
56 List instDefns = NIL; /* instance defns in script */
57 List selDefns = NIL; /* list of selector lists */
58 List genDefns = NIL; /* list of generated names */
59 List unqualImports = NIL; /* unqualified import list */
60 List foreignImports = NIL; /* foreign imports */
61 List foreignExports = NIL; /* foreign exportsd */
62 List defaultDefns = NIL; /* default definitions (if any) */
63 Int defaultLine = 0; /* line in which default defs occur*/
64 List evalDefaults = NIL; /* defaults for evaluator */
66 Cell inputExpr = NIL; /* input expression */
67 Cell inputContext = NIL; /* input context */
68 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
69 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
70 Bool offsideON = TRUE; /* TRUE => implement offside rule */
72 String repeatStr = 0; /* Repeat last expr */
74 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
75 String preprocessor = 0;
78 /* --------------------------------------------------------------------------
79 * Local function prototypes:
80 * ------------------------------------------------------------------------*/
82 static Void local initCharTab Args((Void));
83 static Void local fileInput Args((String,Long));
84 static Bool local literateMode Args((String));
85 static Bool local linecmp Args((String,String));
86 static Int local nextLine Args((Void));
87 static Void local skip Args((Void));
88 static Void local thisLineIs Args((Int));
89 static Void local newlineSkip Args((Void));
90 static Void local closeAnyInput Args((Void));
92 Int yyparse Args((Void)); /* can't stop yacc making this */
93 /* public, but don't advertise */
94 /* it in a header file. */
96 static Void local endToken Args((Void));
97 static Text local readOperator Args((Void));
98 static Text local readIdent Args((Void));
99 static Cell local readRadixNumber Args((Int));
100 static Cell local readNumber Args((Void));
101 static Cell local readChar Args((Void));
102 static Cell local readString Args((Void));
103 static Void local saveStrChr Args((Char));
104 static Cell local readAChar Args((Bool));
106 static Bool local lazyReadMatches Args((String));
107 static Cell local readEscapeChar Args((Bool));
108 static Void local skipGap Args((Void));
109 static Cell local readCtrlChar Args((Void));
110 static Cell local readOctChar Args((Void));
111 static Cell local readHexChar Args((Void));
112 static Int local readHexDigit Args((Char));
113 static Cell local readDecChar Args((Void));
115 static Void local goOffside Args((Int));
116 static Void local unOffside Args((Void));
117 static Bool local canUnOffside Args((Void));
119 static Void local skipWhitespace Args((Void));
120 static Int local yylex Args((Void));
121 static Int local repeatLast Args((Void));
123 static Void local parseInput Args((Int));
125 static Bool local doesNotExceed Args((String,Int,Int));
126 static Int local stringToInt Args((String,Int));
129 /* --------------------------------------------------------------------------
130 * Text values for reserved words and special symbols:
131 * ------------------------------------------------------------------------*/
133 static Text textCase, textOfK, textData, textType, textIf;
134 static Text textThen, textElse, textWhere, textLet, textIn;
135 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
136 static Text textDefault, textDeriving, textDo, textClass, textInstance;
138 static Text textWith, textDlet;
141 static Text textCoco, textEq, textUpto, textAs, textLambda;
142 static Text textBar, textMinus, textFrom, textArrow, textLazy;
143 static Text textBang, textDot, textAll, textImplies;
144 static Text textWildcard;
146 static Text textModule, textImport, textInterface, textInstImport;
147 static Text textHiding, textQualified, textAsMod;
148 static Text textExport, textDynamic, textUUExport;
149 static Text textUnsafe, textUUAll;
151 Text textCcall; /* ccall */
152 Text textStdcall; /* stdcall */
154 Text textNum; /* Num */
155 Text textPrelude; /* Prelude */
156 Text textPlus; /* (+) */
158 static Cell conMain; /* Main */
159 static Cell varMain; /* main */
161 static Cell varMinus; /* (-) */
162 static Cell varPlus; /* (+) */
163 static Cell varBang; /* (!) */
164 static Cell varDot; /* (.) */
165 static Cell varHiding; /* hiding */
166 static Cell varQualified; /* qualified */
167 static Cell varAsMod; /* as */
169 static List imps; /* List of imports to be chased */
172 /* --------------------------------------------------------------------------
173 * Character set handling:
175 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
176 * character set. The following code provides methods for classifying
177 * input characters according to the lexical structure specified by the
178 * report. Hugs should still accept older programs because ASCII is
179 * essentially just a subset of the ISO character set.
181 * Notes: If you want to port Hugs to a machine that uses something
182 * substantially different from the ISO character set, then you will need
183 * to insert additional code to map between character sets.
185 * At some point, the following data structures may be exported in a .h
186 * file to allow the information contained here to be picked up in the
187 * implementation of LibChar is* primitives.
189 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
190 * ------------------------------------------------------------------------*/
192 static Bool charTabBuilt;
193 static unsigned char ctable[NUM_CHARS];
194 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
195 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
205 static Void local initCharTab() { /* Initialize char decode table */
206 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
207 #define setChar(x,c) ctable[c] |= (x)
208 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
209 #define setCopy(x,c) {Int i; \
210 for (i=0; i<NUM_CHARS; ++i) \
215 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
217 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
218 setRange(SMALL, 223,246); /* ISO lower case letters */
219 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
220 setChar (SMALL, '_');
222 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
223 setRange(LARGE, 192,214); /* ISO upper case letters */
224 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
226 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
227 setRange(SYMBOL, 215,215);
228 setChar (SYMBOL, 247);
229 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
231 setChar (IDAFTER, '\''); /* Characters in identifier */
232 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
234 setChar (ZPACE, ' '); /* ASCII space character */
235 setChar (ZPACE, 160); /* ISO non breaking space */
236 setRange(ZPACE, 9,13); /* special whitespace: \t\n\v\f\r */
238 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
239 setChars(PRINT, " '\""); /* Space and quotes */
240 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
250 /* --------------------------------------------------------------------------
251 * Single character input routines:
253 * At the lowest level of input, characters are read one at a time, with the
254 * current character held in c0 and the following (lookahead) character in
255 * c1. The corrdinates of c0 within the file are held in (column,row).
256 * The input stream is advanced by one character using the skip() function.
257 * ------------------------------------------------------------------------*/
259 #define TABSIZE 8 /* spacing between tabstops */
261 #define NOTHING 0 /* what kind of input is being read?*/
262 #define KEYBOARD 1 /* - keyboard/console? */
263 #define SCRIPTFILE 2 /* - script file */
264 #define PROJFILE 3 /* - project file */
265 #define STRING 4 /* - string buffer? */
267 static Int reading = NOTHING;
269 static Target readSoFar;
270 static Int row, column, startColumn;
272 static FILE *inputStream = 0;
273 static Bool thisLiterate;
274 static String nextStringChar; /* next char in string buffer */
276 #if USE_READLINE /* for command line editors */
277 static String currentLine; /* editline or GNU readline */
278 static String nextChar;
279 #define nextConsoleChar() \
280 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
282 #define nextConsoleChar() getc(stdin)
285 static Int litLines; /* count defn lines in lit script */
286 #define DEFNCHAR '>' /* definition lines begin with this */
287 static Int lastLine; /* records type of last line read: */
288 #define STARTLINE 0 /* - at start of file, none read */
289 #define BLANKLINE 1 /* - blank (may preceed definition) */
290 #define TEXTLINE 2 /* - text comment */
291 #define DEFNLINE 3 /* - line containing definition */
292 #define CODELINE 4 /* - line inside code block */
294 #define BEGINCODE "\\begin{code}"
295 #define ENDCODE "\\end{code}"
298 static char *lineBuffer = NULL; /* getline() does the initial allocation */
300 #define LINEBUFFER_SIZE 1000
301 static char lineBuffer[LINEBUFFER_SIZE];
303 static int lineLength = 0;
304 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
305 static int linePtr = 0;
307 Void consoleInput(prompt) /* prepare to input characters from */
308 String prompt; { /* standard in (i.e. console/kbd) */
309 reading = KEYBOARD; /* keyboard input is Line oriented, */
310 c0 = /* i.e. input terminated by '\n' */
316 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
317 * avoids accidentally freeing currentLine twice.
320 String oldCurrentLine = currentLine;
321 currentLine = 0; /* We may lose the space of currentLine */
322 free(oldCurrentLine); /* if interrupted here - unlikely */
324 currentLine = readline(prompt);
325 nextChar = currentLine;
328 add_history(currentLine);
338 Void projInput(nm) /* prepare to input characters from */
339 String nm; { /* from named project file */
340 if ((inputStream = fopen(nm,"r"))!=0) {
348 ERRMSG(0) "Unable to open project file \"%s\"", nm
353 static Void local fileInput(nm,len) /* prepare to input characters from*/
354 String nm; /* named file (specified length is */
355 Long len; { /* used to set target for reading) */
356 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
358 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
359 char *cmd = malloc(reallen);
361 ERRMSG(0) "Unable to allocate memory for filter command."
364 strcpy(cmd,preprocessor);
367 inputStream = popen(cmd,"r");
370 inputStream = fopen(nm,"r");
373 inputStream = fopen(nm,"r");
376 reading = SCRIPTFILE;
382 lastLine = STARTLINE; /* literate file processing */
386 thisLiterate = literateMode(nm);
390 setGoal("Parsing", (Target)len);
393 ERRMSG(0) "Unable to open file \"%s\"", nm
398 Void stringInput(s) /* prepare to input characters from string */
413 static Bool local literateMode(nm) /* Select literate mode for file */
415 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
417 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
419 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
420 filenamecmp(dot+1,"verb")==0) /* literate scripts */
423 return literateScripts; /* otherwise, use the default */
427 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
431 len = 1 + strlen ( srcName );
432 *hiName = malloc(len);
433 *oName = malloc(len);
434 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
435 (*hiName)[0] = (*oName)[0] = 0;
436 dot = strrchr(srcName, '.');
438 if (filenamecmp(dot+1, "hs")==0 &&
439 filenamecmp(dot+1, "lhs")==0 &&
440 filenamecmp(dot+1, "verb")==0) return;
442 strcpy(*hiName, srcName);
443 dot = strrchr(*hiName, '.');
448 strcpy(*oName, srcName);
449 dot = strrchr(*oName, '.');
456 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
457 * I've removed the loop (since newLineSkip contains a loop too) and
458 * replaced the warnings with errors. ADR
461 * To deal with literate \begin{code}...\end{code} blocks,
462 * add a line buffer that rooms the current line. The old c0 and c1
463 * stream pointers are used as before within that buffer -- sof
465 * Upon reading a new line into the line buffer, we check to see if
466 * we're reading in a line containing \begin{code} or \end{code} and
467 * take appropriate action.
470 static Bool local linecmp(s,line) /* compare string with line */
471 String s; /* line may end in whitespace */
474 while (s[i] != '\0' && s[i] == line[i]) {
477 /* s[0..i-1] == line[0..i-1] */
478 if (s[i] != '\0') { /* check s `isPrefixOf` line */
481 while (isIn(line[i], ZPACE)) { /* allow whitespace at end of line */
484 return (line[i] == '\0');
487 /* Returns line length (including \n) or 0 upon EOF. */
488 static Int local nextLine()
492 Forget about fgets(), it is utterly braindead.
493 (Assumes \NUL free streams and does not gracefully deal
494 with overflow.) Instead, use GNU libc's getline().
496 lineLength = getline(&lineBuffer, &lineLength, inputStream);
498 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
499 lineLength = strlen(lineBuffer);
503 /* printf("Read: \"%s\"", lineBuffer); */
504 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
507 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
508 lineBuffer[1] == '!') {
509 lineBuffer[0]='\n'; /* pretend it's a blank line */
512 } else if (thisLiterate) {
513 if (linecmp(BEGINCODE, lineBuffer)) {
514 if (!inCodeBlock) { /* Entered a code block */
516 lineBuffer[0]='\n'; /* pretend it's a blank line */
521 ERRMSG(row) "\\begin{code} encountered inside code block"
525 else if (linecmp(ENDCODE, lineBuffer)) {
526 if (inCodeBlock) { /* Finished code block */
528 lineBuffer[0]='\n'; /* pretend it's a blank line */
533 ERRMSG(row) "\\end{code} encountered outside code block"
538 /* printf("Read: \"%s\"", lineBuffer); */
542 static Void local skip() { /* move forward one char in input */
543 if (c0!=EOF) { /* stream, updating c0, c1, ... */
544 if (c0=='\n') { /* Adjusting cursor coords as nec. */
547 if (reading==SCRIPTFILE)
551 column += TABSIZE - ((column-1)%TABSIZE);
560 if (reading==SCRIPTFILE)
564 else if (reading==KEYBOARD) {
569 c1 = nextConsoleChar();
570 #if IS_WIN32 && !HUGS_FOR_WINDOWS
573 /* On Win32, hitting ctrl-C causes the next getchar to
574 * fail - returning "-1" to indicate an error.
575 * This is one of the rare cases where "-1" does not mean EOF.
577 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
582 else if (reading==STRING) {
583 c1 = (unsigned char) *nextStringChar++;
588 if (lineLength <=0 || linePtr == lineLength) {
589 /* Current line, exhausted - get new one */
590 if (nextLine() <= 0) { /* EOF */
595 c1 = (unsigned char)lineBuffer[linePtr++];
599 c1 = (unsigned char)lineBuffer[linePtr++];
606 static Void local thisLineIs(kind) /* register kind of current line */
607 Int kind; { /* & check for literate script errs */
608 if (literateErrors) {
609 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
610 (kind==TEXTLINE && lastLine==DEFNLINE)) {
611 ERRMSG(row) "Program line next to comment"
618 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
619 /* assert(c0=='\n'); */
620 if (reading==SCRIPTFILE && thisLiterate) {
623 if (inCodeBlock) { /* pass chars on definition lines */
624 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
628 if (c0==DEFNCHAR) { /* pass chars on definition lines */
629 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
634 while (c0 != '\n' && isIn(c0,ZPACE)) /* maybe line is blank? */
636 if (c0=='\n' || c0==EOF)
637 thisLineIs(BLANKLINE);
639 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
640 while (c0!='\n' && c0!=EOF)
642 } /* by now, c0=='\n' or c0==EOF */
643 } while (c0!=EOF); /* if new line, start again */
645 if (litLines==0 && literateErrors) {
646 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
655 static Void local closeAnyInput() { /* Close input stream, if open, */
656 switch (reading) { /* or skip to end of console line */
658 case SCRIPTFILE : if (inputStream) {
659 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
671 case KEYBOARD : while (c0!=EOF)
678 /* --------------------------------------------------------------------------
679 * Parser: Uses table driven parser generated from parser.y using yacc
680 * ------------------------------------------------------------------------*/
684 /* --------------------------------------------------------------------------
685 * Single token input routines:
687 * The following routines read the values of particular kinds of token given
688 * that the first character of the token has already been located in c0 on
689 * entry to the routine.
690 * ------------------------------------------------------------------------*/
692 #define MAX_TOKEN 4000
693 #define startToken() tokPos = 0
694 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
695 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
696 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
698 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
699 static Int tokPos; /* input position in buffer */
700 static Int identType; /* identifier type: CONID / VARID */
701 static Int opType; /* operator type : CONOP / VAROP */
703 static Void local endToken() { /* check for token overflow */
704 if (tokPos>MAX_TOKEN) {
705 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
708 tokenStr[tokPos] = '\0';
711 static Text local readOperator() { /* read operator symbol */
716 } while (isISO(c0) && isIn(c0,SYMBOL));
717 opType = (tokenStr[0]==':' ? CONOP : VAROP);
719 return findText(tokenStr);
722 static Text local readIdent() { /* read identifier */
727 } while (isISO(c0) && isIn(c0,IDAFTER));
729 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
730 return findText(tokenStr);
734 static Bool local doesNotExceed(s,radix,limit)
741 if (s[p] == 0) return TRUE;
742 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
743 n = radix*n + (s[p]-'0');
748 static Int local stringToInt(s,radix)
754 if (s[p] == 0) return n;
755 n = radix*n + (s[p]-'0');
760 static Cell local readRadixNumber(r) /* Read literal in specified radix */
761 Int r; { /* from input of the form 0c{digs} */
764 skip(); /* skip leading zero */
765 if ((d=readHexDigit(c1))<0 || d>=r) {
766 /* Special case; no digits, lex as */
767 /* if it had been written "0 c..." */
772 saveTokenChar('0'+readHexDigit(c0));
774 d = readHexDigit(c0);
775 } while (d>=0 && d<r);
779 if (doesNotExceed(tokenStr,r,MAXPOSINT))
780 return mkInt(stringToInt(tokenStr,r));
783 return stringToBignum(tokenStr);
785 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
790 static Cell local readNumber() { /* read numeric constant */
793 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
794 return readRadixNumber(16);
795 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
796 return readRadixNumber(8);
803 } while (isISO(c0) && isIn(c0,DIGIT));
805 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
807 if (doesNotExceed(tokenStr,10,MAXPOSINT))
808 return mkInt(stringToInt(tokenStr,10)); else
809 return stringToBignum(tokenStr);
812 saveTokenChar(c0); /* save decimal point */
814 do { /* process fractional part ... */
817 } while (isISO(c0) && isIn(c0,DIGIT));
819 if (c0=='e' || c0=='E') { /* look for exponent part... */
829 if (!isISO(c0) || !isIn(c0,DIGIT)) {
830 ERRMSG(row) "Missing digits in exponent"
837 } while (isISO(c0) && isIn(c0,DIGIT));
842 return mkFloat(stringToFloat(tokenStr));
851 static Cell local readChar() { /* read character constant */
855 if (c0=='\'' || c0=='\n' || c0==EOF) {
856 ERRMSG(row) "Illegal character constant"
860 charRead = readAChar(FALSE);
865 ERRMSG(row) "Improperly terminated character constant"
871 static Cell local readString() { /* read string literal */
876 while (c0!='\"' && c0!='\n' && c0!=EOF) {
879 saveStrChr(charOf(c));
885 ERRMSG(row) "Improperly terminated string"
889 return mkStr(findText(tokenStr));
892 static Void local saveStrChr(c) /* save character in string */
894 if (c!='\0' && c!='\\') { /* save non null char as single char*/
897 else { /* save null char as TWO null chars */
898 if (tokPos+1<MAX_TOKEN) {
908 static Cell local readAChar(isStrLit) /* read single char constant */
909 Bool isStrLit; { /* TRUE => enable \& and gaps */
912 if (c0=='\\') /* escape character? */
913 return readEscapeChar(isStrLit);
915 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
918 skip(); /* normal character? */
922 /* --------------------------------------------------------------------------
923 * Character escape code sequences:
924 * ------------------------------------------------------------------------*/
926 static struct { /* table of special escape codes */
930 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
931 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
932 {"\'",'\''}, {"v", 11},
933 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
934 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
935 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
936 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
937 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
938 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
939 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
940 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
941 {"SP", 32}, {"DEL", 127},
945 static Int alreadyMatched; /* Record portion of input stream */
946 static char alreadyRead[10]; /* that has been read w/o a match */
948 static Bool local lazyReadMatches(s) /* compare input stream with string */
949 String s; { /* possibly using characters that */
950 int i; /* have already been read */
952 for (i=0; i<alreadyMatched; ++i)
953 if (alreadyRead[i]!=s[i])
956 while (s[i] && s[i]==c0) {
957 alreadyRead[alreadyMatched++]=(char)c0;
965 static Cell local readEscapeChar(isStrLit)/* read escape character */
971 case '&' : if (isStrLit) {
975 ERRMSG(row) "Illegal use of `\\&' in character constant"
979 case '^' : return readCtrlChar();
981 case 'o' : return readOctChar();
982 case 'x' : return readHexChar();
984 default : if (!isISO(c0)) {
985 ERRMSG(row) "Illegal escape sequence"
988 else if (isIn(c0,ZPACE)) {
993 ERRMSG(row) "Illegal use of gap in character constant"
997 else if (isIn(c0,DIGIT))
998 return readDecChar();
1001 for (alreadyMatched=0; escapes[i].codename; i++)
1002 if (lazyReadMatches(escapes[i].codename))
1003 return mkChar(escapes[i].codenumber);
1005 alreadyRead[alreadyMatched++] = (char)c0;
1006 alreadyRead[alreadyMatched++] = '\0';
1007 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
1010 return NIL;/*NOTREACHED*/
1013 static Void local skipGap() { /* skip over gap in string literal */
1014 do /* (simplified in Haskell 1.1) */
1019 while (isISO(c0) && isIn(c0,ZPACE));
1021 ERRMSG(row) "Missing `\\' terminating string literal gap"
1027 static Cell local readCtrlChar() { /* read escape sequence \^x */
1028 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1032 if ((which = strchr(controls,c0))==NULL) {
1033 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1037 return mkChar(which-controls);
1040 static Cell local readOctChar() { /* read octal character constant */
1045 if ((d = readHexDigit(c0))<0 || d>=8) {
1046 ERRMSG(row) "Empty octal character escape"
1050 if (overflows(n,8,d,MAXCHARVAL)) {
1051 ERRMSG(row) "Octal character escape out of range"
1056 } while ((d = readHexDigit(c0))>=0 && d<8);
1061 static Cell local readHexChar() { /* read hex character constant */
1066 if ((d = readHexDigit(c0))<0) {
1067 ERRMSG(row) "Empty hexadecimal character escape"
1071 if (overflows(n,16,d,MAXCHARVAL)) {
1072 ERRMSG(row) "Hexadecimal character escape out of range"
1077 } while ((d = readHexDigit(c0))>=0);
1082 static Int local readHexDigit(c) /* read single hex digit */
1084 if ('0'<=c && c<='9')
1086 if ('A'<=c && c<='F')
1087 return 10 + (c-'A');
1088 if ('a'<=c && c<='f')
1089 return 10 + (c-'a');
1093 static Cell local readDecChar() { /* read decimal character constant */
1097 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1098 ERRMSG(row) "Decimal character escape out of range"
1101 n = 10*n + (c0-'0');
1103 } while (c0!=EOF && isIn(c0,DIGIT));
1108 /* --------------------------------------------------------------------------
1109 * Produce printable representation of character:
1110 * ------------------------------------------------------------------------*/
1112 String unlexChar(c,quote) /* return string representation of */
1113 Char c; /* character... */
1114 Char quote; { /* protect quote character */
1115 static char buffer[12];
1117 if (c<0) /* deal with sign extended chars.. */
1120 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1121 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1123 buffer[1] = (char)c;
1127 buffer[0] = (char)c;
1131 else { /* look for escape code */
1133 for (escs=0; escapes[escs].codename; escs++)
1134 if (escapes[escs].codenumber==c) {
1135 sprintf(buffer,"\\%s",escapes[escs].codename);
1138 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1143 Void printString(s) /* print string s, using quotes and */
1144 String s; { /* escapes if any parts need them */
1148 while ((c = *t)!=0 && isISO(c)
1149 && isIn(c,PRINT) && c!='"' && !isIn(c,ZPACE)) {
1155 Printf("%s",unlexChar(*t,'"'));
1163 /* -------------------------------------------------------------------------
1164 * Handle special types of input for use in interpreter:
1165 * -----------------------------------------------------------------------*/
1167 Command readCommand(cmds,start,sys) /* read command at start of input */
1168 struct cmd *cmds; /* line in interpreter */
1169 Char start; /* characters introducing a cmd */
1170 Char sys; { /* character for shell escape */
1171 while (c0==' ' || c0 =='\t')
1174 if (c0=='\n') /* look for blank command lines */
1176 if (c0==EOF) /* look for end of input stream */
1178 if (c0==sys) { /* single character system escape */
1182 if (c0==start && c1==sys) { /* two character system escape */
1188 startToken(); /* All cmds start with start */
1189 if (c0==start) /* except default (usually EVAL) */
1190 do { /* which is empty */
1193 } while (c0!=EOF && !isIn(c0,ZPACE));
1196 for (; cmds->cmdString; ++cmds)
1197 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1198 (tokenStr[0]==start &&
1199 tokenStr[1]==(cmds->cmdString)[1] &&
1201 return (cmds->cmdCode);
1205 String readFilename() { /* Read filename from input (if any)*/
1206 if (reading==PROJFILE)
1209 while (c0==' ' || c0=='\t')
1212 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1216 while (c0!=EOF && !isIn(c0,ZPACE)) {
1219 while (c0!=EOF && c0!='\"') {
1220 Cell c = readAChar(TRUE);
1222 saveTokenChar(charOf(c));
1228 ERRMSG(row) "a closing quote, '\"', was expected"
1241 String readLine() { /* Read command line from input */
1242 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1246 while (c0!='\n' && c0!=EOF) {
1255 /* --------------------------------------------------------------------------
1256 * This lexer supports the Haskell layout rule:
1258 * - Layout area bounded by { ... }, with `;'s in between.
1259 * - A `{' is a HARD indentation and can only be matched by a corresponding
1261 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1262 * is inserted with the column number of the first token after the
1263 * WHERE/LET/OF keyword.
1264 * - When a soft indentation is uppermost on the indetation stack with
1265 * column col' we insert:
1266 * `}' in front of token with column<col' and pop indentation off stack,
1267 * `;' in front of token with column==col'.
1268 * ------------------------------------------------------------------------*/
1270 #define MAXINDENT 100 /* maximum nesting of layout rule */
1271 static Int layout[MAXINDENT+1];/* indentation stack */
1272 #define HARD (-1) /* indicates hard indentation */
1273 static Int indentDepth = (-1); /* current indentation nesting */
1275 static Void local goOffside(col) /* insert offside marker */
1276 Int col; { /* for specified column */
1278 if (indentDepth>=MAXINDENT) {
1279 ERRMSG(row) "Too many levels of program nesting"
1282 layout[++indentDepth] = col;
1285 static Void local unOffside() { /* leave layout rule area */
1290 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1292 return indentDepth>=0 && layout[indentDepth]!=HARD;
1295 /* --------------------------------------------------------------------------
1297 * ------------------------------------------------------------------------*/
1299 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1300 for (;;) /* Strictly speaking, this code is */
1301 if (c0==EOF) /* a little more liberal than the */
1302 return; /* report allows ... */
1305 else if (isIn(c0,ZPACE))
1307 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1309 Int origRow = row; /* Save original row number */
1312 while (nesting>0 && c0!=EOF)
1313 if (c0=='{' && c1=='-') {
1318 else if (c0=='-' && c1=='}') {
1328 ERRMSG(origRow) "Unterminated nested comment {- ..."
1332 else if (c0=='-' && c1=='-') { /* One line comment */
1335 while (c0!='\n' && c0!=EOF);
1343 static Bool firstToken; /* Set to TRUE for first token */
1344 static Int firstTokenIs; /* ... with token value stored here */
1346 static Int local yylex() { /* Read next input token ... */
1347 static Bool insertOpen = FALSE;
1348 static Bool insertedToken = FALSE;
1349 static Text textRepeat;
1351 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1353 if (firstToken) { /* Special case for first token */
1357 insertedToken = FALSE;
1358 if (reading==KEYBOARD)
1359 textRepeat = findText(repeatStr);
1360 return firstTokenIs;
1363 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1365 insertedToken = TRUE;
1367 push(yylval = mkInt(row));
1371 /* ----------------------------------------------------------------------
1372 * Skip white space, and insert tokens to support layout rules as reqd.
1373 * --------------------------------------------------------------------*/
1376 startColumn = column;
1377 push(yylval = mkInt(row)); /* default token value is line no. */
1378 /* subsequent changes to yylval must also set top() to the same value */
1380 if (indentDepth>=0) { /* layout rule(s) active ? */
1381 if (insertedToken) /* avoid inserting multiple `;'s */
1382 insertedToken = FALSE; /* or putting `;' after `{' */
1384 if (offsideON && layout[indentDepth]!=HARD) {
1385 if (column<layout[indentDepth]) {
1389 else if (column==layout[indentDepth] && c0!=EOF) {
1390 insertedToken = TRUE;
1396 /* ----------------------------------------------------------------------
1397 * Now try to identify token type:
1398 * --------------------------------------------------------------------*/
1401 case EOF : return 0; /* End of file/input */
1403 /* The next 10 characters make up the `special' category in 1.3 */
1404 case '(' : skip(); return '(';
1405 case ')' : skip(); return ')';
1406 case ',' : skip(); return ',';
1407 case ';' : skip(); return ';';
1408 case '[' : skip(); return '[';
1409 case ']' : skip(); return ']';
1410 case '`' : skip(); return '`';
1411 case '{' : if (offsideON) goOffside(HARD);
1414 case '}' : if (offsideON && indentDepth<0) {
1415 ERRMSG(row) "Misplaced `}'"
1418 if (!(offsideON && layout[indentDepth]!=HARD))
1419 skip(); /* skip over hard }*/
1421 unOffside(); /* otherwise, we have to insert a }*/
1422 return '}'; /* to (try to) avoid an error... */
1424 /* Character and string literals */
1425 case '\'' : top() = yylval = readChar();
1428 case '\"' : top() = yylval = readString();
1433 if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1434 Text it; /* Look for implicit param name */
1437 top() = yylval = ap(IPVAR,it);
1438 return identType=IPVARID;
1442 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1443 Text it; /* Look for record selector name */
1446 top() = yylval = ap(RECSEL,mkExt(it));
1447 return identType=RECSELID;
1450 if (isIn(c0,LARGE)) { /* Look for qualified name */
1451 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1452 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1454 skip(); /* Skip qualifying dot */
1455 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1456 it2 = readOperator();
1457 if (opType==CONOP) {
1458 top() = yylval = mkQConOp(it,it2);
1461 top() = yylval = mkQVarOp(it,it2);
1464 } else { /* Qualified identifier */
1466 if (identType==CONID) {
1467 top() = yylval = mkQCon(it,it2);
1470 top() = yylval = mkQVar(it,it2);
1475 top() = yylval = mkCon(it);
1479 if (isIn(c0,(SMALL|LARGE))) {
1480 Text it = readIdent();
1482 if (it==textCase) return CASEXP;
1483 if (it==textOfK) lookAhead(OF);
1484 if (it==textData) return DATA;
1485 if (it==textType) return TYPE;
1486 if (it==textIf) return IF;
1487 if (it==textThen) return THEN;
1488 if (it==textElse) return ELSE;
1489 if (it==textWhere) lookAhead(WHERE);
1490 if (it==textLet) lookAhead(LET);
1491 if (it==textIn) return IN;
1492 if (it==textInfix) return INFIXN;
1493 if (it==textInfixl) return INFIXL;
1494 if (it==textInfixr) return INFIXR;
1495 if (it==textForeign) return FOREIGN;
1496 if (it==textUnsafe) return UNSAFE;
1497 if (it==textNewtype) return TNEWTYPE;
1498 if (it==textDefault) return DEFAULT;
1499 if (it==textDeriving) return DERIVING;
1500 if (it==textDo) lookAhead(DO);
1501 if (it==textClass) return TCLASS;
1502 if (it==textInstance) return TINSTANCE;
1503 if (it==textModule) return TMODULE;
1504 if (it==textInterface) return INTERFACE;
1505 if (it==textInstImport) return INSTIMPORT;
1506 if (it==textImport) return IMPORT;
1507 if (it==textExport) return EXPORT;
1508 if (it==textDynamic) return DYNAMIC;
1509 if (it==textCcall) return CCALL;
1510 if (it==textStdcall) return STDKALL;
1511 if (it==textUUExport) return UUEXPORT;
1512 if (it==textHiding) return HIDING;
1513 if (it==textQualified) return QUALIFIED;
1514 if (it==textAsMod) return ASMOD;
1515 if (it==textWildcard) return '_';
1516 if (it==textAll && !haskell98) return ALL;
1518 if (it==textWith && !haskell98) lookAhead(WITH);
1519 if (it==textDlet && !haskell98) lookAhead(DLET);
1521 if (it==textUUAll) return ALL;
1522 if (it==textRepeat && reading==KEYBOARD)
1523 return repeatLast();
1525 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1529 if (isIn(c0,SYMBOL)) {
1530 Text it = readOperator();
1532 if (it==textCoco) return COCO;
1533 if (it==textEq) return '=';
1534 if (it==textUpto) return UPTO;
1535 if (it==textAs) return '@';
1536 if (it==textLambda) return '\\';
1537 if (it==textBar) return '|';
1538 if (it==textFrom) return FROM;
1539 if (it==textMinus) return '-';
1540 if (it==textPlus) return '+';
1541 if (it==textBang) return '!';
1542 if (it==textDot) return '.';
1543 if (it==textArrow) return ARROW;
1544 if (it==textLazy) return '~';
1545 if (it==textImplies) return IMPLIES;
1546 if (it==textRepeat && reading==KEYBOARD)
1547 return repeatLast();
1549 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1553 if (isIn(c0,DIGIT)) {
1554 top() = yylval = readNumber();
1558 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1561 return 0; /*NOTREACHED*/
1564 static Int local repeatLast() { /* Obtain last expression entered */
1565 if (isNull(yylval=getLastExpr())) {
1566 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1572 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1573 Text t; { /* by t ... */
1574 String s = textToStr(t);
1575 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1578 Syntax syntaxOf(n) /* Find syntax for name */
1580 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1581 return defaultSyntax(name(n).text);
1582 return name(n).syntax;
1585 /* --------------------------------------------------------------------------
1586 * main entry points to parser/lexer:
1587 * ------------------------------------------------------------------------*/
1589 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1590 Int startWith; { /* determining whether to read a */
1591 firstToken = TRUE; /* script or an expression */
1592 firstTokenIs = startWith;
1593 if (startWith==INTERFACE)
1594 offsideON = FALSE; else
1598 if (yyparse()) { /* This can only be parser overflow */
1599 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1600 EEND; /* in the parser... */
1603 if (!stackEmpty()) /* stack should now be empty */
1604 internal("parseInput");
1608 static String memPrefix = "@mem@";
1609 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1611 Void makeMemScript(mem,fname)
1614 strcat(fname,memPrefix);
1615 itoa((int)mem, fname+strlen(fname), 10);
1618 Bool isMemScript(fname)
1620 return (strstr(fname,memPrefix) != NULL);
1623 String memScriptString(fname)
1625 String p = strstr(fname,memPrefix);
1627 return (String)atoi(p+lenMemPrefix);
1633 Void parseScript(fname,len) /* Read a script, possibly from mem */
1637 if (isMemScript(fname)) {
1638 char* s = memScriptString(fname);
1641 fileInput(fname,len);
1646 Void parseScript(nm,len) /* Read a script */
1648 Long len; { /* Used to set a target for reading */
1655 Void parseExp() { /* Read an expression to evaluate */
1657 setLastExpr(inputExpr);
1661 #if EXPLAIN_INSTANCE_RESOLUTION
1662 Void parseContext() { /* Read a context to prove */
1663 parseInput(CONTEXT);
1667 Void parseInterface(nm,len) /* Read a GHC interface file */
1669 Long len; { /* Used to set a target for reading */
1672 parseInput(INTERFACE);
1676 /* --------------------------------------------------------------------------
1678 * ------------------------------------------------------------------------*/
1683 case INSTALL : initCharTab();
1684 textCase = findText("case");
1685 textOfK = findText("of");
1686 textData = findText("data");
1687 textType = findText("type");
1688 textIf = findText("if");
1689 textThen = findText("then");
1690 textElse = findText("else");
1691 textWhere = findText("where");
1692 textLet = findText("let");
1693 textIn = findText("in");
1694 textInfix = findText("infix");
1695 textInfixl = findText("infixl");
1696 textInfixr = findText("infixr");
1697 textForeign = findText("foreign");
1698 textUnsafe = findText("unsafe");
1699 textNewtype = findText("newtype");
1700 textDefault = findText("default");
1701 textDeriving = findText("deriving");
1702 textDo = findText("do");
1703 textClass = findText("class");
1705 textWith = findText("with");
1706 textDlet = findText("dlet");
1708 textInstance = findText("instance");
1709 textCoco = findText("::");
1710 textEq = findText("=");
1711 textUpto = findText("..");
1712 textAs = findText("@");
1713 textLambda = findText("\\");
1714 textBar = findText("|");
1715 textMinus = findText("-");
1716 textPlus = findText("+");
1717 textFrom = findText("<-");
1718 textArrow = findText("->");
1719 textLazy = findText("~");
1720 textBang = findText("!");
1721 textDot = findText(".");
1722 textImplies = findText("=>");
1723 textPrelude = findText("Prelude");
1724 textNum = findText("Num");
1725 textModule = findText("module");
1726 textInterface = findText("__interface");
1727 textInstImport = findText("__instimport");
1728 textExport = findText("export");
1729 textDynamic = findText("dynamic");
1730 textCcall = findText("ccall");
1731 textStdcall = findText("stdcall");
1732 textUUExport = findText("__export");
1733 textImport = findText("import");
1734 textHiding = findText("hiding");
1735 textQualified = findText("qualified");
1736 textAsMod = findText("as");
1737 textWildcard = findText("_");
1738 textAll = findText("forall");
1739 textUUAll = findText("__forall");
1740 varMinus = mkVar(textMinus);
1741 varPlus = mkVar(textPlus);
1742 varBang = mkVar(textBang);
1743 varDot = mkVar(textDot);
1744 varHiding = mkVar(textHiding);
1745 varQualified = mkVar(textQualified);
1746 varAsMod = mkVar(textAsMod);
1747 conMain = mkCon(findText("Main"));
1748 varMain = mkVar(findText("main"));
1754 case RESET : tyconDefns = NIL;
1763 foreignImports= NIL;
1764 foreignExports= NIL;
1772 case BREAK : if (reading==KEYBOARD)
1776 case MARK : mark(tyconDefns);
1784 mark(unqualImports);
1785 mark(foreignImports);
1786 mark(foreignExports);
1804 /*-------------------------------------------------------------------------*/