2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
6 * Yale Haskell Group, and the Oregon Graduate Institute of Science and
7 * Technology, 1994-1999, All rights reserved. It is distributed as
8 * free software under the license in the file "License", which is
9 * included in the distribution.
11 * $RCSfile: input.c,v $
13 * $Date: 2000/03/09 02:47:13 $
14 * ------------------------------------------------------------------------*/
32 #if IS_WIN32 || HUGS_FOR_WINDOWS
36 #if HAVE_LIBREADLINE && HAVE_READLINE_READLINE_H && HAVE_READLINE_HISTORY_H
37 #define USE_READLINE 1
39 #define USE_READLINE 0
43 #include <readline/readline.h>
44 #include <readline/history.h>
48 /* --------------------------------------------------------------------------
50 * ------------------------------------------------------------------------*/
52 List tyconDefns = NIL; /* type constructor definitions */
53 List typeInDefns = NIL; /* type synonym restrictions */
54 List valDefns = NIL; /* value definitions in script */
55 List classDefns = NIL; /* class defns in script */
56 List instDefns = NIL; /* instance defns in script */
57 List selDefns = NIL; /* list of selector lists */
58 List genDefns = NIL; /* list of generated names */
59 List unqualImports = NIL; /* unqualified import list */
60 List foreignImports = NIL; /* foreign imports */
61 List foreignExports = NIL; /* foreign exportsd */
62 List defaultDefns = NIL; /* default definitions (if any) */
63 Int defaultLine = 0; /* line in which default defs occur*/
64 List evalDefaults = NIL; /* defaults for evaluator */
66 Cell inputExpr = NIL; /* input expression */
67 Cell inputContext = NIL; /* input context */
68 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
69 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
70 Bool offsideON = TRUE; /* TRUE => implement offside rule */
71 Bool readingInterface = FALSE;
73 String repeatStr = 0; /* Repeat last expr */
75 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
76 String preprocessor = 0;
79 /* --------------------------------------------------------------------------
80 * Local function prototypes:
81 * ------------------------------------------------------------------------*/
83 static Void local initCharTab Args((Void));
84 static Void local fileInput Args((String,Long));
85 static Bool local literateMode Args((String));
86 static Bool local linecmp Args((String,String));
87 static Int local nextLine Args((Void));
88 static Void local skip Args((Void));
89 static Void local thisLineIs Args((Int));
90 static Void local newlineSkip Args((Void));
91 static Void local closeAnyInput Args((Void));
93 Int yyparse Args((Void)); /* can't stop yacc making this */
94 /* public, but don't advertise */
95 /* it in a header file. */
97 static Void local endToken Args((Void));
98 static Text local readOperator Args((Void));
99 static Text local readIdent Args((Void));
100 static Cell local readRadixNumber Args((Int));
101 static Cell local readNumber Args((Void));
102 static Cell local readChar Args((Void));
103 static Cell local readString Args((Void));
104 static Void local saveStrChr Args((Char));
105 static Cell local readAChar Args((Bool));
107 static Bool local lazyReadMatches Args((String));
108 static Cell local readEscapeChar Args((Bool));
109 static Void local skipGap Args((Void));
110 static Cell local readCtrlChar Args((Void));
111 static Cell local readOctChar Args((Void));
112 static Cell local readHexChar Args((Void));
113 static Int local readHexDigit Args((Char));
114 static Cell local readDecChar Args((Void));
116 static Void local goOffside Args((Int));
117 static Void local unOffside Args((Void));
118 static Bool local canUnOffside Args((Void));
120 static Void local skipWhitespace Args((Void));
121 static Int local yylex Args((Void));
122 static Int local repeatLast Args((Void));
124 static Cell local parseInput Args((Int));
126 static Bool local doesNotExceed Args((String,Int,Int));
127 static Int local stringToInt Args((String,Int));
130 /* --------------------------------------------------------------------------
131 * Text values for reserved words and special symbols:
132 * ------------------------------------------------------------------------*/
134 static Text textCase, textOfK, textData, textType, textIf;
135 static Text textThen, textElse, textWhere, textLet, textIn;
136 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
137 static Text textDefault, textDeriving, textDo, textClass, textInstance;
139 static Text textWith, textDlet;
142 static Text textCoco, textEq, textUpto, textAs, textLambda;
143 static Text textBar, textMinus, textFrom, textArrow, textLazy;
144 static Text textBang, textDot, textAll, textImplies;
145 static Text textWildcard;
147 static Text textModule, textImport, textInterface, textInstImport;
148 static Text textHiding, textQualified, textAsMod, textPrivileged;
149 static Text textExport, textDynamic, textUUExport;
150 static Text textUnsafe, textUUAll, textUUUsage;
152 Text textCcall; /* ccall */
153 Text textStdcall; /* stdcall */
155 Text textNum; /* Num */
156 Text textPrelude; /* Prelude */
157 Text textPlus; /* (+) */
159 static Cell conMain; /* Main */
160 static Cell varMain; /* main */
162 static Cell varMinus; /* (-) */
163 static Cell varPlus; /* (+) */
164 static Cell varBang; /* (!) */
165 static Cell varDot; /* (.) */
166 static Cell varHiding; /* hiding */
167 static Cell varQualified; /* qualified */
168 static Cell varAsMod; /* as */
169 static Cell varPrivileged; /* privileged */
171 static List imps; /* List of imports to be chased */
174 /* --------------------------------------------------------------------------
175 * Character set handling:
177 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
178 * character set. The following code provides methods for classifying
179 * input characters according to the lexical structure specified by the
180 * report. Hugs should still accept older programs because ASCII is
181 * essentially just a subset of the ISO character set.
183 * Notes: If you want to port Hugs to a machine that uses something
184 * substantially different from the ISO character set, then you will need
185 * to insert additional code to map between character sets.
187 * At some point, the following data structures may be exported in a .h
188 * file to allow the information contained here to be picked up in the
189 * implementation of LibChar is* primitives.
191 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
192 * ------------------------------------------------------------------------*/
194 static Bool charTabBuilt;
195 static unsigned char ctable[NUM_CHARS];
196 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
197 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
207 static Void local initCharTab() { /* Initialize char decode table */
208 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
209 #define setChar(x,c) ctable[c] |= (x)
210 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
211 #define setCopy(x,c) {Int i; \
212 for (i=0; i<NUM_CHARS; ++i) \
217 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
219 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
220 setRange(SMALL, 223,246); /* ISO lower case letters */
221 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
222 setChar (SMALL, '_');
224 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
225 setRange(LARGE, 192,214); /* ISO upper case letters */
226 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
228 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
229 setRange(SYMBOL, 215,215);
230 setChar (SYMBOL, 247);
231 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
233 setChar (IDAFTER, '\''); /* Characters in identifier */
234 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
236 setChar (ZPACE, ' '); /* ASCII space character */
237 setChar (ZPACE, 160); /* ISO non breaking space */
238 setRange(ZPACE, 9,13); /* special whitespace: \t\n\v\f\r */
240 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
241 setChars(PRINT, " '\""); /* Space and quotes */
242 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
252 /* --------------------------------------------------------------------------
253 * Single character input routines:
255 * At the lowest level of input, characters are read one at a time, with the
256 * current character held in c0 and the following (lookahead) character in
257 * c1. The coordinates of c0 within the file are held in (column,row).
258 * The input stream is advanced by one character using the skip() function.
259 * ------------------------------------------------------------------------*/
261 #define TABSIZE 8 /* spacing between tabstops */
263 #define NOTHING 0 /* what kind of input is being read?*/
264 #define KEYBOARD 1 /* - keyboard/console? */
265 #define SCRIPTFILE 2 /* - script file */
266 #define PROJFILE 3 /* - project file */
267 #define STRING 4 /* - string buffer? */
269 static Int reading = NOTHING;
271 static Target readSoFar;
272 static Int row, column, startColumn;
274 static FILE *inputStream = 0;
275 static Bool thisLiterate;
276 static String nextStringChar; /* next char in string buffer */
278 #if USE_READLINE /* for command line editors */
279 static String currentLine; /* editline or GNU readline */
280 static String nextChar;
281 #define nextConsoleChar() \
282 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
284 #define nextConsoleChar() getc(stdin)
287 static Int litLines; /* count defn lines in lit script */
288 #define DEFNCHAR '>' /* definition lines begin with this */
289 static Int lastLine; /* records type of last line read: */
290 #define STARTLINE 0 /* - at start of file, none read */
291 #define BLANKLINE 1 /* - blank (may preceed definition) */
292 #define TEXTLINE 2 /* - text comment */
293 #define DEFNLINE 3 /* - line containing definition */
294 #define CODELINE 4 /* - line inside code block */
296 #define BEGINCODE "\\begin{code}"
297 #define ENDCODE "\\end{code}"
300 static char *lineBuffer = NULL; /* getline() does the initial allocation */
302 #define LINEBUFFER_SIZE 1000
303 static char lineBuffer[LINEBUFFER_SIZE];
305 static int lineLength = 0;
306 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
307 static int linePtr = 0;
309 Void consoleInput(prompt) /* prepare to input characters from */
310 String prompt; { /* standard in (i.e. console/kbd) */
311 reading = KEYBOARD; /* keyboard input is Line oriented, */
312 c0 = /* i.e. input terminated by '\n' */
318 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
319 * avoids accidentally freeing currentLine twice.
322 String oldCurrentLine = currentLine;
323 currentLine = 0; /* We may lose the space of currentLine */
324 free(oldCurrentLine); /* if interrupted here - unlikely */
326 currentLine = readline(prompt);
327 nextChar = currentLine;
330 add_history(currentLine);
340 Void projInput(nm) /* prepare to input characters from */
341 String nm; { /* from named project file */
342 if ((inputStream = fopen(nm,"r"))!=0) {
350 ERRMSG(0) "Unable to open project file \"%s\"", nm
355 static Void local fileInput(nm,len) /* prepare to input characters from*/
356 String nm; /* named file (specified length is */
357 Long len; { /* used to set target for reading) */
358 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
360 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
361 char *cmd = malloc(reallen);
363 ERRMSG(0) "Unable to allocate memory for filter command."
366 strcpy(cmd,preprocessor);
369 inputStream = popen(cmd,"r");
372 inputStream = fopen(nm,"r");
375 inputStream = fopen(nm,"r");
378 reading = SCRIPTFILE;
384 lastLine = STARTLINE; /* literate file processing */
388 thisLiterate = literateMode(nm);
392 setGoal("Parsing", (Target)len);
395 ERRMSG(0) "Unable to open file \"%s\"", nm
400 Void stringInput(s) /* prepare to input characters from string */
415 static Bool local literateMode(nm) /* Select literate mode for file */
417 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
419 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
421 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
422 filenamecmp(dot+1,"verb")==0) /* literate scripts */
425 return literateScripts; /* otherwise, use the default */
429 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
433 len = 1 + strlen ( srcName );
434 *hiName = malloc(len);
435 *oName = malloc(len);
436 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
437 (*hiName)[0] = (*oName)[0] = 0;
438 dot = strrchr(srcName, '.');
440 if (filenamecmp(dot+1, "hs")==0 &&
441 filenamecmp(dot+1, "lhs")==0 &&
442 filenamecmp(dot+1, "verb")==0) return;
444 strcpy(*hiName, srcName);
445 dot = strrchr(*hiName, '.');
450 strcpy(*oName, srcName);
451 dot = strrchr(*oName, '.');
458 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
459 * I've removed the loop (since newLineSkip contains a loop too) and
460 * replaced the warnings with errors. ADR
463 * To deal with literate \begin{code}...\end{code} blocks,
464 * add a line buffer that rooms the current line. The old c0 and c1
465 * stream pointers are used as before within that buffer -- sof
467 * Upon reading a new line into the line buffer, we check to see if
468 * we're reading in a line containing \begin{code} or \end{code} and
469 * take appropriate action.
472 static Bool local linecmp(s,line) /* compare string with line */
473 String s; /* line may end in whitespace */
476 while (s[i] != '\0' && s[i] == line[i]) {
479 /* s[0..i-1] == line[0..i-1] */
480 if (s[i] != '\0') { /* check s `isPrefixOf` line */
483 while (isIn(line[i], ZPACE)) { /* allow whitespace at end of line */
486 return (line[i] == '\0');
489 /* Returns line length (including \n) or 0 upon EOF. */
490 static Int local nextLine()
494 Forget about fgets(), it is utterly braindead.
495 (Assumes \NUL free streams and does not gracefully deal
496 with overflow.) Instead, use GNU libc's getline().
498 lineLength = getline(&lineBuffer, &lineLength, inputStream);
500 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
501 lineLength = strlen(lineBuffer);
505 /* printf("Read: \"%s\"", lineBuffer); */
506 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
509 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
510 lineBuffer[1] == '!') {
511 lineBuffer[0]='\n'; /* pretend it's a blank line */
514 } else if (thisLiterate) {
515 if (linecmp(BEGINCODE, lineBuffer)) {
516 if (!inCodeBlock) { /* Entered a code block */
518 lineBuffer[0]='\n'; /* pretend it's a blank line */
523 ERRMSG(row) "\\begin{code} encountered inside code block"
527 else if (linecmp(ENDCODE, lineBuffer)) {
528 if (inCodeBlock) { /* Finished code block */
530 lineBuffer[0]='\n'; /* pretend it's a blank line */
535 ERRMSG(row) "\\end{code} encountered outside code block"
540 /* printf("Read: \"%s\"", lineBuffer); */
544 static Void local skip() { /* move forward one char in input */
545 if (c0!=EOF) { /* stream, updating c0, c1, ... */
546 if (c0=='\n') { /* Adjusting cursor coords as nec. */
549 if (reading==SCRIPTFILE)
553 column += TABSIZE - ((column-1)%TABSIZE);
562 if (reading==SCRIPTFILE)
566 else if (reading==KEYBOARD) {
571 c1 = nextConsoleChar();
572 #if IS_WIN32 && !HUGS_FOR_WINDOWS
575 /* On Win32, hitting ctrl-C causes the next getchar to
576 * fail - returning "-1" to indicate an error.
577 * This is one of the rare cases where "-1" does not mean EOF.
579 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
584 else if (reading==STRING) {
585 c1 = (unsigned char) *nextStringChar++;
590 if (lineLength <=0 || linePtr == lineLength) {
591 /* Current line, exhausted - get new one */
592 if (nextLine() <= 0) { /* EOF */
597 c1 = (unsigned char)lineBuffer[linePtr++];
601 c1 = (unsigned char)lineBuffer[linePtr++];
608 static Void local thisLineIs(kind) /* register kind of current line */
609 Int kind; { /* & check for literate script errs */
610 if (literateErrors) {
611 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
612 (kind==TEXTLINE && lastLine==DEFNLINE)) {
613 ERRMSG(row) "Program line next to comment"
620 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
621 /* assert(c0=='\n'); */
622 if (reading==SCRIPTFILE && thisLiterate) {
625 if (inCodeBlock) { /* pass chars on definition lines */
626 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
630 if (c0==DEFNCHAR) { /* pass chars on definition lines */
631 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
636 while (c0 != '\n' && isIn(c0,ZPACE)) /* maybe line is blank? */
638 if (c0=='\n' || c0==EOF)
639 thisLineIs(BLANKLINE);
641 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
642 while (c0!='\n' && c0!=EOF)
644 } /* by now, c0=='\n' or c0==EOF */
645 } while (c0!=EOF); /* if new line, start again */
647 if (litLines==0 && literateErrors) {
648 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
657 static Void local closeAnyInput() { /* Close input stream, if open, */
658 switch (reading) { /* or skip to end of console line */
660 case SCRIPTFILE : if (inputStream) {
661 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
673 case KEYBOARD : while (c0!=EOF)
680 /* --------------------------------------------------------------------------
681 * Parser: Uses table driven parser generated from parser.y using yacc
682 * ------------------------------------------------------------------------*/
686 /* --------------------------------------------------------------------------
687 * Single token input routines:
689 * The following routines read the values of particular kinds of token given
690 * that the first character of the token has already been located in c0 on
691 * entry to the routine.
692 * ------------------------------------------------------------------------*/
694 #define MAX_TOKEN 4000
695 #define startToken() tokPos = 0
696 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
697 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
698 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
700 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
701 static Int tokPos; /* input position in buffer */
702 static Int identType; /* identifier type: CONID / VARID */
703 static Int opType; /* operator type : CONOP / VAROP */
705 static Void local endToken() { /* check for token overflow */
706 if (tokPos>MAX_TOKEN) {
707 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
710 tokenStr[tokPos] = '\0';
713 static Text local readOperator() { /* read operator symbol */
718 } while (isISO(c0) && isIn(c0,SYMBOL));
719 opType = (tokenStr[0]==':' ? CONOP : VAROP);
721 return findText(tokenStr);
724 static Text local readIdent() { /* read identifier */
729 } while (isISO(c0) && isIn(c0,IDAFTER));
731 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
732 if (readingInterface)
733 return unZcodeThenFindText(tokenStr); else
734 return findText(tokenStr);
738 static Bool local doesNotExceed(s,radix,limit)
745 if (s[p] == 0) return TRUE;
746 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
747 n = radix*n + (s[p]-'0');
752 static Int local stringToInt(s,radix)
758 if (s[p] == 0) return n;
759 n = radix*n + (s[p]-'0');
764 static Cell local readRadixNumber(r) /* Read literal in specified radix */
765 Int r; { /* from input of the form 0c{digs} */
768 skip(); /* skip leading zero */
769 if ((d=readHexDigit(c1))<0 || d>=r) {
770 /* Special case; no digits, lex as */
771 /* if it had been written "0 c..." */
776 saveTokenChar('0'+readHexDigit(c0));
778 d = readHexDigit(c0);
779 } while (d>=0 && d<r);
783 if (doesNotExceed(tokenStr,r,MAXPOSINT))
784 return mkInt(stringToInt(tokenStr,r));
787 return stringToBignum(tokenStr);
789 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
794 static Cell local readNumber() { /* read numeric constant */
797 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
798 return readRadixNumber(16);
799 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
800 return readRadixNumber(8);
807 } while (isISO(c0) && isIn(c0,DIGIT));
809 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
811 if (doesNotExceed(tokenStr,10,MAXPOSINT))
812 return mkInt(stringToInt(tokenStr,10)); else
813 return stringToBignum(tokenStr);
816 saveTokenChar(c0); /* save decimal point */
818 do { /* process fractional part ... */
821 } while (isISO(c0) && isIn(c0,DIGIT));
823 if (c0=='e' || c0=='E') { /* look for exponent part... */
833 if (!isISO(c0) || !isIn(c0,DIGIT)) {
834 ERRMSG(row) "Missing digits in exponent"
841 } while (isISO(c0) && isIn(c0,DIGIT));
846 return mkFloat(stringToFloat(tokenStr));
855 static Cell local readChar() { /* read character constant */
859 if (c0=='\'' || c0=='\n' || c0==EOF) {
860 ERRMSG(row) "Illegal character constant"
864 charRead = readAChar(FALSE);
869 ERRMSG(row) "Improperly terminated character constant"
875 static Cell local readString() { /* read string literal */
880 while (c0!='\"' && c0!='\n' && c0!=EOF) {
883 saveStrChr(charOf(c));
889 ERRMSG(row) "Improperly terminated string"
893 return mkStr(findText(tokenStr));
896 static Void local saveStrChr(c) /* save character in string */
898 if (c!='\0' && c!='\\') { /* save non null char as single char*/
901 else { /* save null char as TWO null chars */
902 if (tokPos+1<MAX_TOKEN) {
912 static Cell local readAChar(isStrLit) /* read single char constant */
913 Bool isStrLit; { /* TRUE => enable \& and gaps */
916 if (c0=='\\') /* escape character? */
917 return readEscapeChar(isStrLit);
919 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
922 skip(); /* normal character? */
926 /* --------------------------------------------------------------------------
927 * Character escape code sequences:
928 * ------------------------------------------------------------------------*/
930 static struct { /* table of special escape codes */
934 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
935 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
936 {"\'",'\''}, {"v", 11},
937 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
938 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
939 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
940 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
941 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
942 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
943 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
944 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
945 {"SP", 32}, {"DEL", 127},
949 static Int alreadyMatched; /* Record portion of input stream */
950 static char alreadyRead[10]; /* that has been read w/o a match */
952 static Bool local lazyReadMatches(s) /* compare input stream with string */
953 String s; { /* possibly using characters that */
954 int i; /* have already been read */
956 for (i=0; i<alreadyMatched; ++i)
957 if (alreadyRead[i]!=s[i])
960 while (s[i] && s[i]==c0) {
961 alreadyRead[alreadyMatched++]=(char)c0;
969 static Cell local readEscapeChar(isStrLit)/* read escape character */
975 case '&' : if (isStrLit) {
979 ERRMSG(row) "Illegal use of `\\&' in character constant"
983 case '^' : return readCtrlChar();
985 case 'o' : return readOctChar();
986 case 'x' : return readHexChar();
988 default : if (!isISO(c0)) {
989 ERRMSG(row) "Illegal escape sequence"
992 else if (isIn(c0,ZPACE)) {
997 ERRMSG(row) "Illegal use of gap in character constant"
1001 else if (isIn(c0,DIGIT))
1002 return readDecChar();
1005 for (alreadyMatched=0; escapes[i].codename; i++)
1006 if (lazyReadMatches(escapes[i].codename))
1007 return mkChar(escapes[i].codenumber);
1009 alreadyRead[alreadyMatched++] = (char)c0;
1010 alreadyRead[alreadyMatched++] = '\0';
1011 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
1014 return NIL;/*NOTREACHED*/
1017 static Void local skipGap() { /* skip over gap in string literal */
1018 do /* (simplified in Haskell 1.1) */
1023 while (isISO(c0) && isIn(c0,ZPACE));
1025 ERRMSG(row) "Missing `\\' terminating string literal gap"
1031 static Cell local readCtrlChar() { /* read escape sequence \^x */
1032 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1036 if ((which = strchr(controls,c0))==NULL) {
1037 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1041 return mkChar(which-controls);
1044 static Cell local readOctChar() { /* read octal character constant */
1049 if ((d = readHexDigit(c0))<0 || d>=8) {
1050 ERRMSG(row) "Empty octal character escape"
1054 if (overflows(n,8,d,MAXCHARVAL)) {
1055 ERRMSG(row) "Octal character escape out of range"
1060 } while ((d = readHexDigit(c0))>=0 && d<8);
1065 static Cell local readHexChar() { /* read hex character constant */
1070 if ((d = readHexDigit(c0))<0) {
1071 ERRMSG(row) "Empty hexadecimal character escape"
1075 if (overflows(n,16,d,MAXCHARVAL)) {
1076 ERRMSG(row) "Hexadecimal character escape out of range"
1081 } while ((d = readHexDigit(c0))>=0);
1086 static Int local readHexDigit(c) /* read single hex digit */
1088 if ('0'<=c && c<='9')
1090 if ('A'<=c && c<='F')
1091 return 10 + (c-'A');
1092 if ('a'<=c && c<='f')
1093 return 10 + (c-'a');
1097 static Cell local readDecChar() { /* read decimal character constant */
1101 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1102 ERRMSG(row) "Decimal character escape out of range"
1105 n = 10*n + (c0-'0');
1107 } while (c0!=EOF && isIn(c0,DIGIT));
1112 /* --------------------------------------------------------------------------
1113 * Produce printable representation of character:
1114 * ------------------------------------------------------------------------*/
1116 String unlexChar(c,quote) /* return string representation of */
1117 Char c; /* character... */
1118 Char quote; { /* protect quote character */
1119 static char buffer[12];
1121 if (c<0) /* deal with sign extended chars.. */
1124 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1125 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1127 buffer[1] = (char)c;
1131 buffer[0] = (char)c;
1135 else { /* look for escape code */
1137 for (escs=0; escapes[escs].codename; escs++)
1138 if (escapes[escs].codenumber==c) {
1139 sprintf(buffer,"\\%s",escapes[escs].codename);
1142 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1147 Void printString(s) /* print string s, using quotes and */
1148 String s; { /* escapes if any parts need them */
1152 while ((c = *t)!=0 && isISO(c)
1153 && isIn(c,PRINT) && c!='"' && !isIn(c,ZPACE)) {
1159 Printf("%s",unlexChar(*t,'"'));
1167 /* -------------------------------------------------------------------------
1168 * Handle special types of input for use in interpreter:
1169 * -----------------------------------------------------------------------*/
1171 Command readCommand(cmds,start,sys) /* read command at start of input */
1172 struct cmd *cmds; /* line in interpreter */
1173 Char start; /* characters introducing a cmd */
1174 Char sys; { /* character for shell escape */
1175 while (c0==' ' || c0 =='\t')
1178 if (c0=='\n') /* look for blank command lines */
1180 if (c0==EOF) /* look for end of input stream */
1182 if (c0==sys) { /* single character system escape */
1186 if (c0==start && c1==sys) { /* two character system escape */
1192 startToken(); /* All cmds start with start */
1193 if (c0==start) /* except default (usually EVAL) */
1194 do { /* which is empty */
1197 } while (c0!=EOF && !isIn(c0,ZPACE));
1200 for (; cmds->cmdString; ++cmds)
1201 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1202 (tokenStr[0]==start &&
1203 tokenStr[1]==(cmds->cmdString)[1] &&
1205 return (cmds->cmdCode);
1209 String readFilename() { /* Read filename from input (if any)*/
1210 if (reading==PROJFILE)
1213 while (c0==' ' || c0=='\t')
1216 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1220 while (c0!=EOF && !isIn(c0,ZPACE)) {
1223 while (c0!=EOF && c0!='\"') {
1224 Cell c = readAChar(TRUE);
1226 saveTokenChar(charOf(c));
1232 ERRMSG(row) "a closing quote, '\"', was expected"
1245 String readLine() { /* Read command line from input */
1246 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1250 while (c0!='\n' && c0!=EOF) {
1259 /* --------------------------------------------------------------------------
1260 * This lexer supports the Haskell layout rule:
1262 * - Layout area bounded by { ... }, with `;'s in between.
1263 * - A `{' is a HARD indentation and can only be matched by a corresponding
1265 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1266 * is inserted with the column number of the first token after the
1267 * WHERE/LET/OF keyword.
1268 * - When a soft indentation is uppermost on the indetation stack with
1269 * column col' we insert:
1270 * `}' in front of token with column<col' and pop indentation off stack,
1271 * `;' in front of token with column==col'.
1272 * ------------------------------------------------------------------------*/
1274 #define MAXINDENT 100 /* maximum nesting of layout rule */
1275 static Int layout[MAXINDENT+1];/* indentation stack */
1276 #define HARD (-1) /* indicates hard indentation */
1277 static Int indentDepth = (-1); /* current indentation nesting */
1279 static Void local goOffside(col) /* insert offside marker */
1280 Int col; { /* for specified column */
1282 if (indentDepth>=MAXINDENT) {
1283 ERRMSG(row) "Too many levels of program nesting"
1286 layout[++indentDepth] = col;
1289 static Void local unOffside() { /* leave layout rule area */
1294 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1296 return indentDepth>=0 && layout[indentDepth]!=HARD;
1299 /* --------------------------------------------------------------------------
1301 * ------------------------------------------------------------------------*/
1303 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1304 for (;;) /* Strictly speaking, this code is */
1305 if (c0==EOF) /* a little more liberal than the */
1306 return; /* report allows ... */
1309 else if (isIn(c0,ZPACE))
1311 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1313 Int origRow = row; /* Save original row number */
1316 while (nesting>0 && c0!=EOF)
1317 if (c0=='{' && c1=='-') {
1322 else if (c0=='-' && c1=='}') {
1332 ERRMSG(origRow) "Unterminated nested comment {- ..."
1336 else if (c0=='-' && c1=='-') { /* One line comment */
1339 while (c0!='\n' && c0!=EOF);
1347 static Bool firstToken; /* Set to TRUE for first token */
1348 static Int firstTokenIs; /* ... with token value stored here */
1350 static Int local yylex() { /* Read next input token ... */
1351 static Bool insertOpen = FALSE;
1352 static Bool insertedToken = FALSE;
1353 static Text textRepeat;
1355 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1357 if (firstToken) { /* Special case for first token */
1361 insertedToken = FALSE;
1362 if (reading==KEYBOARD)
1363 textRepeat = findText(repeatStr);
1364 return firstTokenIs;
1367 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1369 insertedToken = TRUE;
1371 push(yylval = mkInt(row));
1375 /* ----------------------------------------------------------------------
1376 * Skip white space, and insert tokens to support layout rules as reqd.
1377 * --------------------------------------------------------------------*/
1380 startColumn = column;
1381 push(yylval = mkInt(row)); /* default token value is line no. */
1382 /* subsequent changes to yylval must also set top() to the same value */
1384 if (indentDepth>=0) { /* layout rule(s) active ? */
1385 if (insertedToken) /* avoid inserting multiple `;'s */
1386 insertedToken = FALSE; /* or putting `;' after `{' */
1388 if (offsideON && layout[indentDepth]!=HARD) {
1389 if (column<layout[indentDepth]) {
1393 else if (column==layout[indentDepth] && c0!=EOF) {
1394 insertedToken = TRUE;
1400 /* ----------------------------------------------------------------------
1401 * Now try to identify token type:
1402 * --------------------------------------------------------------------*/
1404 if (readingInterface) {
1405 if (c0 == '(' && c1 == '#') { skip(); skip(); return UTL; };
1406 if (c0 == '#' && c1 == ')') { skip(); skip(); return UTR; };
1410 case EOF : return 0; /* End of file/input */
1412 /* The next 10 characters make up the `special' category in 1.3 */
1413 case '(' : skip(); return '(';
1414 case ')' : skip(); return ')';
1415 case ',' : skip(); return ',';
1416 case ';' : skip(); return ';';
1417 case '[' : skip(); return '[';
1418 case ']' : skip(); return ']';
1419 case '`' : skip(); return '`';
1420 case '{' : if (offsideON) goOffside(HARD);
1423 case '}' : if (offsideON && indentDepth<0) {
1424 ERRMSG(row) "Misplaced `}'"
1427 if (!(offsideON && layout[indentDepth]!=HARD))
1428 skip(); /* skip over hard }*/
1430 unOffside(); /* otherwise, we have to insert a }*/
1431 return '}'; /* to (try to) avoid an error... */
1433 /* Character and string literals */
1434 case '\'' : top() = yylval = readChar();
1437 case '\"' : top() = yylval = readString();
1442 if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1443 Text it; /* Look for implicit param name */
1446 top() = yylval = ap(IPVAR,it);
1447 return identType=IPVARID;
1451 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1452 Text it; /* Look for record selector name */
1455 top() = yylval = ap(RECSEL,mkExt(it));
1456 return identType=RECSELID;
1459 if (isIn(c0,LARGE)) { /* Look for qualified name */
1460 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1461 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1463 skip(); /* Skip qualifying dot */
1464 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1465 it2 = readOperator();
1466 if (opType==CONOP) {
1467 top() = yylval = mkQConOp(it,it2);
1470 top() = yylval = mkQVarOp(it,it2);
1473 } else { /* Qualified identifier */
1475 if (identType==CONID) {
1476 top() = yylval = mkQCon(it,it2);
1479 top() = yylval = mkQVar(it,it2);
1484 top() = yylval = mkCon(it);
1488 if (isIn(c0,(SMALL|LARGE))) {
1489 Text it = readIdent();
1491 if (it==textCase) return CASEXP;
1492 if (it==textOfK) lookAhead(OF);
1493 if (it==textData) return DATA;
1494 if (it==textType) return TYPE;
1495 if (it==textIf) return IF;
1496 if (it==textThen) return THEN;
1497 if (it==textElse) return ELSE;
1498 if (it==textWhere) lookAhead(WHERE);
1499 if (it==textLet) lookAhead(LET);
1500 if (it==textIn) return IN;
1501 if (it==textInfix) return INFIXN;
1502 if (it==textInfixl) return INFIXL;
1503 if (it==textInfixr) return INFIXR;
1504 if (it==textForeign) return FOREIGN;
1505 if (it==textUnsafe) return UNSAFE;
1506 if (it==textNewtype) return TNEWTYPE;
1507 if (it==textDefault) return DEFAULT;
1508 if (it==textDeriving) return DERIVING;
1509 if (it==textDo) lookAhead(DO);
1510 if (it==textClass) return TCLASS;
1511 if (it==textInstance) return TINSTANCE;
1512 if (it==textModule) return TMODULE;
1513 if (it==textInterface) return INTERFACE;
1514 if (it==textInstImport) return INSTIMPORT;
1515 if (it==textImport) return IMPORT;
1516 if (it==textExport) return EXPORT;
1517 if (it==textDynamic) return DYNAMIC;
1518 if (it==textCcall) return CCALL;
1519 if (it==textStdcall) return STDKALL;
1520 if (it==textUUExport) return UUEXPORT;
1521 if (it==textHiding) return HIDING;
1522 if (it==textQualified) return QUALIFIED;
1523 if (it==textAsMod) return ASMOD;
1524 if (it==textPrivileged) return PRIVILEGED;
1525 if (it==textWildcard) return '_';
1526 if (it==textAll && !haskell98) return ALL;
1528 if (it==textWith && !haskell98) lookAhead(WITH);
1529 if (it==textDlet && !haskell98) lookAhead(DLET);
1531 if (it==textUUAll) return ALL;
1532 if (it==textUUUsage) return UUUSAGE;
1533 if (it==textRepeat && reading==KEYBOARD)
1534 return repeatLast();
1536 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1540 if (isIn(c0,SYMBOL)) {
1541 Text it = readOperator();
1543 if (it==textCoco) return COCO;
1544 if (it==textEq) return '=';
1545 if (it==textUpto) return UPTO;
1546 if (it==textAs) return '@';
1547 if (it==textLambda) return '\\';
1548 if (it==textBar) return '|';
1549 if (it==textFrom) return FROM;
1550 if (it==textMinus) return '-';
1551 if (it==textPlus) return '+';
1552 if (it==textBang) return '!';
1553 if (it==textDot) return '.';
1554 if (it==textArrow) return ARROW;
1555 if (it==textLazy) return '~';
1556 if (it==textImplies) return IMPLIES;
1557 if (it==textRepeat && reading==KEYBOARD)
1558 return repeatLast();
1560 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1564 if (isIn(c0,DIGIT)) {
1565 top() = yylval = readNumber();
1569 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1572 return 0; /*NOTREACHED*/
1575 static Int local repeatLast() { /* Obtain last expression entered */
1576 if (isNull(yylval=getLastExpr())) {
1577 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1583 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1584 Text t; { /* by t ... */
1585 String s = textToStr(t);
1586 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1589 Syntax syntaxOf(n) /* Find syntax for name */
1591 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1592 return defaultSyntax(name(n).text);
1593 return name(n).syntax;
1596 /* --------------------------------------------------------------------------
1597 * main entry points to parser/lexer:
1598 * ------------------------------------------------------------------------*/
1600 static Cell local parseInput(startWith)/* Parse input with given first tok,*/
1601 Int startWith; { /* determining whether to read a */
1602 Cell final = NIL; /* script or an expression */
1604 firstTokenIs = startWith;
1605 if (startWith==INTERFACE) {
1606 offsideON = FALSE; readingInterface = TRUE;
1608 offsideON = TRUE; readingInterface = FALSE;
1612 if (yyparse()) { /* This can only be parser overflow */
1613 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1614 EEND; /* in the parser... */
1617 if (!stackEmpty()) /* stack should now be empty */
1618 internal("parseInput");
1623 static String memPrefix = "@mem@";
1624 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1626 Void makeMemScript(mem,fname)
1629 strcat(fname,memPrefix);
1630 itoa((int)mem, fname+strlen(fname), 10);
1633 Bool isMemScript(fname)
1635 return (strstr(fname,memPrefix) != NULL);
1638 String memScriptString(fname)
1640 String p = strstr(fname,memPrefix);
1642 return (String)atoi(p+lenMemPrefix);
1648 Void parseScript(fname,len) /* Read a script, possibly from mem */
1652 if (isMemScript(fname)) {
1653 char* s = memScriptString(fname);
1656 fileInput(fname,len);
1661 Void parseScript(nm,len) /* Read a script */
1663 Long len; { /* Used to set a target for reading */
1670 Void parseExp() { /* Read an expression to evaluate */
1672 setLastExpr(inputExpr);
1676 #if EXPLAIN_INSTANCE_RESOLUTION
1677 Void parseContext() { /* Read a context to prove */
1678 parseInput(CONTEXT);
1682 Cell parseInterface(nm,len) /* Read a GHC interface file */
1684 Long len; { /* Used to set a target for reading */
1687 return parseInput(INTERFACE);
1691 /* --------------------------------------------------------------------------
1693 * ------------------------------------------------------------------------*/
1698 case POSTPREL: break;
1700 case PREPREL : initCharTab();
1701 textCase = findText("case");
1702 textOfK = findText("of");
1703 textData = findText("data");
1704 textType = findText("type");
1705 textIf = findText("if");
1706 textThen = findText("then");
1707 textElse = findText("else");
1708 textWhere = findText("where");
1709 textLet = findText("let");
1710 textIn = findText("in");
1711 textInfix = findText("infix");
1712 textInfixl = findText("infixl");
1713 textInfixr = findText("infixr");
1714 textForeign = findText("foreign");
1715 textUnsafe = findText("unsafe");
1716 textNewtype = findText("newtype");
1717 textDefault = findText("default");
1718 textDeriving = findText("deriving");
1719 textDo = findText("do");
1720 textClass = findText("class");
1722 textWith = findText("with");
1723 textDlet = findText("dlet");
1725 textInstance = findText("instance");
1726 textCoco = findText("::");
1727 textEq = findText("=");
1728 textUpto = findText("..");
1729 textAs = findText("@");
1730 textLambda = findText("\\");
1731 textBar = findText("|");
1732 textMinus = findText("-");
1733 textPlus = findText("+");
1734 textFrom = findText("<-");
1735 textArrow = findText("->");
1736 textLazy = findText("~");
1737 textBang = findText("!");
1738 textDot = findText(".");
1739 textImplies = findText("=>");
1740 textPrelude = findText("Prelude");
1741 textNum = findText("Num");
1742 textModule = findText("module");
1743 textInterface = findText("__interface");
1744 textInstImport = findText("__instimport");
1745 textExport = findText("export");
1746 textDynamic = findText("dynamic");
1747 textCcall = findText("ccall");
1748 textStdcall = findText("stdcall");
1749 textUUExport = findText("__export");
1750 textImport = findText("import");
1751 textHiding = findText("hiding");
1752 textQualified = findText("qualified");
1753 textAsMod = findText("as");
1754 textPrivileged = findText("privileged");
1755 textWildcard = findText("_");
1756 textAll = findText("forall");
1757 textUUAll = findText("__forall");
1758 textUUUsage = findText("__u");
1759 varMinus = mkVar(textMinus);
1760 varPlus = mkVar(textPlus);
1761 varBang = mkVar(textBang);
1762 varDot = mkVar(textDot);
1763 varHiding = mkVar(textHiding);
1764 varQualified = mkVar(textQualified);
1765 varAsMod = mkVar(textAsMod);
1766 varPrivileged = mkVar(textPrivileged);
1767 conMain = mkCon(findText("Main"));
1768 varMain = mkVar(findText("main"));
1774 case RESET : tyconDefns = NIL;
1782 foreignImports= NIL;
1783 foreignExports= NIL;
1791 case BREAK : if (reading==KEYBOARD)
1795 case MARK : mark(tyconDefns);
1802 mark(unqualImports);
1803 mark(foreignImports);
1804 mark(foreignExports);
1815 mark(varPrivileged);
1823 /*-------------------------------------------------------------------------*/