2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * Hugs 98 is Copyright (c) Mark P Jones, Alastair Reid and the Yale
6 * Haskell Group 1994-99, and is distributed as Open Source software
7 * under the Artistic License; see the file "Artistic" that is included
8 * in the distribution for details.
10 * $RCSfile: input.c,v $
12 * $Date: 1999/10/15 11:02:12 $
13 * ------------------------------------------------------------------------*/
31 /* --------------------------------------------------------------------------
33 * ------------------------------------------------------------------------*/
35 List tyconDefns = NIL; /* type constructor definitions */
36 List typeInDefns = NIL; /* type synonym restrictions */
37 List valDefns = NIL; /* value definitions in script */
38 List classDefns = NIL; /* class defns in script */
39 List instDefns = NIL; /* instance defns in script */
40 List selDefns = NIL; /* list of selector lists */
41 List genDefns = NIL; /* list of generated names */
42 List unqualImports = NIL; /* unqualified import list */
43 List foreignImports = NIL; /* foreign imports */
44 List foreignExports = NIL; /* foreign exportsd */
45 List defaultDefns = NIL; /* default definitions (if any) */
46 Int defaultLine = 0; /* line in which default defs occur*/
47 List evalDefaults = NIL; /* defaults for evaluator */
49 Cell inputExpr = NIL; /* input expression */
50 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
51 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
52 Bool offsideON = TRUE; /* TRUE => implement offside rule */
54 String repeatStr = 0; /* Repeat last expr */
56 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
57 String preprocessor = 0;
60 /* --------------------------------------------------------------------------
61 * Local function prototypes:
62 * ------------------------------------------------------------------------*/
64 static Void local initCharTab Args((Void));
65 static Void local fileInput Args((String,Long));
66 static Bool local literateMode Args((String));
67 static Bool local linecmp Args((String,String));
68 static Int local nextLine Args((Void));
69 static Void local skip Args((Void));
70 static Void local thisLineIs Args((Int));
71 static Void local newlineSkip Args((Void));
72 static Void local closeAnyInput Args((Void));
74 Int yyparse Args((Void)); /* can't stop yacc making this */
75 /* public, but don't advertise */
76 /* it in a header file. */
78 static Void local endToken Args((Void));
79 static Text local readOperator Args((Void));
80 static Text local readIdent Args((Void));
81 static Cell local readRadixNumber Args((Int));
82 static Cell local readNumber Args((Void));
83 static Cell local readChar Args((Void));
84 static Cell local readString Args((Void));
85 static Void local saveStrChr Args((Char));
86 static Cell local readAChar Args((Bool));
88 static Bool local lazyReadMatches Args((String));
89 static Cell local readEscapeChar Args((Bool));
90 static Void local skipGap Args((Void));
91 static Cell local readCtrlChar Args((Void));
92 static Cell local readOctChar Args((Void));
93 static Cell local readHexChar Args((Void));
94 static Int local readHexDigit Args((Char));
95 static Cell local readDecChar Args((Void));
97 static Void local goOffside Args((Int));
98 static Void local unOffside Args((Void));
99 static Bool local canUnOffside Args((Void));
101 static Void local skipWhitespace Args((Void));
102 static Int local yylex Args((Void));
103 static Int local repeatLast Args((Void));
105 static Void local parseInput Args((Int));
107 static Bool local doesNotExceed Args((String,Int,Int));
108 static Int local stringToInt Args((String,Int));
111 /* --------------------------------------------------------------------------
112 * Text values for reserved words and special symbols:
113 * ------------------------------------------------------------------------*/
115 static Text textCase, textOfK, textData, textType, textIf;
116 static Text textThen, textElse, textWhere, textLet, textIn;
117 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
118 static Text textDefault, textDeriving, textDo, textClass, textInstance;
120 static Text textCoco, textEq, textUpto, textAs, textLambda;
121 static Text textBar, textMinus, textFrom, textArrow, textLazy;
122 static Text textBang, textDot, textAll, textImplies;
123 static Text textWildcard;
125 static Text textModule, textImport, textInterface, textInstImport;
126 static Text textHiding, textQualified, textAsMod;
127 static Text textExport, textDynamic, textUUExport;
128 static Text textUnsafe, textUUAll;
130 Text textNum; /* Num */
131 Text textPrelude; /* Prelude */
132 Text textPlus; /* (+) */
134 static Cell conMain; /* Main */
135 static Cell varMain; /* main */
137 static Cell varMinus; /* (-) */
138 static Cell varPlus; /* (+) */
139 static Cell varBang; /* (!) */
140 static Cell varDot; /* (.) */
141 static Cell varHiding; /* hiding */
142 static Cell varQualified; /* qualified */
143 static Cell varAsMod; /* as */
145 static List imps; /* List of imports to be chased */
148 /* --------------------------------------------------------------------------
149 * Character set handling:
151 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
152 * character set. The following code provides methods for classifying
153 * input characters according to the lexical structure specified by the
154 * report. Hugs should still accept older programs because ASCII is
155 * essentially just a subset of the ISO character set.
157 * Notes: If you want to port Hugs to a machine that uses something
158 * substantially different from the ISO character set, then you will need
159 * to insert additional code to map between character sets.
161 * At some point, the following data structures may be exported in a .h
162 * file to allow the information contained here to be picked up in the
163 * implementation of LibChar is* primitives.
165 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
166 * ------------------------------------------------------------------------*/
168 static Bool charTabBuilt;
169 static unsigned char ctable[NUM_CHARS];
170 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
171 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
181 static Void local initCharTab() { /* Initialize char decode table */
182 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
183 #define setChar(x,c) ctable[c] |= (x)
184 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
185 #define setCopy(x,c) {Int i; \
186 for (i=0; i<NUM_CHARS; ++i) \
191 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
193 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
194 setRange(SMALL, 223,246); /* ISO lower case letters */
195 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
196 setChar (SMALL, '_');
198 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
199 setRange(LARGE, 192,214); /* ISO upper case letters */
200 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
202 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
203 setRange(SYMBOL, 215,215);
204 setChar (SYMBOL, 247);
205 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
207 setChar (IDAFTER, '\''); /* Characters in identifier */
208 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
210 setChar (SPACE, ' '); /* ASCII space character */
211 setChar (SPACE, 160); /* ISO non breaking space */
212 setRange(SPACE, 9,13); /* special whitespace: \t\n\v\f\r */
214 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
215 setChars(PRINT, " '\""); /* Space and quotes */
216 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
226 /* --------------------------------------------------------------------------
227 * Single character input routines:
229 * At the lowest level of input, characters are read one at a time, with the
230 * current character held in c0 and the following (lookahead) character in
231 * c1. The corrdinates of c0 within the file are held in (column,row).
232 * The input stream is advanced by one character using the skip() function.
233 * ------------------------------------------------------------------------*/
235 #define TABSIZE 8 /* spacing between tabstops */
237 #define NOTHING 0 /* what kind of input is being read?*/
238 #define KEYBOARD 1 /* - keyboard/console? */
239 #define SCRIPTFILE 2 /* - script file */
240 #define PROJFILE 3 /* - project file */
241 #define STRING 4 /* - string buffer? */
243 static Int reading = NOTHING;
245 static Target readSoFar;
246 static Int row, column, startColumn;
248 static FILE *inputStream = 0;
249 static Bool thisLiterate;
250 static String nextStringChar; /* next char in string buffer */
252 #if USE_READLINE /* for command line editors */
253 static String currentLine; /* editline or GNU readline */
254 static String nextChar;
255 #define nextConsoleChar() \
256 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
257 extern Void add_history Args((String));
258 extern String readline Args((String));
260 #define nextConsoleChar() getc(stdin)
263 static Int litLines; /* count defn lines in lit script */
264 #define DEFNCHAR '>' /* definition lines begin with this */
265 static Int lastLine; /* records type of last line read: */
266 #define STARTLINE 0 /* - at start of file, none read */
267 #define BLANKLINE 1 /* - blank (may preceed definition) */
268 #define TEXTLINE 2 /* - text comment */
269 #define DEFNLINE 3 /* - line containing definition */
270 #define CODELINE 4 /* - line inside code block */
272 #define BEGINCODE "\\begin{code}"
273 #define ENDCODE "\\end{code}"
276 static char *lineBuffer = NULL; /* getline() does the initial allocation */
278 #define LINEBUFFER_SIZE 1000
279 static char lineBuffer[LINEBUFFER_SIZE];
281 static int lineLength = 0;
282 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
283 static int linePtr = 0;
285 Void consoleInput(prompt) /* prepare to input characters from */
286 String prompt; { /* standard in (i.e. console/kbd) */
287 reading = KEYBOARD; /* keyboard input is Line oriented, */
288 c0 = /* i.e. input terminated by '\n' */
294 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
295 * avoids accidentally freeing currentLine twice.
298 String oldCurrentLine = currentLine;
299 currentLine = 0; /* We may lose the space of currentLine */
300 free(oldCurrentLine); /* if interrupted here - unlikely */
302 currentLine = readline(prompt);
303 nextChar = currentLine;
306 add_history(currentLine);
316 Void projInput(nm) /* prepare to input characters from */
317 String nm; { /* from named project file */
318 if ((inputStream = fopen(nm,"r"))!=0) {
326 ERRMSG(0) "Unable to open project file \"%s\"", nm
331 static Void local fileInput(nm,len) /* prepare to input characters from*/
332 String nm; /* named file (specified length is */
333 Long len; { /* used to set target for reading) */
334 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
336 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
337 char *cmd = malloc(reallen);
339 ERRMSG(0) "Unable to allocate memory for filter command."
342 strcpy(cmd,preprocessor);
345 inputStream = popen(cmd,"r");
348 inputStream = fopen(nm,"r");
351 inputStream = fopen(nm,"r");
354 reading = SCRIPTFILE;
360 lastLine = STARTLINE; /* literate file processing */
364 thisLiterate = literateMode(nm);
368 setGoal("Parsing", (Target)len);
371 ERRMSG(0) "Unable to open file \"%s\"", nm
376 Void stringInput(s) /* prepare to input characters from string */
391 static Bool local literateMode(nm) /* Select literate mode for file */
393 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
395 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
397 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
398 filenamecmp(dot+1,"verb")==0) /* literate scripts */
401 return literateScripts; /* otherwise, use the default */
405 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
409 len = 1 + strlen ( srcName );
410 *hiName = malloc(len);
411 *oName = malloc(len);
412 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
413 (*hiName)[0] = (*oName)[0] = 0;
414 dot = strrchr(srcName, '.');
416 if (filenamecmp(dot+1, "hs")==0 &&
417 filenamecmp(dot+1, "lhs")==0 &&
418 filenamecmp(dot+1, "verb")==0) return;
420 strcpy(*hiName, srcName);
421 dot = strrchr(*hiName, '.');
426 strcpy(*oName, srcName);
427 dot = strrchr(*oName, '.');
434 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
435 * I've removed the loop (since newLineSkip contains a loop too) and
436 * replaced the warnings with errors. ADR
439 * To deal with literate \begin{code}...\end{code} blocks,
440 * add a line buffer that rooms the current line. The old c0 and c1
441 * stream pointers are used as before within that buffer -- sof
443 * Upon reading a new line into the line buffer, we check to see if
444 * we're reading in a line containing \begin{code} or \end{code} and
445 * take appropriate action.
448 static Bool local linecmp(s,line) /* compare string with line */
449 String s; /* line may end in whitespace */
452 while (s[i] != '\0' && s[i] == line[i]) {
455 /* s[0..i-1] == line[0..i-1] */
456 if (s[i] != '\0') { /* check s `isPrefixOf` line */
459 while (isIn(line[i], SPACE)) { /* allow whitespace at end of line */
462 return (line[i] == '\0');
465 /* Returns line length (including \n) or 0 upon EOF. */
466 static Int local nextLine()
470 Forget about fgets(), it is utterly braindead.
471 (Assumes \NUL free streams and does not gracefully deal
472 with overflow.) Instead, use GNU libc's getline().
474 lineLength = getline(&lineBuffer, &lineLength, inputStream);
476 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
477 lineLength = strlen(lineBuffer);
481 /* printf("Read: \"%s\"", lineBuffer); */
482 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
485 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
486 lineBuffer[1] == '!') {
487 lineBuffer[0]='\n'; /* pretend it's a blank line */
490 } else if (thisLiterate) {
491 if (linecmp(BEGINCODE, lineBuffer)) {
492 if (!inCodeBlock) { /* Entered a code block */
494 lineBuffer[0]='\n'; /* pretend it's a blank line */
499 ERRMSG(row) "\\begin{code} encountered inside code block"
503 else if (linecmp(ENDCODE, lineBuffer)) {
504 if (inCodeBlock) { /* Finished code block */
506 lineBuffer[0]='\n'; /* pretend it's a blank line */
511 ERRMSG(row) "\\end{code} encountered outside code block"
516 /* printf("Read: \"%s\"", lineBuffer); */
520 static Void local skip() { /* move forward one char in input */
521 if (c0!=EOF) { /* stream, updating c0, c1, ... */
522 if (c0=='\n') { /* Adjusting cursor coords as nec. */
525 if (reading==SCRIPTFILE)
529 column += TABSIZE - ((column-1)%TABSIZE);
538 if (reading==SCRIPTFILE)
542 else if (reading==KEYBOARD) {
547 c1 = nextConsoleChar();
548 /* On Win32, hitting ctrl-C causes the next getchar to
549 * fail - returning "-1" to indicate an error.
550 * This is one of the rare cases where "-1" does not mean EOF.
552 if (EOF == c1 && !feof(stdin)) {
557 else if (reading==STRING) {
558 c1 = (unsigned char) *nextStringChar++;
563 if (lineLength <=0 || linePtr == lineLength) {
564 /* Current line, exhausted - get new one */
565 if (nextLine() <= 0) { /* EOF */
570 c1 = (unsigned char)lineBuffer[linePtr++];
574 c1 = (unsigned char)lineBuffer[linePtr++];
581 static Void local thisLineIs(kind) /* register kind of current line */
582 Int kind; { /* & check for literate script errs */
583 if (literateErrors) {
584 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
585 (kind==TEXTLINE && lastLine==DEFNLINE)) {
586 ERRMSG(row) "Program line next to comment"
593 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
594 /* assert(c0=='\n'); */
595 if (reading==SCRIPTFILE && thisLiterate) {
598 if (inCodeBlock) { /* pass chars on definition lines */
599 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
603 if (c0==DEFNCHAR) { /* pass chars on definition lines */
604 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
609 while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank? */
611 if (c0=='\n' || c0==EOF)
612 thisLineIs(BLANKLINE);
614 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
615 while (c0!='\n' && c0!=EOF)
617 } /* by now, c0=='\n' or c0==EOF */
618 } while (c0!=EOF); /* if new line, start again */
620 if (litLines==0 && literateErrors) {
621 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
630 static Void local closeAnyInput() { /* Close input stream, if open, */
631 switch (reading) { /* or skip to end of console line */
633 case SCRIPTFILE : if (inputStream) {
634 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
646 case KEYBOARD : while (c0!=EOF)
653 /* --------------------------------------------------------------------------
654 * Parser: Uses table driven parser generated from parser.y using yacc
655 * ------------------------------------------------------------------------*/
659 /* --------------------------------------------------------------------------
660 * Single token input routines:
662 * The following routines read the values of particular kinds of token given
663 * that the first character of the token has already been located in c0 on
664 * entry to the routine.
665 * ------------------------------------------------------------------------*/
667 #define MAX_TOKEN 4000
668 #define startToken() tokPos = 0
669 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
670 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
671 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
673 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
674 static Int tokPos; /* input position in buffer */
675 static Int identType; /* identifier type: CONID / VARID */
676 static Int opType; /* operator type : CONOP / VAROP */
678 static Void local endToken() { /* check for token overflow */
679 if (tokPos>MAX_TOKEN) {
680 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
683 tokenStr[tokPos] = '\0';
686 static Text local readOperator() { /* read operator symbol */
691 } while (isISO(c0) && isIn(c0,SYMBOL));
692 opType = (tokenStr[0]==':' ? CONOP : VAROP);
694 return findText(tokenStr);
697 static Text local readIdent() { /* read identifier */
702 } while (isISO(c0) && isIn(c0,IDAFTER));
704 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
705 return findText(tokenStr);
709 static Bool local doesNotExceed(s,radix,limit)
716 if (s[p] == 0) return TRUE;
717 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
718 n = radix*n + (s[p]-'0');
723 static Int local stringToInt(s,radix)
729 if (s[p] == 0) return n;
730 n = radix*n + (s[p]-'0');
735 static Cell local readRadixNumber(r) /* Read literal in specified radix */
736 Int r; { /* from input of the form 0c{digs} */
739 skip(); /* skip leading zero */
740 if ((d=readHexDigit(c1))<0 || d>=r) {
741 /* Special case; no digits, lex as */
742 /* if it had been written "0 c..." */
747 saveTokenChar('0'+readHexDigit(c0));
749 d = readHexDigit(c0);
750 } while (d>=0 && d<r);
754 if (doesNotExceed(tokenStr,r,MAXPOSINT))
755 return mkInt(stringToInt(tokenStr,r));
758 return stringToBignum(tokenStr);
760 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
765 static Cell local readNumber() { /* read numeric constant */
768 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
769 return readRadixNumber(16);
770 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
771 return readRadixNumber(8);
778 } while (isISO(c0) && isIn(c0,DIGIT));
780 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
782 if (doesNotExceed(tokenStr,10,MAXPOSINT))
783 return mkInt(stringToInt(tokenStr,10)); else
784 return stringToBignum(tokenStr);
787 saveTokenChar(c0); /* save decimal point */
789 do { /* process fractional part ... */
792 } while (isISO(c0) && isIn(c0,DIGIT));
794 if (c0=='e' || c0=='E') { /* look for exponent part... */
804 if (!isISO(c0) || !isIn(c0,DIGIT)) {
805 ERRMSG(row) "Missing digits in exponent"
812 } while (isISO(c0) && isIn(c0,DIGIT));
817 return mkFloat(stringToFloat(tokenStr));
826 static Cell local readChar() { /* read character constant */
830 if (c0=='\'' || c0=='\n' || c0==EOF) {
831 ERRMSG(row) "Illegal character constant"
835 charRead = readAChar(FALSE);
840 ERRMSG(row) "Improperly terminated character constant"
846 static Cell local readString() { /* read string literal */
851 while (c0!='\"' && c0!='\n' && c0!=EOF) {
854 saveStrChr(charOf(c));
860 ERRMSG(row) "Improperly terminated string"
864 return mkStr(findText(tokenStr));
867 static Void local saveStrChr(c) /* save character in string */
869 if (c!='\0' && c!='\\') { /* save non null char as single char*/
872 else { /* save null char as TWO null chars */
873 if (tokPos+1<MAX_TOKEN) {
883 static Cell local readAChar(isStrLit) /* read single char constant */
884 Bool isStrLit; { /* TRUE => enable \& and gaps */
887 if (c0=='\\') /* escape character? */
888 return readEscapeChar(isStrLit);
890 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
893 skip(); /* normal character? */
897 /* --------------------------------------------------------------------------
898 * Character escape code sequences:
899 * ------------------------------------------------------------------------*/
901 static struct { /* table of special escape codes */
905 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
906 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
907 {"\'",'\''}, {"v", 11},
908 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
909 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
910 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
911 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
912 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
913 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
914 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
915 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
916 {"SP", 32}, {"DEL", 127},
920 static Int alreadyMatched; /* Record portion of input stream */
921 static char alreadyRead[10]; /* that has been read w/o a match */
923 static Bool local lazyReadMatches(s) /* compare input stream with string */
924 String s; { /* possibly using characters that */
925 int i; /* have already been read */
927 for (i=0; i<alreadyMatched; ++i)
928 if (alreadyRead[i]!=s[i])
931 while (s[i] && s[i]==c0) {
932 alreadyRead[alreadyMatched++]=(char)c0;
940 static Cell local readEscapeChar(isStrLit)/* read escape character */
946 case '&' : if (isStrLit) {
950 ERRMSG(row) "Illegal use of `\\&' in character constant"
954 case '^' : return readCtrlChar();
956 case 'o' : return readOctChar();
957 case 'x' : return readHexChar();
959 default : if (!isISO(c0)) {
960 ERRMSG(row) "Illegal escape sequence"
963 else if (isIn(c0,SPACE)) {
968 ERRMSG(row) "Illegal use of gap in character constant"
972 else if (isIn(c0,DIGIT))
973 return readDecChar();
976 for (alreadyMatched=0; escapes[i].codename; i++)
977 if (lazyReadMatches(escapes[i].codename))
978 return mkChar(escapes[i].codenumber);
980 alreadyRead[alreadyMatched++] = (char)c0;
981 alreadyRead[alreadyMatched++] = '\0';
982 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
985 return NIL;/*NOTREACHED*/
988 static Void local skipGap() { /* skip over gap in string literal */
989 do /* (simplified in Haskell 1.1) */
994 while (isISO(c0) && isIn(c0,SPACE));
996 ERRMSG(row) "Missing `\\' terminating string literal gap"
1002 static Cell local readCtrlChar() { /* read escape sequence \^x */
1003 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1007 if ((which = strchr(controls,c0))==NULL) {
1008 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1012 return mkChar(which-controls);
1015 static Cell local readOctChar() { /* read octal character constant */
1020 if ((d = readHexDigit(c0))<0 || d>=8) {
1021 ERRMSG(row) "Empty octal character escape"
1025 if (overflows(n,8,d,MAXCHARVAL)) {
1026 ERRMSG(row) "Octal character escape out of range"
1031 } while ((d = readHexDigit(c0))>=0 && d<8);
1036 static Cell local readHexChar() { /* read hex character constant */
1041 if ((d = readHexDigit(c0))<0) {
1042 ERRMSG(row) "Empty hexadecimal character escape"
1046 if (overflows(n,16,d,MAXCHARVAL)) {
1047 ERRMSG(row) "Hexadecimal character escape out of range"
1052 } while ((d = readHexDigit(c0))>=0);
1057 static Int local readHexDigit(c) /* read single hex digit */
1059 if ('0'<=c && c<='9')
1061 if ('A'<=c && c<='F')
1062 return 10 + (c-'A');
1063 if ('a'<=c && c<='f')
1064 return 10 + (c-'a');
1068 static Cell local readDecChar() { /* read decimal character constant */
1072 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1073 ERRMSG(row) "Decimal character escape out of range"
1076 n = 10*n + (c0-'0');
1078 } while (c0!=EOF && isIn(c0,DIGIT));
1083 /* --------------------------------------------------------------------------
1084 * Produce printable representation of character:
1085 * ------------------------------------------------------------------------*/
1087 String unlexChar(c,quote) /* return string representation of */
1088 Char c; /* character... */
1089 Char quote; { /* protect quote character */
1090 static char buffer[12];
1092 if (c<0) /* deal with sign extended chars.. */
1095 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1096 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1098 buffer[1] = (char)c;
1102 buffer[0] = (char)c;
1106 else { /* look for escape code */
1108 for (escs=0; escapes[escs].codename; escs++)
1109 if (escapes[escs].codenumber==c) {
1110 sprintf(buffer,"\\%s",escapes[escs].codename);
1113 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1118 Void printString(s) /* print string s, using quotes and */
1119 String s; { /* escapes if any parts need them */
1123 while ((c = *t)!=0 && isISO(c)
1124 && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1130 Printf("%s",unlexChar(*t,'"'));
1138 /* -------------------------------------------------------------------------
1139 * Handle special types of input for use in interpreter:
1140 * -----------------------------------------------------------------------*/
1142 Command readCommand(cmds,start,sys) /* read command at start of input */
1143 struct cmd *cmds; /* line in interpreter */
1144 Char start; /* characters introducing a cmd */
1145 Char sys; { /* character for shell escape */
1146 while (c0==' ' || c0 =='\t')
1149 if (c0=='\n') /* look for blank command lines */
1151 if (c0==EOF) /* look for end of input stream */
1153 if (c0==sys) { /* single character system escape */
1157 if (c0==start && c1==sys) { /* two character system escape */
1163 startToken(); /* All cmds start with start */
1164 if (c0==start) /* except default (usually EVAL) */
1165 do { /* which is empty */
1168 } while (c0!=EOF && !isIn(c0,SPACE));
1171 for (; cmds->cmdString; ++cmds)
1172 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1173 (tokenStr[0]==start &&
1174 tokenStr[1]==(cmds->cmdString)[1] &&
1176 return (cmds->cmdCode);
1180 String readFilename() { /* Read filename from input (if any)*/
1181 if (reading==PROJFILE)
1184 while (c0==' ' || c0=='\t')
1187 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1191 while (c0!=EOF && !isIn(c0,SPACE)) {
1194 while (c0!=EOF && c0!='\"') {
1195 Cell c = readAChar(TRUE);
1197 saveTokenChar(charOf(c));
1203 ERRMSG(row) "a closing quote, '\"', was expected"
1216 String readLine() { /* Read command line from input */
1217 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1221 while (c0!='\n' && c0!=EOF) {
1230 /* --------------------------------------------------------------------------
1231 * This lexer supports the Haskell layout rule:
1233 * - Layout area bounded by { ... }, with `;'s in between.
1234 * - A `{' is a HARD indentation and can only be matched by a corresponding
1236 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1237 * is inserted with the column number of the first token after the
1238 * WHERE/LET/OF keyword.
1239 * - When a soft indentation is uppermost on the indetation stack with
1240 * column col' we insert:
1241 * `}' in front of token with column<col' and pop indentation off stack,
1242 * `;' in front of token with column==col'.
1243 * ------------------------------------------------------------------------*/
1245 #define MAXINDENT 100 /* maximum nesting of layout rule */
1246 static Int layout[MAXINDENT+1];/* indentation stack */
1247 #define HARD (-1) /* indicates hard indentation */
1248 static Int indentDepth = (-1); /* current indentation nesting */
1250 static Void local goOffside(col) /* insert offside marker */
1251 Int col; { /* for specified column */
1253 if (indentDepth>=MAXINDENT) {
1254 ERRMSG(row) "Too many levels of program nesting"
1257 layout[++indentDepth] = col;
1260 static Void local unOffside() { /* leave layout rule area */
1265 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1267 return indentDepth>=0 && layout[indentDepth]!=HARD;
1270 /* --------------------------------------------------------------------------
1272 * ------------------------------------------------------------------------*/
1274 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1275 for (;;) /* Strictly speaking, this code is */
1276 if (c0==EOF) /* a little more liberal than the */
1277 return; /* report allows ... */
1280 else if (isIn(c0,SPACE))
1282 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1284 Int origRow = row; /* Save original row number */
1287 while (nesting>0 && c0!=EOF)
1288 if (c0=='{' && c1=='-') {
1293 else if (c0=='-' && c1=='}') {
1303 ERRMSG(origRow) "Unterminated nested comment {- ..."
1307 else if (c0=='-' && c1=='-') { /* One line comment */
1310 while (c0!='\n' && c0!=EOF);
1318 static Bool firstToken; /* Set to TRUE for first token */
1319 static Int firstTokenIs; /* ... with token value stored here */
1321 static Int local yylex() { /* Read next input token ... */
1322 static Bool insertOpen = FALSE;
1323 static Bool insertedToken = FALSE;
1324 static Text textRepeat;
1326 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1328 if (firstToken) { /* Special case for first token */
1332 insertedToken = FALSE;
1333 if (reading==KEYBOARD)
1334 textRepeat = findText(repeatStr);
1335 return firstTokenIs;
1338 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1340 insertedToken = TRUE;
1342 push(yylval = mkInt(row));
1346 /* ----------------------------------------------------------------------
1347 * Skip white space, and insert tokens to support layout rules as reqd.
1348 * --------------------------------------------------------------------*/
1351 startColumn = column;
1352 push(yylval = mkInt(row)); /* default token value is line no. */
1353 /* subsequent changes to yylval must also set top() to the same value */
1355 if (indentDepth>=0) { /* layout rule(s) active ? */
1356 if (insertedToken) /* avoid inserting multiple `;'s */
1357 insertedToken = FALSE; /* or putting `;' after `{' */
1359 if (offsideON && layout[indentDepth]!=HARD) {
1360 if (column<layout[indentDepth]) {
1364 else if (column==layout[indentDepth] && c0!=EOF) {
1365 insertedToken = TRUE;
1371 /* ----------------------------------------------------------------------
1372 * Now try to identify token type:
1373 * --------------------------------------------------------------------*/
1376 case EOF : return 0; /* End of file/input */
1378 /* The next 10 characters make up the `special' category in 1.3 */
1379 case '(' : skip(); return '(';
1380 case ')' : skip(); return ')';
1381 case ',' : skip(); return ',';
1382 case ';' : skip(); return ';';
1383 case '[' : skip(); return '[';
1384 case ']' : skip(); return ']';
1385 case '`' : skip(); return '`';
1386 case '{' : if (offsideON) goOffside(HARD);
1389 case '}' : if (offsideON && indentDepth<0) {
1390 ERRMSG(row) "Misplaced `}'"
1393 if (!(offsideON && layout[indentDepth]!=HARD))
1394 skip(); /* skip over hard }*/
1396 unOffside(); /* otherwise, we have to insert a }*/
1397 return '}'; /* to (try to) avoid an error... */
1399 /* Character and string literals */
1400 case '\'' : top() = yylval = readChar();
1403 case '\"' : top() = yylval = readString();
1408 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1409 Text it; /* Look for record selector name */
1412 top() = yylval = ap(RECSEL,mkExt(it));
1413 return identType=RECSELID;
1416 if (isIn(c0,LARGE)) { /* Look for qualified name */
1417 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1418 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1420 skip(); /* Skip qualifying dot */
1421 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1422 it2 = readOperator();
1423 if (opType==CONOP) {
1424 top() = yylval = mkQConOp(it,it2);
1427 top() = yylval = mkQVarOp(it,it2);
1430 } else { /* Qualified identifier */
1432 if (identType==CONID) {
1433 top() = yylval = mkQCon(it,it2);
1436 top() = yylval = mkQVar(it,it2);
1441 top() = yylval = mkCon(it);
1445 if (isIn(c0,(SMALL|LARGE))) {
1446 Text it = readIdent();
1448 if (it==textCase) return CASEXP;
1449 if (it==textOfK) lookAhead(OF);
1450 if (it==textData) return DATA;
1451 if (it==textType) return TYPE;
1452 if (it==textIf) return IF;
1453 if (it==textThen) return THEN;
1454 if (it==textElse) return ELSE;
1455 if (it==textWhere) lookAhead(WHERE);
1456 if (it==textLet) lookAhead(LET);
1457 if (it==textIn) return IN;
1458 if (it==textInfix) return INFIXN;
1459 if (it==textInfixl) return INFIXL;
1460 if (it==textInfixr) return INFIXR;
1461 if (it==textForeign) return FOREIGN;
1462 if (it==textUnsafe) return UNSAFE;
1463 if (it==textNewtype) return TNEWTYPE;
1464 if (it==textDefault) return DEFAULT;
1465 if (it==textDeriving) return DERIVING;
1466 if (it==textDo) lookAhead(DO);
1467 if (it==textClass) return TCLASS;
1468 if (it==textInstance) return TINSTANCE;
1469 if (it==textModule) return TMODULE;
1470 if (it==textInterface) return INTERFACE;
1471 if (it==textInstImport) return INSTIMPORT;
1472 if (it==textImport) return IMPORT;
1473 if (it==textExport) return EXPORT;
1474 if (it==textDynamic) return DYNAMIC;
1475 if (it==textUUExport) return UUEXPORT;
1476 if (it==textHiding) return HIDING;
1477 if (it==textQualified) return QUALIFIED;
1478 if (it==textAsMod) return ASMOD;
1479 if (it==textWildcard) return '_';
1480 if (it==textAll && !haskell98) return ALL;
1481 if (it==textUUAll) return ALL;
1482 if (it==textRepeat && reading==KEYBOARD)
1483 return repeatLast();
1485 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1489 if (isIn(c0,SYMBOL)) {
1490 Text it = readOperator();
1492 if (it==textCoco) return COCO;
1493 if (it==textEq) return '=';
1494 if (it==textUpto) return UPTO;
1495 if (it==textAs) return '@';
1496 if (it==textLambda) return '\\';
1497 if (it==textBar) return '|';
1498 if (it==textFrom) return FROM;
1499 if (it==textMinus) return '-';
1500 if (it==textPlus) return '+';
1501 if (it==textBang) return '!';
1502 if (it==textDot) return '.';
1503 if (it==textArrow) return ARROW;
1504 if (it==textLazy) return '~';
1505 if (it==textImplies) return IMPLIES;
1506 if (it==textRepeat && reading==KEYBOARD)
1507 return repeatLast();
1509 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1513 if (isIn(c0,DIGIT)) {
1514 top() = yylval = readNumber();
1518 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1521 return 0; /*NOTREACHED*/
1524 static Int local repeatLast() { /* Obtain last expression entered */
1525 if (isNull(yylval=getLastExpr())) {
1526 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1532 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1533 Text t; { /* by t ... */
1534 String s = textToStr(t);
1535 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1538 Syntax syntaxOf(n) /* Find syntax for name */
1540 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1541 return defaultSyntax(name(n).text);
1542 return name(n).syntax;
1545 /* --------------------------------------------------------------------------
1546 * main entry points to parser/lexer:
1547 * ------------------------------------------------------------------------*/
1549 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1550 Int startWith; { /* determining whether to read a */
1551 firstToken = TRUE; /* script or an expression */
1552 firstTokenIs = startWith;
1553 if (startWith==INTERFACE)
1554 offsideON = FALSE; else
1558 if (yyparse()) { /* This can only be parser overflow */
1559 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1560 EEND; /* in the parser... */
1563 if (!stackEmpty()) /* stack should now be empty */
1564 internal("parseInput");
1568 static String memPrefix = "@mem@";
1569 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1571 Void makeMemScript(mem,fname)
1574 strcat(fname,memPrefix);
1575 itoa((int)mem, fname+strlen(fname), 10);
1578 Bool isMemScript(fname)
1580 return (strstr(fname,memPrefix) != NULL);
1583 String memScriptString(fname)
1585 String p = strstr(fname,memPrefix);
1587 return (String)atoi(p+lenMemPrefix);
1593 Void parseScript(fname,len) /* Read a script, possibly from mem */
1597 if (isMemScript(fname)) {
1598 char* s = memScriptString(fname);
1601 fileInput(fname,len);
1606 Void parseScript(nm,len) /* Read a script */
1608 Long len; { /* Used to set a target for reading */
1615 Void parseExp() { /* Read an expression to evaluate */
1617 setLastExpr(inputExpr);
1620 Void parseInterface(nm,len) /* Read a GHC interface file */
1622 Long len; { /* Used to set a target for reading */
1625 parseInput(INTERFACE);
1629 /* --------------------------------------------------------------------------
1631 * ------------------------------------------------------------------------*/
1636 case INSTALL : initCharTab();
1637 textCase = findText("case");
1638 textOfK = findText("of");
1639 textData = findText("data");
1640 textType = findText("type");
1641 textIf = findText("if");
1642 textThen = findText("then");
1643 textElse = findText("else");
1644 textWhere = findText("where");
1645 textLet = findText("let");
1646 textIn = findText("in");
1647 textInfix = findText("infix");
1648 textInfixl = findText("infixl");
1649 textInfixr = findText("infixr");
1650 textForeign = findText("foreign");
1651 textUnsafe = findText("unsafe");
1652 textNewtype = findText("newtype");
1653 textDefault = findText("default");
1654 textDeriving = findText("deriving");
1655 textDo = findText("do");
1656 textClass = findText("class");
1657 textInstance = findText("instance");
1658 textCoco = findText("::");
1659 textEq = findText("=");
1660 textUpto = findText("..");
1661 textAs = findText("@");
1662 textLambda = findText("\\");
1663 textBar = findText("|");
1664 textMinus = findText("-");
1665 textPlus = findText("+");
1666 textFrom = findText("<-");
1667 textArrow = findText("->");
1668 textLazy = findText("~");
1669 textBang = findText("!");
1670 textDot = findText(".");
1671 textImplies = findText("=>");
1672 textPrelude = findText("Prelude");
1673 textNum = findText("Num");
1674 textModule = findText("module");
1675 textInterface = findText("__interface");
1676 textInstImport = findText("__instimport");
1677 textExport = findText("export");
1678 textDynamic = findText("dynamic");
1679 textUUExport = findText("__export");
1680 textImport = findText("import");
1681 textHiding = findText("hiding");
1682 textQualified = findText("qualified");
1683 textAsMod = findText("as");
1684 textWildcard = findText("_");
1685 textAll = findText("forall");
1686 textUUAll = findText("__forall");
1687 varMinus = mkVar(textMinus);
1688 varPlus = mkVar(textPlus);
1689 varBang = mkVar(textBang);
1690 varDot = mkVar(textDot);
1691 varHiding = mkVar(textHiding);
1692 varQualified = mkVar(textQualified);
1693 varAsMod = mkVar(textAsMod);
1694 conMain = mkCon(findText("Main"));
1695 varMain = mkVar(findText("main"));
1701 case RESET : tyconDefns = NIL;
1710 foreignImports= NIL;
1711 foreignExports= NIL;
1719 case BREAK : if (reading==KEYBOARD)
1723 case MARK : mark(tyconDefns);
1731 mark(unqualImports);
1732 mark(foreignImports);
1733 mark(foreignExports);
1751 /*-------------------------------------------------------------------------*/