2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
6 * Yale Haskell Group, and the Oregon Graduate Institute of Science and
7 * Technology, 1994-1999, All rights reserved. It is distributed as
8 * free software under the license in the file "License", which is
9 * included in the distribution.
11 * $RCSfile: input.c,v $
13 * $Date: 1999/10/26 17:27:39 $
14 * ------------------------------------------------------------------------*/
32 #if IS_WIN32 | HUGS_FOR_WINDOWS
36 /* --------------------------------------------------------------------------
38 * ------------------------------------------------------------------------*/
40 List tyconDefns = NIL; /* type constructor definitions */
41 List typeInDefns = NIL; /* type synonym restrictions */
42 List valDefns = NIL; /* value definitions in script */
43 List classDefns = NIL; /* class defns in script */
44 List instDefns = NIL; /* instance defns in script */
45 List selDefns = NIL; /* list of selector lists */
46 List genDefns = NIL; /* list of generated names */
47 List unqualImports = NIL; /* unqualified import list */
48 List foreignImports = NIL; /* foreign imports */
49 List foreignExports = NIL; /* foreign exportsd */
50 List defaultDefns = NIL; /* default definitions (if any) */
51 Int defaultLine = 0; /* line in which default defs occur*/
52 List evalDefaults = NIL; /* defaults for evaluator */
54 Cell inputExpr = NIL; /* input expression */
55 Cell inputContext = NIL; /* input context */
56 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
57 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
58 Bool offsideON = TRUE; /* TRUE => implement offside rule */
60 String repeatStr = 0; /* Repeat last expr */
62 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
63 String preprocessor = 0;
66 /* --------------------------------------------------------------------------
67 * Local function prototypes:
68 * ------------------------------------------------------------------------*/
70 static Void local initCharTab Args((Void));
71 static Void local fileInput Args((String,Long));
72 static Bool local literateMode Args((String));
73 static Bool local linecmp Args((String,String));
74 static Int local nextLine Args((Void));
75 static Void local skip Args((Void));
76 static Void local thisLineIs Args((Int));
77 static Void local newlineSkip Args((Void));
78 static Void local closeAnyInput Args((Void));
80 Int yyparse Args((Void)); /* can't stop yacc making this */
81 /* public, but don't advertise */
82 /* it in a header file. */
84 static Void local endToken Args((Void));
85 static Text local readOperator Args((Void));
86 static Text local readIdent Args((Void));
87 static Cell local readRadixNumber Args((Int));
88 static Cell local readNumber Args((Void));
89 static Cell local readChar Args((Void));
90 static Cell local readString Args((Void));
91 static Void local saveStrChr Args((Char));
92 static Cell local readAChar Args((Bool));
94 static Bool local lazyReadMatches Args((String));
95 static Cell local readEscapeChar Args((Bool));
96 static Void local skipGap Args((Void));
97 static Cell local readCtrlChar Args((Void));
98 static Cell local readOctChar Args((Void));
99 static Cell local readHexChar Args((Void));
100 static Int local readHexDigit Args((Char));
101 static Cell local readDecChar Args((Void));
103 static Void local goOffside Args((Int));
104 static Void local unOffside Args((Void));
105 static Bool local canUnOffside Args((Void));
107 static Void local skipWhitespace Args((Void));
108 static Int local yylex Args((Void));
109 static Int local repeatLast Args((Void));
111 static Void local parseInput Args((Int));
113 static Bool local doesNotExceed Args((String,Int,Int));
114 static Int local stringToInt Args((String,Int));
117 /* --------------------------------------------------------------------------
118 * Text values for reserved words and special symbols:
119 * ------------------------------------------------------------------------*/
121 static Text textCase, textOfK, textData, textType, textIf;
122 static Text textThen, textElse, textWhere, textLet, textIn;
123 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
124 static Text textDefault, textDeriving, textDo, textClass, textInstance;
126 static Text textWith, textDlet;
129 static Text textCoco, textEq, textUpto, textAs, textLambda;
130 static Text textBar, textMinus, textFrom, textArrow, textLazy;
131 static Text textBang, textDot, textAll, textImplies;
132 static Text textWildcard;
134 static Text textModule, textImport, textInterface, textInstImport;
135 static Text textHiding, textQualified, textAsMod;
136 static Text textExport, textDynamic, textUUExport;
137 static Text textUnsafe, textUUAll;
139 Text textCcall; /* ccall */
140 Text textStdcall; /* stdcall */
142 Text textNum; /* Num */
143 Text textPrelude; /* Prelude */
144 Text textPlus; /* (+) */
146 static Cell conMain; /* Main */
147 static Cell varMain; /* main */
149 static Cell varMinus; /* (-) */
150 static Cell varPlus; /* (+) */
151 static Cell varBang; /* (!) */
152 static Cell varDot; /* (.) */
153 static Cell varHiding; /* hiding */
154 static Cell varQualified; /* qualified */
155 static Cell varAsMod; /* as */
157 static List imps; /* List of imports to be chased */
160 /* --------------------------------------------------------------------------
161 * Character set handling:
163 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
164 * character set. The following code provides methods for classifying
165 * input characters according to the lexical structure specified by the
166 * report. Hugs should still accept older programs because ASCII is
167 * essentially just a subset of the ISO character set.
169 * Notes: If you want to port Hugs to a machine that uses something
170 * substantially different from the ISO character set, then you will need
171 * to insert additional code to map between character sets.
173 * At some point, the following data structures may be exported in a .h
174 * file to allow the information contained here to be picked up in the
175 * implementation of LibChar is* primitives.
177 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
178 * ------------------------------------------------------------------------*/
180 static Bool charTabBuilt;
181 static unsigned char ctable[NUM_CHARS];
182 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
183 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
193 static Void local initCharTab() { /* Initialize char decode table */
194 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
195 #define setChar(x,c) ctable[c] |= (x)
196 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
197 #define setCopy(x,c) {Int i; \
198 for (i=0; i<NUM_CHARS; ++i) \
203 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
205 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
206 setRange(SMALL, 223,246); /* ISO lower case letters */
207 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
208 setChar (SMALL, '_');
210 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
211 setRange(LARGE, 192,214); /* ISO upper case letters */
212 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
214 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
215 setRange(SYMBOL, 215,215);
216 setChar (SYMBOL, 247);
217 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
219 setChar (IDAFTER, '\''); /* Characters in identifier */
220 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
222 setChar (SPACE, ' '); /* ASCII space character */
223 setChar (SPACE, 160); /* ISO non breaking space */
224 setRange(SPACE, 9,13); /* special whitespace: \t\n\v\f\r */
226 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
227 setChars(PRINT, " '\""); /* Space and quotes */
228 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
238 /* --------------------------------------------------------------------------
239 * Single character input routines:
241 * At the lowest level of input, characters are read one at a time, with the
242 * current character held in c0 and the following (lookahead) character in
243 * c1. The corrdinates of c0 within the file are held in (column,row).
244 * The input stream is advanced by one character using the skip() function.
245 * ------------------------------------------------------------------------*/
247 #define TABSIZE 8 /* spacing between tabstops */
249 #define NOTHING 0 /* what kind of input is being read?*/
250 #define KEYBOARD 1 /* - keyboard/console? */
251 #define SCRIPTFILE 2 /* - script file */
252 #define PROJFILE 3 /* - project file */
253 #define STRING 4 /* - string buffer? */
255 static Int reading = NOTHING;
257 static Target readSoFar;
258 static Int row, column, startColumn;
260 static FILE *inputStream = 0;
261 static Bool thisLiterate;
262 static String nextStringChar; /* next char in string buffer */
264 #if USE_READLINE /* for command line editors */
265 static String currentLine; /* editline or GNU readline */
266 static String nextChar;
267 #define nextConsoleChar() \
268 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
269 extern Void add_history Args((String));
270 extern String readline Args((String));
272 #define nextConsoleChar() getc(stdin)
275 static Int litLines; /* count defn lines in lit script */
276 #define DEFNCHAR '>' /* definition lines begin with this */
277 static Int lastLine; /* records type of last line read: */
278 #define STARTLINE 0 /* - at start of file, none read */
279 #define BLANKLINE 1 /* - blank (may preceed definition) */
280 #define TEXTLINE 2 /* - text comment */
281 #define DEFNLINE 3 /* - line containing definition */
282 #define CODELINE 4 /* - line inside code block */
284 #define BEGINCODE "\\begin{code}"
285 #define ENDCODE "\\end{code}"
288 static char *lineBuffer = NULL; /* getline() does the initial allocation */
290 #define LINEBUFFER_SIZE 1000
291 static char lineBuffer[LINEBUFFER_SIZE];
293 static int lineLength = 0;
294 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
295 static int linePtr = 0;
297 Void consoleInput(prompt) /* prepare to input characters from */
298 String prompt; { /* standard in (i.e. console/kbd) */
299 reading = KEYBOARD; /* keyboard input is Line oriented, */
300 c0 = /* i.e. input terminated by '\n' */
306 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
307 * avoids accidentally freeing currentLine twice.
310 String oldCurrentLine = currentLine;
311 currentLine = 0; /* We may lose the space of currentLine */
312 free(oldCurrentLine); /* if interrupted here - unlikely */
314 currentLine = readline(prompt);
315 nextChar = currentLine;
318 add_history(currentLine);
328 Void projInput(nm) /* prepare to input characters from */
329 String nm; { /* from named project file */
330 if ((inputStream = fopen(nm,"r"))!=0) {
338 ERRMSG(0) "Unable to open project file \"%s\"", nm
343 static Void local fileInput(nm,len) /* prepare to input characters from*/
344 String nm; /* named file (specified length is */
345 Long len; { /* used to set target for reading) */
346 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
348 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
349 char *cmd = malloc(reallen);
351 ERRMSG(0) "Unable to allocate memory for filter command."
354 strcpy(cmd,preprocessor);
357 inputStream = popen(cmd,"r");
360 inputStream = fopen(nm,"r");
363 inputStream = fopen(nm,"r");
366 reading = SCRIPTFILE;
372 lastLine = STARTLINE; /* literate file processing */
376 thisLiterate = literateMode(nm);
380 setGoal("Parsing", (Target)len);
383 ERRMSG(0) "Unable to open file \"%s\"", nm
388 Void stringInput(s) /* prepare to input characters from string */
403 static Bool local literateMode(nm) /* Select literate mode for file */
405 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
407 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
409 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
410 filenamecmp(dot+1,"verb")==0) /* literate scripts */
413 return literateScripts; /* otherwise, use the default */
417 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
421 len = 1 + strlen ( srcName );
422 *hiName = malloc(len);
423 *oName = malloc(len);
424 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
425 (*hiName)[0] = (*oName)[0] = 0;
426 dot = strrchr(srcName, '.');
428 if (filenamecmp(dot+1, "hs")==0 &&
429 filenamecmp(dot+1, "lhs")==0 &&
430 filenamecmp(dot+1, "verb")==0) return;
432 strcpy(*hiName, srcName);
433 dot = strrchr(*hiName, '.');
438 strcpy(*oName, srcName);
439 dot = strrchr(*oName, '.');
446 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
447 * I've removed the loop (since newLineSkip contains a loop too) and
448 * replaced the warnings with errors. ADR
451 * To deal with literate \begin{code}...\end{code} blocks,
452 * add a line buffer that rooms the current line. The old c0 and c1
453 * stream pointers are used as before within that buffer -- sof
455 * Upon reading a new line into the line buffer, we check to see if
456 * we're reading in a line containing \begin{code} or \end{code} and
457 * take appropriate action.
460 static Bool local linecmp(s,line) /* compare string with line */
461 String s; /* line may end in whitespace */
464 while (s[i] != '\0' && s[i] == line[i]) {
467 /* s[0..i-1] == line[0..i-1] */
468 if (s[i] != '\0') { /* check s `isPrefixOf` line */
471 while (isIn(line[i], SPACE)) { /* allow whitespace at end of line */
474 return (line[i] == '\0');
477 /* Returns line length (including \n) or 0 upon EOF. */
478 static Int local nextLine()
482 Forget about fgets(), it is utterly braindead.
483 (Assumes \NUL free streams and does not gracefully deal
484 with overflow.) Instead, use GNU libc's getline().
486 lineLength = getline(&lineBuffer, &lineLength, inputStream);
488 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
489 lineLength = strlen(lineBuffer);
493 /* printf("Read: \"%s\"", lineBuffer); */
494 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
497 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
498 lineBuffer[1] == '!') {
499 lineBuffer[0]='\n'; /* pretend it's a blank line */
502 } else if (thisLiterate) {
503 if (linecmp(BEGINCODE, lineBuffer)) {
504 if (!inCodeBlock) { /* Entered a code block */
506 lineBuffer[0]='\n'; /* pretend it's a blank line */
511 ERRMSG(row) "\\begin{code} encountered inside code block"
515 else if (linecmp(ENDCODE, lineBuffer)) {
516 if (inCodeBlock) { /* Finished code block */
518 lineBuffer[0]='\n'; /* pretend it's a blank line */
523 ERRMSG(row) "\\end{code} encountered outside code block"
528 /* printf("Read: \"%s\"", lineBuffer); */
532 static Void local skip() { /* move forward one char in input */
533 if (c0!=EOF) { /* stream, updating c0, c1, ... */
534 if (c0=='\n') { /* Adjusting cursor coords as nec. */
537 if (reading==SCRIPTFILE)
541 column += TABSIZE - ((column-1)%TABSIZE);
550 if (reading==SCRIPTFILE)
554 else if (reading==KEYBOARD) {
559 c1 = nextConsoleChar();
560 #if IS_WIN32 && !HUGS_FOR_WINDOWS
563 /* On Win32, hitting ctrl-C causes the next getchar to
564 * fail - returning "-1" to indicate an error.
565 * This is one of the rare cases where "-1" does not mean EOF.
567 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
572 else if (reading==STRING) {
573 c1 = (unsigned char) *nextStringChar++;
578 if (lineLength <=0 || linePtr == lineLength) {
579 /* Current line, exhausted - get new one */
580 if (nextLine() <= 0) { /* EOF */
585 c1 = (unsigned char)lineBuffer[linePtr++];
589 c1 = (unsigned char)lineBuffer[linePtr++];
596 static Void local thisLineIs(kind) /* register kind of current line */
597 Int kind; { /* & check for literate script errs */
598 if (literateErrors) {
599 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
600 (kind==TEXTLINE && lastLine==DEFNLINE)) {
601 ERRMSG(row) "Program line next to comment"
608 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
609 /* assert(c0=='\n'); */
610 if (reading==SCRIPTFILE && thisLiterate) {
613 if (inCodeBlock) { /* pass chars on definition lines */
614 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
618 if (c0==DEFNCHAR) { /* pass chars on definition lines */
619 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
624 while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank? */
626 if (c0=='\n' || c0==EOF)
627 thisLineIs(BLANKLINE);
629 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
630 while (c0!='\n' && c0!=EOF)
632 } /* by now, c0=='\n' or c0==EOF */
633 } while (c0!=EOF); /* if new line, start again */
635 if (litLines==0 && literateErrors) {
636 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
645 static Void local closeAnyInput() { /* Close input stream, if open, */
646 switch (reading) { /* or skip to end of console line */
648 case SCRIPTFILE : if (inputStream) {
649 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
661 case KEYBOARD : while (c0!=EOF)
668 /* --------------------------------------------------------------------------
669 * Parser: Uses table driven parser generated from parser.y using yacc
670 * ------------------------------------------------------------------------*/
674 /* --------------------------------------------------------------------------
675 * Single token input routines:
677 * The following routines read the values of particular kinds of token given
678 * that the first character of the token has already been located in c0 on
679 * entry to the routine.
680 * ------------------------------------------------------------------------*/
682 #define MAX_TOKEN 4000
683 #define startToken() tokPos = 0
684 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
685 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
686 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
688 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
689 static Int tokPos; /* input position in buffer */
690 static Int identType; /* identifier type: CONID / VARID */
691 static Int opType; /* operator type : CONOP / VAROP */
693 static Void local endToken() { /* check for token overflow */
694 if (tokPos>MAX_TOKEN) {
695 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
698 tokenStr[tokPos] = '\0';
701 static Text local readOperator() { /* read operator symbol */
706 } while (isISO(c0) && isIn(c0,SYMBOL));
707 opType = (tokenStr[0]==':' ? CONOP : VAROP);
709 return findText(tokenStr);
712 static Text local readIdent() { /* read identifier */
717 } while (isISO(c0) && isIn(c0,IDAFTER));
719 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
720 return findText(tokenStr);
724 static Bool local doesNotExceed(s,radix,limit)
731 if (s[p] == 0) return TRUE;
732 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
733 n = radix*n + (s[p]-'0');
738 static Int local stringToInt(s,radix)
744 if (s[p] == 0) return n;
745 n = radix*n + (s[p]-'0');
750 static Cell local readRadixNumber(r) /* Read literal in specified radix */
751 Int r; { /* from input of the form 0c{digs} */
754 skip(); /* skip leading zero */
755 if ((d=readHexDigit(c1))<0 || d>=r) {
756 /* Special case; no digits, lex as */
757 /* if it had been written "0 c..." */
762 saveTokenChar('0'+readHexDigit(c0));
764 d = readHexDigit(c0);
765 } while (d>=0 && d<r);
769 if (doesNotExceed(tokenStr,r,MAXPOSINT))
770 return mkInt(stringToInt(tokenStr,r));
773 return stringToBignum(tokenStr);
775 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
780 static Cell local readNumber() { /* read numeric constant */
783 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
784 return readRadixNumber(16);
785 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
786 return readRadixNumber(8);
793 } while (isISO(c0) && isIn(c0,DIGIT));
795 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
797 if (doesNotExceed(tokenStr,10,MAXPOSINT))
798 return mkInt(stringToInt(tokenStr,10)); else
799 return stringToBignum(tokenStr);
802 saveTokenChar(c0); /* save decimal point */
804 do { /* process fractional part ... */
807 } while (isISO(c0) && isIn(c0,DIGIT));
809 if (c0=='e' || c0=='E') { /* look for exponent part... */
819 if (!isISO(c0) || !isIn(c0,DIGIT)) {
820 ERRMSG(row) "Missing digits in exponent"
827 } while (isISO(c0) && isIn(c0,DIGIT));
832 return mkFloat(stringToFloat(tokenStr));
841 static Cell local readChar() { /* read character constant */
845 if (c0=='\'' || c0=='\n' || c0==EOF) {
846 ERRMSG(row) "Illegal character constant"
850 charRead = readAChar(FALSE);
855 ERRMSG(row) "Improperly terminated character constant"
861 static Cell local readString() { /* read string literal */
866 while (c0!='\"' && c0!='\n' && c0!=EOF) {
869 saveStrChr(charOf(c));
875 ERRMSG(row) "Improperly terminated string"
879 return mkStr(findText(tokenStr));
882 static Void local saveStrChr(c) /* save character in string */
884 if (c!='\0' && c!='\\') { /* save non null char as single char*/
887 else { /* save null char as TWO null chars */
888 if (tokPos+1<MAX_TOKEN) {
898 static Cell local readAChar(isStrLit) /* read single char constant */
899 Bool isStrLit; { /* TRUE => enable \& and gaps */
902 if (c0=='\\') /* escape character? */
903 return readEscapeChar(isStrLit);
905 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
908 skip(); /* normal character? */
912 /* --------------------------------------------------------------------------
913 * Character escape code sequences:
914 * ------------------------------------------------------------------------*/
916 static struct { /* table of special escape codes */
920 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
921 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
922 {"\'",'\''}, {"v", 11},
923 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
924 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
925 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
926 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
927 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
928 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
929 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
930 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
931 {"SP", 32}, {"DEL", 127},
935 static Int alreadyMatched; /* Record portion of input stream */
936 static char alreadyRead[10]; /* that has been read w/o a match */
938 static Bool local lazyReadMatches(s) /* compare input stream with string */
939 String s; { /* possibly using characters that */
940 int i; /* have already been read */
942 for (i=0; i<alreadyMatched; ++i)
943 if (alreadyRead[i]!=s[i])
946 while (s[i] && s[i]==c0) {
947 alreadyRead[alreadyMatched++]=(char)c0;
955 static Cell local readEscapeChar(isStrLit)/* read escape character */
961 case '&' : if (isStrLit) {
965 ERRMSG(row) "Illegal use of `\\&' in character constant"
969 case '^' : return readCtrlChar();
971 case 'o' : return readOctChar();
972 case 'x' : return readHexChar();
974 default : if (!isISO(c0)) {
975 ERRMSG(row) "Illegal escape sequence"
978 else if (isIn(c0,SPACE)) {
983 ERRMSG(row) "Illegal use of gap in character constant"
987 else if (isIn(c0,DIGIT))
988 return readDecChar();
991 for (alreadyMatched=0; escapes[i].codename; i++)
992 if (lazyReadMatches(escapes[i].codename))
993 return mkChar(escapes[i].codenumber);
995 alreadyRead[alreadyMatched++] = (char)c0;
996 alreadyRead[alreadyMatched++] = '\0';
997 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
1000 return NIL;/*NOTREACHED*/
1003 static Void local skipGap() { /* skip over gap in string literal */
1004 do /* (simplified in Haskell 1.1) */
1009 while (isISO(c0) && isIn(c0,SPACE));
1011 ERRMSG(row) "Missing `\\' terminating string literal gap"
1017 static Cell local readCtrlChar() { /* read escape sequence \^x */
1018 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1022 if ((which = strchr(controls,c0))==NULL) {
1023 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1027 return mkChar(which-controls);
1030 static Cell local readOctChar() { /* read octal character constant */
1035 if ((d = readHexDigit(c0))<0 || d>=8) {
1036 ERRMSG(row) "Empty octal character escape"
1040 if (overflows(n,8,d,MAXCHARVAL)) {
1041 ERRMSG(row) "Octal character escape out of range"
1046 } while ((d = readHexDigit(c0))>=0 && d<8);
1051 static Cell local readHexChar() { /* read hex character constant */
1056 if ((d = readHexDigit(c0))<0) {
1057 ERRMSG(row) "Empty hexadecimal character escape"
1061 if (overflows(n,16,d,MAXCHARVAL)) {
1062 ERRMSG(row) "Hexadecimal character escape out of range"
1067 } while ((d = readHexDigit(c0))>=0);
1072 static Int local readHexDigit(c) /* read single hex digit */
1074 if ('0'<=c && c<='9')
1076 if ('A'<=c && c<='F')
1077 return 10 + (c-'A');
1078 if ('a'<=c && c<='f')
1079 return 10 + (c-'a');
1083 static Cell local readDecChar() { /* read decimal character constant */
1087 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1088 ERRMSG(row) "Decimal character escape out of range"
1091 n = 10*n + (c0-'0');
1093 } while (c0!=EOF && isIn(c0,DIGIT));
1098 /* --------------------------------------------------------------------------
1099 * Produce printable representation of character:
1100 * ------------------------------------------------------------------------*/
1102 String unlexChar(c,quote) /* return string representation of */
1103 Char c; /* character... */
1104 Char quote; { /* protect quote character */
1105 static char buffer[12];
1107 if (c<0) /* deal with sign extended chars.. */
1110 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1111 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1113 buffer[1] = (char)c;
1117 buffer[0] = (char)c;
1121 else { /* look for escape code */
1123 for (escs=0; escapes[escs].codename; escs++)
1124 if (escapes[escs].codenumber==c) {
1125 sprintf(buffer,"\\%s",escapes[escs].codename);
1128 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1133 Void printString(s) /* print string s, using quotes and */
1134 String s; { /* escapes if any parts need them */
1138 while ((c = *t)!=0 && isISO(c)
1139 && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1145 Printf("%s",unlexChar(*t,'"'));
1153 /* -------------------------------------------------------------------------
1154 * Handle special types of input for use in interpreter:
1155 * -----------------------------------------------------------------------*/
1157 Command readCommand(cmds,start,sys) /* read command at start of input */
1158 struct cmd *cmds; /* line in interpreter */
1159 Char start; /* characters introducing a cmd */
1160 Char sys; { /* character for shell escape */
1161 while (c0==' ' || c0 =='\t')
1164 if (c0=='\n') /* look for blank command lines */
1166 if (c0==EOF) /* look for end of input stream */
1168 if (c0==sys) { /* single character system escape */
1172 if (c0==start && c1==sys) { /* two character system escape */
1178 startToken(); /* All cmds start with start */
1179 if (c0==start) /* except default (usually EVAL) */
1180 do { /* which is empty */
1183 } while (c0!=EOF && !isIn(c0,SPACE));
1186 for (; cmds->cmdString; ++cmds)
1187 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1188 (tokenStr[0]==start &&
1189 tokenStr[1]==(cmds->cmdString)[1] &&
1191 return (cmds->cmdCode);
1195 String readFilename() { /* Read filename from input (if any)*/
1196 if (reading==PROJFILE)
1199 while (c0==' ' || c0=='\t')
1202 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1206 while (c0!=EOF && !isIn(c0,SPACE)) {
1209 while (c0!=EOF && c0!='\"') {
1210 Cell c = readAChar(TRUE);
1212 saveTokenChar(charOf(c));
1218 ERRMSG(row) "a closing quote, '\"', was expected"
1231 String readLine() { /* Read command line from input */
1232 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1236 while (c0!='\n' && c0!=EOF) {
1245 /* --------------------------------------------------------------------------
1246 * This lexer supports the Haskell layout rule:
1248 * - Layout area bounded by { ... }, with `;'s in between.
1249 * - A `{' is a HARD indentation and can only be matched by a corresponding
1251 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1252 * is inserted with the column number of the first token after the
1253 * WHERE/LET/OF keyword.
1254 * - When a soft indentation is uppermost on the indetation stack with
1255 * column col' we insert:
1256 * `}' in front of token with column<col' and pop indentation off stack,
1257 * `;' in front of token with column==col'.
1258 * ------------------------------------------------------------------------*/
1260 #define MAXINDENT 100 /* maximum nesting of layout rule */
1261 static Int layout[MAXINDENT+1];/* indentation stack */
1262 #define HARD (-1) /* indicates hard indentation */
1263 static Int indentDepth = (-1); /* current indentation nesting */
1265 static Void local goOffside(col) /* insert offside marker */
1266 Int col; { /* for specified column */
1268 if (indentDepth>=MAXINDENT) {
1269 ERRMSG(row) "Too many levels of program nesting"
1272 layout[++indentDepth] = col;
1275 static Void local unOffside() { /* leave layout rule area */
1280 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1282 return indentDepth>=0 && layout[indentDepth]!=HARD;
1285 /* --------------------------------------------------------------------------
1287 * ------------------------------------------------------------------------*/
1289 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1290 for (;;) /* Strictly speaking, this code is */
1291 if (c0==EOF) /* a little more liberal than the */
1292 return; /* report allows ... */
1295 else if (isIn(c0,SPACE))
1297 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1299 Int origRow = row; /* Save original row number */
1302 while (nesting>0 && c0!=EOF)
1303 if (c0=='{' && c1=='-') {
1308 else if (c0=='-' && c1=='}') {
1318 ERRMSG(origRow) "Unterminated nested comment {- ..."
1322 else if (c0=='-' && c1=='-') { /* One line comment */
1325 while (c0!='\n' && c0!=EOF);
1333 static Bool firstToken; /* Set to TRUE for first token */
1334 static Int firstTokenIs; /* ... with token value stored here */
1336 static Int local yylex() { /* Read next input token ... */
1337 static Bool insertOpen = FALSE;
1338 static Bool insertedToken = FALSE;
1339 static Text textRepeat;
1341 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1343 if (firstToken) { /* Special case for first token */
1347 insertedToken = FALSE;
1348 if (reading==KEYBOARD)
1349 textRepeat = findText(repeatStr);
1350 return firstTokenIs;
1353 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1355 insertedToken = TRUE;
1357 push(yylval = mkInt(row));
1361 /* ----------------------------------------------------------------------
1362 * Skip white space, and insert tokens to support layout rules as reqd.
1363 * --------------------------------------------------------------------*/
1366 startColumn = column;
1367 push(yylval = mkInt(row)); /* default token value is line no. */
1368 /* subsequent changes to yylval must also set top() to the same value */
1370 if (indentDepth>=0) { /* layout rule(s) active ? */
1371 if (insertedToken) /* avoid inserting multiple `;'s */
1372 insertedToken = FALSE; /* or putting `;' after `{' */
1374 if (offsideON && layout[indentDepth]!=HARD) {
1375 if (column<layout[indentDepth]) {
1379 else if (column==layout[indentDepth] && c0!=EOF) {
1380 insertedToken = TRUE;
1386 /* ----------------------------------------------------------------------
1387 * Now try to identify token type:
1388 * --------------------------------------------------------------------*/
1391 case EOF : return 0; /* End of file/input */
1393 /* The next 10 characters make up the `special' category in 1.3 */
1394 case '(' : skip(); return '(';
1395 case ')' : skip(); return ')';
1396 case ',' : skip(); return ',';
1397 case ';' : skip(); return ';';
1398 case '[' : skip(); return '[';
1399 case ']' : skip(); return ']';
1400 case '`' : skip(); return '`';
1401 case '{' : if (offsideON) goOffside(HARD);
1404 case '}' : if (offsideON && indentDepth<0) {
1405 ERRMSG(row) "Misplaced `}'"
1408 if (!(offsideON && layout[indentDepth]!=HARD))
1409 skip(); /* skip over hard }*/
1411 unOffside(); /* otherwise, we have to insert a }*/
1412 return '}'; /* to (try to) avoid an error... */
1414 /* Character and string literals */
1415 case '\'' : top() = yylval = readChar();
1418 case '\"' : top() = yylval = readString();
1423 if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1424 Text it; /* Look for implicit param name */
1427 top() = yylval = ap(IPVAR,it);
1428 return identType=IPVARID;
1432 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1433 Text it; /* Look for record selector name */
1436 top() = yylval = ap(RECSEL,mkExt(it));
1437 return identType=RECSELID;
1440 if (isIn(c0,LARGE)) { /* Look for qualified name */
1441 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1442 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1444 skip(); /* Skip qualifying dot */
1445 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1446 it2 = readOperator();
1447 if (opType==CONOP) {
1448 top() = yylval = mkQConOp(it,it2);
1451 top() = yylval = mkQVarOp(it,it2);
1454 } else { /* Qualified identifier */
1456 if (identType==CONID) {
1457 top() = yylval = mkQCon(it,it2);
1460 top() = yylval = mkQVar(it,it2);
1465 top() = yylval = mkCon(it);
1469 if (isIn(c0,(SMALL|LARGE))) {
1470 Text it = readIdent();
1472 if (it==textCase) return CASEXP;
1473 if (it==textOfK) lookAhead(OF);
1474 if (it==textData) return DATA;
1475 if (it==textType) return TYPE;
1476 if (it==textIf) return IF;
1477 if (it==textThen) return THEN;
1478 if (it==textElse) return ELSE;
1479 if (it==textWhere) lookAhead(WHERE);
1480 if (it==textLet) lookAhead(LET);
1481 if (it==textIn) return IN;
1482 if (it==textInfix) return INFIXN;
1483 if (it==textInfixl) return INFIXL;
1484 if (it==textInfixr) return INFIXR;
1485 if (it==textForeign) return FOREIGN;
1486 if (it==textUnsafe) return UNSAFE;
1487 if (it==textNewtype) return TNEWTYPE;
1488 if (it==textDefault) return DEFAULT;
1489 if (it==textDeriving) return DERIVING;
1490 if (it==textDo) lookAhead(DO);
1491 if (it==textClass) return TCLASS;
1492 if (it==textInstance) return TINSTANCE;
1493 if (it==textModule) return TMODULE;
1494 if (it==textInterface) return INTERFACE;
1495 if (it==textInstImport) return INSTIMPORT;
1496 if (it==textImport) return IMPORT;
1497 if (it==textExport) return EXPORT;
1498 if (it==textDynamic) return DYNAMIC;
1499 if (it==textCcall) return CCALL;
1500 if (it==textStdcall) return STDCALL;
1501 if (it==textUUExport) return UUEXPORT;
1502 if (it==textHiding) return HIDING;
1503 if (it==textQualified) return QUALIFIED;
1504 if (it==textAsMod) return ASMOD;
1505 if (it==textWildcard) return '_';
1506 if (it==textAll && !haskell98) return ALL;
1508 if (it==textWith && !haskell98) lookAhead(WITH);
1509 if (it==textDlet && !haskell98) lookAhead(DLET);
1511 if (it==textUUAll) return ALL;
1512 if (it==textRepeat && reading==KEYBOARD)
1513 return repeatLast();
1515 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1519 if (isIn(c0,SYMBOL)) {
1520 Text it = readOperator();
1522 if (it==textCoco) return COCO;
1523 if (it==textEq) return '=';
1524 if (it==textUpto) return UPTO;
1525 if (it==textAs) return '@';
1526 if (it==textLambda) return '\\';
1527 if (it==textBar) return '|';
1528 if (it==textFrom) return FROM;
1529 if (it==textMinus) return '-';
1530 if (it==textPlus) return '+';
1531 if (it==textBang) return '!';
1532 if (it==textDot) return '.';
1533 if (it==textArrow) return ARROW;
1534 if (it==textLazy) return '~';
1535 if (it==textImplies) return IMPLIES;
1536 if (it==textRepeat && reading==KEYBOARD)
1537 return repeatLast();
1539 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1543 if (isIn(c0,DIGIT)) {
1544 top() = yylval = readNumber();
1548 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1551 return 0; /*NOTREACHED*/
1554 static Int local repeatLast() { /* Obtain last expression entered */
1555 if (isNull(yylval=getLastExpr())) {
1556 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1562 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1563 Text t; { /* by t ... */
1564 String s = textToStr(t);
1565 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1568 Syntax syntaxOf(n) /* Find syntax for name */
1570 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1571 return defaultSyntax(name(n).text);
1572 return name(n).syntax;
1575 /* --------------------------------------------------------------------------
1576 * main entry points to parser/lexer:
1577 * ------------------------------------------------------------------------*/
1579 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1580 Int startWith; { /* determining whether to read a */
1581 firstToken = TRUE; /* script or an expression */
1582 firstTokenIs = startWith;
1583 if (startWith==INTERFACE)
1584 offsideON = FALSE; else
1588 if (yyparse()) { /* This can only be parser overflow */
1589 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1590 EEND; /* in the parser... */
1593 if (!stackEmpty()) /* stack should now be empty */
1594 internal("parseInput");
1598 static String memPrefix = "@mem@";
1599 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1601 Void makeMemScript(mem,fname)
1604 strcat(fname,memPrefix);
1605 itoa((int)mem, fname+strlen(fname), 10);
1608 Bool isMemScript(fname)
1610 return (strstr(fname,memPrefix) != NULL);
1613 String memScriptString(fname)
1615 String p = strstr(fname,memPrefix);
1617 return (String)atoi(p+lenMemPrefix);
1623 Void parseScript(fname,len) /* Read a script, possibly from mem */
1627 if (isMemScript(fname)) {
1628 char* s = memScriptString(fname);
1631 fileInput(fname,len);
1636 Void parseScript(nm,len) /* Read a script */
1638 Long len; { /* Used to set a target for reading */
1645 Void parseExp() { /* Read an expression to evaluate */
1647 setLastExpr(inputExpr);
1650 Void parseContext() { /* Read a context to prove */
1651 parseInput(CONTEXT);
1654 Void parseInterface(nm,len) /* Read a GHC interface file */
1656 Long len; { /* Used to set a target for reading */
1659 parseInput(INTERFACE);
1663 /* --------------------------------------------------------------------------
1665 * ------------------------------------------------------------------------*/
1670 case INSTALL : initCharTab();
1671 textCase = findText("case");
1672 textOfK = findText("of");
1673 textData = findText("data");
1674 textType = findText("type");
1675 textIf = findText("if");
1676 textThen = findText("then");
1677 textElse = findText("else");
1678 textWhere = findText("where");
1679 textLet = findText("let");
1680 textIn = findText("in");
1681 textInfix = findText("infix");
1682 textInfixl = findText("infixl");
1683 textInfixr = findText("infixr");
1684 textForeign = findText("foreign");
1685 textUnsafe = findText("unsafe");
1686 textNewtype = findText("newtype");
1687 textDefault = findText("default");
1688 textDeriving = findText("deriving");
1689 textDo = findText("do");
1690 textClass = findText("class");
1692 textWith = findText("with");
1693 textDlet = findText("dlet");
1695 textInstance = findText("instance");
1696 textCoco = findText("::");
1697 textEq = findText("=");
1698 textUpto = findText("..");
1699 textAs = findText("@");
1700 textLambda = findText("\\");
1701 textBar = findText("|");
1702 textMinus = findText("-");
1703 textPlus = findText("+");
1704 textFrom = findText("<-");
1705 textArrow = findText("->");
1706 textLazy = findText("~");
1707 textBang = findText("!");
1708 textDot = findText(".");
1709 textImplies = findText("=>");
1710 textPrelude = findText("Prelude");
1711 textNum = findText("Num");
1712 textModule = findText("module");
1713 textInterface = findText("__interface");
1714 textInstImport = findText("__instimport");
1715 textExport = findText("export");
1716 textDynamic = findText("dynamic");
1717 textCcall = findText("ccall");
1718 textStdcall = findText("stdcall");
1719 textUUExport = findText("__export");
1720 textImport = findText("import");
1721 textHiding = findText("hiding");
1722 textQualified = findText("qualified");
1723 textAsMod = findText("as");
1724 textWildcard = findText("_");
1725 textAll = findText("forall");
1726 textUUAll = findText("__forall");
1727 varMinus = mkVar(textMinus);
1728 varPlus = mkVar(textPlus);
1729 varBang = mkVar(textBang);
1730 varDot = mkVar(textDot);
1731 varHiding = mkVar(textHiding);
1732 varQualified = mkVar(textQualified);
1733 varAsMod = mkVar(textAsMod);
1734 conMain = mkCon(findText("Main"));
1735 varMain = mkVar(findText("main"));
1741 case RESET : tyconDefns = NIL;
1750 foreignImports= NIL;
1751 foreignExports= NIL;
1759 case BREAK : if (reading==KEYBOARD)
1763 case MARK : mark(tyconDefns);
1771 mark(unqualImports);
1772 mark(foreignImports);
1773 mark(foreignExports);
1791 /*-------------------------------------------------------------------------*/