2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * Hugs 98 is Copyright (c) Mark P Jones, Alastair Reid and the Yale
6 * Haskell Group 1994-99, and is distributed as Open Source software
7 * under the Artistic License; see the file "Artistic" that is included
8 * in the distribution for details.
10 * $RCSfile: input.c,v $
12 * $Date: 1999/02/03 17:08:30 $
13 * ------------------------------------------------------------------------*/
30 /* --------------------------------------------------------------------------
32 * ------------------------------------------------------------------------*/
34 List tyconDefns = NIL; /* type constructor definitions */
35 List typeInDefns = NIL; /* type synonym restrictions */
36 List valDefns = NIL; /* value definitions in script */
37 List classDefns = NIL; /* class defns in script */
38 List instDefns = NIL; /* instance defns in script */
39 List selDefns = NIL; /* list of selector lists */
40 List genDefns = NIL; /* list of generated names */
41 List unqualImports = NIL; /* unqualified import list */
42 List foreignImports = NIL; /* foreign imports */
43 List foreignExports = NIL; /* foreign exportsd */
44 List defaultDefns = NIL; /* default definitions (if any) */
45 Int defaultLine = 0; /* line in which default defs occur*/
46 List evalDefaults = NIL; /* defaults for evaluator */
48 Cell inputExpr = NIL; /* input expression */
49 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
50 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
52 String repeatStr = 0; /* Repeat last expr */
54 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
55 String preprocessor = 0;
58 /* --------------------------------------------------------------------------
59 * Local function prototypes:
60 * ------------------------------------------------------------------------*/
62 static Void local initCharTab Args((Void));
63 static Void local fileInput Args((String,Long));
64 static Bool local literateMode Args((String));
65 static Bool local linecmp Args((String,String));
66 static Int local nextLine Args((Void));
67 static Void local skip Args((Void));
68 static Void local thisLineIs Args((Int));
69 static Void local newlineSkip Args((Void));
70 static Void local closeAnyInput Args((Void));
72 Int yyparse Args((Void)); /* can't stop yacc making this */
73 /* public, but don't advertise */
74 /* it in a header file. */
76 static Void local endToken Args((Void));
77 static Text local readOperator Args((Void));
78 static Text local readIdent Args((Void));
79 static Cell local readRadixNumber Args((Int));
80 static Cell local readNumber Args((Void));
81 static Cell local readChar Args((Void));
82 static Cell local readString Args((Void));
83 static Void local saveStrChr Args((Char));
84 static Cell local readAChar Args((Bool));
86 static Bool local lazyReadMatches Args((String));
87 static Cell local readEscapeChar Args((Bool));
88 static Void local skipGap Args((Void));
89 static Cell local readCtrlChar Args((Void));
90 static Cell local readOctChar Args((Void));
91 static Cell local readHexChar Args((Void));
92 static Int local readHexDigit Args((Char));
93 static Cell local readDecChar Args((Void));
95 static Void local goOffside Args((Int));
96 static Void local unOffside Args((Void));
97 static Bool local canUnOffside Args((Void));
99 static Void local skipWhitespace Args((Void));
100 static Int local yylex Args((Void));
101 static Int local repeatLast Args((Void));
103 static Void local parseInput Args((Int));
105 /* --------------------------------------------------------------------------
106 * Text values for reserved words and special symbols:
107 * ------------------------------------------------------------------------*/
109 static Text textCase, textOfK, textData, textType, textIf;
110 static Text textThen, textElse, textWhere, textLet, textIn;
111 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
112 static Text textDefault, textDeriving, textDo, textClass, textInstance;
114 static Text textCoco, textEq, textUpto, textAs, textLambda;
115 static Text textBar, textMinus, textFrom, textArrow, textLazy;
116 static Text textBang, textDot, textAll, textImplies;
117 static Text textWildcard;
119 static Text textModule, textImport;
120 static Text textHiding, textQualified, textAsMod;
121 static Text textExport, textInterface, textRequires, textUnsafe;
123 Text textNum; /* Num */
124 Text textPrelude; /* Prelude */
125 Text textPlus; /* (+) */
127 static Cell conMain; /* Main */
128 static Cell varMain; /* main */
130 static Cell varMinus; /* (-) */
131 static Cell varPlus; /* (+) */
132 static Cell varBang; /* (!) */
133 static Cell varDot; /* (.) */
134 static Cell varHiding; /* hiding */
135 static Cell varQualified; /* qualified */
136 static Cell varAsMod; /* as */
138 static List imps; /* List of imports to be chased */
141 /* --------------------------------------------------------------------------
142 * Character set handling:
144 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
145 * character set. The following code provides methods for classifying
146 * input characters according to the lexical structure specified by the
147 * report. Hugs should still accept older programs because ASCII is
148 * essentially just a subset of the ISO character set.
150 * Notes: If you want to port Hugs to a machine that uses something
151 * substantially different from the ISO character set, then you will need
152 * to insert additional code to map between character sets.
154 * At some point, the following data structures may be exported in a .h
155 * file to allow the information contained here to be picked up in the
156 * implementation of LibChar is* primitives.
158 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
159 * ------------------------------------------------------------------------*/
161 static Bool charTabBuilt;
162 static unsigned char ctable[NUM_CHARS];
163 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
164 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
174 static Void local initCharTab() { /* Initialize char decode table */
175 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
176 #define setChar(x,c) ctable[c] |= (x)
177 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
178 #define setCopy(x,c) {Int i; \
179 for (i=0; i<NUM_CHARS; ++i) \
184 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
186 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
187 setRange(SMALL, 223,246); /* ISO lower case letters */
188 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
189 setChar (SMALL, '_');
191 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
192 setRange(LARGE, 192,214); /* ISO upper case letters */
193 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
195 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
196 setRange(SYMBOL, 215,215);
197 setChar (SYMBOL, 247);
198 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
200 setChar (IDAFTER, '\''); /* Characters in identifier */
201 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
203 setChar (SPACE, ' '); /* ASCII space character */
204 setChar (SPACE, 160); /* ISO non breaking space */
205 setRange(SPACE, 9,13); /* special whitespace: \t\n\v\f\r */
207 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
208 setChars(PRINT, " '\""); /* Space and quotes */
209 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
219 /* --------------------------------------------------------------------------
220 * Single character input routines:
222 * At the lowest level of input, characters are read one at a time, with the
223 * current character held in c0 and the following (lookahead) character in
224 * c1. The corrdinates of c0 within the file are held in (column,row).
225 * The input stream is advanced by one character using the skip() function.
226 * ------------------------------------------------------------------------*/
228 #define TABSIZE 8 /* spacing between tabstops */
230 #define NOTHING 0 /* what kind of input is being read?*/
231 #define KEYBOARD 1 /* - keyboard/console? */
232 #define SCRIPTFILE 2 /* - script file */
233 #define PROJFILE 3 /* - project file */
234 #define STRING 4 /* - string buffer? */
236 static Int reading = NOTHING;
238 static Target readSoFar;
239 static Int row, column, startColumn;
241 static FILE *inputStream = 0;
242 static Bool thisLiterate;
243 static String nextStringChar; /* next char in string buffer */
245 #if USE_READLINE /* for command line editors */
246 static String currentLine; /* editline or GNU readline */
247 static String nextChar;
248 #define nextConsoleChar() (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
249 extern Void add_history Args((String));
250 extern String readline Args((String));
252 #define nextConsoleChar() getc(stdin)
255 static Int litLines; /* count defn lines in lit script */
256 #define DEFNCHAR '>' /* definition lines begin with this */
257 static Int lastLine; /* records type of last line read: */
258 #define STARTLINE 0 /* - at start of file, none read */
259 #define BLANKLINE 1 /* - blank (may preceed definition) */
260 #define TEXTLINE 2 /* - text comment */
261 #define DEFNLINE 3 /* - line containing definition */
262 #define CODELINE 4 /* - line inside code block */
264 #define BEGINCODE "\\begin{code}"
265 #define ENDCODE "\\end{code}"
268 static char *lineBuffer = NULL; /* getline() does the initial allocation */
270 #define LINEBUFFER_SIZE 1000
271 static char lineBuffer[LINEBUFFER_SIZE];
273 static int lineLength = 0;
274 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
275 static int linePtr = 0;
277 Void consoleInput(prompt) /* prepare to input characters from */
278 String prompt; { /* standard in (i.e. console/kbd) */
279 reading = KEYBOARD; /* keyboard input is Line oriented, */
280 c0 = /* i.e. input terminated by '\n' */
286 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
287 * avoids accidentally freeing currentLine twice.
290 String oldCurrentLine = currentLine;
291 currentLine = 0; /* We may lose the space of currentLine */
292 free(oldCurrentLine); /* if interrupted here - unlikely */
294 currentLine = readline(prompt);
295 nextChar = currentLine;
298 add_history(currentLine);
308 Void projInput(nm) /* prepare to input characters from */
309 String nm; { /* from named project file */
310 if ((inputStream = fopen(nm,"r"))!=0) {
318 ERRMSG(0) "Unable to open project file \"%s\"", nm
323 static Void local fileInput(nm,len) /* prepare to input characters from*/
324 String nm; /* named file (specified length is */
325 Long len; { /* used to set target for reading) */
326 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
328 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
329 char *cmd = malloc(reallen);
331 ERRMSG(0) "Unable to allocate memory for filter command."
334 strcpy(cmd,preprocessor);
337 inputStream = popen(cmd,"r");
340 inputStream = fopen(nm,"r");
343 inputStream = fopen(nm,"r");
346 reading = SCRIPTFILE;
352 lastLine = STARTLINE; /* literate file processing */
356 thisLiterate = literateMode(nm);
360 setGoal("Parsing", (Target)len);
363 ERRMSG(0) "Unable to open file \"%s\"", nm
368 Void stringInput(s) /* prepare to input characters from string */
383 static Bool local literateMode(nm) /* Select literate mode for file */
385 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
387 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
389 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
390 filenamecmp(dot+1,"verb")==0) /* literate scripts */
393 return literateScripts; /* otherwise, use the default */
397 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
398 * I've removed the loop (since newLineSkip contains a loop too) and
399 * replaced the warnings with errors. ADR
402 * To deal with literate \begin{code}...\end{code} blocks,
403 * add a line buffer that rooms the current line. The old c0 and c1
404 * stream pointers are used as before within that buffer -- sof
406 * Upon reading a new line into the line buffer, we check to see if
407 * we're reading in a line containing \begin{code} or \end{code} and
408 * take appropriate action.
411 static Bool local linecmp(s,line) /* compare string with line */
412 String s; /* line may end in whitespace */
415 while (s[i] != '\0' && s[i] == line[i]) {
418 /* s[0..i-1] == line[0..i-1] */
419 if (s[i] != '\0') { /* check s `isPrefixOf` line */
422 while (isIn(line[i], SPACE)) { /* allow whitespace at end of line */
425 return (line[i] == '\0');
428 /* Returns line length (including \n) or 0 upon EOF. */
429 static Int local nextLine()
433 Forget about fgets(), it is utterly braindead.
434 (Assumes \NUL free streams and does not gracefully deal
435 with overflow.) Instead, use GNU libc's getline().
437 lineLength = getline(&lineBuffer, &lineLength, inputStream);
439 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
440 lineLength = strlen(lineBuffer);
444 /* printf("Read: \"%s\"", lineBuffer); */
445 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
448 else if (lineLength >= 2 && lineBuffer[0] == '#' && lineBuffer[1] == '!') {
449 lineBuffer[0]='\n'; /* pretend it's a blank line */
452 } else if (thisLiterate) {
453 if (linecmp(BEGINCODE, lineBuffer)) {
454 if (!inCodeBlock) { /* Entered a code block */
456 lineBuffer[0]='\n'; /* pretend it's a blank line */
461 ERRMSG(row) "\\begin{code} encountered inside code block"
465 else if (linecmp(ENDCODE, lineBuffer)) {
466 if (inCodeBlock) { /* Finished code block */
468 lineBuffer[0]='\n'; /* pretend it's a blank line */
473 ERRMSG(row) "\\end{code} encountered outside code block"
478 /* printf("Read: \"%s\"", lineBuffer); */
482 static Void local skip() { /* move forward one char in input */
483 if (c0!=EOF) { /* stream, updating c0, c1, ... */
484 if (c0=='\n') { /* Adjusting cursor coords as nec. */
487 if (reading==SCRIPTFILE)
491 column += TABSIZE - ((column-1)%TABSIZE);
500 if (reading==SCRIPTFILE)
504 else if (reading==KEYBOARD) {
509 c1 = nextConsoleChar();
510 /* On Win32, hitting ctrl-C causes the next getchar to
511 * fail - returning "-1" to indicate an error.
512 * This is one of the rare cases where "-1" does not mean EOF.
514 if (EOF == c1 && !feof(stdin)) {
519 else if (reading==STRING) {
520 c1 = (unsigned char) *nextStringChar++;
525 if (lineLength <=0 || linePtr == lineLength) {
526 /* Current line, exhausted - get new one */
527 if (nextLine() <= 0) { /* EOF */
532 c1 = (unsigned char)lineBuffer[linePtr++];
536 c1 = (unsigned char)lineBuffer[linePtr++];
543 static Void local thisLineIs(kind) /* register kind of current line */
544 Int kind; { /* & check for literate script errs */
545 if (literateErrors) {
546 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
547 (kind==TEXTLINE && lastLine==DEFNLINE)) {
548 ERRMSG(row) "Program line next to comment"
555 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
556 /* assert(c0=='\n'); */
557 if (reading==SCRIPTFILE && thisLiterate) {
560 if (inCodeBlock) { /* pass chars on definition lines */
561 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
565 if (c0==DEFNCHAR) { /* pass chars on definition lines */
566 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
571 while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank? */
573 if (c0=='\n' || c0==EOF)
574 thisLineIs(BLANKLINE);
576 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
577 while (c0!='\n' && c0!=EOF)
579 } /* by now, c0=='\n' or c0==EOF */
580 } while (c0!=EOF); /* if new line, start again */
582 if (litLines==0 && literateErrors) {
583 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
592 static Void local closeAnyInput() { /* Close input stream, if open, */
593 switch (reading) { /* or skip to end of console line */
595 case SCRIPTFILE : if (inputStream) {
596 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
608 case KEYBOARD : while (c0!=EOF)
615 /* --------------------------------------------------------------------------
616 * Parser: Uses table driven parser generated from parser.y using yacc
617 * ------------------------------------------------------------------------*/
621 /* --------------------------------------------------------------------------
622 * Single token input routines:
624 * The following routines read the values of particular kinds of token given
625 * that the first character of the token has already been located in c0 on
626 * entry to the routine.
627 * ------------------------------------------------------------------------*/
629 #define MAX_TOKEN 4000
630 #define startToken() tokPos = 0
631 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
632 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
633 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
635 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
636 static Int tokPos; /* input position in buffer */
637 static Int identType; /* identifier type: CONID / VARID */
638 static Int opType; /* operator type : CONOP / VAROP */
640 static Void local endToken() { /* check for token overflow */
641 if (tokPos>MAX_TOKEN) {
642 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
645 tokenStr[tokPos] = '\0';
648 static Text local readOperator() { /* read operator symbol */
653 } while (isISO(c0) && isIn(c0,SYMBOL));
654 opType = (tokenStr[0]==':' ? CONOP : VAROP);
656 return findText(tokenStr);
659 static Text local readIdent() { /* read identifier */
664 } while (isISO(c0) && isIn(c0,IDAFTER));
666 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
667 return findText(tokenStr);
670 static Cell local readRadixNumber(r) /* Read literal in specified radix */
671 Int r; { /* from input of the form 0c{digs} */
673 skip(); /* skip leading zero */
674 if ((d=readHexDigit(c1))<0 || d>=r)/* Special case; no digits, lex as */
675 return mkInt(0); /* if it had been written "0 c..." */
685 big = bigShift(big,d,r);
686 else if (overflows(n,r,d,MAXPOSINT))
687 big = bigShift(bigInt(n),d,r);
690 if (overflows(n,r,d,MAXPOSINT)) {
691 ERRMSG(row) "Integer literal out of range"
698 d = readHexDigit(c0);
699 } while (d>=0 && d<r);
701 return nonNull(big) ? big : mkInt(n);
708 static Cell local readNumber() { /* read numeric constant */
710 Bool intTooLarge = FALSE;
713 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
714 return readRadixNumber(16);
715 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
716 return readRadixNumber(8);
721 if (overflows(n,10,(c0-'0'),MAXPOSINT))
726 } while (isISO(c0) && isIn(c0,DIGIT));
728 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
733 return bigStr(tokenStr);
735 ERRMSG(row) "Integer literal out of range"
740 saveTokenChar(c0); /* save decimal point */
742 do { /* process fractional part ... */
745 } while (isISO(c0) && isIn(c0,DIGIT));
747 if (c0=='e' || c0=='E') { /* look for exponent part... */
757 if (!isISO(c0) || !isIn(c0,DIGIT)) {
758 ERRMSG(row) "Missing digits in exponent"
765 } while (isISO(c0) && isIn(c0,DIGIT));
771 ERRMSG(row) "No floating point numbers in this implementation"
775 return mkFloat(stringToFloat(tokenStr));
778 static Cell local readChar() { /* read character constant */
782 if (c0=='\'' || c0=='\n' || c0==EOF) {
783 ERRMSG(row) "Illegal character constant"
787 charRead = readAChar(FALSE);
792 ERRMSG(row) "Improperly terminated character constant"
798 static Cell local readString() { /* read string literal */
803 while (c0!='\"' && c0!='\n' && c0!=EOF) {
806 saveStrChr(charOf(c));
812 ERRMSG(row) "Improperly terminated string"
816 return mkStr(findText(tokenStr));
819 static Void local saveStrChr(c) /* save character in string */
821 if (c!='\0' && c!='\\') { /* save non null char as single char*/
824 else { /* save null char as TWO null chars */
825 if (tokPos+1<MAX_TOKEN) {
835 static Cell local readAChar(isStrLit) /* read single char constant */
836 Bool isStrLit; { /* TRUE => enable \& and gaps */
839 if (c0=='\\') /* escape character? */
840 return readEscapeChar(isStrLit);
842 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
845 skip(); /* normal character? */
849 /* --------------------------------------------------------------------------
850 * Character escape code sequences:
851 * ------------------------------------------------------------------------*/
853 static struct { /* table of special escape codes */
857 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
858 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
859 {"\'",'\''}, {"v", 11},
860 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
861 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
862 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
863 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
864 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
865 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
866 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
867 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
868 {"SP", 32}, {"DEL", 127},
872 static Int alreadyMatched; /* Record portion of input stream */
873 static char alreadyRead[10]; /* that has been read w/o a match */
875 static Bool local lazyReadMatches(s) /* compare input stream with string */
876 String s; { /* possibly using characters that */
877 int i; /* have already been read */
879 for (i=0; i<alreadyMatched; ++i)
880 if (alreadyRead[i]!=s[i])
883 while (s[i] && s[i]==c0) {
884 alreadyRead[alreadyMatched++]=(char)c0;
892 static Cell local readEscapeChar(isStrLit)/* read escape character */
898 case '&' : if (isStrLit) {
902 ERRMSG(row) "Illegal use of `\\&' in character constant"
906 case '^' : return readCtrlChar();
908 case 'o' : return readOctChar();
909 case 'x' : return readHexChar();
911 default : if (!isISO(c0)) {
912 ERRMSG(row) "Illegal escape sequence"
915 else if (isIn(c0,SPACE)) {
920 ERRMSG(row) "Illegal use of gap in character constant"
924 else if (isIn(c0,DIGIT))
925 return readDecChar();
928 for (alreadyMatched=0; escapes[i].codename; i++)
929 if (lazyReadMatches(escapes[i].codename))
930 return mkChar(escapes[i].codenumber);
932 alreadyRead[alreadyMatched++] = (char)c0;
933 alreadyRead[alreadyMatched++] = '\0';
934 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
937 return NIL;/*NOTREACHED*/
940 static Void local skipGap() { /* skip over gap in string literal */
941 do /* (simplified in Haskell 1.1) */
946 while (isISO(c0) && isIn(c0,SPACE));
948 ERRMSG(row) "Missing `\\' terminating string literal gap"
954 static Cell local readCtrlChar() { /* read escape sequence \^x */
955 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
959 if ((which = strchr(controls,c0))==NULL) {
960 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
964 return mkChar(which-controls);
967 static Cell local readOctChar() { /* read octal character constant */
972 if ((d = readHexDigit(c0))<0 || d>=8) {
973 ERRMSG(row) "Empty octal character escape"
977 if (overflows(n,8,d,MAXCHARVAL)) {
978 ERRMSG(row) "Octal character escape out of range"
983 } while ((d = readHexDigit(c0))>=0 && d<8);
988 static Cell local readHexChar() { /* read hex character constant */
993 if ((d = readHexDigit(c0))<0) {
994 ERRMSG(row) "Empty hexadecimal character escape"
998 if (overflows(n,16,d,MAXCHARVAL)) {
999 ERRMSG(row) "Hexadecimal character escape out of range"
1004 } while ((d = readHexDigit(c0))>=0);
1009 static Int local readHexDigit(c) /* read single hex digit */
1011 if ('0'<=c && c<='9')
1013 if ('A'<=c && c<='F')
1014 return 10 + (c-'A');
1015 if ('a'<=c && c<='f')
1016 return 10 + (c-'a');
1020 static Cell local readDecChar() { /* read decimal character constant */
1024 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1025 ERRMSG(row) "Decimal character escape out of range"
1028 n = 10*n + (c0-'0');
1030 } while (c0!=EOF && isIn(c0,DIGIT));
1035 /* --------------------------------------------------------------------------
1036 * Produce printable representation of character:
1037 * ------------------------------------------------------------------------*/
1039 String unlexChar(c,quote) /* return string representation of */
1040 Char c; /* character... */
1041 Char quote; { /* protect quote character */
1042 static char buffer[12];
1044 if (c<0) /* deal with sign extended chars.. */
1047 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1048 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1050 buffer[1] = (char)c;
1054 buffer[0] = (char)c;
1058 else { /* look for escape code */
1060 for (escs=0; escapes[escs].codename; escs++)
1061 if (escapes[escs].codenumber==c) {
1062 sprintf(buffer,"\\%s",escapes[escs].codename);
1065 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1070 Void printString(s) /* print string s, using quotes and */
1071 String s; { /* escapes if any parts need them */
1075 while ((c = *t)!=0 && isISO(c)
1076 && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1082 Printf("%s",unlexChar(*t,'"'));
1090 /* -------------------------------------------------------------------------
1091 * Handle special types of input for use in interpreter:
1092 * -----------------------------------------------------------------------*/
1094 Command readCommand(cmds,start,sys) /* read command at start of input */
1095 struct cmd *cmds; /* line in interpreter */
1096 Char start; /* characters introducing a cmd */
1097 Char sys; { /* character for shell escape */
1098 while (c0==' ' || c0 =='\t')
1101 if (c0=='\n') /* look for blank command lines */
1103 if (c0==EOF) /* look for end of input stream */
1105 if (c0==sys) { /* single character system escape */
1109 if (c0==start && c1==sys) { /* two character system escape */
1115 startToken(); /* All cmds start with start */
1116 if (c0==start) /* except default (usually EVAL) */
1117 do { /* which is empty */
1120 } while (c0!=EOF && !isIn(c0,SPACE));
1123 for (; cmds->cmdString; ++cmds)
1124 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1125 (tokenStr[0]==start &&
1126 tokenStr[1]==(cmds->cmdString)[1] &&
1128 return (cmds->cmdCode);
1132 String readFilename() { /* Read filename from input (if any)*/
1133 if (reading==PROJFILE)
1136 while (c0==' ' || c0=='\t')
1139 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1143 while (c0!=EOF && !isIn(c0,SPACE)) {
1146 while (c0!=EOF && c0!='\"') {
1147 Cell c = readAChar(TRUE);
1149 saveTokenChar(charOf(c));
1155 ERRMSG(row) "a closing quote, '\"', was expected"
1168 String readLine() { /* Read command line from input */
1169 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1173 while (c0!='\n' && c0!=EOF) {
1182 /* --------------------------------------------------------------------------
1183 * This lexer supports the Haskell layout rule:
1185 * - Layout area bounded by { ... }, with `;'s in between.
1186 * - A `{' is a HARD indentation and can only be matched by a corresponding
1188 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1189 * is inserted with the column number of the first token after the
1190 * WHERE/LET/OF keyword.
1191 * - When a soft indentation is uppermost on the indetation stack with
1192 * column col' we insert:
1193 * `}' in front of token with column<col' and pop indentation off stack,
1194 * `;' in front of token with column==col'.
1195 * ------------------------------------------------------------------------*/
1197 #define MAXINDENT 100 /* maximum nesting of layout rule */
1198 static Int layout[MAXINDENT+1];/* indentation stack */
1199 #define HARD (-1) /* indicates hard indentation */
1200 static Int indentDepth = (-1); /* current indentation nesting */
1202 static Void local goOffside(col) /* insert offside marker */
1203 Int col; { /* for specified column */
1204 if (indentDepth>=MAXINDENT) {
1205 ERRMSG(row) "Too many levels of program nesting"
1208 layout[++indentDepth] = col;
1211 static Void local unOffside() { /* leave layout rule area */
1215 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1216 return indentDepth>=0 && layout[indentDepth]!=HARD;
1219 /* --------------------------------------------------------------------------
1221 * ------------------------------------------------------------------------*/
1223 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1224 for (;;) /* Strictly speaking, this code is */
1225 if (c0==EOF) /* a little more liberal than the */
1226 return; /* report allows ... */
1229 else if (isIn(c0,SPACE))
1231 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1233 Int origRow = row; /* Save original row number */
1236 while (nesting>0 && c0!=EOF)
1237 if (c0=='{' && c1=='-') {
1242 else if (c0=='-' && c1=='}') {
1252 ERRMSG(origRow) "Unterminated nested comment {- ..."
1256 else if (c0=='-' && c1=='-') { /* One line comment */
1259 while (c0!='\n' && c0!=EOF);
1267 static Bool firstToken; /* Set to TRUE for first token */
1268 static Int firstTokenIs; /* ... with token value stored here */
1270 static Int local yylex() { /* Read next input token ... */
1271 static Bool insertOpen = FALSE;
1272 static Bool insertedToken = FALSE;
1273 static Text textRepeat;
1275 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1277 if (firstToken) { /* Special case for first token */
1281 insertedToken = FALSE;
1282 if (reading==KEYBOARD)
1283 textRepeat = findText(repeatStr);
1284 return firstTokenIs;
1287 if (insertOpen) { /* insert `soft' opening brace */
1289 insertedToken = TRUE;
1291 push(yylval = mkInt(row));
1295 /* ----------------------------------------------------------------------
1296 * Skip white space, and insert tokens to support layout rules as reqd.
1297 * --------------------------------------------------------------------*/
1300 startColumn = column;
1301 push(yylval = mkInt(row)); /* default token value is line no. */
1302 /* subsequent changes to yylval must also set top() to the same value */
1304 if (indentDepth>=0) { /* layout rule(s) active ? */
1305 if (insertedToken) /* avoid inserting multiple `;'s */
1306 insertedToken = FALSE; /* or putting `;' after `{' */
1308 if (layout[indentDepth]!=HARD) {
1309 if (column<layout[indentDepth]) {
1313 else if (column==layout[indentDepth] && c0!=EOF) {
1314 insertedToken = TRUE;
1320 /* ----------------------------------------------------------------------
1321 * Now try to identify token type:
1322 * --------------------------------------------------------------------*/
1325 case EOF : return 0; /* End of file/input */
1327 /* The next 10 characters make up the `special' category in 1.3 */
1328 case '(' : skip(); return '(';
1329 case ')' : skip(); return ')';
1330 case ',' : skip(); return ',';
1331 case ';' : skip(); return ';';
1332 case '[' : skip(); return '[';
1333 case ']' : skip(); return ']';
1334 case '`' : skip(); return '`';
1335 case '{' : goOffside(HARD);
1338 case '}' : if (indentDepth<0) {
1339 ERRMSG(row) "Misplaced `}'"
1342 if (layout[indentDepth]==HARD) /* skip over hard }*/
1344 unOffside(); /* otherwise, we have to insert a }*/
1345 return '}'; /* to (try to) avoid an error... */
1347 /* Character and string literals */
1348 case '\'' : top() = yylval = readChar();
1351 case '\"' : top() = yylval = readString();
1356 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1357 Text it; /* Look for record selector name */
1360 top() = yylval = ap(RECSEL,mkExt(it));
1361 return identType=RECSELID;
1364 if (isIn(c0,LARGE)) { /* Look for qualified name */
1365 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1366 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1368 skip(); /* Skip qualifying dot */
1369 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1370 it2 = readOperator();
1371 if (opType==CONOP) {
1372 top() = yylval = mkQConOp(it,it2);
1375 top() = yylval = mkQVarOp(it,it2);
1378 } else { /* Qualified identifier */
1380 if (identType==CONID) {
1381 top() = yylval = mkQCon(it,it2);
1384 top() = yylval = mkQVar(it,it2);
1389 top() = yylval = mkCon(it);
1393 if (isIn(c0,(SMALL|LARGE))) {
1394 Text it = readIdent();
1396 if (it==textCase) return CASEXP;
1397 if (it==textOfK) lookAhead(OF);
1398 if (it==textData) return DATA;
1399 if (it==textType) return TYPE;
1400 if (it==textIf) return IF;
1401 if (it==textThen) return THEN;
1402 if (it==textElse) return ELSE;
1403 if (it==textWhere) lookAhead(WHERE);
1404 if (it==textLet) lookAhead(LET);
1405 if (it==textIn) return IN;
1406 if (it==textInfix) return INFIXN;
1407 if (it==textInfixl) return INFIXL;
1408 if (it==textInfixr) return INFIXR;
1409 if (it==textForeign) return FOREIGN;
1410 if (it==textUnsafe) return UNSAFE;
1411 if (it==textNewtype) return TNEWTYPE;
1412 if (it==textDefault) return DEFAULT;
1413 if (it==textDeriving) return DERIVING;
1414 if (it==textDo) lookAhead(DO);
1415 if (it==textClass) return TCLASS;
1416 if (it==textInstance) return TINSTANCE;
1417 if (it==textModule) return TMODULE;
1418 if (it==textImport) return IMPORT;
1419 if (it==textExport) return EXPORT;
1420 if (it==textHiding) return HIDING;
1421 if (it==textQualified) return QUALIFIED;
1422 if (it==textAsMod) return ASMOD;
1423 if (it==textWildcard) return '_';
1424 if (it==textAll && !haskell98) return ALL;
1425 if (it==textRepeat && reading==KEYBOARD)
1426 return repeatLast();
1428 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1432 if (isIn(c0,SYMBOL)) {
1433 Text it = readOperator();
1435 if (it==textCoco) return COCO;
1436 if (it==textEq) return '=';
1437 if (it==textUpto) return UPTO;
1438 if (it==textAs) return '@';
1439 if (it==textLambda) return '\\';
1440 if (it==textBar) return '|';
1441 if (it==textFrom) return FROM;
1442 if (it==textMinus) return '-';
1443 if (it==textPlus) return '+';
1444 if (it==textBang) return '!';
1445 if (it==textDot) return '.';
1446 if (it==textArrow) return ARROW;
1447 if (it==textLazy) return '~';
1448 if (it==textImplies) return IMPLIES;
1449 if (it==textRepeat && reading==KEYBOARD)
1450 return repeatLast();
1452 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1456 if (isIn(c0,DIGIT)) {
1457 top() = yylval = readNumber();
1461 ERRMSG(row) "Unrecognised character `\\%d' in column %d", ((int)c0), column
1463 return 0; /*NOTREACHED*/
1466 static Int local repeatLast() { /* Obtain last expression entered */
1467 if (isNull(yylval=getLastExpr())) {
1468 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1474 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1475 Text t; { /* by t ... */
1476 String s = textToStr(t);
1477 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1480 Syntax syntaxOf(n) /* Find syntax for name */
1482 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1483 return defaultSyntax(name(n).text);
1484 return name(n).syntax;
1487 /* --------------------------------------------------------------------------
1488 * main entry points to parser/lexer:
1489 * ------------------------------------------------------------------------*/
1491 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1492 Int startWith; { /* determining whether to read a */
1493 firstToken = TRUE; /* script or an expression */
1494 firstTokenIs = startWith;
1497 if (yyparse()) { /* This can only be parser overflow */
1498 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1499 EEND; /* in the parser... */
1502 if (!stackEmpty()) /* stack should now be empty */
1503 internal("parseInput");
1507 static String memPrefix = "@mem@";
1508 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1510 Void makeMemScript(mem,fname)
1513 strcat(fname,memPrefix);
1514 itoa((int)mem, fname+strlen(fname), 10);
1517 Bool isMemScript(fname)
1519 return (strstr(fname,memPrefix) != NULL);
1522 String memScriptString(fname)
1524 String p = strstr(fname,memPrefix);
1526 return (String)atoi(p+lenMemPrefix);
1532 Void parseScript(fname,len) /* Read a script, possibly from mem */
1536 if (isMemScript(fname)) {
1537 char* s = memScriptString(fname);
1540 fileInput(fname,len);
1545 Void parseScript(nm,len) /* Read a script */
1547 Long len; { /* Used to set a target for reading */
1554 Void parseExp() { /* Read an expression to evaluate */
1556 setLastExpr(inputExpr);
1559 /* --------------------------------------------------------------------------
1561 * ------------------------------------------------------------------------*/
1566 case INSTALL : initCharTab();
1567 textCase = findText("case");
1568 textOfK = findText("of");
1569 textData = findText("data");
1570 textType = findText("type");
1571 textIf = findText("if");
1572 textThen = findText("then");
1573 textElse = findText("else");
1574 textWhere = findText("where");
1575 textLet = findText("let");
1576 textIn = findText("in");
1577 textInfix = findText("infix");
1578 textInfixl = findText("infixl");
1579 textInfixr = findText("infixr");
1580 textForeign = findText("foreign");
1581 textUnsafe = findText("unsafe");
1582 textNewtype = findText("newtype");
1583 textDefault = findText("default");
1584 textDeriving = findText("deriving");
1585 textDo = findText("do");
1586 textClass = findText("class");
1587 textInstance = findText("instance");
1588 textCoco = findText("::");
1589 textEq = findText("=");
1590 textUpto = findText("..");
1591 textAs = findText("@");
1592 textLambda = findText("\\");
1593 textBar = findText("|");
1594 textMinus = findText("-");
1595 textPlus = findText("+");
1596 textFrom = findText("<-");
1597 textArrow = findText("->");
1598 textLazy = findText("~");
1599 textBang = findText("!");
1600 textDot = findText(".");
1601 textImplies = findText("=>");
1602 textPrelude = findText("Prelude");
1603 textNum = findText("Num");
1604 textModule = findText("module");
1605 textImport = findText("import");
1606 textHiding = findText("hiding");
1607 textQualified = findText("qualified");
1608 textAsMod = findText("as");
1609 textWildcard = findText("_");
1610 textAll = findText("forall");
1611 varMinus = mkVar(textMinus);
1612 varPlus = mkVar(textPlus);
1613 varBang = mkVar(textBang);
1614 varDot = mkVar(textDot);
1615 varHiding = mkVar(textHiding);
1616 varQualified = mkVar(textQualified);
1617 varAsMod = mkVar(textAsMod);
1618 conMain = mkCon(findText("Main"));
1619 varMain = mkVar(findText("main"));
1625 case RESET : tyconDefns = NIL;
1634 foreignImports= NIL;
1635 foreignExports= NIL;
1643 case BREAK : if (reading==KEYBOARD)
1647 case MARK : mark(tyconDefns);
1655 mark(unqualImports);
1656 mark(foreignImports);
1657 mark(foreignExports);
1675 /*-------------------------------------------------------------------------*/