2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * Hugs 98 is Copyright (c) Mark P Jones, Alastair Reid and the Yale
6 * Haskell Group 1994-99, and is distributed as Open Source software
7 * under the Artistic License; see the file "Artistic" that is included
8 * in the distribution for details.
10 * $RCSfile: input.c,v $
12 * $Date: 1999/03/01 14:46:46 $
13 * ------------------------------------------------------------------------*/
30 /* --------------------------------------------------------------------------
32 * ------------------------------------------------------------------------*/
34 List tyconDefns = NIL; /* type constructor definitions */
35 List typeInDefns = NIL; /* type synonym restrictions */
36 List valDefns = NIL; /* value definitions in script */
37 List classDefns = NIL; /* class defns in script */
38 List instDefns = NIL; /* instance defns in script */
39 List selDefns = NIL; /* list of selector lists */
40 List genDefns = NIL; /* list of generated names */
41 List unqualImports = NIL; /* unqualified import list */
42 List foreignImports = NIL; /* foreign imports */
43 List foreignExports = NIL; /* foreign exportsd */
44 List defaultDefns = NIL; /* default definitions (if any) */
45 Int defaultLine = 0; /* line in which default defs occur*/
46 List evalDefaults = NIL; /* defaults for evaluator */
48 Cell inputExpr = NIL; /* input expression */
49 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
50 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
52 String repeatStr = 0; /* Repeat last expr */
54 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
55 String preprocessor = 0;
58 /* --------------------------------------------------------------------------
59 * Local function prototypes:
60 * ------------------------------------------------------------------------*/
62 static Void local initCharTab Args((Void));
63 static Void local fileInput Args((String,Long));
64 static Bool local literateMode Args((String));
65 static Bool local linecmp Args((String,String));
66 static Int local nextLine Args((Void));
67 static Void local skip Args((Void));
68 static Void local thisLineIs Args((Int));
69 static Void local newlineSkip Args((Void));
70 static Void local closeAnyInput Args((Void));
72 Int yyparse Args((Void)); /* can't stop yacc making this */
73 /* public, but don't advertise */
74 /* it in a header file. */
76 static Void local endToken Args((Void));
77 static Text local readOperator Args((Void));
78 static Text local readIdent Args((Void));
79 static Cell local readRadixNumber Args((Int));
80 static Cell local readNumber Args((Void));
81 static Cell local readChar Args((Void));
82 static Cell local readString Args((Void));
83 static Void local saveStrChr Args((Char));
84 static Cell local readAChar Args((Bool));
86 static Bool local lazyReadMatches Args((String));
87 static Cell local readEscapeChar Args((Bool));
88 static Void local skipGap Args((Void));
89 static Cell local readCtrlChar Args((Void));
90 static Cell local readOctChar Args((Void));
91 static Cell local readHexChar Args((Void));
92 static Int local readHexDigit Args((Char));
93 static Cell local readDecChar Args((Void));
95 static Void local goOffside Args((Int));
96 static Void local unOffside Args((Void));
97 static Bool local canUnOffside Args((Void));
99 static Void local skipWhitespace Args((Void));
100 static Int local yylex Args((Void));
101 static Int local repeatLast Args((Void));
103 static Void local parseInput Args((Int));
105 /* --------------------------------------------------------------------------
106 * Text values for reserved words and special symbols:
107 * ------------------------------------------------------------------------*/
109 static Text textCase, textOfK, textData, textType, textIf;
110 static Text textThen, textElse, textWhere, textLet, textIn;
111 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
112 static Text textDefault, textDeriving, textDo, textClass, textInstance;
114 static Text textCoco, textEq, textUpto, textAs, textLambda;
115 static Text textBar, textMinus, textFrom, textArrow, textLazy;
116 static Text textBang, textDot, textAll, textImplies;
117 static Text textWildcard;
119 static Text textModule, textImport;
120 static Text textHiding, textQualified, textAsMod;
121 static Text textExport, textUnsafe;
123 Text textNum; /* Num */
124 Text textPrelude; /* Prelude */
125 Text textPlus; /* (+) */
127 static Cell conMain; /* Main */
128 static Cell varMain; /* main */
130 static Cell varMinus; /* (-) */
131 static Cell varPlus; /* (+) */
132 static Cell varBang; /* (!) */
133 static Cell varDot; /* (.) */
134 static Cell varHiding; /* hiding */
135 static Cell varQualified; /* qualified */
136 static Cell varAsMod; /* as */
138 static List imps; /* List of imports to be chased */
141 /* --------------------------------------------------------------------------
142 * Character set handling:
144 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
145 * character set. The following code provides methods for classifying
146 * input characters according to the lexical structure specified by the
147 * report. Hugs should still accept older programs because ASCII is
148 * essentially just a subset of the ISO character set.
150 * Notes: If you want to port Hugs to a machine that uses something
151 * substantially different from the ISO character set, then you will need
152 * to insert additional code to map between character sets.
154 * At some point, the following data structures may be exported in a .h
155 * file to allow the information contained here to be picked up in the
156 * implementation of LibChar is* primitives.
158 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
159 * ------------------------------------------------------------------------*/
161 static Bool charTabBuilt;
162 static unsigned char ctable[NUM_CHARS];
163 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
164 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
174 static Void local initCharTab() { /* Initialize char decode table */
175 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
176 #define setChar(x,c) ctable[c] |= (x)
177 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
178 #define setCopy(x,c) {Int i; \
179 for (i=0; i<NUM_CHARS; ++i) \
184 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
186 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
187 setRange(SMALL, 223,246); /* ISO lower case letters */
188 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
189 setChar (SMALL, '_');
191 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
192 setRange(LARGE, 192,214); /* ISO upper case letters */
193 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
195 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
196 setRange(SYMBOL, 215,215);
197 setChar (SYMBOL, 247);
198 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
200 setChar (IDAFTER, '\''); /* Characters in identifier */
201 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
203 setChar (SPACE, ' '); /* ASCII space character */
204 setChar (SPACE, 160); /* ISO non breaking space */
205 setRange(SPACE, 9,13); /* special whitespace: \t\n\v\f\r */
207 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
208 setChars(PRINT, " '\""); /* Space and quotes */
209 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
219 /* --------------------------------------------------------------------------
220 * Single character input routines:
222 * At the lowest level of input, characters are read one at a time, with the
223 * current character held in c0 and the following (lookahead) character in
224 * c1. The corrdinates of c0 within the file are held in (column,row).
225 * The input stream is advanced by one character using the skip() function.
226 * ------------------------------------------------------------------------*/
228 #define TABSIZE 8 /* spacing between tabstops */
230 #define NOTHING 0 /* what kind of input is being read?*/
231 #define KEYBOARD 1 /* - keyboard/console? */
232 #define SCRIPTFILE 2 /* - script file */
233 #define PROJFILE 3 /* - project file */
234 #define STRING 4 /* - string buffer? */
236 static Int reading = NOTHING;
238 static Target readSoFar;
239 static Int row, column, startColumn;
241 static FILE *inputStream = 0;
242 static Bool thisLiterate;
243 static String nextStringChar; /* next char in string buffer */
245 #if USE_READLINE /* for command line editors */
246 static String currentLine; /* editline or GNU readline */
247 static String nextChar;
248 #define nextConsoleChar() (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
249 extern Void add_history Args((String));
250 extern String readline Args((String));
252 #define nextConsoleChar() getc(stdin)
255 static Int litLines; /* count defn lines in lit script */
256 #define DEFNCHAR '>' /* definition lines begin with this */
257 static Int lastLine; /* records type of last line read: */
258 #define STARTLINE 0 /* - at start of file, none read */
259 #define BLANKLINE 1 /* - blank (may preceed definition) */
260 #define TEXTLINE 2 /* - text comment */
261 #define DEFNLINE 3 /* - line containing definition */
262 #define CODELINE 4 /* - line inside code block */
264 #define BEGINCODE "\\begin{code}"
265 #define ENDCODE "\\end{code}"
268 static char *lineBuffer = NULL; /* getline() does the initial allocation */
270 #define LINEBUFFER_SIZE 1000
271 static char lineBuffer[LINEBUFFER_SIZE];
273 static int lineLength = 0;
274 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
275 static int linePtr = 0;
277 Void consoleInput(prompt) /* prepare to input characters from */
278 String prompt; { /* standard in (i.e. console/kbd) */
279 reading = KEYBOARD; /* keyboard input is Line oriented, */
280 c0 = /* i.e. input terminated by '\n' */
286 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
287 * avoids accidentally freeing currentLine twice.
290 String oldCurrentLine = currentLine;
291 currentLine = 0; /* We may lose the space of currentLine */
292 free(oldCurrentLine); /* if interrupted here - unlikely */
294 currentLine = readline(prompt);
295 nextChar = currentLine;
298 add_history(currentLine);
308 Void projInput(nm) /* prepare to input characters from */
309 String nm; { /* from named project file */
310 if ((inputStream = fopen(nm,"r"))!=0) {
318 ERRMSG(0) "Unable to open project file \"%s\"", nm
323 static Void local fileInput(nm,len) /* prepare to input characters from*/
324 String nm; /* named file (specified length is */
325 Long len; { /* used to set target for reading) */
326 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
328 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
329 char *cmd = malloc(reallen);
331 ERRMSG(0) "Unable to allocate memory for filter command."
334 strcpy(cmd,preprocessor);
337 inputStream = popen(cmd,"r");
340 inputStream = fopen(nm,"r");
343 inputStream = fopen(nm,"r");
346 reading = SCRIPTFILE;
352 lastLine = STARTLINE; /* literate file processing */
356 thisLiterate = literateMode(nm);
360 setGoal("Parsing", (Target)len);
363 ERRMSG(0) "Unable to open file \"%s\"", nm
368 Void stringInput(s) /* prepare to input characters from string */
383 static Bool local literateMode(nm) /* Select literate mode for file */
385 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
387 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
389 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
390 filenamecmp(dot+1,"verb")==0) /* literate scripts */
393 return literateScripts; /* otherwise, use the default */
397 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
398 * I've removed the loop (since newLineSkip contains a loop too) and
399 * replaced the warnings with errors. ADR
402 * To deal with literate \begin{code}...\end{code} blocks,
403 * add a line buffer that rooms the current line. The old c0 and c1
404 * stream pointers are used as before within that buffer -- sof
406 * Upon reading a new line into the line buffer, we check to see if
407 * we're reading in a line containing \begin{code} or \end{code} and
408 * take appropriate action.
411 static Bool local linecmp(s,line) /* compare string with line */
412 String s; /* line may end in whitespace */
415 while (s[i] != '\0' && s[i] == line[i]) {
418 /* s[0..i-1] == line[0..i-1] */
419 if (s[i] != '\0') { /* check s `isPrefixOf` line */
422 while (isIn(line[i], SPACE)) { /* allow whitespace at end of line */
425 return (line[i] == '\0');
428 /* Returns line length (including \n) or 0 upon EOF. */
429 static Int local nextLine()
433 Forget about fgets(), it is utterly braindead.
434 (Assumes \NUL free streams and does not gracefully deal
435 with overflow.) Instead, use GNU libc's getline().
437 lineLength = getline(&lineBuffer, &lineLength, inputStream);
439 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
440 lineLength = strlen(lineBuffer);
444 /* printf("Read: \"%s\"", lineBuffer); */
445 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
448 else if (lineLength >= 2 && lineBuffer[0] == '#' && lineBuffer[1] == '!') {
449 lineBuffer[0]='\n'; /* pretend it's a blank line */
452 } else if (thisLiterate) {
453 if (linecmp(BEGINCODE, lineBuffer)) {
454 if (!inCodeBlock) { /* Entered a code block */
456 lineBuffer[0]='\n'; /* pretend it's a blank line */
461 ERRMSG(row) "\\begin{code} encountered inside code block"
465 else if (linecmp(ENDCODE, lineBuffer)) {
466 if (inCodeBlock) { /* Finished code block */
468 lineBuffer[0]='\n'; /* pretend it's a blank line */
473 ERRMSG(row) "\\end{code} encountered outside code block"
478 /* printf("Read: \"%s\"", lineBuffer); */
482 static Void local skip() { /* move forward one char in input */
483 if (c0!=EOF) { /* stream, updating c0, c1, ... */
484 if (c0=='\n') { /* Adjusting cursor coords as nec. */
487 if (reading==SCRIPTFILE)
491 column += TABSIZE - ((column-1)%TABSIZE);
500 if (reading==SCRIPTFILE)
504 else if (reading==KEYBOARD) {
509 c1 = nextConsoleChar();
510 /* On Win32, hitting ctrl-C causes the next getchar to
511 * fail - returning "-1" to indicate an error.
512 * This is one of the rare cases where "-1" does not mean EOF.
514 if (EOF == c1 && !feof(stdin)) {
519 else if (reading==STRING) {
520 c1 = (unsigned char) *nextStringChar++;
525 if (lineLength <=0 || linePtr == lineLength) {
526 /* Current line, exhausted - get new one */
527 if (nextLine() <= 0) { /* EOF */
532 c1 = (unsigned char)lineBuffer[linePtr++];
536 c1 = (unsigned char)lineBuffer[linePtr++];
543 static Void local thisLineIs(kind) /* register kind of current line */
544 Int kind; { /* & check for literate script errs */
545 if (literateErrors) {
546 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
547 (kind==TEXTLINE && lastLine==DEFNLINE)) {
548 ERRMSG(row) "Program line next to comment"
555 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
556 /* assert(c0=='\n'); */
557 if (reading==SCRIPTFILE && thisLiterate) {
560 if (inCodeBlock) { /* pass chars on definition lines */
561 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
565 if (c0==DEFNCHAR) { /* pass chars on definition lines */
566 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
571 while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank? */
573 if (c0=='\n' || c0==EOF)
574 thisLineIs(BLANKLINE);
576 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
577 while (c0!='\n' && c0!=EOF)
579 } /* by now, c0=='\n' or c0==EOF */
580 } while (c0!=EOF); /* if new line, start again */
582 if (litLines==0 && literateErrors) {
583 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
592 static Void local closeAnyInput() { /* Close input stream, if open, */
593 switch (reading) { /* or skip to end of console line */
595 case SCRIPTFILE : if (inputStream) {
596 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
608 case KEYBOARD : while (c0!=EOF)
615 /* --------------------------------------------------------------------------
616 * Parser: Uses table driven parser generated from parser.y using yacc
617 * ------------------------------------------------------------------------*/
621 /* --------------------------------------------------------------------------
622 * Single token input routines:
624 * The following routines read the values of particular kinds of token given
625 * that the first character of the token has already been located in c0 on
626 * entry to the routine.
627 * ------------------------------------------------------------------------*/
629 #define MAX_TOKEN 4000
630 #define startToken() tokPos = 0
631 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
632 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
633 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
635 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
636 static Int tokPos; /* input position in buffer */
637 static Int identType; /* identifier type: CONID / VARID */
638 static Int opType; /* operator type : CONOP / VAROP */
640 static Void local endToken() { /* check for token overflow */
641 if (tokPos>MAX_TOKEN) {
642 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
645 tokenStr[tokPos] = '\0';
648 static Text local readOperator() { /* read operator symbol */
653 } while (isISO(c0) && isIn(c0,SYMBOL));
654 opType = (tokenStr[0]==':' ? CONOP : VAROP);
656 return findText(tokenStr);
659 static Text local readIdent() { /* read identifier */
664 } while (isISO(c0) && isIn(c0,IDAFTER));
666 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
667 return findText(tokenStr);
670 static Cell local readRadixNumber(r) /* Read literal in specified radix */
671 Int r; { /* from input of the form 0c{digs} */
673 skip(); /* skip leading zero */
674 if ((d=readHexDigit(c1))<0 || d>=r)/* Special case; no digits, lex as */
675 return mkInt(0); /* if it had been written "0 c..." */
685 big = bigShift(big,d,r);
686 else if (overflows(n,r,d,MAXPOSINT))
687 big = bigShift(bigInt(n),d,r);
690 if (overflows(n,r,d,MAXPOSINT)) {
691 ERRMSG(row) "Integer literal out of range"
698 d = readHexDigit(c0);
699 } while (d>=0 && d<r);
701 return nonNull(big) ? big : mkInt(n);
708 static Cell local readNumber() { /* read numeric constant */
710 Bool intTooLarge = FALSE;
713 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
714 return readRadixNumber(16);
715 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
716 return readRadixNumber(8);
721 if (overflows(n,10,(c0-'0'),MAXPOSINT))
726 } while (isISO(c0) && isIn(c0,DIGIT));
728 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
733 return bigStr(tokenStr);
735 ERRMSG(row) "Integer literal out of range"
740 saveTokenChar(c0); /* save decimal point */
742 do { /* process fractional part ... */
745 } while (isISO(c0) && isIn(c0,DIGIT));
747 if (c0=='e' || c0=='E') { /* look for exponent part... */
757 if (!isISO(c0) || !isIn(c0,DIGIT)) {
758 ERRMSG(row) "Missing digits in exponent"
765 } while (isISO(c0) && isIn(c0,DIGIT));
770 return mkFloat(stringToFloat(tokenStr));
773 static Cell local readChar() { /* read character constant */
777 if (c0=='\'' || c0=='\n' || c0==EOF) {
778 ERRMSG(row) "Illegal character constant"
782 charRead = readAChar(FALSE);
787 ERRMSG(row) "Improperly terminated character constant"
793 static Cell local readString() { /* read string literal */
798 while (c0!='\"' && c0!='\n' && c0!=EOF) {
801 saveStrChr(charOf(c));
807 ERRMSG(row) "Improperly terminated string"
811 return mkStr(findText(tokenStr));
814 static Void local saveStrChr(c) /* save character in string */
816 if (c!='\0' && c!='\\') { /* save non null char as single char*/
819 else { /* save null char as TWO null chars */
820 if (tokPos+1<MAX_TOKEN) {
830 static Cell local readAChar(isStrLit) /* read single char constant */
831 Bool isStrLit; { /* TRUE => enable \& and gaps */
834 if (c0=='\\') /* escape character? */
835 return readEscapeChar(isStrLit);
837 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
840 skip(); /* normal character? */
844 /* --------------------------------------------------------------------------
845 * Character escape code sequences:
846 * ------------------------------------------------------------------------*/
848 static struct { /* table of special escape codes */
852 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
853 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
854 {"\'",'\''}, {"v", 11},
855 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
856 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
857 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
858 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
859 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
860 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
861 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
862 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
863 {"SP", 32}, {"DEL", 127},
867 static Int alreadyMatched; /* Record portion of input stream */
868 static char alreadyRead[10]; /* that has been read w/o a match */
870 static Bool local lazyReadMatches(s) /* compare input stream with string */
871 String s; { /* possibly using characters that */
872 int i; /* have already been read */
874 for (i=0; i<alreadyMatched; ++i)
875 if (alreadyRead[i]!=s[i])
878 while (s[i] && s[i]==c0) {
879 alreadyRead[alreadyMatched++]=(char)c0;
887 static Cell local readEscapeChar(isStrLit)/* read escape character */
893 case '&' : if (isStrLit) {
897 ERRMSG(row) "Illegal use of `\\&' in character constant"
901 case '^' : return readCtrlChar();
903 case 'o' : return readOctChar();
904 case 'x' : return readHexChar();
906 default : if (!isISO(c0)) {
907 ERRMSG(row) "Illegal escape sequence"
910 else if (isIn(c0,SPACE)) {
915 ERRMSG(row) "Illegal use of gap in character constant"
919 else if (isIn(c0,DIGIT))
920 return readDecChar();
923 for (alreadyMatched=0; escapes[i].codename; i++)
924 if (lazyReadMatches(escapes[i].codename))
925 return mkChar(escapes[i].codenumber);
927 alreadyRead[alreadyMatched++] = (char)c0;
928 alreadyRead[alreadyMatched++] = '\0';
929 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
932 return NIL;/*NOTREACHED*/
935 static Void local skipGap() { /* skip over gap in string literal */
936 do /* (simplified in Haskell 1.1) */
941 while (isISO(c0) && isIn(c0,SPACE));
943 ERRMSG(row) "Missing `\\' terminating string literal gap"
949 static Cell local readCtrlChar() { /* read escape sequence \^x */
950 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
954 if ((which = strchr(controls,c0))==NULL) {
955 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
959 return mkChar(which-controls);
962 static Cell local readOctChar() { /* read octal character constant */
967 if ((d = readHexDigit(c0))<0 || d>=8) {
968 ERRMSG(row) "Empty octal character escape"
972 if (overflows(n,8,d,MAXCHARVAL)) {
973 ERRMSG(row) "Octal character escape out of range"
978 } while ((d = readHexDigit(c0))>=0 && d<8);
983 static Cell local readHexChar() { /* read hex character constant */
988 if ((d = readHexDigit(c0))<0) {
989 ERRMSG(row) "Empty hexadecimal character escape"
993 if (overflows(n,16,d,MAXCHARVAL)) {
994 ERRMSG(row) "Hexadecimal character escape out of range"
999 } while ((d = readHexDigit(c0))>=0);
1004 static Int local readHexDigit(c) /* read single hex digit */
1006 if ('0'<=c && c<='9')
1008 if ('A'<=c && c<='F')
1009 return 10 + (c-'A');
1010 if ('a'<=c && c<='f')
1011 return 10 + (c-'a');
1015 static Cell local readDecChar() { /* read decimal character constant */
1019 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1020 ERRMSG(row) "Decimal character escape out of range"
1023 n = 10*n + (c0-'0');
1025 } while (c0!=EOF && isIn(c0,DIGIT));
1030 /* --------------------------------------------------------------------------
1031 * Produce printable representation of character:
1032 * ------------------------------------------------------------------------*/
1034 String unlexChar(c,quote) /* return string representation of */
1035 Char c; /* character... */
1036 Char quote; { /* protect quote character */
1037 static char buffer[12];
1039 if (c<0) /* deal with sign extended chars.. */
1042 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1043 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1045 buffer[1] = (char)c;
1049 buffer[0] = (char)c;
1053 else { /* look for escape code */
1055 for (escs=0; escapes[escs].codename; escs++)
1056 if (escapes[escs].codenumber==c) {
1057 sprintf(buffer,"\\%s",escapes[escs].codename);
1060 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1065 Void printString(s) /* print string s, using quotes and */
1066 String s; { /* escapes if any parts need them */
1070 while ((c = *t)!=0 && isISO(c)
1071 && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1077 Printf("%s",unlexChar(*t,'"'));
1085 /* -------------------------------------------------------------------------
1086 * Handle special types of input for use in interpreter:
1087 * -----------------------------------------------------------------------*/
1089 Command readCommand(cmds,start,sys) /* read command at start of input */
1090 struct cmd *cmds; /* line in interpreter */
1091 Char start; /* characters introducing a cmd */
1092 Char sys; { /* character for shell escape */
1093 while (c0==' ' || c0 =='\t')
1096 if (c0=='\n') /* look for blank command lines */
1098 if (c0==EOF) /* look for end of input stream */
1100 if (c0==sys) { /* single character system escape */
1104 if (c0==start && c1==sys) { /* two character system escape */
1110 startToken(); /* All cmds start with start */
1111 if (c0==start) /* except default (usually EVAL) */
1112 do { /* which is empty */
1115 } while (c0!=EOF && !isIn(c0,SPACE));
1118 for (; cmds->cmdString; ++cmds)
1119 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1120 (tokenStr[0]==start &&
1121 tokenStr[1]==(cmds->cmdString)[1] &&
1123 return (cmds->cmdCode);
1127 String readFilename() { /* Read filename from input (if any)*/
1128 if (reading==PROJFILE)
1131 while (c0==' ' || c0=='\t')
1134 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1138 while (c0!=EOF && !isIn(c0,SPACE)) {
1141 while (c0!=EOF && c0!='\"') {
1142 Cell c = readAChar(TRUE);
1144 saveTokenChar(charOf(c));
1150 ERRMSG(row) "a closing quote, '\"', was expected"
1163 String readLine() { /* Read command line from input */
1164 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1168 while (c0!='\n' && c0!=EOF) {
1177 /* --------------------------------------------------------------------------
1178 * This lexer supports the Haskell layout rule:
1180 * - Layout area bounded by { ... }, with `;'s in between.
1181 * - A `{' is a HARD indentation and can only be matched by a corresponding
1183 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1184 * is inserted with the column number of the first token after the
1185 * WHERE/LET/OF keyword.
1186 * - When a soft indentation is uppermost on the indetation stack with
1187 * column col' we insert:
1188 * `}' in front of token with column<col' and pop indentation off stack,
1189 * `;' in front of token with column==col'.
1190 * ------------------------------------------------------------------------*/
1192 #define MAXINDENT 100 /* maximum nesting of layout rule */
1193 static Int layout[MAXINDENT+1];/* indentation stack */
1194 #define HARD (-1) /* indicates hard indentation */
1195 static Int indentDepth = (-1); /* current indentation nesting */
1197 static Void local goOffside(col) /* insert offside marker */
1198 Int col; { /* for specified column */
1199 if (indentDepth>=MAXINDENT) {
1200 ERRMSG(row) "Too many levels of program nesting"
1203 layout[++indentDepth] = col;
1206 static Void local unOffside() { /* leave layout rule area */
1210 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1211 return indentDepth>=0 && layout[indentDepth]!=HARD;
1214 /* --------------------------------------------------------------------------
1216 * ------------------------------------------------------------------------*/
1218 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1219 for (;;) /* Strictly speaking, this code is */
1220 if (c0==EOF) /* a little more liberal than the */
1221 return; /* report allows ... */
1224 else if (isIn(c0,SPACE))
1226 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1228 Int origRow = row; /* Save original row number */
1231 while (nesting>0 && c0!=EOF)
1232 if (c0=='{' && c1=='-') {
1237 else if (c0=='-' && c1=='}') {
1247 ERRMSG(origRow) "Unterminated nested comment {- ..."
1251 else if (c0=='-' && c1=='-') { /* One line comment */
1254 while (c0!='\n' && c0!=EOF);
1262 static Bool firstToken; /* Set to TRUE for first token */
1263 static Int firstTokenIs; /* ... with token value stored here */
1265 static Int local yylex() { /* Read next input token ... */
1266 static Bool insertOpen = FALSE;
1267 static Bool insertedToken = FALSE;
1268 static Text textRepeat;
1270 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1272 if (firstToken) { /* Special case for first token */
1276 insertedToken = FALSE;
1277 if (reading==KEYBOARD)
1278 textRepeat = findText(repeatStr);
1279 return firstTokenIs;
1282 if (insertOpen) { /* insert `soft' opening brace */
1284 insertedToken = TRUE;
1286 push(yylval = mkInt(row));
1290 /* ----------------------------------------------------------------------
1291 * Skip white space, and insert tokens to support layout rules as reqd.
1292 * --------------------------------------------------------------------*/
1295 startColumn = column;
1296 push(yylval = mkInt(row)); /* default token value is line no. */
1297 /* subsequent changes to yylval must also set top() to the same value */
1299 if (indentDepth>=0) { /* layout rule(s) active ? */
1300 if (insertedToken) /* avoid inserting multiple `;'s */
1301 insertedToken = FALSE; /* or putting `;' after `{' */
1303 if (layout[indentDepth]!=HARD) {
1304 if (column<layout[indentDepth]) {
1308 else if (column==layout[indentDepth] && c0!=EOF) {
1309 insertedToken = TRUE;
1315 /* ----------------------------------------------------------------------
1316 * Now try to identify token type:
1317 * --------------------------------------------------------------------*/
1320 case EOF : return 0; /* End of file/input */
1322 /* The next 10 characters make up the `special' category in 1.3 */
1323 case '(' : skip(); return '(';
1324 case ')' : skip(); return ')';
1325 case ',' : skip(); return ',';
1326 case ';' : skip(); return ';';
1327 case '[' : skip(); return '[';
1328 case ']' : skip(); return ']';
1329 case '`' : skip(); return '`';
1330 case '{' : goOffside(HARD);
1333 case '}' : if (indentDepth<0) {
1334 ERRMSG(row) "Misplaced `}'"
1337 if (layout[indentDepth]==HARD) /* skip over hard }*/
1339 unOffside(); /* otherwise, we have to insert a }*/
1340 return '}'; /* to (try to) avoid an error... */
1342 /* Character and string literals */
1343 case '\'' : top() = yylval = readChar();
1346 case '\"' : top() = yylval = readString();
1351 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1352 Text it; /* Look for record selector name */
1355 top() = yylval = ap(RECSEL,mkExt(it));
1356 return identType=RECSELID;
1359 if (isIn(c0,LARGE)) { /* Look for qualified name */
1360 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1361 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1363 skip(); /* Skip qualifying dot */
1364 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1365 it2 = readOperator();
1366 if (opType==CONOP) {
1367 top() = yylval = mkQConOp(it,it2);
1370 top() = yylval = mkQVarOp(it,it2);
1373 } else { /* Qualified identifier */
1375 if (identType==CONID) {
1376 top() = yylval = mkQCon(it,it2);
1379 top() = yylval = mkQVar(it,it2);
1384 top() = yylval = mkCon(it);
1388 if (isIn(c0,(SMALL|LARGE))) {
1389 Text it = readIdent();
1391 if (it==textCase) return CASEXP;
1392 if (it==textOfK) lookAhead(OF);
1393 if (it==textData) return DATA;
1394 if (it==textType) return TYPE;
1395 if (it==textIf) return IF;
1396 if (it==textThen) return THEN;
1397 if (it==textElse) return ELSE;
1398 if (it==textWhere) lookAhead(WHERE);
1399 if (it==textLet) lookAhead(LET);
1400 if (it==textIn) return IN;
1401 if (it==textInfix) return INFIXN;
1402 if (it==textInfixl) return INFIXL;
1403 if (it==textInfixr) return INFIXR;
1404 if (it==textForeign) return FOREIGN;
1405 if (it==textUnsafe) return UNSAFE;
1406 if (it==textNewtype) return TNEWTYPE;
1407 if (it==textDefault) return DEFAULT;
1408 if (it==textDeriving) return DERIVING;
1409 if (it==textDo) lookAhead(DO);
1410 if (it==textClass) return TCLASS;
1411 if (it==textInstance) return TINSTANCE;
1412 if (it==textModule) return TMODULE;
1413 if (it==textImport) return IMPORT;
1414 if (it==textExport) return EXPORT;
1415 if (it==textHiding) return HIDING;
1416 if (it==textQualified) return QUALIFIED;
1417 if (it==textAsMod) return ASMOD;
1418 if (it==textWildcard) return '_';
1419 if (it==textAll && !haskell98) return ALL;
1420 if (it==textRepeat && reading==KEYBOARD)
1421 return repeatLast();
1423 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1427 if (isIn(c0,SYMBOL)) {
1428 Text it = readOperator();
1430 if (it==textCoco) return COCO;
1431 if (it==textEq) return '=';
1432 if (it==textUpto) return UPTO;
1433 if (it==textAs) return '@';
1434 if (it==textLambda) return '\\';
1435 if (it==textBar) return '|';
1436 if (it==textFrom) return FROM;
1437 if (it==textMinus) return '-';
1438 if (it==textPlus) return '+';
1439 if (it==textBang) return '!';
1440 if (it==textDot) return '.';
1441 if (it==textArrow) return ARROW;
1442 if (it==textLazy) return '~';
1443 if (it==textImplies) return IMPLIES;
1444 if (it==textRepeat && reading==KEYBOARD)
1445 return repeatLast();
1447 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1451 if (isIn(c0,DIGIT)) {
1452 top() = yylval = readNumber();
1456 ERRMSG(row) "Unrecognised character `\\%d' in column %d", ((int)c0), column
1458 return 0; /*NOTREACHED*/
1461 static Int local repeatLast() { /* Obtain last expression entered */
1462 if (isNull(yylval=getLastExpr())) {
1463 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1469 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1470 Text t; { /* by t ... */
1471 String s = textToStr(t);
1472 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1475 Syntax syntaxOf(n) /* Find syntax for name */
1477 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1478 return defaultSyntax(name(n).text);
1479 return name(n).syntax;
1482 /* --------------------------------------------------------------------------
1483 * main entry points to parser/lexer:
1484 * ------------------------------------------------------------------------*/
1486 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1487 Int startWith; { /* determining whether to read a */
1488 firstToken = TRUE; /* script or an expression */
1489 firstTokenIs = startWith;
1492 if (yyparse()) { /* This can only be parser overflow */
1493 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1494 EEND; /* in the parser... */
1497 if (!stackEmpty()) /* stack should now be empty */
1498 internal("parseInput");
1502 static String memPrefix = "@mem@";
1503 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1505 Void makeMemScript(mem,fname)
1508 strcat(fname,memPrefix);
1509 itoa((int)mem, fname+strlen(fname), 10);
1512 Bool isMemScript(fname)
1514 return (strstr(fname,memPrefix) != NULL);
1517 String memScriptString(fname)
1519 String p = strstr(fname,memPrefix);
1521 return (String)atoi(p+lenMemPrefix);
1527 Void parseScript(fname,len) /* Read a script, possibly from mem */
1531 if (isMemScript(fname)) {
1532 char* s = memScriptString(fname);
1535 fileInput(fname,len);
1540 Void parseScript(nm,len) /* Read a script */
1542 Long len; { /* Used to set a target for reading */
1549 Void parseExp() { /* Read an expression to evaluate */
1551 setLastExpr(inputExpr);
1554 /* --------------------------------------------------------------------------
1556 * ------------------------------------------------------------------------*/
1561 case INSTALL : initCharTab();
1562 textCase = findText("case");
1563 textOfK = findText("of");
1564 textData = findText("data");
1565 textType = findText("type");
1566 textIf = findText("if");
1567 textThen = findText("then");
1568 textElse = findText("else");
1569 textWhere = findText("where");
1570 textLet = findText("let");
1571 textIn = findText("in");
1572 textInfix = findText("infix");
1573 textInfixl = findText("infixl");
1574 textInfixr = findText("infixr");
1575 textForeign = findText("foreign");
1576 textUnsafe = findText("unsafe");
1577 textNewtype = findText("newtype");
1578 textDefault = findText("default");
1579 textDeriving = findText("deriving");
1580 textDo = findText("do");
1581 textClass = findText("class");
1582 textInstance = findText("instance");
1583 textCoco = findText("::");
1584 textEq = findText("=");
1585 textUpto = findText("..");
1586 textAs = findText("@");
1587 textLambda = findText("\\");
1588 textBar = findText("|");
1589 textMinus = findText("-");
1590 textPlus = findText("+");
1591 textFrom = findText("<-");
1592 textArrow = findText("->");
1593 textLazy = findText("~");
1594 textBang = findText("!");
1595 textDot = findText(".");
1596 textImplies = findText("=>");
1597 textPrelude = findText("Prelude");
1598 textNum = findText("Num");
1599 textModule = findText("module");
1600 textImport = findText("import");
1601 textHiding = findText("hiding");
1602 textQualified = findText("qualified");
1603 textAsMod = findText("as");
1604 textWildcard = findText("_");
1605 textAll = findText("forall");
1606 varMinus = mkVar(textMinus);
1607 varPlus = mkVar(textPlus);
1608 varBang = mkVar(textBang);
1609 varDot = mkVar(textDot);
1610 varHiding = mkVar(textHiding);
1611 varQualified = mkVar(textQualified);
1612 varAsMod = mkVar(textAsMod);
1613 conMain = mkCon(findText("Main"));
1614 varMain = mkVar(findText("main"));
1620 case RESET : tyconDefns = NIL;
1629 foreignImports= NIL;
1630 foreignExports= NIL;
1638 case BREAK : if (reading==KEYBOARD)
1642 case MARK : mark(tyconDefns);
1650 mark(unqualImports);
1651 mark(foreignImports);
1652 mark(foreignExports);
1670 /*-------------------------------------------------------------------------*/