2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * Hugs 98 is Copyright (c) Mark P Jones, Alastair Reid and the Yale
6 * Haskell Group 1994-99, and is distributed as Open Source software
7 * under the Artistic License; see the file "Artistic" that is included
8 * in the distribution for details.
10 * $RCSfile: input.c,v $
12 * $Date: 1999/04/27 10:06:53 $
13 * ------------------------------------------------------------------------*/
30 /* --------------------------------------------------------------------------
32 * ------------------------------------------------------------------------*/
34 List tyconDefns = NIL; /* type constructor definitions */
35 List typeInDefns = NIL; /* type synonym restrictions */
36 List valDefns = NIL; /* value definitions in script */
37 List classDefns = NIL; /* class defns in script */
38 List instDefns = NIL; /* instance defns in script */
39 List selDefns = NIL; /* list of selector lists */
40 List genDefns = NIL; /* list of generated names */
41 List unqualImports = NIL; /* unqualified import list */
42 List foreignImports = NIL; /* foreign imports */
43 List foreignExports = NIL; /* foreign exportsd */
44 List defaultDefns = NIL; /* default definitions (if any) */
45 Int defaultLine = 0; /* line in which default defs occur*/
46 List evalDefaults = NIL; /* defaults for evaluator */
48 Cell inputExpr = NIL; /* input expression */
49 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
50 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
52 String repeatStr = 0; /* Repeat last expr */
54 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
55 String preprocessor = 0;
58 /* --------------------------------------------------------------------------
59 * Local function prototypes:
60 * ------------------------------------------------------------------------*/
62 static Void local initCharTab Args((Void));
63 static Void local fileInput Args((String,Long));
64 static Bool local literateMode Args((String));
65 static Bool local linecmp Args((String,String));
66 static Int local nextLine Args((Void));
67 static Void local skip Args((Void));
68 static Void local thisLineIs Args((Int));
69 static Void local newlineSkip Args((Void));
70 static Void local closeAnyInput Args((Void));
72 Int yyparse Args((Void)); /* can't stop yacc making this */
73 /* public, but don't advertise */
74 /* it in a header file. */
76 static Void local endToken Args((Void));
77 static Text local readOperator Args((Void));
78 static Text local readIdent Args((Void));
79 static Cell local readRadixNumber Args((Int));
80 static Cell local readNumber Args((Void));
81 static Cell local readChar Args((Void));
82 static Cell local readString Args((Void));
83 static Void local saveStrChr Args((Char));
84 static Cell local readAChar Args((Bool));
86 static Bool local lazyReadMatches Args((String));
87 static Cell local readEscapeChar Args((Bool));
88 static Void local skipGap Args((Void));
89 static Cell local readCtrlChar Args((Void));
90 static Cell local readOctChar Args((Void));
91 static Cell local readHexChar Args((Void));
92 static Int local readHexDigit Args((Char));
93 static Cell local readDecChar Args((Void));
95 static Void local goOffside Args((Int));
96 static Void local unOffside Args((Void));
97 static Bool local canUnOffside Args((Void));
99 static Void local skipWhitespace Args((Void));
100 static Int local yylex Args((Void));
101 static Int local repeatLast Args((Void));
103 static Void local parseInput Args((Int));
105 static Bool local doesNotExceed Args((String,Int,Int));
106 static Int local stringToInt Args((String,Int));
109 /* --------------------------------------------------------------------------
110 * Text values for reserved words and special symbols:
111 * ------------------------------------------------------------------------*/
113 static Text textCase, textOfK, textData, textType, textIf;
114 static Text textThen, textElse, textWhere, textLet, textIn;
115 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
116 static Text textDefault, textDeriving, textDo, textClass, textInstance;
118 static Text textCoco, textEq, textUpto, textAs, textLambda;
119 static Text textBar, textMinus, textFrom, textArrow, textLazy;
120 static Text textBang, textDot, textAll, textImplies;
121 static Text textWildcard;
123 static Text textModule, textImport;
124 static Text textHiding, textQualified, textAsMod;
125 static Text textExport, textUnsafe;
127 Text textNum; /* Num */
128 Text textPrelude; /* Prelude */
129 Text textPlus; /* (+) */
131 static Cell conMain; /* Main */
132 static Cell varMain; /* main */
134 static Cell varMinus; /* (-) */
135 static Cell varPlus; /* (+) */
136 static Cell varBang; /* (!) */
137 static Cell varDot; /* (.) */
138 static Cell varHiding; /* hiding */
139 static Cell varQualified; /* qualified */
140 static Cell varAsMod; /* as */
142 static List imps; /* List of imports to be chased */
145 /* --------------------------------------------------------------------------
146 * Character set handling:
148 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
149 * character set. The following code provides methods for classifying
150 * input characters according to the lexical structure specified by the
151 * report. Hugs should still accept older programs because ASCII is
152 * essentially just a subset of the ISO character set.
154 * Notes: If you want to port Hugs to a machine that uses something
155 * substantially different from the ISO character set, then you will need
156 * to insert additional code to map between character sets.
158 * At some point, the following data structures may be exported in a .h
159 * file to allow the information contained here to be picked up in the
160 * implementation of LibChar is* primitives.
162 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
163 * ------------------------------------------------------------------------*/
165 static Bool charTabBuilt;
166 static unsigned char ctable[NUM_CHARS];
167 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
168 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
178 static Void local initCharTab() { /* Initialize char decode table */
179 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
180 #define setChar(x,c) ctable[c] |= (x)
181 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
182 #define setCopy(x,c) {Int i; \
183 for (i=0; i<NUM_CHARS; ++i) \
188 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
190 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
191 setRange(SMALL, 223,246); /* ISO lower case letters */
192 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
193 setChar (SMALL, '_');
195 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
196 setRange(LARGE, 192,214); /* ISO upper case letters */
197 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
199 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
200 setRange(SYMBOL, 215,215);
201 setChar (SYMBOL, 247);
202 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
204 setChar (IDAFTER, '\''); /* Characters in identifier */
205 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
207 setChar (SPACE, ' '); /* ASCII space character */
208 setChar (SPACE, 160); /* ISO non breaking space */
209 setRange(SPACE, 9,13); /* special whitespace: \t\n\v\f\r */
211 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
212 setChars(PRINT, " '\""); /* Space and quotes */
213 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
223 /* --------------------------------------------------------------------------
224 * Single character input routines:
226 * At the lowest level of input, characters are read one at a time, with the
227 * current character held in c0 and the following (lookahead) character in
228 * c1. The corrdinates of c0 within the file are held in (column,row).
229 * The input stream is advanced by one character using the skip() function.
230 * ------------------------------------------------------------------------*/
232 #define TABSIZE 8 /* spacing between tabstops */
234 #define NOTHING 0 /* what kind of input is being read?*/
235 #define KEYBOARD 1 /* - keyboard/console? */
236 #define SCRIPTFILE 2 /* - script file */
237 #define PROJFILE 3 /* - project file */
238 #define STRING 4 /* - string buffer? */
240 static Int reading = NOTHING;
242 static Target readSoFar;
243 static Int row, column, startColumn;
245 static FILE *inputStream = 0;
246 static Bool thisLiterate;
247 static String nextStringChar; /* next char in string buffer */
249 #if USE_READLINE /* for command line editors */
250 static String currentLine; /* editline or GNU readline */
251 static String nextChar;
252 #define nextConsoleChar() (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
253 extern Void add_history Args((String));
254 extern String readline Args((String));
256 #define nextConsoleChar() getc(stdin)
259 static Int litLines; /* count defn lines in lit script */
260 #define DEFNCHAR '>' /* definition lines begin with this */
261 static Int lastLine; /* records type of last line read: */
262 #define STARTLINE 0 /* - at start of file, none read */
263 #define BLANKLINE 1 /* - blank (may preceed definition) */
264 #define TEXTLINE 2 /* - text comment */
265 #define DEFNLINE 3 /* - line containing definition */
266 #define CODELINE 4 /* - line inside code block */
268 #define BEGINCODE "\\begin{code}"
269 #define ENDCODE "\\end{code}"
272 static char *lineBuffer = NULL; /* getline() does the initial allocation */
274 #define LINEBUFFER_SIZE 1000
275 static char lineBuffer[LINEBUFFER_SIZE];
277 static int lineLength = 0;
278 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
279 static int linePtr = 0;
281 Void consoleInput(prompt) /* prepare to input characters from */
282 String prompt; { /* standard in (i.e. console/kbd) */
283 reading = KEYBOARD; /* keyboard input is Line oriented, */
284 c0 = /* i.e. input terminated by '\n' */
290 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
291 * avoids accidentally freeing currentLine twice.
294 String oldCurrentLine = currentLine;
295 currentLine = 0; /* We may lose the space of currentLine */
296 free(oldCurrentLine); /* if interrupted here - unlikely */
298 currentLine = readline(prompt);
299 nextChar = currentLine;
302 add_history(currentLine);
312 Void projInput(nm) /* prepare to input characters from */
313 String nm; { /* from named project file */
314 if ((inputStream = fopen(nm,"r"))!=0) {
322 ERRMSG(0) "Unable to open project file \"%s\"", nm
327 static Void local fileInput(nm,len) /* prepare to input characters from*/
328 String nm; /* named file (specified length is */
329 Long len; { /* used to set target for reading) */
330 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
332 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
333 char *cmd = malloc(reallen);
335 ERRMSG(0) "Unable to allocate memory for filter command."
338 strcpy(cmd,preprocessor);
341 inputStream = popen(cmd,"r");
344 inputStream = fopen(nm,"r");
347 inputStream = fopen(nm,"r");
350 reading = SCRIPTFILE;
356 lastLine = STARTLINE; /* literate file processing */
360 thisLiterate = literateMode(nm);
364 setGoal("Parsing", (Target)len);
367 ERRMSG(0) "Unable to open file \"%s\"", nm
372 Void stringInput(s) /* prepare to input characters from string */
387 static Bool local literateMode(nm) /* Select literate mode for file */
389 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
391 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
393 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
394 filenamecmp(dot+1,"verb")==0) /* literate scripts */
397 return literateScripts; /* otherwise, use the default */
401 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
402 * I've removed the loop (since newLineSkip contains a loop too) and
403 * replaced the warnings with errors. ADR
406 * To deal with literate \begin{code}...\end{code} blocks,
407 * add a line buffer that rooms the current line. The old c0 and c1
408 * stream pointers are used as before within that buffer -- sof
410 * Upon reading a new line into the line buffer, we check to see if
411 * we're reading in a line containing \begin{code} or \end{code} and
412 * take appropriate action.
415 static Bool local linecmp(s,line) /* compare string with line */
416 String s; /* line may end in whitespace */
419 while (s[i] != '\0' && s[i] == line[i]) {
422 /* s[0..i-1] == line[0..i-1] */
423 if (s[i] != '\0') { /* check s `isPrefixOf` line */
426 while (isIn(line[i], SPACE)) { /* allow whitespace at end of line */
429 return (line[i] == '\0');
432 /* Returns line length (including \n) or 0 upon EOF. */
433 static Int local nextLine()
437 Forget about fgets(), it is utterly braindead.
438 (Assumes \NUL free streams and does not gracefully deal
439 with overflow.) Instead, use GNU libc's getline().
441 lineLength = getline(&lineBuffer, &lineLength, inputStream);
443 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
444 lineLength = strlen(lineBuffer);
448 /* printf("Read: \"%s\"", lineBuffer); */
449 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
452 else if (lineLength >= 2 && lineBuffer[0] == '#' && lineBuffer[1] == '!') {
453 lineBuffer[0]='\n'; /* pretend it's a blank line */
456 } else if (thisLiterate) {
457 if (linecmp(BEGINCODE, lineBuffer)) {
458 if (!inCodeBlock) { /* Entered a code block */
460 lineBuffer[0]='\n'; /* pretend it's a blank line */
465 ERRMSG(row) "\\begin{code} encountered inside code block"
469 else if (linecmp(ENDCODE, lineBuffer)) {
470 if (inCodeBlock) { /* Finished code block */
472 lineBuffer[0]='\n'; /* pretend it's a blank line */
477 ERRMSG(row) "\\end{code} encountered outside code block"
482 /* printf("Read: \"%s\"", lineBuffer); */
486 static Void local skip() { /* move forward one char in input */
487 if (c0!=EOF) { /* stream, updating c0, c1, ... */
488 if (c0=='\n') { /* Adjusting cursor coords as nec. */
491 if (reading==SCRIPTFILE)
495 column += TABSIZE - ((column-1)%TABSIZE);
504 if (reading==SCRIPTFILE)
508 else if (reading==KEYBOARD) {
513 c1 = nextConsoleChar();
514 /* On Win32, hitting ctrl-C causes the next getchar to
515 * fail - returning "-1" to indicate an error.
516 * This is one of the rare cases where "-1" does not mean EOF.
518 if (EOF == c1 && !feof(stdin)) {
523 else if (reading==STRING) {
524 c1 = (unsigned char) *nextStringChar++;
529 if (lineLength <=0 || linePtr == lineLength) {
530 /* Current line, exhausted - get new one */
531 if (nextLine() <= 0) { /* EOF */
536 c1 = (unsigned char)lineBuffer[linePtr++];
540 c1 = (unsigned char)lineBuffer[linePtr++];
547 static Void local thisLineIs(kind) /* register kind of current line */
548 Int kind; { /* & check for literate script errs */
549 if (literateErrors) {
550 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
551 (kind==TEXTLINE && lastLine==DEFNLINE)) {
552 ERRMSG(row) "Program line next to comment"
559 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
560 /* assert(c0=='\n'); */
561 if (reading==SCRIPTFILE && thisLiterate) {
564 if (inCodeBlock) { /* pass chars on definition lines */
565 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
569 if (c0==DEFNCHAR) { /* pass chars on definition lines */
570 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
575 while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank? */
577 if (c0=='\n' || c0==EOF)
578 thisLineIs(BLANKLINE);
580 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
581 while (c0!='\n' && c0!=EOF)
583 } /* by now, c0=='\n' or c0==EOF */
584 } while (c0!=EOF); /* if new line, start again */
586 if (litLines==0 && literateErrors) {
587 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
596 static Void local closeAnyInput() { /* Close input stream, if open, */
597 switch (reading) { /* or skip to end of console line */
599 case SCRIPTFILE : if (inputStream) {
600 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
612 case KEYBOARD : while (c0!=EOF)
619 /* --------------------------------------------------------------------------
620 * Parser: Uses table driven parser generated from parser.y using yacc
621 * ------------------------------------------------------------------------*/
625 /* --------------------------------------------------------------------------
626 * Single token input routines:
628 * The following routines read the values of particular kinds of token given
629 * that the first character of the token has already been located in c0 on
630 * entry to the routine.
631 * ------------------------------------------------------------------------*/
633 #define MAX_TOKEN 4000
634 #define startToken() tokPos = 0
635 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
636 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
637 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
639 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
640 static Int tokPos; /* input position in buffer */
641 static Int identType; /* identifier type: CONID / VARID */
642 static Int opType; /* operator type : CONOP / VAROP */
644 static Void local endToken() { /* check for token overflow */
645 if (tokPos>MAX_TOKEN) {
646 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
649 tokenStr[tokPos] = '\0';
652 static Text local readOperator() { /* read operator symbol */
657 } while (isISO(c0) && isIn(c0,SYMBOL));
658 opType = (tokenStr[0]==':' ? CONOP : VAROP);
660 return findText(tokenStr);
663 static Text local readIdent() { /* read identifier */
668 } while (isISO(c0) && isIn(c0,IDAFTER));
670 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
671 return findText(tokenStr);
675 static Bool local doesNotExceed(s,radix,limit)
682 if (s[p] == 0) return TRUE;
683 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
684 n = radix*n + (s[p]-'0');
689 static Int local stringToInt(s,radix)
695 if (s[p] == 0) return n;
696 n = radix*n + (s[p]-'0');
701 static Cell local readRadixNumber(r) /* Read literal in specified radix */
702 Int r; { /* from input of the form 0c{digs} */
705 skip(); /* skip leading zero */
706 if ((d=readHexDigit(c1))<0 || d>=r) {
707 /* Special case; no digits, lex as */
708 /* if it had been written "0 c..." */
713 saveTokenChar('0'+readHexDigit(c0));
715 d = readHexDigit(c0);
716 } while (d>=0 && d<r);
720 if (doesNotExceed(tokenStr,r,MAXPOSINT))
721 return mkInt(stringToInt(tokenStr,r));
724 return stringToBignum(tokenStr);
726 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
731 static Cell local readNumber() { /* read numeric constant */
734 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
735 return readRadixNumber(16);
736 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
737 return readRadixNumber(8);
744 } while (isISO(c0) && isIn(c0,DIGIT));
746 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
748 if (doesNotExceed(tokenStr,10,MAXPOSINT))
749 return mkInt(stringToInt(tokenStr,10)); else
750 return stringToBignum(tokenStr);
753 saveTokenChar(c0); /* save decimal point */
755 do { /* process fractional part ... */
758 } while (isISO(c0) && isIn(c0,DIGIT));
760 if (c0=='e' || c0=='E') { /* look for exponent part... */
770 if (!isISO(c0) || !isIn(c0,DIGIT)) {
771 ERRMSG(row) "Missing digits in exponent"
778 } while (isISO(c0) && isIn(c0,DIGIT));
783 return mkFloat(stringToFloat(tokenStr));
792 static Cell local readChar() { /* read character constant */
796 if (c0=='\'' || c0=='\n' || c0==EOF) {
797 ERRMSG(row) "Illegal character constant"
801 charRead = readAChar(FALSE);
806 ERRMSG(row) "Improperly terminated character constant"
812 static Cell local readString() { /* read string literal */
817 while (c0!='\"' && c0!='\n' && c0!=EOF) {
820 saveStrChr(charOf(c));
826 ERRMSG(row) "Improperly terminated string"
830 return mkStr(findText(tokenStr));
833 static Void local saveStrChr(c) /* save character in string */
835 if (c!='\0' && c!='\\') { /* save non null char as single char*/
838 else { /* save null char as TWO null chars */
839 if (tokPos+1<MAX_TOKEN) {
849 static Cell local readAChar(isStrLit) /* read single char constant */
850 Bool isStrLit; { /* TRUE => enable \& and gaps */
853 if (c0=='\\') /* escape character? */
854 return readEscapeChar(isStrLit);
856 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
859 skip(); /* normal character? */
863 /* --------------------------------------------------------------------------
864 * Character escape code sequences:
865 * ------------------------------------------------------------------------*/
867 static struct { /* table of special escape codes */
871 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
872 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
873 {"\'",'\''}, {"v", 11},
874 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
875 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
876 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
877 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
878 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
879 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
880 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
881 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
882 {"SP", 32}, {"DEL", 127},
886 static Int alreadyMatched; /* Record portion of input stream */
887 static char alreadyRead[10]; /* that has been read w/o a match */
889 static Bool local lazyReadMatches(s) /* compare input stream with string */
890 String s; { /* possibly using characters that */
891 int i; /* have already been read */
893 for (i=0; i<alreadyMatched; ++i)
894 if (alreadyRead[i]!=s[i])
897 while (s[i] && s[i]==c0) {
898 alreadyRead[alreadyMatched++]=(char)c0;
906 static Cell local readEscapeChar(isStrLit)/* read escape character */
912 case '&' : if (isStrLit) {
916 ERRMSG(row) "Illegal use of `\\&' in character constant"
920 case '^' : return readCtrlChar();
922 case 'o' : return readOctChar();
923 case 'x' : return readHexChar();
925 default : if (!isISO(c0)) {
926 ERRMSG(row) "Illegal escape sequence"
929 else if (isIn(c0,SPACE)) {
934 ERRMSG(row) "Illegal use of gap in character constant"
938 else if (isIn(c0,DIGIT))
939 return readDecChar();
942 for (alreadyMatched=0; escapes[i].codename; i++)
943 if (lazyReadMatches(escapes[i].codename))
944 return mkChar(escapes[i].codenumber);
946 alreadyRead[alreadyMatched++] = (char)c0;
947 alreadyRead[alreadyMatched++] = '\0';
948 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
951 return NIL;/*NOTREACHED*/
954 static Void local skipGap() { /* skip over gap in string literal */
955 do /* (simplified in Haskell 1.1) */
960 while (isISO(c0) && isIn(c0,SPACE));
962 ERRMSG(row) "Missing `\\' terminating string literal gap"
968 static Cell local readCtrlChar() { /* read escape sequence \^x */
969 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
973 if ((which = strchr(controls,c0))==NULL) {
974 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
978 return mkChar(which-controls);
981 static Cell local readOctChar() { /* read octal character constant */
986 if ((d = readHexDigit(c0))<0 || d>=8) {
987 ERRMSG(row) "Empty octal character escape"
991 if (overflows(n,8,d,MAXCHARVAL)) {
992 ERRMSG(row) "Octal character escape out of range"
997 } while ((d = readHexDigit(c0))>=0 && d<8);
1002 static Cell local readHexChar() { /* read hex character constant */
1007 if ((d = readHexDigit(c0))<0) {
1008 ERRMSG(row) "Empty hexadecimal character escape"
1012 if (overflows(n,16,d,MAXCHARVAL)) {
1013 ERRMSG(row) "Hexadecimal character escape out of range"
1018 } while ((d = readHexDigit(c0))>=0);
1023 static Int local readHexDigit(c) /* read single hex digit */
1025 if ('0'<=c && c<='9')
1027 if ('A'<=c && c<='F')
1028 return 10 + (c-'A');
1029 if ('a'<=c && c<='f')
1030 return 10 + (c-'a');
1034 static Cell local readDecChar() { /* read decimal character constant */
1038 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1039 ERRMSG(row) "Decimal character escape out of range"
1042 n = 10*n + (c0-'0');
1044 } while (c0!=EOF && isIn(c0,DIGIT));
1049 /* --------------------------------------------------------------------------
1050 * Produce printable representation of character:
1051 * ------------------------------------------------------------------------*/
1053 String unlexChar(c,quote) /* return string representation of */
1054 Char c; /* character... */
1055 Char quote; { /* protect quote character */
1056 static char buffer[12];
1058 if (c<0) /* deal with sign extended chars.. */
1061 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1062 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1064 buffer[1] = (char)c;
1068 buffer[0] = (char)c;
1072 else { /* look for escape code */
1074 for (escs=0; escapes[escs].codename; escs++)
1075 if (escapes[escs].codenumber==c) {
1076 sprintf(buffer,"\\%s",escapes[escs].codename);
1079 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1084 Void printString(s) /* print string s, using quotes and */
1085 String s; { /* escapes if any parts need them */
1089 while ((c = *t)!=0 && isISO(c)
1090 && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1096 Printf("%s",unlexChar(*t,'"'));
1104 /* -------------------------------------------------------------------------
1105 * Handle special types of input for use in interpreter:
1106 * -----------------------------------------------------------------------*/
1108 Command readCommand(cmds,start,sys) /* read command at start of input */
1109 struct cmd *cmds; /* line in interpreter */
1110 Char start; /* characters introducing a cmd */
1111 Char sys; { /* character for shell escape */
1112 while (c0==' ' || c0 =='\t')
1115 if (c0=='\n') /* look for blank command lines */
1117 if (c0==EOF) /* look for end of input stream */
1119 if (c0==sys) { /* single character system escape */
1123 if (c0==start && c1==sys) { /* two character system escape */
1129 startToken(); /* All cmds start with start */
1130 if (c0==start) /* except default (usually EVAL) */
1131 do { /* which is empty */
1134 } while (c0!=EOF && !isIn(c0,SPACE));
1137 for (; cmds->cmdString; ++cmds)
1138 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1139 (tokenStr[0]==start &&
1140 tokenStr[1]==(cmds->cmdString)[1] &&
1142 return (cmds->cmdCode);
1146 String readFilename() { /* Read filename from input (if any)*/
1147 if (reading==PROJFILE)
1150 while (c0==' ' || c0=='\t')
1153 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1157 while (c0!=EOF && !isIn(c0,SPACE)) {
1160 while (c0!=EOF && c0!='\"') {
1161 Cell c = readAChar(TRUE);
1163 saveTokenChar(charOf(c));
1169 ERRMSG(row) "a closing quote, '\"', was expected"
1182 String readLine() { /* Read command line from input */
1183 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1187 while (c0!='\n' && c0!=EOF) {
1196 /* --------------------------------------------------------------------------
1197 * This lexer supports the Haskell layout rule:
1199 * - Layout area bounded by { ... }, with `;'s in between.
1200 * - A `{' is a HARD indentation and can only be matched by a corresponding
1202 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1203 * is inserted with the column number of the first token after the
1204 * WHERE/LET/OF keyword.
1205 * - When a soft indentation is uppermost on the indetation stack with
1206 * column col' we insert:
1207 * `}' in front of token with column<col' and pop indentation off stack,
1208 * `;' in front of token with column==col'.
1209 * ------------------------------------------------------------------------*/
1211 #define MAXINDENT 100 /* maximum nesting of layout rule */
1212 static Int layout[MAXINDENT+1];/* indentation stack */
1213 #define HARD (-1) /* indicates hard indentation */
1214 static Int indentDepth = (-1); /* current indentation nesting */
1216 static Void local goOffside(col) /* insert offside marker */
1217 Int col; { /* for specified column */
1218 if (indentDepth>=MAXINDENT) {
1219 ERRMSG(row) "Too many levels of program nesting"
1222 layout[++indentDepth] = col;
1225 static Void local unOffside() { /* leave layout rule area */
1229 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1230 return indentDepth>=0 && layout[indentDepth]!=HARD;
1233 /* --------------------------------------------------------------------------
1235 * ------------------------------------------------------------------------*/
1237 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1238 for (;;) /* Strictly speaking, this code is */
1239 if (c0==EOF) /* a little more liberal than the */
1240 return; /* report allows ... */
1243 else if (isIn(c0,SPACE))
1245 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1247 Int origRow = row; /* Save original row number */
1250 while (nesting>0 && c0!=EOF)
1251 if (c0=='{' && c1=='-') {
1256 else if (c0=='-' && c1=='}') {
1266 ERRMSG(origRow) "Unterminated nested comment {- ..."
1270 else if (c0=='-' && c1=='-') { /* One line comment */
1273 while (c0!='\n' && c0!=EOF);
1281 static Bool firstToken; /* Set to TRUE for first token */
1282 static Int firstTokenIs; /* ... with token value stored here */
1284 static Int local yylex() { /* Read next input token ... */
1285 static Bool insertOpen = FALSE;
1286 static Bool insertedToken = FALSE;
1287 static Text textRepeat;
1289 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1291 if (firstToken) { /* Special case for first token */
1295 insertedToken = FALSE;
1296 if (reading==KEYBOARD)
1297 textRepeat = findText(repeatStr);
1298 return firstTokenIs;
1301 if (insertOpen) { /* insert `soft' opening brace */
1303 insertedToken = TRUE;
1305 push(yylval = mkInt(row));
1309 /* ----------------------------------------------------------------------
1310 * Skip white space, and insert tokens to support layout rules as reqd.
1311 * --------------------------------------------------------------------*/
1314 startColumn = column;
1315 push(yylval = mkInt(row)); /* default token value is line no. */
1316 /* subsequent changes to yylval must also set top() to the same value */
1318 if (indentDepth>=0) { /* layout rule(s) active ? */
1319 if (insertedToken) /* avoid inserting multiple `;'s */
1320 insertedToken = FALSE; /* or putting `;' after `{' */
1322 if (layout[indentDepth]!=HARD) {
1323 if (column<layout[indentDepth]) {
1327 else if (column==layout[indentDepth] && c0!=EOF) {
1328 insertedToken = TRUE;
1334 /* ----------------------------------------------------------------------
1335 * Now try to identify token type:
1336 * --------------------------------------------------------------------*/
1339 case EOF : return 0; /* End of file/input */
1341 /* The next 10 characters make up the `special' category in 1.3 */
1342 case '(' : skip(); return '(';
1343 case ')' : skip(); return ')';
1344 case ',' : skip(); return ',';
1345 case ';' : skip(); return ';';
1346 case '[' : skip(); return '[';
1347 case ']' : skip(); return ']';
1348 case '`' : skip(); return '`';
1349 case '{' : goOffside(HARD);
1352 case '}' : if (indentDepth<0) {
1353 ERRMSG(row) "Misplaced `}'"
1356 if (layout[indentDepth]==HARD) /* skip over hard }*/
1358 unOffside(); /* otherwise, we have to insert a }*/
1359 return '}'; /* to (try to) avoid an error... */
1361 /* Character and string literals */
1362 case '\'' : top() = yylval = readChar();
1365 case '\"' : top() = yylval = readString();
1370 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1371 Text it; /* Look for record selector name */
1374 top() = yylval = ap(RECSEL,mkExt(it));
1375 return identType=RECSELID;
1378 if (isIn(c0,LARGE)) { /* Look for qualified name */
1379 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1380 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1382 skip(); /* Skip qualifying dot */
1383 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1384 it2 = readOperator();
1385 if (opType==CONOP) {
1386 top() = yylval = mkQConOp(it,it2);
1389 top() = yylval = mkQVarOp(it,it2);
1392 } else { /* Qualified identifier */
1394 if (identType==CONID) {
1395 top() = yylval = mkQCon(it,it2);
1398 top() = yylval = mkQVar(it,it2);
1403 top() = yylval = mkCon(it);
1407 if (isIn(c0,(SMALL|LARGE))) {
1408 Text it = readIdent();
1410 if (it==textCase) return CASEXP;
1411 if (it==textOfK) lookAhead(OF);
1412 if (it==textData) return DATA;
1413 if (it==textType) return TYPE;
1414 if (it==textIf) return IF;
1415 if (it==textThen) return THEN;
1416 if (it==textElse) return ELSE;
1417 if (it==textWhere) lookAhead(WHERE);
1418 if (it==textLet) lookAhead(LET);
1419 if (it==textIn) return IN;
1420 if (it==textInfix) return INFIXN;
1421 if (it==textInfixl) return INFIXL;
1422 if (it==textInfixr) return INFIXR;
1423 if (it==textForeign) return FOREIGN;
1424 if (it==textUnsafe) return UNSAFE;
1425 if (it==textNewtype) return TNEWTYPE;
1426 if (it==textDefault) return DEFAULT;
1427 if (it==textDeriving) return DERIVING;
1428 if (it==textDo) lookAhead(DO);
1429 if (it==textClass) return TCLASS;
1430 if (it==textInstance) return TINSTANCE;
1431 if (it==textModule) return TMODULE;
1432 if (it==textImport) return IMPORT;
1433 if (it==textExport) return EXPORT;
1434 if (it==textHiding) return HIDING;
1435 if (it==textQualified) return QUALIFIED;
1436 if (it==textAsMod) return ASMOD;
1437 if (it==textWildcard) return '_';
1438 if (it==textAll && !haskell98) return ALL;
1439 if (it==textRepeat && reading==KEYBOARD)
1440 return repeatLast();
1442 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1446 if (isIn(c0,SYMBOL)) {
1447 Text it = readOperator();
1449 if (it==textCoco) return COCO;
1450 if (it==textEq) return '=';
1451 if (it==textUpto) return UPTO;
1452 if (it==textAs) return '@';
1453 if (it==textLambda) return '\\';
1454 if (it==textBar) return '|';
1455 if (it==textFrom) return FROM;
1456 if (it==textMinus) return '-';
1457 if (it==textPlus) return '+';
1458 if (it==textBang) return '!';
1459 if (it==textDot) return '.';
1460 if (it==textArrow) return ARROW;
1461 if (it==textLazy) return '~';
1462 if (it==textImplies) return IMPLIES;
1463 if (it==textRepeat && reading==KEYBOARD)
1464 return repeatLast();
1466 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1470 if (isIn(c0,DIGIT)) {
1471 top() = yylval = readNumber();
1475 ERRMSG(row) "Unrecognised character `\\%d' in column %d", ((int)c0), column
1477 return 0; /*NOTREACHED*/
1480 static Int local repeatLast() { /* Obtain last expression entered */
1481 if (isNull(yylval=getLastExpr())) {
1482 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1488 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1489 Text t; { /* by t ... */
1490 String s = textToStr(t);
1491 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1494 Syntax syntaxOf(n) /* Find syntax for name */
1496 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1497 return defaultSyntax(name(n).text);
1498 return name(n).syntax;
1501 /* --------------------------------------------------------------------------
1502 * main entry points to parser/lexer:
1503 * ------------------------------------------------------------------------*/
1505 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1506 Int startWith; { /* determining whether to read a */
1507 firstToken = TRUE; /* script or an expression */
1508 firstTokenIs = startWith;
1511 if (yyparse()) { /* This can only be parser overflow */
1512 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1513 EEND; /* in the parser... */
1516 if (!stackEmpty()) /* stack should now be empty */
1517 internal("parseInput");
1521 static String memPrefix = "@mem@";
1522 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1524 Void makeMemScript(mem,fname)
1527 strcat(fname,memPrefix);
1528 itoa((int)mem, fname+strlen(fname), 10);
1531 Bool isMemScript(fname)
1533 return (strstr(fname,memPrefix) != NULL);
1536 String memScriptString(fname)
1538 String p = strstr(fname,memPrefix);
1540 return (String)atoi(p+lenMemPrefix);
1546 Void parseScript(fname,len) /* Read a script, possibly from mem */
1550 if (isMemScript(fname)) {
1551 char* s = memScriptString(fname);
1554 fileInput(fname,len);
1559 Void parseScript(nm,len) /* Read a script */
1561 Long len; { /* Used to set a target for reading */
1568 Void parseExp() { /* Read an expression to evaluate */
1570 setLastExpr(inputExpr);
1573 /* --------------------------------------------------------------------------
1575 * ------------------------------------------------------------------------*/
1580 case INSTALL : initCharTab();
1581 textCase = findText("case");
1582 textOfK = findText("of");
1583 textData = findText("data");
1584 textType = findText("type");
1585 textIf = findText("if");
1586 textThen = findText("then");
1587 textElse = findText("else");
1588 textWhere = findText("where");
1589 textLet = findText("let");
1590 textIn = findText("in");
1591 textInfix = findText("infix");
1592 textInfixl = findText("infixl");
1593 textInfixr = findText("infixr");
1594 textForeign = findText("foreign");
1595 textUnsafe = findText("unsafe");
1596 textNewtype = findText("newtype");
1597 textDefault = findText("default");
1598 textDeriving = findText("deriving");
1599 textDo = findText("do");
1600 textClass = findText("class");
1601 textInstance = findText("instance");
1602 textCoco = findText("::");
1603 textEq = findText("=");
1604 textUpto = findText("..");
1605 textAs = findText("@");
1606 textLambda = findText("\\");
1607 textBar = findText("|");
1608 textMinus = findText("-");
1609 textPlus = findText("+");
1610 textFrom = findText("<-");
1611 textArrow = findText("->");
1612 textLazy = findText("~");
1613 textBang = findText("!");
1614 textDot = findText(".");
1615 textImplies = findText("=>");
1616 textPrelude = findText("Prelude");
1617 textNum = findText("Num");
1618 textModule = findText("module");
1619 textImport = findText("import");
1620 textHiding = findText("hiding");
1621 textQualified = findText("qualified");
1622 textAsMod = findText("as");
1623 textWildcard = findText("_");
1624 textAll = findText("forall");
1625 varMinus = mkVar(textMinus);
1626 varPlus = mkVar(textPlus);
1627 varBang = mkVar(textBang);
1628 varDot = mkVar(textDot);
1629 varHiding = mkVar(textHiding);
1630 varQualified = mkVar(textQualified);
1631 varAsMod = mkVar(textAsMod);
1632 conMain = mkCon(findText("Main"));
1633 varMain = mkVar(findText("main"));
1639 case RESET : tyconDefns = NIL;
1648 foreignImports= NIL;
1649 foreignExports= NIL;
1657 case BREAK : if (reading==KEYBOARD)
1661 case MARK : mark(tyconDefns);
1669 mark(unqualImports);
1670 mark(foreignImports);
1671 mark(foreignExports);
1689 /*-------------------------------------------------------------------------*/