2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
6 * Yale Haskell Group, and the Oregon Graduate Institute of Science and
7 * Technology, 1994-1999, All rights reserved. It is distributed as
8 * free software under the license in the file "License", which is
9 * included in the distribution.
11 * $RCSfile: input.c,v $
13 * $Date: 2000/03/13 11:37:16 $
14 * ------------------------------------------------------------------------*/
30 #if IS_WIN32 || HUGS_FOR_WINDOWS
34 #if HAVE_LIBREADLINE && HAVE_READLINE_READLINE_H && HAVE_READLINE_HISTORY_H
35 #define USE_READLINE 1
37 #define USE_READLINE 0
41 #include <readline/readline.h>
42 #include <readline/history.h>
46 /* --------------------------------------------------------------------------
48 * ------------------------------------------------------------------------*/
50 List tyconDefns = NIL; /* type constructor definitions */
51 List typeInDefns = NIL; /* type synonym restrictions */
52 List valDefns = NIL; /* value definitions in script */
53 List classDefns = NIL; /* class defns in script */
54 List instDefns = NIL; /* instance defns in script */
55 List selDefns = NIL; /* list of selector lists */
56 List genDefns = NIL; /* list of generated names */
57 List unqualImports = NIL; /* unqualified import list */
58 List foreignImports = NIL; /* foreign imports */
59 List foreignExports = NIL; /* foreign exportsd */
60 List defaultDefns = NIL; /* default definitions (if any) */
61 Int defaultLine = 0; /* line in which default defs occur*/
62 List evalDefaults = NIL; /* defaults for evaluator */
64 Cell inputExpr = NIL; /* input expression */
65 Cell inputContext = NIL; /* input context */
66 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
67 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
68 Bool offsideON = TRUE; /* TRUE => implement offside rule */
69 Bool readingInterface = FALSE;
71 String repeatStr = 0; /* Repeat last expr */
73 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
74 String preprocessor = 0;
77 /* --------------------------------------------------------------------------
78 * Local function prototypes:
79 * ------------------------------------------------------------------------*/
81 static Void local initCharTab ( Void );
82 static Void local fileInput ( String,Long );
83 static Bool local literateMode ( String );
84 static Bool local linecmp ( String,String );
85 static Int local nextLine ( Void );
86 static Void local skip ( Void );
87 static Void local thisLineIs ( Int );
88 static Void local newlineSkip ( Void );
89 static Void local closeAnyInput ( Void );
91 Int yyparse ( Void ); /* can't stop yacc making this */
92 /* public, but don't advertise */
93 /* it in a header file. */
95 static Void local endToken ( Void );
96 static Text local readOperator ( Void );
97 static Text local readIdent ( Void );
98 static Cell local readRadixNumber ( Int );
99 static Cell local readNumber ( Void );
100 static Cell local readChar ( Void );
101 static Cell local readString ( Void );
102 static Void local saveStrChr ( Char );
103 static Cell local readAChar ( Bool );
105 static Bool local lazyReadMatches ( String );
106 static Cell local readEscapeChar ( Bool );
107 static Void local skipGap ( Void );
108 static Cell local readCtrlChar ( Void );
109 static Cell local readOctChar ( Void );
110 static Cell local readHexChar ( Void );
111 static Int local readHexDigit ( Char );
112 static Cell local readDecChar ( Void );
114 static Void local goOffside ( Int );
115 static Void local unOffside ( Void );
116 static Bool local canUnOffside ( Void );
118 static Void local skipWhitespace ( Void );
119 static Int local yylex ( Void );
120 static Int local repeatLast ( Void );
122 static Cell local parseInput ( Int );
124 static Bool local doesNotExceed ( String,Int,Int );
125 static Int local stringToInt ( String,Int );
128 /* --------------------------------------------------------------------------
129 * Text values for reserved words and special symbols:
130 * ------------------------------------------------------------------------*/
132 static Text textCase, textOfK, textData, textType, textIf;
133 static Text textThen, textElse, textWhere, textLet, textIn;
134 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
135 static Text textDefault, textDeriving, textDo, textClass, textInstance;
137 static Text textWith, textDlet;
140 static Text textCoco, textEq, textUpto, textAs, textLambda;
141 static Text textBar, textMinus, textFrom, textArrow, textLazy;
142 static Text textBang, textDot, textAll, textImplies;
143 static Text textWildcard;
145 static Text textModule, textImport, textInterface, textInstImport;
146 static Text textHiding, textQualified, textAsMod, textPrivileged;
147 static Text textExport, textDynamic, textUUExport;
148 static Text textUnsafe, textUUAll, textUUUsage;
150 Text textCcall; /* ccall */
151 Text textStdcall; /* stdcall */
153 Text textNum; /* Num */
154 Text textPrelude; /* Prelude */
155 Text textPlus; /* (+) */
157 static Cell conMain; /* Main */
158 static Cell varMain; /* main */
160 static Cell varMinus; /* (-) */
161 static Cell varPlus; /* (+) */
162 static Cell varBang; /* (!) */
163 static Cell varDot; /* (.) */
164 static Cell varHiding; /* hiding */
165 static Cell varQualified; /* qualified */
166 static Cell varAsMod; /* as */
167 static Cell varPrivileged; /* privileged */
169 static List imps; /* List of imports to be chased */
172 /* --------------------------------------------------------------------------
173 * Character set handling:
175 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
176 * character set. The following code provides methods for classifying
177 * input characters according to the lexical structure specified by the
178 * report. Hugs should still accept older programs because ASCII is
179 * essentially just a subset of the ISO character set.
181 * Notes: If you want to port Hugs to a machine that uses something
182 * substantially different from the ISO character set, then you will need
183 * to insert additional code to map between character sets.
185 * At some point, the following data structures may be exported in a .h
186 * file to allow the information contained here to be picked up in the
187 * implementation of LibChar is* primitives.
189 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
190 * ------------------------------------------------------------------------*/
192 static Bool charTabBuilt;
193 static unsigned char ctable[NUM_CHARS];
194 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
195 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
205 static Void local initCharTab() { /* Initialize char decode table */
206 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
207 #define setChar(x,c) ctable[c] |= (x)
208 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
209 #define setCopy(x,c) {Int i; \
210 for (i=0; i<NUM_CHARS; ++i) \
215 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
217 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
218 setRange(SMALL, 223,246); /* ISO lower case letters */
219 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
220 setChar (SMALL, '_');
222 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
223 setRange(LARGE, 192,214); /* ISO upper case letters */
224 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
226 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
227 setRange(SYMBOL, 215,215);
228 setChar (SYMBOL, 247);
229 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
231 setChar (IDAFTER, '\''); /* Characters in identifier */
232 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
234 setChar (ZPACE, ' '); /* ASCII space character */
235 setChar (ZPACE, 160); /* ISO non breaking space */
236 setRange(ZPACE, 9,13); /* special whitespace: \t\n\v\f\r */
238 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
239 setChars(PRINT, " '\""); /* Space and quotes */
240 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
250 /* --------------------------------------------------------------------------
251 * Single character input routines:
253 * At the lowest level of input, characters are read one at a time, with the
254 * current character held in c0 and the following (lookahead) character in
255 * c1. The coordinates of c0 within the file are held in (column,row).
256 * The input stream is advanced by one character using the skip() function.
257 * ------------------------------------------------------------------------*/
259 #define TABSIZE 8 /* spacing between tabstops */
261 #define NOTHING 0 /* what kind of input is being read?*/
262 #define KEYBOARD 1 /* - keyboard/console? */
263 #define SCRIPTFILE 2 /* - script file */
264 #define PROJFILE 3 /* - project file */
265 #define STRING 4 /* - string buffer? */
267 static Int reading = NOTHING;
269 static Target readSoFar;
270 static Int row, column, startColumn;
272 static FILE *inputStream = 0;
273 static Bool thisLiterate;
274 static String nextStringChar; /* next char in string buffer */
276 #if USE_READLINE /* for command line editors */
277 static String currentLine; /* editline or GNU readline */
278 static String nextChar;
279 #define nextConsoleChar() \
280 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
282 #define nextConsoleChar() getc(stdin)
285 static Int litLines; /* count defn lines in lit script */
286 #define DEFNCHAR '>' /* definition lines begin with this */
287 static Int lastLine; /* records type of last line read: */
288 #define STARTLINE 0 /* - at start of file, none read */
289 #define BLANKLINE 1 /* - blank (may preceed definition) */
290 #define TEXTLINE 2 /* - text comment */
291 #define DEFNLINE 3 /* - line containing definition */
292 #define CODELINE 4 /* - line inside code block */
294 #define BEGINCODE "\\begin{code}"
295 #define ENDCODE "\\end{code}"
298 static char *lineBuffer = NULL; /* getline() does the initial allocation */
300 #define LINEBUFFER_SIZE 1000
301 static char lineBuffer[LINEBUFFER_SIZE];
303 static int lineLength = 0;
304 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
305 static int linePtr = 0;
307 Void consoleInput(prompt) /* prepare to input characters from */
308 String prompt; { /* standard in (i.e. console/kbd) */
309 reading = KEYBOARD; /* keyboard input is Line oriented, */
310 c0 = /* i.e. input terminated by '\n' */
316 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
317 * avoids accidentally freeing currentLine twice.
320 String oldCurrentLine = currentLine;
321 currentLine = 0; /* We may lose the space of currentLine */
322 free(oldCurrentLine); /* if interrupted here - unlikely */
324 currentLine = readline(prompt);
325 nextChar = currentLine;
328 add_history(currentLine);
338 Void projInput(nm) /* prepare to input characters from */
339 String nm; { /* from named project file */
340 if ((inputStream = fopen(nm,"r"))!=0) {
348 ERRMSG(0) "Unable to open project file \"%s\"", nm
353 static Void local fileInput(nm,len) /* prepare to input characters from*/
354 String nm; /* named file (specified length is */
355 Long len; { /* used to set target for reading) */
356 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
358 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
359 char *cmd = malloc(reallen);
361 ERRMSG(0) "Unable to allocate memory for filter command."
364 strcpy(cmd,preprocessor);
367 inputStream = popen(cmd,"r");
370 inputStream = fopen(nm,"r");
373 inputStream = fopen(nm,"r");
376 reading = SCRIPTFILE;
382 lastLine = STARTLINE; /* literate file processing */
386 thisLiterate = literateMode(nm);
390 setGoal("Parsing", (Target)len);
393 ERRMSG(0) "Unable to open file \"%s\"", nm
398 Void stringInput(s) /* prepare to input characters from string */
413 static Bool local literateMode(nm) /* Select literate mode for file */
415 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
417 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
419 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
420 filenamecmp(dot+1,"verb")==0) /* literate scripts */
423 return literateScripts; /* otherwise, use the default */
427 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
431 len = 1 + strlen ( srcName );
432 *hiName = malloc(len);
433 *oName = malloc(len);
434 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
435 (*hiName)[0] = (*oName)[0] = 0;
436 dot = strrchr(srcName, '.');
438 if (filenamecmp(dot+1, "hs")==0 &&
439 filenamecmp(dot+1, "lhs")==0 &&
440 filenamecmp(dot+1, "verb")==0) return;
442 strcpy(*hiName, srcName);
443 dot = strrchr(*hiName, '.');
448 strcpy(*oName, srcName);
449 dot = strrchr(*oName, '.');
456 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
457 * I've removed the loop (since newLineSkip contains a loop too) and
458 * replaced the warnings with errors. ADR
461 * To deal with literate \begin{code}...\end{code} blocks,
462 * add a line buffer that rooms the current line. The old c0 and c1
463 * stream pointers are used as before within that buffer -- sof
465 * Upon reading a new line into the line buffer, we check to see if
466 * we're reading in a line containing \begin{code} or \end{code} and
467 * take appropriate action.
470 static Bool local linecmp(s,line) /* compare string with line */
471 String s; /* line may end in whitespace */
474 while (s[i] != '\0' && s[i] == line[i]) {
477 /* s[0..i-1] == line[0..i-1] */
478 if (s[i] != '\0') { /* check s `isPrefixOf` line */
481 while (isIn(line[i], ZPACE)) { /* allow whitespace at end of line */
484 return (line[i] == '\0');
487 /* Returns line length (including \n) or 0 upon EOF. */
488 static Int local nextLine()
492 Forget about fgets(), it is utterly braindead.
493 (Assumes \NUL free streams and does not gracefully deal
494 with overflow.) Instead, use GNU libc's getline().
496 lineLength = getline(&lineBuffer, &lineLength, inputStream);
498 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
499 lineLength = strlen(lineBuffer);
503 /* printf("Read: \"%s\"", lineBuffer); */
504 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
507 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
508 lineBuffer[1] == '!') {
509 lineBuffer[0]='\n'; /* pretend it's a blank line */
512 } else if (thisLiterate) {
513 if (linecmp(BEGINCODE, lineBuffer)) {
514 if (!inCodeBlock) { /* Entered a code block */
516 lineBuffer[0]='\n'; /* pretend it's a blank line */
521 ERRMSG(row) "\\begin{code} encountered inside code block"
525 else if (linecmp(ENDCODE, lineBuffer)) {
526 if (inCodeBlock) { /* Finished code block */
528 lineBuffer[0]='\n'; /* pretend it's a blank line */
533 ERRMSG(row) "\\end{code} encountered outside code block"
538 /* printf("Read: \"%s\"", lineBuffer); */
542 static Void local skip() { /* move forward one char in input */
543 if (c0!=EOF) { /* stream, updating c0, c1, ... */
544 if (c0=='\n') { /* Adjusting cursor coords as nec. */
547 if (reading==SCRIPTFILE)
551 column += TABSIZE - ((column-1)%TABSIZE);
560 if (reading==SCRIPTFILE)
564 else if (reading==KEYBOARD) {
569 c1 = nextConsoleChar();
570 #if IS_WIN32 && !HUGS_FOR_WINDOWS
573 /* On Win32, hitting ctrl-C causes the next getchar to
574 * fail - returning "-1" to indicate an error.
575 * This is one of the rare cases where "-1" does not mean EOF.
577 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
582 else if (reading==STRING) {
583 c1 = (unsigned char) *nextStringChar++;
588 if (lineLength <=0 || linePtr == lineLength) {
589 /* Current line, exhausted - get new one */
590 if (nextLine() <= 0) { /* EOF */
595 c1 = (unsigned char)lineBuffer[linePtr++];
599 c1 = (unsigned char)lineBuffer[linePtr++];
606 static Void local thisLineIs(kind) /* register kind of current line */
607 Int kind; { /* & check for literate script errs */
608 if (literateErrors) {
609 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
610 (kind==TEXTLINE && lastLine==DEFNLINE)) {
611 ERRMSG(row) "Program line next to comment"
618 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
619 /* assert(c0=='\n'); */
620 if (reading==SCRIPTFILE && thisLiterate) {
623 if (inCodeBlock) { /* pass chars on definition lines */
624 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
628 if (c0==DEFNCHAR) { /* pass chars on definition lines */
629 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
634 while (c0 != '\n' && isIn(c0,ZPACE)) /* maybe line is blank? */
636 if (c0=='\n' || c0==EOF)
637 thisLineIs(BLANKLINE);
639 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
640 while (c0!='\n' && c0!=EOF)
642 } /* by now, c0=='\n' or c0==EOF */
643 } while (c0!=EOF); /* if new line, start again */
645 if (litLines==0 && literateErrors) {
646 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
655 static Void local closeAnyInput() { /* Close input stream, if open, */
656 switch (reading) { /* or skip to end of console line */
658 case SCRIPTFILE : if (inputStream) {
659 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
671 case KEYBOARD : while (c0!=EOF)
678 /* --------------------------------------------------------------------------
679 * Parser: Uses table driven parser generated from parser.y using yacc
680 * ------------------------------------------------------------------------*/
684 /* --------------------------------------------------------------------------
685 * Single token input routines:
687 * The following routines read the values of particular kinds of token given
688 * that the first character of the token has already been located in c0 on
689 * entry to the routine.
690 * ------------------------------------------------------------------------*/
692 #define MAX_TOKEN 4000
693 #define startToken() tokPos = 0
694 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
695 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
696 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
698 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
699 static Int tokPos; /* input position in buffer */
700 static Int identType; /* identifier type: CONID / VARID */
701 static Int opType; /* operator type : CONOP / VAROP */
703 static Void local endToken() { /* check for token overflow */
704 if (tokPos>MAX_TOKEN) {
705 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
708 tokenStr[tokPos] = '\0';
711 static Text local readOperator() { /* read operator symbol */
716 } while (isISO(c0) && isIn(c0,SYMBOL));
717 opType = (tokenStr[0]==':' ? CONOP : VAROP);
719 return findText(tokenStr);
722 static Text local readIdent() { /* read identifier */
727 } while (isISO(c0) && isIn(c0,IDAFTER));
729 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
730 if (readingInterface)
731 return unZcodeThenFindText(tokenStr); else
732 return findText(tokenStr);
736 static Bool local doesNotExceed(s,radix,limit)
743 if (s[p] == 0) return TRUE;
744 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
745 n = radix*n + (s[p]-'0');
750 static Int local stringToInt(s,radix)
756 if (s[p] == 0) return n;
757 n = radix*n + (s[p]-'0');
762 static Cell local readRadixNumber(r) /* Read literal in specified radix */
763 Int r; { /* from input of the form 0c{digs} */
766 skip(); /* skip leading zero */
767 if ((d=readHexDigit(c1))<0 || d>=r) {
768 /* Special case; no digits, lex as */
769 /* if it had been written "0 c..." */
774 saveTokenChar('0'+readHexDigit(c0));
776 d = readHexDigit(c0);
777 } while (d>=0 && d<r);
781 if (doesNotExceed(tokenStr,r,MAXPOSINT))
782 return mkInt(stringToInt(tokenStr,r));
785 return stringToBignum(tokenStr);
787 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
792 static Cell local readNumber() { /* read numeric constant */
795 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
796 return readRadixNumber(16);
797 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
798 return readRadixNumber(8);
805 } while (isISO(c0) && isIn(c0,DIGIT));
807 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
809 if (doesNotExceed(tokenStr,10,MAXPOSINT))
810 return mkInt(stringToInt(tokenStr,10)); else
811 return stringToBignum(tokenStr);
814 saveTokenChar(c0); /* save decimal point */
816 do { /* process fractional part ... */
819 } while (isISO(c0) && isIn(c0,DIGIT));
821 if (c0=='e' || c0=='E') { /* look for exponent part... */
831 if (!isISO(c0) || !isIn(c0,DIGIT)) {
832 ERRMSG(row) "Missing digits in exponent"
839 } while (isISO(c0) && isIn(c0,DIGIT));
844 return mkFloat(stringToFloat(tokenStr));
853 static Cell local readChar() { /* read character constant */
857 if (c0=='\'' || c0=='\n' || c0==EOF) {
858 ERRMSG(row) "Illegal character constant"
862 charRead = readAChar(FALSE);
867 ERRMSG(row) "Improperly terminated character constant"
873 static Cell local readString() { /* read string literal */
878 while (c0!='\"' && c0!='\n' && c0!=EOF) {
881 saveStrChr(charOf(c));
887 ERRMSG(row) "Improperly terminated string"
891 return mkStr(findText(tokenStr));
894 static Void local saveStrChr(c) /* save character in string */
896 if (c!='\0' && c!='\\') { /* save non null char as single char*/
899 else { /* save null char as TWO null chars */
900 if (tokPos+1<MAX_TOKEN) {
910 static Cell local readAChar(isStrLit) /* read single char constant */
911 Bool isStrLit; { /* TRUE => enable \& and gaps */
914 if (c0=='\\') /* escape character? */
915 return readEscapeChar(isStrLit);
917 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
920 skip(); /* normal character? */
924 /* --------------------------------------------------------------------------
925 * Character escape code sequences:
926 * ------------------------------------------------------------------------*/
928 static struct { /* table of special escape codes */
932 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
933 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
934 {"\'",'\''}, {"v", 11},
935 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
936 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
937 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
938 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
939 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
940 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
941 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
942 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
943 {"SP", 32}, {"DEL", 127},
947 static Int alreadyMatched; /* Record portion of input stream */
948 static char alreadyRead[10]; /* that has been read w/o a match */
950 static Bool local lazyReadMatches(s) /* compare input stream with string */
951 String s; { /* possibly using characters that */
952 int i; /* have already been read */
954 for (i=0; i<alreadyMatched; ++i)
955 if (alreadyRead[i]!=s[i])
958 while (s[i] && s[i]==c0) {
959 alreadyRead[alreadyMatched++]=(char)c0;
967 static Cell local readEscapeChar(isStrLit)/* read escape character */
973 case '&' : if (isStrLit) {
977 ERRMSG(row) "Illegal use of `\\&' in character constant"
981 case '^' : return readCtrlChar();
983 case 'o' : return readOctChar();
984 case 'x' : return readHexChar();
986 default : if (!isISO(c0)) {
987 ERRMSG(row) "Illegal escape sequence"
990 else if (isIn(c0,ZPACE)) {
995 ERRMSG(row) "Illegal use of gap in character constant"
999 else if (isIn(c0,DIGIT))
1000 return readDecChar();
1003 for (alreadyMatched=0; escapes[i].codename; i++)
1004 if (lazyReadMatches(escapes[i].codename))
1005 return mkChar(escapes[i].codenumber);
1007 alreadyRead[alreadyMatched++] = (char)c0;
1008 alreadyRead[alreadyMatched++] = '\0';
1009 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
1012 return NIL;/*NOTREACHED*/
1015 static Void local skipGap() { /* skip over gap in string literal */
1016 do /* (simplified in Haskell 1.1) */
1021 while (isISO(c0) && isIn(c0,ZPACE));
1023 ERRMSG(row) "Missing `\\' terminating string literal gap"
1029 static Cell local readCtrlChar() { /* read escape sequence \^x */
1030 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1034 if ((which = strchr(controls,c0))==NULL) {
1035 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1039 return mkChar(which-controls);
1042 static Cell local readOctChar() { /* read octal character constant */
1047 if ((d = readHexDigit(c0))<0 || d>=8) {
1048 ERRMSG(row) "Empty octal character escape"
1052 if (overflows(n,8,d,MAXCHARVAL)) {
1053 ERRMSG(row) "Octal character escape out of range"
1058 } while ((d = readHexDigit(c0))>=0 && d<8);
1063 static Cell local readHexChar() { /* read hex character constant */
1068 if ((d = readHexDigit(c0))<0) {
1069 ERRMSG(row) "Empty hexadecimal character escape"
1073 if (overflows(n,16,d,MAXCHARVAL)) {
1074 ERRMSG(row) "Hexadecimal character escape out of range"
1079 } while ((d = readHexDigit(c0))>=0);
1084 static Int local readHexDigit(c) /* read single hex digit */
1086 if ('0'<=c && c<='9')
1088 if ('A'<=c && c<='F')
1089 return 10 + (c-'A');
1090 if ('a'<=c && c<='f')
1091 return 10 + (c-'a');
1095 static Cell local readDecChar() { /* read decimal character constant */
1099 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1100 ERRMSG(row) "Decimal character escape out of range"
1103 n = 10*n + (c0-'0');
1105 } while (c0!=EOF && isIn(c0,DIGIT));
1110 /* --------------------------------------------------------------------------
1111 * Produce printable representation of character:
1112 * ------------------------------------------------------------------------*/
1114 String unlexChar(c,quote) /* return string representation of */
1115 Char c; /* character... */
1116 Char quote; { /* protect quote character */
1117 static char buffer[12];
1119 if (c<0) /* deal with sign extended chars.. */
1122 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1123 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1125 buffer[1] = (char)c;
1129 buffer[0] = (char)c;
1133 else { /* look for escape code */
1135 for (escs=0; escapes[escs].codename; escs++)
1136 if (escapes[escs].codenumber==c) {
1137 sprintf(buffer,"\\%s",escapes[escs].codename);
1140 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1145 Void printString(s) /* print string s, using quotes and */
1146 String s; { /* escapes if any parts need them */
1150 while ((c = *t)!=0 && isISO(c)
1151 && isIn(c,PRINT) && c!='"' && !isIn(c,ZPACE)) {
1157 Printf("%s",unlexChar(*t,'"'));
1165 /* -------------------------------------------------------------------------
1166 * Handle special types of input for use in interpreter:
1167 * -----------------------------------------------------------------------*/
1169 Command readCommand(cmds,start,sys) /* read command at start of input */
1170 struct cmd *cmds; /* line in interpreter */
1171 Char start; /* characters introducing a cmd */
1172 Char sys; { /* character for shell escape */
1173 while (c0==' ' || c0 =='\t')
1176 if (c0=='\n') /* look for blank command lines */
1178 if (c0==EOF) /* look for end of input stream */
1180 if (c0==sys) { /* single character system escape */
1184 if (c0==start && c1==sys) { /* two character system escape */
1190 startToken(); /* All cmds start with start */
1191 if (c0==start) /* except default (usually EVAL) */
1192 do { /* which is empty */
1195 } while (c0!=EOF && !isIn(c0,ZPACE));
1198 for (; cmds->cmdString; ++cmds)
1199 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1200 (tokenStr[0]==start &&
1201 tokenStr[1]==(cmds->cmdString)[1] &&
1203 return (cmds->cmdCode);
1207 String readFilename() { /* Read filename from input (if any)*/
1208 if (reading==PROJFILE)
1211 while (c0==' ' || c0=='\t')
1214 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1218 while (c0!=EOF && !isIn(c0,ZPACE)) {
1221 while (c0!=EOF && c0!='\"') {
1222 Cell c = readAChar(TRUE);
1224 saveTokenChar(charOf(c));
1230 ERRMSG(row) "a closing quote, '\"', was expected"
1243 String readLine() { /* Read command line from input */
1244 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1248 while (c0!='\n' && c0!=EOF) {
1257 /* --------------------------------------------------------------------------
1258 * This lexer supports the Haskell layout rule:
1260 * - Layout area bounded by { ... }, with `;'s in between.
1261 * - A `{' is a HARD indentation and can only be matched by a corresponding
1263 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1264 * is inserted with the column number of the first token after the
1265 * WHERE/LET/OF keyword.
1266 * - When a soft indentation is uppermost on the indetation stack with
1267 * column col' we insert:
1268 * `}' in front of token with column<col' and pop indentation off stack,
1269 * `;' in front of token with column==col'.
1270 * ------------------------------------------------------------------------*/
1272 #define MAXINDENT 100 /* maximum nesting of layout rule */
1273 static Int layout[MAXINDENT+1];/* indentation stack */
1274 #define HARD (-1) /* indicates hard indentation */
1275 static Int indentDepth = (-1); /* current indentation nesting */
1277 static Void local goOffside(col) /* insert offside marker */
1278 Int col; { /* for specified column */
1280 if (indentDepth>=MAXINDENT) {
1281 ERRMSG(row) "Too many levels of program nesting"
1284 layout[++indentDepth] = col;
1287 static Void local unOffside() { /* leave layout rule area */
1292 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1294 return indentDepth>=0 && layout[indentDepth]!=HARD;
1297 /* --------------------------------------------------------------------------
1299 * ------------------------------------------------------------------------*/
1301 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1302 for (;;) /* Strictly speaking, this code is */
1303 if (c0==EOF) /* a little more liberal than the */
1304 return; /* report allows ... */
1307 else if (isIn(c0,ZPACE))
1309 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1311 Int origRow = row; /* Save original row number */
1314 while (nesting>0 && c0!=EOF)
1315 if (c0=='{' && c1=='-') {
1320 else if (c0=='-' && c1=='}') {
1330 ERRMSG(origRow) "Unterminated nested comment {- ..."
1334 else if (c0=='-' && c1=='-') { /* One line comment */
1337 while (c0!='\n' && c0!=EOF);
1345 static Bool firstToken; /* Set to TRUE for first token */
1346 static Int firstTokenIs; /* ... with token value stored here */
1348 static Int local yylex() { /* Read next input token ... */
1349 static Bool insertOpen = FALSE;
1350 static Bool insertedToken = FALSE;
1351 static Text textRepeat;
1353 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1355 if (firstToken) { /* Special case for first token */
1359 insertedToken = FALSE;
1360 if (reading==KEYBOARD)
1361 textRepeat = findText(repeatStr);
1362 return firstTokenIs;
1365 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1367 insertedToken = TRUE;
1369 push(yylval = mkInt(row));
1373 /* ----------------------------------------------------------------------
1374 * Skip white space, and insert tokens to support layout rules as reqd.
1375 * --------------------------------------------------------------------*/
1378 startColumn = column;
1379 push(yylval = mkInt(row)); /* default token value is line no. */
1380 /* subsequent changes to yylval must also set top() to the same value */
1382 if (indentDepth>=0) { /* layout rule(s) active ? */
1383 if (insertedToken) /* avoid inserting multiple `;'s */
1384 insertedToken = FALSE; /* or putting `;' after `{' */
1386 if (offsideON && layout[indentDepth]!=HARD) {
1387 if (column<layout[indentDepth]) {
1391 else if (column==layout[indentDepth] && c0!=EOF) {
1392 insertedToken = TRUE;
1398 /* ----------------------------------------------------------------------
1399 * Now try to identify token type:
1400 * --------------------------------------------------------------------*/
1402 if (readingInterface) {
1403 if (c0 == '(' && c1 == '#') { skip(); skip(); return UTL; };
1404 if (c0 == '#' && c1 == ')') { skip(); skip(); return UTR; };
1408 case EOF : return 0; /* End of file/input */
1410 /* The next 10 characters make up the `special' category in 1.3 */
1411 case '(' : skip(); return '(';
1412 case ')' : skip(); return ')';
1413 case ',' : skip(); return ',';
1414 case ';' : skip(); return ';';
1415 case '[' : skip(); return '[';
1416 case ']' : skip(); return ']';
1417 case '`' : skip(); return '`';
1418 case '{' : if (offsideON) goOffside(HARD);
1421 case '}' : if (offsideON && indentDepth<0) {
1422 ERRMSG(row) "Misplaced `}'"
1425 if (!(offsideON && layout[indentDepth]!=HARD))
1426 skip(); /* skip over hard }*/
1428 unOffside(); /* otherwise, we have to insert a }*/
1429 return '}'; /* to (try to) avoid an error... */
1431 /* Character and string literals */
1432 case '\'' : top() = yylval = readChar();
1435 case '\"' : top() = yylval = readString();
1440 if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1441 Text it; /* Look for implicit param name */
1444 top() = yylval = ap(IPVAR,it);
1445 return identType=IPVARID;
1449 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1450 Text it; /* Look for record selector name */
1453 top() = yylval = ap(RECSEL,mkExt(it));
1454 return identType=RECSELID;
1457 if (isIn(c0,LARGE)) { /* Look for qualified name */
1458 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1459 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1461 skip(); /* Skip qualifying dot */
1462 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1463 it2 = readOperator();
1464 if (opType==CONOP) {
1465 top() = yylval = mkQConOp(it,it2);
1468 top() = yylval = mkQVarOp(it,it2);
1471 } else { /* Qualified identifier */
1473 if (identType==CONID) {
1474 top() = yylval = mkQCon(it,it2);
1477 top() = yylval = mkQVar(it,it2);
1482 top() = yylval = mkCon(it);
1486 if (isIn(c0,(SMALL|LARGE))) {
1487 Text it = readIdent();
1489 if (it==textCase) return CASEXP;
1490 if (it==textOfK) lookAhead(OF);
1491 if (it==textData) return DATA;
1492 if (it==textType) return TYPE;
1493 if (it==textIf) return IF;
1494 if (it==textThen) return THEN;
1495 if (it==textElse) return ELSE;
1496 if (it==textWhere) lookAhead(WHERE);
1497 if (it==textLet) lookAhead(LET);
1498 if (it==textIn) return IN;
1499 if (it==textInfix) return INFIXN;
1500 if (it==textInfixl) return INFIXL;
1501 if (it==textInfixr) return INFIXR;
1502 if (it==textForeign) return FOREIGN;
1503 if (it==textUnsafe) return UNSAFE;
1504 if (it==textNewtype) return TNEWTYPE;
1505 if (it==textDefault) return DEFAULT;
1506 if (it==textDeriving) return DERIVING;
1507 if (it==textDo) lookAhead(DO);
1508 if (it==textClass) return TCLASS;
1509 if (it==textInstance) return TINSTANCE;
1510 if (it==textModule) return TMODULE;
1511 if (it==textInterface) return INTERFACE;
1512 if (it==textInstImport) return INSTIMPORT;
1513 if (it==textImport) return IMPORT;
1514 if (it==textExport) return EXPORT;
1515 if (it==textDynamic) return DYNAMIC;
1516 if (it==textCcall) return CCALL;
1517 if (it==textStdcall) return STDKALL;
1518 if (it==textUUExport) return UUEXPORT;
1519 if (it==textHiding) return HIDING;
1520 if (it==textQualified) return QUALIFIED;
1521 if (it==textAsMod) return ASMOD;
1522 if (it==textPrivileged) return PRIVILEGED;
1523 if (it==textWildcard) return '_';
1524 if (it==textAll && !haskell98) return ALL;
1526 if (it==textWith && !haskell98) lookAhead(WITH);
1527 if (it==textDlet && !haskell98) lookAhead(DLET);
1529 if (it==textUUAll) return ALL;
1530 if (it==textUUUsage) return UUUSAGE;
1531 if (it==textRepeat && reading==KEYBOARD)
1532 return repeatLast();
1534 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1538 if (isIn(c0,SYMBOL)) {
1539 Text it = readOperator();
1541 if (it==textCoco) return COCO;
1542 if (it==textEq) return '=';
1543 if (it==textUpto) return UPTO;
1544 if (it==textAs) return '@';
1545 if (it==textLambda) return '\\';
1546 if (it==textBar) return '|';
1547 if (it==textFrom) return FROM;
1548 if (it==textMinus) return '-';
1549 if (it==textPlus) return '+';
1550 if (it==textBang) return '!';
1551 if (it==textDot) return '.';
1552 if (it==textArrow) return ARROW;
1553 if (it==textLazy) return '~';
1554 if (it==textImplies) return IMPLIES;
1555 if (it==textRepeat && reading==KEYBOARD)
1556 return repeatLast();
1558 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1562 if (isIn(c0,DIGIT)) {
1563 top() = yylval = readNumber();
1567 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1570 return 0; /*NOTREACHED*/
1573 static Int local repeatLast() { /* Obtain last expression entered */
1574 if (isNull(yylval=getLastExpr())) {
1575 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1581 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1582 Text t; { /* by t ... */
1583 String s = textToStr(t);
1584 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1587 Syntax syntaxOf(n) /* Find syntax for name */
1589 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1590 return defaultSyntax(name(n).text);
1591 return name(n).syntax;
1594 /* --------------------------------------------------------------------------
1595 * main entry points to parser/lexer:
1596 * ------------------------------------------------------------------------*/
1598 static Cell local parseInput(startWith)/* Parse input with given first tok,*/
1599 Int startWith; { /* determining whether to read a */
1600 Cell final = NIL; /* script or an expression */
1602 firstTokenIs = startWith;
1603 if (startWith==INTERFACE) {
1604 offsideON = FALSE; readingInterface = TRUE;
1606 offsideON = TRUE; readingInterface = FALSE;
1610 if (yyparse()) { /* This can only be parser overflow */
1611 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1612 EEND; /* in the parser... */
1615 if (!stackEmpty()) /* stack should now be empty */
1616 internal("parseInput");
1621 static String memPrefix = "@mem@";
1622 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1624 Void makeMemScript(mem,fname)
1627 strcat(fname,memPrefix);
1628 itoa((int)mem, fname+strlen(fname), 10);
1631 Bool isMemScript(fname)
1633 return (strstr(fname,memPrefix) != NULL);
1636 String memScriptString(fname)
1638 String p = strstr(fname,memPrefix);
1640 return (String)atoi(p+lenMemPrefix);
1646 Void parseScript(fname,len) /* Read a script, possibly from mem */
1650 if (isMemScript(fname)) {
1651 char* s = memScriptString(fname);
1654 fileInput(fname,len);
1659 Void parseScript(nm,len) /* Read a script */
1661 Long len; { /* Used to set a target for reading */
1668 Void parseExp() { /* Read an expression to evaluate */
1670 setLastExpr(inputExpr);
1674 #if EXPLAIN_INSTANCE_RESOLUTION
1675 Void parseContext() { /* Read a context to prove */
1676 parseInput(CONTEXT);
1680 Cell parseInterface(nm,len) /* Read a GHC interface file */
1682 Long len; { /* Used to set a target for reading */
1685 return parseInput(INTERFACE);
1689 /* --------------------------------------------------------------------------
1691 * ------------------------------------------------------------------------*/
1696 case POSTPREL: break;
1698 case PREPREL : initCharTab();
1699 textCase = findText("case");
1700 textOfK = findText("of");
1701 textData = findText("data");
1702 textType = findText("type");
1703 textIf = findText("if");
1704 textThen = findText("then");
1705 textElse = findText("else");
1706 textWhere = findText("where");
1707 textLet = findText("let");
1708 textIn = findText("in");
1709 textInfix = findText("infix");
1710 textInfixl = findText("infixl");
1711 textInfixr = findText("infixr");
1712 textForeign = findText("foreign");
1713 textUnsafe = findText("unsafe");
1714 textNewtype = findText("newtype");
1715 textDefault = findText("default");
1716 textDeriving = findText("deriving");
1717 textDo = findText("do");
1718 textClass = findText("class");
1720 textWith = findText("with");
1721 textDlet = findText("dlet");
1723 textInstance = findText("instance");
1724 textCoco = findText("::");
1725 textEq = findText("=");
1726 textUpto = findText("..");
1727 textAs = findText("@");
1728 textLambda = findText("\\");
1729 textBar = findText("|");
1730 textMinus = findText("-");
1731 textPlus = findText("+");
1732 textFrom = findText("<-");
1733 textArrow = findText("->");
1734 textLazy = findText("~");
1735 textBang = findText("!");
1736 textDot = findText(".");
1737 textImplies = findText("=>");
1738 textPrelude = findText("Prelude");
1739 textNum = findText("Num");
1740 textModule = findText("module");
1741 textInterface = findText("__interface");
1742 textInstImport = findText("__instimport");
1743 textExport = findText("export");
1744 textDynamic = findText("dynamic");
1745 textCcall = findText("ccall");
1746 textStdcall = findText("stdcall");
1747 textUUExport = findText("__export");
1748 textImport = findText("import");
1749 textHiding = findText("hiding");
1750 textQualified = findText("qualified");
1751 textAsMod = findText("as");
1752 textPrivileged = findText("privileged");
1753 textWildcard = findText("_");
1754 textAll = findText("forall");
1755 textUUAll = findText("__forall");
1756 textUUUsage = findText("__u");
1757 varMinus = mkVar(textMinus);
1758 varPlus = mkVar(textPlus);
1759 varBang = mkVar(textBang);
1760 varDot = mkVar(textDot);
1761 varHiding = mkVar(textHiding);
1762 varQualified = mkVar(textQualified);
1763 varAsMod = mkVar(textAsMod);
1764 varPrivileged = mkVar(textPrivileged);
1765 conMain = mkCon(findText("Main"));
1766 varMain = mkVar(findText("main"));
1772 case RESET : tyconDefns = NIL;
1780 foreignImports= NIL;
1781 foreignExports= NIL;
1789 case BREAK : if (reading==KEYBOARD)
1793 case MARK : mark(tyconDefns);
1800 mark(unqualImports);
1801 mark(foreignImports);
1802 mark(foreignExports);
1813 mark(varPrivileged);
1821 /*-------------------------------------------------------------------------*/