2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
6 * Yale Haskell Group, and the Oregon Graduate Institute of Science and
7 * Technology, 1994-1999, All rights reserved. It is distributed as
8 * free software under the license in the file "License", which is
9 * included in the distribution.
11 * $RCSfile: input.c,v $
13 * $Date: 1999/12/01 11:50:34 $
14 * ------------------------------------------------------------------------*/
32 #if IS_WIN32 || HUGS_FOR_WINDOWS
36 #if HAVE_LIBREADLINE && HAVE_READLINE_READLINE_H && HAVE_READLINE_HISTORY_H
37 #define USE_READLINE 1
39 #define USE_READLINE 0
43 #include <readline/readline.h>
44 #include <readline/history.h>
48 /* --------------------------------------------------------------------------
50 * ------------------------------------------------------------------------*/
52 List tyconDefns = NIL; /* type constructor definitions */
53 List typeInDefns = NIL; /* type synonym restrictions */
54 List valDefns = NIL; /* value definitions in script */
55 List classDefns = NIL; /* class defns in script */
56 List instDefns = NIL; /* instance defns in script */
57 List selDefns = NIL; /* list of selector lists */
58 List genDefns = NIL; /* list of generated names */
59 List unqualImports = NIL; /* unqualified import list */
60 List foreignImports = NIL; /* foreign imports */
61 List foreignExports = NIL; /* foreign exportsd */
62 List defaultDefns = NIL; /* default definitions (if any) */
63 Int defaultLine = 0; /* line in which default defs occur*/
64 List evalDefaults = NIL; /* defaults for evaluator */
66 Cell inputExpr = NIL; /* input expression */
67 Cell inputContext = NIL; /* input context */
68 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
69 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
70 Bool offsideON = TRUE; /* TRUE => implement offside rule */
71 Bool readingInterface = FALSE;
73 String repeatStr = 0; /* Repeat last expr */
75 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
76 String preprocessor = 0;
79 /* --------------------------------------------------------------------------
80 * Local function prototypes:
81 * ------------------------------------------------------------------------*/
83 static Void local initCharTab Args((Void));
84 static Void local fileInput Args((String,Long));
85 static Bool local literateMode Args((String));
86 static Bool local linecmp Args((String,String));
87 static Int local nextLine Args((Void));
88 static Void local skip Args((Void));
89 static Void local thisLineIs Args((Int));
90 static Void local newlineSkip Args((Void));
91 static Void local closeAnyInput Args((Void));
93 Int yyparse Args((Void)); /* can't stop yacc making this */
94 /* public, but don't advertise */
95 /* it in a header file. */
97 static Void local endToken Args((Void));
98 static Text local readOperator Args((Void));
99 static Text local readIdent Args((Void));
100 static Cell local readRadixNumber Args((Int));
101 static Cell local readNumber Args((Void));
102 static Cell local readChar Args((Void));
103 static Cell local readString Args((Void));
104 static Void local saveStrChr Args((Char));
105 static Cell local readAChar Args((Bool));
107 static Bool local lazyReadMatches Args((String));
108 static Cell local readEscapeChar Args((Bool));
109 static Void local skipGap Args((Void));
110 static Cell local readCtrlChar Args((Void));
111 static Cell local readOctChar Args((Void));
112 static Cell local readHexChar Args((Void));
113 static Int local readHexDigit Args((Char));
114 static Cell local readDecChar Args((Void));
116 static Void local goOffside Args((Int));
117 static Void local unOffside Args((Void));
118 static Bool local canUnOffside Args((Void));
120 static Void local skipWhitespace Args((Void));
121 static Int local yylex Args((Void));
122 static Int local repeatLast Args((Void));
124 static Void local parseInput Args((Int));
126 static Bool local doesNotExceed Args((String,Int,Int));
127 static Int local stringToInt Args((String,Int));
130 /* --------------------------------------------------------------------------
131 * Text values for reserved words and special symbols:
132 * ------------------------------------------------------------------------*/
134 static Text textCase, textOfK, textData, textType, textIf;
135 static Text textThen, textElse, textWhere, textLet, textIn;
136 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
137 static Text textDefault, textDeriving, textDo, textClass, textInstance;
139 static Text textWith, textDlet;
142 static Text textCoco, textEq, textUpto, textAs, textLambda;
143 static Text textBar, textMinus, textFrom, textArrow, textLazy;
144 static Text textBang, textDot, textAll, textImplies;
145 static Text textWildcard;
147 static Text textModule, textImport, textInterface, textInstImport;
148 static Text textHiding, textQualified, textAsMod;
149 static Text textExport, textDynamic, textUUExport;
150 static Text textUnsafe, textUUAll;
152 Text textCcall; /* ccall */
153 Text textStdcall; /* stdcall */
155 Text textNum; /* Num */
156 Text textPrelude; /* Prelude */
157 Text textPlus; /* (+) */
159 static Cell conMain; /* Main */
160 static Cell varMain; /* main */
162 static Cell varMinus; /* (-) */
163 static Cell varPlus; /* (+) */
164 static Cell varBang; /* (!) */
165 static Cell varDot; /* (.) */
166 static Cell varHiding; /* hiding */
167 static Cell varQualified; /* qualified */
168 static Cell varAsMod; /* as */
170 static List imps; /* List of imports to be chased */
173 /* --------------------------------------------------------------------------
174 * Character set handling:
176 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
177 * character set. The following code provides methods for classifying
178 * input characters according to the lexical structure specified by the
179 * report. Hugs should still accept older programs because ASCII is
180 * essentially just a subset of the ISO character set.
182 * Notes: If you want to port Hugs to a machine that uses something
183 * substantially different from the ISO character set, then you will need
184 * to insert additional code to map between character sets.
186 * At some point, the following data structures may be exported in a .h
187 * file to allow the information contained here to be picked up in the
188 * implementation of LibChar is* primitives.
190 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
191 * ------------------------------------------------------------------------*/
193 static Bool charTabBuilt;
194 static unsigned char ctable[NUM_CHARS];
195 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
196 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
206 static Void local initCharTab() { /* Initialize char decode table */
207 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
208 #define setChar(x,c) ctable[c] |= (x)
209 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
210 #define setCopy(x,c) {Int i; \
211 for (i=0; i<NUM_CHARS; ++i) \
216 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
218 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
219 setRange(SMALL, 223,246); /* ISO lower case letters */
220 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
221 setChar (SMALL, '_');
223 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
224 setRange(LARGE, 192,214); /* ISO upper case letters */
225 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
227 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
228 setRange(SYMBOL, 215,215);
229 setChar (SYMBOL, 247);
230 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
232 setChar (IDAFTER, '\''); /* Characters in identifier */
233 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
235 setChar (ZPACE, ' '); /* ASCII space character */
236 setChar (ZPACE, 160); /* ISO non breaking space */
237 setRange(ZPACE, 9,13); /* special whitespace: \t\n\v\f\r */
239 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
240 setChars(PRINT, " '\""); /* Space and quotes */
241 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
251 /* --------------------------------------------------------------------------
252 * Single character input routines:
254 * At the lowest level of input, characters are read one at a time, with the
255 * current character held in c0 and the following (lookahead) character in
256 * c1. The corrdinates of c0 within the file are held in (column,row).
257 * The input stream is advanced by one character using the skip() function.
258 * ------------------------------------------------------------------------*/
260 #define TABSIZE 8 /* spacing between tabstops */
262 #define NOTHING 0 /* what kind of input is being read?*/
263 #define KEYBOARD 1 /* - keyboard/console? */
264 #define SCRIPTFILE 2 /* - script file */
265 #define PROJFILE 3 /* - project file */
266 #define STRING 4 /* - string buffer? */
268 static Int reading = NOTHING;
270 static Target readSoFar;
271 static Int row, column, startColumn;
273 static FILE *inputStream = 0;
274 static Bool thisLiterate;
275 static String nextStringChar; /* next char in string buffer */
277 #if USE_READLINE /* for command line editors */
278 static String currentLine; /* editline or GNU readline */
279 static String nextChar;
280 #define nextConsoleChar() \
281 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
283 #define nextConsoleChar() getc(stdin)
286 static Int litLines; /* count defn lines in lit script */
287 #define DEFNCHAR '>' /* definition lines begin with this */
288 static Int lastLine; /* records type of last line read: */
289 #define STARTLINE 0 /* - at start of file, none read */
290 #define BLANKLINE 1 /* - blank (may preceed definition) */
291 #define TEXTLINE 2 /* - text comment */
292 #define DEFNLINE 3 /* - line containing definition */
293 #define CODELINE 4 /* - line inside code block */
295 #define BEGINCODE "\\begin{code}"
296 #define ENDCODE "\\end{code}"
299 static char *lineBuffer = NULL; /* getline() does the initial allocation */
301 #define LINEBUFFER_SIZE 1000
302 static char lineBuffer[LINEBUFFER_SIZE];
304 static int lineLength = 0;
305 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
306 static int linePtr = 0;
308 Void consoleInput(prompt) /* prepare to input characters from */
309 String prompt; { /* standard in (i.e. console/kbd) */
310 reading = KEYBOARD; /* keyboard input is Line oriented, */
311 c0 = /* i.e. input terminated by '\n' */
317 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
318 * avoids accidentally freeing currentLine twice.
321 String oldCurrentLine = currentLine;
322 currentLine = 0; /* We may lose the space of currentLine */
323 free(oldCurrentLine); /* if interrupted here - unlikely */
325 currentLine = readline(prompt);
326 nextChar = currentLine;
329 add_history(currentLine);
339 Void projInput(nm) /* prepare to input characters from */
340 String nm; { /* from named project file */
341 if ((inputStream = fopen(nm,"r"))!=0) {
349 ERRMSG(0) "Unable to open project file \"%s\"", nm
354 static Void local fileInput(nm,len) /* prepare to input characters from*/
355 String nm; /* named file (specified length is */
356 Long len; { /* used to set target for reading) */
357 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
359 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
360 char *cmd = malloc(reallen);
362 ERRMSG(0) "Unable to allocate memory for filter command."
365 strcpy(cmd,preprocessor);
368 inputStream = popen(cmd,"r");
371 inputStream = fopen(nm,"r");
374 inputStream = fopen(nm,"r");
377 reading = SCRIPTFILE;
383 lastLine = STARTLINE; /* literate file processing */
387 thisLiterate = literateMode(nm);
391 setGoal("Parsing", (Target)len);
394 ERRMSG(0) "Unable to open file \"%s\"", nm
399 Void stringInput(s) /* prepare to input characters from string */
414 static Bool local literateMode(nm) /* Select literate mode for file */
416 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
418 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
420 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
421 filenamecmp(dot+1,"verb")==0) /* literate scripts */
424 return literateScripts; /* otherwise, use the default */
428 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
432 len = 1 + strlen ( srcName );
433 *hiName = malloc(len);
434 *oName = malloc(len);
435 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
436 (*hiName)[0] = (*oName)[0] = 0;
437 dot = strrchr(srcName, '.');
439 if (filenamecmp(dot+1, "hs")==0 &&
440 filenamecmp(dot+1, "lhs")==0 &&
441 filenamecmp(dot+1, "verb")==0) return;
443 strcpy(*hiName, srcName);
444 dot = strrchr(*hiName, '.');
449 strcpy(*oName, srcName);
450 dot = strrchr(*oName, '.');
457 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
458 * I've removed the loop (since newLineSkip contains a loop too) and
459 * replaced the warnings with errors. ADR
462 * To deal with literate \begin{code}...\end{code} blocks,
463 * add a line buffer that rooms the current line. The old c0 and c1
464 * stream pointers are used as before within that buffer -- sof
466 * Upon reading a new line into the line buffer, we check to see if
467 * we're reading in a line containing \begin{code} or \end{code} and
468 * take appropriate action.
471 static Bool local linecmp(s,line) /* compare string with line */
472 String s; /* line may end in whitespace */
475 while (s[i] != '\0' && s[i] == line[i]) {
478 /* s[0..i-1] == line[0..i-1] */
479 if (s[i] != '\0') { /* check s `isPrefixOf` line */
482 while (isIn(line[i], ZPACE)) { /* allow whitespace at end of line */
485 return (line[i] == '\0');
488 /* Returns line length (including \n) or 0 upon EOF. */
489 static Int local nextLine()
493 Forget about fgets(), it is utterly braindead.
494 (Assumes \NUL free streams and does not gracefully deal
495 with overflow.) Instead, use GNU libc's getline().
497 lineLength = getline(&lineBuffer, &lineLength, inputStream);
499 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
500 lineLength = strlen(lineBuffer);
504 /* printf("Read: \"%s\"", lineBuffer); */
505 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
508 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
509 lineBuffer[1] == '!') {
510 lineBuffer[0]='\n'; /* pretend it's a blank line */
513 } else if (thisLiterate) {
514 if (linecmp(BEGINCODE, lineBuffer)) {
515 if (!inCodeBlock) { /* Entered a code block */
517 lineBuffer[0]='\n'; /* pretend it's a blank line */
522 ERRMSG(row) "\\begin{code} encountered inside code block"
526 else if (linecmp(ENDCODE, lineBuffer)) {
527 if (inCodeBlock) { /* Finished code block */
529 lineBuffer[0]='\n'; /* pretend it's a blank line */
534 ERRMSG(row) "\\end{code} encountered outside code block"
539 /* printf("Read: \"%s\"", lineBuffer); */
543 static Void local skip() { /* move forward one char in input */
544 if (c0!=EOF) { /* stream, updating c0, c1, ... */
545 if (c0=='\n') { /* Adjusting cursor coords as nec. */
548 if (reading==SCRIPTFILE)
552 column += TABSIZE - ((column-1)%TABSIZE);
561 if (reading==SCRIPTFILE)
565 else if (reading==KEYBOARD) {
570 c1 = nextConsoleChar();
571 #if IS_WIN32 && !HUGS_FOR_WINDOWS
574 /* On Win32, hitting ctrl-C causes the next getchar to
575 * fail - returning "-1" to indicate an error.
576 * This is one of the rare cases where "-1" does not mean EOF.
578 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
583 else if (reading==STRING) {
584 c1 = (unsigned char) *nextStringChar++;
589 if (lineLength <=0 || linePtr == lineLength) {
590 /* Current line, exhausted - get new one */
591 if (nextLine() <= 0) { /* EOF */
596 c1 = (unsigned char)lineBuffer[linePtr++];
600 c1 = (unsigned char)lineBuffer[linePtr++];
607 static Void local thisLineIs(kind) /* register kind of current line */
608 Int kind; { /* & check for literate script errs */
609 if (literateErrors) {
610 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
611 (kind==TEXTLINE && lastLine==DEFNLINE)) {
612 ERRMSG(row) "Program line next to comment"
619 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
620 /* assert(c0=='\n'); */
621 if (reading==SCRIPTFILE && thisLiterate) {
624 if (inCodeBlock) { /* pass chars on definition lines */
625 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
629 if (c0==DEFNCHAR) { /* pass chars on definition lines */
630 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
635 while (c0 != '\n' && isIn(c0,ZPACE)) /* maybe line is blank? */
637 if (c0=='\n' || c0==EOF)
638 thisLineIs(BLANKLINE);
640 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
641 while (c0!='\n' && c0!=EOF)
643 } /* by now, c0=='\n' or c0==EOF */
644 } while (c0!=EOF); /* if new line, start again */
646 if (litLines==0 && literateErrors) {
647 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
656 static Void local closeAnyInput() { /* Close input stream, if open, */
657 switch (reading) { /* or skip to end of console line */
659 case SCRIPTFILE : if (inputStream) {
660 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
672 case KEYBOARD : while (c0!=EOF)
679 /* --------------------------------------------------------------------------
680 * Parser: Uses table driven parser generated from parser.y using yacc
681 * ------------------------------------------------------------------------*/
685 /* --------------------------------------------------------------------------
686 * Single token input routines:
688 * The following routines read the values of particular kinds of token given
689 * that the first character of the token has already been located in c0 on
690 * entry to the routine.
691 * ------------------------------------------------------------------------*/
693 #define MAX_TOKEN 4000
694 #define startToken() tokPos = 0
695 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
696 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
697 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
699 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
700 static Int tokPos; /* input position in buffer */
701 static Int identType; /* identifier type: CONID / VARID */
702 static Int opType; /* operator type : CONOP / VAROP */
704 static Void local endToken() { /* check for token overflow */
705 if (tokPos>MAX_TOKEN) {
706 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
709 tokenStr[tokPos] = '\0';
712 static Text local readOperator() { /* read operator symbol */
717 } while (isISO(c0) && isIn(c0,SYMBOL));
718 opType = (tokenStr[0]==':' ? CONOP : VAROP);
720 return findText(tokenStr);
723 static Text local readIdent() { /* read identifier */
728 } while (isISO(c0) && isIn(c0,IDAFTER));
730 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
731 if (readingInterface)
732 return unZcodeThenFindText(tokenStr); else
733 return findText(tokenStr);
737 static Bool local doesNotExceed(s,radix,limit)
744 if (s[p] == 0) return TRUE;
745 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
746 n = radix*n + (s[p]-'0');
751 static Int local stringToInt(s,radix)
757 if (s[p] == 0) return n;
758 n = radix*n + (s[p]-'0');
763 static Cell local readRadixNumber(r) /* Read literal in specified radix */
764 Int r; { /* from input of the form 0c{digs} */
767 skip(); /* skip leading zero */
768 if ((d=readHexDigit(c1))<0 || d>=r) {
769 /* Special case; no digits, lex as */
770 /* if it had been written "0 c..." */
775 saveTokenChar('0'+readHexDigit(c0));
777 d = readHexDigit(c0);
778 } while (d>=0 && d<r);
782 if (doesNotExceed(tokenStr,r,MAXPOSINT))
783 return mkInt(stringToInt(tokenStr,r));
786 return stringToBignum(tokenStr);
788 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
793 static Cell local readNumber() { /* read numeric constant */
796 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
797 return readRadixNumber(16);
798 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
799 return readRadixNumber(8);
806 } while (isISO(c0) && isIn(c0,DIGIT));
808 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
810 if (doesNotExceed(tokenStr,10,MAXPOSINT))
811 return mkInt(stringToInt(tokenStr,10)); else
812 return stringToBignum(tokenStr);
815 saveTokenChar(c0); /* save decimal point */
817 do { /* process fractional part ... */
820 } while (isISO(c0) && isIn(c0,DIGIT));
822 if (c0=='e' || c0=='E') { /* look for exponent part... */
832 if (!isISO(c0) || !isIn(c0,DIGIT)) {
833 ERRMSG(row) "Missing digits in exponent"
840 } while (isISO(c0) && isIn(c0,DIGIT));
845 return mkFloat(stringToFloat(tokenStr));
854 static Cell local readChar() { /* read character constant */
858 if (c0=='\'' || c0=='\n' || c0==EOF) {
859 ERRMSG(row) "Illegal character constant"
863 charRead = readAChar(FALSE);
868 ERRMSG(row) "Improperly terminated character constant"
874 static Cell local readString() { /* read string literal */
879 while (c0!='\"' && c0!='\n' && c0!=EOF) {
882 saveStrChr(charOf(c));
888 ERRMSG(row) "Improperly terminated string"
892 return mkStr(findText(tokenStr));
895 static Void local saveStrChr(c) /* save character in string */
897 if (c!='\0' && c!='\\') { /* save non null char as single char*/
900 else { /* save null char as TWO null chars */
901 if (tokPos+1<MAX_TOKEN) {
911 static Cell local readAChar(isStrLit) /* read single char constant */
912 Bool isStrLit; { /* TRUE => enable \& and gaps */
915 if (c0=='\\') /* escape character? */
916 return readEscapeChar(isStrLit);
918 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
921 skip(); /* normal character? */
925 /* --------------------------------------------------------------------------
926 * Character escape code sequences:
927 * ------------------------------------------------------------------------*/
929 static struct { /* table of special escape codes */
933 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
934 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
935 {"\'",'\''}, {"v", 11},
936 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
937 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
938 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
939 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
940 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
941 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
942 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
943 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
944 {"SP", 32}, {"DEL", 127},
948 static Int alreadyMatched; /* Record portion of input stream */
949 static char alreadyRead[10]; /* that has been read w/o a match */
951 static Bool local lazyReadMatches(s) /* compare input stream with string */
952 String s; { /* possibly using characters that */
953 int i; /* have already been read */
955 for (i=0; i<alreadyMatched; ++i)
956 if (alreadyRead[i]!=s[i])
959 while (s[i] && s[i]==c0) {
960 alreadyRead[alreadyMatched++]=(char)c0;
968 static Cell local readEscapeChar(isStrLit)/* read escape character */
974 case '&' : if (isStrLit) {
978 ERRMSG(row) "Illegal use of `\\&' in character constant"
982 case '^' : return readCtrlChar();
984 case 'o' : return readOctChar();
985 case 'x' : return readHexChar();
987 default : if (!isISO(c0)) {
988 ERRMSG(row) "Illegal escape sequence"
991 else if (isIn(c0,ZPACE)) {
996 ERRMSG(row) "Illegal use of gap in character constant"
1000 else if (isIn(c0,DIGIT))
1001 return readDecChar();
1004 for (alreadyMatched=0; escapes[i].codename; i++)
1005 if (lazyReadMatches(escapes[i].codename))
1006 return mkChar(escapes[i].codenumber);
1008 alreadyRead[alreadyMatched++] = (char)c0;
1009 alreadyRead[alreadyMatched++] = '\0';
1010 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
1013 return NIL;/*NOTREACHED*/
1016 static Void local skipGap() { /* skip over gap in string literal */
1017 do /* (simplified in Haskell 1.1) */
1022 while (isISO(c0) && isIn(c0,ZPACE));
1024 ERRMSG(row) "Missing `\\' terminating string literal gap"
1030 static Cell local readCtrlChar() { /* read escape sequence \^x */
1031 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1035 if ((which = strchr(controls,c0))==NULL) {
1036 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1040 return mkChar(which-controls);
1043 static Cell local readOctChar() { /* read octal character constant */
1048 if ((d = readHexDigit(c0))<0 || d>=8) {
1049 ERRMSG(row) "Empty octal character escape"
1053 if (overflows(n,8,d,MAXCHARVAL)) {
1054 ERRMSG(row) "Octal character escape out of range"
1059 } while ((d = readHexDigit(c0))>=0 && d<8);
1064 static Cell local readHexChar() { /* read hex character constant */
1069 if ((d = readHexDigit(c0))<0) {
1070 ERRMSG(row) "Empty hexadecimal character escape"
1074 if (overflows(n,16,d,MAXCHARVAL)) {
1075 ERRMSG(row) "Hexadecimal character escape out of range"
1080 } while ((d = readHexDigit(c0))>=0);
1085 static Int local readHexDigit(c) /* read single hex digit */
1087 if ('0'<=c && c<='9')
1089 if ('A'<=c && c<='F')
1090 return 10 + (c-'A');
1091 if ('a'<=c && c<='f')
1092 return 10 + (c-'a');
1096 static Cell local readDecChar() { /* read decimal character constant */
1100 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1101 ERRMSG(row) "Decimal character escape out of range"
1104 n = 10*n + (c0-'0');
1106 } while (c0!=EOF && isIn(c0,DIGIT));
1111 /* --------------------------------------------------------------------------
1112 * Produce printable representation of character:
1113 * ------------------------------------------------------------------------*/
1115 String unlexChar(c,quote) /* return string representation of */
1116 Char c; /* character... */
1117 Char quote; { /* protect quote character */
1118 static char buffer[12];
1120 if (c<0) /* deal with sign extended chars.. */
1123 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1124 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1126 buffer[1] = (char)c;
1130 buffer[0] = (char)c;
1134 else { /* look for escape code */
1136 for (escs=0; escapes[escs].codename; escs++)
1137 if (escapes[escs].codenumber==c) {
1138 sprintf(buffer,"\\%s",escapes[escs].codename);
1141 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1146 Void printString(s) /* print string s, using quotes and */
1147 String s; { /* escapes if any parts need them */
1151 while ((c = *t)!=0 && isISO(c)
1152 && isIn(c,PRINT) && c!='"' && !isIn(c,ZPACE)) {
1158 Printf("%s",unlexChar(*t,'"'));
1166 /* -------------------------------------------------------------------------
1167 * Handle special types of input for use in interpreter:
1168 * -----------------------------------------------------------------------*/
1170 Command readCommand(cmds,start,sys) /* read command at start of input */
1171 struct cmd *cmds; /* line in interpreter */
1172 Char start; /* characters introducing a cmd */
1173 Char sys; { /* character for shell escape */
1174 while (c0==' ' || c0 =='\t')
1177 if (c0=='\n') /* look for blank command lines */
1179 if (c0==EOF) /* look for end of input stream */
1181 if (c0==sys) { /* single character system escape */
1185 if (c0==start && c1==sys) { /* two character system escape */
1191 startToken(); /* All cmds start with start */
1192 if (c0==start) /* except default (usually EVAL) */
1193 do { /* which is empty */
1196 } while (c0!=EOF && !isIn(c0,ZPACE));
1199 for (; cmds->cmdString; ++cmds)
1200 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1201 (tokenStr[0]==start &&
1202 tokenStr[1]==(cmds->cmdString)[1] &&
1204 return (cmds->cmdCode);
1208 String readFilename() { /* Read filename from input (if any)*/
1209 if (reading==PROJFILE)
1212 while (c0==' ' || c0=='\t')
1215 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1219 while (c0!=EOF && !isIn(c0,ZPACE)) {
1222 while (c0!=EOF && c0!='\"') {
1223 Cell c = readAChar(TRUE);
1225 saveTokenChar(charOf(c));
1231 ERRMSG(row) "a closing quote, '\"', was expected"
1244 String readLine() { /* Read command line from input */
1245 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1249 while (c0!='\n' && c0!=EOF) {
1258 /* --------------------------------------------------------------------------
1259 * This lexer supports the Haskell layout rule:
1261 * - Layout area bounded by { ... }, with `;'s in between.
1262 * - A `{' is a HARD indentation and can only be matched by a corresponding
1264 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1265 * is inserted with the column number of the first token after the
1266 * WHERE/LET/OF keyword.
1267 * - When a soft indentation is uppermost on the indetation stack with
1268 * column col' we insert:
1269 * `}' in front of token with column<col' and pop indentation off stack,
1270 * `;' in front of token with column==col'.
1271 * ------------------------------------------------------------------------*/
1273 #define MAXINDENT 100 /* maximum nesting of layout rule */
1274 static Int layout[MAXINDENT+1];/* indentation stack */
1275 #define HARD (-1) /* indicates hard indentation */
1276 static Int indentDepth = (-1); /* current indentation nesting */
1278 static Void local goOffside(col) /* insert offside marker */
1279 Int col; { /* for specified column */
1281 if (indentDepth>=MAXINDENT) {
1282 ERRMSG(row) "Too many levels of program nesting"
1285 layout[++indentDepth] = col;
1288 static Void local unOffside() { /* leave layout rule area */
1293 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1295 return indentDepth>=0 && layout[indentDepth]!=HARD;
1298 /* --------------------------------------------------------------------------
1300 * ------------------------------------------------------------------------*/
1302 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1303 for (;;) /* Strictly speaking, this code is */
1304 if (c0==EOF) /* a little more liberal than the */
1305 return; /* report allows ... */
1308 else if (isIn(c0,ZPACE))
1310 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1312 Int origRow = row; /* Save original row number */
1315 while (nesting>0 && c0!=EOF)
1316 if (c0=='{' && c1=='-') {
1321 else if (c0=='-' && c1=='}') {
1331 ERRMSG(origRow) "Unterminated nested comment {- ..."
1335 else if (c0=='-' && c1=='-') { /* One line comment */
1338 while (c0!='\n' && c0!=EOF);
1346 static Bool firstToken; /* Set to TRUE for first token */
1347 static Int firstTokenIs; /* ... with token value stored here */
1349 static Int local yylex() { /* Read next input token ... */
1350 static Bool insertOpen = FALSE;
1351 static Bool insertedToken = FALSE;
1352 static Text textRepeat;
1354 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1356 if (firstToken) { /* Special case for first token */
1360 insertedToken = FALSE;
1361 if (reading==KEYBOARD)
1362 textRepeat = findText(repeatStr);
1363 return firstTokenIs;
1366 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1368 insertedToken = TRUE;
1370 push(yylval = mkInt(row));
1374 /* ----------------------------------------------------------------------
1375 * Skip white space, and insert tokens to support layout rules as reqd.
1376 * --------------------------------------------------------------------*/
1379 startColumn = column;
1380 push(yylval = mkInt(row)); /* default token value is line no. */
1381 /* subsequent changes to yylval must also set top() to the same value */
1383 if (indentDepth>=0) { /* layout rule(s) active ? */
1384 if (insertedToken) /* avoid inserting multiple `;'s */
1385 insertedToken = FALSE; /* or putting `;' after `{' */
1387 if (offsideON && layout[indentDepth]!=HARD) {
1388 if (column<layout[indentDepth]) {
1392 else if (column==layout[indentDepth] && c0!=EOF) {
1393 insertedToken = TRUE;
1399 /* ----------------------------------------------------------------------
1400 * Now try to identify token type:
1401 * --------------------------------------------------------------------*/
1404 case EOF : return 0; /* End of file/input */
1406 /* The next 10 characters make up the `special' category in 1.3 */
1407 case '(' : skip(); return '(';
1408 case ')' : skip(); return ')';
1409 case ',' : skip(); return ',';
1410 case ';' : skip(); return ';';
1411 case '[' : skip(); return '[';
1412 case ']' : skip(); return ']';
1413 case '`' : skip(); return '`';
1414 case '{' : if (offsideON) goOffside(HARD);
1417 case '}' : if (offsideON && indentDepth<0) {
1418 ERRMSG(row) "Misplaced `}'"
1421 if (!(offsideON && layout[indentDepth]!=HARD))
1422 skip(); /* skip over hard }*/
1424 unOffside(); /* otherwise, we have to insert a }*/
1425 return '}'; /* to (try to) avoid an error... */
1427 /* Character and string literals */
1428 case '\'' : top() = yylval = readChar();
1431 case '\"' : top() = yylval = readString();
1436 if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1437 Text it; /* Look for implicit param name */
1440 top() = yylval = ap(IPVAR,it);
1441 return identType=IPVARID;
1445 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1446 Text it; /* Look for record selector name */
1449 top() = yylval = ap(RECSEL,mkExt(it));
1450 return identType=RECSELID;
1453 if (isIn(c0,LARGE)) { /* Look for qualified name */
1454 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1455 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1457 skip(); /* Skip qualifying dot */
1458 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1459 it2 = readOperator();
1460 if (opType==CONOP) {
1461 top() = yylval = mkQConOp(it,it2);
1464 top() = yylval = mkQVarOp(it,it2);
1467 } else { /* Qualified identifier */
1469 if (identType==CONID) {
1470 top() = yylval = mkQCon(it,it2);
1473 top() = yylval = mkQVar(it,it2);
1478 top() = yylval = mkCon(it);
1482 if (isIn(c0,(SMALL|LARGE))) {
1483 Text it = readIdent();
1485 if (it==textCase) return CASEXP;
1486 if (it==textOfK) lookAhead(OF);
1487 if (it==textData) return DATA;
1488 if (it==textType) return TYPE;
1489 if (it==textIf) return IF;
1490 if (it==textThen) return THEN;
1491 if (it==textElse) return ELSE;
1492 if (it==textWhere) lookAhead(WHERE);
1493 if (it==textLet) lookAhead(LET);
1494 if (it==textIn) return IN;
1495 if (it==textInfix) return INFIXN;
1496 if (it==textInfixl) return INFIXL;
1497 if (it==textInfixr) return INFIXR;
1498 if (it==textForeign) return FOREIGN;
1499 if (it==textUnsafe) return UNSAFE;
1500 if (it==textNewtype) return TNEWTYPE;
1501 if (it==textDefault) return DEFAULT;
1502 if (it==textDeriving) return DERIVING;
1503 if (it==textDo) lookAhead(DO);
1504 if (it==textClass) return TCLASS;
1505 if (it==textInstance) return TINSTANCE;
1506 if (it==textModule) return TMODULE;
1507 if (it==textInterface) return INTERFACE;
1508 if (it==textInstImport) return INSTIMPORT;
1509 if (it==textImport) return IMPORT;
1510 if (it==textExport) return EXPORT;
1511 if (it==textDynamic) return DYNAMIC;
1512 if (it==textCcall) return CCALL;
1513 if (it==textStdcall) return STDKALL;
1514 if (it==textUUExport) return UUEXPORT;
1515 if (it==textHiding) return HIDING;
1516 if (it==textQualified) return QUALIFIED;
1517 if (it==textAsMod) return ASMOD;
1518 if (it==textWildcard) return '_';
1519 if (it==textAll && !haskell98) return ALL;
1521 if (it==textWith && !haskell98) lookAhead(WITH);
1522 if (it==textDlet && !haskell98) lookAhead(DLET);
1524 if (it==textUUAll) return ALL;
1525 if (it==textRepeat && reading==KEYBOARD)
1526 return repeatLast();
1528 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1532 if (isIn(c0,SYMBOL)) {
1533 Text it = readOperator();
1535 if (it==textCoco) return COCO;
1536 if (it==textEq) return '=';
1537 if (it==textUpto) return UPTO;
1538 if (it==textAs) return '@';
1539 if (it==textLambda) return '\\';
1540 if (it==textBar) return '|';
1541 if (it==textFrom) return FROM;
1542 if (it==textMinus) return '-';
1543 if (it==textPlus) return '+';
1544 if (it==textBang) return '!';
1545 if (it==textDot) return '.';
1546 if (it==textArrow) return ARROW;
1547 if (it==textLazy) return '~';
1548 if (it==textImplies) return IMPLIES;
1549 if (it==textRepeat && reading==KEYBOARD)
1550 return repeatLast();
1552 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1556 if (isIn(c0,DIGIT)) {
1557 top() = yylval = readNumber();
1561 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1564 return 0; /*NOTREACHED*/
1567 static Int local repeatLast() { /* Obtain last expression entered */
1568 if (isNull(yylval=getLastExpr())) {
1569 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1575 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1576 Text t; { /* by t ... */
1577 String s = textToStr(t);
1578 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1581 Syntax syntaxOf(n) /* Find syntax for name */
1583 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1584 return defaultSyntax(name(n).text);
1585 return name(n).syntax;
1588 /* --------------------------------------------------------------------------
1589 * main entry points to parser/lexer:
1590 * ------------------------------------------------------------------------*/
1592 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1593 Int startWith; { /* determining whether to read a */
1594 firstToken = TRUE; /* script or an expression */
1595 firstTokenIs = startWith;
1596 if (startWith==INTERFACE) {
1597 offsideON = FALSE; readingInterface = TRUE;
1599 offsideON = TRUE; readingInterface = FALSE;
1603 if (yyparse()) { /* This can only be parser overflow */
1604 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1605 EEND; /* in the parser... */
1608 if (!stackEmpty()) /* stack should now be empty */
1609 internal("parseInput");
1613 static String memPrefix = "@mem@";
1614 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1616 Void makeMemScript(mem,fname)
1619 strcat(fname,memPrefix);
1620 itoa((int)mem, fname+strlen(fname), 10);
1623 Bool isMemScript(fname)
1625 return (strstr(fname,memPrefix) != NULL);
1628 String memScriptString(fname)
1630 String p = strstr(fname,memPrefix);
1632 return (String)atoi(p+lenMemPrefix);
1638 Void parseScript(fname,len) /* Read a script, possibly from mem */
1642 if (isMemScript(fname)) {
1643 char* s = memScriptString(fname);
1646 fileInput(fname,len);
1651 Void parseScript(nm,len) /* Read a script */
1653 Long len; { /* Used to set a target for reading */
1660 Void parseExp() { /* Read an expression to evaluate */
1662 setLastExpr(inputExpr);
1666 #if EXPLAIN_INSTANCE_RESOLUTION
1667 Void parseContext() { /* Read a context to prove */
1668 parseInput(CONTEXT);
1672 Void parseInterface(nm,len) /* Read a GHC interface file */
1674 Long len; { /* Used to set a target for reading */
1677 parseInput(INTERFACE);
1681 /* --------------------------------------------------------------------------
1683 * ------------------------------------------------------------------------*/
1688 case INSTALL : initCharTab();
1689 textCase = findText("case");
1690 textOfK = findText("of");
1691 textData = findText("data");
1692 textType = findText("type");
1693 textIf = findText("if");
1694 textThen = findText("then");
1695 textElse = findText("else");
1696 textWhere = findText("where");
1697 textLet = findText("let");
1698 textIn = findText("in");
1699 textInfix = findText("infix");
1700 textInfixl = findText("infixl");
1701 textInfixr = findText("infixr");
1702 textForeign = findText("foreign");
1703 textUnsafe = findText("unsafe");
1704 textNewtype = findText("newtype");
1705 textDefault = findText("default");
1706 textDeriving = findText("deriving");
1707 textDo = findText("do");
1708 textClass = findText("class");
1710 textWith = findText("with");
1711 textDlet = findText("dlet");
1713 textInstance = findText("instance");
1714 textCoco = findText("::");
1715 textEq = findText("=");
1716 textUpto = findText("..");
1717 textAs = findText("@");
1718 textLambda = findText("\\");
1719 textBar = findText("|");
1720 textMinus = findText("-");
1721 textPlus = findText("+");
1722 textFrom = findText("<-");
1723 textArrow = findText("->");
1724 textLazy = findText("~");
1725 textBang = findText("!");
1726 textDot = findText(".");
1727 textImplies = findText("=>");
1728 textPrelude = findText("Prelude");
1729 textNum = findText("Num");
1730 textModule = findText("module");
1731 textInterface = findText("__interface");
1732 textInstImport = findText("__instimport");
1733 textExport = findText("export");
1734 textDynamic = findText("dynamic");
1735 textCcall = findText("ccall");
1736 textStdcall = findText("stdcall");
1737 textUUExport = findText("__export");
1738 textImport = findText("import");
1739 textHiding = findText("hiding");
1740 textQualified = findText("qualified");
1741 textAsMod = findText("as");
1742 textWildcard = findText("_");
1743 textAll = findText("forall");
1744 textUUAll = findText("__forall");
1745 varMinus = mkVar(textMinus);
1746 varPlus = mkVar(textPlus);
1747 varBang = mkVar(textBang);
1748 varDot = mkVar(textDot);
1749 varHiding = mkVar(textHiding);
1750 varQualified = mkVar(textQualified);
1751 varAsMod = mkVar(textAsMod);
1752 conMain = mkCon(findText("Main"));
1753 varMain = mkVar(findText("main"));
1759 case RESET : tyconDefns = NIL;
1768 foreignImports= NIL;
1769 foreignExports= NIL;
1777 case BREAK : if (reading==KEYBOARD)
1781 case MARK : mark(tyconDefns);
1789 mark(unqualImports);
1790 mark(foreignImports);
1791 mark(foreignExports);
1809 /*-------------------------------------------------------------------------*/