2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
6 * Yale Haskell Group, and the Oregon Graduate Institute of Science and
7 * Technology, 1994-1999, All rights reserved. It is distributed as
8 * free software under the license in the file "License", which is
9 * included in the distribution.
11 * $RCSfile: input.c,v $
13 * $Date: 2000/04/04 01:07:49 $
14 * ------------------------------------------------------------------------*/
16 #include "hugsbasictypes.h"
34 #if HAVE_LIBREADLINE && HAVE_READLINE_READLINE_H && HAVE_READLINE_HISTORY_H
35 #define USE_READLINE 1
37 #define USE_READLINE 0
41 #include <readline/readline.h>
42 #include <readline/history.h>
46 /* --------------------------------------------------------------------------
48 * ------------------------------------------------------------------------*/
50 List tyconDefns = NIL; /* type constructor definitions */
51 List typeInDefns = NIL; /* type synonym restrictions */
52 List valDefns = NIL; /* value definitions in script */
53 List classDefns = NIL; /* class defns in script */
54 List instDefns = NIL; /* instance defns in script */
55 List selDefns = NIL; /* list of selector lists */
56 List genDefns = NIL; /* list of generated names */
57 List unqualImports = NIL; /* unqualified import list */
58 List foreignImports = NIL; /* foreign imports */
59 List foreignExports = NIL; /* foreign exportsd */
60 List defaultDefns = NIL; /* default definitions (if any) */
61 Int defaultLine = 0; /* line in which default defs occur*/
62 List evalDefaults = NIL; /* defaults for evaluator */
64 Cell inputExpr = NIL; /* input expression */
65 Cell inputContext = NIL; /* input context */
66 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
67 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
68 Bool offsideON = TRUE; /* TRUE => implement offside rule */
69 Bool readingInterface = FALSE;
71 String repeatStr = 0; /* Repeat last expr */
73 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
74 String preprocessor = 0;
77 /* --------------------------------------------------------------------------
78 * Local function prototypes:
79 * ------------------------------------------------------------------------*/
81 static Void local initCharTab ( Void );
82 static Void local fileInput ( String,Long );
83 static Bool local literateMode ( String );
84 static Bool local linecmp ( String,String );
85 static Int local nextLine ( Void );
86 static Void local skip ( Void );
87 static Void local thisLineIs ( Int );
88 static Void local newlineSkip ( Void );
89 static Void local closeAnyInput ( Void );
91 Int yyparse ( Void ); /* can't stop yacc making this */
92 /* public, but don't advertise */
93 /* it in a header file. */
95 static Void local endToken ( Void );
96 static Text local readOperator ( Void );
97 static Text local readIdent ( Void );
98 static Cell local readRadixNumber ( Int );
99 static Cell local readNumber ( Void );
100 static Cell local readChar ( Void );
101 static Cell local readString ( Void );
102 static Void local saveStrChr ( Char );
103 static Cell local readAChar ( Bool );
105 static Bool local lazyReadMatches ( String );
106 static Cell local readEscapeChar ( Bool );
107 static Void local skipGap ( Void );
108 static Cell local readCtrlChar ( Void );
109 static Cell local readOctChar ( Void );
110 static Cell local readHexChar ( Void );
111 static Int local readHexDigit ( Char );
112 static Cell local readDecChar ( Void );
114 static Void local goOffside ( Int );
115 static Void local unOffside ( Void );
116 static Bool local canUnOffside ( Void );
118 static Void local skipWhitespace ( Void );
119 static Int local yylex ( Void );
120 static Int local repeatLast ( Void );
122 static Cell local parseInput ( Int );
124 static Bool local doesNotExceed ( String,Int,Int );
125 static Int local stringToInt ( String,Int );
128 /* --------------------------------------------------------------------------
129 * Text values for reserved words and special symbols:
130 * ------------------------------------------------------------------------*/
132 static Text textCase, textOfK, textData, textType, textIf;
133 static Text textThen, textElse, textWhere, textLet, textIn;
134 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
135 static Text textDefault, textDeriving, textDo, textClass, textInstance;
137 static Text textWith, textDlet;
140 static Text textCoco, textEq, textUpto, textAs, textLambda;
141 static Text textBar, textMinus, textFrom, textArrow, textLazy;
142 static Text textBang, textDot, textAll, textImplies;
143 static Text textWildcard;
145 static Text textModule, textImport, textInterface, textInstImport;
146 static Text textHiding, textQualified, textAsMod, textPrivileged;
147 static Text textExport, textDynamic, textUUExport;
148 static Text textUnsafe, textUUAll, textUUUsage;
150 Text textCcall; /* ccall */
151 Text textStdcall; /* stdcall */
153 Text textNum; /* Num */
154 Text textPrimPrel; /* PrimPrel */
155 Text textPrelude; /* Prelude */
156 Text textPlus; /* (+) */
158 static Cell conMain; /* Main */
159 static Cell varMain; /* main */
161 static Cell varMinus; /* (-) */
162 static Cell varPlus; /* (+) */
163 static Cell varBang; /* (!) */
164 static Cell varDot; /* (.) */
165 static Cell varHiding; /* hiding */
166 static Cell varQualified; /* qualified */
167 static Cell varAsMod; /* as */
168 static Cell varPrivileged; /* privileged */
170 static List imps; /* List of imports to be chased */
173 /* --------------------------------------------------------------------------
174 * Character set handling:
176 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
177 * character set. The following code provides methods for classifying
178 * input characters according to the lexical structure specified by the
179 * report. Hugs should still accept older programs because ASCII is
180 * essentially just a subset of the ISO character set.
182 * Notes: If you want to port Hugs to a machine that uses something
183 * substantially different from the ISO character set, then you will need
184 * to insert additional code to map between character sets.
186 * At some point, the following data structures may be exported in a .h
187 * file to allow the information contained here to be picked up in the
188 * implementation of LibChar is* primitives.
190 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
191 * ------------------------------------------------------------------------*/
193 static Bool charTabBuilt;
194 static unsigned char ctable[NUM_CHARS];
195 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
196 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
206 static Void local initCharTab() { /* Initialize char decode table */
207 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
208 #define setChar(x,c) ctable[c] |= (x)
209 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
210 #define setCopy(x,c) {Int i; \
211 for (i=0; i<NUM_CHARS; ++i) \
216 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
218 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
219 setRange(SMALL, 223,246); /* ISO lower case letters */
220 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
221 setChar (SMALL, '_');
223 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
224 setRange(LARGE, 192,214); /* ISO upper case letters */
225 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
227 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
228 setRange(SYMBOL, 215,215);
229 setChar (SYMBOL, 247);
230 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
232 setChar (IDAFTER, '\''); /* Characters in identifier */
233 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
235 setChar (ZPACE, ' '); /* ASCII space character */
236 setChar (ZPACE, 160); /* ISO non breaking space */
237 setRange(ZPACE, 9,13); /* special whitespace: \t\n\v\f\r */
239 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
240 setChars(PRINT, " '\""); /* Space and quotes */
241 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
251 /* --------------------------------------------------------------------------
252 * Single character input routines:
254 * At the lowest level of input, characters are read one at a time, with the
255 * current character held in c0 and the following (lookahead) character in
256 * c1. The coordinates of c0 within the file are held in (column,row).
257 * The input stream is advanced by one character using the skip() function.
258 * ------------------------------------------------------------------------*/
260 #define TABSIZE 8 /* spacing between tabstops */
262 #define NOTHING 0 /* what kind of input is being read?*/
263 #define KEYBOARD 1 /* - keyboard/console? */
264 #define SCRIPTFILE 2 /* - script file */
265 #define PROJFILE 3 /* - project file */
266 #define STRING 4 /* - string buffer? */
268 static Int reading = NOTHING;
270 static Target readSoFar;
271 static Int row, column, startColumn;
273 static FILE *inputStream = 0;
274 static Bool thisLiterate;
275 static String nextStringChar; /* next char in string buffer */
277 #if USE_READLINE /* for command line editors */
278 static String currentLine; /* editline or GNU readline */
279 static String nextChar;
280 #define nextConsoleChar() \
281 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
283 #define nextConsoleChar() getc(stdin)
286 static Int litLines; /* count defn lines in lit script */
287 #define DEFNCHAR '>' /* definition lines begin with this */
288 static Int lastLine; /* records type of last line read: */
289 #define STARTLINE 0 /* - at start of file, none read */
290 #define BLANKLINE 1 /* - blank (may preceed definition) */
291 #define TEXTLINE 2 /* - text comment */
292 #define DEFNLINE 3 /* - line containing definition */
293 #define CODELINE 4 /* - line inside code block */
295 #define BEGINCODE "\\begin{code}"
296 #define ENDCODE "\\end{code}"
299 static char *lineBuffer = NULL; /* getline() does the initial allocation */
301 #define LINEBUFFER_SIZE 1000
302 static char lineBuffer[LINEBUFFER_SIZE];
304 static int lineLength = 0;
305 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
306 static int linePtr = 0;
308 Void consoleInput(prompt) /* prepare to input characters from */
309 String prompt; { /* standard in (i.e. console/kbd) */
310 reading = KEYBOARD; /* keyboard input is Line oriented, */
311 c0 = /* i.e. input terminated by '\n' */
317 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
318 * avoids accidentally freeing currentLine twice.
321 String oldCurrentLine = currentLine;
322 currentLine = 0; /* We may lose the space of currentLine */
323 free(oldCurrentLine); /* if interrupted here - unlikely */
325 currentLine = readline(prompt);
326 nextChar = currentLine;
329 add_history(currentLine);
339 Void projInput(nm) /* prepare to input characters from */
340 String nm; { /* from named project file */
341 if ((inputStream = fopen(nm,"r"))!=0) {
349 ERRMSG(0) "Unable to open project file \"%s\"", nm
354 static Void local fileInput(nm,len) /* prepare to input characters from*/
355 String nm; /* named file (specified length is */
356 Long len; { /* used to set target for reading) */
357 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
359 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
360 char *cmd = malloc(reallen);
362 ERRMSG(0) "Unable to allocate memory for filter command."
365 strcpy(cmd,preprocessor);
368 inputStream = popen(cmd,"r");
371 inputStream = fopen(nm,"r");
374 inputStream = fopen(nm,"r");
377 reading = SCRIPTFILE;
383 lastLine = STARTLINE; /* literate file processing */
387 thisLiterate = literateMode(nm);
391 setGoal("Parsing", (Target)len);
394 ERRMSG(0) "Unable to open file \"%s\"", nm
399 Void stringInput(s) /* prepare to input characters from string */
414 static Bool local literateMode(nm) /* Select literate mode for file */
416 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
418 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
420 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
421 filenamecmp(dot+1,"verb")==0) /* literate scripts */
424 return literateScripts; /* otherwise, use the default */
428 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
432 len = 1 + strlen ( srcName );
433 *hiName = malloc(len);
434 *oName = malloc(len);
435 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
436 (*hiName)[0] = (*oName)[0] = 0;
437 dot = strrchr(srcName, '.');
439 if (filenamecmp(dot+1, "hs")==0 &&
440 filenamecmp(dot+1, "lhs")==0 &&
441 filenamecmp(dot+1, "verb")==0) return;
443 strcpy(*hiName, srcName);
444 dot = strrchr(*hiName, '.');
449 strcpy(*oName, srcName);
450 dot = strrchr(*oName, '.');
457 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
458 * I've removed the loop (since newLineSkip contains a loop too) and
459 * replaced the warnings with errors. ADR
462 * To deal with literate \begin{code}...\end{code} blocks,
463 * add a line buffer that rooms the current line. The old c0 and c1
464 * stream pointers are used as before within that buffer -- sof
466 * Upon reading a new line into the line buffer, we check to see if
467 * we're reading in a line containing \begin{code} or \end{code} and
468 * take appropriate action.
471 static Bool local linecmp(s,line) /* compare string with line */
472 String s; /* line may end in whitespace */
475 while (s[i] != '\0' && s[i] == line[i]) {
478 /* s[0..i-1] == line[0..i-1] */
479 if (s[i] != '\0') { /* check s `isPrefixOf` line */
482 while (isIn(line[i], ZPACE)) { /* allow whitespace at end of line */
485 return (line[i] == '\0');
488 /* Returns line length (including \n) or 0 upon EOF. */
489 static Int local nextLine()
493 Forget about fgets(), it is utterly braindead.
494 (Assumes \NUL free streams and does not gracefully deal
495 with overflow.) Instead, use GNU libc's getline().
497 lineLength = getline(&lineBuffer, &lineLength, inputStream);
499 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
500 lineLength = strlen(lineBuffer);
504 /* printf("Read: \"%s\"", lineBuffer); */
505 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
508 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
509 lineBuffer[1] == '!') {
510 lineBuffer[0]='\n'; /* pretend it's a blank line */
513 } else if (thisLiterate) {
514 if (linecmp(BEGINCODE, lineBuffer)) {
515 if (!inCodeBlock) { /* Entered a code block */
517 lineBuffer[0]='\n'; /* pretend it's a blank line */
522 ERRMSG(row) "\\begin{code} encountered inside code block"
526 else if (linecmp(ENDCODE, lineBuffer)) {
527 if (inCodeBlock) { /* Finished code block */
529 lineBuffer[0]='\n'; /* pretend it's a blank line */
534 ERRMSG(row) "\\end{code} encountered outside code block"
539 /* printf("Read: \"%s\"", lineBuffer); */
543 static Void local skip() { /* move forward one char in input */
544 if (c0!=EOF) { /* stream, updating c0, c1, ... */
545 if (c0=='\n') { /* Adjusting cursor coords as nec. */
548 if (reading==SCRIPTFILE)
552 column += TABSIZE - ((column-1)%TABSIZE);
561 if (reading==SCRIPTFILE)
565 else if (reading==KEYBOARD) {
570 c1 = nextConsoleChar();
574 /* On Win32, hitting ctrl-C causes the next getchar to
575 * fail - returning "-1" to indicate an error.
576 * This is one of the rare cases where "-1" does not mean EOF.
578 if (EOF == c1 && (!feof(stdin) /* || broken==TRUE */)) {
583 else if (reading==STRING) {
584 c1 = (unsigned char) *nextStringChar++;
589 if (lineLength <=0 || linePtr == lineLength) {
590 /* Current line, exhausted - get new one */
591 if (nextLine() <= 0) { /* EOF */
596 c1 = (unsigned char)lineBuffer[linePtr++];
600 c1 = (unsigned char)lineBuffer[linePtr++];
607 static Void local thisLineIs(kind) /* register kind of current line */
608 Int kind; { /* & check for literate script errs */
609 if (literateErrors) {
610 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
611 (kind==TEXTLINE && lastLine==DEFNLINE)) {
612 ERRMSG(row) "Program line next to comment"
619 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
620 /* assert(c0=='\n'); */
621 if (reading==SCRIPTFILE && thisLiterate) {
624 if (inCodeBlock) { /* pass chars on definition lines */
625 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
629 if (c0==DEFNCHAR) { /* pass chars on definition lines */
630 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
635 while (c0 != '\n' && isIn(c0,ZPACE)) /* maybe line is blank? */
637 if (c0=='\n' || c0==EOF)
638 thisLineIs(BLANKLINE);
640 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
641 while (c0!='\n' && c0!=EOF)
643 } /* by now, c0=='\n' or c0==EOF */
644 } while (c0!=EOF); /* if new line, start again */
646 if (litLines==0 && literateErrors) {
647 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
656 static Void local closeAnyInput() { /* Close input stream, if open, */
657 switch (reading) { /* or skip to end of console line */
659 case SCRIPTFILE : if (inputStream) {
660 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
672 case KEYBOARD : while (c0!=EOF)
679 /* --------------------------------------------------------------------------
680 * Parser: Uses table driven parser generated from parser.y using yacc
681 * ------------------------------------------------------------------------*/
685 /* --------------------------------------------------------------------------
686 * Single token input routines:
688 * The following routines read the values of particular kinds of token given
689 * that the first character of the token has already been located in c0 on
690 * entry to the routine.
691 * ------------------------------------------------------------------------*/
693 #define MAX_TOKEN 4000
694 #define startToken() tokPos = 0
695 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
696 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
697 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
699 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
700 static Int tokPos; /* input position in buffer */
701 static Int identType; /* identifier type: CONID / VARID */
702 static Int opType; /* operator type : CONOP / VAROP */
704 static Void local endToken() { /* check for token overflow */
705 if (tokPos>MAX_TOKEN) {
706 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
709 tokenStr[tokPos] = '\0';
712 static Text local readOperator() { /* read operator symbol */
717 } while (isISO(c0) && isIn(c0,SYMBOL));
718 opType = (tokenStr[0]==':' ? CONOP : VAROP);
720 return findText(tokenStr);
723 static Text local readIdent() { /* read identifier */
728 } while (isISO(c0) && isIn(c0,IDAFTER));
730 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
731 if (readingInterface)
732 return unZcodeThenFindText(tokenStr); else
733 return findText(tokenStr);
737 static Bool local doesNotExceed(s,radix,limit)
744 if (s[p] == 0) return TRUE;
745 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
746 n = radix*n + (s[p]-'0');
751 static Int local stringToInt(s,radix)
757 if (s[p] == 0) return n;
758 n = radix*n + (s[p]-'0');
763 static Cell local readRadixNumber(r) /* Read literal in specified radix */
764 Int r; { /* from input of the form 0c{digs} */
767 skip(); /* skip leading zero */
768 if ((d=readHexDigit(c1))<0 || d>=r) {
769 /* Special case; no digits, lex as */
770 /* if it had been written "0 c..." */
775 saveTokenChar('0'+readHexDigit(c0));
777 d = readHexDigit(c0);
778 } while (d>=0 && d<r);
782 if (doesNotExceed(tokenStr,r,MAXPOSINT))
783 return mkInt(stringToInt(tokenStr,r));
786 return stringToBignum(tokenStr);
788 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
793 static Cell local readNumber() { /* read numeric constant */
796 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
797 return readRadixNumber(16);
798 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
799 return readRadixNumber(8);
806 } while (isISO(c0) && isIn(c0,DIGIT));
808 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
810 if (doesNotExceed(tokenStr,10,MAXPOSINT))
811 return mkInt(stringToInt(tokenStr,10)); else
812 return stringToBignum(tokenStr);
815 saveTokenChar(c0); /* save decimal point */
817 do { /* process fractional part ... */
820 } while (isISO(c0) && isIn(c0,DIGIT));
822 if (c0=='e' || c0=='E') { /* look for exponent part... */
832 if (!isISO(c0) || !isIn(c0,DIGIT)) {
833 ERRMSG(row) "Missing digits in exponent"
840 } while (isISO(c0) && isIn(c0,DIGIT));
845 return mkFloat(stringToFloat(tokenStr));
854 static Cell local readChar() { /* read character constant */
858 if (c0=='\'' || c0=='\n' || c0==EOF) {
859 ERRMSG(row) "Illegal character constant"
863 charRead = readAChar(FALSE);
868 ERRMSG(row) "Improperly terminated character constant"
874 static Cell local readString() { /* read string literal */
879 while (c0!='\"' && c0!='\n' && c0!=EOF) {
882 saveStrChr(charOf(c));
888 ERRMSG(row) "Improperly terminated string"
892 return mkStr(findText(tokenStr));
895 static Void local saveStrChr(c) /* save character in string */
897 if (c!='\0' && c!='\\') { /* save non null char as single char*/
900 else { /* save null char as TWO null chars */
901 if (tokPos+1<MAX_TOKEN) {
911 static Cell local readAChar(isStrLit) /* read single char constant */
912 Bool isStrLit; { /* TRUE => enable \& and gaps */
915 if (c0=='\\') /* escape character? */
916 return readEscapeChar(isStrLit);
918 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
921 skip(); /* normal character? */
925 /* --------------------------------------------------------------------------
926 * Character escape code sequences:
927 * ------------------------------------------------------------------------*/
929 static struct { /* table of special escape codes */
933 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
934 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
935 {"\'",'\''}, {"v", 11},
936 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
937 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
938 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
939 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
940 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
941 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
942 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
943 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
944 {"SP", 32}, {"DEL", 127},
948 static Int alreadyMatched; /* Record portion of input stream */
949 static char alreadyRead[10]; /* that has been read w/o a match */
951 static Bool local lazyReadMatches(s) /* compare input stream with string */
952 String s; { /* possibly using characters that */
953 int i; /* have already been read */
955 for (i=0; i<alreadyMatched; ++i)
956 if (alreadyRead[i]!=s[i])
959 while (s[i] && s[i]==c0) {
960 alreadyRead[alreadyMatched++]=(char)c0;
968 static Cell local readEscapeChar(isStrLit)/* read escape character */
974 case '&' : if (isStrLit) {
978 ERRMSG(row) "Illegal use of `\\&' in character constant"
982 case '^' : return readCtrlChar();
984 case 'o' : return readOctChar();
985 case 'x' : return readHexChar();
987 default : if (!isISO(c0)) {
988 ERRMSG(row) "Illegal escape sequence"
991 else if (isIn(c0,ZPACE)) {
996 ERRMSG(row) "Illegal use of gap in character constant"
1000 else if (isIn(c0,DIGIT))
1001 return readDecChar();
1004 for (alreadyMatched=0; escapes[i].codename; i++)
1005 if (lazyReadMatches(escapes[i].codename))
1006 return mkChar(escapes[i].codenumber);
1008 alreadyRead[alreadyMatched++] = (char)c0;
1009 alreadyRead[alreadyMatched++] = '\0';
1010 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
1013 return NIL;/*NOTREACHED*/
1016 static Void local skipGap() { /* skip over gap in string literal */
1017 do /* (simplified in Haskell 1.1) */
1022 while (isISO(c0) && isIn(c0,ZPACE));
1024 ERRMSG(row) "Missing `\\' terminating string literal gap"
1030 static Cell local readCtrlChar() { /* read escape sequence \^x */
1031 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1035 if ((which = strchr(controls,c0))==NULL) {
1036 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1040 return mkChar(which-controls);
1043 static Cell local readOctChar() { /* read octal character constant */
1048 if ((d = readHexDigit(c0))<0 || d>=8) {
1049 ERRMSG(row) "Empty octal character escape"
1053 if (overflows(n,8,d,MAXCHARVAL)) {
1054 ERRMSG(row) "Octal character escape out of range"
1059 } while ((d = readHexDigit(c0))>=0 && d<8);
1064 static Cell local readHexChar() { /* read hex character constant */
1069 if ((d = readHexDigit(c0))<0) {
1070 ERRMSG(row) "Empty hexadecimal character escape"
1074 if (overflows(n,16,d,MAXCHARVAL)) {
1075 ERRMSG(row) "Hexadecimal character escape out of range"
1080 } while ((d = readHexDigit(c0))>=0);
1085 static Int local readHexDigit(c) /* read single hex digit */
1087 if ('0'<=c && c<='9')
1089 if ('A'<=c && c<='F')
1090 return 10 + (c-'A');
1091 if ('a'<=c && c<='f')
1092 return 10 + (c-'a');
1096 static Cell local readDecChar() { /* read decimal character constant */
1100 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1101 ERRMSG(row) "Decimal character escape out of range"
1104 n = 10*n + (c0-'0');
1106 } while (c0!=EOF && isIn(c0,DIGIT));
1111 /* --------------------------------------------------------------------------
1112 * Produce printable representation of character:
1113 * ------------------------------------------------------------------------*/
1115 String unlexChar(c,quote) /* return string representation of */
1116 Char c; /* character... */
1117 Char quote; { /* protect quote character */
1118 static char buffer[12];
1120 if (c<0) /* deal with sign extended chars.. */
1123 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1124 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1126 buffer[1] = (char)c;
1130 buffer[0] = (char)c;
1134 else { /* look for escape code */
1136 for (escs=0; escapes[escs].codename; escs++)
1137 if (escapes[escs].codenumber==c) {
1138 sprintf(buffer,"\\%s",escapes[escs].codename);
1141 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1146 Void printString(s) /* print string s, using quotes and */
1147 String s; { /* escapes if any parts need them */
1151 while ((c = *t)!=0 && isISO(c)
1152 && isIn(c,PRINT) && c!='"' && !isIn(c,ZPACE)) {
1158 Printf("%s",unlexChar(*t,'"'));
1166 /* -------------------------------------------------------------------------
1167 * Handle special types of input for use in interpreter:
1168 * -----------------------------------------------------------------------*/
1170 Command readCommand(cmds,start,sys) /* read command at start of input */
1171 struct cmd *cmds; /* line in interpreter */
1172 Char start; /* characters introducing a cmd */
1173 Char sys; { /* character for shell escape */
1174 while (c0==' ' || c0 =='\t')
1177 if (c0=='\n') /* look for blank command lines */
1179 if (c0==EOF) /* look for end of input stream */
1181 if (c0==sys) { /* single character system escape */
1185 if (c0==start && c1==sys) { /* two character system escape */
1191 startToken(); /* All cmds start with start */
1192 if (c0==start) /* except default (usually EVAL) */
1193 do { /* which is empty */
1196 } while (c0!=EOF && !isIn(c0,ZPACE));
1199 for (; cmds->cmdString; ++cmds)
1200 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1201 (tokenStr[0]==start &&
1202 tokenStr[1]==(cmds->cmdString)[1] &&
1204 return (cmds->cmdCode);
1208 String readFilename() { /* Read filename from input (if any)*/
1209 if (reading==PROJFILE)
1212 while (c0==' ' || c0=='\t')
1215 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1219 while (c0!=EOF && !isIn(c0,ZPACE)) {
1222 while (c0!=EOF && c0!='\"') {
1223 Cell c = readAChar(TRUE);
1225 saveTokenChar(charOf(c));
1231 ERRMSG(row) "a closing quote, '\"', was expected"
1244 String readLine() { /* Read command line from input */
1245 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1249 while (c0!='\n' && c0!=EOF) {
1258 /* --------------------------------------------------------------------------
1259 * This lexer supports the Haskell layout rule:
1261 * - Layout area bounded by { ... }, with `;'s in between.
1262 * - A `{' is a HARD indentation and can only be matched by a corresponding
1264 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1265 * is inserted with the column number of the first token after the
1266 * WHERE/LET/OF keyword.
1267 * - When a soft indentation is uppermost on the indentation stack with
1268 * column col' we insert:
1269 * `}' in front of token with column<col' and pop indentation off stack,
1270 * `;' in front of token with column==col'.
1271 * ------------------------------------------------------------------------*/
1273 #define MAXINDENT 100 /* maximum nesting of layout rule */
1274 static Int layout[MAXINDENT+1];/* indentation stack */
1275 #define HARD (-1) /* indicates hard indentation */
1276 static Int indentDepth = (-1); /* current indentation nesting */
1278 static Void local goOffside(col) /* insert offside marker */
1279 Int col; { /* for specified column */
1281 if (indentDepth>=MAXINDENT) {
1282 ERRMSG(row) "Too many levels of program nesting"
1285 layout[++indentDepth] = col;
1288 static Void local unOffside() { /* leave layout rule area */
1293 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1295 return indentDepth>=0 && layout[indentDepth]!=HARD;
1298 /* --------------------------------------------------------------------------
1300 * ------------------------------------------------------------------------*/
1302 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1303 for (;;) /* Strictly speaking, this code is */
1304 if (c0==EOF) /* a little more liberal than the */
1305 return; /* report allows ... */
1308 else if (isIn(c0,ZPACE))
1310 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1312 Int origRow = row; /* Save original row number */
1315 while (nesting>0 && c0!=EOF)
1316 if (c0=='{' && c1=='-') {
1321 else if (c0=='-' && c1=='}') {
1331 ERRMSG(origRow) "Unterminated nested comment {- ..."
1335 else if (c0=='-' && c1=='-') { /* One line comment */
1338 while (c0!='\n' && c0!=EOF);
1346 static Bool firstToken; /* Set to TRUE for first token */
1347 static Int firstTokenIs; /* ... with token value stored here */
1349 static Int local yylex() { /* Read next input token ... */
1350 static Bool insertOpen = FALSE;
1351 static Bool insertedToken = FALSE;
1352 static Text textRepeat;
1354 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1356 if (firstToken) { /* Special case for first token */
1360 insertedToken = FALSE;
1361 if (reading==KEYBOARD)
1362 textRepeat = findText(repeatStr);
1363 return firstTokenIs;
1366 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1368 insertedToken = TRUE;
1370 push(yylval = mkInt(row));
1374 /* ----------------------------------------------------------------------
1375 * Skip white space, and insert tokens to support layout rules as reqd.
1376 * --------------------------------------------------------------------*/
1379 startColumn = column;
1380 push(yylval = mkInt(row)); /* default token value is line no. */
1381 /* subsequent changes to yylval must also set top() to the same value */
1383 if (indentDepth>=0) { /* layout rule(s) active ? */
1384 if (insertedToken) /* avoid inserting multiple `;'s */
1385 insertedToken = FALSE; /* or putting `;' after `{' */
1387 if (offsideON && layout[indentDepth]!=HARD) {
1388 if (column<layout[indentDepth]) {
1392 else if (column==layout[indentDepth] && c0!=EOF) {
1393 insertedToken = TRUE;
1399 /* ----------------------------------------------------------------------
1400 * Now try to identify token type:
1401 * --------------------------------------------------------------------*/
1403 if (readingInterface) {
1404 if (c0 == '(' && c1 == '#') { skip(); skip(); return UTL; };
1405 if (c0 == '#' && c1 == ')') { skip(); skip(); return UTR; };
1409 case EOF : return 0; /* End of file/input */
1411 /* The next 10 characters make up the `special' category in 1.3 */
1412 case '(' : skip(); return '(';
1413 case ')' : skip(); return ')';
1414 case ',' : skip(); return ',';
1415 case ';' : skip(); return ';';
1416 case '[' : skip(); return '[';
1417 case ']' : skip(); return ']';
1418 case '`' : skip(); return '`';
1419 case '{' : if (offsideON) goOffside(HARD);
1422 case '}' : if (offsideON && indentDepth<0) {
1423 ERRMSG(row) "Misplaced `}'"
1426 if (!(offsideON && layout[indentDepth]!=HARD))
1427 skip(); /* skip over hard }*/
1429 unOffside(); /* otherwise, we have to insert a }*/
1430 return '}'; /* to (try to) avoid an error... */
1432 /* Character and string literals */
1433 case '\'' : top() = yylval = readChar();
1436 case '\"' : top() = yylval = readString();
1441 if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1442 Text it; /* Look for implicit param name */
1445 top() = yylval = ap(IPVAR,it);
1446 return identType=IPVARID;
1450 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1451 Text it; /* Look for record selector name */
1454 top() = yylval = ap(RECSEL,mkExt(it));
1455 return identType=RECSELID;
1458 if (isIn(c0,LARGE)) { /* Look for qualified name */
1459 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1460 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1462 skip(); /* Skip qualifying dot */
1463 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1464 it2 = readOperator();
1465 if (opType==CONOP) {
1466 top() = yylval = mkQConOp(it,it2);
1469 top() = yylval = mkQVarOp(it,it2);
1472 } else { /* Qualified identifier */
1474 if (identType==CONID) {
1475 top() = yylval = mkQCon(it,it2);
1478 top() = yylval = mkQVar(it,it2);
1483 top() = yylval = mkCon(it);
1487 if (isIn(c0,(SMALL|LARGE))) {
1488 Text it = readIdent();
1490 if (it==textCase) return CASEXP;
1491 if (it==textOfK) lookAhead(OF);
1492 if (it==textData) return DATA;
1493 if (it==textType) return TYPE;
1494 if (it==textIf) return IF;
1495 if (it==textThen) return THEN;
1496 if (it==textElse) return ELSE;
1497 if (it==textWhere) lookAhead(WHERE);
1498 if (it==textLet) lookAhead(LET);
1499 if (it==textIn) return IN;
1500 if (it==textInfix) return INFIXN;
1501 if (it==textInfixl) return INFIXL;
1502 if (it==textInfixr) return INFIXR;
1503 if (it==textForeign) return FOREIGN;
1504 if (it==textUnsafe) return UNSAFE;
1505 if (it==textNewtype) return TNEWTYPE;
1506 if (it==textDefault) return DEFAULT;
1507 if (it==textDeriving) return DERIVING;
1508 if (it==textDo) lookAhead(DO);
1509 if (it==textClass) return TCLASS;
1510 if (it==textInstance) return TINSTANCE;
1511 if (it==textModule) return TMODULE;
1512 if (it==textInterface) return INTERFACE;
1513 if (it==textInstImport) return INSTIMPORT;
1514 if (it==textImport) return IMPORT;
1515 if (it==textExport) return EXPORT;
1516 if (it==textDynamic) return DYNAMIC;
1517 if (it==textCcall) return CCALL;
1518 if (it==textStdcall) return STDKALL;
1519 if (it==textUUExport) return UUEXPORT;
1520 if (it==textHiding) return HIDING;
1521 if (it==textQualified) return QUALIFIED;
1522 if (it==textAsMod) return ASMOD;
1523 if (it==textPrivileged) return PRIVILEGED;
1524 if (it==textWildcard) return '_';
1525 if (it==textAll && !haskell98) return ALL;
1527 if (it==textWith && !haskell98) lookAhead(WITH);
1528 if (it==textDlet && !haskell98) lookAhead(DLET);
1530 if (it==textUUAll) return ALL;
1531 if (it==textUUUsage) return UUUSAGE;
1532 if (it==textRepeat && reading==KEYBOARD)
1533 return repeatLast();
1535 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1539 if (isIn(c0,SYMBOL)) {
1540 Text it = readOperator();
1542 if (it==textCoco) return COCO;
1543 if (it==textEq) return '=';
1544 if (it==textUpto) return UPTO;
1545 if (it==textAs) return '@';
1546 if (it==textLambda) return '\\';
1547 if (it==textBar) return '|';
1548 if (it==textFrom) return FROM;
1549 if (it==textMinus) return '-';
1550 if (it==textPlus) return '+';
1551 if (it==textBang) return '!';
1552 if (it==textDot) return '.';
1553 if (it==textArrow) return ARROW;
1554 if (it==textLazy) return '~';
1555 if (it==textImplies) return IMPLIES;
1556 if (it==textRepeat && reading==KEYBOARD)
1557 return repeatLast();
1559 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1563 if (isIn(c0,DIGIT)) {
1564 top() = yylval = readNumber();
1568 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1571 return 0; /*NOTREACHED*/
1574 static Int local repeatLast() { /* Obtain last expression entered */
1575 if (isNull(yylval=getLastExpr())) {
1576 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1582 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1583 Text t; { /* by t ... */
1584 String s = textToStr(t);
1585 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1588 Syntax syntaxOf(n) /* Find syntax for name */
1590 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1591 return defaultSyntax(name(n).text);
1592 return name(n).syntax;
1595 /* --------------------------------------------------------------------------
1596 * main entry points to parser/lexer:
1597 * ------------------------------------------------------------------------*/
1599 static Cell local parseInput(startWith)/* Parse input with given first tok,*/
1600 Int startWith; { /* determining whether to read a */
1601 Cell final = NIL; /* script or an expression */
1603 firstTokenIs = startWith;
1604 if (startWith==INTERFACE) {
1605 offsideON = FALSE; readingInterface = TRUE;
1607 offsideON = TRUE; readingInterface = FALSE;
1611 if (yyparse()) { /* This can only be parser overflow */
1612 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1613 EEND; /* in the parser... */
1616 if (startWith==SCRIPT) pop(); /* zap spurious closing } token */
1619 if (!stackEmpty()) /* stack should now be empty */
1620 internal("parseInput");
1624 Void parseExp() { /* Read an expression to evaluate */
1626 setLastExpr(inputExpr);
1629 #if EXPLAIN_INSTANCE_RESOLUTION
1630 Void parseContext() { /* Read a context to prove */
1631 parseInput(CONTEXT);
1635 Cell parseInterface(nm,len) /* Read a GHC interface file */
1637 Long len; { /* Used to set a target for reading */
1639 Printf("Reading interface \"%s\"\n", nm );
1641 return parseInput(INTERFACE);
1644 Cell parseModule(nm,len) /* Read a module */
1646 Long len; { /* Used to set a target for reading */
1648 Printf("Reading source file \"%s\"\n", nm );
1650 return parseInput(SCRIPT);
1654 /* --------------------------------------------------------------------------
1656 * ------------------------------------------------------------------------*/
1661 case POSTPREL: break;
1663 case PREPREL : initCharTab();
1664 textCase = findText("case");
1665 textOfK = findText("of");
1666 textData = findText("data");
1667 textType = findText("type");
1668 textIf = findText("if");
1669 textThen = findText("then");
1670 textElse = findText("else");
1671 textWhere = findText("where");
1672 textLet = findText("let");
1673 textIn = findText("in");
1674 textInfix = findText("infix");
1675 textInfixl = findText("infixl");
1676 textInfixr = findText("infixr");
1677 textForeign = findText("foreign");
1678 textUnsafe = findText("unsafe");
1679 textNewtype = findText("newtype");
1680 textDefault = findText("default");
1681 textDeriving = findText("deriving");
1682 textDo = findText("do");
1683 textClass = findText("class");
1685 textWith = findText("with");
1686 textDlet = findText("dlet");
1688 textInstance = findText("instance");
1689 textCoco = findText("::");
1690 textEq = findText("=");
1691 textUpto = findText("..");
1692 textAs = findText("@");
1693 textLambda = findText("\\");
1694 textBar = findText("|");
1695 textMinus = findText("-");
1696 textPlus = findText("+");
1697 textFrom = findText("<-");
1698 textArrow = findText("->");
1699 textLazy = findText("~");
1700 textBang = findText("!");
1701 textDot = findText(".");
1702 textImplies = findText("=>");
1703 textPrimPrel = findText("PrimPrel");
1704 textPrelude = findText("Prelude");
1705 textNum = findText("Num");
1706 textModule = findText("module");
1707 textInterface = findText("__interface");
1708 textInstImport = findText("__instimport");
1709 textExport = findText("export");
1710 textDynamic = findText("dynamic");
1711 textCcall = findText("ccall");
1712 textStdcall = findText("stdcall");
1713 textUUExport = findText("__export");
1714 textImport = findText("import");
1715 textHiding = findText("hiding");
1716 textQualified = findText("qualified");
1717 textAsMod = findText("as");
1718 textPrivileged = findText("privileged");
1719 textWildcard = findText("_");
1720 textAll = findText("forall");
1721 textUUAll = findText("__forall");
1722 textUUUsage = findText("__u");
1723 varMinus = mkVar(textMinus);
1724 varPlus = mkVar(textPlus);
1725 varBang = mkVar(textBang);
1726 varDot = mkVar(textDot);
1727 varHiding = mkVar(textHiding);
1728 varQualified = mkVar(textQualified);
1729 varAsMod = mkVar(textAsMod);
1730 varPrivileged = mkVar(textPrivileged);
1731 conMain = mkCon(findText("Main"));
1732 varMain = mkVar(findText("main"));
1738 case RESET : tyconDefns = NIL;
1746 foreignImports= NIL;
1747 foreignExports= NIL;
1755 case BREAK : if (reading==KEYBOARD)
1759 case MARK : mark(tyconDefns);
1766 mark(unqualImports);
1767 mark(foreignImports);
1768 mark(foreignExports);
1779 mark(varPrivileged);
1787 /*-------------------------------------------------------------------------*/