2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * Hugs 98 is Copyright (c) Mark P Jones, Alastair Reid and the Yale
6 * Haskell Group 1994-99, and is distributed as Open Source software
7 * under the Artistic License; see the file "Artistic" that is included
8 * in the distribution for details.
10 * $RCSfile: input.c,v $
12 * $Date: 1999/06/07 17:22:32 $
13 * ------------------------------------------------------------------------*/
31 /* --------------------------------------------------------------------------
33 * ------------------------------------------------------------------------*/
35 List tyconDefns = NIL; /* type constructor definitions */
36 List typeInDefns = NIL; /* type synonym restrictions */
37 List valDefns = NIL; /* value definitions in script */
38 List classDefns = NIL; /* class defns in script */
39 List instDefns = NIL; /* instance defns in script */
40 List selDefns = NIL; /* list of selector lists */
41 List genDefns = NIL; /* list of generated names */
42 List unqualImports = NIL; /* unqualified import list */
43 List foreignImports = NIL; /* foreign imports */
44 List foreignExports = NIL; /* foreign exportsd */
45 List defaultDefns = NIL; /* default definitions (if any) */
46 Int defaultLine = 0; /* line in which default defs occur*/
47 List evalDefaults = NIL; /* defaults for evaluator */
49 Cell inputExpr = NIL; /* input expression */
50 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
51 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
52 Bool offsideON = TRUE; /* TRUE => implement offside rule */
54 String repeatStr = 0; /* Repeat last expr */
56 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
57 String preprocessor = 0;
60 /* --------------------------------------------------------------------------
61 * Local function prototypes:
62 * ------------------------------------------------------------------------*/
64 static Void local initCharTab Args((Void));
65 static Void local fileInput Args((String,Long));
66 static Bool local literateMode Args((String));
67 static Bool local linecmp Args((String,String));
68 static Int local nextLine Args((Void));
69 static Void local skip Args((Void));
70 static Void local thisLineIs Args((Int));
71 static Void local newlineSkip Args((Void));
72 static Void local closeAnyInput Args((Void));
74 Int yyparse Args((Void)); /* can't stop yacc making this */
75 /* public, but don't advertise */
76 /* it in a header file. */
78 static Void local endToken Args((Void));
79 static Text local readOperator Args((Void));
80 static Text local readIdent Args((Void));
81 static Cell local readRadixNumber Args((Int));
82 static Cell local readNumber Args((Void));
83 static Cell local readChar Args((Void));
84 static Cell local readString Args((Void));
85 static Void local saveStrChr Args((Char));
86 static Cell local readAChar Args((Bool));
88 static Bool local lazyReadMatches Args((String));
89 static Cell local readEscapeChar Args((Bool));
90 static Void local skipGap Args((Void));
91 static Cell local readCtrlChar Args((Void));
92 static Cell local readOctChar Args((Void));
93 static Cell local readHexChar Args((Void));
94 static Int local readHexDigit Args((Char));
95 static Cell local readDecChar Args((Void));
97 static Void local goOffside Args((Int));
98 static Void local unOffside Args((Void));
99 static Bool local canUnOffside Args((Void));
101 static Void local skipWhitespace Args((Void));
102 static Int local yylex Args((Void));
103 static Int local repeatLast Args((Void));
105 static Void local parseInput Args((Int));
107 static Bool local doesNotExceed Args((String,Int,Int));
108 static Int local stringToInt Args((String,Int));
111 /* --------------------------------------------------------------------------
112 * Text values for reserved words and special symbols:
113 * ------------------------------------------------------------------------*/
115 static Text textCase, textOfK, textData, textType, textIf;
116 static Text textThen, textElse, textWhere, textLet, textIn;
117 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
118 static Text textDefault, textDeriving, textDo, textClass, textInstance;
120 static Text textCoco, textEq, textUpto, textAs, textLambda;
121 static Text textBar, textMinus, textFrom, textArrow, textLazy;
122 static Text textBang, textDot, textAll, textImplies;
123 static Text textWildcard;
125 static Text textModule, textImport, textInterface, textInstImport;
126 static Text textHiding, textQualified, textAsMod;
127 static Text textExport, textUnsafe, text__All;
129 Text textNum; /* Num */
130 Text textPrelude; /* Prelude */
131 Text textPlus; /* (+) */
133 static Cell conMain; /* Main */
134 static Cell varMain; /* main */
136 static Cell varMinus; /* (-) */
137 static Cell varPlus; /* (+) */
138 static Cell varBang; /* (!) */
139 static Cell varDot; /* (.) */
140 static Cell varHiding; /* hiding */
141 static Cell varQualified; /* qualified */
142 static Cell varAsMod; /* as */
144 static List imps; /* List of imports to be chased */
147 /* --------------------------------------------------------------------------
148 * Character set handling:
150 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
151 * character set. The following code provides methods for classifying
152 * input characters according to the lexical structure specified by the
153 * report. Hugs should still accept older programs because ASCII is
154 * essentially just a subset of the ISO character set.
156 * Notes: If you want to port Hugs to a machine that uses something
157 * substantially different from the ISO character set, then you will need
158 * to insert additional code to map between character sets.
160 * At some point, the following data structures may be exported in a .h
161 * file to allow the information contained here to be picked up in the
162 * implementation of LibChar is* primitives.
164 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
165 * ------------------------------------------------------------------------*/
167 static Bool charTabBuilt;
168 static unsigned char ctable[NUM_CHARS];
169 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
170 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
180 static Void local initCharTab() { /* Initialize char decode table */
181 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
182 #define setChar(x,c) ctable[c] |= (x)
183 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
184 #define setCopy(x,c) {Int i; \
185 for (i=0; i<NUM_CHARS; ++i) \
190 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
192 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
193 setRange(SMALL, 223,246); /* ISO lower case letters */
194 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
195 setChar (SMALL, '_');
197 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
198 setRange(LARGE, 192,214); /* ISO upper case letters */
199 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
201 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
202 setRange(SYMBOL, 215,215);
203 setChar (SYMBOL, 247);
204 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
206 setChar (IDAFTER, '\''); /* Characters in identifier */
207 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
209 setChar (SPACE, ' '); /* ASCII space character */
210 setChar (SPACE, 160); /* ISO non breaking space */
211 setRange(SPACE, 9,13); /* special whitespace: \t\n\v\f\r */
213 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
214 setChars(PRINT, " '\""); /* Space and quotes */
215 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
225 /* --------------------------------------------------------------------------
226 * Single character input routines:
228 * At the lowest level of input, characters are read one at a time, with the
229 * current character held in c0 and the following (lookahead) character in
230 * c1. The corrdinates of c0 within the file are held in (column,row).
231 * The input stream is advanced by one character using the skip() function.
232 * ------------------------------------------------------------------------*/
234 #define TABSIZE 8 /* spacing between tabstops */
236 #define NOTHING 0 /* what kind of input is being read?*/
237 #define KEYBOARD 1 /* - keyboard/console? */
238 #define SCRIPTFILE 2 /* - script file */
239 #define PROJFILE 3 /* - project file */
240 #define STRING 4 /* - string buffer? */
242 static Int reading = NOTHING;
244 static Target readSoFar;
245 static Int row, column, startColumn;
247 static FILE *inputStream = 0;
248 static Bool thisLiterate;
249 static String nextStringChar; /* next char in string buffer */
251 #if USE_READLINE /* for command line editors */
252 static String currentLine; /* editline or GNU readline */
253 static String nextChar;
254 #define nextConsoleChar() \
255 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
256 extern Void add_history Args((String));
257 extern String readline Args((String));
259 #define nextConsoleChar() getc(stdin)
262 static Int litLines; /* count defn lines in lit script */
263 #define DEFNCHAR '>' /* definition lines begin with this */
264 static Int lastLine; /* records type of last line read: */
265 #define STARTLINE 0 /* - at start of file, none read */
266 #define BLANKLINE 1 /* - blank (may preceed definition) */
267 #define TEXTLINE 2 /* - text comment */
268 #define DEFNLINE 3 /* - line containing definition */
269 #define CODELINE 4 /* - line inside code block */
271 #define BEGINCODE "\\begin{code}"
272 #define ENDCODE "\\end{code}"
275 static char *lineBuffer = NULL; /* getline() does the initial allocation */
277 #define LINEBUFFER_SIZE 1000
278 static char lineBuffer[LINEBUFFER_SIZE];
280 static int lineLength = 0;
281 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
282 static int linePtr = 0;
284 Void consoleInput(prompt) /* prepare to input characters from */
285 String prompt; { /* standard in (i.e. console/kbd) */
286 reading = KEYBOARD; /* keyboard input is Line oriented, */
287 c0 = /* i.e. input terminated by '\n' */
293 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
294 * avoids accidentally freeing currentLine twice.
297 String oldCurrentLine = currentLine;
298 currentLine = 0; /* We may lose the space of currentLine */
299 free(oldCurrentLine); /* if interrupted here - unlikely */
301 currentLine = readline(prompt);
302 nextChar = currentLine;
305 add_history(currentLine);
315 Void projInput(nm) /* prepare to input characters from */
316 String nm; { /* from named project file */
317 if ((inputStream = fopen(nm,"r"))!=0) {
325 ERRMSG(0) "Unable to open project file \"%s\"", nm
330 static Void local fileInput(nm,len) /* prepare to input characters from*/
331 String nm; /* named file (specified length is */
332 Long len; { /* used to set target for reading) */
333 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
335 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
336 char *cmd = malloc(reallen);
338 ERRMSG(0) "Unable to allocate memory for filter command."
341 strcpy(cmd,preprocessor);
344 inputStream = popen(cmd,"r");
347 inputStream = fopen(nm,"r");
350 inputStream = fopen(nm,"r");
353 reading = SCRIPTFILE;
359 lastLine = STARTLINE; /* literate file processing */
363 thisLiterate = literateMode(nm);
367 setGoal("Parsing", (Target)len);
370 ERRMSG(0) "Unable to open file \"%s\"", nm
375 Void stringInput(s) /* prepare to input characters from string */
390 static Bool local literateMode(nm) /* Select literate mode for file */
392 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
394 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
396 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
397 filenamecmp(dot+1,"verb")==0) /* literate scripts */
400 return literateScripts; /* otherwise, use the default */
404 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
408 len = 1 + strlen ( srcName );
409 *hiName = malloc(len);
410 *oName = malloc(len);
411 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
412 (*hiName)[0] = (*oName)[0] = 0;
413 dot = strrchr(srcName, '.');
415 if (filenamecmp(dot+1, "hs")==0 &&
416 filenamecmp(dot+1, "lhs")==0 &&
417 filenamecmp(dot+1, "verb")==0) return;
419 strcpy(*hiName, srcName);
420 dot = strrchr(*hiName, '.');
425 strcpy(*oName, srcName);
426 dot = strrchr(*oName, '.');
433 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
434 * I've removed the loop (since newLineSkip contains a loop too) and
435 * replaced the warnings with errors. ADR
438 * To deal with literate \begin{code}...\end{code} blocks,
439 * add a line buffer that rooms the current line. The old c0 and c1
440 * stream pointers are used as before within that buffer -- sof
442 * Upon reading a new line into the line buffer, we check to see if
443 * we're reading in a line containing \begin{code} or \end{code} and
444 * take appropriate action.
447 static Bool local linecmp(s,line) /* compare string with line */
448 String s; /* line may end in whitespace */
451 while (s[i] != '\0' && s[i] == line[i]) {
454 /* s[0..i-1] == line[0..i-1] */
455 if (s[i] != '\0') { /* check s `isPrefixOf` line */
458 while (isIn(line[i], SPACE)) { /* allow whitespace at end of line */
461 return (line[i] == '\0');
464 /* Returns line length (including \n) or 0 upon EOF. */
465 static Int local nextLine()
469 Forget about fgets(), it is utterly braindead.
470 (Assumes \NUL free streams and does not gracefully deal
471 with overflow.) Instead, use GNU libc's getline().
473 lineLength = getline(&lineBuffer, &lineLength, inputStream);
475 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
476 lineLength = strlen(lineBuffer);
480 /* printf("Read: \"%s\"", lineBuffer); */
481 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
484 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
485 lineBuffer[1] == '!') {
486 lineBuffer[0]='\n'; /* pretend it's a blank line */
489 } else if (thisLiterate) {
490 if (linecmp(BEGINCODE, lineBuffer)) {
491 if (!inCodeBlock) { /* Entered a code block */
493 lineBuffer[0]='\n'; /* pretend it's a blank line */
498 ERRMSG(row) "\\begin{code} encountered inside code block"
502 else if (linecmp(ENDCODE, lineBuffer)) {
503 if (inCodeBlock) { /* Finished code block */
505 lineBuffer[0]='\n'; /* pretend it's a blank line */
510 ERRMSG(row) "\\end{code} encountered outside code block"
515 /* printf("Read: \"%s\"", lineBuffer); */
519 static Void local skip() { /* move forward one char in input */
520 if (c0!=EOF) { /* stream, updating c0, c1, ... */
521 if (c0=='\n') { /* Adjusting cursor coords as nec. */
524 if (reading==SCRIPTFILE)
528 column += TABSIZE - ((column-1)%TABSIZE);
537 if (reading==SCRIPTFILE)
541 else if (reading==KEYBOARD) {
546 c1 = nextConsoleChar();
547 /* On Win32, hitting ctrl-C causes the next getchar to
548 * fail - returning "-1" to indicate an error.
549 * This is one of the rare cases where "-1" does not mean EOF.
551 if (EOF == c1 && !feof(stdin)) {
556 else if (reading==STRING) {
557 c1 = (unsigned char) *nextStringChar++;
562 if (lineLength <=0 || linePtr == lineLength) {
563 /* Current line, exhausted - get new one */
564 if (nextLine() <= 0) { /* EOF */
569 c1 = (unsigned char)lineBuffer[linePtr++];
573 c1 = (unsigned char)lineBuffer[linePtr++];
580 static Void local thisLineIs(kind) /* register kind of current line */
581 Int kind; { /* & check for literate script errs */
582 if (literateErrors) {
583 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
584 (kind==TEXTLINE && lastLine==DEFNLINE)) {
585 ERRMSG(row) "Program line next to comment"
592 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
593 /* assert(c0=='\n'); */
594 if (reading==SCRIPTFILE && thisLiterate) {
597 if (inCodeBlock) { /* pass chars on definition lines */
598 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
602 if (c0==DEFNCHAR) { /* pass chars on definition lines */
603 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
608 while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank? */
610 if (c0=='\n' || c0==EOF)
611 thisLineIs(BLANKLINE);
613 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
614 while (c0!='\n' && c0!=EOF)
616 } /* by now, c0=='\n' or c0==EOF */
617 } while (c0!=EOF); /* if new line, start again */
619 if (litLines==0 && literateErrors) {
620 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
629 static Void local closeAnyInput() { /* Close input stream, if open, */
630 switch (reading) { /* or skip to end of console line */
632 case SCRIPTFILE : if (inputStream) {
633 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
645 case KEYBOARD : while (c0!=EOF)
652 /* --------------------------------------------------------------------------
653 * Parser: Uses table driven parser generated from parser.y using yacc
654 * ------------------------------------------------------------------------*/
658 /* --------------------------------------------------------------------------
659 * Single token input routines:
661 * The following routines read the values of particular kinds of token given
662 * that the first character of the token has already been located in c0 on
663 * entry to the routine.
664 * ------------------------------------------------------------------------*/
666 #define MAX_TOKEN 4000
667 #define startToken() tokPos = 0
668 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
669 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
670 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
672 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
673 static Int tokPos; /* input position in buffer */
674 static Int identType; /* identifier type: CONID / VARID */
675 static Int opType; /* operator type : CONOP / VAROP */
677 static Void local endToken() { /* check for token overflow */
678 if (tokPos>MAX_TOKEN) {
679 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
682 tokenStr[tokPos] = '\0';
685 static Text local readOperator() { /* read operator symbol */
690 } while (isISO(c0) && isIn(c0,SYMBOL));
691 opType = (tokenStr[0]==':' ? CONOP : VAROP);
693 return findText(tokenStr);
696 static Text local readIdent() { /* read identifier */
701 } while (isISO(c0) && isIn(c0,IDAFTER));
703 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
704 return findText(tokenStr);
708 static Bool local doesNotExceed(s,radix,limit)
715 if (s[p] == 0) return TRUE;
716 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
717 n = radix*n + (s[p]-'0');
722 static Int local stringToInt(s,radix)
728 if (s[p] == 0) return n;
729 n = radix*n + (s[p]-'0');
734 static Cell local readRadixNumber(r) /* Read literal in specified radix */
735 Int r; { /* from input of the form 0c{digs} */
738 skip(); /* skip leading zero */
739 if ((d=readHexDigit(c1))<0 || d>=r) {
740 /* Special case; no digits, lex as */
741 /* if it had been written "0 c..." */
746 saveTokenChar('0'+readHexDigit(c0));
748 d = readHexDigit(c0);
749 } while (d>=0 && d<r);
753 if (doesNotExceed(tokenStr,r,MAXPOSINT))
754 return mkInt(stringToInt(tokenStr,r));
757 return stringToBignum(tokenStr);
759 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
764 static Cell local readNumber() { /* read numeric constant */
767 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
768 return readRadixNumber(16);
769 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
770 return readRadixNumber(8);
777 } while (isISO(c0) && isIn(c0,DIGIT));
779 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
781 if (doesNotExceed(tokenStr,10,MAXPOSINT))
782 return mkInt(stringToInt(tokenStr,10)); else
783 return stringToBignum(tokenStr);
786 saveTokenChar(c0); /* save decimal point */
788 do { /* process fractional part ... */
791 } while (isISO(c0) && isIn(c0,DIGIT));
793 if (c0=='e' || c0=='E') { /* look for exponent part... */
803 if (!isISO(c0) || !isIn(c0,DIGIT)) {
804 ERRMSG(row) "Missing digits in exponent"
811 } while (isISO(c0) && isIn(c0,DIGIT));
816 return mkFloat(stringToFloat(tokenStr));
825 static Cell local readChar() { /* read character constant */
829 if (c0=='\'' || c0=='\n' || c0==EOF) {
830 ERRMSG(row) "Illegal character constant"
834 charRead = readAChar(FALSE);
839 ERRMSG(row) "Improperly terminated character constant"
845 static Cell local readString() { /* read string literal */
850 while (c0!='\"' && c0!='\n' && c0!=EOF) {
853 saveStrChr(charOf(c));
859 ERRMSG(row) "Improperly terminated string"
863 return mkStr(findText(tokenStr));
866 static Void local saveStrChr(c) /* save character in string */
868 if (c!='\0' && c!='\\') { /* save non null char as single char*/
871 else { /* save null char as TWO null chars */
872 if (tokPos+1<MAX_TOKEN) {
882 static Cell local readAChar(isStrLit) /* read single char constant */
883 Bool isStrLit; { /* TRUE => enable \& and gaps */
886 if (c0=='\\') /* escape character? */
887 return readEscapeChar(isStrLit);
889 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
892 skip(); /* normal character? */
896 /* --------------------------------------------------------------------------
897 * Character escape code sequences:
898 * ------------------------------------------------------------------------*/
900 static struct { /* table of special escape codes */
904 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
905 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
906 {"\'",'\''}, {"v", 11},
907 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
908 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
909 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
910 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
911 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
912 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
913 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
914 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
915 {"SP", 32}, {"DEL", 127},
919 static Int alreadyMatched; /* Record portion of input stream */
920 static char alreadyRead[10]; /* that has been read w/o a match */
922 static Bool local lazyReadMatches(s) /* compare input stream with string */
923 String s; { /* possibly using characters that */
924 int i; /* have already been read */
926 for (i=0; i<alreadyMatched; ++i)
927 if (alreadyRead[i]!=s[i])
930 while (s[i] && s[i]==c0) {
931 alreadyRead[alreadyMatched++]=(char)c0;
939 static Cell local readEscapeChar(isStrLit)/* read escape character */
945 case '&' : if (isStrLit) {
949 ERRMSG(row) "Illegal use of `\\&' in character constant"
953 case '^' : return readCtrlChar();
955 case 'o' : return readOctChar();
956 case 'x' : return readHexChar();
958 default : if (!isISO(c0)) {
959 ERRMSG(row) "Illegal escape sequence"
962 else if (isIn(c0,SPACE)) {
967 ERRMSG(row) "Illegal use of gap in character constant"
971 else if (isIn(c0,DIGIT))
972 return readDecChar();
975 for (alreadyMatched=0; escapes[i].codename; i++)
976 if (lazyReadMatches(escapes[i].codename))
977 return mkChar(escapes[i].codenumber);
979 alreadyRead[alreadyMatched++] = (char)c0;
980 alreadyRead[alreadyMatched++] = '\0';
981 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
984 return NIL;/*NOTREACHED*/
987 static Void local skipGap() { /* skip over gap in string literal */
988 do /* (simplified in Haskell 1.1) */
993 while (isISO(c0) && isIn(c0,SPACE));
995 ERRMSG(row) "Missing `\\' terminating string literal gap"
1001 static Cell local readCtrlChar() { /* read escape sequence \^x */
1002 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1006 if ((which = strchr(controls,c0))==NULL) {
1007 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1011 return mkChar(which-controls);
1014 static Cell local readOctChar() { /* read octal character constant */
1019 if ((d = readHexDigit(c0))<0 || d>=8) {
1020 ERRMSG(row) "Empty octal character escape"
1024 if (overflows(n,8,d,MAXCHARVAL)) {
1025 ERRMSG(row) "Octal character escape out of range"
1030 } while ((d = readHexDigit(c0))>=0 && d<8);
1035 static Cell local readHexChar() { /* read hex character constant */
1040 if ((d = readHexDigit(c0))<0) {
1041 ERRMSG(row) "Empty hexadecimal character escape"
1045 if (overflows(n,16,d,MAXCHARVAL)) {
1046 ERRMSG(row) "Hexadecimal character escape out of range"
1051 } while ((d = readHexDigit(c0))>=0);
1056 static Int local readHexDigit(c) /* read single hex digit */
1058 if ('0'<=c && c<='9')
1060 if ('A'<=c && c<='F')
1061 return 10 + (c-'A');
1062 if ('a'<=c && c<='f')
1063 return 10 + (c-'a');
1067 static Cell local readDecChar() { /* read decimal character constant */
1071 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1072 ERRMSG(row) "Decimal character escape out of range"
1075 n = 10*n + (c0-'0');
1077 } while (c0!=EOF && isIn(c0,DIGIT));
1082 /* --------------------------------------------------------------------------
1083 * Produce printable representation of character:
1084 * ------------------------------------------------------------------------*/
1086 String unlexChar(c,quote) /* return string representation of */
1087 Char c; /* character... */
1088 Char quote; { /* protect quote character */
1089 static char buffer[12];
1091 if (c<0) /* deal with sign extended chars.. */
1094 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1095 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1097 buffer[1] = (char)c;
1101 buffer[0] = (char)c;
1105 else { /* look for escape code */
1107 for (escs=0; escapes[escs].codename; escs++)
1108 if (escapes[escs].codenumber==c) {
1109 sprintf(buffer,"\\%s",escapes[escs].codename);
1112 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1117 Void printString(s) /* print string s, using quotes and */
1118 String s; { /* escapes if any parts need them */
1122 while ((c = *t)!=0 && isISO(c)
1123 && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1129 Printf("%s",unlexChar(*t,'"'));
1137 /* -------------------------------------------------------------------------
1138 * Handle special types of input for use in interpreter:
1139 * -----------------------------------------------------------------------*/
1141 Command readCommand(cmds,start,sys) /* read command at start of input */
1142 struct cmd *cmds; /* line in interpreter */
1143 Char start; /* characters introducing a cmd */
1144 Char sys; { /* character for shell escape */
1145 while (c0==' ' || c0 =='\t')
1148 if (c0=='\n') /* look for blank command lines */
1150 if (c0==EOF) /* look for end of input stream */
1152 if (c0==sys) { /* single character system escape */
1156 if (c0==start && c1==sys) { /* two character system escape */
1162 startToken(); /* All cmds start with start */
1163 if (c0==start) /* except default (usually EVAL) */
1164 do { /* which is empty */
1167 } while (c0!=EOF && !isIn(c0,SPACE));
1170 for (; cmds->cmdString; ++cmds)
1171 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1172 (tokenStr[0]==start &&
1173 tokenStr[1]==(cmds->cmdString)[1] &&
1175 return (cmds->cmdCode);
1179 String readFilename() { /* Read filename from input (if any)*/
1180 if (reading==PROJFILE)
1183 while (c0==' ' || c0=='\t')
1186 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1190 while (c0!=EOF && !isIn(c0,SPACE)) {
1193 while (c0!=EOF && c0!='\"') {
1194 Cell c = readAChar(TRUE);
1196 saveTokenChar(charOf(c));
1202 ERRMSG(row) "a closing quote, '\"', was expected"
1215 String readLine() { /* Read command line from input */
1216 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1220 while (c0!='\n' && c0!=EOF) {
1229 /* --------------------------------------------------------------------------
1230 * This lexer supports the Haskell layout rule:
1232 * - Layout area bounded by { ... }, with `;'s in between.
1233 * - A `{' is a HARD indentation and can only be matched by a corresponding
1235 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1236 * is inserted with the column number of the first token after the
1237 * WHERE/LET/OF keyword.
1238 * - When a soft indentation is uppermost on the indetation stack with
1239 * column col' we insert:
1240 * `}' in front of token with column<col' and pop indentation off stack,
1241 * `;' in front of token with column==col'.
1242 * ------------------------------------------------------------------------*/
1244 #define MAXINDENT 100 /* maximum nesting of layout rule */
1245 static Int layout[MAXINDENT+1];/* indentation stack */
1246 #define HARD (-1) /* indicates hard indentation */
1247 static Int indentDepth = (-1); /* current indentation nesting */
1249 static Void local goOffside(col) /* insert offside marker */
1250 Int col; { /* for specified column */
1252 if (indentDepth>=MAXINDENT) {
1253 ERRMSG(row) "Too many levels of program nesting"
1256 layout[++indentDepth] = col;
1259 static Void local unOffside() { /* leave layout rule area */
1264 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1266 return indentDepth>=0 && layout[indentDepth]!=HARD;
1269 /* --------------------------------------------------------------------------
1271 * ------------------------------------------------------------------------*/
1273 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1274 for (;;) /* Strictly speaking, this code is */
1275 if (c0==EOF) /* a little more liberal than the */
1276 return; /* report allows ... */
1279 else if (isIn(c0,SPACE))
1281 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1283 Int origRow = row; /* Save original row number */
1286 while (nesting>0 && c0!=EOF)
1287 if (c0=='{' && c1=='-') {
1292 else if (c0=='-' && c1=='}') {
1302 ERRMSG(origRow) "Unterminated nested comment {- ..."
1306 else if (c0=='-' && c1=='-') { /* One line comment */
1309 while (c0!='\n' && c0!=EOF);
1317 static Bool firstToken; /* Set to TRUE for first token */
1318 static Int firstTokenIs; /* ... with token value stored here */
1320 static Int local yylex() { /* Read next input token ... */
1321 static Bool insertOpen = FALSE;
1322 static Bool insertedToken = FALSE;
1323 static Text textRepeat;
1325 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1327 if (firstToken) { /* Special case for first token */
1331 insertedToken = FALSE;
1332 if (reading==KEYBOARD)
1333 textRepeat = findText(repeatStr);
1334 return firstTokenIs;
1337 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1339 insertedToken = TRUE;
1341 push(yylval = mkInt(row));
1345 /* ----------------------------------------------------------------------
1346 * Skip white space, and insert tokens to support layout rules as reqd.
1347 * --------------------------------------------------------------------*/
1350 startColumn = column;
1351 push(yylval = mkInt(row)); /* default token value is line no. */
1352 /* subsequent changes to yylval must also set top() to the same value */
1354 if (indentDepth>=0) { /* layout rule(s) active ? */
1355 if (insertedToken) /* avoid inserting multiple `;'s */
1356 insertedToken = FALSE; /* or putting `;' after `{' */
1358 if (offsideON && layout[indentDepth]!=HARD) {
1359 if (column<layout[indentDepth]) {
1363 else if (column==layout[indentDepth] && c0!=EOF) {
1364 insertedToken = TRUE;
1370 /* ----------------------------------------------------------------------
1371 * Now try to identify token type:
1372 * --------------------------------------------------------------------*/
1375 case EOF : return 0; /* End of file/input */
1377 /* The next 10 characters make up the `special' category in 1.3 */
1378 case '(' : skip(); return '(';
1379 case ')' : skip(); return ')';
1380 case ',' : skip(); return ',';
1381 case ';' : skip(); return ';';
1382 case '[' : skip(); return '[';
1383 case ']' : skip(); return ']';
1384 case '`' : skip(); return '`';
1385 case '{' : if (offsideON) goOffside(HARD);
1388 case '}' : if (offsideON && indentDepth<0) {
1389 ERRMSG(row) "Misplaced `}'"
1392 if (!(offsideON && layout[indentDepth]!=HARD))
1393 skip(); /* skip over hard }*/
1395 unOffside(); /* otherwise, we have to insert a }*/
1396 return '}'; /* to (try to) avoid an error... */
1398 /* Character and string literals */
1399 case '\'' : top() = yylval = readChar();
1402 case '\"' : top() = yylval = readString();
1407 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1408 Text it; /* Look for record selector name */
1411 top() = yylval = ap(RECSEL,mkExt(it));
1412 return identType=RECSELID;
1415 if (isIn(c0,LARGE)) { /* Look for qualified name */
1416 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1417 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1419 skip(); /* Skip qualifying dot */
1420 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1421 it2 = readOperator();
1422 if (opType==CONOP) {
1423 top() = yylval = mkQConOp(it,it2);
1426 top() = yylval = mkQVarOp(it,it2);
1429 } else { /* Qualified identifier */
1431 if (identType==CONID) {
1432 top() = yylval = mkQCon(it,it2);
1435 top() = yylval = mkQVar(it,it2);
1440 top() = yylval = mkCon(it);
1444 if (isIn(c0,(SMALL|LARGE))) {
1445 Text it = readIdent();
1447 if (it==textCase) return CASEXP;
1448 if (it==textOfK) lookAhead(OF);
1449 if (it==textData) return DATA;
1450 if (it==textType) return TYPE;
1451 if (it==textIf) return IF;
1452 if (it==textThen) return THEN;
1453 if (it==textElse) return ELSE;
1454 if (it==textWhere) lookAhead(WHERE);
1455 if (it==textLet) lookAhead(LET);
1456 if (it==textIn) return IN;
1457 if (it==textInfix) return INFIXN;
1458 if (it==textInfixl) return INFIXL;
1459 if (it==textInfixr) return INFIXR;
1460 if (it==textForeign) return FOREIGN;
1461 if (it==textUnsafe) return UNSAFE;
1462 if (it==textNewtype) return TNEWTYPE;
1463 if (it==textDefault) return DEFAULT;
1464 if (it==textDeriving) return DERIVING;
1465 if (it==textDo) lookAhead(DO);
1466 if (it==textClass) return TCLASS;
1467 if (it==textInstance) return TINSTANCE;
1468 if (it==textModule) return TMODULE;
1469 if (it==textInterface) return INTERFACE;
1470 if (it==textInstImport) return INSTIMPORT;
1471 if (it==textImport) return IMPORT;
1472 if (it==textExport) return EXPORT;
1473 if (it==textHiding) return HIDING;
1474 if (it==textQualified) return QUALIFIED;
1475 if (it==textAsMod) return ASMOD;
1476 if (it==textWildcard) return '_';
1477 if (it==textAll && !haskell98) return ALL;
1478 if (it==text__All) return ALL;
1479 if (it==textRepeat && reading==KEYBOARD)
1480 return repeatLast();
1482 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1486 if (isIn(c0,SYMBOL)) {
1487 Text it = readOperator();
1489 if (it==textCoco) return COCO;
1490 if (it==textEq) return '=';
1491 if (it==textUpto) return UPTO;
1492 if (it==textAs) return '@';
1493 if (it==textLambda) return '\\';
1494 if (it==textBar) return '|';
1495 if (it==textFrom) return FROM;
1496 if (it==textMinus) return '-';
1497 if (it==textPlus) return '+';
1498 if (it==textBang) return '!';
1499 if (it==textDot) return '.';
1500 if (it==textArrow) return ARROW;
1501 if (it==textLazy) return '~';
1502 if (it==textImplies) return IMPLIES;
1503 if (it==textRepeat && reading==KEYBOARD)
1504 return repeatLast();
1506 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1510 if (isIn(c0,DIGIT)) {
1511 top() = yylval = readNumber();
1515 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1518 return 0; /*NOTREACHED*/
1521 static Int local repeatLast() { /* Obtain last expression entered */
1522 if (isNull(yylval=getLastExpr())) {
1523 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1529 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1530 Text t; { /* by t ... */
1531 String s = textToStr(t);
1532 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1535 Syntax syntaxOf(n) /* Find syntax for name */
1537 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1538 return defaultSyntax(name(n).text);
1539 return name(n).syntax;
1542 /* --------------------------------------------------------------------------
1543 * main entry points to parser/lexer:
1544 * ------------------------------------------------------------------------*/
1546 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1547 Int startWith; { /* determining whether to read a */
1548 firstToken = TRUE; /* script or an expression */
1549 firstTokenIs = startWith;
1550 if (startWith==INTERFACE)
1551 offsideON = FALSE; else
1555 if (yyparse()) { /* This can only be parser overflow */
1556 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1557 EEND; /* in the parser... */
1560 if (!stackEmpty()) /* stack should now be empty */
1561 internal("parseInput");
1565 static String memPrefix = "@mem@";
1566 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1568 Void makeMemScript(mem,fname)
1571 strcat(fname,memPrefix);
1572 itoa((int)mem, fname+strlen(fname), 10);
1575 Bool isMemScript(fname)
1577 return (strstr(fname,memPrefix) != NULL);
1580 String memScriptString(fname)
1582 String p = strstr(fname,memPrefix);
1584 return (String)atoi(p+lenMemPrefix);
1590 Void parseScript(fname,len) /* Read a script, possibly from mem */
1594 if (isMemScript(fname)) {
1595 char* s = memScriptString(fname);
1598 fileInput(fname,len);
1603 Void parseScript(nm,len) /* Read a script */
1605 Long len; { /* Used to set a target for reading */
1612 Void parseExp() { /* Read an expression to evaluate */
1614 setLastExpr(inputExpr);
1617 Void parseInterface(nm,len) /* Read a GHC interface file */
1619 Long len; { /* Used to set a target for reading */
1622 parseInput(INTERFACE);
1626 /* --------------------------------------------------------------------------
1628 * ------------------------------------------------------------------------*/
1633 case INSTALL : initCharTab();
1634 textCase = findText("case");
1635 textOfK = findText("of");
1636 textData = findText("data");
1637 textType = findText("type");
1638 textIf = findText("if");
1639 textThen = findText("then");
1640 textElse = findText("else");
1641 textWhere = findText("where");
1642 textLet = findText("let");
1643 textIn = findText("in");
1644 textInfix = findText("infix");
1645 textInfixl = findText("infixl");
1646 textInfixr = findText("infixr");
1647 textForeign = findText("foreign");
1648 textUnsafe = findText("unsafe");
1649 textNewtype = findText("newtype");
1650 textDefault = findText("default");
1651 textDeriving = findText("deriving");
1652 textDo = findText("do");
1653 textClass = findText("class");
1654 textInstance = findText("instance");
1655 textCoco = findText("::");
1656 textEq = findText("=");
1657 textUpto = findText("..");
1658 textAs = findText("@");
1659 textLambda = findText("\\");
1660 textBar = findText("|");
1661 textMinus = findText("-");
1662 textPlus = findText("+");
1663 textFrom = findText("<-");
1664 textArrow = findText("->");
1665 textLazy = findText("~");
1666 textBang = findText("!");
1667 textDot = findText(".");
1668 textImplies = findText("=>");
1669 textPrelude = findText("Prelude");
1670 textNum = findText("Num");
1671 textModule = findText("module");
1672 textInterface = findText("__interface");
1673 textInstImport = findText("__instimport");
1674 textExport = findText("__export");
1675 textImport = findText("import");
1676 textHiding = findText("hiding");
1677 textQualified = findText("qualified");
1678 textAsMod = findText("as");
1679 textWildcard = findText("_");
1680 textAll = findText("forall");
1681 text__All = findText("__forall");
1682 varMinus = mkVar(textMinus);
1683 varPlus = mkVar(textPlus);
1684 varBang = mkVar(textBang);
1685 varDot = mkVar(textDot);
1686 varHiding = mkVar(textHiding);
1687 varQualified = mkVar(textQualified);
1688 varAsMod = mkVar(textAsMod);
1689 conMain = mkCon(findText("Main"));
1690 varMain = mkVar(findText("main"));
1696 case RESET : tyconDefns = NIL;
1705 foreignImports= NIL;
1706 foreignExports= NIL;
1714 case BREAK : if (reading==KEYBOARD)
1718 case MARK : mark(tyconDefns);
1726 mark(unqualImports);
1727 mark(foreignImports);
1728 mark(foreignExports);
1746 /*-------------------------------------------------------------------------*/