2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
6 * Yale Haskell Group, and the Oregon Graduate Institute of Science and
7 * Technology, 1994-1999, All rights reserved. It is distributed as
8 * free software under the license in the file "License", which is
9 * included in the distribution.
11 * $RCSfile: input.c,v $
13 * $Date: 1999/10/15 23:52:00 $
14 * ------------------------------------------------------------------------*/
32 #if IS_WIN32 | HUGS_FOR_WINDOWS
36 /* --------------------------------------------------------------------------
38 * ------------------------------------------------------------------------*/
40 List tyconDefns = NIL; /* type constructor definitions */
41 List typeInDefns = NIL; /* type synonym restrictions */
42 List valDefns = NIL; /* value definitions in script */
43 List classDefns = NIL; /* class defns in script */
44 List instDefns = NIL; /* instance defns in script */
45 List selDefns = NIL; /* list of selector lists */
46 List genDefns = NIL; /* list of generated names */
47 List unqualImports = NIL; /* unqualified import list */
48 List foreignImports = NIL; /* foreign imports */
49 List foreignExports = NIL; /* foreign exportsd */
50 List defaultDefns = NIL; /* default definitions (if any) */
51 Int defaultLine = 0; /* line in which default defs occur*/
52 List evalDefaults = NIL; /* defaults for evaluator */
54 Cell inputExpr = NIL; /* input expression */
55 Cell inputContext = NIL; /* input context */
56 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
57 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
58 Bool offsideON = TRUE; /* TRUE => implement offside rule */
60 String repeatStr = 0; /* Repeat last expr */
62 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
63 String preprocessor = 0;
66 /* --------------------------------------------------------------------------
67 * Local function prototypes:
68 * ------------------------------------------------------------------------*/
70 static Void local initCharTab Args((Void));
71 static Void local fileInput Args((String,Long));
72 static Bool local literateMode Args((String));
73 static Bool local linecmp Args((String,String));
74 static Int local nextLine Args((Void));
75 static Void local skip Args((Void));
76 static Void local thisLineIs Args((Int));
77 static Void local newlineSkip Args((Void));
78 static Void local closeAnyInput Args((Void));
80 Int yyparse Args((Void)); /* can't stop yacc making this */
81 /* public, but don't advertise */
82 /* it in a header file. */
84 static Void local endToken Args((Void));
85 static Text local readOperator Args((Void));
86 static Text local readIdent Args((Void));
87 static Cell local readRadixNumber Args((Int));
88 static Cell local readNumber Args((Void));
89 static Cell local readChar Args((Void));
90 static Cell local readString Args((Void));
91 static Void local saveStrChr Args((Char));
92 static Cell local readAChar Args((Bool));
94 static Bool local lazyReadMatches Args((String));
95 static Cell local readEscapeChar Args((Bool));
96 static Void local skipGap Args((Void));
97 static Cell local readCtrlChar Args((Void));
98 static Cell local readOctChar Args((Void));
99 static Cell local readHexChar Args((Void));
100 static Int local readHexDigit Args((Char));
101 static Cell local readDecChar Args((Void));
103 static Void local goOffside Args((Int));
104 static Void local unOffside Args((Void));
105 static Bool local canUnOffside Args((Void));
107 static Void local skipWhitespace Args((Void));
108 static Int local yylex Args((Void));
109 static Int local repeatLast Args((Void));
111 static Void local parseInput Args((Int));
113 static Bool local doesNotExceed Args((String,Int,Int));
114 static Int local stringToInt Args((String,Int));
117 /* --------------------------------------------------------------------------
118 * Text values for reserved words and special symbols:
119 * ------------------------------------------------------------------------*/
121 static Text textCase, textOfK, textData, textType, textIf;
122 static Text textThen, textElse, textWhere, textLet, textIn;
123 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
124 static Text textDefault, textDeriving, textDo, textClass, textInstance;
126 static Text textWith, textDlet;
129 static Text textCoco, textEq, textUpto, textAs, textLambda;
130 static Text textBar, textMinus, textFrom, textArrow, textLazy;
131 static Text textBang, textDot, textAll, textImplies;
132 static Text textWildcard;
134 static Text textModule, textImport, textInterface, textInstImport;
135 static Text textHiding, textQualified, textAsMod;
136 static Text textExport, textDynamic, textUUExport;
137 static Text textUnsafe, textUUAll;
139 Text textNum; /* Num */
140 Text textPrelude; /* Prelude */
141 Text textPlus; /* (+) */
143 static Cell conMain; /* Main */
144 static Cell varMain; /* main */
146 static Cell varMinus; /* (-) */
147 static Cell varPlus; /* (+) */
148 static Cell varBang; /* (!) */
149 static Cell varDot; /* (.) */
150 static Cell varHiding; /* hiding */
151 static Cell varQualified; /* qualified */
152 static Cell varAsMod; /* as */
154 static List imps; /* List of imports to be chased */
157 /* --------------------------------------------------------------------------
158 * Character set handling:
160 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
161 * character set. The following code provides methods for classifying
162 * input characters according to the lexical structure specified by the
163 * report. Hugs should still accept older programs because ASCII is
164 * essentially just a subset of the ISO character set.
166 * Notes: If you want to port Hugs to a machine that uses something
167 * substantially different from the ISO character set, then you will need
168 * to insert additional code to map between character sets.
170 * At some point, the following data structures may be exported in a .h
171 * file to allow the information contained here to be picked up in the
172 * implementation of LibChar is* primitives.
174 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
175 * ------------------------------------------------------------------------*/
177 static Bool charTabBuilt;
178 static unsigned char ctable[NUM_CHARS];
179 #define isIn(c,x) (ctable[(unsigned char)(c)]&(x))
180 #define isISO(c) (0<=(c) && (c)<NUM_CHARS)
190 static Void local initCharTab() { /* Initialize char decode table */
191 #define setRange(x,f,t) {Int i=f; while (i<=t) ctable[i++] |=x;}
192 #define setChar(x,c) ctable[c] |= (x)
193 #define setChars(x,s) {char *p=s; while (*p) ctable[(Int)*p++]|=x;}
194 #define setCopy(x,c) {Int i; \
195 for (i=0; i<NUM_CHARS; ++i) \
200 setRange(DIGIT, '0','9'); /* ASCII decimal digits */
202 setRange(SMALL, 'a','z'); /* ASCII lower case letters */
203 setRange(SMALL, 223,246); /* ISO lower case letters */
204 setRange(SMALL, 248,255); /* (omits division symbol, 247) */
205 setChar (SMALL, '_');
207 setRange(LARGE, 'A','Z'); /* ASCII upper case letters */
208 setRange(LARGE, 192,214); /* ISO upper case letters */
209 setRange(LARGE, 216,222); /* (omits multiplication, 215) */
211 setRange(SYMBOL, 161,191); /* Symbol characters + ':' */
212 setRange(SYMBOL, 215,215);
213 setChar (SYMBOL, 247);
214 setChars(SYMBOL, ":!#$%&*+./<=>?@\\^|-~");
216 setChar (IDAFTER, '\''); /* Characters in identifier */
217 setCopy (IDAFTER, (DIGIT|SMALL|LARGE));
219 setChar (SPACE, ' '); /* ASCII space character */
220 setChar (SPACE, 160); /* ISO non breaking space */
221 setRange(SPACE, 9,13); /* special whitespace: \t\n\v\f\r */
223 setChars(PRINT, "(),;[]_`{}"); /* Special characters */
224 setChars(PRINT, " '\""); /* Space and quotes */
225 setCopy (PRINT, (DIGIT|SMALL|LARGE|SYMBOL));
235 /* --------------------------------------------------------------------------
236 * Single character input routines:
238 * At the lowest level of input, characters are read one at a time, with the
239 * current character held in c0 and the following (lookahead) character in
240 * c1. The corrdinates of c0 within the file are held in (column,row).
241 * The input stream is advanced by one character using the skip() function.
242 * ------------------------------------------------------------------------*/
244 #define TABSIZE 8 /* spacing between tabstops */
246 #define NOTHING 0 /* what kind of input is being read?*/
247 #define KEYBOARD 1 /* - keyboard/console? */
248 #define SCRIPTFILE 2 /* - script file */
249 #define PROJFILE 3 /* - project file */
250 #define STRING 4 /* - string buffer? */
252 static Int reading = NOTHING;
254 static Target readSoFar;
255 static Int row, column, startColumn;
257 static FILE *inputStream = 0;
258 static Bool thisLiterate;
259 static String nextStringChar; /* next char in string buffer */
261 #if USE_READLINE /* for command line editors */
262 static String currentLine; /* editline or GNU readline */
263 static String nextChar;
264 #define nextConsoleChar() \
265 (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
266 extern Void add_history Args((String));
267 extern String readline Args((String));
269 #define nextConsoleChar() getc(stdin)
272 static Int litLines; /* count defn lines in lit script */
273 #define DEFNCHAR '>' /* definition lines begin with this */
274 static Int lastLine; /* records type of last line read: */
275 #define STARTLINE 0 /* - at start of file, none read */
276 #define BLANKLINE 1 /* - blank (may preceed definition) */
277 #define TEXTLINE 2 /* - text comment */
278 #define DEFNLINE 3 /* - line containing definition */
279 #define CODELINE 4 /* - line inside code block */
281 #define BEGINCODE "\\begin{code}"
282 #define ENDCODE "\\end{code}"
285 static char *lineBuffer = NULL; /* getline() does the initial allocation */
287 #define LINEBUFFER_SIZE 1000
288 static char lineBuffer[LINEBUFFER_SIZE];
290 static int lineLength = 0;
291 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
292 static int linePtr = 0;
294 Void consoleInput(prompt) /* prepare to input characters from */
295 String prompt; { /* standard in (i.e. console/kbd) */
296 reading = KEYBOARD; /* keyboard input is Line oriented, */
297 c0 = /* i.e. input terminated by '\n' */
303 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
304 * avoids accidentally freeing currentLine twice.
307 String oldCurrentLine = currentLine;
308 currentLine = 0; /* We may lose the space of currentLine */
309 free(oldCurrentLine); /* if interrupted here - unlikely */
311 currentLine = readline(prompt);
312 nextChar = currentLine;
315 add_history(currentLine);
325 Void projInput(nm) /* prepare to input characters from */
326 String nm; { /* from named project file */
327 if ((inputStream = fopen(nm,"r"))!=0) {
335 ERRMSG(0) "Unable to open project file \"%s\"", nm
340 static Void local fileInput(nm,len) /* prepare to input characters from*/
341 String nm; /* named file (specified length is */
342 Long len; { /* used to set target for reading) */
343 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
345 Int reallen = strlen(preprocessor) + 1 + strlen(nm) + 1;
346 char *cmd = malloc(reallen);
348 ERRMSG(0) "Unable to allocate memory for filter command."
351 strcpy(cmd,preprocessor);
354 inputStream = popen(cmd,"r");
357 inputStream = fopen(nm,"r");
360 inputStream = fopen(nm,"r");
363 reading = SCRIPTFILE;
369 lastLine = STARTLINE; /* literate file processing */
373 thisLiterate = literateMode(nm);
377 setGoal("Parsing", (Target)len);
380 ERRMSG(0) "Unable to open file \"%s\"", nm
385 Void stringInput(s) /* prepare to input characters from string */
400 static Bool local literateMode(nm) /* Select literate mode for file */
402 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
404 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
406 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
407 filenamecmp(dot+1,"verb")==0) /* literate scripts */
410 return literateScripts; /* otherwise, use the default */
414 Void hi_o_namesFromSrcName ( String srcName, String* hiName, String* oName )
418 len = 1 + strlen ( srcName );
419 *hiName = malloc(len);
420 *oName = malloc(len);
421 if (!(*hiName && *oName)) internal("hi_o_namesFromSource");
422 (*hiName)[0] = (*oName)[0] = 0;
423 dot = strrchr(srcName, '.');
425 if (filenamecmp(dot+1, "hs")==0 &&
426 filenamecmp(dot+1, "lhs")==0 &&
427 filenamecmp(dot+1, "verb")==0) return;
429 strcpy(*hiName, srcName);
430 dot = strrchr(*hiName, '.');
435 strcpy(*oName, srcName);
436 dot = strrchr(*oName, '.');
443 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
444 * I've removed the loop (since newLineSkip contains a loop too) and
445 * replaced the warnings with errors. ADR
448 * To deal with literate \begin{code}...\end{code} blocks,
449 * add a line buffer that rooms the current line. The old c0 and c1
450 * stream pointers are used as before within that buffer -- sof
452 * Upon reading a new line into the line buffer, we check to see if
453 * we're reading in a line containing \begin{code} or \end{code} and
454 * take appropriate action.
457 static Bool local linecmp(s,line) /* compare string with line */
458 String s; /* line may end in whitespace */
461 while (s[i] != '\0' && s[i] == line[i]) {
464 /* s[0..i-1] == line[0..i-1] */
465 if (s[i] != '\0') { /* check s `isPrefixOf` line */
468 while (isIn(line[i], SPACE)) { /* allow whitespace at end of line */
471 return (line[i] == '\0');
474 /* Returns line length (including \n) or 0 upon EOF. */
475 static Int local nextLine()
479 Forget about fgets(), it is utterly braindead.
480 (Assumes \NUL free streams and does not gracefully deal
481 with overflow.) Instead, use GNU libc's getline().
483 lineLength = getline(&lineBuffer, &lineLength, inputStream);
485 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
486 lineLength = strlen(lineBuffer);
490 /* printf("Read: \"%s\"", lineBuffer); */
491 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
494 else if (lineLength >= 2 && lineBuffer[0] == '#' &&
495 lineBuffer[1] == '!') {
496 lineBuffer[0]='\n'; /* pretend it's a blank line */
499 } else if (thisLiterate) {
500 if (linecmp(BEGINCODE, lineBuffer)) {
501 if (!inCodeBlock) { /* Entered a code block */
503 lineBuffer[0]='\n'; /* pretend it's a blank line */
508 ERRMSG(row) "\\begin{code} encountered inside code block"
512 else if (linecmp(ENDCODE, lineBuffer)) {
513 if (inCodeBlock) { /* Finished code block */
515 lineBuffer[0]='\n'; /* pretend it's a blank line */
520 ERRMSG(row) "\\end{code} encountered outside code block"
525 /* printf("Read: \"%s\"", lineBuffer); */
529 static Void local skip() { /* move forward one char in input */
530 if (c0!=EOF) { /* stream, updating c0, c1, ... */
531 if (c0=='\n') { /* Adjusting cursor coords as nec. */
534 if (reading==SCRIPTFILE)
538 column += TABSIZE - ((column-1)%TABSIZE);
547 if (reading==SCRIPTFILE)
551 else if (reading==KEYBOARD) {
556 c1 = nextConsoleChar();
557 #if IS_WIN32 && !HUGS_FOR_WINDOWS
560 /* On Win32, hitting ctrl-C causes the next getchar to
561 * fail - returning "-1" to indicate an error.
562 * This is one of the rare cases where "-1" does not mean EOF.
564 if (EOF == c1 && (!feof(stdin) || broken==TRUE)) {
569 else if (reading==STRING) {
570 c1 = (unsigned char) *nextStringChar++;
575 if (lineLength <=0 || linePtr == lineLength) {
576 /* Current line, exhausted - get new one */
577 if (nextLine() <= 0) { /* EOF */
582 c1 = (unsigned char)lineBuffer[linePtr++];
586 c1 = (unsigned char)lineBuffer[linePtr++];
593 static Void local thisLineIs(kind) /* register kind of current line */
594 Int kind; { /* & check for literate script errs */
595 if (literateErrors) {
596 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
597 (kind==TEXTLINE && lastLine==DEFNLINE)) {
598 ERRMSG(row) "Program line next to comment"
605 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
606 /* assert(c0=='\n'); */
607 if (reading==SCRIPTFILE && thisLiterate) {
610 if (inCodeBlock) { /* pass chars on definition lines */
611 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
615 if (c0==DEFNCHAR) { /* pass chars on definition lines */
616 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
621 while (c0 != '\n' && isIn(c0,SPACE)) /* maybe line is blank? */
623 if (c0=='\n' || c0==EOF)
624 thisLineIs(BLANKLINE);
626 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
627 while (c0!='\n' && c0!=EOF)
629 } /* by now, c0=='\n' or c0==EOF */
630 } while (c0!=EOF); /* if new line, start again */
632 if (litLines==0 && literateErrors) {
633 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
642 static Void local closeAnyInput() { /* Close input stream, if open, */
643 switch (reading) { /* or skip to end of console line */
645 case SCRIPTFILE : if (inputStream) {
646 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
658 case KEYBOARD : while (c0!=EOF)
665 /* --------------------------------------------------------------------------
666 * Parser: Uses table driven parser generated from parser.y using yacc
667 * ------------------------------------------------------------------------*/
671 /* --------------------------------------------------------------------------
672 * Single token input routines:
674 * The following routines read the values of particular kinds of token given
675 * that the first character of the token has already been located in c0 on
676 * entry to the routine.
677 * ------------------------------------------------------------------------*/
679 #define MAX_TOKEN 4000
680 #define startToken() tokPos = 0
681 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
682 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
683 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
685 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
686 static Int tokPos; /* input position in buffer */
687 static Int identType; /* identifier type: CONID / VARID */
688 static Int opType; /* operator type : CONOP / VAROP */
690 static Void local endToken() { /* check for token overflow */
691 if (tokPos>MAX_TOKEN) {
692 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
695 tokenStr[tokPos] = '\0';
698 static Text local readOperator() { /* read operator symbol */
703 } while (isISO(c0) && isIn(c0,SYMBOL));
704 opType = (tokenStr[0]==':' ? CONOP : VAROP);
706 return findText(tokenStr);
709 static Text local readIdent() { /* read identifier */
714 } while (isISO(c0) && isIn(c0,IDAFTER));
716 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
717 return findText(tokenStr);
721 static Bool local doesNotExceed(s,radix,limit)
728 if (s[p] == 0) return TRUE;
729 if (overflows(n,radix,s[p]-'0',limit)) return FALSE;
730 n = radix*n + (s[p]-'0');
735 static Int local stringToInt(s,radix)
741 if (s[p] == 0) return n;
742 n = radix*n + (s[p]-'0');
747 static Cell local readRadixNumber(r) /* Read literal in specified radix */
748 Int r; { /* from input of the form 0c{digs} */
751 skip(); /* skip leading zero */
752 if ((d=readHexDigit(c1))<0 || d>=r) {
753 /* Special case; no digits, lex as */
754 /* if it had been written "0 c..." */
759 saveTokenChar('0'+readHexDigit(c0));
761 d = readHexDigit(c0);
762 } while (d>=0 && d<r);
766 if (doesNotExceed(tokenStr,r,MAXPOSINT))
767 return mkInt(stringToInt(tokenStr,r));
770 return stringToBignum(tokenStr);
772 ERRMSG(row) "Hexadecimal or octal constant exceeds `Int' range"
777 static Cell local readNumber() { /* read numeric constant */
780 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
781 return readRadixNumber(16);
782 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
783 return readRadixNumber(8);
790 } while (isISO(c0) && isIn(c0,DIGIT));
792 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
794 if (doesNotExceed(tokenStr,10,MAXPOSINT))
795 return mkInt(stringToInt(tokenStr,10)); else
796 return stringToBignum(tokenStr);
799 saveTokenChar(c0); /* save decimal point */
801 do { /* process fractional part ... */
804 } while (isISO(c0) && isIn(c0,DIGIT));
806 if (c0=='e' || c0=='E') { /* look for exponent part... */
816 if (!isISO(c0) || !isIn(c0,DIGIT)) {
817 ERRMSG(row) "Missing digits in exponent"
824 } while (isISO(c0) && isIn(c0,DIGIT));
829 return mkFloat(stringToFloat(tokenStr));
838 static Cell local readChar() { /* read character constant */
842 if (c0=='\'' || c0=='\n' || c0==EOF) {
843 ERRMSG(row) "Illegal character constant"
847 charRead = readAChar(FALSE);
852 ERRMSG(row) "Improperly terminated character constant"
858 static Cell local readString() { /* read string literal */
863 while (c0!='\"' && c0!='\n' && c0!=EOF) {
866 saveStrChr(charOf(c));
872 ERRMSG(row) "Improperly terminated string"
876 return mkStr(findText(tokenStr));
879 static Void local saveStrChr(c) /* save character in string */
881 if (c!='\0' && c!='\\') { /* save non null char as single char*/
884 else { /* save null char as TWO null chars */
885 if (tokPos+1<MAX_TOKEN) {
895 static Cell local readAChar(isStrLit) /* read single char constant */
896 Bool isStrLit; { /* TRUE => enable \& and gaps */
899 if (c0=='\\') /* escape character? */
900 return readEscapeChar(isStrLit);
902 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
905 skip(); /* normal character? */
909 /* --------------------------------------------------------------------------
910 * Character escape code sequences:
911 * ------------------------------------------------------------------------*/
913 static struct { /* table of special escape codes */
917 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
918 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
919 {"\'",'\''}, {"v", 11},
920 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
921 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
922 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
923 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
924 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
925 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
926 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
927 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
928 {"SP", 32}, {"DEL", 127},
932 static Int alreadyMatched; /* Record portion of input stream */
933 static char alreadyRead[10]; /* that has been read w/o a match */
935 static Bool local lazyReadMatches(s) /* compare input stream with string */
936 String s; { /* possibly using characters that */
937 int i; /* have already been read */
939 for (i=0; i<alreadyMatched; ++i)
940 if (alreadyRead[i]!=s[i])
943 while (s[i] && s[i]==c0) {
944 alreadyRead[alreadyMatched++]=(char)c0;
952 static Cell local readEscapeChar(isStrLit)/* read escape character */
958 case '&' : if (isStrLit) {
962 ERRMSG(row) "Illegal use of `\\&' in character constant"
966 case '^' : return readCtrlChar();
968 case 'o' : return readOctChar();
969 case 'x' : return readHexChar();
971 default : if (!isISO(c0)) {
972 ERRMSG(row) "Illegal escape sequence"
975 else if (isIn(c0,SPACE)) {
980 ERRMSG(row) "Illegal use of gap in character constant"
984 else if (isIn(c0,DIGIT))
985 return readDecChar();
988 for (alreadyMatched=0; escapes[i].codename; i++)
989 if (lazyReadMatches(escapes[i].codename))
990 return mkChar(escapes[i].codenumber);
992 alreadyRead[alreadyMatched++] = (char)c0;
993 alreadyRead[alreadyMatched++] = '\0';
994 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
997 return NIL;/*NOTREACHED*/
1000 static Void local skipGap() { /* skip over gap in string literal */
1001 do /* (simplified in Haskell 1.1) */
1006 while (isISO(c0) && isIn(c0,SPACE));
1008 ERRMSG(row) "Missing `\\' terminating string literal gap"
1014 static Cell local readCtrlChar() { /* read escape sequence \^x */
1015 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1019 if ((which = strchr(controls,c0))==NULL) {
1020 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
1024 return mkChar(which-controls);
1027 static Cell local readOctChar() { /* read octal character constant */
1032 if ((d = readHexDigit(c0))<0 || d>=8) {
1033 ERRMSG(row) "Empty octal character escape"
1037 if (overflows(n,8,d,MAXCHARVAL)) {
1038 ERRMSG(row) "Octal character escape out of range"
1043 } while ((d = readHexDigit(c0))>=0 && d<8);
1048 static Cell local readHexChar() { /* read hex character constant */
1053 if ((d = readHexDigit(c0))<0) {
1054 ERRMSG(row) "Empty hexadecimal character escape"
1058 if (overflows(n,16,d,MAXCHARVAL)) {
1059 ERRMSG(row) "Hexadecimal character escape out of range"
1064 } while ((d = readHexDigit(c0))>=0);
1069 static Int local readHexDigit(c) /* read single hex digit */
1071 if ('0'<=c && c<='9')
1073 if ('A'<=c && c<='F')
1074 return 10 + (c-'A');
1075 if ('a'<=c && c<='f')
1076 return 10 + (c-'a');
1080 static Cell local readDecChar() { /* read decimal character constant */
1084 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
1085 ERRMSG(row) "Decimal character escape out of range"
1088 n = 10*n + (c0-'0');
1090 } while (c0!=EOF && isIn(c0,DIGIT));
1095 /* --------------------------------------------------------------------------
1096 * Produce printable representation of character:
1097 * ------------------------------------------------------------------------*/
1099 String unlexChar(c,quote) /* return string representation of */
1100 Char c; /* character... */
1101 Char quote; { /* protect quote character */
1102 static char buffer[12];
1104 if (c<0) /* deal with sign extended chars.. */
1107 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
1108 if (c==quote || c=='\\') { /* look for quote of approp. kind */
1110 buffer[1] = (char)c;
1114 buffer[0] = (char)c;
1118 else { /* look for escape code */
1120 for (escs=0; escapes[escs].codename; escs++)
1121 if (escapes[escs].codenumber==c) {
1122 sprintf(buffer,"\\%s",escapes[escs].codename);
1125 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
1130 Void printString(s) /* print string s, using quotes and */
1131 String s; { /* escapes if any parts need them */
1135 while ((c = *t)!=0 && isISO(c)
1136 && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
1142 Printf("%s",unlexChar(*t,'"'));
1150 /* -------------------------------------------------------------------------
1151 * Handle special types of input for use in interpreter:
1152 * -----------------------------------------------------------------------*/
1154 Command readCommand(cmds,start,sys) /* read command at start of input */
1155 struct cmd *cmds; /* line in interpreter */
1156 Char start; /* characters introducing a cmd */
1157 Char sys; { /* character for shell escape */
1158 while (c0==' ' || c0 =='\t')
1161 if (c0=='\n') /* look for blank command lines */
1163 if (c0==EOF) /* look for end of input stream */
1165 if (c0==sys) { /* single character system escape */
1169 if (c0==start && c1==sys) { /* two character system escape */
1175 startToken(); /* All cmds start with start */
1176 if (c0==start) /* except default (usually EVAL) */
1177 do { /* which is empty */
1180 } while (c0!=EOF && !isIn(c0,SPACE));
1183 for (; cmds->cmdString; ++cmds)
1184 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1185 (tokenStr[0]==start &&
1186 tokenStr[1]==(cmds->cmdString)[1] &&
1188 return (cmds->cmdCode);
1192 String readFilename() { /* Read filename from input (if any)*/
1193 if (reading==PROJFILE)
1196 while (c0==' ' || c0=='\t')
1199 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1203 while (c0!=EOF && !isIn(c0,SPACE)) {
1206 while (c0!=EOF && c0!='\"') {
1207 Cell c = readAChar(TRUE);
1209 saveTokenChar(charOf(c));
1215 ERRMSG(row) "a closing quote, '\"', was expected"
1228 String readLine() { /* Read command line from input */
1229 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1233 while (c0!='\n' && c0!=EOF) {
1242 /* --------------------------------------------------------------------------
1243 * This lexer supports the Haskell layout rule:
1245 * - Layout area bounded by { ... }, with `;'s in between.
1246 * - A `{' is a HARD indentation and can only be matched by a corresponding
1248 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1249 * is inserted with the column number of the first token after the
1250 * WHERE/LET/OF keyword.
1251 * - When a soft indentation is uppermost on the indetation stack with
1252 * column col' we insert:
1253 * `}' in front of token with column<col' and pop indentation off stack,
1254 * `;' in front of token with column==col'.
1255 * ------------------------------------------------------------------------*/
1257 #define MAXINDENT 100 /* maximum nesting of layout rule */
1258 static Int layout[MAXINDENT+1];/* indentation stack */
1259 #define HARD (-1) /* indicates hard indentation */
1260 static Int indentDepth = (-1); /* current indentation nesting */
1262 static Void local goOffside(col) /* insert offside marker */
1263 Int col; { /* for specified column */
1265 if (indentDepth>=MAXINDENT) {
1266 ERRMSG(row) "Too many levels of program nesting"
1269 layout[++indentDepth] = col;
1272 static Void local unOffside() { /* leave layout rule area */
1277 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1279 return indentDepth>=0 && layout[indentDepth]!=HARD;
1282 /* --------------------------------------------------------------------------
1284 * ------------------------------------------------------------------------*/
1286 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1287 for (;;) /* Strictly speaking, this code is */
1288 if (c0==EOF) /* a little more liberal than the */
1289 return; /* report allows ... */
1292 else if (isIn(c0,SPACE))
1294 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1296 Int origRow = row; /* Save original row number */
1299 while (nesting>0 && c0!=EOF)
1300 if (c0=='{' && c1=='-') {
1305 else if (c0=='-' && c1=='}') {
1315 ERRMSG(origRow) "Unterminated nested comment {- ..."
1319 else if (c0=='-' && c1=='-') { /* One line comment */
1322 while (c0!='\n' && c0!=EOF);
1330 static Bool firstToken; /* Set to TRUE for first token */
1331 static Int firstTokenIs; /* ... with token value stored here */
1333 static Int local yylex() { /* Read next input token ... */
1334 static Bool insertOpen = FALSE;
1335 static Bool insertedToken = FALSE;
1336 static Text textRepeat;
1338 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1340 if (firstToken) { /* Special case for first token */
1344 insertedToken = FALSE;
1345 if (reading==KEYBOARD)
1346 textRepeat = findText(repeatStr);
1347 return firstTokenIs;
1350 if (offsideON && insertOpen) { /* insert `soft' opening brace */
1352 insertedToken = TRUE;
1354 push(yylval = mkInt(row));
1358 /* ----------------------------------------------------------------------
1359 * Skip white space, and insert tokens to support layout rules as reqd.
1360 * --------------------------------------------------------------------*/
1363 startColumn = column;
1364 push(yylval = mkInt(row)); /* default token value is line no. */
1365 /* subsequent changes to yylval must also set top() to the same value */
1367 if (indentDepth>=0) { /* layout rule(s) active ? */
1368 if (insertedToken) /* avoid inserting multiple `;'s */
1369 insertedToken = FALSE; /* or putting `;' after `{' */
1371 if (offsideON && layout[indentDepth]!=HARD) {
1372 if (column<layout[indentDepth]) {
1376 else if (column==layout[indentDepth] && c0!=EOF) {
1377 insertedToken = TRUE;
1383 /* ----------------------------------------------------------------------
1384 * Now try to identify token type:
1385 * --------------------------------------------------------------------*/
1388 case EOF : return 0; /* End of file/input */
1390 /* The next 10 characters make up the `special' category in 1.3 */
1391 case '(' : skip(); return '(';
1392 case ')' : skip(); return ')';
1393 case ',' : skip(); return ',';
1394 case ';' : skip(); return ';';
1395 case '[' : skip(); return '[';
1396 case ']' : skip(); return ']';
1397 case '`' : skip(); return '`';
1398 case '{' : if (offsideON) goOffside(HARD);
1401 case '}' : if (offsideON && indentDepth<0) {
1402 ERRMSG(row) "Misplaced `}'"
1405 if (!(offsideON && layout[indentDepth]!=HARD))
1406 skip(); /* skip over hard }*/
1408 unOffside(); /* otherwise, we have to insert a }*/
1409 return '}'; /* to (try to) avoid an error... */
1411 /* Character and string literals */
1412 case '\'' : top() = yylval = readChar();
1415 case '\"' : top() = yylval = readString();
1420 if (c0=='?' && isIn(c1,SMALL) && !haskell98) {
1421 Text it; /* Look for implicit param name */
1424 top() = yylval = ap(IPVAR,it);
1425 return identType=IPVARID;
1429 if (c0=='#' && isIn(c1,SMALL) && !haskell98) {
1430 Text it; /* Look for record selector name */
1433 top() = yylval = ap(RECSEL,mkExt(it));
1434 return identType=RECSELID;
1437 if (isIn(c0,LARGE)) { /* Look for qualified name */
1438 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1439 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1441 skip(); /* Skip qualifying dot */
1442 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1443 it2 = readOperator();
1444 if (opType==CONOP) {
1445 top() = yylval = mkQConOp(it,it2);
1448 top() = yylval = mkQVarOp(it,it2);
1451 } else { /* Qualified identifier */
1453 if (identType==CONID) {
1454 top() = yylval = mkQCon(it,it2);
1457 top() = yylval = mkQVar(it,it2);
1462 top() = yylval = mkCon(it);
1466 if (isIn(c0,(SMALL|LARGE))) {
1467 Text it = readIdent();
1469 if (it==textCase) return CASEXP;
1470 if (it==textOfK) lookAhead(OF);
1471 if (it==textData) return DATA;
1472 if (it==textType) return TYPE;
1473 if (it==textIf) return IF;
1474 if (it==textThen) return THEN;
1475 if (it==textElse) return ELSE;
1476 if (it==textWhere) lookAhead(WHERE);
1477 if (it==textLet) lookAhead(LET);
1478 if (it==textIn) return IN;
1479 if (it==textInfix) return INFIXN;
1480 if (it==textInfixl) return INFIXL;
1481 if (it==textInfixr) return INFIXR;
1482 if (it==textForeign) return FOREIGN;
1483 if (it==textUnsafe) return UNSAFE;
1484 if (it==textNewtype) return TNEWTYPE;
1485 if (it==textDefault) return DEFAULT;
1486 if (it==textDeriving) return DERIVING;
1487 if (it==textDo) lookAhead(DO);
1488 if (it==textClass) return TCLASS;
1489 if (it==textInstance) return TINSTANCE;
1490 if (it==textModule) return TMODULE;
1491 if (it==textInterface) return INTERFACE;
1492 if (it==textInstImport) return INSTIMPORT;
1493 if (it==textImport) return IMPORT;
1494 if (it==textExport) return EXPORT;
1495 if (it==textDynamic) return DYNAMIC;
1496 if (it==textUUExport) return UUEXPORT;
1497 if (it==textHiding) return HIDING;
1498 if (it==textQualified) return QUALIFIED;
1499 if (it==textAsMod) return ASMOD;
1500 if (it==textWildcard) return '_';
1501 if (it==textAll && !haskell98) return ALL;
1503 if (it==textWith && !haskell98) lookAhead(WITH);
1504 if (it==textDlet && !haskell98) lookAhead(DLET);
1506 if (it==textUUAll) return ALL;
1507 if (it==textRepeat && reading==KEYBOARD)
1508 return repeatLast();
1510 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1514 if (isIn(c0,SYMBOL)) {
1515 Text it = readOperator();
1517 if (it==textCoco) return COCO;
1518 if (it==textEq) return '=';
1519 if (it==textUpto) return UPTO;
1520 if (it==textAs) return '@';
1521 if (it==textLambda) return '\\';
1522 if (it==textBar) return '|';
1523 if (it==textFrom) return FROM;
1524 if (it==textMinus) return '-';
1525 if (it==textPlus) return '+';
1526 if (it==textBang) return '!';
1527 if (it==textDot) return '.';
1528 if (it==textArrow) return ARROW;
1529 if (it==textLazy) return '~';
1530 if (it==textImplies) return IMPLIES;
1531 if (it==textRepeat && reading==KEYBOARD)
1532 return repeatLast();
1534 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1538 if (isIn(c0,DIGIT)) {
1539 top() = yylval = readNumber();
1543 ERRMSG(row) "Unrecognised character `\\%d' in column %d",
1546 return 0; /*NOTREACHED*/
1549 static Int local repeatLast() { /* Obtain last expression entered */
1550 if (isNull(yylval=getLastExpr())) {
1551 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1557 Syntax defaultSyntax(t) /* Find default syntax of var named*/
1558 Text t; { /* by t ... */
1559 String s = textToStr(t);
1560 return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
1563 Syntax syntaxOf(n) /* Find syntax for name */
1565 if (name(n).syntax==NO_SYNTAX) /* Return default if no syntax set */
1566 return defaultSyntax(name(n).text);
1567 return name(n).syntax;
1570 /* --------------------------------------------------------------------------
1571 * main entry points to parser/lexer:
1572 * ------------------------------------------------------------------------*/
1574 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1575 Int startWith; { /* determining whether to read a */
1576 firstToken = TRUE; /* script or an expression */
1577 firstTokenIs = startWith;
1578 if (startWith==INTERFACE)
1579 offsideON = FALSE; else
1583 if (yyparse()) { /* This can only be parser overflow */
1584 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1585 EEND; /* in the parser... */
1588 if (!stackEmpty()) /* stack should now be empty */
1589 internal("parseInput");
1593 static String memPrefix = "@mem@";
1594 static Int lenMemPrefix = 5; /* strlen(memPrefix)*/
1596 Void makeMemScript(mem,fname)
1599 strcat(fname,memPrefix);
1600 itoa((int)mem, fname+strlen(fname), 10);
1603 Bool isMemScript(fname)
1605 return (strstr(fname,memPrefix) != NULL);
1608 String memScriptString(fname)
1610 String p = strstr(fname,memPrefix);
1612 return (String)atoi(p+lenMemPrefix);
1618 Void parseScript(fname,len) /* Read a script, possibly from mem */
1622 if (isMemScript(fname)) {
1623 char* s = memScriptString(fname);
1626 fileInput(fname,len);
1631 Void parseScript(nm,len) /* Read a script */
1633 Long len; { /* Used to set a target for reading */
1640 Void parseExp() { /* Read an expression to evaluate */
1642 setLastExpr(inputExpr);
1645 Void parseContext() { /* Read a context to prove */
1646 parseInput(CONTEXT);
1649 Void parseInterface(nm,len) /* Read a GHC interface file */
1651 Long len; { /* Used to set a target for reading */
1654 parseInput(INTERFACE);
1658 /* --------------------------------------------------------------------------
1660 * ------------------------------------------------------------------------*/
1665 case INSTALL : initCharTab();
1666 textCase = findText("case");
1667 textOfK = findText("of");
1668 textData = findText("data");
1669 textType = findText("type");
1670 textIf = findText("if");
1671 textThen = findText("then");
1672 textElse = findText("else");
1673 textWhere = findText("where");
1674 textLet = findText("let");
1675 textIn = findText("in");
1676 textInfix = findText("infix");
1677 textInfixl = findText("infixl");
1678 textInfixr = findText("infixr");
1679 textForeign = findText("foreign");
1680 textUnsafe = findText("unsafe");
1681 textNewtype = findText("newtype");
1682 textDefault = findText("default");
1683 textDeriving = findText("deriving");
1684 textDo = findText("do");
1685 textClass = findText("class");
1687 textWith = findText("with");
1688 textDlet = findText("dlet");
1690 textInstance = findText("instance");
1691 textCoco = findText("::");
1692 textEq = findText("=");
1693 textUpto = findText("..");
1694 textAs = findText("@");
1695 textLambda = findText("\\");
1696 textBar = findText("|");
1697 textMinus = findText("-");
1698 textPlus = findText("+");
1699 textFrom = findText("<-");
1700 textArrow = findText("->");
1701 textLazy = findText("~");
1702 textBang = findText("!");
1703 textDot = findText(".");
1704 textImplies = findText("=>");
1705 textPrelude = findText("Prelude");
1706 textNum = findText("Num");
1707 textModule = findText("module");
1708 textInterface = findText("__interface");
1709 textInstImport = findText("__instimport");
1710 textExport = findText("export");
1711 textDynamic = findText("dynamic");
1712 textUUExport = findText("__export");
1713 textImport = findText("import");
1714 textHiding = findText("hiding");
1715 textQualified = findText("qualified");
1716 textAsMod = findText("as");
1717 textWildcard = findText("_");
1718 textAll = findText("forall");
1719 textUUAll = findText("__forall");
1720 varMinus = mkVar(textMinus);
1721 varPlus = mkVar(textPlus);
1722 varBang = mkVar(textBang);
1723 varDot = mkVar(textDot);
1724 varHiding = mkVar(textHiding);
1725 varQualified = mkVar(textQualified);
1726 varAsMod = mkVar(textAsMod);
1727 conMain = mkCon(findText("Main"));
1728 varMain = mkVar(findText("main"));
1734 case RESET : tyconDefns = NIL;
1743 foreignImports= NIL;
1744 foreignExports= NIL;
1752 case BREAK : if (reading==KEYBOARD)
1756 case MARK : mark(tyconDefns);
1764 mark(unqualImports);
1765 mark(foreignImports);
1766 mark(foreignExports);
1784 /*-------------------------------------------------------------------------*/