1 /* -*- mode: hugs-c; -*- */
2 /* --------------------------------------------------------------------------
3 * Input functions, lexical analysis parsing etc...
5 * Copyright (c) The University of Nottingham and Yale University, 1994-1997.
6 * All rights reserved. See NOTICE for details and conditions of use etc...
7 * Hugs version 1.4, December 1997
9 * $RCSfile: input.c,v $
11 * $Date: 1998/12/02 13:22:12 $
12 * ------------------------------------------------------------------------*/
20 #include "interface.h"
24 #include "hugs.h" /* for target */
30 #include "machdep.h" /* for findPathname */
36 /* --------------------------------------------------------------------------
38 * ------------------------------------------------------------------------*/
40 List tyconDefns = NIL; /* type constructor definitions */
41 List typeInDefns = NIL; /* type synonym restrictions */
42 List valDefns = NIL; /* value definitions in script */
43 List opDefns = NIL; /* operator defns in script */
44 List classDefns = NIL; /* class defns in script */
45 List instDefns = NIL; /* instance defns in script */
46 List selDefns = NIL; /* list of selector lists */
47 List genDefns = NIL; /* list of generated names */
48 List unqualImports = NIL; /* unqualified import list */
49 List foreignImports = NIL; /* foreign imports */
50 List foreignExports = NIL; /* foreign exportsd */
51 List defaultDefns = NIL; /* default definitions (if any) */
52 Int defaultLine = 0; /* line in which default defs occur*/
53 List evalDefaults = NIL; /* defaults for evaluator */
55 Cell inputExpr = NIL; /* input expression */
56 Bool literateScripts = FALSE; /* TRUE => default to lit scripts */
57 Bool literateErrors = TRUE; /* TRUE => report errs in lit scrs */
59 String repeatStr = 0; /* Repeat last expr */
61 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
62 String preprocessor = 0;
65 /* --------------------------------------------------------------------------
66 * Local function prototypes:
67 * ------------------------------------------------------------------------*/
69 static Void local fileInput Args((String,Long));
70 static Bool local literateMode Args((String));
71 static Bool local linecmp Args((String,String));
72 static Int local nextLine Args((Void));
73 static Void local skip Args((Void));
74 static Void local thisLineIs Args((Int));
75 static Void local newlineSkip Args((Void));
76 static Void local closeAnyInput Args((Void));
78 Int yyparse Args((Void)); /* can't stop yacc making this */
79 /* public, but don't advertise */
80 /* it in a header file. */
82 static Void local endToken Args((Void));
83 static Text local readOperator Args((Void));
84 static Text local readIdent Args((Void));
85 static Cell local readRadixNumber Args((Int));
86 static Cell local readNumber Args((Void));
87 static Cell local readChar Args((Void));
88 static Cell local readString Args((Void));
89 static Void local saveStrChr Args((Char));
90 static Cell local readAChar Args((Bool));
92 static Bool local lazyReadMatches Args((String));
93 static Cell local readEscapeChar Args((Bool));
94 static Void local skipGap Args((Void));
95 static Cell local readCtrlChar Args((Void));
96 static Cell local readOctChar Args((Void));
97 static Cell local readHexChar Args((Void));
98 static Int local readHexDigit Args((Char));
99 static Cell local readDecChar Args((Void));
101 static Void local goOffside Args((Int));
102 static Void local unOffside Args((Void));
103 static Bool local canUnOffside Args((Void));
105 static Void local skipWhitespace Args((Void));
106 static Int local yylex Args((Void));
107 static Int local repeatLast Args((Void));
109 static Void local parseInput Args((Int));
111 /* --------------------------------------------------------------------------
112 * Text values for reserved words and special symbols:
113 * ------------------------------------------------------------------------*/
115 static Text textCase, textOfK, textData, textType, textIf;
116 static Text textThen, textElse, textWhere, textLet, textIn;
117 static Text textInfix, textInfixl, textInfixr, textForeign, textNewtype;
118 static Text textDefault, textDeriving, textDo, textClass, textInstance;
120 static Text textCoco, textEq, textUpto, textAs, textLambda;
121 static Text textBar, textMinus, textFrom, textArrow, textLazy;
122 static Text textBang, textDot, textAll, textImplies;
123 static Text textWildcard;
125 static Text textModule, textImport, textPrelude, textPreludeHugs;
126 static Text textHiding, textQualified, textAsMod;
127 static Text textExport, textInterface, textRequires, textUnsafe;
130 Text textPlus; /* (+) */
132 Cell conPrelude; /* Prelude */
134 static Cell conMain; /* Main */
135 static Cell varMain; /* main */
137 static Cell conUnit; /* () */
138 static Cell conList; /* [] */
139 static Cell conNil; /* [] */
140 static Cell conPreludeUnit; /* Prelude.() */
141 static Cell conPreludeList; /* Prelude.[] */
142 static Cell conPreludeNil; /* Prelude.[] */
144 static Cell varMinus; /* (-) */
145 static Cell varBang; /* (!) */
146 static Cell varDot; /* (.) */
147 static Cell varHiding; /* hiding */
148 static Cell varQualified; /* qualified */
149 static Cell varAsMod; /* as */
151 static Cell varNegate;
153 static Cell varEnumFrom;
154 static Cell varEnumFromThen;
155 static Cell varEnumFromTo;
156 static Cell varEnumFromThenTo;
158 static List imps; /* List of imports to be chased */
160 /* --------------------------------------------------------------------------
161 * Single character input routines:
163 * At the lowest level of input, characters are read one at a time, with the
164 * current character held in c0 and the following (lookahead) character in
165 * c1. The corrdinates of c0 within the file are held in (column,row).
166 * The input stream is advanced by one character using the skip() function.
167 * ------------------------------------------------------------------------*/
169 #define TABSIZE 8 /* spacing between tabstops */
171 #define NOTHING 0 /* what kind of input is being read?*/
172 #define KEYBOARD 1 /* - keyboard/console? */
173 #define SCRIPTFILE 2 /* - script file */
174 #define PROJFILE 3 /* - project file */
175 #define STRING 4 /* - string buffer? */
177 static Int reading = NOTHING;
179 static Target readSoFar;
180 static Int row, column, startColumn;
182 static FILE *inputStream = 0;
183 static Bool thisLiterate;
184 static String nextStringChar; /* next char in string buffer */
186 #if USE_READLINE /* for command line editors */
187 static String currentLine; /* editline or GNU readline */
188 static String nextChar;
189 #define nextConsoleChar() (unsigned char)(*nextChar=='\0' ? '\n' : *nextChar++)
190 extern Void add_history Args((String));
191 extern String readline Args((String));
193 #define nextConsoleChar() getc(stdin)
196 static Int litLines; /* count defn lines in lit script */
197 #define DEFNCHAR '>' /* definition lines begin with this */
198 static Int lastLine; /* records type of last line read: */
199 #define STARTLINE 0 /* - at start of file, none read */
200 #define BLANKLINE 1 /* - blank (may preceed definition) */
201 #define TEXTLINE 2 /* - text comment */
202 #define DEFNLINE 3 /* - line containing definition */
203 #define CODELINE 4 /* - line inside code block */
205 #define BEGINCODE "\\begin{code}"
206 #define ENDCODE "\\end{code}"
209 static char *lineBuffer = NULL; /* getline() does the initial allocation */
211 #define LINEBUFFER_SIZE 1000
212 static char lineBuffer[LINEBUFFER_SIZE];
214 static int lineLength = 0;
215 static int inCodeBlock = FALSE; /* Inside \begin{code}..\end{code} */
216 static int linePtr = 0;
218 Void consoleInput(prompt) /* prepare to input characters from */
219 String prompt; { /* standard in (i.e. console/kbd) */
220 reading = KEYBOARD; /* keyboard input is Line oriented, */
221 c0 = /* i.e. input terminated by '\n' */
227 /* Paranoid freeing code supplied by Sverker Nilsson (sverker@opq.se)
228 * avoids accidentally freeing currentLine twice.
231 String oldCurrentLine = currentLine;
232 currentLine = 0; /* We may lose the space of currentLine */
233 free(oldCurrentLine); /* if interrupted here - unlikely */
235 currentLine = readline(prompt);
236 nextChar = currentLine;
239 add_history(currentLine);
249 Void projInput(nm) /* prepare to input characters from */
250 String nm; { /* from named project file */
251 if ((inputStream = fopen(nm,"r"))!=0) {
259 ERRMSG(0) "Unable to open project file \"%s\"", nm
264 static Void local fileInput(nm,len) /* prepare to input characters from*/
265 String nm; /* named file (specified length is */
266 Long len; { /* used to set target for reading) */
267 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
270 strncpy(cmd,preprocessor,100);
271 strncat(cmd," ",100);
273 cmd[99] = '\0'; /* paranoia */
274 inputStream = popen(cmd,"r");
276 inputStream = fopen(nm,"r");
279 inputStream = fopen(nm,"r");
282 reading = SCRIPTFILE;
288 lastLine = STARTLINE; /* literate file processing */
292 thisLiterate = literateMode(nm);
296 setGoal("Parsing", (Target)len);
299 ERRMSG(0) "Unable to open file \"%s\"", nm
304 Void stringInput(s) /* prepare to input characters from string */
317 static Bool local literateMode(nm) /* select literate mode for file */
319 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
321 if (filenamecmp(dot+1,"hs")==0) /* .hs files are never literate */
323 if (filenamecmp(dot+1,"lhs") ==0 || /* .lhs, .verb files are always*/
324 filenamecmp(dot+1,"verb")==0) /* literate scripts */
327 return literateScripts; /* otherwise, use the default */
330 Bool isInterfaceFile(nm) /* is nm an interface file? */
332 char *dot = strrchr(nm,'.'); /* look for last dot in file name */
333 return (dot && filenamecmp(dot+1,"myhi")==0);
337 /* This code originally came from Sigbjorn Finne (sof@dcs.gla.ac.uk).
338 * I've removed the loop (since newLineSkip contains a loop too) and
339 * replaced the warnings with errors. ADR
342 * To deal with literate \begin{code}...\end{code} blocks,
343 * add a line buffer that rooms the current line. The old c0 and c1
344 * stream pointers are used as before within that buffer -- sof
346 * Upon reading a new line into the line buffer, we check to see if
347 * we're reading in a line containing \begin{code} or \end{code} and
348 * take appropriate action.
351 static Bool local linecmp(s,line) /* compare string with line */
352 String s; /* line may end in whitespace */
355 while (s[i] != '\0' && s[i] == line[i]) {
358 /* s[0..i-1] == line[0..i-1] */
359 if (s[i] != '\0') { /* check s `isPrefixOf` line */
362 while (isIn(line[i], SPACE)) { /* allow whitespace at end of line */
365 return (line[i] == '\0');
368 /* Returns line length (including \n) or 0 upon EOF. */
369 static Int local nextLine()
373 Forget about fgets(), it is utterly braindead.
374 (Assumes \NUL free streams and does not gracefully deal
375 with overflow.) Instead, use GNU libc's getline().
377 lineLength = getline(&lineBuffer, &lineLength, inputStream);
379 if (NULL != fgets(lineBuffer, LINEBUFFER_SIZE, inputStream))
380 lineLength = strlen(lineBuffer);
384 /* printf("Read: \"%s\"", lineBuffer); */
385 if (lineLength <= 0) { /* EOF / IO error, who knows.. */
388 else if (lineLength >= 2 && lineBuffer[0] == '#' && lineBuffer[1] == '!') {
389 lineBuffer[0]='\n'; /* pretend it's a blank line */
392 } else if (thisLiterate) {
393 if (linecmp(BEGINCODE, lineBuffer)) {
394 if (!inCodeBlock) { /* Entered a code block */
396 lineBuffer[0]='\n'; /* pretend it's a blank line */
401 ERRMSG(row) "\\begin{code} encountered inside code block"
405 else if (linecmp(ENDCODE, lineBuffer)) {
406 if (inCodeBlock) { /* Finished code block */
408 lineBuffer[0]='\n'; /* pretend it's a blank line */
413 ERRMSG(row) "\\end{code} encountered outside code block"
418 /* printf("Read: \"%s\"", lineBuffer); */
422 static Void local skip() { /* move forward one char in input */
423 if (c0!=EOF) { /* stream, updating c0, c1, ... */
424 if (c0=='\n') { /* Adjusting cursor coords as nec. */
427 if (reading==SCRIPTFILE)
431 column += TABSIZE - ((column-1)%TABSIZE);
440 if (reading==SCRIPTFILE)
444 else if (reading==KEYBOARD) {
449 c1 = nextConsoleChar();
450 /* On Win32, hitting ctrl-C causes the next getchar to
451 * fail - returning "-1" to indicate an error.
452 * This is one of the rare cases where "-1" does not mean EOF.
454 if (EOF == c1 && !feof(stdin)) {
459 else if (reading==STRING) {
460 c1 = (unsigned char) *nextStringChar++;
465 if (lineLength <=0 || linePtr == lineLength) {
466 /* Current line, exhausted - get new one */
467 if (nextLine() <= 0) { /* EOF */
472 c1 = (unsigned char)lineBuffer[linePtr++];
476 c1 = (unsigned char)lineBuffer[linePtr++];
483 static Void local thisLineIs(kind) /* register kind of current line */
484 Int kind; { /* & check for literate script errs */
485 if (literateErrors) {
486 if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
487 (kind==TEXTLINE && lastLine==DEFNLINE)) {
488 ERRMSG(row) "Program line next to comment"
495 static Void local newlineSkip() { /* skip `\n' (supports lit scripts) */
496 /* assert(c0=='\n'); */
497 if (reading==SCRIPTFILE && thisLiterate) {
500 if (inCodeBlock) { /* pass chars on definition lines */
501 thisLineIs(CODELINE); /* to lexer (w/o leading DEFNCHAR) */
505 if (c0==DEFNCHAR) { /* pass chars on definition lines */
506 thisLineIs(DEFNLINE); /* to lexer (w/o leading DEFNCHAR) */
511 while (c0==' ' || c0=='\t')/* maybe line is blank? */
513 if (c0=='\n' || c0==EOF)
514 thisLineIs(BLANKLINE);
516 thisLineIs(TEXTLINE); /* otherwise it must be a comment */
517 while (c0!='\n' && c0!=EOF)
519 } /* by now, c0=='\n' or c0==EOF */
520 } while (c0!=EOF); /* if new line, start again */
522 if (litLines==0 && literateErrors) {
523 ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
532 static Void local closeAnyInput() { /* Close input stream, if open, */
533 switch (reading) { /* or skip to end of console line */
535 case SCRIPTFILE : if (inputStream) {
536 #if USE_PREPROCESSOR && (defined(HAVE_POPEN) || defined(HAVE__POPEN))
548 case KEYBOARD : while (c0!=EOF)
555 /* --------------------------------------------------------------------------
556 * Parser: Uses table driven parser generated from parser.y using yacc
557 * ------------------------------------------------------------------------*/
561 /* --------------------------------------------------------------------------
562 * Single token input routines:
564 * The following routines read the values of particular kinds of token given
565 * that the first character of the token has already been located in c0 on
566 * entry to the routine.
567 * ------------------------------------------------------------------------*/
569 #define MAX_TOKEN 500
570 #define startToken() tokPos = 0
571 #define saveTokenChar(c) if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
572 #define saveChar(c) tokenStr[tokPos++]=(char)(c)
573 #define overflows(n,b,d,m) (n > ((m)-(d))/(b))
575 static char tokenStr[MAX_TOKEN+1]; /* token buffer */
576 static Int tokPos; /* input position in buffer */
577 static Int identType; /* identifier type: CONID / VARID */
578 static Int opType; /* operator type : CONOP / VAROP */
580 static Void local endToken() { /* check for token overflow */
581 if (tokPos>MAX_TOKEN) {
582 ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
585 tokenStr[tokPos] = '\0';
588 static Text local readOperator() { /* read operator symbol */
593 } while (isISO(c0) && isIn(c0,SYMBOL));
594 opType = (tokenStr[0]==':' ? CONOP : VAROP);
596 return findText(tokenStr);
599 static Text local readIdent() { /* read identifier */
604 } while (isISO(c0) && isIn(c0,IDAFTER));
606 identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
607 return findText(tokenStr);
610 static Cell local readRadixNumber(r) /* Read literal in specified radix */
611 Int r; { /* from input of the form 0c{digs} */
615 skip(); /* skip leading zero */
616 if ((d=readHexDigit(c1))<0 || d>=r) {
617 /* Special case; no digits, lex as */
618 /* if it had been written "0 c..." */
627 d = readHexDigit(c0);
628 } while (d>=0 && d<r);
631 /* ToDo: return an INTCELL if small enough */
632 return stringToBignum(tokenStr);
635 static Cell local readNumber() { /* read numeric constant */
636 Bool intTooLarge = FALSE;
639 if (c1=='x' || c1=='X') /* Maybe a hexadecimal literal? */
640 return readRadixNumber(16);
641 if (c1=='o' || c1=='O') /* Maybe an octal literal? */
642 return readRadixNumber(8);
649 } while (isISO(c0) && isIn(c0,DIGIT));
651 if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
653 /* ToDo: return an INTCELL if small enough */
654 return stringToBignum(tokenStr);
657 saveTokenChar(c0); /* save decimal point */
659 do { /* process fractional part ... */
662 } while (isISO(c0) && isIn(c0,DIGIT));
664 if (c0=='e' || c0=='E') { /* look for exponent part... */
674 if (!isISO(c0) || !isIn(c0,DIGIT)) {
675 ERRMSG(row) "Missing digits in exponent"
682 } while (isISO(c0) && isIn(c0,DIGIT));
687 return stringToFloat(tokenStr);
690 static Cell local readChar() { /* read character constant */
694 if (c0=='\'' || c0=='\n' || c0==EOF) {
695 ERRMSG(row) "Illegal character constant"
699 charRead = readAChar(FALSE);
704 ERRMSG(row) "Improperly terminated character constant"
710 static Cell local readString() { /* read string literal */
715 while (c0!='\"' && c0!='\n' && c0!=EOF) {
718 saveStrChr(charOf(c));
724 ERRMSG(row) "Improperly terminated string"
728 return mkStr(findText(tokenStr));
731 static Void local saveStrChr(c) /* save character in string */
733 if (c!='\0' && c!='\\') { /* save non null char as single char*/
736 else { /* save null char as TWO null chars */
737 if (tokPos+1<MAX_TOKEN) {
747 static Cell local readAChar(isStrLit) /* read single char constant */
748 Bool isStrLit; { /* TRUE => enable \& and gaps */
751 if (c0=='\\') /* escape character? */
752 return readEscapeChar(isStrLit);
754 ERRMSG(row) "Non ISO character `\\%d' in constant", ((int)c0)
757 skip(); /* normal character? */
761 /* --------------------------------------------------------------------------
762 * Character escape code sequences:
763 * ------------------------------------------------------------------------*/
765 static struct { /* table of special escape codes */
769 {"a", 7}, {"b", 8}, {"f", 12}, {"n", 10}, /* common escapes */
770 {"r", 13}, {"t", 9}, {"\\",'\\'}, {"\"",'\"'},
771 {"\'",'\''}, {"v", 11},
772 {"NUL", 0}, {"SOH", 1}, {"STX", 2}, {"ETX", 3}, /* ascii codenames */
773 {"EOT", 4}, {"ENQ", 5}, {"ACK", 6}, {"BEL", 7},
774 {"BS", 8}, {"HT", 9}, {"LF", 10}, {"VT", 11},
775 {"FF", 12}, {"CR", 13}, {"SO", 14}, {"SI", 15},
776 {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
777 {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
778 {"CAN", 24}, {"EM", 25}, {"SUB", 26}, {"ESC", 27},
779 {"FS", 28}, {"GS", 29}, {"RS", 30}, {"US", 31},
780 {"SP", 32}, {"DEL", 127},
784 static Int alreadyMatched; /* Record portion of input stream */
785 static char alreadyRead[10]; /* that has been read w/o a match */
787 static Bool local lazyReadMatches(s) /* compare input stream with string */
788 String s; { /* possibly using characters that */
789 int i; /* have already been read */
791 for (i=0; i<alreadyMatched; ++i)
792 if (alreadyRead[i]!=s[i])
795 while (s[i] && s[i]==c0) {
796 alreadyRead[alreadyMatched++]=(char)c0;
804 static Cell local readEscapeChar(isStrLit)/* read escape character */
810 case '&' : if (isStrLit) {
814 ERRMSG(row) "Illegal use of `\\&' in character constant"
818 case '^' : return readCtrlChar();
820 case 'o' : return readOctChar();
821 case 'x' : return readHexChar();
823 default : if (!isISO(c0)) {
824 ERRMSG(row) "Illegal escape sequence"
827 else if (isIn(c0,SPACE)) {
832 ERRMSG(row) "Illegal use of gap in character constant"
836 else if (isIn(c0,DIGIT))
837 return readDecChar();
840 for (alreadyMatched=0; escapes[i].codename; i++)
841 if (lazyReadMatches(escapes[i].codename))
842 return mkChar(escapes[i].codenumber);
844 alreadyRead[alreadyMatched++] = (char)c0;
845 alreadyRead[alreadyMatched++] = '\0';
846 ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
849 return NIL;/*NOTREACHED*/
852 static Void local skipGap() { /* skip over gap in string literal */
853 do /* (simplified in Haskell 1.1) */
858 while (isISO(c0) && isIn(c0,SPACE));
860 ERRMSG(row) "Missing `\\' terminating string literal gap"
866 static Cell local readCtrlChar() { /* read escape sequence \^x */
867 static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
871 if ((which = strchr(controls,c0))==NULL) {
872 ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
876 return mkChar(which-controls);
879 static Cell local readOctChar() { /* read octal character constant */
884 if ((d = readHexDigit(c0))<0 || d>=8) {
885 ERRMSG(row) "Empty octal character escape"
889 if (overflows(n,8,d,MAXCHARVAL)) {
890 ERRMSG(row) "Octal character escape out of range"
895 } while ((d = readHexDigit(c0))>=0 && d<8);
900 static Cell local readHexChar() { /* read hex character constant */
905 if ((d = readHexDigit(c0))<0) {
906 ERRMSG(row) "Empty hexadecimal character escape"
910 if (overflows(n,16,d,MAXCHARVAL)) {
911 ERRMSG(row) "Hexadecimal character escape out of range"
916 } while ((d = readHexDigit(c0))>=0);
921 static Int local readHexDigit(c) /* read single hex digit */
923 if ('0'<=c && c<='9')
925 if ('A'<=c && c<='F')
927 if ('a'<=c && c<='f')
932 static Cell local readDecChar() { /* read decimal character constant */
936 if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
937 ERRMSG(row) "Decimal character escape out of range"
942 } while (c0!=EOF && isIn(c0,DIGIT));
947 /* --------------------------------------------------------------------------
948 * Produce printable representation of character:
949 * ------------------------------------------------------------------------*/
951 String unlexChar(c,quote) /* return string representation of */
952 Char c; /* character... */
953 Char quote; { /* protect quote character */
954 static char buffer[12];
956 if (c<0) /* deal with sign extended chars.. */
959 if (isISO(c) && isIn(c,PRINT)) { /* normal printable character */
960 if (c==quote || c=='\\') { /* look for quote of approp. kind */
970 else { /* look for escape code */
972 for (escs=0; escapes[escs].codename; escs++)
973 if (escapes[escs].codenumber==c) {
974 sprintf(buffer,"\\%s",escapes[escs].codename);
977 sprintf(buffer,"\\%d",c); /* otherwise use numeric escape */
982 Void printString(s) /* print string s, using quotes and */
983 String s; { /* escapes if any parts need them */
987 while ((c = *t)!=0 && isISO(c) && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE)) {
993 Printf("%s",unlexChar(*t,'"'));
1001 /* -------------------------------------------------------------------------
1002 * Handle special types of input for us in interpreter:
1003 * -----------------------------------------------------------------------*/
1005 Command readCommand(cmds,start,sys) /* read command at start of input */
1006 struct cmd *cmds; /* line in interpreter */
1007 Char start; /* characters introducing a cmd */
1008 Char sys; { /* character for shell escape */
1009 while (c0==' ' || c0 =='\t')
1012 if (c0=='\n') /* look for blank command lines */
1014 if (c0==EOF) /* look for end of input stream */
1016 if (c0==sys) { /* single character system escape */
1020 if (c0==start && c1==sys) { /* two character system escape */
1026 startToken(); /* All cmds start with start */
1027 if (c0==start) /* except default (usually EVAL) */
1028 do { /* which is empty */
1031 } while (c0!=EOF && !isIn(c0,SPACE));
1034 for (; cmds->cmdString; ++cmds)
1035 if (strcmp((cmds->cmdString),tokenStr)==0 ||
1036 (tokenStr[0]==start &&
1037 tokenStr[1]==(cmds->cmdString)[1] &&
1039 return (cmds->cmdCode);
1043 String readFilename() { /* Read filename from input (if any)*/
1044 if (reading==PROJFILE)
1047 while (c0==' ' || c0=='\t')
1050 if (c0=='\n' || c0==EOF) /* return null string at end of line*/
1054 while (c0!=EOF && !isIn(c0,SPACE)) {
1057 while (c0!=EOF && c0!='\"') {
1058 Cell c = readAChar(TRUE);
1060 saveTokenChar(charOf(c));
1065 ERRMSG(row) "a closing quote, '\"', was expected"
1078 String readLine() { /* Read command line from input */
1079 while (c0==' ' || c0=='\t') /* skip leading whitespace */
1083 while (c0!='\n' && c0!=EOF) {
1092 /* --------------------------------------------------------------------------
1093 * This lexer supports the Haskell layout rule:
1095 * - Layout area bounded by { ... }, with `;'s in between.
1096 * - A `{' is a HARD indentation and can only be matched by a corresponding
1098 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
1099 * is inserted with the column number of the first token after the
1100 * WHERE/LET/OF keyword.
1101 * - When a soft indentation is uppermost on the indetation stack with
1102 * column col' we insert:
1103 * `}' in front of token with column<col' and pop indentation off stack,
1104 * `;' in front of token with column==col'.
1105 * ------------------------------------------------------------------------*/
1107 #define MAXINDENT 100 /* maximum nesting of layout rule */
1108 static Int layout[MAXINDENT+1];/* indentation stack */
1109 #define HARD (-1) /* indicates hard indentation */
1110 static Int indentDepth = (-1); /* current indentation nesting */
1112 static Void local goOffside(col) /* insert offside marker */
1113 Int col; { /* for specified column */
1114 if (indentDepth>=MAXINDENT) {
1115 ERRMSG(row) "Too many levels of program nesting"
1118 layout[++indentDepth] = col;
1121 static Void local unOffside() { /* leave layout rule area */
1125 static Bool local canUnOffside() { /* Decide if unoffside permitted */
1126 return indentDepth>=0 && layout[indentDepth]!=HARD;
1129 /* --------------------------------------------------------------------------
1131 * ------------------------------------------------------------------------*/
1133 static Void local skipWhitespace() { /* Skip over whitespace/comments */
1134 for (;;) /* Strictly speaking, this code is */
1135 if (c0==EOF) /* a little more liberal than the */
1136 return; /* report allows ... */
1139 else if (isIn(c0,SPACE))
1141 else if (c0=='{' && c1=='-') { /* (potentially) nested comment */
1143 Int origRow = row; /* Save original row number */
1146 while (nesting>0 && c0!=EOF)
1147 if (c0=='{' && c1=='-') {
1152 else if (c0=='-' && c1=='}') {
1162 ERRMSG(origRow) "Unterminated nested comment {- ..."
1166 else if (c0=='-' && c1=='-') { /* One line comment */
1169 while (c0!='\n' && c0!=EOF);
1177 static Bool firstToken; /* Set to TRUE for first token */
1178 static Int firstTokenIs; /* ... with token value stored here */
1180 static Int local yylex() { /* Read next input token ... */
1181 static Bool insertOpen = FALSE;
1182 static Bool insertedToken = FALSE;
1183 static Text textRepeat;
1185 #define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}
1187 if (firstToken) { /* Special case for first token */
1191 insertedToken = FALSE;
1192 if (reading==KEYBOARD)
1193 textRepeat = findText(repeatStr);
1194 return firstTokenIs;
1197 if (insertOpen) { /* insert `soft' opening brace */
1199 insertedToken = TRUE;
1201 push(yylval = mkInt(row));
1205 /* ----------------------------------------------------------------------
1206 * Skip white space, and insert tokens to support layout rules as reqd.
1207 * --------------------------------------------------------------------*/
1210 startColumn = column;
1211 push(yylval = mkInt(row)); /* default token value is line no. */
1212 /* subsequent changes to yylval must also set top() to the same value */
1214 if (indentDepth>=0) /* layout rule(s) active ? */
1215 if (insertedToken) /* avoid inserting multiple `;'s */
1216 insertedToken = FALSE; /* or putting `;' after `{' */
1217 else if (layout[indentDepth]!=HARD)
1218 if (column<layout[indentDepth]) {
1222 else if (column==layout[indentDepth] && c0!=EOF) {
1223 insertedToken = TRUE;
1227 /* ----------------------------------------------------------------------
1228 * Now try to identify token type:
1229 * --------------------------------------------------------------------*/
1232 case EOF : return 0; /* End of file/input */
1234 /* The next 10 characters make up the `special' category in 1.3 */
1235 case '(' : skip(); return '(';
1236 case ')' : skip(); return ')';
1237 case ',' : skip(); return ',';
1238 case ';' : skip(); return ';';
1239 case '[' : skip(); return '[';
1240 case ']' : skip(); return ']';
1241 case '`' : skip(); return '`';
1242 case '{' : goOffside(HARD);
1245 case '}' : if (indentDepth<0) {
1246 ERRMSG(row) "Misplaced `}'"
1249 if (layout[indentDepth]==HARD) /* skip over hard }*/
1251 unOffside(); /* otherwise, we have to insert a }*/
1252 return '}'; /* to (try to) avoid an error... */
1254 /* Character and string literals */
1255 case '\'' : top() = yylval = readChar();
1258 case '\"' : top() = yylval = readString();
1263 if (c0=='#' && isIn(c1,SMALL)) { /* Look for record selector name */
1267 top() = yylval = ap(RECSEL,mkExt(it));
1268 return identType=RECSELID;
1271 if (isIn(c0,LARGE)) { /* Look for qualified name */
1272 Text it = readIdent(); /* No keyword begins with LARGE ...*/
1273 if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL))) {
1275 skip(); /* Skip qualifying dot */
1276 if (isIn(c0,SYMBOL)) { /* Qualified operator */
1277 it2 = readOperator();
1278 if (opType==CONOP) {
1279 top() = yylval = mkQConOp(it,it2);
1282 top() = yylval = mkQVarOp(it,it2);
1285 } else { /* Qualified identifier */
1287 if (identType==CONID) {
1288 top() = yylval = mkQCon(it,it2);
1291 top() = yylval = mkQVar(it,it2);
1296 top() = yylval = mkCon(it);
1298 } /* We could easily keep a record of*/
1299 } /* the qualifying name here ... */
1300 if (isIn(c0,(SMALL|LARGE)) || c0 == '_') {
1301 Text it = readIdent();
1303 if (it==textCase) return CASEXP;
1304 if (it==textOfK) lookAhead(OF);
1305 if (it==textData) return DATA;
1306 if (it==textType) return TYPE;
1307 if (it==textIf) return IF;
1308 if (it==textThen) return THEN;
1309 if (it==textElse) return ELSE;
1310 if (it==textWhere) lookAhead(WHERE);
1311 if (it==textLet) lookAhead(LET);
1312 if (it==textIn) return IN;
1313 if (it==textInfix) return INFIX;
1314 if (it==textInfixl) return INFIXL;
1315 if (it==textInfixr) return INFIXR;
1316 if (it==textForeign) return FOREIGN;
1317 if (it==textUnsafe) return UNSAFE;
1318 if (it==textNewtype) return TNEWTYPE;
1319 if (it==textDefault) return DEFAULT;
1320 if (it==textDeriving) return DERIVING;
1321 if (it==textDo) lookAhead(DO);
1322 if (it==textClass) return TCLASS;
1323 if (it==textInstance) return TINSTANCE;
1324 if (it==textModule) return MODULETOK;
1325 if (it==textInterface) return INTERFACE;
1326 if (it==textRequires) return REQUIRES;
1327 if (it==textImport) return IMPORT;
1328 if (it==textExport) return EXPORT;
1329 if (it==textHiding) return HIDING;
1330 if (it==textQualified) return QUALIFIED;
1331 if (it==textAsMod) return ASMOD;
1332 if (it==textWildcard) return '_';
1333 if (it==textAll) return ALL;
1334 if (it==textRepeat && reading==KEYBOARD)
1335 return repeatLast();
1337 top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
1341 if (isIn(c0,SYMBOL)) {
1342 Text it = readOperator();
1344 if (it==textCoco) return COCO;
1345 if (it==textEq) return '=';
1346 if (it==textUpto) return UPTO;
1347 if (it==textAs) return '@';
1348 if (it==textLambda) return '\\';
1349 if (it==textBar) return '|';
1350 if (it==textFrom) return FROM;
1351 if (it==textMinus) return '-';
1352 if (it==textBang) return '!';
1353 if (it==textDot) return '.';
1354 if (it==textArrow) return ARROW;
1355 if (it==textLazy) return '~';
1356 if (it==textImplies) return IMPLIES;
1357 if (it==textRepeat && reading==KEYBOARD)
1358 return repeatLast();
1360 top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
1364 if (isIn(c0,DIGIT)) {
1365 top() = yylval = readNumber();
1369 ERRMSG(row) "Unrecognised character `\\%d' in column %d", ((int)c0), column
1371 return 0; /*NOTREACHED*/
1374 static Int local repeatLast() { /* Obtain last expression entered */
1375 if (isNull(yylval=getLastExpr())) {
1376 ERRMSG(row) "Cannot use %s without any previous input", repeatStr
1382 /* --------------------------------------------------------------------------
1383 * main entry points to parser/lexer:
1384 * ------------------------------------------------------------------------*/
1386 static Void local parseInput(startWith)/* Parse input with given first tok,*/
1387 Int startWith; { /* determining whether to read a */
1388 firstToken = TRUE; /* script or an expression */
1389 firstTokenIs = startWith;
1392 if (yyparse()) { /* This can only be parser overflow */
1393 ERRMSG(row) "Parser overflow" /* as all syntax errors are caught */
1394 EEND; /* in the parser... */
1397 assert(stackEmpty()); /* stack should now be empty */
1400 Void parseScript(nm,len) /* Read a script */
1402 Long len; { /* Used to set a target for reading */
1408 Void parseInterface(nm,len) /* Read a GHC interface file */
1410 Long len; { /* Used to set a target for reading */
1413 parseInput(INTERFACE);
1416 Void parseExp() { /* Read an expression to evaluate */
1418 setLastExpr(inputExpr);
1421 /* --------------------------------------------------------------------------
1423 * ------------------------------------------------------------------------*/
1428 case INSTALL : initCharTab();
1429 textCase = findText("case");
1430 textOfK = findText("of");
1431 textData = findText("data");
1432 textType = findText("type");
1433 textIf = findText("if");
1434 textThen = findText("then");
1435 textElse = findText("else");
1436 textWhere = findText("where");
1437 textLet = findText("let");
1438 textIn = findText("in");
1439 textInfix = findText("infix");
1440 textInfixl = findText("infixl");
1441 textInfixr = findText("infixr");
1442 textForeign = findText("foreign");
1443 textUnsafe = findText("unsafe");
1444 textNewtype = findText("newtype");
1445 textDefault = findText("default");
1446 textDeriving = findText("deriving");
1447 textDo = findText("do");
1448 textClass = findText("class");
1449 textInstance = findText("instance");
1450 textCoco = findText("::");
1451 textEq = findText("=");
1452 textUpto = findText("..");
1453 textAs = findText("@");
1454 textLambda = findText("\\");
1455 textBar = findText("|");
1456 textMinus = findText("-");
1457 textFrom = findText("<-");
1458 textArrow = findText("->");
1459 textLazy = findText("~");
1460 textBang = findText("!");
1461 textDot = findText(".");
1462 textImplies = findText("=>");
1464 textPlus = findText("+");
1466 textModule = findText("module");
1467 textInterface = findText("__interface");
1468 textRequires = findText("__requires");
1469 textImport = findText("import");
1470 textExport = findText("__export");
1471 textHiding = findText("hiding");
1472 textQualified = findText("qualified");
1473 textAsMod = findText("as");
1474 textWildcard = findText("_");
1475 textAll = findText("forall");
1476 varMinus = mkVar(textMinus);
1477 varBang = mkVar(textBang);
1478 varDot = mkVar(textDot);
1479 varHiding = mkVar(textHiding);
1480 varQualified = mkVar(textQualified);
1481 varAsMod = mkVar(textAsMod);
1482 conMain = mkCon(findText("Main"));
1483 varMain = mkVar(findText("main"));
1484 textPrelude = findText("Prelude");
1485 textPreludeHugs= findText("PreludeBuiltin");
1486 conPrelude = mkCon(textPrelude);
1487 conNil = mkCon(findText("[]"));
1488 conList = mkCon(findText("[]"));
1489 conUnit = mkCon(findText("()"));
1490 conPreludeNil = mkQCon(textPreludeHugs,findText("[]"));
1491 conPreludeList = mkQCon(textPreludeHugs,findText("[]"));
1492 conPreludeUnit = mkQCon(textPreludeHugs,findText("()"));
1493 varNegate = mkQVar(textPreludeHugs,findText("negate"));
1494 varFlip = mkQVar(textPreludeHugs,findText("flip"));
1495 varEnumFrom = mkQVar(textPreludeHugs,findText("enumFrom"));
1496 varEnumFromThen = mkQVar(textPreludeHugs,findText("enumFromThen"));
1497 varEnumFromTo = mkQVar(textPreludeHugs,findText("enumFromTo"));
1498 varEnumFromThenTo = mkQVar(textPreludeHugs,findText("enumFromThenTo"));
1505 case RESET : tyconDefns = NIL;
1514 foreignImports= NIL;
1515 foreignExports= NIL;
1523 case BREAK : if (reading==KEYBOARD)
1527 case MARK : mark(tyconDefns);
1535 mark(unqualImports);
1536 mark(foreignImports);
1537 mark(foreignExports);
1545 mark(varEnumFromThen);
1546 mark(varEnumFromTo);
1547 mark(varEnumFromThenTo);
1559 mark(conPreludeNil);
1560 mark(conPreludeList);
1561 mark(conPreludeUnit);
1567 /*-------------------------------------------------------------------------*/