[project @ 1996-03-22 09:24:22 by partain]
[ghc-hetmet.git] / ghc / compiler / yaccParser / hslexer-DPH.lex
1 %{
2 /**********************************************************************
3 *                                                                     *
4 *                                                                     *
5 *       LEX grammar for Haskell.                                      *
6 *       ------------------------                                      *
7 *                                                                     *
8 *       (c) Copyright K. Hammond, University of Glasgow,              *
9 *               10th. February 1989                                   *
10 *                                                                     *
11 *       Modification History                                          *
12 *       --------------------                                          *
13 *                                                                     *
14 *       22/08/91 kh             Initial Haskell 1.1 version.          *
15 *       18/10/91 kh             Added 'ccall'.                        *
16 *       19/11/91 kh             Tidied generally.                     *
17 *       04/12/91 kh             Added Int#.                           *
18 *       31/01/92 kh             Haskell 1.2 version.                  *
19 *       19/03/92 Jon Hill       Added Data Parallel Notation          *
20 *       24/04/92 ps             Added 'scc'.                          *
21 *       03/06/92 kh             Changed Infix/Prelude Handling.       *
22 *                                                                     *
23 *                                                                     *
24 *       Known Problems:                                               *
25 *                                                                     *
26 *               None, any more.                                       *
27 *                                                                     *
28 **********************************************************************/
29
30 #include "include.h"
31 #include "hsparser-DPH.tab.h"
32 #include <stdio.h>
33 #include <ctype.h>
34 #include "constants.h"
35
36 char    *input_filename = NULL;
37
38 #include "utils.h"
39
40
41 /**********************************************************************
42 *                                                                     *
43 *                                                                     *
44 *      Declarations                                                   *
45 *                                                                     *
46 *                                                                     *
47 **********************************************************************/
48
49
50 extern int yylineno;
51 unsigned yylastlineno = 0;      /* Line number of previous token */
52 unsigned startlineno = 0;       /* temp; used to save the line no where something starts */
53 int yylastposn = 0;             /* Absolute position of last token */
54 int yylinestart = 0;            /* Absolute position of line start */
55
56 static int yyposn = 0;
57
58 /* Essential forward declarations */
59
60 static int readstring(), readasciiname(), readcomment(),
61            lookupascii(), yynewid() /* OLD:, parse_pragma()*/;
62 static char escval();
63
64 static BOOLEAN incomment = FALSE;
65 static unsigned commentdepth = 0;
66
67 static BOOLEAN indenteof = FALSE;
68
69 /* Pragmas */
70 /* OLD: char *pragmatype, *pragmaid, *pragmavalue; */
71
72 /* Special file handling for IMPORTS */
73
74 static  FILE  *yyin_save = NULL;                /*  Saved File Pointer          */
75 static  char  *filename_save;                   /*  File Name                   */
76 static  int   yylineno_save = 0,                /*  Line Number                 */
77               yyposn_save = 0,                  /*  This Token                  */
78               yylastposn_save = 0,              /*  Last Token                  */
79               yyindent_save,                    /*  Indentation                 */
80               yylindent_save,                   /*  Left Indentation            */
81               yytchar_save = 0,                 /*  Next Input Character        */ 
82               icontexts_save = 0;               /*  Indent Context Level        */
83 static unsigned yylastlineno_save = 0;          /*  Line Number of Prev. token  */
84
85 static BOOLEAN leof = FALSE;                    /*  EOF for interfaces          */
86
87
88 extern BOOLEAN ignorePragmas;           /*  True when we should ignore pragmas */
89 extern BOOLEAN ignoreArityPragmas;      /*  And various specific flavors... */
90 extern BOOLEAN ignoreSpecializePragmas;
91 extern BOOLEAN ignoreStrictnessPragmas;
92 extern BOOLEAN ignoreUpdatePragmas;
93
94
95
96 /**********************************************************************
97 *                                                                     *
98 *                                                                     *
99 *     Layout Processing                                               *
100 *                                                                     *
101 *                                                                     *
102 **********************************************************************/
103
104
105 /*
106         The following section deals with Haskell Layout conventions
107         forcing insertion of ; or } as appropriate
108 */
109
110
111 static short 
112         yyindent = 0,           /* Current indentation */
113         yylindent = 0,          /* Indentation of the leftmost char in the current lexeme */
114         yyslindent = -1,        /* Indentation of the leftmost char in a string */
115         yytabindent = 0,        /* Indentation before a tab in case we have to backtrack */
116         forgetindent = FALSE;   /* Don't bother applying indentation rules */
117
118 static int yysttok = -1;        /* Stacked Token:
119                                         -1   -- no token;
120                                         -ve  -- ";" inserted before token
121                                         +ve  -- "}" inserted before token
122                                 */
123
124 short icontexts = 0;            /* Which context we're in */
125
126
127
128 /*
129         Table of indentations:  right bit indicates whether to use
130           indentation rules (1 = use rules; 0 = ignore)
131
132     partain:
133     push one of these "contexts" at every "case" or "where"; the right bit says
134     whether user supplied braces,etc., or not.  pop appropriately (yyendindent).
135
136     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
137     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
138     push is shown just below.
139
140 */
141
142
143 static short indenttab[MAX_CONTEXTS] = { -1 };
144
145 #define INDENTPT (indenttab[icontexts]>>1)
146 #define INDENTON (indenttab[icontexts]&1)
147
148
149 yyshouldindent()
150 {
151   return(!leof && !forgetindent && INDENTON);
152 }
153
154
155 /* Enter new context and set new indentation level */
156 yysetindent()
157 {
158 #ifdef DEBUG
159           fprintf(stderr,"yysetindent:yyindent=%d,yylindent=%d,INDENTPT[%d]=%d\n",yyindent,yylindent,icontexts,INDENTPT);
160 #endif
161
162   /* partain: first chk that new indent won't be less than current one;
163         this code doesn't make sense to me; yyindent tells the position of the _end_
164         of the current token; what that has to do with indenting, I don't know.
165   */
166
167
168   if(yyindent-1 <= INDENTPT)
169     {
170       if (INDENTPT == -1)
171           return;       /* Empty input OK for Haskell 1.1 */
172       else
173         {
174           char errbuf[ERR_BUF_SIZE];
175           sprintf(errbuf,"Layout error -- indentation should be > %d cols",INDENTPT);
176           yyerror(errbuf);
177         }
178     }
179   yyentercontext((yylindent << 1) | 1);
180 }
181
182
183 /* Enter a new context without changing the indentation level */
184
185 yyincindent()
186 {
187 #ifdef DEBUG
188           fprintf(stderr,"yyincindent:yyindent=%d,yylindent=%d,INDENTPT[%d]=%d\n",yyindent,yylindent,icontexts,INDENTPT);
189 #endif
190   yyentercontext(indenttab[icontexts] & ~1);
191 }
192
193
194 /* Turn off indentation processing, usually because an explicit "{" has been seen */
195
196 yyindentoff()
197 {
198   forgetindent = TRUE;
199 }
200
201
202 /* Enter a new layout context. */
203
204 yyentercontext(indent)
205 int indent;
206 {
207   /* Enter new context and set indentation as specified */
208   if(++icontexts >= MAX_CONTEXTS)
209     {
210       char errbuf[ERR_BUF_SIZE];
211       sprintf(errbuf,"'wheres' and 'cases' nested too deeply (>%d)", MAX_CONTEXTS-1);
212       yyerror(errbuf);
213     }
214
215   forgetindent = FALSE;
216   indenttab[icontexts] = indent;
217 #ifdef DEBUG
218           fprintf(stderr,"yyentercontext:indent=%d,yyindent=%d,yylindent=%d,INDENTPT[%d]=%d\n",indent,yyindent,yylindent,icontexts,INDENTPT);
219 #endif
220 }
221
222
223 /* Exit a layout context */
224
225 yyendindent()
226 {
227   --icontexts;
228 #ifdef DEBUG
229           fprintf(stderr,"yyendindent:yyindent=%d,yylindent=%d,INDENTPT[%d]=%d\n",yyindent,yylindent,icontexts,INDENTPT);
230 #endif
231 }
232
233
234
235
236 /* 
237  *      Return checks the indentation level and returns ;, } or the specified token.
238  */
239
240 #define RETURN(tok) return(Return(tok))
241
242 Return(tok)
243 int tok;
244 {
245   if(yyslindent != -1)
246     {
247       yylindent = yyslindent;
248       yyslindent = -1;
249     }
250   else
251     yylindent = yyindent-yyleng;
252
253   if (yyshouldindent())
254     {
255       if (yylindent < INDENTPT)
256         {
257 #ifdef DEBUG
258           fprintf(stderr,"inserted '}' before %d (%d:%d:%d:%d)\n",tok,yylindent,yyindent,yyleng,INDENTPT);
259 #endif
260           yysttok=tok;
261           return(VCCURLY);
262         }
263
264       else if (yylindent == INDENTPT)
265         {
266 #ifdef DEBUG
267           fprintf(stderr,"inserted ';' before %d (%d:%d)\n",tok,yylindent,INDENTPT);
268 #endif
269           yysttok = -tok;
270           return (SEMI);
271         }
272     }
273   yysttok = -1;
274   leof = FALSE;
275 #ifdef DEBUG
276           fprintf(stderr,"returning %d (%d:%d)\n",tok,yylindent,INDENTPT);
277 #endif
278   return(tok);
279 }
280
281
282 /**********************************************************************
283 *                                                                     *
284 *                                                                     *
285 *     Input Processing for Interfaces                                 *
286 *                                                                     *
287 *                                                                     *
288 **********************************************************************/
289
290
291 /* setyyin(file)        open file as new yyin */
292 /* partain: got rid of .ext stuff */
293 setyyin(file)
294 char *file;
295 {
296   char fbuf[FILENAME_SIZE];
297
298   strcpy(fbuf,file);
299
300   yyin_save = yyin;
301
302   if((yyin=fopen(fbuf,"r"))==NULL)
303     {
304       char errbuf[ERR_BUF_SIZE];
305       sprintf(errbuf,"can't read \"%-.50s\"", fbuf);
306       yyerror(errbuf);
307     }
308
309   yylineno_save = yylineno;
310   yylastlineno_save = yylastlineno;
311   yylineno = yylastlineno = 0;
312
313   yylastposn_save = yylastposn;
314   yyposn_save = yyposn;
315   yyposn = yylastposn = -1;
316
317   filename_save = xmalloc(strlen(input_filename)+1);
318   strcpy(filename_save,input_filename);
319   new_filename(fbuf);
320   yyindent_save = yyindent;
321   yylindent_save = yylindent;
322   yyindent = yylindent = 0;
323   yyentercontext(-1);           /* partain: changed this from 0 */
324   icontexts_save = icontexts;
325   yytchar_save = yytchar;
326 #ifdef DEBUG
327   fprintf(stderr,"yytchar = %c(%d)\n",yytchar,(int)yytchar);
328 #endif
329   yysptr = yysbuf;
330 #ifdef DEBUG
331   fprintf(stderr,"reading %s (%d:%d:%d)\n",input_filename,yyindent_save,yylindent_save,INDENTPT);
332 #endif
333 }
334   
335     
336
337 /*
338         input() is the raw input routine used by yylex()
339 */
340
341 #undef input                    /*  so we can define our own versions to handle layout */
342 #undef unput
343
344
345 static 
346 input()
347 {
348   if(yytchar==10)
349     yyindent = 0;                       /* Avoid problems with backtracking over EOL */
350
351   yytchar=yytchar==EOF?EOF:(++yyposn,yysptr>yysbuf?U(*--yysptr):getc(yyin));
352
353   if(yytchar==10)
354     {
355       yylinestart = yyposn;
356       yylineno++;
357     }
358
359   if (yytchar == '\t')
360     {
361       yytabindent = yyindent;           /* Remember TAB indentation - only 1, though! */
362       yyindent += 8 - (yyindent % 8);   /* Tabs stops are 8 columns apart */
363     }
364   else
365     ++yyindent;
366
367
368   /* Special EOF processing inserts all missing '}'s into the input stream */
369
370   if(yytchar==EOF)
371     {
372       if(icontexts>icontexts_save && !incomment)
373         {
374           if(INDENTON)
375             {
376               indenttab[icontexts] = 0;
377               indenteof = TRUE;
378               return('\002');
379             }
380           else
381               yyerror("missing '}' at end of file");
382         }
383
384       else if (yyin_save != NULL)
385           {
386             fclose(yyin);
387             yyin = yyin_save;
388             yyin_save = NULL;
389             new_filename(filename_save);
390             free(filename_save);
391             yylineno = yylineno_save;
392             yylastlineno = yylastlineno_save;
393             yyindent = 0;
394             yylindent = 0;
395             yyindent = yyindent_save;
396             yylindent = yylindent_save;
397             yyslindent = -1;
398             icontexts = icontexts_save -1;
399             icontexts_save = 0;
400             leof = TRUE;
401             yyposn = yyposn_save;
402             yylastposn = yylastposn_save;
403 #ifdef DEBUG
404   fprintf(stderr,"finished reading interface (%d:%d:%d)\n",yyindent,yylindent,INDENTPT);
405 #endif
406             return('\001');     /* YUCK */
407           }
408         else
409           return(0);
410     }
411   else
412     return(yytchar);
413 }
414
415 setstartlineno()
416 {
417   if(yytchar == 10)
418     startlineno = yylineno -1;
419   else
420     startlineno = yylineno;
421 }
422
423
424 /*
425  *      unput() backtracks over a character.  With luck it will never backtrack over
426  *              multiple EOLs and TABs (since these are lexical delimiters).
427  */
428
429 static
430 unput(c) 
431 char c;
432 {
433   /* fprintf(stderr,"Unputting %c\n",c); */
434
435   yytchar= (c);
436
437   if(yytchar=='\n' || yytchar=='\r')
438     yylineno--;
439
440   *yysptr++=yytchar;
441   if(c == '\t')
442     yyindent = yytabindent;
443   else
444     --yyindent;
445
446   --yyposn;
447 }
448
449
450 /* 
451  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
452  */
453
454 yylex()
455 {
456   if(yysttok != -1)
457     {
458       if(yysttok < 0)
459         {
460           int tok = -yysttok;
461           yysttok = -1;
462           return(tok);
463         }
464       RETURN(yysttok);
465     }
466   else
467     {
468       /* not quite right, and should take account of stacking */
469       yylastlineno = yylineno;
470       yylastposn = yyposn;
471       return(yylex1());
472     }
473 }
474
475 #define yylex() yylex1()
476 %}
477
478 %start PRIM
479
480 D                       [0-9]
481 O                       [0-7]
482 H                       [0-9A-Fa-f]
483 N                       {D}+
484 S                       [!#$%&*+./<=>?@\\^|~:]
485 NS                      [^!#$%&*+./<=>?@\\^|~:]
486 SId                     ({S}|~|-){S}*
487 Char                    [ !\"#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
488 L                       [A-Z]
489 I                       [A-Za-z]
490 i                       [A-Za-z0-9'_]
491 Id                      {I}({i})*
492 A                       (NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|SP|DEL)
493 WS                      [ \t\n\r\f]*
494
495 %e 1000
496 %o 2100
497 %a 2100
498 %p 3600
499 %n 490
500 %k 350
501
502 %%
503
504 ^"# ".*[\n\r]           {  char tempf[FILENAME_SIZE];
505                            sscanf(yytext+1, "%d \"%[^\"]", &yylineno, tempf); 
506                            new_filename(tempf); 
507                         }
508
509 ^"{-# LINE ".*"-}"[\n\r] { /* partain: pragma-style line directive */
510                           char tempf[FILENAME_SIZE];
511                           sscanf(yytext+9, "%d \"%[^\"]", &yylineno, tempf); 
512                           new_filename(tempf);
513                         }
514
515 "{-# ARITY "            { if ( ignorePragmas || ignoreArityPragmas ) {
516                              incomment = 1;
517                              readcomment();
518                              incomment = 0;
519                           } else {
520                              RETURN(ARITY_PRAGMA);
521                           }
522                         }
523 "{-# SPECIALIZE "       { if ( ignorePragmas || ignoreSpecializePragmas ) {
524                              incomment = 1;
525                              readcomment();
526                              incomment = 0;
527                           } else {
528                              RETURN(SPECIALIZE_PRAGMA);
529                           }
530                         }
531 "{-# STRICTNESS "       { if ( ignorePragmas || ignoreStrictnessPragmas ) {
532                              incomment = 1;
533                              readcomment();
534                              incomment = 0;
535                           } else {
536                              RETURN(STRICTNESS_PRAGMA);
537                           }
538                         }
539 "{-# UPDATE "           { if ( ignorePragmas || ignoreUpdatePragmas ) {
540                              incomment = 1;
541                              readcomment();
542                              incomment = 0;
543                           } else {
544                              RETURN(UPDATE_PRAGMA);
545                           }
546                         }
547
548 " #-}"                  { RETURN(END_PRAGMA); }
549
550 <PRIM>"void#"           { RETURN(VOIDPRIM); }
551 <PRIM>{Id}"#"           {       yynewid(yytext,yyleng);
552                                 RETURN(isconstr(yytext)? CONID: VARID);
553                                 /* Must appear before keywords -- KH */
554                         }
555
556 "case"                  { RETURN(CASE); }
557 "class"                 { RETURN(CLASS); }
558 "data"                  { RETURN(DATA); }
559 "default"               { RETURN(DEFAULT); }
560 "deriving"              { RETURN(DERIVING); }
561 "else"                  { RETURN(ELSE); }
562 "hiding"                { RETURN(HIDING); }
563 "if"                    { RETURN(IF); }
564 "import"                { RETURN(IMPORT); }
565 "infix"                 { RETURN(INFIX); }
566 "infixl"                { RETURN(INFIXL); }
567 "infixr"                { RETURN(INFIXR); }
568 "instance"              { RETURN(INSTANCE); }
569 "interface"             { RETURN(INTERFACE); }
570 "module"                { RETURN(MODULE); }
571 "of"                    { RETURN(OF); }
572 "renaming"              { RETURN(RENAMING); }
573 "then"                  { RETURN(THEN); }
574 "to"                    { RETURN(TO); }
575 "type"                  { RETURN(TYPE); }
576 "where"                 { RETURN(WHERE); }
577 "in"                    { RETURN(IN); }
578 "let"                   { RETURN(LET); }
579 "ccall"                 { RETURN(CCALL); }
580 "veryDangerousCcall"    { RETURN(CCALL_DANGEROUS); }
581 "casm"                  { RETURN(CASM); }
582 "veryDangerousCasm"     { RETURN(CASM_DANGEROUS); }
583 "scc"                   { RETURN(SCC); }
584
585 ".."                    { RETURN(DOTDOT); }
586 ";"                     { RETURN(SEMI); }
587 ","                     { RETURN(COMMA); }
588 "|"                     { RETURN(VBAR); }
589 "="                     { RETURN(EQUAL); }
590 "<-"                    { RETURN(LARROW); }
591 "->"                    { RETURN(RARROW); }
592 "=>"                    { RETURN(DARROW); }
593 "::"                    { RETURN(DCOLON); }
594 "("                     { RETURN(OPAREN); }
595 ")"                     { RETURN(CPAREN); }
596 "["                     { RETURN(OBRACK); }
597 "]"                     { RETURN(CBRACK); }
598 "{"                     { RETURN(OCURLY); }
599 "}"                     { RETURN(CCURLY); }
600 "+"                     { RETURN(PLUS); }
601 "@"                     { RETURN(AT); }
602 "\\"                    { RETURN(LAMBDA); }
603 "_"                     { RETURN(WILDCARD); }
604 "`"                     { RETURN(BQUOTE); }
605 "<<"                    { RETURN(OPOD); }
606 ">>"                    { RETURN(CPOD); }
607 "(|"                    { RETURN(OPROC); }
608 "|)"                    { RETURN(CPROC); }
609 "<<-"                   { RETURN(DRAWNFROM); }
610 "<<="                   { RETURN(INDEXFROM); }
611
612 <PRIM>("-")?{N}"#"      {
613                                 yytext[yyleng-1] = '\0';        /* clobber the # first */
614                                 yylval.uid = xstrdup(yytext);
615                                 RETURN(INTPRIM);
616                         }
617 {N}                     {
618                                 yylval.uid = xstrdup(yytext);
619                                 RETURN(INTEGER);
620                         }
621
622 <PRIM>{N}"."{N}(("e"|"E")("+"|"-")?{N})?"##"    {
623                                 yytext[yyleng-2] = '\0';        /* clobber the # first */
624                                 yylval.uid = xstrdup(yytext);
625                                 RETURN(DOUBLEPRIM);
626                         }
627
628 <PRIM>{N}"."{N}(("e"|"E")("+"|"-")?{N})?"#"    {
629                                 yytext[yyleng-1] = '\0';        /* clobber the # first */
630                                 yylval.uid = xstrdup(yytext);
631                                 RETURN(FLOATPRIM);
632                         }
633
634 {N}"."{N}(("e"|"E")("+"|"-")?{N})?    {
635                                 yylval.uid = xstrdup(yytext);
636                                 RETURN(FLOAT);
637                         }
638
639
640 <PRIM>"``"[^']+"''"     {       yytext[yyleng-2] = '\0';        /* clobber '' first */
641                                 yynewid(yytext+2,yyleng-2);
642                                 RETURN(CLITLIT);
643                         }
644
645 {Id}                    {       yynewid(yytext,yyleng);
646                                 RETURN(isconstr(yytext)? CONID: VARID);
647                         }
648
649 {SId}                   {       yynewid(yytext,yyleng);
650                                 if(yyleng == 1)
651                                   if (*yytext == '~')
652                                     return( LAZY );
653                                   else if ( *yytext == '-' )
654                                     return( MINUS );
655                                 RETURN(isconstr(yytext)? CONSYM: VARSYM);
656                         }
657
658 <PRIM>"`"{Id}"#`"       {       yynewid(yytext+1,yyleng-2);
659                                 RETURN(isconstr(yytext+1)? CONSYM: VARSYM);
660                         }
661
662 '{Char}'                {
663                                 yytext[2] = '\0';
664                                 yylval.uid = xstrdup(yytext);
665                                 RETURN(CHAR); 
666
667                           /* WDP note:
668                                 we don't yet return CHARPRIMs
669                                 (ToDo)
670                           */
671                         }
672
673 '\\(a|b|f|n|r|t|v)'     {
674                                 yytext[1] = escval(yytext[2]);
675                                 yytext[2] = '\0';
676                                 yylval.uid = xstrdup(yytext);
677                                 RETURN(CHAR);
678                         }
679
680 '\\(\"|\'|\\)'          {
681                                 yytext[1] = yytext[2];
682                                 yytext[2] = '\0';
683                                 yylval.uid = xstrdup(yytext);
684                                 RETURN(CHAR);
685                         }
686
687 '\\{A}'                 {       yytext[yyleng-1] = '\0';
688                                 if(strcmp(yytext+2,"DEL")==0)
689                                   {
690                                     yylval.uid = xstrdup("'\177");
691                                     RETURN(CHAR);
692                                   }
693                                 else
694                                   {
695                                     int a = lookupascii(yytext+2);
696                                     if(a >= 0)
697                                       {
698                                         yytext[1] = a;
699                                         yytext[2] = '\0';
700                                         yylval.uid = xstrdup(yytext);
701                                         RETURN(CHAR);
702                                       }
703                                     else
704                                       {
705                                         char errbuf[ERR_BUF_SIZE];
706                                         sprintf(errbuf,"invalid ASCII name in character constant: %s",yytext);
707                                         yyerror(errbuf);
708                                       }
709                                   }
710                         }
711
712 '\\{D}+'                {       if(convchar(yytext+2,yyleng-3,10))
713                                   RETURN(CHAR);
714                         }
715
716 '\\o{O}+'               {       if(convchar(yytext+3,yyleng-4,8))
717                                   RETURN(CHAR);
718                         }
719
720 '\\x{H}+'               {       if(convchar(yytext+3,yyleng-4,16))
721                                   RETURN(CHAR);
722                         }
723
724 '\\\^[A-Z\[\\\]^_]'     {       yytext[1] = yytext[3]-'A'+ 1;
725                                 yytext[2] = '\0';
726                                 yylval.uid = xstrdup(yytext);
727                                 RETURN(CHAR); 
728                         }
729
730 '\\\^@'                 {       yytext[1] = '\0'; /* partain: most doubtful... */
731                                 yytext[2] = '\0';
732                                 yylval.uid = xstrdup(yytext);
733                                 RETURN(CHAR); 
734                         }
735
736 "\""                    {
737                                 readstring();
738                                 yylval.uid = installString(yyleng, yytext);
739                                 RETURN(STRING); 
740                         }
741
742
743 "--".*[\n\r]            ;       /* hm-hm -style comment */
744
745 "\001"                  {       if (leof)
746                                   {
747                                     unput(yytchar_save);
748                                     RETURN(LEOF);
749                                   }
750
751                                 fprintf(stderr, "illegal char: %c (%d) in line %d\n",
752                                         yytext[0], yytext[0], yylineno); 
753                         }
754
755 "\002"                  {       if (indenteof)
756                                   {
757                                     indenteof = FALSE;
758                                     RETURN(VCCURLY);
759                                   }
760
761                                 fprintf(stderr, "illegal char: %c (%d) in line %d\n",
762                                         yytext[0], yytext[0], yylineno); 
763                         }
764
765 [\r\n \t\v\f]           ;
766
767 .                       { fprintf(stderr, "illegal char: %c (%d) in line %d\n",
768                                         yytext[0], yytext[0], yylineno); 
769                         }
770
771 "{-"                    {
772                                 incomment = 1;
773                                 readcomment();
774                                 incomment = 0;
775                         }
776 %%
777
778
779 /**********************************************************************
780 *                                                                     *
781 *                                                                     *
782 *     YACC/LEX Initialisation etc.                                    *
783 *                                                                     *
784 *                                                                     *
785 **********************************************************************/
786
787
788 /* 
789    We initialise input_filename to "<NONAME>".
790    This allows unnamed sources to be piped into the parser. 
791 */
792
793 yyinit()
794 {
795   extern BOOLEAN acceptPrim;
796
797   input_filename = xstrdup("<NONAME>");
798
799   yytchar = '\n';
800
801   if(acceptPrim)
802     BEGIN PRIM;
803 }
804
805
806 new_filename(f)
807 char *f;
808 {
809   if(input_filename != NULL)
810     free(input_filename);
811   input_filename = xstrdup(f);
812 }
813   
814
815
816 yywrap()
817 {
818         return(1);
819 }
820
821
822 /**********************************************************************
823 *                                                                     *
824 *                                                                     *
825 *                      Comment Handling                               *
826 *                                                                     *
827 *                                                                     *
828 **********************************************************************/
829
830
831
832 /*
833         readcomment()   reads Haskell nested comments {- ... -}
834                         Indentation is automatically taken care of since input() is used.
835
836                         While in principle this could be done using Lex rules, in
837                         practice it's easier and neater to use special code for this
838                         and for strings.
839 */
840
841 static readcomment()
842 {
843   int c;
844
845   do {
846     while ((c = input()) != '-' && !eof(c))
847       {
848         if(c=='{')
849           if ((c=input()) == '-')
850             readcomment();
851         
852           else if (eof(c))
853             {
854               yyerror("comment not terminated by end of file");
855             }
856       }
857
858     while (c == '-')
859       c = input();
860
861     if (c == '}')
862       break;
863
864     if (eof(c))
865       {
866         yyerror("comment not terminated by end of file");
867       }
868
869   } while (1);
870 }
871
872
873 /*
874     eof(c)      Returns TRUE when EOF read.
875 */
876
877 eof(c)
878 int c;
879 {
880   return (c == 0 || c == 1 && leof);
881 }
882
883
884
885 /**********************************************************************
886 *                                                                     *
887 *                                                                     *
888 *    Identifier Processing                                             *
889 *                                                                     *
890 *                                                                     *
891 **********************************************************************/
892
893
894 /*
895         yynewid         Enters an id of length n into the symbol table.
896 */
897
898 static yynewid(yyt,len)
899 char *yyt;
900 int len;
901 {
902   char yybuf[1024];
903   strcpy(yybuf,yyt);
904   yybuf[len] = '\0';
905   yylval.uid = installid(yybuf);
906 }
907
908
909 /*
910         isconstr(s)     True iff s is a constructor id.
911 */
912
913 isconstr(s)
914 char *s;
915 {
916   return(*s == ':' || isupper(*s));
917 }
918
919
920
921
922 /**********************************************************************
923 *                                                                     *
924 *                                                                     *
925 *     Character Kind Predicates                                       *
926 *                                                                     *
927 *                                                                     *
928 **********************************************************************/
929
930
931 /*
932  * ishspace(ch) determines whether ch is a valid Haskell space character
933  */
934
935
936 static int ishspace(ch)
937 char ch;
938 {
939   return(ch == '\n' || ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f');
940 }
941
942
943 /*
944  * isddigit(ch) determines whether ch is a valid Decimal digit
945  */
946
947
948 static int isddigit(ch)
949 char ch;
950 {
951  return (isdigit(ch));
952 }
953
954
955 /*
956  * ishexdigit(ch) determines whether ch is a valid Hexadecimal digit
957  */
958
959
960 static int ishexdigit(ch)
961 char ch;
962 {
963  return (isdigit(ch) || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'));
964 }
965
966 /*
967  * isodigit(ch) determines whether ch is a valid Octal digit
968  */
969
970
971 static int isodigit(ch)
972 char ch;
973 {
974  return ((ch >= '0' && ch <= '7'));
975 }
976
977
978 /**********************************************************************
979 *                                                                     *
980 *                                                                     *
981 *       Lexical Analysis of Strings  -- Gaps and escapes mean that    *
982 *            lex isn't (wo)man enough for this job.                   *
983 *                                                                     *
984 *                                                                     *
985 **********************************************************************/
986
987
988 /*
989  * readstring()         reads a string constant and places it in yytext
990  */
991
992 static readstring()
993 {
994   int ch, c;
995   
996   yyslindent = yyindent-1;
997
998   yyleng = 1;
999   yytext[1] = '\0';
1000
1001   do
1002     {
1003       ch = input();
1004
1005       if (ch == '\\')
1006         {
1007           ch = input();
1008
1009           if(isdigit(ch))
1010               ch = readescnum(isddigit,10,ch);
1011
1012           else if (ch == 'o')
1013             {
1014               ch = input();
1015               if(isodigit(ch))
1016                 ch = readescnum(isodigit,8,ch);
1017               else
1018                 {
1019                   char errbuf[ERR_BUF_SIZE];
1020                   sprintf(errbuf,"strange Octal character code (%c) in string",ch);
1021                   yyerror(errbuf);
1022                 }
1023             }
1024
1025           else if (ch == 'x')
1026             {
1027               ch = input();
1028               if(ishexdigit(ch))
1029                 ch = readescnum(ishexdigit,16,ch);
1030               else
1031                 {
1032                   char errbuf[ERR_BUF_SIZE];
1033                   sprintf(errbuf,"strange Hexadecimal character code (%c) in string",ch);
1034                   yyerror(errbuf);
1035                 }
1036             }
1037
1038           else if(ch == '"' || ch == '\\' || ch == '\'')
1039             /* SKIP */;
1040
1041           else if (isupper(ch))
1042             {
1043               if((ch = readasciiname(ch)) == -1)
1044                 yyerror("invalid ASCII name in string");
1045             }
1046           
1047           else if (ch == '^')
1048             {
1049               if(isupper(ch = input()) || (ch >= '[' && ch <= '_'))
1050                 ch = ch - 'A' + 1;
1051               else if (ch == '@')
1052                 ch = '\0';
1053               else
1054                 {
1055                   char errbuf[ERR_BUF_SIZE];
1056                   sprintf(errbuf,"strange control sequence (^%c) in string",ch);
1057                   yyerror(errbuf);
1058                 }
1059             }
1060
1061           else if (ishspace(ch))
1062             {
1063               /* partain: we may want clearer error msgs if \v, \f seen */
1064
1065               while (ch == '\t' || ch == ' ')
1066                 ch = input();
1067
1068               if (ch != '\n' && ch != '\r')
1069                 yyerror("newline not seen when expected in string gap");
1070               else
1071                 ch = input();
1072                 
1073               while (ch == '\t' || ch == ' ')
1074                 ch = input();
1075
1076               if(ch != '\\')
1077                 yyerror("trailing \\ not seen when expected in string gap");
1078               
1079               ch = -1;
1080             }
1081
1082           else if (ch == 'a')
1083             ch = '\007';
1084
1085           else if (ch == 'b')
1086             ch = '\b';
1087
1088           else if (ch == 'f')
1089             ch = '\f';
1090
1091           else if (ch == 'n')
1092             ch = '\n';
1093
1094           else if (ch == 'r')
1095             ch = '\r';
1096
1097           else if (ch == 't')
1098             ch = '\t';
1099
1100           else if (ch == 'v')
1101             ch = '\v';
1102
1103           else if (ch == '&')
1104             ch = -1;
1105
1106           else
1107             {
1108               char errbuf[ERR_BUF_SIZE];
1109               sprintf(errbuf,"invalid escape sequence (\\%c) in string",ch);
1110               yyerror(errbuf);
1111             }
1112         }
1113
1114       else if (ch == '\n' || ch == '\r' || ch == '\f' || ch == '\v' || ch == 0 || ch == '"')
1115         break;
1116
1117       else if (!isprint(ch) && !ishspace(ch))
1118         {
1119           char errbuf[ERR_BUF_SIZE];
1120           sprintf(errbuf,"invalid character (%c) in string",ch);
1121           yyerror(errbuf);
1122         }
1123
1124       if((yyleng < YYLMAX-3 && ch != -1) || (yyleng == YYLMAX-3 && (ch == '\t' || ch == '\\')))
1125         {
1126           /* The LML back-end treats \\ and \t specially in strings... */
1127
1128           if(ch == '\t' || ch == '\\')
1129             {
1130               yytext[yyleng++] = '\\';
1131               if (ch == '\t')
1132                 ch = 't';
1133             }
1134           if(yyleng<YYLMAX-2)
1135             {
1136               yytext[yyleng++] = ch;
1137               yytext[yyleng] = '\0';
1138             }
1139         }
1140       else if (ch != -1)
1141         {
1142           char errbuf[ERR_BUF_SIZE];
1143           sprintf(errbuf,"string too long (> %d characters)",YYLMAX-3-2);
1144           yyerror(errbuf);
1145         }
1146     }
1147   while(1);
1148
1149   if (ch != '"')
1150     yyerror("string incorrectly terminated");
1151
1152   else
1153     {
1154       yytext[yyleng++] = '"';
1155       yytext[yyleng] = '\0';
1156     }
1157 #ifdef DEBUG
1158   fprintf(stderr,"string: %s (%d chars)\n",yytext,yyleng-2);
1159 #endif
1160 }
1161
1162
1163
1164 /**********************************************************************
1165 *                                                                     *
1166 *                                                                     *
1167 *      Haskell String and Character Escape Codes                      *
1168 *                                                                     *
1169 *                                                                     *
1170 **********************************************************************/
1171
1172
1173 /* Names of ASCII control characters, used in strings and character constants */
1174
1175 static char *asciinames[] =
1176   {
1177     "NUL",      "SOH",  "STX",  "ETX",  "EOT",  "ENQ",  "ACK",  "BEL",  "BS",   "HT",
1178     "LF",       "VT",   "FF",   "CR",   "SO",   "SI",   "DLE",  "DC1",  "DC2",  "DC3",
1179     "DC4",      "NAK",  "SYN",  "ETB",  "CAN",  "EM",   "SUB",  "ESC",  "FS",   "GS",
1180     "RS",       "US",   "SP",   "DEL"
1181     };
1182
1183
1184 /*
1185  * readasciiname()      read ASCII name and translate to an ASCII code
1186  *                      -1 indicates invalid name
1187  */
1188
1189 static int readasciiname(ch)
1190 int ch;
1191 {
1192   char asciiname[4];
1193
1194   asciiname[0] = ch;
1195   if(!isupper(asciiname[1]= input()))
1196     {
1197       unput(asciiname[1]);
1198       return(-1);
1199     }
1200
1201   if(!isupper(asciiname[2]=input()))
1202     {
1203       /* partain: have to have something extra for DC[1-4] */
1204       if (asciiname[0] == 'D' && asciiname[1] == 'C' && isdigit(asciiname[2])) {
1205           asciiname[3] = '\0';
1206       } else {
1207           unput(asciiname[2]);
1208           asciiname[2] = '\0';
1209       }
1210     }
1211   else
1212     asciiname[3] = '\0';
1213
1214   if (strcmp(asciiname,"DEL") == 0)
1215     return('\177');
1216
1217   else
1218     return(lookupascii(asciiname));
1219 }
1220
1221
1222 /*
1223    lookupascii(ascii)   look up ascii in asciinames[]
1224
1225    returns -1 if ascii is not found, otherwise its index.
1226 */
1227
1228 static int lookupascii(ascii)
1229 char *ascii;
1230 {
1231   int i;
1232   for(i='\0'; i <= ' '; ++i)
1233     if(strcmp(ascii,asciinames[i])==0)
1234       return(i);
1235   return(-1);
1236 }
1237
1238
1239 /**********************************************************************
1240 *                                                                     *
1241 *                                                                     *
1242 *      Numeric Escapes in Characters/Strings                          *
1243 *                                                                     *
1244 *                                                                     *
1245 **********************************************************************/
1246
1247 int convnum(num,numlen,base)
1248 char *num;
1249 int numlen, base;
1250 {
1251   int i, res = 0, mul;
1252
1253   for (i = numlen-1, mul = 1; i >= 0; --i, mul *= base)
1254     {
1255       if(isdigit(num[i]))
1256         res += (num[i] - '0') * mul;
1257       else if (isupper(num[i]))
1258         res += (num[i] - 'A' + 10) * mul;
1259       else if (islower(num[i]))
1260         res += (num[i] - 'a' + 10) * mul;
1261     }
1262   return(res);
1263 }
1264
1265 convchar(num,numlen,base)
1266 char *num;
1267 int numlen, base;
1268 {
1269   int n = convnum(num,numlen,base);
1270   if (n <= MAX_ESC_CHAR)
1271     {
1272       yytext[1] = n;
1273       yytext[2] = '\0';
1274       yylval.uid = xstrdup(yytext);
1275       return(1);
1276     }
1277   else
1278     {
1279       char errbuf[ERR_BUF_SIZE];
1280       sprintf(errbuf,"ASCII code > %d in character constant",MAX_ESC_CHAR);
1281       yyerror(errbuf);
1282     }
1283 }
1284
1285 readescnum(isadigit,mulbase,ch)
1286 int (*isadigit)();
1287 int mulbase;
1288 int ch;
1289 {
1290   char digit[MAX_ESC_DIGITS];
1291   int digcount;
1292
1293   digcount = 1;
1294   digit[0] = ch;
1295   
1296   while((*isadigit)(ch=input()))
1297     {
1298       if(digcount < MAX_ESC_DIGITS)
1299         digit[digcount] = ch;
1300       ++digcount;
1301     }
1302
1303   unput(ch);
1304
1305   if(digcount > MAX_ESC_DIGITS)
1306     {
1307       char errbuf[ERR_BUF_SIZE];
1308       sprintf(errbuf,"numeric character code too long (> %d characters) in string",MAX_ESC_DIGITS);
1309       yyerror(errbuf);
1310     }
1311
1312   ch = convnum(digit,digcount,mulbase);
1313   
1314   if (ch > MAX_ESC_CHAR)
1315     {
1316       char errbuf[ERR_BUF_SIZE];
1317       sprintf(errbuf,"character code > ASCII %d in string",MAX_ESC_CHAR);
1318       yyerror(errbuf);
1319     }
1320
1321   return(ch);
1322 }
1323
1324
1325 /*
1326   escval(c)     return the value of an escaped character.
1327
1328                 \a      BELL
1329                 \b      BACKSPACE
1330                 \f      FORMFEED
1331                 \n      NEWLINE
1332                 \r      CARRIAGE RETURN
1333                 \t      TAB
1334                 \v      VERTICAL TAB
1335
1336    These definitions are standard ANSI C values.
1337 */
1338
1339 static char escval(c)
1340 char c;
1341 {
1342   return(c == 'a'? '\007': c == 'b'? '\b': c == 'f'? '\f': c == 'n'? '\n':
1343          c == 'r'? '\r': c == 't'? '\t': c == 'v'? '\v': '\0');
1344 }
1345
1346 /*
1347   OLD: Lexical analysis for Haskell pragmas.
1348 */
1349
1350 #if 0
1351 static parse_pragma(s,len)
1352 char *s;
1353 int len;
1354 {
1355   char pragma_name[1024];
1356   char identifier[1024];
1357   char value[1024];
1358   int i;
1359
1360   *(s+len) = '\0';
1361
1362   while(isspace(*s))
1363     s++;
1364
1365   /* Pragma name */
1366   for(i=0; !isspace(*s); ++i, ++s)
1367     pragma_name[i] = *s;
1368   pragma_name[i] = '\0';
1369
1370   while(isspace(*s))
1371     s++;
1372
1373   /* Identifier */
1374   for(i=0; !isspace(*s); ++i, ++s)
1375     identifier[i] = *s;
1376   identifier[i] = '\0';
1377
1378   while(isspace(*s))
1379     s++;
1380
1381   /* equals */
1382   s++;
1383
1384   while(isspace(*s))
1385     s++;
1386
1387   /* Value */
1388   for(i=0; !isspace(*s); ++i, ++s)
1389     value[i] = *s;
1390   value[i] = '\0';
1391
1392   pragmatype = installid(pragma_name);
1393   pragmaid = installid(identifier);
1394   pragmavalue = xstrdup(value);
1395 }
1396
1397 #endif /* 0 */