ghc/compiler/yaccParser/hslexer-DPH.lex

   1 %{
   2 /**********************************************************************
   3 *                                                                     *
   4 *                                                                     *
   5 *       LEX grammar for Haskell.                                      *
   6 *       ------------------------                                      *
   7 *                                                                     *
   8 *       (c) Copyright K. Hammond, University of Glasgow,              *
   9 *               10th. February 1989                                   *
  10 *                                                                     *
  11 *       Modification History                                          *
  12 *       --------------------                                          *
  13 *                                                                     *
  14 *       22/08/91 kh             Initial Haskell 1.1 version.          *
  15 *       18/10/91 kh             Added 'ccall'.                        *
  16 *       19/11/91 kh             Tidied generally.                     *
  17 *       04/12/91 kh             Added Int#.                           *
  18 *       31/01/92 kh             Haskell 1.2 version.                  *
  19 *       19/03/92 Jon Hill       Added Data Parallel Notation          *
  20 *       24/04/92 ps             Added 'scc'.                          *
  21 *       03/06/92 kh             Changed Infix/Prelude Handling.       *
  22 *                                                                     *
  23 *                                                                     *
  24 *       Known Problems:                                               *
  25 *                                                                     *
  26 *               None, any more.                                       *
  27 *                                                                     *
  28 **********************************************************************/
  29
  30 #include "include.h"
  31 #include "hsparser-DPH.tab.h"
  32 #include <stdio.h>
  33 #include <ctype.h>
  34 #include "constants.h"
  35
  36 char    *input_filename = NULL;
  37
  38 #include "utils.h"
  39
  40
  41 /**********************************************************************
  42 *                                                                     *
  43 *                                                                     *
  44 *      Declarations                                                   *
  45 *                                                                     *
  46 *                                                                     *
  47 **********************************************************************/
  48
  49
  50 extern int yylineno;
  51 unsigned yylastlineno = 0;      /* Line number of previous token */
  52 unsigned startlineno = 0;       /* temp; used to save the line no where something starts */
  53 int yylastposn = 0;             /* Absolute position of last token */
  54 int yylinestart = 0;            /* Absolute position of line start */
  55
  56 static int yyposn = 0;
  57
  58 /* Essential forward declarations */
  59
  60 static int readstring(), readasciiname(), readcomment(),
  61            lookupascii(), yynewid() /* OLD:, parse_pragma()*/;
  62 static char escval();
  63
  64 static BOOLEAN incomment = FALSE;
  65 static unsigned commentdepth = 0;
  66
  67 static BOOLEAN indenteof = FALSE;
  68
  69 /* Pragmas */
  70 /* OLD: char *pragmatype, *pragmaid, *pragmavalue; */
  71
  72 /* Special file handling for IMPORTS */
  73
  74 static  FILE  *yyin_save = NULL;                /*  Saved File Pointer          */
  75 static  char  *filename_save;                   /*  File Name                   */
  76 static  int   yylineno_save = 0,                /*  Line Number                 */
  77               yyposn_save = 0,                  /*  This Token                  */
  78               yylastposn_save = 0,              /*  Last Token                  */
  79               yyindent_save,                    /*  Indentation                 */
  80               yylindent_save,                   /*  Left Indentation            */
  81               yytchar_save = 0,                 /*  Next Input Character        */
  82               icontexts_save = 0;               /*  Indent Context Level        */
  83 static unsigned yylastlineno_save = 0;          /*  Line Number of Prev. token  */
  84
  85 static BOOLEAN leof = FALSE;                    /*  EOF for interfaces          */
  86
  87
  88 extern BOOLEAN ignorePragmas;           /*  True when we should ignore pragmas */
  89 extern BOOLEAN ignoreArityPragmas;      /*  And various specific flavors... */
  90 extern BOOLEAN ignoreSpecializePragmas;
  91 extern BOOLEAN ignoreStrictnessPragmas;
  92 extern BOOLEAN ignoreUpdatePragmas;
  93
  94
  95
  96 /**********************************************************************
  97 *                                                                     *
  98 *                                                                     *
  99 *     Layout Processing                                               *
 100 *                                                                     *
 101 *                                                                     *
 102 **********************************************************************/
 103
 104
 105 /*
 106         The following section deals with Haskell Layout conventions
 107         forcing insertion of ; or } as appropriate
 108 */
 109
 110
 111 static short
 112         yyindent = 0,           /* Current indentation */
 113         yylindent = 0,          /* Indentation of the leftmost char in the current lexeme */
 114         yyslindent = -1,        /* Indentation of the leftmost char in a string */
 115         yytabindent = 0,        /* Indentation before a tab in case we have to backtrack */
 116         forgetindent = FALSE;   /* Don't bother applying indentation rules */
 117
 118 static int yysttok = -1;        /* Stacked Token:
 119                                         -1   -- no token;
 120                                         -ve  -- ";" inserted before token
 121                                         +ve  -- "}" inserted before token
 122                                 */
 123
 124 short icontexts = 0;            /* Which context we're in */
 125
 126
 127
 128 /*
 129         Table of indentations:  right bit indicates whether to use
 130           indentation rules (1 = use rules; 0 = ignore)
 131
 132     partain:
 133     push one of these "contexts" at every "case" or "where"; the right bit says
 134     whether user supplied braces,etc., or not.  pop appropriately (yyendindent).
 135
 136     ALSO, a push/pop when enter/exit a new file (e.g., on importing).  A -1 is
 137     pushed (the "column" for "module", "interface" and EOF).  The -1 from the initial
 138     push is shown just below.
 139
 140 */
 141
 142
 143 static short indenttab[MAX_CONTEXTS] = { -1 };
 144
 145 #define INDENTPT (indenttab[icontexts]>>1)
 146 #define INDENTON (indenttab[icontexts]&1)
 147
 148
 149 yyshouldindent()
 150 {
 151   return(!leof && !forgetindent && INDENTON);
 152 }
 153
 154
 155 /* Enter new context and set new indentation level */
 156 yysetindent()
 157 {
 158 #ifdef DEBUG
 159           fprintf(stderr,"yysetindent:yyindent=%d,yylindent=%d,INDENTPT[%d]=%d\n",yyindent,yylindent,icontexts,INDENTPT);
 160 #endif
 161
 162   /* partain: first chk that new indent won't be less than current one;
 163         this code doesn't make sense to me; yyindent tells the position of the _end_
 164         of the current token; what that has to do with indenting, I don't know.
 165   */
 166
 167
 168   if(yyindent-1 <= INDENTPT)
 169     {
 170       if (INDENTPT == -1)
 171           return;       /* Empty input OK for Haskell 1.1 */
 172       else
 173         {
 174           char errbuf[ERR_BUF_SIZE];
 175           sprintf(errbuf,"Layout error -- indentation should be > %d cols",INDENTPT);
 176           yyerror(errbuf);
 177         }
 178     }
 179   yyentercontext((yylindent << 1) | 1);
 180 }
 181
 182
 183 /* Enter a new context without changing the indentation level */
 184
 185 yyincindent()
 186 {
 187 #ifdef DEBUG
 188           fprintf(stderr,"yyincindent:yyindent=%d,yylindent=%d,INDENTPT[%d]=%d\n",yyindent,yylindent,icontexts,INDENTPT);
 189 #endif
 190   yyentercontext(indenttab[icontexts] & ~1);
 191 }
 192
 193
 194 /* Turn off indentation processing, usually because an explicit "{" has been seen */
 195
 196 yyindentoff()
 197 {
 198   forgetindent = TRUE;
 199 }
 200
 201
 202 /* Enter a new layout context. */
 203
 204 yyentercontext(indent)
 205 int indent;
 206 {
 207   /* Enter new context and set indentation as specified */
 208   if(++icontexts >= MAX_CONTEXTS)
 209     {
 210       char errbuf[ERR_BUF_SIZE];
 211       sprintf(errbuf,"'wheres' and 'cases' nested too deeply (>%d)", MAX_CONTEXTS-1);
 212       yyerror(errbuf);
 213     }
 214
 215   forgetindent = FALSE;
 216   indenttab[icontexts] = indent;
 217 #ifdef DEBUG
 218           fprintf(stderr,"yyentercontext:indent=%d,yyindent=%d,yylindent=%d,INDENTPT[%d]=%d\n",indent,yyindent,yylindent,icontexts,INDENTPT);
 219 #endif
 220 }
 221
 222
 223 /* Exit a layout context */
 224
 225 yyendindent()
 226 {
 227   --icontexts;
 228 #ifdef DEBUG
 229           fprintf(stderr,"yyendindent:yyindent=%d,yylindent=%d,INDENTPT[%d]=%d\n",yyindent,yylindent,icontexts,INDENTPT);
 230 #endif
 231 }
 232
 233
 234
 235
 236 /*
 237  *      Return checks the indentation level and returns ;, } or the specified token.
 238  */
 239
 240 #define RETURN(tok) return(Return(tok))
 241
 242 Return(tok)
 243 int tok;
 244 {
 245   if(yyslindent != -1)
 246     {
 247       yylindent = yyslindent;
 248       yyslindent = -1;
 249     }
 250   else
 251     yylindent = yyindent-yyleng;
 252
 253   if (yyshouldindent())
 254     {
 255       if (yylindent < INDENTPT)
 256         {
 257 #ifdef DEBUG
 258           fprintf(stderr,"inserted '}' before %d (%d:%d:%d:%d)\n",tok,yylindent,yyindent,yyleng,INDENTPT);
 259 #endif
 260           yysttok=tok;
 261           return(VCCURLY);
 262         }
 263
 264       else if (yylindent == INDENTPT)
 265         {
 266 #ifdef DEBUG
 267           fprintf(stderr,"inserted ';' before %d (%d:%d)\n",tok,yylindent,INDENTPT);
 268 #endif
 269           yysttok = -tok;
 270           return (SEMI);
 271         }
 272     }
 273   yysttok = -1;
 274   leof = FALSE;
 275 #ifdef DEBUG
 276           fprintf(stderr,"returning %d (%d:%d)\n",tok,yylindent,INDENTPT);
 277 #endif
 278   return(tok);
 279 }
 280
 281
 282 /**********************************************************************
 283 *                                                                     *
 284 *                                                                     *
 285 *     Input Processing for Interfaces                                 *
 286 *                                                                     *
 287 *                                                                     *
 288 **********************************************************************/
 289
 290
 291 /* setyyin(file)        open file as new yyin */
 292 /* partain: got rid of .ext stuff */
 293 setyyin(file)
 294 char *file;
 295 {
 296   char fbuf[FILENAME_SIZE];
 297
 298   strcpy(fbuf,file);
 299
 300   yyin_save = yyin;
 301
 302   if((yyin=fopen(fbuf,"r"))==NULL)
 303     {
 304       char errbuf[ERR_BUF_SIZE];
 305       sprintf(errbuf,"can't read \"%-.50s\"", fbuf);
 306       yyerror(errbuf);
 307     }
 308
 309   yylineno_save = yylineno;
 310   yylastlineno_save = yylastlineno;
 311   yylineno = yylastlineno = 0;
 312
 313   yylastposn_save = yylastposn;
 314   yyposn_save = yyposn;
 315   yyposn = yylastposn = -1;
 316
 317   filename_save = xmalloc(strlen(input_filename)+1);
 318   strcpy(filename_save,input_filename);
 319   new_filename(fbuf);
 320   yyindent_save = yyindent;
 321   yylindent_save = yylindent;
 322   yyindent = yylindent = 0;
 323   yyentercontext(-1);           /* partain: changed this from 0 */
 324   icontexts_save = icontexts;
 325   yytchar_save = yytchar;
 326 #ifdef DEBUG
 327   fprintf(stderr,"yytchar = %c(%d)\n",yytchar,(int)yytchar);
 328 #endif
 329   yysptr = yysbuf;
 330 #ifdef DEBUG
 331   fprintf(stderr,"reading %s (%d:%d:%d)\n",input_filename,yyindent_save,yylindent_save,INDENTPT);
 332 #endif
 333 }
 334
 335
 336
 337 /*
 338         input() is the raw input routine used by yylex()
 339 */
 340
 341 #undef input                    /*  so we can define our own versions to handle layout */
 342 #undef unput
 343
 344
 345 static
 346 input()
 347 {
 348   if(yytchar==10)
 349     yyindent = 0;                       /* Avoid problems with backtracking over EOL */
 350
 351   yytchar=yytchar==EOF?EOF:(++yyposn,yysptr>yysbuf?U(*--yysptr):getc(yyin));
 352
 353   if(yytchar==10)
 354     {
 355       yylinestart = yyposn;
 356       yylineno++;
 357     }
 358
 359   if (yytchar == '\t')
 360     {
 361       yytabindent = yyindent;           /* Remember TAB indentation - only 1, though! */
 362       yyindent += 8 - (yyindent % 8);   /* Tabs stops are 8 columns apart */
 363     }
 364   else
 365     ++yyindent;
 366
 367
 368   /* Special EOF processing inserts all missing '}'s into the input stream */
 369
 370   if(yytchar==EOF)
 371     {
 372       if(icontexts>icontexts_save && !incomment)
 373         {
 374           if(INDENTON)
 375             {
 376               indenttab[icontexts] = 0;
 377               indenteof = TRUE;
 378               return('\002');
 379             }
 380           else
 381               yyerror("missing '}' at end of file");
 382         }
 383
 384       else if (yyin_save != NULL)
 385           {
 386             fclose(yyin);
 387             yyin = yyin_save;
 388             yyin_save = NULL;
 389             new_filename(filename_save);
 390             free(filename_save);
 391             yylineno = yylineno_save;
 392             yylastlineno = yylastlineno_save;
 393             yyindent = 0;
 394             yylindent = 0;
 395             yyindent = yyindent_save;
 396             yylindent = yylindent_save;
 397             yyslindent = -1;
 398             icontexts = icontexts_save -1;
 399             icontexts_save = 0;
 400             leof = TRUE;
 401             yyposn = yyposn_save;
 402             yylastposn = yylastposn_save;
 403 #ifdef DEBUG
 404   fprintf(stderr,"finished reading interface (%d:%d:%d)\n",yyindent,yylindent,INDENTPT);
 405 #endif
 406             return('\001');     /* YUCK */
 407           }
 408         else
 409           return(0);
 410     }
 411   else
 412     return(yytchar);
 413 }
 414
 415 setstartlineno()
 416 {
 417   if(yytchar == 10)
 418     startlineno = yylineno -1;
 419   else
 420     startlineno = yylineno;
 421 }
 422
 423
 424 /*
 425  *      unput() backtracks over a character.  With luck it will never backtrack over
 426  *              multiple EOLs and TABs (since these are lexical delimiters).
 427  */
 428
 429 static
 430 unput(c)
 431 char c;
 432 {
 433   /* fprintf(stderr,"Unputting %c\n",c); */
 434
 435   yytchar= (c);
 436
 437   if(yytchar=='\n' || yytchar=='\r')
 438     yylineno--;
 439
 440   *yysptr++=yytchar;
 441   if(c == '\t')
 442     yyindent = yytabindent;
 443   else
 444     --yyindent;
 445
 446   --yyposn;
 447 }
 448
 449
 450 /*
 451  *      Redefine yylex to check for stacked tokens, yylex1() is the original yylex()
 452  */
 453
 454 yylex()
 455 {
 456   if(yysttok != -1)
 457     {
 458       if(yysttok < 0)
 459         {
 460           int tok = -yysttok;
 461           yysttok = -1;
 462           return(tok);
 463         }
 464       RETURN(yysttok);
 465     }
 466   else
 467     {
 468       /* not quite right, and should take account of stacking */
 469       yylastlineno = yylineno;
 470       yylastposn = yyposn;
 471       return(yylex1());
 472     }
 473 }
 474
 475 #define yylex() yylex1()
 476 %}
 477
 478 %start PRIM
 479
 480 D                       [0-9]
 481 O                       [0-7]
 482 H                       [0-9A-Fa-f]
 483 N                       {D}+
 484 S                       [!#$%&*+./<=>?@\\^|~:]
 485 NS                      [^!#$%&*+./<=>?@\\^|~:]
 486 SId                     ({S}|~|-){S}*
 487 Char                    [ !\"#$%&()*+,\-./0-9:;<=>?@A-Z\[\]^_`a-z{|}~]
 488 L                       [A-Z]
 489 I                       [A-Za-z]
 490 i                       [A-Za-z0-9'_]
 491 Id                      {I}({i})*
 492 A                       (NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|SP|DEL)
 493 WS                      [ \t\n\r\f]*
 494
 495 %e 1000
 496 %o 2100
 497 %a 2100
 498 %p 3600
 499 %n 490
 500 %k 350
 501
 502 %%
 503
 504 ^"# ".*[\n\r]           {  char tempf[FILENAME_SIZE];
 505                            sscanf(yytext+1, "%d \"%[^\"]", &yylineno, tempf);
 506                            new_filename(tempf);
 507                         }
 508
 509 ^"{-# LINE ".*"-}"[\n\r] { /* partain: pragma-style line directive */
 510                           char tempf[FILENAME_SIZE];
 511                           sscanf(yytext+9, "%d \"%[^\"]", &yylineno, tempf);
 512                           new_filename(tempf);
 513                         }
 514
 515 "{-# ARITY "            { if ( ignorePragmas || ignoreArityPragmas ) {
 516                              incomment = 1;
 517                              readcomment();
 518                              incomment = 0;
 519                           } else {
 520                              RETURN(ARITY_PRAGMA);
 521                           }
 522                         }
 523 "{-# SPECIALIZE "       { if ( ignorePragmas || ignoreSpecializePragmas ) {
 524                              incomment = 1;
 525                              readcomment();
 526                              incomment = 0;
 527                           } else {
 528                              RETURN(SPECIALIZE_PRAGMA);
 529                           }
 530                         }
 531 "{-# STRICTNESS "       { if ( ignorePragmas || ignoreStrictnessPragmas ) {
 532                              incomment = 1;
 533                              readcomment();
 534                              incomment = 0;
 535                           } else {
 536                              RETURN(STRICTNESS_PRAGMA);
 537                           }
 538                         }
 539 "{-# UPDATE "           { if ( ignorePragmas || ignoreUpdatePragmas ) {
 540                              incomment = 1;
 541                              readcomment();
 542                              incomment = 0;
 543                           } else {
 544                              RETURN(UPDATE_PRAGMA);
 545                           }
 546                         }
 547
 548 " #-}"                  { RETURN(END_PRAGMA); }
 549
 550 <PRIM>"void#"           { RETURN(VOIDPRIM); }
 551 <PRIM>{Id}"#"           {       yynewid(yytext,yyleng);
 552                                 RETURN(isconstr(yytext)? CONID: VARID);
 553                                 /* Must appear before keywords -- KH */
 554                         }
 555
 556 "case"                  { RETURN(CASE); }
 557 "class"                 { RETURN(CLASS); }
 558 "data"                  { RETURN(DATA); }
 559 "default"               { RETURN(DEFAULT); }
 560 "deriving"              { RETURN(DERIVING); }
 561 "else"                  { RETURN(ELSE); }
 562 "hiding"                { RETURN(HIDING); }
 563 "if"                    { RETURN(IF); }
 564 "import"                { RETURN(IMPORT); }
 565 "infix"                 { RETURN(INFIX); }
 566 "infixl"                { RETURN(INFIXL); }
 567 "infixr"                { RETURN(INFIXR); }
 568 "instance"              { RETURN(INSTANCE); }
 569 "interface"             { RETURN(INTERFACE); }
 570 "module"                { RETURN(MODULE); }
 571 "of"                    { RETURN(OF); }
 572 "renaming"              { RETURN(RENAMING); }
 573 "then"                  { RETURN(THEN); }
 574 "to"                    { RETURN(TO); }
 575 "type"                  { RETURN(TYPE); }
 576 "where"                 { RETURN(WHERE); }
 577 "in"                    { RETURN(IN); }
 578 "let"                   { RETURN(LET); }
 579 "ccall"                 { RETURN(CCALL); }
 580 "veryDangerousCcall"    { RETURN(CCALL_DANGEROUS); }
 581 "casm"                  { RETURN(CASM); }
 582 "veryDangerousCasm"     { RETURN(CASM_DANGEROUS); }
 583 "scc"                   { RETURN(SCC); }
 584
 585 ".."                    { RETURN(DOTDOT); }
 586 ";"                     { RETURN(SEMI); }
 587 ","                     { RETURN(COMMA); }
 588 "|"                     { RETURN(VBAR); }
 589 "="                     { RETURN(EQUAL); }
 590 "<-"                    { RETURN(LARROW); }
 591 "->"                    { RETURN(RARROW); }
 592 "=>"                    { RETURN(DARROW); }
 593 "::"                    { RETURN(DCOLON); }
 594 "("                     { RETURN(OPAREN); }
 595 ")"                     { RETURN(CPAREN); }
 596 "["                     { RETURN(OBRACK); }
 597 "]"                     { RETURN(CBRACK); }
 598 "{"                     { RETURN(OCURLY); }
 599 "}"                     { RETURN(CCURLY); }
 600 "+"                     { RETURN(PLUS); }
 601 "@"                     { RETURN(AT); }
 602 "\\"                    { RETURN(LAMBDA); }
 603 "_"                     { RETURN(WILDCARD); }
 604 "`"                     { RETURN(BQUOTE); }
 605 "<<"                    { RETURN(OPOD); }
 606 ">>"                    { RETURN(CPOD); }
 607 "(|"                    { RETURN(OPROC); }
 608 "|)"                    { RETURN(CPROC); }
 609 "<<-"                   { RETURN(DRAWNFROM); }
 610 "<<="                   { RETURN(INDEXFROM); }
 611
 612 <PRIM>("-")?{N}"#"      {
 613                                 yytext[yyleng-1] = '\0';        /* clobber the # first */
 614                                 yylval.uid = xstrdup(yytext);
 615                                 RETURN(INTPRIM);
 616                         }
 617 {N}                     {
 618                                 yylval.uid = xstrdup(yytext);
 619                                 RETURN(INTEGER);
 620                         }
 621
 622 <PRIM>{N}"."{N}(("e"|"E")("+"|"-")?{N})?"##"    {
 623                                 yytext[yyleng-2] = '\0';        /* clobber the # first */
 624                                 yylval.uid = xstrdup(yytext);
 625                                 RETURN(DOUBLEPRIM);
 626                         }
 627
 628 <PRIM>{N}"."{N}(("e"|"E")("+"|"-")?{N})?"#"    {
 629                                 yytext[yyleng-1] = '\0';        /* clobber the # first */
 630                                 yylval.uid = xstrdup(yytext);
 631                                 RETURN(FLOATPRIM);
 632                         }
 633
 634 {N}"."{N}(("e"|"E")("+"|"-")?{N})?    {
 635                                 yylval.uid = xstrdup(yytext);
 636                                 RETURN(FLOAT);
 637                         }
 638
 639
 640 <PRIM>"``"[^']+"''"     {       yytext[yyleng-2] = '\0';        /* clobber '' first */
 641                                 yynewid(yytext+2,yyleng-2);
 642                                 RETURN(CLITLIT);
 643                         }
 644
 645 {Id}                    {       yynewid(yytext,yyleng);
 646                                 RETURN(isconstr(yytext)? CONID: VARID);
 647                         }
 648
 649 {SId}                   {       yynewid(yytext,yyleng);
 650                                 if(yyleng == 1)
 651                                   if (*yytext == '~')
 652                                     return( LAZY );
 653                                   else if ( *yytext == '-' )
 654                                     return( MINUS );
 655                                 RETURN(isconstr(yytext)? CONSYM: VARSYM);
 656                         }
 657
 658 <PRIM>"`"{Id}"#`"       {       yynewid(yytext+1,yyleng-2);
 659                                 RETURN(isconstr(yytext+1)? CONSYM: VARSYM);
 660                         }
 661
 662 '{Char}'                {
 663                                 yytext[2] = '\0';
 664                                 yylval.uid = xstrdup(yytext);
 665                                 RETURN(CHAR);
 666
 667                           /* WDP note:
 668                                 we don't yet return CHARPRIMs
 669                                 (ToDo)
 670                           */
 671                         }
 672
 673 '\\(a|b|f|n|r|t|v)'     {
 674                                 yytext[1] = escval(yytext[2]);
 675                                 yytext[2] = '\0';
 676                                 yylval.uid = xstrdup(yytext);
 677                                 RETURN(CHAR);
 678                         }
 679
 680 '\\(\"|\'|\\)'          {
 681                                 yytext[1] = yytext[2];
 682                                 yytext[2] = '\0';
 683                                 yylval.uid = xstrdup(yytext);
 684                                 RETURN(CHAR);
 685                         }
 686
 687 '\\{A}'                 {       yytext[yyleng-1] = '\0';
 688                                 if(strcmp(yytext+2,"DEL")==0)
 689                                   {
 690                                     yylval.uid = xstrdup("'\177");
 691                                     RETURN(CHAR);
 692                                   }
 693                                 else
 694                                   {
 695                                     int a = lookupascii(yytext+2);
 696                                     if(a >= 0)
 697                                       {
 698                                         yytext[1] = a;
 699                                         yytext[2] = '\0';
 700                                         yylval.uid = xstrdup(yytext);
 701                                         RETURN(CHAR);
 702                                       }
 703                                     else
 704                                       {
 705                                         char errbuf[ERR_BUF_SIZE];
 706                                         sprintf(errbuf,"invalid ASCII name in character constant: %s",yytext);
 707                                         yyerror(errbuf);
 708                                       }
 709                                   }
 710                         }
 711
 712 '\\{D}+'                {       if(convchar(yytext+2,yyleng-3,10))
 713                                   RETURN(CHAR);
 714                         }
 715
 716 '\\o{O}+'               {       if(convchar(yytext+3,yyleng-4,8))
 717                                   RETURN(CHAR);
 718                         }
 719
 720 '\\x{H}+'               {       if(convchar(yytext+3,yyleng-4,16))
 721                                   RETURN(CHAR);
 722                         }
 723
 724 '\\\^[A-Z\[\\\]^_]'     {       yytext[1] = yytext[3]-'A'+ 1;
 725                                 yytext[2] = '\0';
 726                                 yylval.uid = xstrdup(yytext);
 727                                 RETURN(CHAR);
 728                         }
 729
 730 '\\\^@'                 {       yytext[1] = '\0'; /* partain: most doubtful... */
 731                                 yytext[2] = '\0';
 732                                 yylval.uid = xstrdup(yytext);
 733                                 RETURN(CHAR);
 734                         }
 735
 736 "\""                    {
 737                                 readstring();
 738                                 yylval.uid = installString(yyleng, yytext);
 739                                 RETURN(STRING);
 740                         }
 741
 742
 743 "--".*[\n\r]            ;       /* hm-hm -style comment */
 744
 745 "\001"                  {       if (leof)
 746                                   {
 747                                     unput(yytchar_save);
 748                                     RETURN(LEOF);
 749                                   }
 750
 751                                 fprintf(stderr, "illegal char: %c (%d) in line %d\n",
 752                                         yytext[0], yytext[0], yylineno);
 753                         }
 754
 755 "\002"                  {       if (indenteof)
 756                                   {
 757                                     indenteof = FALSE;
 758                                     RETURN(VCCURLY);
 759                                   }
 760
 761                                 fprintf(stderr, "illegal char: %c (%d) in line %d\n",
 762                                         yytext[0], yytext[0], yylineno);
 763                         }
 764
 765 [\r\n \t\v\f]           ;
 766
 767 .                       { fprintf(stderr, "illegal char: %c (%d) in line %d\n",
 768                                         yytext[0], yytext[0], yylineno);
 769                         }
 770
 771 "{-"                    {
 772                                 incomment = 1;
 773                                 readcomment();
 774                                 incomment = 0;
 775                         }
 776 %%
 777
 778
 779 /**********************************************************************
 780 *                                                                     *
 781 *                                                                     *
 782 *     YACC/LEX Initialisation etc.                                    *
 783 *                                                                     *
 784 *                                                                     *
 785 **********************************************************************/
 786
 787
 788 /*
 789    We initialise input_filename to "<NONAME>".
 790    This allows unnamed sources to be piped into the parser.
 791 */
 792
 793 yyinit()
 794 {
 795   extern BOOLEAN acceptPrim;
 796
 797   input_filename = xstrdup("<NONAME>");
 798
 799   yytchar = '\n';
 800
 801   if(acceptPrim)
 802     BEGIN PRIM;
 803 }
 804
 805
 806 new_filename(f)
 807 char *f;
 808 {
 809   if(input_filename != NULL)
 810     free(input_filename);
 811   input_filename = xstrdup(f);
 812 }
 813
 814
 815
 816 yywrap()
 817 {
 818         return(1);
 819 }
 820
 821
 822 /**********************************************************************
 823 *                                                                     *
 824 *                                                                     *
 825 *                      Comment Handling                               *
 826 *                                                                     *
 827 *                                                                     *
 828 **********************************************************************/
 829
 830
 831
 832 /*
 833         readcomment()   reads Haskell nested comments {- ... -}
 834                         Indentation is automatically taken care of since input() is used.
 835
 836                         While in principle this could be done using Lex rules, in
 837                         practice it's easier and neater to use special code for this
 838                         and for strings.
 839 */
 840
 841 static readcomment()
 842 {
 843   int c;
 844
 845   do {
 846     while ((c = input()) != '-' && !eof(c))
 847       {
 848         if(c=='{')
 849           if ((c=input()) == '-')
 850             readcomment();
 851
 852           else if (eof(c))
 853             {
 854               yyerror("comment not terminated by end of file");
 855             }
 856       }
 857
 858     while (c == '-')
 859       c = input();
 860
 861     if (c == '}')
 862       break;
 863
 864     if (eof(c))
 865       {
 866         yyerror("comment not terminated by end of file");
 867       }
 868
 869   } while (1);
 870 }
 871
 872
 873 /*
 874     eof(c)      Returns TRUE when EOF read.
 875 */
 876
 877 eof(c)
 878 int c;
 879 {
 880   return (c == 0 || c == 1 && leof);
 881 }
 882
 883
 884
 885 /**********************************************************************
 886 *                                                                     *
 887 *                                                                     *
 888 *    Identifier Processing                                             *
 889 *                                                                     *
 890 *                                                                     *
 891 **********************************************************************/
 892
 893
 894 /*
 895         yynewid         Enters an id of length n into the symbol table.
 896 */
 897
 898 static yynewid(yyt,len)
 899 char *yyt;
 900 int len;
 901 {
 902   char yybuf[1024];
 903   strcpy(yybuf,yyt);
 904   yybuf[len] = '\0';
 905   yylval.uid = installid(yybuf);
 906 }
 907
 908
 909 /*
 910         isconstr(s)     True iff s is a constructor id.
 911 */
 912
 913 isconstr(s)
 914 char *s;
 915 {
 916   return(*s == ':' || isupper(*s));
 917 }
 918
 919
 920
 921
 922 /**********************************************************************
 923 *                                                                     *
 924 *                                                                     *
 925 *     Character Kind Predicates                                       *
 926 *                                                                     *
 927 *                                                                     *
 928 **********************************************************************/
 929
 930
 931 /*
 932  * ishspace(ch) determines whether ch is a valid Haskell space character
 933  */
 934
 935
 936 static int ishspace(ch)
 937 char ch;
 938 {
 939   return(ch == '\n' || ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f');
 940 }
 941
 942
 943 /*
 944  * isddigit(ch) determines whether ch is a valid Decimal digit
 945  */
 946
 947
 948 static int isddigit(ch)
 949 char ch;
 950 {
 951  return (isdigit(ch));
 952 }
 953
 954
 955 /*
 956  * ishexdigit(ch) determines whether ch is a valid Hexadecimal digit
 957  */
 958
 959
 960 static int ishexdigit(ch)
 961 char ch;
 962 {
 963  return (isdigit(ch) || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'));
 964 }
 965
 966 /*
 967  * isodigit(ch) determines whether ch is a valid Octal digit
 968  */
 969
 970
 971 static int isodigit(ch)
 972 char ch;
 973 {
 974  return ((ch >= '0' && ch <= '7'));
 975 }
 976
 977
 978 /**********************************************************************
 979 *                                                                     *
 980 *                                                                     *
 981 *       Lexical Analysis of Strings  -- Gaps and escapes mean that    *
 982 *            lex isn't (wo)man enough for this job.                   *
 983 *                                                                     *
 984 *                                                                     *
 985 **********************************************************************/
 986
 987
 988 /*
 989  * readstring()         reads a string constant and places it in yytext
 990  */
 991
 992 static readstring()
 993 {
 994   int ch, c;
 995
 996   yyslindent = yyindent-1;
 997
 998   yyleng = 1;
 999   yytext[1] = '\0';
1000
1001   do
1002     {
1003       ch = input();
1004
1005       if (ch == '\\')
1006         {
1007           ch = input();
1008
1009           if(isdigit(ch))
1010               ch = readescnum(isddigit,10,ch);
1011
1012           else if (ch == 'o')
1013             {
1014               ch = input();
1015               if(isodigit(ch))
1016                 ch = readescnum(isodigit,8,ch);
1017               else
1018                 {
1019                   char errbuf[ERR_BUF_SIZE];
1020                   sprintf(errbuf,"strange Octal character code (%c) in string",ch);
1021                   yyerror(errbuf);
1022                 }
1023             }
1024
1025           else if (ch == 'x')
1026             {
1027               ch = input();
1028               if(ishexdigit(ch))
1029                 ch = readescnum(ishexdigit,16,ch);
1030               else
1031                 {
1032                   char errbuf[ERR_BUF_SIZE];
1033                   sprintf(errbuf,"strange Hexadecimal character code (%c) in string",ch);
1034                   yyerror(errbuf);
1035                 }
1036             }
1037
1038           else if(ch == '"' || ch == '\\' || ch == '\'')
1039             /* SKIP */;
1040
1041           else if (isupper(ch))
1042             {
1043               if((ch = readasciiname(ch)) == -1)
1044                 yyerror("invalid ASCII name in string");
1045             }
1046
1047           else if (ch == '^')
1048             {
1049               if(isupper(ch = input()) || (ch >= '[' && ch <= '_'))
1050                 ch = ch - 'A' + 1;
1051               else if (ch == '@')
1052                 ch = '\0';
1053               else
1054                 {
1055                   char errbuf[ERR_BUF_SIZE];
1056                   sprintf(errbuf,"strange control sequence (^%c) in string",ch);
1057                   yyerror(errbuf);
1058                 }
1059             }
1060
1061           else if (ishspace(ch))
1062             {
1063               /* partain: we may want clearer error msgs if \v, \f seen */
1064
1065               while (ch == '\t' || ch == ' ')
1066                 ch = input();
1067
1068               if (ch != '\n' && ch != '\r')
1069                 yyerror("newline not seen when expected in string gap");
1070               else
1071                 ch = input();
1072
1073               while (ch == '\t' || ch == ' ')
1074                 ch = input();
1075
1076               if(ch != '\\')
1077                 yyerror("trailing \\ not seen when expected in string gap");
1078
1079               ch = -1;
1080             }
1081
1082           else if (ch == 'a')
1083             ch = '\007';
1084
1085           else if (ch == 'b')
1086             ch = '\b';
1087
1088           else if (ch == 'f')
1089             ch = '\f';
1090
1091           else if (ch == 'n')
1092             ch = '\n';
1093
1094           else if (ch == 'r')
1095             ch = '\r';
1096
1097           else if (ch == 't')
1098             ch = '\t';
1099
1100           else if (ch == 'v')
1101             ch = '\v';
1102
1103           else if (ch == '&')
1104             ch = -1;
1105
1106           else
1107             {
1108               char errbuf[ERR_BUF_SIZE];
1109               sprintf(errbuf,"invalid escape sequence (\\%c) in string",ch);
1110               yyerror(errbuf);
1111             }
1112         }
1113
1114       else if (ch == '\n' || ch == '\r' || ch == '\f' || ch == '\v' || ch == 0 || ch == '"')
1115         break;
1116
1117       else if (!isprint(ch) && !ishspace(ch))
1118         {
1119           char errbuf[ERR_BUF_SIZE];
1120           sprintf(errbuf,"invalid character (%c) in string",ch);
1121           yyerror(errbuf);
1122         }
1123
1124       if((yyleng < YYLMAX-3 && ch != -1) || (yyleng == YYLMAX-3 && (ch == '\t' || ch == '\\')))
1125         {
1126           /* The LML back-end treats \\ and \t specially in strings... */
1127
1128           if(ch == '\t' || ch == '\\')
1129             {
1130               yytext[yyleng++] = '\\';
1131               if (ch == '\t')
1132                 ch = 't';
1133             }
1134           if(yyleng<YYLMAX-2)
1135             {
1136               yytext[yyleng++] = ch;
1137               yytext[yyleng] = '\0';
1138             }
1139         }
1140       else if (ch != -1)
1141         {
1142           char errbuf[ERR_BUF_SIZE];
1143           sprintf(errbuf,"string too long (> %d characters)",YYLMAX-3-2);
1144           yyerror(errbuf);
1145         }
1146     }
1147   while(1);
1148
1149   if (ch != '"')
1150     yyerror("string incorrectly terminated");
1151
1152   else
1153     {
1154       yytext[yyleng++] = '"';
1155       yytext[yyleng] = '\0';
1156     }
1157 #ifdef DEBUG
1158   fprintf(stderr,"string: %s (%d chars)\n",yytext,yyleng-2);
1159 #endif
1160 }
1161
1162
1163
1164 /**********************************************************************
1165 *                                                                     *
1166 *                                                                     *
1167 *      Haskell String and Character Escape Codes                      *
1168 *                                                                     *
1169 *                                                                     *
1170 **********************************************************************/
1171
1172
1173 /* Names of ASCII control characters, used in strings and character constants */
1174
1175 static char *asciinames[] =
1176   {
1177     "NUL",      "SOH",  "STX",  "ETX",  "EOT",  "ENQ",  "ACK",  "BEL",  "BS",   "HT",
1178     "LF",       "VT",   "FF",   "CR",   "SO",   "SI",   "DLE",  "DC1",  "DC2",  "DC3",
1179     "DC4",      "NAK",  "SYN",  "ETB",  "CAN",  "EM",   "SUB",  "ESC",  "FS",   "GS",
1180     "RS",       "US",   "SP",   "DEL"
1181     };
1182
1183
1184 /*
1185  * readasciiname()      read ASCII name and translate to an ASCII code
1186  *                      -1 indicates invalid name
1187  */
1188
1189 static int readasciiname(ch)
1190 int ch;
1191 {
1192   char asciiname[4];
1193
1194   asciiname[0] = ch;
1195   if(!isupper(asciiname[1]= input()))
1196     {
1197       unput(asciiname[1]);
1198       return(-1);
1199     }
1200
1201   if(!isupper(asciiname[2]=input()))
1202     {
1203       /* partain: have to have something extra for DC[1-4] */
1204       if (asciiname[0] == 'D' && asciiname[1] == 'C' && isdigit(asciiname[2])) {
1205           asciiname[3] = '\0';
1206       } else {
1207           unput(asciiname[2]);
1208           asciiname[2] = '\0';
1209       }
1210     }
1211   else
1212     asciiname[3] = '\0';
1213
1214   if (strcmp(asciiname,"DEL") == 0)
1215     return('\177');
1216
1217   else
1218     return(lookupascii(asciiname));
1219 }
1220
1221
1222 /*
1223    lookupascii(ascii)   look up ascii in asciinames[]
1224
1225    returns -1 if ascii is not found, otherwise its index.
1226 */
1227
1228 static int lookupascii(ascii)
1229 char *ascii;
1230 {
1231   int i;
1232   for(i='\0'; i <= ' '; ++i)
1233     if(strcmp(ascii,asciinames[i])==0)
1234       return(i);
1235   return(-1);
1236 }
1237
1238
1239 /**********************************************************************
1240 *                                                                     *
1241 *                                                                     *
1242 *      Numeric Escapes in Characters/Strings                          *
1243 *                                                                     *
1244 *                                                                     *
1245 **********************************************************************/
1246
1247 int convnum(num,numlen,base)
1248 char *num;
1249 int numlen, base;
1250 {
1251   int i, res = 0, mul;
1252
1253   for (i = numlen-1, mul = 1; i >= 0; --i, mul *= base)
1254     {
1255       if(isdigit(num[i]))
1256         res += (num[i] - '0') * mul;
1257       else if (isupper(num[i]))
1258         res += (num[i] - 'A' + 10) * mul;
1259       else if (islower(num[i]))
1260         res += (num[i] - 'a' + 10) * mul;
1261     }
1262   return(res);
1263 }
1264
1265 convchar(num,numlen,base)
1266 char *num;
1267 int numlen, base;
1268 {
1269   int n = convnum(num,numlen,base);
1270   if (n <= MAX_ESC_CHAR)
1271     {
1272       yytext[1] = n;
1273       yytext[2] = '\0';
1274       yylval.uid = xstrdup(yytext);
1275       return(1);
1276     }
1277   else
1278     {
1279       char errbuf[ERR_BUF_SIZE];
1280       sprintf(errbuf,"ASCII code > %d in character constant",MAX_ESC_CHAR);
1281       yyerror(errbuf);
1282     }
1283 }
1284
1285 readescnum(isadigit,mulbase,ch)
1286 int (*isadigit)();
1287 int mulbase;
1288 int ch;
1289 {
1290   char digit[MAX_ESC_DIGITS];
1291   int digcount;
1292
1293   digcount = 1;
1294   digit[0] = ch;
1295
1296   while((*isadigit)(ch=input()))
1297     {
1298       if(digcount < MAX_ESC_DIGITS)
1299         digit[digcount] = ch;
1300       ++digcount;
1301     }
1302
1303   unput(ch);
1304
1305   if(digcount > MAX_ESC_DIGITS)
1306     {
1307       char errbuf[ERR_BUF_SIZE];
1308       sprintf(errbuf,"numeric character code too long (> %d characters) in string",MAX_ESC_DIGITS);
1309       yyerror(errbuf);
1310     }
1311
1312   ch = convnum(digit,digcount,mulbase);
1313
1314   if (ch > MAX_ESC_CHAR)
1315     {
1316       char errbuf[ERR_BUF_SIZE];
1317       sprintf(errbuf,"character code > ASCII %d in string",MAX_ESC_CHAR);
1318       yyerror(errbuf);
1319     }
1320
1321   return(ch);
1322 }
1323
1324
1325 /*
1326   escval(c)     return the value of an escaped character.
1327
1328                 \a      BELL
1329                 \b      BACKSPACE
1330                 \f      FORMFEED
1331                 \n      NEWLINE
1332                 \r      CARRIAGE RETURN
1333                 \t      TAB
1334                 \v      VERTICAL TAB
1335
1336    These definitions are standard ANSI C values.
1337 */
1338
1339 static char escval(c)
1340 char c;
1341 {
1342   return(c == 'a'? '\007': c == 'b'? '\b': c == 'f'? '\f': c == 'n'? '\n':
1343          c == 'r'? '\r': c == 't'? '\t': c == 'v'? '\v': '\0');
1344 }
1345
1346 /*
1347   OLD: Lexical analysis for Haskell pragmas.
1348 */
1349
1350 #if 0
1351 static parse_pragma(s,len)
1352 char *s;
1353 int len;
1354 {
1355   char pragma_name[1024];
1356   char identifier[1024];
1357   char value[1024];
1358   int i;
1359
1360   *(s+len) = '\0';
1361
1362   while(isspace(*s))
1363     s++;
1364
1365   /* Pragma name */
1366   for(i=0; !isspace(*s); ++i, ++s)
1367     pragma_name[i] = *s;
1368   pragma_name[i] = '\0';
1369
1370   while(isspace(*s))
1371     s++;
1372
1373   /* Identifier */
1374   for(i=0; !isspace(*s); ++i, ++s)
1375     identifier[i] = *s;
1376   identifier[i] = '\0';
1377
1378   while(isspace(*s))
1379     s++;
1380
1381   /* equals */
1382   s++;
1383
1384   while(isspace(*s))
1385     s++;
1386
1387   /* Value */
1388   for(i=0; !isspace(*s); ++i, ++s)
1389     value[i] = *s;
1390   value[i] = '\0';
1391
1392   pragmatype = installid(pragma_name);
1393   pragmaid = installid(identifier);
1394   pragmavalue = xstrdup(value);
1395 }
1396
1397 #endif /* 0 */