[project @ 1999-06-01 16:15:42 by simonmar]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
index ab3300e..a3bb035 100644 (file)
@@ -7,7 +7,8 @@
 *                                                                     *
 **********************************************************************/
 
-#include "../../includes/config.h"
+/* The includes/config.h one */
+#include "config.h"
 
 #include <stdio.h>
 
@@ -113,6 +114,7 @@ static BOOLEAN noGap = TRUE;        /* For checking string gaps */
 static BOOLEAN forgetindent = FALSE;   /* Don't bother applying indentation rules */
 
 static int nested_comments;    /* For counting comment nesting depth */
+static int comment_start;
 
 /* OLD: Hacky definition of yywrap: see flex doc.
 
@@ -136,6 +138,8 @@ static void new_filename PROTO((char *));
 static int  Return      PROTO((int));
 static void hsentercontext PROTO((int));
 
+static BOOLEAN is_commment PROTO((char*, int));
+
 /* Special file handling for IMPORTS */
 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 
@@ -241,7 +245,7 @@ F                           {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 S                      [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
 SId                    {S}{S}*
 L                      [A-Z\xc0-\xd6\xd8-\xde]
-l                      [a-z\xdf-\xf6\xf8-\xff]
+l                      [a-z_\xdf-\xf6\xf8-\xff]
 I                      {L}|{l}
 i                      {L}|{l}|[0-9'_]
 Id                     {I}{i}*
@@ -254,12 +258,39 @@ NL                        [\n\r]
 %%
 
 %{
+    /*
+     * Simple comments and whitespace.  Normally, we would just ignore these, but
+     * in case we're processing a string escape, we need to note that we've seen
+     * a gap.
+     *
+     * Note that we cater for a comment line that *doesn't* end in a newline.
+     * This is incorrect, strictly speaking, but seems like the right thing
+     * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
+     *
+     * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
+     */
+%}
+
+<Code,GlaExt,UserPragma,StringEsc>{WS}+        { noGap = FALSE; }
+
+%{
     /* 
      * Special GHC pragma rules.  Do we need a start state for interface files,
      * so these won't be matched in source files? --JSM
      */
+
 %}
 
+%{
+/* I believe the next rule is not ever matched.
+
+   The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
+   Driver has now been modified to output `standard' {-# LINE ..-} pragmas
+   where possible, so the lexer should now never see cpp directives
+   like '# ' and '#line'.
+
+   -- SOF
+
 <Code,GlaExt>^"# ".*{NL}    {
                          char tempf[FILENAME_SIZE];
                          sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
@@ -273,6 +304,8 @@ NL                          [\n\r]
                          new_filename(tempf); 
                          hsplineno = hslineno; hscolno = 0; hspcolno = 0;
                        }
+*/
+%}
 
 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
                          /* partain: pragma-style line directive */
@@ -290,32 +323,61 @@ NL                        [\n\r]
                              PUSH_STATE(UserPragma);
                              RETURN(SPECIALISE_UPRAGMA);
                            }
+<Code,GlaExt>"{-#"{WS}*"speciali"[sz]e {
+                             PUSH_STATE(UserPragma);
+                             RETURN(SPECIALISE_UPRAGMA);
+                           }
 <Code,GlaExt>"{-#"{WS}*"INLINE" {
                              PUSH_STATE(UserPragma);
                              RETURN(INLINE_UPRAGMA);
                            }
+<Code,GlaExt>"{-#"{WS}*"RULES" {
+                             PUSH_STATE(Code); /* I'm not sure about this */
+                             RETURN(RULES_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"inline" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(INLINE_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"NOINLINE" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(NOINLINE_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"notInline" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(NOINLINE_UPRAGMA);
+                           }
 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
                              PUSH_STATE(UserPragma);
                              RETURN(MAGIC_UNFOLDING_UPRAGMA);
                            }
-<Code,GlaExt>"{-#"{WS}*"DEFOREST" {
-                              PUSH_STATE(UserPragma);
-                              RETURN(DEFOREST_UPRAGMA);
-                           }
 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
                              /* these are handled by hscpp */
-                             nested_comments =1;
+                             nested_comments =1; comment_start = hsplineno;
                               PUSH_STATE(Comment);
                            }
-<Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
-                             fprintf(stderr, "\"%s\", line %d: Warning: Unrecognised pragma '",
+<Code,GlaExt>"{-#"{WS}*"OPTIONS" {
+                             /* these are for the driver! */
+                             nested_comments =1; comment_start = hsplineno;
+                              PUSH_STATE(Comment);
+                           }
+<Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
+                             /* these are used by `make depend' and the
+                                compiler to indicate that a module should
+                                be imported from source */
+                             nested_comments =1; comment_start = hsplineno; 
+                              RETURN(SOURCE_UPRAGMA); 
+                           }
+
+<Code,GlaExt>"{-#"{WS}*[a-zA-Z_]+ {
+                             fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
                                input_filename, hsplineno);
                              format_string(stderr, (unsigned char *) yytext, yyleng);
                              fputs("'\n", stderr);
-                             nested_comments = 1;
+                             nested_comments = 1; comment_start = hsplineno;
                              PUSH_STATE(Comment);
                            }
-<UserPragma>"#-}"          { POP_STATE; RETURN(END_UPRAGMA); }
+<Code,GlaExt,UserPragma>"#-}"      { POP_STATE; RETURN(END_UPRAGMA); }
 
 %{
     /*
@@ -350,11 +412,26 @@ NL                        [\n\r]
 <Code,GlaExt>"hiding"          { RETURN(HIDING); }
 <Code,GlaExt>"qualified"       { RETURN(QUALIFIED); }
 
+<Code,GlaExt>"forall"          { RETURN(FORALL); }
+
 <Code,GlaExt>"_scc_"           { RETURN(SCC); }
 <GlaExt>"_ccall_"              { RETURN(CCALL); }
 <GlaExt>"_ccall_GC_"           { RETURN(CCALL_GC); }
 <GlaExt>"_casm_"               { RETURN(CASM); }
 <GlaExt>"_casm_GC_"            { RETURN(CASM_GC); }
+<GlaExt>"(#"                   { RETURN(OUNBOXPAREN); }
+<GlaExt>"#)"                   { RETURN(CUNBOXPAREN); }
+<GlaExt>"foreign"              { RETURN(FOREIGN); }
+<GlaExt>"export"               { RETURN(EXPORT); }
+<GlaExt>"label"                        { RETURN(LABEL); }
+<GlaExt>"unsafe"               { RETURN(UNSAFE); }
+<GlaExt>"_stdcall"             { RETURN(STDCALL); }
+<GlaExt>"_ccall"               { RETURN(C_CALL); }
+<GlaExt>"_pascal"              { RETURN(PASCAL); }
+<GlaExt>"stdcall"              { RETURN(STDCALL); }
+<GlaExt>"ccall"                        { RETURN(C_CALL); }
+<GlaExt>"pascal"               { RETURN(PASCAL); }
+<GlaExt>"dynamic"              { RETURN(DYNAMIC); }
 
 %{
     /* 
@@ -371,8 +448,8 @@ NL                          [\n\r]
 <Code,GlaExt,UserPragma>","    { RETURN(COMMA); }
 <Code,GlaExt>";"               { RETURN(SEMI); }
 <Code,GlaExt>"`"               { RETURN(BQUOTE); }
-<Code,GlaExt>"_"               { RETURN(WILDCARD); }
 
+<Code,GlaExt>"."               { RETURN(DOT); }
 <Code,GlaExt>".."              { RETURN(DOTDOT); }
 <Code,GlaExt,UserPragma>"::"   { RETURN(DCOLON); }
 <Code,GlaExt,UserPragma>"="    { RETURN(EQUAL); }
@@ -381,6 +458,7 @@ NL                          [\n\r]
 <Code,GlaExt>"<-"              { RETURN(LARROW); }
 <Code,GlaExt,UserPragma>"->"   { RETURN(RARROW); }
 <Code,GlaExt>"-"               { RETURN(MINUS); }
+<Code,GlaExt>"+"               { RETURN(PLUS); }
 
 <Code,GlaExt,UserPragma>"=>"   { RETURN(DARROW); }
 <Code,GlaExt>"@"               { RETURN(AT); }
@@ -461,8 +539,8 @@ NL                          [\n\r]
 %{
 /* These SHOULDNAE work in "Code" (sigh) */
 %}
-<Code,GlaExt,UserPragma>{Id}"#" { 
-                        if (! nonstandardFlag) {
+<GlaExt,UserPragma>{Id}"#" { 
+                       if (! nonstandardFlag) {
                            char errbuf[ERR_BUF_SIZE];
                            sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
                            hsperror(errbuf);
@@ -475,8 +553,26 @@ NL                         [\n\r]
                         RETURN(isconstr(yytext) ? CONID : VARID);
                        }
 <Code,GlaExt,UserPragma>{SId}  {
-                        hsnewid(yytext, yyleng);
-                        RETURN(isconstr(yytext) ? CONSYM : VARSYM);
+                        if (is_commment(yytext,yyleng)) {
+                               int c;
+                               while ((c = input()) != '\n' && c != '\r' && c!= EOF )
+                                       ;
+                               if (c != EOF)
+                                  unput(c);
+                        } else {
+                           hsnewid(yytext, yyleng);
+                           RETURN(isconstr(yytext) ? CONSYM : VARSYM);
+                        }
+                       }
+<Code,GlaExt,UserPragma>{Mod}"."{Id}"#"        {
+                        BOOLEAN is_constr;
+                        if (! nonstandardFlag) {
+                           char errbuf[ERR_BUF_SIZE];
+                           sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
+                           hsperror(errbuf);
+                        }
+                        is_constr = hsnewqid(yytext, yyleng);
+                        RETURN(is_constr ? QCONID : QVARID);
                        }
 <Code,GlaExt,UserPragma>{Mod}"."{Id}   {
                         BOOLEAN is_constr = hsnewqid(yytext, yyleng);
@@ -548,7 +644,7 @@ NL                          [\n\r]
                         }
 
                         if (length > 1) {
-                           fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
+                           fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
                              input_filename, hsplineno, hspcolno + 1);
                            format_string(stderr, (unsigned char *) text, length);
                            fputs("' too long\n", stderr);
@@ -567,7 +663,7 @@ NL                          [\n\r]
                         text = fetchtext(&length);
 
                         if (length > 1) {
-                           fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
+                           fprintf(stderr, "%s:%d:%d: Character literal '",
                              input_filename, hsplineno, hspcolno + 1);
                            format_string(stderr, (unsigned char *) text, length);
                            fputs("' too long\n", stderr);
@@ -666,6 +762,19 @@ NL                         [\n\r]
 <CharEsc>\\                    { addchar(*yytext); POP_STATE; }
 <StringEsc>\\          { if (noGap) { addchar(*yytext); } POP_STATE; }
 
+%{
+/*
+ Not 100% correct, tokenizes "foo \  --<>--
+                                 \ bar"
+
+ as "foo  bar", but this is not correct as per Haskell 98 report and its
+ maximal munch rule for "--"-style comments.
+
+ For the moment, not deemed worthy to fix.
+*/
+%}
+<StringEsc>"--"[^\n\r]*{NL}?{WS}*  { noGap=FALSE; }
+
 <CharEsc,StringEsc>["']        { addchar(*yytext); POP_STATE; }
 <CharEsc,StringEsc>NUL         { addchar('\000'); POP_STATE; }
 <CharEsc,StringEsc>SOH         { addchar('\001'); POP_STATE; }
@@ -746,20 +855,6 @@ NL                         [\n\r]
                          POP_STATE;
                        }
 
-%{
-    /*
-     * Simple comments and whitespace.  Normally, we would just ignore these, but
-     * in case we're processing a string escape, we need to note that we've seen
-     * a gap.
-     *
-     * Note that we cater for a comment line that *doesn't* end in a newline.
-     * This is incorrect, strictly speaking, but seems like the right thing
-     * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
-     */
-%}
-
-<Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
-<Code,GlaExt,UserPragma,StringEsc>{WS}+        { noGap = FALSE; }
 
 %{
     /*
@@ -770,7 +865,7 @@ NL                          [\n\r]
 %}
 
 <Code,GlaExt,UserPragma,StringEsc>"{-" { 
-                         noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
+                         noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment); 
                        }
 
 <Comment>[^-{]*        |
@@ -780,6 +875,7 @@ NL                          [\n\r]
 <Comment>"-}"          { if (--nested_comments == 0) POP_STATE; }
 <Comment>(.|\n)                ;
 
+
 %{
     /*
      * Illegal characters.  This used to be a single rule, but we might as well
@@ -789,21 +885,21 @@ NL                        [\n\r]
 %}
 
 <INITIAL,Code,GlaExt,UserPragma>(.|\n) { 
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
+                        fprintf(stderr, "%s:%d:%d: Illegal character: `", 
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("'\n", stderr);
                         hsperror("");
                        }
 <Char>(.|\n)           { 
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
+                        fprintf(stderr, "%s:%d:%d: Illegal character: `",
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("' in a character literal\n", stderr);
                         hsperror("");
                        }
 <CharEsc>(.|\n)                {
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
+                        fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("'\n", stderr);
@@ -812,7 +908,7 @@ NL                          [\n\r]
 <String>(.|\n)         { if (nonstandardFlag) {
                              addtext(yytext, yyleng);
                           } else { 
-                                fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
+                                fprintf(stderr, "%s:%d:%d: Illegal character: `", 
                                 input_filename, hsplineno, hspcolno + 1); 
                                 format_string(stderr, (unsigned char *) yytext, 1);
                                 fputs("' in a string literal\n", stderr);
@@ -821,13 +917,13 @@ NL                        [\n\r]
                        }
 <StringEsc>(.|\n)      {
                         if (noGap) {
-                            fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
+                            fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
                                input_filename, hsplineno, hspcolno + 1); 
                             format_string(stderr, (unsigned char *) yytext, 1);
                             fputs("'\n", stderr);
                             hsperror("");
                         } else {
-                            fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
+                            fprintf(stderr, "%s:%d:%d: Illegal character: `",
                                input_filename, hsplineno, hspcolno + 1);
                             format_string(stderr, (unsigned char *) yytext, 1);
                             fputs("' in a string gap\n", stderr);
@@ -853,8 +949,10 @@ NL                         [\n\r]
                          hsperror("unterminated character literal");
                        }
 <Comment><<EOF>>       { 
+                         char errbuf[ERR_BUF_SIZE];
                          hsplineno = hslineno; hspcolno = hscolno;
-                         hsperror("unterminated comment"); 
+                         sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
+                         hsperror(errbuf); 
                        }
 <String,StringEsc><<EOF>>   { 
                          hsplineno = hslineno; hspcolno = hscolno;
@@ -917,6 +1015,11 @@ new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
        forcing insertion of ; or } as appropriate
 */
 
+#ifdef HSP_DEBUG
+#define LAYOUT_DEBUG
+#endif
+
+
 static BOOLEAN
 hsshouldindent(void)
 {
@@ -928,7 +1031,7 @@ hsshouldindent(void)
 void
 hssetindent(void)
 {
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 
@@ -957,7 +1060,7 @@ hssetindent(void)
 void
 hsincindent(void)
 {
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
     hsentercontext(indenttab[icontexts] & ~1);
@@ -985,7 +1088,7 @@ hsentercontext(int indent)
     }
     forgetindent = FALSE;
     indenttab[icontexts] = indent;
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 }
@@ -996,7 +1099,7 @@ void
 hsendindent(void)
 {
     --icontexts;
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 }
@@ -1004,14 +1107,12 @@ hsendindent(void)
 /*
  *     Return checks the indentation level and returns ;, } or the specified token.
  */
-
 static int
 Return(int tok)
 {
 #ifdef HSP_DEBUG
     extern int yyleng;
 #endif
-
     if (hsshouldindent()) {
        if (hspcolno < INDENTPT) {
 #ifdef HSP_DEBUG
@@ -1027,6 +1128,7 @@ Return(int tok)
            return (SEMI);
        }
     }
+
     hssttok = -1;
 #ifdef HSP_DEBUG
     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
@@ -1287,3 +1389,21 @@ hsnewqid(char *name, int length)
 
     return isconstr(dot+1);
 }
+
+static
+BOOLEAN
+is_commment(char* lexeme, int len)
+{
+   char* ptr;
+   int i;
+       
+   if (len < 2) {
+      return FALSE;
+   }
+
+   for(i=0;i<len;i++) {
+     if (lexeme[i] != '-') return FALSE;
+   }        
+   return TRUE;
+}
+