[project @ 1999-06-01 16:15:42 by simonmar]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
index dac635e..a3bb035 100644 (file)
@@ -114,6 +114,7 @@ static BOOLEAN noGap = TRUE;        /* For checking string gaps */
 static BOOLEAN forgetindent = FALSE;   /* Don't bother applying indentation rules */
 
 static int nested_comments;    /* For counting comment nesting depth */
+static int comment_start;
 
 /* OLD: Hacky definition of yywrap: see flex doc.
 
@@ -137,6 +138,8 @@ static void new_filename PROTO((char *));
 static int  Return      PROTO((int));
 static void hsentercontext PROTO((int));
 
+static BOOLEAN is_commment PROTO((char*, int));
+
 /* Special file handling for IMPORTS */
 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 
@@ -242,7 +245,7 @@ F                           {N}"."{N}(("e"|"E")("+"|"-")?{N})?
 S                      [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
 SId                    {S}{S}*
 L                      [A-Z\xc0-\xd6\xd8-\xde]
-l                      [a-z\xdf-\xf6\xf8-\xff]
+l                      [a-z_\xdf-\xf6\xf8-\xff]
 I                      {L}|{l}
 i                      {L}|{l}|[0-9'_]
 Id                     {I}{i}*
@@ -268,7 +271,6 @@ NL                          [\n\r]
      */
 %}
 
-<Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
 <Code,GlaExt,UserPragma,StringEsc>{WS}+        { noGap = FALSE; }
 
 %{
@@ -276,8 +278,19 @@ NL                         [\n\r]
      * Special GHC pragma rules.  Do we need a start state for interface files,
      * so these won't be matched in source files? --JSM
      */
+
 %}
 
+%{
+/* I believe the next rule is not ever matched.
+
+   The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
+   Driver has now been modified to output `standard' {-# LINE ..-} pragmas
+   where possible, so the lexer should now never see cpp directives
+   like '# ' and '#line'.
+
+   -- SOF
+
 <Code,GlaExt>^"# ".*{NL}    {
                          char tempf[FILENAME_SIZE];
                          sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
@@ -291,6 +304,8 @@ NL                          [\n\r]
                          new_filename(tempf); 
                          hsplineno = hslineno; hscolno = 0; hspcolno = 0;
                        }
+*/
+%}
 
 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
                          /* partain: pragma-style line directive */
@@ -308,45 +323,61 @@ NL                        [\n\r]
                              PUSH_STATE(UserPragma);
                              RETURN(SPECIALISE_UPRAGMA);
                            }
+<Code,GlaExt>"{-#"{WS}*"speciali"[sz]e {
+                             PUSH_STATE(UserPragma);
+                             RETURN(SPECIALISE_UPRAGMA);
+                           }
 <Code,GlaExt>"{-#"{WS}*"INLINE" {
                              PUSH_STATE(UserPragma);
                              RETURN(INLINE_UPRAGMA);
                            }
+<Code,GlaExt>"{-#"{WS}*"RULES" {
+                             PUSH_STATE(Code); /* I'm not sure about this */
+                             RETURN(RULES_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"inline" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(INLINE_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"NOINLINE" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(NOINLINE_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"notInline" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(NOINLINE_UPRAGMA);
+                           }
 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
                              PUSH_STATE(UserPragma);
                              RETURN(MAGIC_UNFOLDING_UPRAGMA);
                            }
-<Code,GlaExt>"{-#"{WS}*"DEFOREST" {
-                              PUSH_STATE(UserPragma);
-                              RETURN(DEFOREST_UPRAGMA);
-                           }
 <Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
                              /* these are handled by hscpp */
-                             nested_comments =1;
+                             nested_comments =1; comment_start = hsplineno;
                               PUSH_STATE(Comment);
                            }
 <Code,GlaExt>"{-#"{WS}*"OPTIONS" {
                              /* these are for the driver! */
-                             nested_comments =1;
+                             nested_comments =1; comment_start = hsplineno;
                               PUSH_STATE(Comment);
                            }
 <Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
                              /* these are used by `make depend' and the
                                 compiler to indicate that a module should
                                 be imported from source */
-                             nested_comments =1;
+                             nested_comments =1; comment_start = hsplineno; 
                               RETURN(SOURCE_UPRAGMA); 
                            }
 
-<Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
+<Code,GlaExt>"{-#"{WS}*[a-zA-Z_]+ {
                              fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
                                input_filename, hsplineno);
                              format_string(stderr, (unsigned char *) yytext, yyleng);
                              fputs("'\n", stderr);
-                             nested_comments = 1;
+                             nested_comments = 1; comment_start = hsplineno;
                              PUSH_STATE(Comment);
                            }
-<UserPragma>"#-}"          { POP_STATE; RETURN(END_UPRAGMA); }
+<Code,GlaExt,UserPragma>"#-}"      { POP_STATE; RETURN(END_UPRAGMA); }
 
 %{
     /*
@@ -381,11 +412,26 @@ NL                        [\n\r]
 <Code,GlaExt>"hiding"          { RETURN(HIDING); }
 <Code,GlaExt>"qualified"       { RETURN(QUALIFIED); }
 
+<Code,GlaExt>"forall"          { RETURN(FORALL); }
+
 <Code,GlaExt>"_scc_"           { RETURN(SCC); }
 <GlaExt>"_ccall_"              { RETURN(CCALL); }
 <GlaExt>"_ccall_GC_"           { RETURN(CCALL_GC); }
 <GlaExt>"_casm_"               { RETURN(CASM); }
 <GlaExt>"_casm_GC_"            { RETURN(CASM_GC); }
+<GlaExt>"(#"                   { RETURN(OUNBOXPAREN); }
+<GlaExt>"#)"                   { RETURN(CUNBOXPAREN); }
+<GlaExt>"foreign"              { RETURN(FOREIGN); }
+<GlaExt>"export"               { RETURN(EXPORT); }
+<GlaExt>"label"                        { RETURN(LABEL); }
+<GlaExt>"unsafe"               { RETURN(UNSAFE); }
+<GlaExt>"_stdcall"             { RETURN(STDCALL); }
+<GlaExt>"_ccall"               { RETURN(C_CALL); }
+<GlaExt>"_pascal"              { RETURN(PASCAL); }
+<GlaExt>"stdcall"              { RETURN(STDCALL); }
+<GlaExt>"ccall"                        { RETURN(C_CALL); }
+<GlaExt>"pascal"               { RETURN(PASCAL); }
+<GlaExt>"dynamic"              { RETURN(DYNAMIC); }
 
 %{
     /* 
@@ -402,8 +448,8 @@ NL                          [\n\r]
 <Code,GlaExt,UserPragma>","    { RETURN(COMMA); }
 <Code,GlaExt>";"               { RETURN(SEMI); }
 <Code,GlaExt>"`"               { RETURN(BQUOTE); }
-<Code,GlaExt>"_"               { RETURN(WILDCARD); }
 
+<Code,GlaExt>"."               { RETURN(DOT); }
 <Code,GlaExt>".."              { RETURN(DOTDOT); }
 <Code,GlaExt,UserPragma>"::"   { RETURN(DCOLON); }
 <Code,GlaExt,UserPragma>"="    { RETURN(EQUAL); }
@@ -507,8 +553,16 @@ NL                         [\n\r]
                         RETURN(isconstr(yytext) ? CONID : VARID);
                        }
 <Code,GlaExt,UserPragma>{SId}  {
-                        hsnewid(yytext, yyleng);
-                        RETURN(isconstr(yytext) ? CONSYM : VARSYM);
+                        if (is_commment(yytext,yyleng)) {
+                               int c;
+                               while ((c = input()) != '\n' && c != '\r' && c!= EOF )
+                                       ;
+                               if (c != EOF)
+                                  unput(c);
+                        } else {
+                           hsnewid(yytext, yyleng);
+                           RETURN(isconstr(yytext) ? CONSYM : VARSYM);
+                        }
                        }
 <Code,GlaExt,UserPragma>{Mod}"."{Id}"#"        {
                         BOOLEAN is_constr;
@@ -708,6 +762,19 @@ NL                         [\n\r]
 <CharEsc>\\                    { addchar(*yytext); POP_STATE; }
 <StringEsc>\\          { if (noGap) { addchar(*yytext); } POP_STATE; }
 
+%{
+/*
+ Not 100% correct, tokenizes "foo \  --<>--
+                                 \ bar"
+
+ as "foo  bar", but this is not correct as per Haskell 98 report and its
+ maximal munch rule for "--"-style comments.
+
+ For the moment, not deemed worthy to fix.
+*/
+%}
+<StringEsc>"--"[^\n\r]*{NL}?{WS}*  { noGap=FALSE; }
+
 <CharEsc,StringEsc>["']        { addchar(*yytext); POP_STATE; }
 <CharEsc,StringEsc>NUL         { addchar('\000'); POP_STATE; }
 <CharEsc,StringEsc>SOH         { addchar('\001'); POP_STATE; }
@@ -798,7 +865,7 @@ NL                          [\n\r]
 %}
 
 <Code,GlaExt,UserPragma,StringEsc>"{-" { 
-                         noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
+                         noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment); 
                        }
 
 <Comment>[^-{]*        |
@@ -808,6 +875,7 @@ NL                          [\n\r]
 <Comment>"-}"          { if (--nested_comments == 0) POP_STATE; }
 <Comment>(.|\n)                ;
 
+
 %{
     /*
      * Illegal characters.  This used to be a single rule, but we might as well
@@ -881,8 +949,10 @@ NL                         [\n\r]
                          hsperror("unterminated character literal");
                        }
 <Comment><<EOF>>       { 
+                         char errbuf[ERR_BUF_SIZE];
                          hsplineno = hslineno; hspcolno = hscolno;
-                         hsperror("unterminated comment"); 
+                         sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
+                         hsperror(errbuf); 
                        }
 <String,StringEsc><<EOF>>   { 
                          hsplineno = hslineno; hspcolno = hscolno;
@@ -945,6 +1015,11 @@ new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
        forcing insertion of ; or } as appropriate
 */
 
+#ifdef HSP_DEBUG
+#define LAYOUT_DEBUG
+#endif
+
+
 static BOOLEAN
 hsshouldindent(void)
 {
@@ -956,7 +1031,7 @@ hsshouldindent(void)
 void
 hssetindent(void)
 {
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 
@@ -985,7 +1060,7 @@ hssetindent(void)
 void
 hsincindent(void)
 {
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
     hsentercontext(indenttab[icontexts] & ~1);
@@ -1013,7 +1088,7 @@ hsentercontext(int indent)
     }
     forgetindent = FALSE;
     indenttab[icontexts] = indent;
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 }
@@ -1024,7 +1099,7 @@ void
 hsendindent(void)
 {
     --icontexts;
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 }
@@ -1032,14 +1107,12 @@ hsendindent(void)
 /*
  *     Return checks the indentation level and returns ;, } or the specified token.
  */
-
 static int
 Return(int tok)
 {
 #ifdef HSP_DEBUG
     extern int yyleng;
 #endif
-
     if (hsshouldindent()) {
        if (hspcolno < INDENTPT) {
 #ifdef HSP_DEBUG
@@ -1055,6 +1128,7 @@ Return(int tok)
            return (SEMI);
        }
     }
+
     hssttok = -1;
 #ifdef HSP_DEBUG
     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
@@ -1315,3 +1389,21 @@ hsnewqid(char *name, int length)
 
     return isconstr(dot+1);
 }
+
+static
+BOOLEAN
+is_commment(char* lexeme, int len)
+{
+   char* ptr;
+   int i;
+       
+   if (len < 2) {
+      return FALSE;
+   }
+
+   for(i=0;i<len;i++) {
+     if (lexeme[i] != '-') return FALSE;
+   }        
+   return TRUE;
+}
+