[project @ 1998-08-14 12:07:18 by sof]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
index f66949f..a5adef3 100644 (file)
@@ -7,7 +7,8 @@
 *                                                                     *
 **********************************************************************/
 
-#include "../../includes/config.h"
+/* The includes/config.h one */
+#include "config.h"
 
 #include <stdio.h>
 
@@ -41,9 +42,6 @@
 #define _O      0x8
 #define _C     0x10
 
-#define _isconstr(s)   (CharTable[*s]&(_C))
-BOOLEAN isconstr PROTO((char *)); /* fwd decl */
-
 static unsigned char CharTable[NCHARS] = {
 /* nul */      0,      0,      0,      0,      0,      0,      0,      0,
 /* bs  */      0,      _S,     _S,     _S,     _S,     0,      0,      0,
@@ -80,6 +78,12 @@ static unsigned char CharTable[NCHARS] = {
 /*     */      0,      0,      0,      0,      0,      0,      0,      0,
 };
 
+BOOLEAN
+isconstr (char *s)
+{
+    return(CharTable[*s]&(_C));
+}
+
 /**********************************************************************
 *                                                                     *
 *                                                                     *
@@ -111,15 +115,15 @@ static BOOLEAN forgetindent = FALSE;      /* Don't bother applying indentation rules
 
 static int nested_comments;    /* For counting comment nesting depth */
 
-/* Hacky definition of yywrap: see flex doc.
+/* OLD: Hacky definition of yywrap: see flex doc.
 
    If we don't do this, then we'll have to get the default
    yywrap from the flex library, which is often something
    we are not good at locating.  This avoids that difficulty.
    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
    WDP 94/09/05
-*/
 #define yywrap() 1
+*/
 
 /* Essential forward declarations */
 
@@ -193,26 +197,21 @@ static short indenttab[MAX_CONTEXTS] = {-1};
 #endif
 
 /* Each time we enter a new start state, we push it onto the state stack.
-   Note that the rules do not allow us to underflow or overflow the stack.
-   (At least, they shouldn't.)  The maximum expected depth is 4:
-   0: Code -> 1: String -> 2: StringEsc -> 3: Comment
 */
-static int StateStack[5];
-static int StateDepth = -1;
-
-#ifdef HSP_DEBUG
-#define PUSH_STATE(n)   do {\
-    fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
-    StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
-#define POP_STATE       do {--StateDepth;\
-    fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
-    BEGIN(StateStack[StateDepth]);} while(0)
-#else
-#define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
-#define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
-#endif
+#define PUSH_STATE(n)   yy_push_state(n)
+#define POP_STATE       yy_pop_state()
 
 %}
+/* Options:
+    8bit (8-bit input)
+    noyywrap (do not call yywrap on end of file; avoid use of -lfl)
+    never-interactive (to go a bit faster)
+    stack (use a start-condition stack)
+*/
+%option 8bit
+%option noyywrap
+%option never-interactive
+%option stack
 
 /* The start states are:
    Code -- normal Haskell code (principal lexer)
@@ -240,7 +239,7 @@ O                   [0-7]
 H                      [0-9A-Fa-f]
 N                      {D}+
 F                      {N}"."{N}(("e"|"E")("+"|"-")?{N})?
-S                      [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
+S                      [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
 SId                    {S}{S}*
 L                      [A-Z\xc0-\xd6\xd8-\xde]
 l                      [a-z\xdf-\xf6\xf8-\xff]
@@ -256,12 +255,40 @@ NL                        [\n\r]
 %%
 
 %{
+    /*
+     * Simple comments and whitespace.  Normally, we would just ignore these, but
+     * in case we're processing a string escape, we need to note that we've seen
+     * a gap.
+     *
+     * Note that we cater for a comment line that *doesn't* end in a newline.
+     * This is incorrect, strictly speaking, but seems like the right thing
+     * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
+     *
+     * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
+     */
+%}
+
+<Code,GlaExt,StringEsc>"--"[^\n\r]*{NL}?{WS}* |
+<Code,GlaExt,UserPragma,StringEsc>{WS}+        { noGap = FALSE; }
+
+%{
     /* 
      * Special GHC pragma rules.  Do we need a start state for interface files,
      * so these won't be matched in source files? --JSM
      */
+
 %}
 
+%{
+/* I believe the next rule is not ever matched.
+
+   The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
+   Driver has now been modified to output `standard' {-# LINE ..-} pragmas
+   where possible, so the lexer should now never see cpp directives
+   like '# ' and '#line'.
+
+   -- SOF
+
 <Code,GlaExt>^"# ".*{NL}    {
                          char tempf[FILENAME_SIZE];
                          sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
@@ -275,6 +302,8 @@ NL                          [\n\r]
                          new_filename(tempf); 
                          hsplineno = hslineno; hscolno = 0; hspcolno = 0;
                        }
+*/
+%}
 
 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
                          /* partain: pragma-style line directive */
@@ -296,16 +325,34 @@ NL                        [\n\r]
                              PUSH_STATE(UserPragma);
                              RETURN(INLINE_UPRAGMA);
                            }
+<Code,GlaExt>"{-#"{WS}*"NOINLINE" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(NOINLINE_UPRAGMA);
+                           }
 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
                              PUSH_STATE(UserPragma);
                              RETURN(MAGIC_UNFOLDING_UPRAGMA);
                            }
-<Code,GlaExt>"{-#"{WS}*"DEFOREST" {
-                              PUSH_STATE(UserPragma);
-                              RETURN(DEFOREST_UPRAGMA);
+<Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
+                             /* these are handled by hscpp */
+                             nested_comments =1;
+                              PUSH_STATE(Comment);
                            }
+<Code,GlaExt>"{-#"{WS}*"OPTIONS" {
+                             /* these are for the driver! */
+                             nested_comments =1;
+                              PUSH_STATE(Comment);
+                           }
+<Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
+                             /* these are used by `make depend' and the
+                                compiler to indicate that a module should
+                                be imported from source */
+                             nested_comments =1;
+                              RETURN(SOURCE_UPRAGMA); 
+                           }
+
 <Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
-                             fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
+                             fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
                                input_filename, hsplineno);
                              format_string(stderr, (unsigned char *) yytext, yyleng);
                              fputs("'\n", stderr);
@@ -352,6 +399,14 @@ NL                         [\n\r]
 <GlaExt>"_ccall_GC_"           { RETURN(CCALL_GC); }
 <GlaExt>"_casm_"               { RETURN(CASM); }
 <GlaExt>"_casm_GC_"            { RETURN(CASM_GC); }
+<GlaExt>"foreign"              { RETURN(FOREIGN); }
+<GlaExt>"export"               { RETURN(EXPORT); }
+<GlaExt>"unsafe"               { RETURN(UNSAFE); }
+<GlaExt>"_stdcall"             { RETURN(STDCALL); }
+<GlaExt>"_ccall"               { RETURN(C_CALL); }
+<GlaExt>"_pascal"              { RETURN(PASCAL); }
+<GlaExt>"_fastcall"            { RETURN(FASTCALL); }
+<GlaExt>"dynamic"              { RETURN(DYNAMIC); }
 
 %{
     /* 
@@ -378,6 +433,7 @@ NL                          [\n\r]
 <Code,GlaExt>"<-"              { RETURN(LARROW); }
 <Code,GlaExt,UserPragma>"->"   { RETURN(RARROW); }
 <Code,GlaExt>"-"               { RETURN(MINUS); }
+<Code,GlaExt>"+"               { RETURN(PLUS); }
 
 <Code,GlaExt,UserPragma>"=>"   { RETURN(DARROW); }
 <Code,GlaExt>"@"               { RETURN(AT); }
@@ -458,40 +514,40 @@ NL                        [\n\r]
 %{
 /* These SHOULDNAE work in "Code" (sigh) */
 %}
-<Code,GlaExt,UserPragma>{Id}"#" { 
-                        if (! nonstandardFlag) {
+<GlaExt,UserPragma>{Id}"#" { 
+                       if (! nonstandardFlag) {
                            char errbuf[ERR_BUF_SIZE];
                            sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
                            hsperror(errbuf);
                         }
                         hsnewid(yytext, yyleng);
-                        RETURN(_isconstr(yytext) ? CONID : VARID);
-                       }
-<Code,GlaExt,UserPragma>_+{Id} { 
-                        if (! nonstandardFlag) {
-                           char errbuf[ERR_BUF_SIZE];
-                           sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
-                           hsperror(errbuf);
-                        }
-                        hsnewid(yytext, yyleng);
                         RETURN(isconstr(yytext) ? CONID : VARID);
-                        /* NB: ^^^^^^^^ : not the macro! */
                        }
 <Code,GlaExt,UserPragma>{Id}   {
                         hsnewid(yytext, yyleng);
-                        RETURN(_isconstr(yytext) ? CONID : VARID);
+                        RETURN(isconstr(yytext) ? CONID : VARID);
                        }
 <Code,GlaExt,UserPragma>{SId}  {
                         hsnewid(yytext, yyleng);
-                        RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
+                        RETURN(isconstr(yytext) ? CONSYM : VARSYM);
+                       }
+<Code,GlaExt,UserPragma>{Mod}"."{Id}"#"        {
+                        BOOLEAN is_constr;
+                        if (! nonstandardFlag) {
+                           char errbuf[ERR_BUF_SIZE];
+                           sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
+                           hsperror(errbuf);
+                        }
+                        is_constr = hsnewqid(yytext, yyleng);
+                        RETURN(is_constr ? QCONID : QVARID);
                        }
 <Code,GlaExt,UserPragma>{Mod}"."{Id}   {
-                        BOOLEAN isconstr = hsnewqid(yytext, yyleng);
-                        RETURN(isconstr ? QCONID : QVARID);
+                        BOOLEAN is_constr = hsnewqid(yytext, yyleng);
+                        RETURN(is_constr ? QCONID : QVARID);
                        }
 <Code,GlaExt,UserPragma>{Mod}"."{SId}  {
-                        BOOLEAN isconstr = hsnewqid(yytext, yyleng);
-                        RETURN(isconstr ? QCONSYM : QVARSYM);
+                        BOOLEAN is_constr = hsnewqid(yytext, yyleng);
+                        RETURN(is_constr ? QCONSYM : QVARSYM);
                        }
 
 %{
@@ -506,7 +562,7 @@ NL                          [\n\r]
 
 <GlaExt,UserPragma>"`"{Id}"#`" {       
                         hsnewid(yytext + 1, yyleng - 2);
-                        RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
+                        RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
                        }
 
 %{
@@ -555,7 +611,7 @@ NL                          [\n\r]
                         }
 
                         if (length > 1) {
-                           fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
+                           fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
                              input_filename, hsplineno, hspcolno + 1);
                            format_string(stderr, (unsigned char *) text, length);
                            fputs("' too long\n", stderr);
@@ -574,7 +630,7 @@ NL                          [\n\r]
                         text = fetchtext(&length);
 
                         if (length > 1) {
-                           fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
+                           fprintf(stderr, "%s:%d:%d: Character literal '",
                              input_filename, hsplineno, hspcolno + 1);
                            format_string(stderr, (unsigned char *) text, length);
                            fputs("' too long\n", stderr);
@@ -753,20 +809,6 @@ NL                         [\n\r]
                          POP_STATE;
                        }
 
-%{
-    /*
-     * Simple comments and whitespace.  Normally, we would just ignore these, but
-     * in case we're processing a string escape, we need to note that we've seen
-     * a gap.
-     *
-     * Note that we cater for a comment line that *doesn't* end in a newline.
-     * This is incorrect, strictly speaking, but seems like the right thing
-     * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
-     */
-%}
-
-<Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
-<Code,GlaExt,UserPragma,StringEsc>{WS}+        { noGap = FALSE; }
 
 %{
     /*
@@ -796,21 +838,21 @@ NL                        [\n\r]
 %}
 
 <INITIAL,Code,GlaExt,UserPragma>(.|\n) { 
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
+                        fprintf(stderr, "%s:%d:%d: Illegal character: `", 
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("'\n", stderr);
                         hsperror("");
                        }
 <Char>(.|\n)           { 
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
+                        fprintf(stderr, "%s:%d:%d: Illegal character: `",
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("' in a character literal\n", stderr);
                         hsperror("");
                        }
 <CharEsc>(.|\n)                {
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
+                        fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("'\n", stderr);
@@ -819,7 +861,7 @@ NL                          [\n\r]
 <String>(.|\n)         { if (nonstandardFlag) {
                              addtext(yytext, yyleng);
                           } else { 
-                                fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
+                                fprintf(stderr, "%s:%d:%d: Illegal character: `", 
                                 input_filename, hsplineno, hspcolno + 1); 
                                 format_string(stderr, (unsigned char *) yytext, 1);
                                 fputs("' in a string literal\n", stderr);
@@ -828,13 +870,13 @@ NL                        [\n\r]
                        }
 <StringEsc>(.|\n)      {
                         if (noGap) {
-                            fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
+                            fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
                                input_filename, hsplineno, hspcolno + 1); 
                             format_string(stderr, (unsigned char *) yytext, 1);
                             fputs("'\n", stderr);
                             hsperror("");
                         } else {
-                            fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
+                            fprintf(stderr, "%s:%d:%d: Illegal character: `",
                                input_filename, hsplineno, hspcolno + 1);
                             format_string(stderr, (unsigned char *) yytext, 1);
                             fputs("' in a string gap\n", stderr);
@@ -888,8 +930,6 @@ NL                          [\n\r]
    This allows unnamed sources to be piped into the parser.
 */
 
-extern BOOLEAN acceptPrim;
-
 void
 yyinit(void)
 {
@@ -899,7 +939,7 @@ yyinit(void)
        setyyin _before_ calling yylex for the first time! */
     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 
-    if (acceptPrim)
+    if (nonstandardFlag)
        PUSH_STATE(GlaExt);
     else
        PUSH_STATE(Code);
@@ -1294,15 +1334,5 @@ hsnewqid(char *name, int length)
     *dot = '.';
     name[length] = save;
 
-    return _isconstr(dot+1);
-}
-
-BOOLEAN 
-isconstr(char *s) /* walks past leading underscores before using the macro */
-{
-    char *temp = s;
-
-    for ( ; temp != NULL && *temp == '_' ; temp++ );
-
-    return _isconstr(temp);
+    return isconstr(dot+1);
 }