[project @ 1999-06-01 16:15:42 by simonmar]
[ghc-hetmet.git] / ghc / compiler / parser / hslexer.flex
index 892d2f9..a3bb035 100644 (file)
@@ -7,7 +7,8 @@
 *                                                                     *
 **********************************************************************/
 
-#include "../../includes/config.h"
+/* The includes/config.h one */
+#include "config.h"
 
 #include <stdio.h>
 
@@ -41,9 +42,6 @@
 #define _O      0x8
 #define _C     0x10
 
-#define _isconstr(s)   (CharTable[*s]&(_C))
-BOOLEAN isconstr PROTO((char *)); /* fwd decl */
-
 static unsigned char CharTable[NCHARS] = {
 /* nul */      0,      0,      0,      0,      0,      0,      0,      0,
 /* bs  */      0,      _S,     _S,     _S,     _S,     0,      0,      0,
@@ -80,6 +78,12 @@ static unsigned char CharTable[NCHARS] = {
 /*     */      0,      0,      0,      0,      0,      0,      0,      0,
 };
 
+BOOLEAN
+isconstr (char *s)
+{
+    return(CharTable[*s]&(_C));
+}
+
 /**********************************************************************
 *                                                                     *
 *                                                                     *
@@ -110,16 +114,17 @@ static BOOLEAN noGap = TRUE;      /* For checking string gaps */
 static BOOLEAN forgetindent = FALSE;   /* Don't bother applying indentation rules */
 
 static int nested_comments;    /* For counting comment nesting depth */
+static int comment_start;
 
-/* Hacky definition of yywrap: see flex doc.
+/* OLD: Hacky definition of yywrap: see flex doc.
 
    If we don't do this, then we'll have to get the default
    yywrap from the flex library, which is often something
    we are not good at locating.  This avoids that difficulty.
    (Besides which, this is the way old flexes (pre 2.4.x) did it.)
    WDP 94/09/05
-*/
 #define yywrap() 1
+*/
 
 /* Essential forward declarations */
 
@@ -133,6 +138,8 @@ static void new_filename PROTO((char *));
 static int  Return      PROTO((int));
 static void hsentercontext PROTO((int));
 
+static BOOLEAN is_commment PROTO((char*, int));
+
 /* Special file handling for IMPORTS */
 /*  Note: imports only ever go *one deep* (hence no need for a stack) WDP 94/09 */
 
@@ -144,17 +151,10 @@ static int hslineno_save = 0,             /* Line Number                   */
  hspcolno_save = 0;                    /* Left Indentation              */
 static short icontexts_save = 0;       /* Indent Context Level          */
 
-static BOOLEAN etags_save; /* saved: whether doing etags stuff or not */
-extern BOOLEAN etags;     /* that which is saved */
-
-extern BOOLEAN nonstandardFlag;        /* Glasgow extensions allowed */
+static BOOLEAN etags_save;              /* saved: whether doing etags stuff or not */
+extern BOOLEAN etags;                  /* that which is saved */
 
-static BOOLEAN in_interface = FALSE; /* TRUE if we are reading a .hi file */
-
-extern BOOLEAN ignorePragmas;          /* True when we should ignore pragmas */
-extern int minAcceptablePragmaVersion; /* see documentation in main.c */
-extern int maxAcceptablePragmaVersion;
-extern int thisIfacePragmaVersion;
+extern BOOLEAN nonstandardFlag;                /* Glasgow extensions allowed */
 
 static int hssttok = -1;       /* Stacked Token: -1   -- no token; -ve  -- ";"
                                 * inserted before token +ve  -- "}" inserted before
@@ -162,8 +162,6 @@ static int hssttok = -1;    /* Stacked Token: -1   -- no token; -ve  -- ";"
 
 short icontexts = 0;           /* Which context we're in */
 
-
-
 /*
        Table of indentations:  right bit indicates whether to use
          indentation rules (1 = use rules; 0 = ignore)
@@ -202,26 +200,21 @@ static short indenttab[MAX_CONTEXTS] = {-1};
 #endif
 
 /* Each time we enter a new start state, we push it onto the state stack.
-   Note that the rules do not allow us to underflow or overflow the stack.
-   (At least, they shouldn't.)  The maximum expected depth is 4:
-   0: Code -> 1: String -> 2: StringEsc -> 3: Comment
 */
-static int StateStack[5];
-static int StateDepth = -1;
-
-#ifdef HSP_DEBUG
-#define PUSH_STATE(n)   do {\
-    fprintf(stderr,"Pushing %d (%d)\n", n, StateDepth + 1);\
-    StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
-#define POP_STATE       do {--StateDepth;\
-    fprintf(stderr,"Popping %d (%d)\n", StateStack[StateDepth], StateDepth);\
-    BEGIN(StateStack[StateDepth]);} while(0)
-#else
-#define PUSH_STATE(n)   do {StateStack[++StateDepth] = (n); BEGIN(n);} while(0)
-#define POP_STATE       do {--StateDepth; BEGIN(StateStack[StateDepth]);} while(0)
-#endif
+#define PUSH_STATE(n)   yy_push_state(n)
+#define POP_STATE       yy_pop_state()
 
 %}
+/* Options:
+    8bit (8-bit input)
+    noyywrap (do not call yywrap on end of file; avoid use of -lfl)
+    never-interactive (to go a bit faster)
+    stack (use a start-condition stack)
+*/
+%option 8bit
+%option noyywrap
+%option never-interactive
+%option stack
 
 /* The start states are:
    Code -- normal Haskell code (principal lexer)
@@ -237,7 +230,7 @@ static int StateDepth = -1;
    list of start states.
  */
 
-%x Char CharEsc Code Comment GlaExt GhcPragma UserPragma String StringEsc
+%x Char CharEsc Code Comment GlaExt UserPragma String StringEsc
 
 isoS                   [\xa1-\xbf\xd7\xf7]
 isoL                   [\xc0-\xd6\xd8-\xde]
@@ -249,10 +242,10 @@ O                 [0-7]
 H                      [0-9A-Fa-f]
 N                      {D}+
 F                      {N}"."{N}(("e"|"E")("+"|"-")?{N})?
-S                      [!#$%&*+./<=>?@\\^|-~:\xa1-\xbf\xd7\xf7]
+S                      [!#$%&*+./<=>?@\\^|\-~:\xa1-\xbf\xd7\xf7]
 SId                    {S}{S}*
 L                      [A-Z\xc0-\xd6\xd8-\xde]
-l                      [a-z\xdf-\xf6\xf8-\xff]
+l                      [a-z_\xdf-\xf6\xf8-\xff]
 I                      {L}|{l}
 i                      {L}|{l}|[0-9'_]
 Id                     {I}{i}*
@@ -265,12 +258,39 @@ NL                        [\n\r]
 %%
 
 %{
+    /*
+     * Simple comments and whitespace.  Normally, we would just ignore these, but
+     * in case we're processing a string escape, we need to note that we've seen
+     * a gap.
+     *
+     * Note that we cater for a comment line that *doesn't* end in a newline.
+     * This is incorrect, strictly speaking, but seems like the right thing
+     * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
+     *
+     * Hackily moved up here so that --<<EOF>> will match     -- SOF 5/97
+     */
+%}
+
+<Code,GlaExt,UserPragma,StringEsc>{WS}+        { noGap = FALSE; }
+
+%{
     /* 
      * Special GHC pragma rules.  Do we need a start state for interface files,
      * so these won't be matched in source files? --JSM
      */
+
 %}
 
+%{
+/* I believe the next rule is not ever matched.
+
+   The '#line ' rule is un-cool, recognising a cpp directive inside hs source.
+   Driver has now been modified to output `standard' {-# LINE ..-} pragmas
+   where possible, so the lexer should now never see cpp directives
+   like '# ' and '#line'.
+
+   -- SOF
+
 <Code,GlaExt>^"# ".*{NL}    {
                          char tempf[FILENAME_SIZE];
                          sscanf(yytext+1, "%d \"%[^\"]", &hslineno, tempf); 
@@ -284,6 +304,8 @@ NL                          [\n\r]
                          new_filename(tempf); 
                          hsplineno = hslineno; hscolno = 0; hspcolno = 0;
                        }
+*/
+%}
 
 <Code,GlaExt>"{-# LINE ".*"-}"{NL} { 
                          /* partain: pragma-style line directive */
@@ -292,92 +314,70 @@ NL                        [\n\r]
                          new_filename(tempf);
                          hsplineno = hslineno; hscolno = 0; hspcolno = 0;
                        }
-<Code,GlaExt>"{-# GHC_PRAGMA INTERFACE VERSION "{D}+" #-}"   {
-                         sscanf(yytext+33,"%d ",&thisIfacePragmaVersion);
-                       }
-<Code,GlaExt>"{-# GHC_PRAGMA "   { 
-                         if ( ignorePragmas ||
-                              thisIfacePragmaVersion < minAcceptablePragmaVersion || 
-                              thisIfacePragmaVersion > maxAcceptablePragmaVersion) {
-                            nested_comments = 1;
-                            PUSH_STATE(Comment);
-                         } else {
-                            PUSH_STATE(GhcPragma);
-                            RETURN(GHC_PRAGMA);
-                         }
-                       }
-<GhcPragma>"_N_"           { RETURN(NO_PRAGMA); }
-<GhcPragma>"_NI_"          { RETURN(NOINFO_PRAGMA); }
-<GhcPragma>"_DEFOREST_"            { RETURN(DEFOREST_PRAGMA); }
-<GhcPragma>"_SPECIALISE_"   { RETURN(SPECIALISE_PRAGMA); }
-<GhcPragma>"_A_"           { RETURN(ARITY_PRAGMA); }
-<GhcPragma>"_U_"           { RETURN(UPDATE_PRAGMA); }
-<GhcPragma>"_S_"           { RETURN(STRICTNESS_PRAGMA); }
-<GhcPragma>"_K_"           { RETURN(KIND_PRAGMA); }
-<GhcPragma>"_MF_"          { RETURN(MAGIC_UNFOLDING_PRAGMA); }
-<GhcPragma>"_F_"           { RETURN(UNFOLDING_PRAGMA); }
-
-<GhcPragma>"_!_"           { RETURN(COCON); }
-<GhcPragma>"_#_"           { RETURN(COPRIM); }
-<GhcPragma>"_APP_"         { RETURN(COAPP); }
-<GhcPragma>"_TYAPP_"       { RETURN(COTYAPP); }
-<GhcPragma>"_ALG_"         { RETURN(CO_ALG_ALTS); }
-<GhcPragma>"_PRIM_"        { RETURN(CO_PRIM_ALTS); }
-<GhcPragma>"_NO_DEFLT_"            { RETURN(CO_NO_DEFAULT); }
-<GhcPragma>"_LETREC_"      { RETURN(CO_LETREC); }
-
-<GhcPragma>"_PRELUDE_DICTS_CC_" { RETURN(CO_PRELUDE_DICTS_CC); }
-<GhcPragma>"_ALL_DICTS_CC_" { RETURN(CO_ALL_DICTS_CC); }
-<GhcPragma>"_USER_CC_"     { RETURN(CO_USER_CC); }
-<GhcPragma>"_AUTO_CC_"     { RETURN(CO_AUTO_CC); }
-<GhcPragma>"_DICT_CC_"     { RETURN(CO_DICT_CC); }
-
-<GhcPragma>"_DUPD_CC_"     { RETURN(CO_DUPD_CC); }
-<GhcPragma>"_CAF_CC_"      { RETURN(CO_CAF_CC); }
-
-<GhcPragma>"_SDSEL_"       { RETURN(CO_SDSEL_ID); }
-<GhcPragma>"_METH_"        { RETURN(CO_METH_ID); }
-<GhcPragma>"_DEFM_"        { RETURN(CO_DEFM_ID); }
-<GhcPragma>"_DFUN_"        { RETURN(CO_DFUN_ID); }
-<GhcPragma>"_CONSTM_"      { RETURN(CO_CONSTM_ID); }
-<GhcPragma>"_SPEC_"        { RETURN(CO_SPEC_ID); }
-<GhcPragma>"_WRKR_"        { RETURN(CO_WRKR_ID); }
-<GhcPragma>"_ORIG_"        { RETURN(CO_ORIG_NM); /* fully-qualified original name*/ }
-
-<GhcPragma>"_ALWAYS_"      { RETURN(UNFOLD_ALWAYS); }
-<GhcPragma>"_IF_ARGS_"      { RETURN(UNFOLD_IF_ARGS); }
-
-<GhcPragma>"_NOREP_I_"     { RETURN(NOREP_INTEGER); }
-<GhcPragma>"_NOREP_R_"     { RETURN(NOREP_RATIONAL); }
-<GhcPragma>"_NOREP_S_"     { RETURN(NOREP_STRING); }
-
-<GhcPragma>" #-}"          { POP_STATE; RETURN(END_PRAGMA); }
 
+<Code,GlaExt>"{-#"{WS}*"INTERFACE" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(INTERFACE_UPRAGMA);
+                           }
 <Code,GlaExt>"{-#"{WS}*"SPECIALI"[SZ]E {
                              PUSH_STATE(UserPragma);
                              RETURN(SPECIALISE_UPRAGMA);
                            }
+<Code,GlaExt>"{-#"{WS}*"speciali"[sz]e {
+                             PUSH_STATE(UserPragma);
+                             RETURN(SPECIALISE_UPRAGMA);
+                           }
 <Code,GlaExt>"{-#"{WS}*"INLINE" {
                              PUSH_STATE(UserPragma);
                              RETURN(INLINE_UPRAGMA);
                            }
+<Code,GlaExt>"{-#"{WS}*"RULES" {
+                             PUSH_STATE(Code); /* I'm not sure about this */
+                             RETURN(RULES_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"inline" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(INLINE_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"NOINLINE" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(NOINLINE_UPRAGMA);
+                           }
+<Code,GlaExt>"{-#"{WS}*"notInline" {
+                             PUSH_STATE(UserPragma);
+                             RETURN(NOINLINE_UPRAGMA);
+                           }
 <Code,GlaExt>"{-#"{WS}*"MAGIC_UNFOLDING" {
                              PUSH_STATE(UserPragma);
                              RETURN(MAGIC_UNFOLDING_UPRAGMA);
                            }
-<Code,GlaExt>"{-#"{WS}*"DEFOREST" {
-                              PUSH_STATE(UserPragma);
-                              RETURN(DEFOREST_UPRAGMA);
+<Code,GlaExt>"{-#"{WS}*"GENERATE_SPECS" {
+                             /* these are handled by hscpp */
+                             nested_comments =1; comment_start = hsplineno;
+                              PUSH_STATE(Comment);
+                           }
+<Code,GlaExt>"{-#"{WS}*"OPTIONS" {
+                             /* these are for the driver! */
+                             nested_comments =1; comment_start = hsplineno;
+                              PUSH_STATE(Comment);
                            }
-<Code,GlaExt>"{-#"{WS}*[A-Z_]+ {
-                             fprintf(stderr, "Warning: \"%s\", line %d: Unrecognised pragma '",
+<Code,GlaExt>"{-#"{WS}*"SOURCE"{WS}*"#"?"-}" {
+                             /* these are used by `make depend' and the
+                                compiler to indicate that a module should
+                                be imported from source */
+                             nested_comments =1; comment_start = hsplineno; 
+                              RETURN(SOURCE_UPRAGMA); 
+                           }
+
+<Code,GlaExt>"{-#"{WS}*[a-zA-Z_]+ {
+                             fprintf(stderr, "%s:%d: Warning: Unrecognised pragma '",
                                input_filename, hsplineno);
                              format_string(stderr, (unsigned char *) yytext, yyleng);
                              fputs("'\n", stderr);
-                             nested_comments = 1;
+                             nested_comments = 1; comment_start = hsplineno;
                              PUSH_STATE(Comment);
                            }
-<UserPragma>"#-}"          { POP_STATE; RETURN(END_UPRAGMA); }
+<Code,GlaExt,UserPragma>"#-}"      { POP_STATE; RETURN(END_UPRAGMA); }
 
 %{
     /*
@@ -386,7 +386,7 @@ NL                          [\n\r]
      */
 %}
 
-<Code,GlaExt,GhcPragma>"case"  { RETURN(CASE); }
+<Code,GlaExt>"case"            { RETURN(CASE); }
 <Code,GlaExt>"class"           { RETURN(CLASS); }
 <Code,GlaExt,UserPragma>"data" { RETURN(DATA); }
 <Code,GlaExt>"default"         { RETURN(DEFAULT); }
@@ -395,15 +395,15 @@ NL                        [\n\r]
 <Code,GlaExt>"else"            { RETURN(ELSE); }
 <Code,GlaExt>"if"              { RETURN(IF); }
 <Code,GlaExt>"import"          { RETURN(IMPORT); }
-<Code,GlaExt,GhcPragma>"in"    { RETURN(IN); }
+<Code,GlaExt>"in"              { RETURN(IN); }
 <Code,GlaExt>"infix"           { RETURN(INFIX); }
 <Code,GlaExt>"infixl"          { RETURN(INFIXL); }
 <Code,GlaExt>"infixr"          { RETURN(INFIXR); }
 <Code,GlaExt,UserPragma>"instance" { RETURN(INSTANCE); }
-<Code,GlaExt,GhcPragma>"let"   { RETURN(LET); }
+<Code,GlaExt>"let"             { RETURN(LET); }
 <Code,GlaExt>"module"          { RETURN(MODULE); }
 <Code,GlaExt>"newtype"                 { RETURN(NEWTYPE); }
-<Code,GlaExt,GhcPragma>"of"    { RETURN(OF); }
+<Code,GlaExt>"of"              { RETURN(OF); }
 <Code,GlaExt>"then"            { RETURN(THEN); }
 <Code,GlaExt>"type"            { RETURN(TYPE); }
 <Code,GlaExt>"where"           { RETURN(WHERE); }
@@ -411,14 +411,27 @@ NL                        [\n\r]
 <Code,GlaExt>"as"              { RETURN(AS); }
 <Code,GlaExt>"hiding"          { RETURN(HIDING); }
 <Code,GlaExt>"qualified"       { RETURN(QUALIFIED); }
-<Code,GlaExt>"interface"        { RETURN(INTERFACE); }
 
-<Code,GlaExt,GhcPragma>"_scc_" { RETURN(SCC); }
-<GlaExt,GhcPragma>"_ccall_"    { RETURN(CCALL); }
-<GlaExt,GhcPragma>"_ccall_GC_" { RETURN(CCALL_GC); }
-<GlaExt,GhcPragma>"_casm_"     { RETURN(CASM); }
-<GlaExt,GhcPragma>"_casm_GC_"  { RETURN(CASM_GC); }
-<GhcPragma>"_forall_"          { RETURN(FORALL); }
+<Code,GlaExt>"forall"          { RETURN(FORALL); }
+
+<Code,GlaExt>"_scc_"           { RETURN(SCC); }
+<GlaExt>"_ccall_"              { RETURN(CCALL); }
+<GlaExt>"_ccall_GC_"           { RETURN(CCALL_GC); }
+<GlaExt>"_casm_"               { RETURN(CASM); }
+<GlaExt>"_casm_GC_"            { RETURN(CASM_GC); }
+<GlaExt>"(#"                   { RETURN(OUNBOXPAREN); }
+<GlaExt>"#)"                   { RETURN(CUNBOXPAREN); }
+<GlaExt>"foreign"              { RETURN(FOREIGN); }
+<GlaExt>"export"               { RETURN(EXPORT); }
+<GlaExt>"label"                        { RETURN(LABEL); }
+<GlaExt>"unsafe"               { RETURN(UNSAFE); }
+<GlaExt>"_stdcall"             { RETURN(STDCALL); }
+<GlaExt>"_ccall"               { RETURN(C_CALL); }
+<GlaExt>"_pascal"              { RETURN(PASCAL); }
+<GlaExt>"stdcall"              { RETURN(STDCALL); }
+<GlaExt>"ccall"                        { RETURN(C_CALL); }
+<GlaExt>"pascal"               { RETURN(PASCAL); }
+<GlaExt>"dynamic"              { RETURN(DYNAMIC); }
 
 %{
     /* 
@@ -426,32 +439,31 @@ NL                        [\n\r]
      */
 %}
 
-<Code,GlaExt,GhcPragma,UserPragma>"("  { RETURN(OPAREN); }
-<Code,GlaExt,GhcPragma,UserPragma>")"  { RETURN(CPAREN); }
-<Code,GlaExt,GhcPragma,UserPragma>"["  { RETURN(OBRACK); }
-<Code,GlaExt,GhcPragma,UserPragma>"]"  { RETURN(CBRACK); }
-<Code,GlaExt,GhcPragma>"{"             { RETURN(OCURLY); }
-<Code,GlaExt,GhcPragma>"}"             { RETURN(CCURLY); }
-<Code,GlaExt,GhcPragma,UserPragma>","  { RETURN(COMMA); }
-<Code,GlaExt,GhcPragma>";"             { RETURN(SEMI); }
-<Code,GlaExt,GhcPragma>"`"             { RETURN(BQUOTE); }
-<Code,GlaExt>"_"                       { RETURN(WILDCARD); }
-
-<Code,GlaExt>".."                      { RETURN(DOTDOT); }
-<Code,GlaExt,GhcPragma,UserPragma>"::" { RETURN(DCOLON); }
-<Code,GlaExt,GhcPragma,UserPragma>"="  { RETURN(EQUAL); }
-<Code,GlaExt,GhcPragma>"\\"            { RETURN(LAMBDA); }
-<Code,GlaExt,GhcPragma>"|"             { RETURN(VBAR); }
-<Code,GlaExt>"<-"                      { RETURN(LARROW); }
-<Code,GlaExt,GhcPragma,UserPragma>"->" { RETURN(RARROW); }
-<Code,GlaExt>"-"                       { RETURN(MINUS); }
-
-<Code,GlaExt,GhcPragma,UserPragma>"=>" { RETURN(DARROW); }
-<Code,GlaExt>"@"                       { RETURN(AT); }
-<Code,GlaExt>"!"                       { RETURN(BANG); }
-<Code,GlaExt>"~"                       { RETURN(LAZY); }
-
-<GhcPragma>"_/\\_"                     { RETURN(TYLAMBDA); }
+<Code,GlaExt,UserPragma>"("    { RETURN(OPAREN); }
+<Code,GlaExt,UserPragma>")"    { RETURN(CPAREN); }
+<Code,GlaExt,UserPragma>"["    { RETURN(OBRACK); }
+<Code,GlaExt,UserPragma>"]"    { RETURN(CBRACK); }
+<Code,GlaExt>"{"               { RETURN(OCURLY); }
+<Code,GlaExt>"}"               { RETURN(CCURLY); }
+<Code,GlaExt,UserPragma>","    { RETURN(COMMA); }
+<Code,GlaExt>";"               { RETURN(SEMI); }
+<Code,GlaExt>"`"               { RETURN(BQUOTE); }
+
+<Code,GlaExt>"."               { RETURN(DOT); }
+<Code,GlaExt>".."              { RETURN(DOTDOT); }
+<Code,GlaExt,UserPragma>"::"   { RETURN(DCOLON); }
+<Code,GlaExt,UserPragma>"="    { RETURN(EQUAL); }
+<Code,GlaExt>"\\"              { RETURN(LAMBDA); }
+<Code,GlaExt>"|"               { RETURN(VBAR); }
+<Code,GlaExt>"<-"              { RETURN(LARROW); }
+<Code,GlaExt,UserPragma>"->"   { RETURN(RARROW); }
+<Code,GlaExt>"-"               { RETURN(MINUS); }
+<Code,GlaExt>"+"               { RETURN(PLUS); }
+
+<Code,GlaExt,UserPragma>"=>"   { RETURN(DARROW); }
+<Code,GlaExt>"@"               { RETURN(AT); }
+<Code,GlaExt>"!"               { RETURN(BANG); }
+<Code,GlaExt>"~"               { RETURN(LAZY); }
 
 %{
     /*
@@ -477,11 +489,11 @@ NL                        [\n\r]
                         yylval.uid = xstrndup(yytext, yyleng);
                         RETURN(INTEGER);
                        }
-<GlaExt,GhcPragma>("-")?{N}"#" {
+<GlaExt>("-")?{N}"#"   {
                         yylval.uid = xstrndup(yytext, yyleng - 1);
                         RETURN(INTPRIM);
                        }
-<Code,GlaExt,GhcPragma>{N} {
+<Code,GlaExt,UserPragma>{N} {
                         yylval.uid = xstrndup(yytext, yyleng);
                         RETURN(INTEGER);
                        }
@@ -492,11 +504,11 @@ NL                        [\n\r]
      */
 %}
 
-<GlaExt,GhcPragma>("-")?{F}"##" {
+<GlaExt>("-")?{F}"##"  {
                         yylval.uid = xstrndup(yytext, yyleng - 2);
                         RETURN(DOUBLEPRIM);
                        }
-<GlaExt,GhcPragma>("-")?{F}"#" {
+<GlaExt>("-")?{F}"#"   {
                         yylval.uid = xstrndup(yytext, yyleng - 1);
                         RETURN(FLOATPRIM);
                        }
@@ -511,7 +523,7 @@ NL                          [\n\r]
      */
 %}
 
-<GlaExt,GhcPragma>"``"[^']+"''"        {
+<GlaExt>"``"[^']+"''"  {
                         hsnewid(yytext + 2, yyleng - 4);
                         RETURN(CLITLIT);
                        }
@@ -523,47 +535,52 @@ NL                        [\n\r]
      */
 %}
 
-<GhcPragma>"_NIL_"             { hsnewid(yytext, yyleng); RETURN(CONID); }
-<GhcPragma>"_TUP_"{D}+         { hsnewid(yytext, yyleng); RETURN(CONID); }
-<GhcPragma>[a-z]{i}*"$"[a-z]{i}* { hsnewid(yytext, yyleng); RETURN(TYVAR_TEMPLATE_ID); }
 
 %{
 /* These SHOULDNAE work in "Code" (sigh) */
 %}
-<Code,GlaExt,GhcPragma,UserPragma>{Id}"#" { 
-                        if (! (nonstandardFlag || in_interface)) {
+<GlaExt,UserPragma>{Id}"#" { 
+                       if (! nonstandardFlag) {
                            char errbuf[ERR_BUF_SIZE];
                            sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
                            hsperror(errbuf);
                         }
                         hsnewid(yytext, yyleng);
-                        RETURN(_isconstr(yytext) ? CONID : VARID);
-                       }
-<Code,GlaExt,GhcPragma,UserPragma>_+{Id} { 
-                        if (! (nonstandardFlag || in_interface)) {
-                           char errbuf[ERR_BUF_SIZE];
-                           sprintf(errbuf, "Non-standard identifier (leading underscore): %s\n", yytext);
-                           hsperror(errbuf);
-                        }
-                        hsnewid(yytext, yyleng);
                         RETURN(isconstr(yytext) ? CONID : VARID);
-                        /* NB: ^^^^^^^^ : not the macro! */
                        }
-<Code,GlaExt,GhcPragma,UserPragma>{Id} {
+<Code,GlaExt,UserPragma>{Id}   {
                         hsnewid(yytext, yyleng);
-                        RETURN(_isconstr(yytext) ? CONID : VARID);
+                        RETURN(isconstr(yytext) ? CONID : VARID);
                        }
-<Code,GlaExt,GhcPragma,UserPragma>{SId}        {
-                        hsnewid(yytext, yyleng);
-                        RETURN(_isconstr(yytext) ? CONSYM : VARSYM);
+<Code,GlaExt,UserPragma>{SId}  {
+                        if (is_commment(yytext,yyleng)) {
+                               int c;
+                               while ((c = input()) != '\n' && c != '\r' && c!= EOF )
+                                       ;
+                               if (c != EOF)
+                                  unput(c);
+                        } else {
+                           hsnewid(yytext, yyleng);
+                           RETURN(isconstr(yytext) ? CONSYM : VARSYM);
+                        }
                        }
-<Code,GlaExt,GhcPragma,UserPragma>{Mod}"."{Id} {
-                        BOOLEAN isconstr = hsnewqid(yytext, yyleng);
-                        RETURN(isconstr ? QCONID : QVARID);
+<Code,GlaExt,UserPragma>{Mod}"."{Id}"#"        {
+                        BOOLEAN is_constr;
+                        if (! nonstandardFlag) {
+                           char errbuf[ERR_BUF_SIZE];
+                           sprintf(errbuf, "Non-standard identifier (trailing `#'): %s\n", yytext);
+                           hsperror(errbuf);
+                        }
+                        is_constr = hsnewqid(yytext, yyleng);
+                        RETURN(is_constr ? QCONID : QVARID);
                        }
-<Code,GlaExt,GhcPragma,UserPragma>{Mod}"."{SId}        {
-                        BOOLEAN isconstr = hsnewqid(yytext, yyleng);
-                        RETURN(isconstr ? QCONSYM : QVARSYM);
+<Code,GlaExt,UserPragma>{Mod}"."{Id}   {
+                        BOOLEAN is_constr = hsnewqid(yytext, yyleng);
+                        RETURN(is_constr ? QCONID : QVARID);
+                       }
+<Code,GlaExt,UserPragma>{Mod}"."{SId}  {
+                        BOOLEAN is_constr = hsnewqid(yytext, yyleng);
+                        RETURN(is_constr ? QCONSYM : QVARSYM);
                        }
 
 %{
@@ -576,9 +593,9 @@ NL                          [\n\r]
     */
 %}
 
-<GlaExt,GhcPragma,UserPragma>"`"{Id}"#`"       {       
+<GlaExt,UserPragma>"`"{Id}"#`" {       
                         hsnewid(yytext + 1, yyleng - 2);
-                        RETURN(_isconstr(yytext+1) ? CONSYM : VARSYM);
+                        RETURN(isconstr(yytext+1) ? CONSYM : VARSYM);
                        }
 
 %{
@@ -595,7 +612,7 @@ NL                          [\n\r]
      */
 %}
 
-<GlaExt,GhcPragma>'({CHAR}|"\"")"'#" {
+<GlaExt>'({CHAR}|"\"")"'#" {
                         yylval.uhstring = installHstring(1, yytext+1);
                         RETURN(CHARPRIM);
                        }
@@ -607,7 +624,7 @@ NL                          [\n\r]
                         sprintf(errbuf, "'' is not a valid character (or string) literal\n");
                         hsperror(errbuf);
                        }
-<Code,GlaExt,GhcPragma>'({CHAR}|"\"")* {
+<Code,GlaExt>'({CHAR}|"\"")* {
                         hsmlcolno = hspcolno;
                         cleartext();
                         addtext(yytext+1, yyleng-1);
@@ -620,14 +637,14 @@ NL                        [\n\r]
                         addtext(yytext, yyleng - 2);
                         text = fetchtext(&length);
 
-                        if (! (nonstandardFlag || in_interface)) {
+                        if (! nonstandardFlag) {
                            char errbuf[ERR_BUF_SIZE];
                            sprintf(errbuf, "`Char-hash' literals are non-standard: %s\n", text);
                            hsperror(errbuf);
                         }
 
                         if (length > 1) {
-                           fprintf(stderr, "\"%s\", line %d, column %d: Unboxed character literal '",
+                           fprintf(stderr, "%s:%d:%d: Unboxed character literal '",
                              input_filename, hsplineno, hspcolno + 1);
                            format_string(stderr, (unsigned char *) text, length);
                            fputs("' too long\n", stderr);
@@ -646,7 +663,7 @@ NL                          [\n\r]
                         text = fetchtext(&length);
 
                         if (length > 1) {
-                           fprintf(stderr, "\"%s\", line %d, column %d: Character literal '",
+                           fprintf(stderr, "%s:%d:%d: Character literal '",
                              input_filename, hsplineno, hspcolno + 1);
                            format_string(stderr, (unsigned char *) text, length);
                            fputs("' too long\n", stderr);
@@ -675,16 +692,16 @@ NL                        [\n\r]
      */
 %}
 
-<GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""#  {
+<GlaExt>"\""({CHAR}|"'")*"\""#  {
                         yylval.uhstring = installHstring(yyleng-3, yytext+1);
                            /* the -3 accounts for the " on front, "# on the end */
                         RETURN(STRINGPRIM); 
                        }
-<Code,GlaExt,GhcPragma>"\""({CHAR}|"'")*"\""  {
+<Code,GlaExt>"\""({CHAR}|"'")*"\""  {
                         yylval.uhstring = installHstring(yyleng-2, yytext+1);
                         RETURN(STRING); 
                        }
-<Code,GlaExt,GhcPragma>"\""({CHAR}|"'")* {
+<Code,GlaExt>"\""({CHAR}|"'")* {
                         hsmlcolno = hspcolno;
                         cleartext();
                         addtext(yytext+1, yyleng-1);
@@ -697,7 +714,7 @@ NL                          [\n\r]
                         addtext(yytext, yyleng-2);
                         text = fetchtext(&length);
 
-                        if (! (nonstandardFlag || in_interface)) {
+                        if (! nonstandardFlag) {
                            char errbuf[ERR_BUF_SIZE];
                            sprintf(errbuf, "`String-hash' literals are non-standard: %s\n", text);
                            hsperror(errbuf);
@@ -745,6 +762,19 @@ NL                         [\n\r]
 <CharEsc>\\                    { addchar(*yytext); POP_STATE; }
 <StringEsc>\\          { if (noGap) { addchar(*yytext); } POP_STATE; }
 
+%{
+/*
+ Not 100% correct, tokenizes "foo \  --<>--
+                                 \ bar"
+
+ as "foo  bar", but this is not correct as per Haskell 98 report and its
+ maximal munch rule for "--"-style comments.
+
+ For the moment, not deemed worthy to fix.
+*/
+%}
+<StringEsc>"--"[^\n\r]*{NL}?{WS}*  { noGap=FALSE; }
+
 <CharEsc,StringEsc>["']        { addchar(*yytext); POP_STATE; }
 <CharEsc,StringEsc>NUL         { addchar('\000'); POP_STATE; }
 <CharEsc,StringEsc>SOH         { addchar('\001'); POP_STATE; }
@@ -825,20 +855,6 @@ NL                         [\n\r]
                          POP_STATE;
                        }
 
-%{
-    /*
-     * Simple comments and whitespace.  Normally, we would just ignore these, but
-     * in case we're processing a string escape, we need to note that we've seen
-     * a gap.
-     *
-     * Note that we cater for a comment line that *doesn't* end in a newline.
-     * This is incorrect, strictly speaking, but seems like the right thing
-     * to do.  Reported by Rajiv Mirani.  (WDP 95/08)
-     */
-%}
-
-<Code,GlaExt,StringEsc>"--".*{NL}?{WS}* |
-<Code,GlaExt,GhcPragma,UserPragma,StringEsc>{WS}+      { noGap = FALSE; }
 
 %{
     /*
@@ -848,8 +864,8 @@ NL                          [\n\r]
      */
 %}
 
-<Code,GlaExt,GhcPragma,UserPragma,StringEsc>"{-"       { 
-                         noGap = FALSE; nested_comments = 1; PUSH_STATE(Comment); 
+<Code,GlaExt,UserPragma,StringEsc>"{-" { 
+                         noGap = FALSE; nested_comments = 1; comment_start = hsplineno; PUSH_STATE(Comment); 
                        }
 
 <Comment>[^-{]*        |
@@ -859,6 +875,7 @@ NL                          [\n\r]
 <Comment>"-}"          { if (--nested_comments == 0) POP_STATE; }
 <Comment>(.|\n)                ;
 
+
 %{
     /*
      * Illegal characters.  This used to be a single rule, but we might as well
@@ -867,22 +884,22 @@ NL                        [\n\r]
      */
 %}
 
-<INITIAL,Code,GlaExt,GhcPragma,UserPragma>(.|\n)       { 
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
+<INITIAL,Code,GlaExt,UserPragma>(.|\n) { 
+                        fprintf(stderr, "%s:%d:%d: Illegal character: `", 
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("'\n", stderr);
                         hsperror("");
                        }
 <Char>(.|\n)           { 
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
+                        fprintf(stderr, "%s:%d:%d: Illegal character: `",
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("' in a character literal\n", stderr);
                         hsperror("");
                        }
 <CharEsc>(.|\n)                {
-                        fprintf(stderr, "\"%s\", line %d, column %d: Illegal character escape: `\\",
+                        fprintf(stderr, "%s:%d:%d: Illegal character escape: `\\",
                            input_filename, hsplineno, hspcolno + 1); 
                         format_string(stderr, (unsigned char *) yytext, 1);
                         fputs("'\n", stderr);
@@ -891,7 +908,7 @@ NL                          [\n\r]
 <String>(.|\n)         { if (nonstandardFlag) {
                              addtext(yytext, yyleng);
                           } else { 
-                                fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `", 
+                                fprintf(stderr, "%s:%d:%d: Illegal character: `", 
                                 input_filename, hsplineno, hspcolno + 1); 
                                 format_string(stderr, (unsigned char *) yytext, 1);
                                 fputs("' in a string literal\n", stderr);
@@ -900,13 +917,13 @@ NL                        [\n\r]
                        }
 <StringEsc>(.|\n)      {
                         if (noGap) {
-                            fprintf(stderr, "\"%s\", line %d, column %d: Illegal string escape: `\\", 
+                            fprintf(stderr, "%s:%d:%d: Illegal string escape: `\\", 
                                input_filename, hsplineno, hspcolno + 1); 
                             format_string(stderr, (unsigned char *) yytext, 1);
                             fputs("'\n", stderr);
                             hsperror("");
                         } else {
-                            fprintf(stderr, "\"%s\", line %d, column %d: Illegal character: `",
+                            fprintf(stderr, "%s:%d:%d: Illegal character: `",
                                input_filename, hsplineno, hspcolno + 1);
                             format_string(stderr, (unsigned char *) yytext, 1);
                             fputs("' in a string gap\n", stderr);
@@ -932,17 +949,15 @@ NL                        [\n\r]
                          hsperror("unterminated character literal");
                        }
 <Comment><<EOF>>       { 
+                         char errbuf[ERR_BUF_SIZE];
                          hsplineno = hslineno; hspcolno = hscolno;
-                         hsperror("unterminated comment"); 
+                         sprintf(errbuf, "unterminated comment (which started on line %d)", comment_start);
+                         hsperror(errbuf); 
                        }
 <String,StringEsc><<EOF>>   { 
                          hsplineno = hslineno; hspcolno = hscolno;
                          hsperror("unterminated string literal"); 
                        }
-<GhcPragma><<EOF>>     {
-                         hsplineno = hslineno; hspcolno = hscolno;
-                         hsperror("unterminated interface pragma"); 
-                       }
 <UserPragma><<EOF>>    {
                          hsplineno = hslineno; hspcolno = hscolno;
                          hsperror("unterminated user-specified pragma"); 
@@ -964,8 +979,6 @@ NL                          [\n\r]
    This allows unnamed sources to be piped into the parser.
 */
 
-extern BOOLEAN acceptPrim;
-
 void
 yyinit(void)
 {
@@ -975,7 +988,7 @@ yyinit(void)
        setyyin _before_ calling yylex for the first time! */
     yy_switch_to_buffer(yy_create_buffer(stdin, YY_BUF_SIZE));
 
-    if (acceptPrim)
+    if (nonstandardFlag)
        PUSH_STATE(GlaExt);
     else
        PUSH_STATE(Code);
@@ -1002,6 +1015,11 @@ new_filename(char *f) /* This looks pretty dodgy to me (WDP) */
        forcing insertion of ; or } as appropriate
 */
 
+#ifdef HSP_DEBUG
+#define LAYOUT_DEBUG
+#endif
+
+
 static BOOLEAN
 hsshouldindent(void)
 {
@@ -1013,7 +1031,7 @@ hsshouldindent(void)
 void
 hssetindent(void)
 {
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hssetindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 
@@ -1042,7 +1060,7 @@ hssetindent(void)
 void
 hsincindent(void)
 {
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsincindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
     hsentercontext(indenttab[icontexts] & ~1);
@@ -1070,7 +1088,7 @@ hsentercontext(int indent)
     }
     forgetindent = FALSE;
     indenttab[icontexts] = indent;
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsentercontext:indent=%d,hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", indent, hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 }
@@ -1081,7 +1099,7 @@ void
 hsendindent(void)
 {
     --icontexts;
-#ifdef HSP_DEBUG
+#ifdef LAYOUT_DEBUG
     fprintf(stderr, "hsendindent:hscolno=%d,hspcolno=%d,INDENTPT[%d]=%d\n", hscolno, hspcolno, icontexts, INDENTPT);
 #endif
 }
@@ -1089,14 +1107,12 @@ hsendindent(void)
 /*
  *     Return checks the indentation level and returns ;, } or the specified token.
  */
-
 static int
 Return(int tok)
 {
 #ifdef HSP_DEBUG
     extern int yyleng;
 #endif
-
     if (hsshouldindent()) {
        if (hspcolno < INDENTPT) {
 #ifdef HSP_DEBUG
@@ -1112,6 +1128,7 @@ Return(int tok)
            return (SEMI);
        }
     }
+
     hssttok = -1;
 #ifdef HSP_DEBUG
     fprintf(stderr, "returning %d (%d:%d)\n", tok, hspcolno, INDENTPT);
@@ -1164,14 +1181,16 @@ yylex()
        hscolno = hscolno_save;
        hspcolno = hspcolno_save;
        etags = etags_save;
-       in_interface = FALSE;
        icontexts = icontexts_save - 1;
        icontexts_save = 0;
 #ifdef HSP_DEBUG
        fprintf(stderr, "finished reading interface (%d:%d:%d)\n", hscolno, hspcolno, INDENTPT);
 #endif
        eof = FALSE;
-       RETURN(LEOF);
+
+       /* RETURN(LEOF); */
+        hsperror("No longer using yacc to parse interface files");
+
     } else {
        yyterminate();
     }
@@ -1182,7 +1201,7 @@ yylex()
 /**********************************************************************
 *                                                                     *
 *                                                                     *
-*     Input Processing for Interfaces                                 *
+*     Input Processing for Interfaces -- Not currently used !!!       *
 *                                                                     *
 *                                                                     *
 **********************************************************************/
@@ -1212,7 +1231,6 @@ setyyin(char *file)
     hscolno_save = hscolno;
     hspcolno_save = hspcolno;
     hscolno = hspcolno = 0;
-    in_interface = TRUE;
     etags_save = etags; /* do not do "etags" stuff in interfaces */
     etags = 0;         /* We remember whether we are doing it in
                           the module, so we can restore it later [WDP 94/09] */
@@ -1369,15 +1387,23 @@ hsnewqid(char *name, int length)
     *dot = '.';
     name[length] = save;
 
-    return _isconstr(dot+1);
+    return isconstr(dot+1);
 }
 
-BOOLEAN 
-isconstr(char *s) /* walks past leading underscores before using the macro */
+static
+BOOLEAN
+is_commment(char* lexeme, int len)
 {
-    char *temp = s;
-
-    for ( ; temp != NULL && *temp == '_' ; temp++ );
-
-    return _isconstr(temp);
+   char* ptr;
+   int i;
+       
+   if (len < 2) {
+      return FALSE;
+   }
+
+   for(i=0;i<len;i++) {
+     if (lexeme[i] != '-') return FALSE;
+   }        
+   return TRUE;
 }
+