1 /* unlit.c Wed Dec 5 17:16:24 GMT 1990
3 * Literate script filter. In contrast with the format used by most
4 * programming languages, a literate script is a program in which
5 * comments are given the leading role, whilst program text must be
6 * explicitly flagged as such by placing a `>' character in the first
7 * column on each line. It is hoped that this style of programming will
8 * encourage the writing of accurate and clearly documented programs
9 * in which the writer may include motivating arguments, examples
12 * Unlit is a filter that can be used to strip all of the comment lines
13 * out of a literate script file. The command format for unlit is:
14 * unlit [-n] [-q] ifile ofile
15 * where ifile and ofile are the names of the input (literate script) and
16 * output (raw program) files respectively. Either of these names may
17 * be `-' representing the standard input or the standard output resp.
18 * A number of rules are used in an attempt to guard against the most
19 * common errors that are made when writing literate scripts:
20 * 1) Empty script files are not permitted. A file in which no lines
21 * begin with `>' usually indicates a file in which the programmer
22 * has forgotten about the literate script convention.
23 * 2) A line containing part of a program definition (i.e. preceded by `>')
24 * cannot be used immediately before or after a comment line unless
25 * the comment line is blank. This error usually indicates that
26 * the `>' character has been omitted from a line in a section of
27 * program spread over a number of lines.
28 * Using the -q (quiet) flag suppresses the signalling of these error
29 * conditions. The default behaviour can be selected explicitly using
30 * the -n (noisy) option so that any potential errors in the script file
33 * The original idea for the use of literate scripts is due to Richard
34 * Bird of the Programming Research Group, Oxford and was initially
35 * adopted for use in the implementation of the functional programming
36 * language Orwell used for teaching in Oxford. This idea has subsequently
37 * been borrowed in a number of other language implementations.
39 * Modified to understand \begin{code} ... \end{code} used in Glasgow. -- LA
40 * And \begin{pseudocode} ... \end{pseudocode}. -- LA
48 #define NULLSTR ((char *)0)
50 #define MISSINGBLANK "unlit: Program line next to comment"
51 #define EMPTYSCRIPT "unlit: No definitions in file (perhaps you forgot the '>'s?)"
52 #define USAGE "usage: unlit [-q] [-n] [-c] [-#] [-P] [-h label] file1 file2\n"
53 #define CANNOTOPEN "unlit: cannot open \"%s\"\n"
54 #define CANNOTWRITE "unlit: error writing \"%s\"\n"
55 #define CANNOTWRITESTDOUT "unlit: error writing standard output\n"
56 #define DISTINCTNAMES "unlit: input and output filenames must differ\n"
57 #define MISSINGENDCODE "unlit: missing \\end{code}\n"
59 #define BEGINCODE "\\begin{code}"
60 #define LENBEGINCODE 12
61 #define ENDCODE "\\end{code}"
64 /* According to Will Partain, the inventor of pseudocode, this is gone now. */
65 #define MISSINGENDPSEUDOCODE "unlit: missing \\end{pseudocode}\n"
66 #define BEGINPSEUDOCODE "\\begin{pseudocode}"
67 #define LENBEGINPSEUDOCODE 18
68 #define ENDPSEUDOCODE "\\end{pseudocode}"
69 #define LENENDPSEUDOCODE 16
/* Kind of an input line.  readline returns one of these values and unlit
 * keeps the previous and the current kind so that it can report a program
 * line placed directly next to a comment line.  PSEUDO is commented out of
 * the enumeration. */
72 typedef enum { START, BLANK, TEXT, DEFN, BEGIN, /*PSEUDO,*/ END, HASH, SHEBANG } line;
/* True when c is a space, a tab or a carriage return.  The argument is not
 * parenthesised and is expanded several times, so pass a plain variable
 * rather than an expression with operators or side effects. */
73 #define isWhitespace(c) (c==' ' || c=='\t' || c=='\r')
/* True when c terminates a line: a newline or EOF.  The argument is not
 * parenthesised and is expanded twice, so pass a plain variable. */
74 #define isLineTerm(c) (c=='\n' || c==EOF)
/* File-scope option flags and state used by the functions in this file. */
76 static int noisy = 1; /* 0 => keep quiet about errors, 1 => report errors */
/* main exits with status 0 when errors is 0 and with status 1 otherwise. */
77 static int errors = 0; /* count the number of errors reported */
78 static int crunchnl = 0; /* don't print \n for removed lines */
79 static int leavecpp = 1; /* leave preprocessor lines */
80 static int ignore_shebang = 1; /* Leave out shebang (#!) lines */
81 static int no_line_pragma = 0; /* Leave out initial line pragma */
/* When non-NULL (and no_line_pragma is 0), main writes a `#line 1 "..."'
 * directive naming this string at the top of the output.  It is filled in
 * by main's -h branch with a malloc'ed copy of *argv. */
83 static char* prefix_str = NULL; /* Prefix output with a string */
/* Name of the output file.  writeerror compares it with "-" to choose
 * between the standard-output and the named-file error message. */
85 static char *ofilename = NULL;
87 /* complain(file,line,what)
89 * print error message `what' for `file' at `line'. The error is suppressed
90 * if noisy is not set.
93 void complain(char *file, int lin, char *what)
97 fprintf(stderr, "%s ", file);
98 fprintf(stderr,"line %d: %s\n",lin,what);
103 void writeerror(void)
105 if (!strcmp(ofilename,"-")) {
106 fprintf(stderr, CANNOTWRITESTDOUT);
108 fprintf(stderr, CANNOTWRITE, ofilename);
113 void myputc(char c, FILE *ostream)
115 if (putc(c,ostream) == EOF) {
122 /* As getc, but does TAB expansion */
127 static int spleft = 0;
128 static int linepos = 0;
139 else if (c == '\n' || c == '\f') {
142 } else if (c == '\t') {
143 spleft = TABPOS - linepos % TABPOS;
154 /* readline(istream, ostream)
156 * Read a line from the input stream `istream', and return a value
157 * indicating whether that line was:
158 * BLANK (whitespace only),
159 * DEFN (first character is DEFNCHAR),
160 * TEXT (a line of text)
161 * BEGIN (a \begin{code} line)
162 * PSEUDO (a \begin{pseudocode} line)
163 * HASH (a preprocessor line)
164 * or END (indicating an EOF).
165 * Lines of type DEFN are copied to the output stream `ostream'
166 * (without the leading DEFNCHAR). BLANK and TEXT lines are
167 * replaced by empty (i.e. blank lines) in the output stream, so
168 * that error messages referring to line numbers in the output file
169 * can also be used to locate the corresponding line in the input
173 line readline(istream,ostream)
174 FILE *istream, *ostream; {
185 if ( ignore_shebang ) {
188 while (c=egetc(istream), !isLineTerm(c)) ;
196 while (c=egetc(istream), !isLineTerm(c))
198 myputc('\n',ostream);
205 while (c=egetc(istream), !isLineTerm(c))
207 myputc('\n',ostream);
212 myputc('\n',ostream);
214 while (isWhitespace(c))
221 while (c=egetc(istream), !isLineTerm(c))
222 if (i < sizeof buf - 1)
224 while(i > 0 && isspace(buf[i-1]))
227 if (strcmp(buf, BEGINCODE) == 0)
230 else if (strcmp(buf, BEGINPSEUDOCODE) == 0)
238 /* unlit(file,istream,ostream)
240 * Copy the file named `file', accessed using the input stream `istream'
241 * to the output stream `ostream', removing any comments and checking
242 * for bad use of literate script features:
243 * - there should be at least one BLANK line between a DEFN and TEXT
244 * - there should be at least one DEFN line in a script.
247 void unlit(char *file, FILE *istream, FILE *ostream)
249 line last, this=START;
255 this = readline(istream, ostream);
259 if (last==DEFN && this==TEXT)
260 complain(file, linesread-1, MISSINGBLANK);
261 if (last==TEXT && this==DEFN)
262 complain(file, linesread, MISSINGBLANK);
264 /* start of code, copy to end */
267 if (fgets(lineb, sizeof lineb, istream) == NULL) {
268 complain(file, linesread, MISSINGENDCODE);
272 if (strncmp(lineb,ENDCODE,LENENDCODE) == 0) {
273 myputc('\n', ostream);
276 fputs(lineb, ostream);
281 if (this == PSEUDO) {
284 if (fgets(lineb, sizeof lineb, istream) == NULL) {
285 complain(file, linesread, MISSINGENDPSEUDOCODE);
289 myputc('\n', ostream);
290 if (strncmp(lineb,ENDPSEUDOCODE,LENENDPSEUDOCODE) == 0) {
299 complain(file,linesread,EMPTYSCRIPT);
304 * Main program. Processes command line arguments, looking for leading:
305 * -q quiet mode - do not complain about bad literate script files
306 * -n noisy mode - complain about bad literate script files.
307 * -r remove cpp droppings in output (NOTE: -r is not listed in USAGE; verify).
308 * -P don't output any CPP line pragmas.
309 * Expects two additional arguments, a file name for the input and a file
310 * name for the output file. These two names must normally be distinct.
311 * An exception is made for the special name "-" which can be used in either
312 * position to specify the standard input or the standard output respectively.
315 int main(int argc,char **argv)
317 FILE *istream, *ostream;
320 for (argc--, argv++; argc > 0; argc--, argv++)
321 if (strcmp(*argv,"-n")==0)
323 else if (strcmp(*argv,"-q")==0)
325 else if (strcmp(*argv,"-c")==0)
327 else if (strcmp(*argv,"-P")==0)
329 else if (strcmp(*argv,"-h")==0) {
334 prefix_str = (char*)malloc(sizeof(char)*(1+strlen(*argv)));
336 strcpy(prefix_str, *argv);
338 } else if (strcmp(*argv,"-#")==0)
344 fprintf(stderr, USAGE);
348 if (strcmp(argv[0],argv[1])==0 && strcmp(argv[0],"-")!=0) {
349 fprintf(stderr, DISTINCTNAMES);
354 if (strcmp(argv[0], "-")==0) {
359 if ((istream=fopen(argv[0], "r")) == NULL) {
360 fprintf(stderr, CANNOTOPEN, argv[0]);
365 if (strcmp(argv[1], "-")==0)
368 if ((ostream=fopen(argv[1], "w")) == NULL) {
369 fprintf(stderr, CANNOTOPEN, argv[1]);
373 /* Prefix the output with line pragmas */
374 if (!no_line_pragma && prefix_str) {
375 /* Both GHC and CPP understand the #line pragma.
376 * We used to throw in both a #line and a {-# LINE #-} pragma
377 * here, but CPP doesn't understand {-# LINE #-} so it thought
378 * the line numbers were off by one. We could put the {-# LINE
379 * #-} before the #line, but there's no point since GHC
380 * understands #line anyhow. --SDM 8/2003
382 fprintf(ostream, "#line 1 \"%s\"\n", prefix_str);
385 unlit(file, istream, ostream);
387 if (istream != stdin) fclose(istream);
388 if (ostream != stdout) {
389 if (fclose(ostream) == EOF) {
394 exit(errors==0 ? 0 : 1);