fix handling of 0x220c, the backwards $\in$
[unicode2tex.git] / src / MkTable.java
1 import java.io.*;
2 import java.util.*;
3 import java.net.*;
4
5 public class MkTable {
6
7     private static final String locator = "href=\"tables/";
8
9     public static void main(String[] args) throws Exception {
10         BufferedReader index = new BufferedReader(new InputStreamReader(new URL("http://www.stixfonts.org/charactertable.html")
11                                                                         .openStream()));
12         PrintWriter    table = new PrintWriter(new OutputStreamWriter(new FileOutputStream(args[0]), "UTF-8"));
13
14         String iline = null;
15         while( (iline = index.readLine()) != null) {
16
17             if (iline.indexOf(locator) == -1) continue;
18             iline = iline.substring(iline.indexOf(locator)+locator.length());
19             iline = iline.substring(0, iline.indexOf('\"'));
20
21             String url = "http://www.stixfonts.org/tables/"+iline;
22             System.out.println();
23             System.out.println("fetching " + url);
24             try {
25                 BufferedReader in = new BufferedReader(new InputStreamReader(new URL(url).openStream()));
26                 String s = null;
27                 while( (s = in.readLine()) != null) {
28                     if (s.equals("  <tr height=\"17\">")) {
29                         s = in.readLine();
30                         s = in.readLine().trim();
31                         
32                         if (s.startsWith("<td>") && s.endsWith("</td>")) {
33                             s = s.substring(4);
34                             s = s.substring(0, s.length()-5);
35                             if (!s.startsWith("G") && !s.startsWith("M")) {
36                                 int code = Integer.parseInt(s, 16);
37                                 s = in.readLine();
38                                 s = in.readLine().trim();
39                                 if (s.startsWith("<td>\\") && s.endsWith("</td>")) {
40                                     s = s.substring(4);
41                                     s = s.substring(0, s.length()-5);
42                                     
43                                     // FIXME more
44                                     s = s.replace("&amp;" , "&");
45                                     System.out.println("charnum: " + code + " char: " +((char)code)+" tex: "+s);
46
47                                     if (code >= 0x391 && code <= 0x3f6 &&
48                                         s.startsWith("\\up") && Character.isUpperCase(s.charAt(3)))
49                                         s = "$\\Up"+s.substring(3,4).toLowerCase()+s.substring(4)+"$";
50                                     else
51                                         if (s.equals("\\textquotedbl")) s = "\"";
52                                     else if (s.equals("\\endash")) s = "-";
53                                     else if (s.equals("\\emdash")) s = "--";
54                                     else if (!s.startsWith("\\text")) s = "$"+s+"$";
55
56                                     table.println(((char)code)+" "+Integer.toString(code,16)+ " " +s);
57                                 }
58                             }
59                         }
60                     }
61                 }
62             } catch (java.io.FileNotFoundException f) {
63                 System.out.println("  could not fetch");
64             }
65         }
66
67         // override how small-caps are handled
68         for(int i=0; i<26; i++)
69             table.println( ((char)(0x1d10+i)) + " " + Integer.toString(0x1d10+i, 16) + " {\\textsc{"+((char)('A'+i))+"}}");
70
71         // my own customizations
72         for(int i=0; i<10; i++)
73             table.println(((char)(0x2070+i))+" 207"+i+" ${}^{\\text{"+i+"}}$");
74         table.println(((char)0x207A)+" 207A ${}^{\\text{+}}$");
75         table.println(((char)0x207B)+" 207B ${}^{\\text{-}}$");
76         table.println(((char)0x207C)+" 207C ${}^{\\text{=}}$");
77         table.println(((char)0x207D)+" 207D ${}^{\\text{(}}$");
78         table.println(((char)0x207E)+" 207E ${}^{\\text{)}}$");
79
80         for(int i=0; i<10; i++)
81             table.println(((char)(0x2080+i))+" 208"+i+" ${}_{\\text{"+i+"}}$");
82         table.println(((char)0x208A)+" 208A ${}_{\\text{+}}$");
83         table.println(((char)0x208B)+" 208B ${}_{\\text{-}}$");
84         table.println(((char)0x208C)+" 208C ${}_{\\text{=}}$");
85         table.println(((char)0x208D)+" 208D ${}_{\\text{(}}$");
86         table.println(((char)0x208E)+" 208E ${}_{\\text{)}}$");
87
88         table.println(((char)0x1d00)+" 1d00 ${}_{\\text{A}}$");
89         table.println(((char)0x1d04)+" 1d04 ${}_{\\text{C}}$");
90         table.println(((char)0x1d05)+" 1d05 ${}_{\\text{D}}$");
91         table.println(((char)0x1d07)+" 1d07 ${}_{\\text{E}}$");
92         table.println(((char)0x1d0b)+" 1d0b ${}_{\\text{K}}$");
93         table.println(((char)0x1d0d)+" 1d0d ${}_{\\text{M}}$");
94         table.println(((char)0x1d0f)+" 1d0f ${}_{\\text{O}}$");
95         table.println(((char)0x1d18)+" 1d18 ${}_{\\text{P}}$");
96         table.println(((char)0x1d1b)+" 1d1b ${}_{\\text{T}}$");
97         table.println(((char)0x1d1c)+" 1d1c ${}_{\\text{U}}$");
98         table.println(((char)0x1d20)+" 1d20 ${}_{\\text{V}}$");
99         table.println(((char)0x1d21)+" 1d21 ${}_{\\text{W}}$");
100         table.println(((char)0x1d22)+" 1d22 ${}_{\\text{Z}}$");
101         table.println(((char)0x1d29)+" 1d29 ${}_{\\text{P}}$");
102
103         table.println(((char)0x25ef)+" 25ef $\\bigcirc$");
104
105         table.println(((char)0x220c)+" 220c /\\llap{$\\ni$}");
106
107         table.println(((char)0x2a1f)+" 2a1f $\\fatsemi$");
108         table.println(((char)0x2982)+" 2982 $\\colon$");          // this is less-than-ideal
109
110         table.println(((char)0x00B2)+" 00b2 ${}^{\\text{2}}$");
111         table.println(((char)0x00B3)+" 00b3 ${}^{\\text{3}}$");
112         table.println(((char)0x00BA)+" 00ba ${}^{\\text{o}}$");
113         table.println(((char)0x00B9)+" 00b9 ${}^{\\text{1}}$");
114
115         table.println(((char)0x25cb)+" 25cb $\\circ$");
116
117         table.flush();
118         table.close();
119     }
120
121 }