fix handling of carets
[unicode2tex.git] / src / Unicode2Tex.java
1 import java.io.*;
2 import java.util.*;
3 import java.net.*;
4
/**
 * Command-line filter that reads UTF-8 text from standard input and writes
 * an ASCII-only TeX rendering of it to standard output.
 *
 * <p>TeX special characters are replaced by fixed escape sequences; other
 * characters outside the plain pass-through ranges are looked up in a
 * mapping table loaded from the classpath resource {@code table.utf8.txt}.
 * Characters that have no mapping are reported once each on standard error.
 */
public class Unicode2Tex {

    /** Name of the classpath resource that holds the code point to TeX mapping. */
    private static final String TABLE_RESOURCE = "table.utf8.txt";

    /** Unicode code point to TeX replacement text, filled by {@link #loadTable()}. */
    private static final Map<Integer,String> table = new HashMap<Integer,String>();

    /** Code points for which a "not found" warning has already been printed. */
    private static final Set<Integer> warned = new HashSet<Integer>();

    /**
     * Loads the mapping table, then translates standard input to standard output.
     *
     * @param args ignored
     * @throws Exception if the table resource is missing or malformed, or if
     *                   reading the input fails
     */
    public static void main(String[] args) throws Exception {
        loadTable();

        BufferedReader in  = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        PrintWriter    out = new PrintWriter(new OutputStreamWriter(System.out, "ASCII"));
        PrintWriter    err = new PrintWriter(new OutputStreamWriter(System.err, "UTF-8"));

        translate(in, out, err);

        err.flush();
        out.flush();
        out.close();
    }

    /**
     * Reads the mapping table from the classpath into {@link #table}.
     *
     * <p>Each usable line consists of a two-character prefix (skipped), a
     * hexadecimal code point, a space, and the TeX replacement text. Lines
     * shorter than two characters, or without a code/text pair after the
     * prefix, are skipped.
     *
     * @throws IOException if the resource cannot be found or read
     * @throws NumberFormatException if a code is not valid hexadecimal
     */
    private static void loadTable() throws IOException {
        URL resource = Unicode2Tex.class.getClassLoader().getResource(TABLE_RESOURCE);
        if (resource == null) {
            // getResource() signals "missing" with null; fail with a clear message
            // instead of a NullPointerException further down.
            throw new FileNotFoundException(
                "resource '" + TABLE_RESOURCE + "' not found on the classpath");
        }

        BufferedReader tabletxt =
            new BufferedReader(new InputStreamReader(resource.openStream(), "UTF-8"));
        try {
            String line;
            while ((line = tabletxt.readLine()) != null) {
                if (line.length() < 2) {
                    continue;
                }
                String entry = line.substring(2);
                int space = entry.indexOf(' ');
                if (space <= 0) {
                    // No "<hex> <tex>" pair on this line; nothing to record.
                    continue;
                }
                int code = Integer.parseInt(entry.substring(0, space), 16);
                table.put(code, entry.substring(space + 1).trim());
            }
        } finally {
            // Release the stream even when a line fails to parse.
            tabletxt.close();
        }
    }

    /**
     * Copies {@code in} to {@code out}, converting every character to TeX.
     *
     * <p>{@link Reader#read()} delivers UTF-16 code units, so a character
     * above U+FFFF arrives as a surrogate pair. A valid pair is combined into
     * a single code point before conversion so that table entries above the
     * Basic Multilingual Plane can match. An unpaired surrogate is converted
     * on its own.
     */
    private static void translate(Reader in, PrintWriter out, PrintWriter err)
            throws IOException {
        int pending = -1; // code unit read as look-ahead but not yet converted
        while (true) {
            int c;
            if (pending != -1) {
                c = pending;
                pending = -1;
            } else {
                c = in.read();
            }
            if (c == -1) {
                break;
            }

            if (Character.isHighSurrogate((char) c)) {
                int low = in.read();
                if (low != -1 && Character.isLowSurrogate((char) low)) {
                    c = Character.toCodePoint((char) c, (char) low);
                } else {
                    // Not a pair: handle the look-ahead unit in the next round.
                    pending = low;
                }
            }

            emit(c, out, err);
        }
    }

    /**
     * Writes the TeX rendering of a single code point to {@code out}.
     *
     * <p>Fixed escapes take priority, then the plain range 0x30..0x7a, then
     * the mapping table, then any remaining code point below 127. Anything
     * else triggers a one-time warning on {@code err}.
     */
    private static void emit(int cp, PrintWriter out, PrintWriter err) {
        switch (cp) {
            case '\n': out.println("\\\\"); break;
            case '\r': break;
            case ' ' : out.print("{\\phantom{X}}"); break;
            case '\\': out.print("{\\char`\\\\}"); break;
            case '~' : out.print("{\\rlap{\\lower.85ex\\hbox{\\large\\char126}}\\ }"); break;
            case '{' : out.print("{\\char123}"); break;
            case '}' : out.print("{\\char125}"); break;
            case '_' : out.print("{\\char95}"); break;
            case '$' : out.print("{\\$}"); break;
            case '%' : out.print("{\\%}"); break;
            case '&' : out.print("{\\&}"); break;
            case '^' : out.print("{\\text{\\textasciicircum}}"); break;
            case '`' : out.print("{`}"); break;
            default:
                String tex = table.get(cp);
                if (cp >= 0x30 && cp <= 0x7a) {
                    out.print((char) cp);
                } else if (tex != null) {
                    if (tex.startsWith("$")) {
                        out.print("{\\rlap{" + tex + "}\\ }");
                    } else {
                        out.print('{' + tex + '}');
                    }
                } else if (cp < 127) {
                    out.print((char) cp);
                } else {
                    // Set.add() returns true only the first time, so each
                    // unmapped code point is reported once.
                    if (warned.add(cp)) {
                        err.println("warning: character '"
                                    + new String(Character.toChars(cp))
                                    + "' (0x" + Integer.toString(cp, 16)
                                    + ") not found in stix table");
                        err.flush();
                    }
                    // NOTE(review): emits the decimal code point as a
                    // placeholder, as the code always has; confirm this is
                    // the intended fallback for unmapped characters.
                    out.print(cp);
                }
        }
    }

}