1 /*-------------------------------------------------------------------------
2 This is an automatically generated file: do not edit
3 Generated by udconfc at Mon Jan 31 23:37:36 EST 2005
4 -------------------------------------------------------------------------*/
8 /* Unicode general categories, listed in the same order as in the Unicode
9 * standard -- this must be the same order as in GHC.Unicode.
13 NUMCAT_LU, /* Letter, Uppercase */
14 NUMCAT_LL, /* Letter, Lowercase */
15 NUMCAT_LT, /* Letter, Titlecase */
16 NUMCAT_LM, /* Letter, Modifier */
17 NUMCAT_LO, /* Letter, Other */
18 NUMCAT_MN, /* Mark, Non-Spacing */
19 NUMCAT_MC, /* Mark, Spacing Combining */
20 NUMCAT_ME, /* Mark, Enclosing */
21 NUMCAT_ND, /* Number, Decimal */
22 NUMCAT_NL, /* Number, Letter */
23 NUMCAT_NO, /* Number, Other */
24 NUMCAT_PC, /* Punctuation, Connector */
25 NUMCAT_PD, /* Punctuation, Dash */
26 NUMCAT_PS, /* Punctuation, Open */
27 NUMCAT_PE, /* Punctuation, Close */
28 NUMCAT_PI, /* Punctuation, Initial quote */
29 NUMCAT_PF, /* Punctuation, Final quote */
30 NUMCAT_PO, /* Punctuation, Other */
31 NUMCAT_SM, /* Symbol, Math */
32 NUMCAT_SC, /* Symbol, Currency */
33 NUMCAT_SK, /* Symbol, Modifier */
34 NUMCAT_SO, /* Symbol, Other */
35 NUMCAT_ZS, /* Separator, Space */
36 NUMCAT_ZL, /* Separator, Line */
37 NUMCAT_ZP, /* Separator, Paragraph */
38 NUMCAT_CC, /* Other, Control */
39 NUMCAT_CF, /* Other, Format */
40 NUMCAT_CS, /* Other, Surrogate */
41 NUMCAT_CO, /* Other, Private Use */
42 NUMCAT_CN /* Other, Not Assigned */
47 unsigned int category;
48 unsigned int catnumber;
59 const struct _convrule_ *rule;
/* General-category bit flags: each value has exactly one bit set, so flags
 * can be ORed into a mask and tested with a bitwise AND (see checkattr).
 * Only part of the flag list is visible in this listing. */
62 #define GENCAT_ZP 67108864 /* 1<<26  Separator, Paragraph */
63 #define GENCAT_MC 8388608 /* 1<<23  Mark, Spacing Combining */
64 #define GENCAT_NO 65536 /* 1<<16  Number, Other */
65 #define GENCAT_SK 1024 /* 1<<10  Symbol, Modifier */
66 #define GENCAT_CO 268435456 /* 1<<28  Other, Private Use */
67 #define GENCAT_ME 4194304 /* 1<<22  Mark, Enclosing */
70 #define GENCAT_LT 524288 /* 1<<19  Letter, Titlecase */
71 #define GENCAT_PC 2048 /* 1<<11  Punctuation, Connector */
77 #define GENCAT_SO 8192 /* 1<<13  Symbol, Other */
79 #define GENCAT_CS 134217728 /* 1<<27  Other, Surrogate */
80 #define GENCAT_PF 131072 /* 1<<17  Punctuation, Final quote */
81 #define GENCAT_CF 32768 /* 1<<15  Other, Format */
84 #define GENCAT_LL 4096 /* 1<<12  Letter, Lowercase */
85 #define GENCAT_ZL 33554432 /* 1<<25  Separator, Line */
86 #define GENCAT_LM 1048576 /* 1<<20  Letter, Modifier */
87 #define GENCAT_PI 16384 /* 1<<14  Punctuation, Initial quote */
88 #define GENCAT_NL 16777216 /* 1<<24  Number, Letter */
89 #define GENCAT_MN 2097152 /* 1<<21  Mark, Non-Spacing */
90 #define GENCAT_LO 262144 /* 1<<18  Letter, Other */
/* Table sizes passed to getrule() as the element count for bsearch(). */
91 #define MAX_UNI_CHAR 1114109 /* 0x10FFFD: last code point covered by the final allchars block (1048576 + 65534 - 1) */
92 #define NUM_BLOCKS 1916 /* element count used when searching all of allchars */
93 #define NUM_CONVBLOCKS 835 /* element count used when searching convchars */
94 #define NUM_SPACEBLOCKS 8 /* element count used when searching spacechars */
95 #define NUM_LAT1BLOCKS 63 /* checkattr searches only this many leading allchars entries when c < 256 */
/*
 * Rule constants referenced by the character block tables.
 *
 * Each initializer lists six values.  The first two are the category bit
 * flag (GENCAT_*) and the category index (NUMCAT_*); in every rule below
 * both name the same two-letter category.
 *
 * NOTE(review): the declaration of struct _convrule_ is not in view here.
 * The last four values look like a "has case mapping" flag (0 or 1)
 * followed by three signed code point distances.  caseconv() is
 * instantiated with the names updist, lowdist and titledist, presumably
 * these three fields in that order -- confirm against the struct
 * declaration before relying on it.
 *
 * Rules whose last four values are all zero carry category data only.
 * The rules are emitted in generator order, not in numeric order.
 */
97 static const struct _convrule_ rule116={GENCAT_LU, NUMCAT_LU, 1, 0, -8383, 0};
98 static const struct _convrule_ rule108={GENCAT_LU, NUMCAT_LU, 1, 0, -86, 0};
99 static const struct _convrule_ rule88={GENCAT_LU, NUMCAT_LU, 1, 0, 80, 0};
100 static const struct _convrule_ rule86={GENCAT_LL, NUMCAT_LL, 1, -96, 0, -96};
101 static const struct _convrule_ rule79={GENCAT_LU, NUMCAT_LU, 0, 0, 0, 0};
102 static const struct _convrule_ rule56={GENCAT_LL, NUMCAT_LL, 1, -203, 0, -203};
103 static const struct _convrule_ rule54={GENCAT_LL, NUMCAT_LL, 1, -205, 0, -205};
104 static const struct _convrule_ rule48={GENCAT_LL, NUMCAT_LL, 1, -79, 0, -79};
105 static const struct _convrule_ rule40={GENCAT_LU, NUMCAT_LU, 1, 0, 218, 0};
106 static const struct _convrule_ rule113={GENCAT_ZL, NUMCAT_ZL, 0, 0, 0, 0};
107 static const struct _convrule_ rule103={GENCAT_LT, NUMCAT_LT, 1, 0, -8, 0};
108 static const struct _convrule_ rule98={GENCAT_LL, NUMCAT_LL, 1, 86, 0, 86};
109 static const struct _convrule_ rule95={GENCAT_LL, NUMCAT_LL, 1, 8, 0, 8};
110 static const struct _convrule_ rule39={GENCAT_LU, NUMCAT_LU, 1, 0, 214, 0};
111 static const struct _convrule_ rule119={GENCAT_NL, NUMCAT_NL, 1, -16, 0, -16};
112 static const struct _convrule_ rule101={GENCAT_LL, NUMCAT_LL, 1, 112, 0, 112};
113 static const struct _convrule_ rule93={GENCAT_NL, NUMCAT_NL, 0, 0, 0, 0};
114 static const struct _convrule_ rule60={GENCAT_LL, NUMCAT_LL, 1, -213, 0, -213};
115 static const struct _convrule_ rule59={GENCAT_LL, NUMCAT_LL, 1, -211, 0, -211};
116 static const struct _convrule_ rule42={GENCAT_LU, NUMCAT_LU, 1, 0, 219, 0};
117 static const struct _convrule_ rule38={GENCAT_LL, NUMCAT_LL, 1, 130, 0, 130};
118 static const struct _convrule_ rule34={GENCAT_LL, NUMCAT_LL, 1, 97, 0, 97};
119 static const struct _convrule_ rule25={GENCAT_LU, NUMCAT_LU, 1, 0, -121, 0};
120 static const struct _convrule_ rule24={GENCAT_LL, NUMCAT_LL, 1, -232, 0, -232};
121 static const struct _convrule_ rule20={GENCAT_LL, NUMCAT_LL, 1, 121, 0, 121};
122 static const struct _convrule_ rule16={GENCAT_CF, NUMCAT_CF, 0, 0, 0, 0};
123 static const struct _convrule_ rule4={GENCAT_PS, NUMCAT_PS, 0, 0, 0, 0};
124 static const struct _convrule_ rule123={GENCAT_CO, NUMCAT_CO, 0, 0, 0, 0};
125 static const struct _convrule_ rule112={GENCAT_LU, NUMCAT_LU, 1, 0, -126, 0};
126 static const struct _convrule_ rule106={GENCAT_LT, NUMCAT_LT, 1, 0, -9, 0};
127 static const struct _convrule_ rule105={GENCAT_LU, NUMCAT_LU, 1, 0, -74, 0};
128 static const struct _convrule_ rule97={GENCAT_LL, NUMCAT_LL, 1, 74, 0, 74};
129 static const struct _convrule_ rule65={GENCAT_LM, NUMCAT_LM, 0, 0, 0, 0};
130 static const struct _convrule_ rule30={GENCAT_LU, NUMCAT_LU, 1, 0, 79, 0};
131 static const struct _convrule_ rule5={GENCAT_PE, NUMCAT_PE, 0, 0, 0, 0};
132 static const struct _convrule_ rule114={GENCAT_ZP, NUMCAT_ZP, 0, 0, 0, 0};
133 static const struct _convrule_ rule104={GENCAT_LL, NUMCAT_LL, 1, 9, 0, 9};
134 static const struct _convrule_ rule94={GENCAT_LL, NUMCAT_LL, 1, -59, 0, -59};
135 static const struct _convrule_ rule92={GENCAT_MC, NUMCAT_MC, 0, 0, 0, 0};
136 static const struct _convrule_ rule91={GENCAT_LL, NUMCAT_LL, 1, -48, 0, -48};
137 static const struct _convrule_ rule82={GENCAT_LL, NUMCAT_LL, 1, -86, 0, -86};
138 static const struct _convrule_ rule78={GENCAT_LL, NUMCAT_LL, 1, -57, 0, -57};
139 static const struct _convrule_ rule66={GENCAT_MN, NUMCAT_MN, 0, 0, 0, 0};
140 static const struct _convrule_ rule55={GENCAT_LL, NUMCAT_LL, 1, -202, 0, -202};
141 static const struct _convrule_ rule50={GENCAT_LU, NUMCAT_LU, 1, 0, -56, 0};
142 static const struct _convrule_ rule45={GENCAT_LU, NUMCAT_LU, 1, 0, 2, 1};
143 static const struct _convrule_ rule31={GENCAT_LU, NUMCAT_LU, 1, 0, 202, 0};
144 static const struct _convrule_ rule6={GENCAT_SM, NUMCAT_SM, 0, 0, 0, 0};
145 static const struct _convrule_ rule107={GENCAT_LL, NUMCAT_LL, 1, -7205, 0, -7205};
146 static const struct _convrule_ rule90={GENCAT_LU, NUMCAT_LU, 1, 0, 48, 0};
147 static const struct _convrule_ rule87={GENCAT_LU, NUMCAT_LU, 1, 0, -7, 0};
148 static const struct _convrule_ rule44={GENCAT_LL, NUMCAT_LL, 1, 56, 0, 56};
149 static const struct _convrule_ rule33={GENCAT_LU, NUMCAT_LU, 1, 0, 207, 0};
150 static const struct _convrule_ rule18={GENCAT_LL, NUMCAT_LL, 1, 743, 0, 743};
151 static const struct _convrule_ rule17={GENCAT_NO, NUMCAT_NO, 0, 0, 0, 0};
152 static const struct _convrule_ rule10={GENCAT_SK, NUMCAT_SK, 0, 0, 0, 0};
153 static const struct _convrule_ rule8={GENCAT_ND, NUMCAT_ND, 0, 0, 0, 0};
154 static const struct _convrule_ rule122={GENCAT_CS, NUMCAT_CS, 0, 0, 0, 0};
155 static const struct _convrule_ rule99={GENCAT_LL, NUMCAT_LL, 1, 100, 0, 100};
156 static const struct _convrule_ rule67={GENCAT_MN, NUMCAT_MN, 1, 84, 0, 84};
157 static const struct _convrule_ rule52={GENCAT_LL, NUMCAT_LL, 1, -210, 0, -210};
158 static const struct _convrule_ rule51={GENCAT_LU, NUMCAT_LU, 1, 0, -130, 0};
159 static const struct _convrule_ rule32={GENCAT_LU, NUMCAT_LU, 1, 0, 203, 0};
160 static const struct _convrule_ rule27={GENCAT_LU, NUMCAT_LU, 1, 0, 210, 0};
161 static const struct _convrule_ rule15={GENCAT_PI, NUMCAT_PI, 0, 0, 0, 0};
162 static const struct _convrule_ rule111={GENCAT_LU, NUMCAT_LU, 1, 0, -128, 0};
163 static const struct _convrule_ rule96={GENCAT_LU, NUMCAT_LU, 1, 0, -8, 0};
164 static const struct _convrule_ rule71={GENCAT_LU, NUMCAT_LU, 1, 0, 63, 0};
165 static const struct _convrule_ rule64={GENCAT_LL, NUMCAT_LL, 1, -219, 0, -219};
166 static const struct _convrule_ rule62={GENCAT_LL, NUMCAT_LL, 1, -218, 0, -218};
167 static const struct _convrule_ rule23={GENCAT_LU, NUMCAT_LU, 1, 0, -199, 0};
168 static const struct _convrule_ rule19={GENCAT_PF, NUMCAT_PF, 0, 0, 0, 0};
169 static const struct _convrule_ rule1={GENCAT_ZS, NUMCAT_ZS, 0, 0, 0, 0};
170 static const struct _convrule_ rule120={GENCAT_SO, NUMCAT_SO, 1, 0, 26, 0};
171 static const struct _convrule_ rule115={GENCAT_LU, NUMCAT_LU, 1, 0, -7517, 0};
172 static const struct _convrule_ rule83={GENCAT_LL, NUMCAT_LL, 1, -80, 0, -80};
173 static const struct _convrule_ rule81={GENCAT_LL, NUMCAT_LL, 1, -54, 0, -54};
174 static const struct _convrule_ rule80={GENCAT_LL, NUMCAT_LL, 1, -47, 0, -47};
175 static const struct _convrule_ rule77={GENCAT_LL, NUMCAT_LL, 1, -62, 0, -62};
176 static const struct _convrule_ rule76={GENCAT_LL, NUMCAT_LL, 1, -63, 0, -63};
177 static const struct _convrule_ rule75={GENCAT_LL, NUMCAT_LL, 1, -64, 0, -64};
178 static const struct _convrule_ rule73={GENCAT_LL, NUMCAT_LL, 1, -37, 0, -37};
179 static const struct _convrule_ rule72={GENCAT_LL, NUMCAT_LL, 1, -38, 0, -38};
180 static const struct _convrule_ rule35={GENCAT_LU, NUMCAT_LU, 1, 0, 211, 0};
181 static const struct _convrule_ rule14={GENCAT_LL, NUMCAT_LL, 0, 0, 0, 0};
182 static const struct _convrule_ rule11={GENCAT_PC, NUMCAT_PC, 0, 0, 0, 0};
183 static const struct _convrule_ rule3={GENCAT_SC, NUMCAT_SC, 0, 0, 0, 0};
184 static const struct _convrule_ rule2={GENCAT_PO, NUMCAT_PO, 0, 0, 0, 0};
185 static const struct _convrule_ rule70={GENCAT_LU, NUMCAT_LU, 1, 0, 64, 0};
186 static const struct _convrule_ rule58={GENCAT_LL, NUMCAT_LL, 1, -209, 0, -209};
187 static const struct _convrule_ rule57={GENCAT_LL, NUMCAT_LL, 1, -207, 0, -207};
188 static const struct _convrule_ rule53={GENCAT_LL, NUMCAT_LL, 1, -206, 0, -206};
189 static const struct _convrule_ rule46={GENCAT_LT, NUMCAT_LT, 1, -1, 1, 0};
190 static const struct _convrule_ rule36={GENCAT_LU, NUMCAT_LU, 1, 0, 209, 0};
191 static const struct _convrule_ rule26={GENCAT_LL, NUMCAT_LL, 1, -300, 0, -300};
192 static const struct _convrule_ rule9={GENCAT_LU, NUMCAT_LU, 1, 0, 32, 0};
193 static const struct _convrule_ rule121={GENCAT_SO, NUMCAT_SO, 1, -26, 0, -26};
194 static const struct _convrule_ rule117={GENCAT_LU, NUMCAT_LU, 1, 0, -8262, 0};
195 static const struct _convrule_ rule109={GENCAT_LU, NUMCAT_LU, 1, 0, -100, 0};
196 static const struct _convrule_ rule69={GENCAT_LU, NUMCAT_LU, 1, 0, 37, 0};
197 static const struct _convrule_ rule29={GENCAT_LU, NUMCAT_LU, 1, 0, 205, 0};
198 static const struct _convrule_ rule21={GENCAT_LU, NUMCAT_LU, 1, 0, 1, 0};
199 static const struct _convrule_ rule124={GENCAT_LU, NUMCAT_LU, 1, 0, 40, 0};
200 static const struct _convrule_ rule110={GENCAT_LU, NUMCAT_LU, 1, 0, -112, 0};
201 static const struct _convrule_ rule102={GENCAT_LL, NUMCAT_LL, 1, 126, 0, 126};
202 static const struct _convrule_ rule100={GENCAT_LL, NUMCAT_LL, 1, 128, 0, 128};
203 static const struct _convrule_ rule85={GENCAT_LU, NUMCAT_LU, 1, 0, -60, 0};
204 static const struct _convrule_ rule84={GENCAT_LL, NUMCAT_LL, 1, 7, 0, 7};
205 static const struct _convrule_ rule63={GENCAT_LL, NUMCAT_LL, 1, -217, 0, -217};
206 static const struct _convrule_ rule61={GENCAT_LL, NUMCAT_LL, 1, -214, 0, -214};
207 static const struct _convrule_ rule43={GENCAT_LO, NUMCAT_LO, 0, 0, 0, 0};
208 static const struct _convrule_ rule41={GENCAT_LU, NUMCAT_LU, 1, 0, 217, 0};
209 static const struct _convrule_ rule125={GENCAT_LL, NUMCAT_LL, 1, -40, 0, -40};
210 static const struct _convrule_ rule118={GENCAT_NL, NUMCAT_NL, 1, 0, 16, 0};
211 static const struct _convrule_ rule89={GENCAT_ME, NUMCAT_ME, 0, 0, 0, 0};
212 static const struct _convrule_ rule74={GENCAT_LL, NUMCAT_LL, 1, -31, 0, -31};
213 static const struct _convrule_ rule68={GENCAT_LU, NUMCAT_LU, 1, 0, 38, 0};
214 static const struct _convrule_ rule49={GENCAT_LU, NUMCAT_LU, 1, 0, -97, 0};
215 static const struct _convrule_ rule47={GENCAT_LL, NUMCAT_LL, 1, -2, 0, -1};
216 static const struct _convrule_ rule37={GENCAT_LU, NUMCAT_LU, 1, 0, 213, 0};
217 static const struct _convrule_ rule28={GENCAT_LU, NUMCAT_LU, 1, 0, 206, 0};
218 static const struct _convrule_ rule22={GENCAT_LL, NUMCAT_LL, 1, -1, 0, -1};
219 static const struct _convrule_ rule13={GENCAT_SO, NUMCAT_SO, 0, 0, 0, 0};
220 static const struct _convrule_ rule12={GENCAT_LL, NUMCAT_LL, 1, -32, 0, -32};
221 static const struct _convrule_ rule7={GENCAT_PD, NUMCAT_PD, 0, 0, 0, 0};
222 static const struct _convrule_ rule0={GENCAT_CC, NUMCAT_CC, 0, 0, 0, 0};
223 static const struct _charblock_ allchars[]={
1004 {2835, 22, &rule43},
1051 {3090, 23, &rule43},
1052 {3114, 10, &rule43},
1064 {3218, 23, &rule43},
1065 {3242, 10, &rule43},
1083 {3346, 23, &rule43},
1084 {3370, 16, &rule43},
1094 {3461, 18, &rule43},
1095 {3482, 24, &rule43},
1106 {3585, 48, &rule43},
1146 {3882, 10, &rule17},
1159 {3913, 34, &rule43},
1160 {3953, 14, &rule66},
1167 {3993, 36, &rule66},
1172 {4096, 34, &rule43},
1187 {4256, 38, &rule79},
1188 {4304, 41, &rule43},
1190 {4352, 90, &rule43},
1191 {4447, 68, &rule43},
1192 {4520, 82, &rule43},
1194 {4616, 63, &rule43},
1200 {4704, 39, &rule43},
1203 {4752, 31, &rule43},
1211 {4824, 23, &rule43},
1212 {4848, 31, &rule43},
1216 {4896, 39, &rule43},
1217 {4936, 19, &rule43},
1220 {4978, 11, &rule17},
1221 {5024, 85, &rule43},
1222 {5121, 620, &rule43},
1226 {5761, 26, &rule43},
1229 {5792, 75, &rule43},
1232 {5888, 13, &rule43},
1235 {5920, 18, &rule43},
1238 {5952, 18, &rule43},
1240 {5984, 13, &rule43},
1243 {6016, 52, &rule43},
1250 {6089, 11, &rule66},
1258 {6128, 10, &rule17},
1265 {6176, 35, &rule43},
1267 {6212, 52, &rule43},
1268 {6272, 41, &rule43},
1270 {6400, 29, &rule43},
1282 {6480, 30, &rule43},
1284 {6624, 32, &rule13},
1285 {7424, 44, &rule14},
1286 {7468, 54, &rule65},
1287 {7522, 10, &rule14},
1557 {8056, 2, &rule100},
1558 {8058, 2, &rule101},
1559 {8060, 2, &rule102},
1561 {8072, 8, &rule103},
1563 {8088, 8, &rule103},
1565 {8104, 8, &rule103},
1568 {8115, 1, &rule104},
1572 {8122, 2, &rule105},
1573 {8124, 1, &rule106},
1575 {8126, 1, &rule107},
1578 {8131, 1, &rule104},
1581 {8136, 4, &rule108},
1582 {8140, 1, &rule106},
1588 {8154, 2, &rule109},
1595 {8170, 2, &rule110},
1599 {8179, 1, &rule104},
1602 {8184, 2, &rule111},
1603 {8186, 2, &rule112},
1604 {8188, 1, &rule106},
1618 {8232, 1, &rule113},
1619 {8233, 1, &rule114},
1646 {8320, 10, &rule17},
1651 {8400, 13, &rule66},
1673 {8486, 1, &rule115},
1677 {8490, 1, &rule116},
1678 {8491, 1, &rule117},
1696 {8531, 13, &rule17},
1697 {8544, 16, &rule118},
1698 {8560, 16, &rule119},
1711 {8623, 31, &rule13},
1717 {8661, 31, &rule13},
1718 {8692, 268, &rule6},
1721 {8972, 20, &rule13},
1726 {9003, 81, &rule13},
1728 {9085, 30, &rule13},
1733 {9143, 26, &rule13},
1734 {9216, 39, &rule13},
1735 {9280, 11, &rule13},
1736 {9312, 60, &rule17},
1737 {9372, 26, &rule13},
1738 {9398, 26, &rule120},
1739 {9424, 26, &rule121},
1740 {9450, 22, &rule17},
1741 {9472, 183, &rule13},
1745 {9666, 54, &rule13},
1747 {9728, 24, &rule13},
1748 {9753, 86, &rule13},
1750 {9840, 14, &rule13},
1751 {9856, 18, &rule13},
1755 {9996, 28, &rule13},
1756 {10025, 35, &rule13},
1757 {10061, 1, &rule13},
1758 {10063, 4, &rule13},
1759 {10070, 1, &rule13},
1760 {10072, 7, &rule13},
1761 {10081, 7, &rule13},
1776 {10102, 30, &rule17},
1777 {10132, 1, &rule13},
1778 {10136, 24, &rule13},
1779 {10161, 14, &rule13},
1780 {10192, 22, &rule6},
1787 {10224, 16, &rule6},
1788 {10240, 256, &rule13},
1789 {10496, 131, &rule6},
1812 {10649, 63, &rule6},
1817 {10716, 32, &rule6},
1820 {10750, 258, &rule6},
1821 {11008, 14, &rule13},
1822 {11904, 26, &rule13},
1823 {11931, 89, &rule13},
1824 {12032, 214, &rule13},
1825 {12272, 12, &rule13},
1828 {12292, 1, &rule13},
1829 {12293, 1, &rule65},
1830 {12294, 1, &rule43},
1831 {12295, 1, &rule93},
1842 {12306, 2, &rule13},
1854 {12320, 1, &rule13},
1855 {12321, 9, &rule93},
1856 {12330, 6, &rule66},
1858 {12337, 5, &rule65},
1859 {12342, 2, &rule13},
1860 {12344, 3, &rule93},
1861 {12347, 1, &rule65},
1862 {12348, 1, &rule43},
1864 {12350, 2, &rule13},
1865 {12353, 86, &rule43},
1866 {12441, 2, &rule66},
1867 {12443, 2, &rule10},
1868 {12445, 2, &rule65},
1869 {12447, 1, &rule43},
1871 {12449, 90, &rule43},
1872 {12539, 1, &rule11},
1873 {12540, 3, &rule65},
1874 {12543, 1, &rule43},
1875 {12549, 40, &rule43},
1876 {12593, 94, &rule43},
1877 {12688, 2, &rule13},
1878 {12690, 4, &rule17},
1879 {12694, 10, &rule13},
1880 {12704, 24, &rule43},
1881 {12784, 16, &rule43},
1882 {12800, 31, &rule13},
1883 {12832, 10, &rule17},
1884 {12842, 26, &rule13},
1885 {12880, 1, &rule13},
1886 {12881, 15, &rule17},
1887 {12896, 30, &rule13},
1888 {12927, 1, &rule13},
1889 {12928, 10, &rule17},
1890 {12938, 39, &rule13},
1891 {12977, 15, &rule17},
1892 {12992, 63, &rule13},
1893 {13056, 256, &rule13},
1894 {13312, 6582, &rule43},
1895 {19904, 64, &rule13},
1896 {19968, 20902, &rule43},
1897 {40960, 1165, &rule43},
1898 {42128, 55, &rule13},
1899 {44032, 11172, &rule43},
1900 {55296, 896, &rule122},
1901 {56192, 128, &rule122},
1902 {56320, 1024, &rule122},
1903 {57344, 6400, &rule123},
1904 {63744, 302, &rule43},
1905 {64048, 59, &rule43},
1906 {64256, 7, &rule14},
1907 {64275, 5, &rule14},
1908 {64285, 1, &rule43},
1909 {64286, 1, &rule66},
1910 {64287, 10, &rule43},
1912 {64298, 13, &rule43},
1913 {64312, 5, &rule43},
1914 {64318, 1, &rule43},
1915 {64320, 2, &rule43},
1916 {64323, 2, &rule43},
1917 {64326, 108, &rule43},
1918 {64467, 363, &rule43},
1921 {64848, 64, &rule43},
1922 {64914, 54, &rule43},
1923 {65008, 12, &rule43},
1925 {65021, 1, &rule13},
1926 {65024, 16, &rule66},
1927 {65056, 4, &rule66},
1930 {65075, 2, &rule11},
1951 {65101, 3, &rule11},
1968 {65136, 5, &rule43},
1969 {65142, 135, &rule43},
1970 {65279, 1, &rule16},
1981 {65296, 10, &rule8},
1985 {65313, 26, &rule9},
1989 {65342, 1, &rule10},
1990 {65343, 1, &rule11},
1991 {65344, 1, &rule10},
1992 {65345, 26, &rule12},
2003 {65381, 1, &rule11},
2004 {65382, 10, &rule43},
2005 {65392, 1, &rule65},
2006 {65393, 45, &rule43},
2007 {65438, 2, &rule65},
2008 {65440, 31, &rule43},
2009 {65474, 6, &rule43},
2010 {65482, 6, &rule43},
2011 {65490, 6, &rule43},
2012 {65498, 3, &rule43},
2015 {65507, 1, &rule10},
2016 {65508, 1, &rule13},
2018 {65512, 1, &rule13},
2020 {65517, 2, &rule13},
2021 {65529, 3, &rule16},
2022 {65532, 2, &rule13},
2023 {65536, 12, &rule43},
2024 {65549, 26, &rule43},
2025 {65576, 19, &rule43},
2026 {65596, 2, &rule43},
2027 {65599, 15, &rule43},
2028 {65616, 14, &rule43},
2029 {65664, 123, &rule43},
2031 {65794, 1, &rule13},
2032 {65799, 45, &rule17},
2033 {65847, 9, &rule13},
2034 {66304, 31, &rule43},
2035 {66336, 4, &rule17},
2036 {66352, 26, &rule43},
2037 {66378, 1, &rule93},
2038 {66432, 30, &rule43},
2040 {66560, 40, &rule124},
2041 {66600, 40, &rule125},
2042 {66640, 78, &rule43},
2043 {66720, 10, &rule8},
2044 {67584, 6, &rule43},
2045 {67592, 1, &rule43},
2046 {67594, 44, &rule43},
2047 {67639, 2, &rule43},
2048 {67644, 1, &rule43},
2049 {67647, 1, &rule43},
2050 {118784, 246, &rule13},
2051 {119040, 39, &rule13},
2052 {119082, 59, &rule13},
2053 {119141, 2, &rule92},
2054 {119143, 3, &rule66},
2055 {119146, 3, &rule13},
2056 {119149, 6, &rule92},
2057 {119155, 8, &rule16},
2058 {119163, 8, &rule66},
2059 {119171, 2, &rule13},
2060 {119173, 7, &rule66},
2061 {119180, 30, &rule13},
2062 {119210, 4, &rule66},
2063 {119214, 48, &rule13},
2064 {119552, 87, &rule13},
2065 {119808, 26, &rule79},
2066 {119834, 26, &rule14},
2067 {119860, 26, &rule79},
2068 {119886, 7, &rule14},
2069 {119894, 18, &rule14},
2070 {119912, 26, &rule79},
2071 {119938, 26, &rule14},
2072 {119964, 1, &rule79},
2073 {119966, 2, &rule79},
2074 {119970, 1, &rule79},
2075 {119973, 2, &rule79},
2076 {119977, 4, &rule79},
2077 {119982, 8, &rule79},
2078 {119990, 4, &rule14},
2079 {119995, 1, &rule14},
2080 {119997, 7, &rule14},
2081 {120005, 11, &rule14},
2082 {120016, 26, &rule79},
2083 {120042, 26, &rule14},
2084 {120068, 2, &rule79},
2085 {120071, 4, &rule79},
2086 {120077, 8, &rule79},
2087 {120086, 7, &rule79},
2088 {120094, 26, &rule14},
2089 {120120, 2, &rule79},
2090 {120123, 4, &rule79},
2091 {120128, 5, &rule79},
2092 {120134, 1, &rule79},
2093 {120138, 7, &rule79},
2094 {120146, 26, &rule14},
2095 {120172, 26, &rule79},
2096 {120198, 26, &rule14},
2097 {120224, 26, &rule79},
2098 {120250, 26, &rule14},
2099 {120276, 26, &rule79},
2100 {120302, 26, &rule14},
2101 {120328, 26, &rule79},
2102 {120354, 26, &rule14},
2103 {120380, 26, &rule79},
2104 {120406, 26, &rule14},
2105 {120432, 26, &rule79},
2106 {120458, 26, &rule14},
2107 {120488, 25, &rule79},
2108 {120513, 1, &rule6},
2109 {120514, 25, &rule14},
2110 {120539, 1, &rule6},
2111 {120540, 6, &rule14},
2112 {120546, 25, &rule79},
2113 {120571, 1, &rule6},
2114 {120572, 25, &rule14},
2115 {120597, 1, &rule6},
2116 {120598, 6, &rule14},
2117 {120604, 25, &rule79},
2118 {120629, 1, &rule6},
2119 {120630, 25, &rule14},
2120 {120655, 1, &rule6},
2121 {120656, 6, &rule14},
2122 {120662, 25, &rule79},
2123 {120687, 1, &rule6},
2124 {120688, 25, &rule14},
2125 {120713, 1, &rule6},
2126 {120714, 6, &rule14},
2127 {120720, 25, &rule79},
2128 {120745, 1, &rule6},
2129 {120746, 25, &rule14},
2130 {120771, 1, &rule6},
2131 {120772, 6, &rule14},
2132 {120782, 50, &rule8},
2133 {131072, 42711, &rule43},
2134 {194560, 542, &rule43},
2135 {917505, 1, &rule16},
2136 {917536, 96, &rule16},
2137 {917760, 240, &rule66},
2138 {983040, 65534, &rule123},
2139 {1048576, 65534, &rule123}
2141 static const struct _charblock_ convchars[]={
2508 {1024, 16, &rule88},
2510 {1072, 32, &rule12},
2511 {1104, 16, &rule83},
2670 {1329, 38, &rule90},
2671 {1377, 38, &rule91},
2936 {8056, 2, &rule100},
2937 {8058, 2, &rule101},
2938 {8060, 2, &rule102},
2940 {8072, 8, &rule103},
2942 {8088, 8, &rule103},
2944 {8104, 8, &rule103},
2946 {8115, 1, &rule104},
2948 {8122, 2, &rule105},
2949 {8124, 1, &rule106},
2950 {8126, 1, &rule107},
2951 {8131, 1, &rule104},
2952 {8136, 4, &rule108},
2953 {8140, 1, &rule106},
2956 {8154, 2, &rule109},
2960 {8170, 2, &rule110},
2962 {8179, 1, &rule104},
2963 {8184, 2, &rule111},
2964 {8186, 2, &rule112},
2965 {8188, 1, &rule106},
2966 {8486, 1, &rule115},
2967 {8490, 1, &rule116},
2968 {8491, 1, &rule117},
2969 {8544, 16, &rule118},
2970 {8560, 16, &rule119},
2971 {9398, 26, &rule120},
2972 {9424, 26, &rule121},
2973 {65313, 26, &rule9},
2974 {65345, 26, &rule12},
2975 {66560, 40, &rule124},
2976 {66600, 40, &rule125}
2978 static const struct _charblock_ spacechars[]={
2990 Obtain a reference to the rule for a character by doing a
2991 binary search over the specified array of blocks.
2992 To keep checkattr short, the address of
2993 nullrule is returned if the search fails:
2994 that rule defines no category and no conversion
2995 distances. The compare function returns 0 when
2996 key->start lies within the block. Otherwise the
2997 result of comparing key->start with the start of the
2998 current block is returned as usual.
/* Fallback rule returned by getrule() when the search fails: its category
 * flag is 0, so ANDing it with any category mask yields 0; its category
 * number is NUMCAT_CN (Other, Not Assigned); all remaining values are 0. */
3001 static const struct _convrule_ nullrule={0,NUMCAT_CN,0,0,0,0};
3003 static int blkcmp(const void *vk,const void *vb)
3005 const struct _charblock_ *key,*cur;
3008 if((key->start>=cur->start)&&(key->start<(cur->start+cur->length)))
3012 if(key->start>cur->start) return 1;
3016 static const struct _convrule_ *getrule(
3017 const struct _charblock_ *blocks,
3021 struct _charblock_ key={unichar,1,(void *)0};
3022 struct _charblock_ *cb=bsearch(&key,blocks,numblocks,sizeof(key),blkcmp);
3023 if(cb==(void *)0) return &nullrule;
3030 Check whether a character (internal code) has certain attributes.
3031 Attributes (category flags) may be ORed. The function ANDs
3032 character category flags and the mask and returns the result.
3033 If the character belongs to one of the categories requested,
3034 the result will be nonzero.
3037 inline static int checkattr(int c,unsigned int catmask)
3039 return (catmask & (getrule(allchars,(c<256)?NUM_LAT1BLOCKS:NUM_BLOCKS,c)->category));
3042 inline static int checkattr_s(int c,unsigned int catmask)
3044 return (catmask & (getrule(spacechars,NUM_SPACEBLOCKS,c)->category));
3048 Define predicate functions for some combinations of categories.
3051 #define unipred(p,m) \
3054 return checkattr(c,m); \
3057 #define unipred_s(p,m) \
3060 return checkattr_s(c,m); \
3064 Make these rules as close to Hugs as possible.
/*
 * Predicate instantiations.  unipred(p,m) expands to code returning
 * checkattr(c,m) and unipred_s(p,m) to code returning checkattr_s(c,m);
 * the result is nonzero when the character's category flag is in mask m.
 */
/* Control characters: Other, Control (Cc) only. */
3067 unipred(u_iswcntrl,GENCAT_CC)
/* Printable: every letter, mark, number, punctuation and symbol category
 * plus Separator, Space.  Not in the mask: ZL, ZP, CC, CF, CS, CO. */
3068 unipred(u_iswprint, \
3069 (GENCAT_MC | GENCAT_NO | GENCAT_SK | GENCAT_ME | GENCAT_ND | \
3070 GENCAT_PO | GENCAT_LT | GENCAT_PC | GENCAT_SM | GENCAT_ZS | \
3071 GENCAT_LU | GENCAT_PD | GENCAT_SO | GENCAT_PE | GENCAT_PF | \
3072 GENCAT_PS | GENCAT_SC | GENCAT_LL | GENCAT_LM | GENCAT_PI | \
3073 GENCAT_NL | GENCAT_MN | GENCAT_LO))
/* Space: Separator, Space (Zs), looked up through checkattr_s, which
 * searches the spacechars table instead of allchars. */
3074 unipred_s(u_iswspace,GENCAT_ZS)
/* Upper case: uppercase or titlecase letters. */
3075 unipred(u_iswupper,(GENCAT_LU|GENCAT_LT))
/* Lower case: lowercase letters only. */
3076 unipred(u_iswlower,GENCAT_LL)
/* Alphabetic: all five letter categories. */
3077 unipred(u_iswalpha,(GENCAT_LL|GENCAT_LU|GENCAT_LT|GENCAT_LM|GENCAT_LO))
/* Alphanumeric: all letter categories, all three mark categories
 * (MC, ME, MN) and all three number categories (NO, ND, NL). */
3079 unipred(u_iswalnum,(GENCAT_LT|GENCAT_LU|GENCAT_LL|GENCAT_LM|GENCAT_LO|
3080 GENCAT_MC|GENCAT_ME|GENCAT_MN|
3081 GENCAT_NO|GENCAT_ND|GENCAT_NL))
3083 #define caseconv(p,to) \
3086 const struct _convrule_ *rule=getrule(convchars,NUM_CONVBLOCKS,c);\
3087 if(rule==&nullrule) return c;\
/*
 * Instantiate the case-conversion functions.  Per the visible part of the
 * caseconv macro, each one looks the character up in convchars and returns
 * it unchanged when no rule is found (nullrule).
 * NOTE(review): the rest of the macro body is not in view; presumably the
 * second argument names the _convrule_ distance field that is applied to
 * the character -- confirm against the macro definition.
 */
3091 caseconv(u_towupper,updist)
3092 caseconv(u_towlower,lowdist)
3093 caseconv(u_towtitle,titledist)
3097 return getrule(allchars,NUM_BLOCKS,c)->catnumber;