00001
00002
00003
00004
00005 #include "transcode_data.h"
00006
00007
00008
/*
 * Byte-level lookup tables shared by every transcoder in this file.
 *
 * The array is a concatenation of sub-tables.  Each `*_offsets` macro is the
 * index of the first byte of one sub-table.  A sub-table starts with two
 * bytes that bound an inclusive input-byte range (low, high) and is followed
 * by exactly (high - low + 1) one-byte entries, one per input byte in that
 * range.
 *
 * NOTE(review): each entry is presumably an index into the `*_infos` list
 * paired with the sub-table in utf_16_32_word_array, and bytes outside
 * [low, high] are presumably rejected by the transcoding engine -- confirm
 * against the engine that consumes these tables.
 *
 * The data appears to be machine generated; do not edit values by hand
 * without also updating every `*_offsets` macro and the array length.
 */
static const unsigned char
utf_16_32_byte_array[1288] = {
/* range 0xDC..0xDF, 4 entries */
#define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0
    220, 223,
    1, 1, 1, 1,

/* range 0x00..0xFF, every byte maps to entry 0 */
#define from_UTF_16LE_00toFF_D8toDB_offsets 6
    0, 255,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

/* range 0x00..0xFF; 0xD8..0xDB map to entry 1, 0xDC..0xDF to entry 2 */
#define from_UTF_16LE_00toFF_offsets 264
    0, 255,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

/* range 0x00..0x00, single entry */
#define from_UTF_32LE_00toFF_00toD7_00_offsets 522
    0, 0,
    0,

/* range 0x00..0x10, 17 entries, all 0 */
#define from_UTF_32LE_00toFF_00toD7_offsets 525
    0, 16,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0,

/* range 0x01..0x10, 16 entries, all 1 */
#define from_UTF_32LE_00toFF_D8toDF_offsets 544
    1, 16,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

/* range 0x00..0xFF; 0xD8..0xDF map to entry 1, everything else to 0 */
#define from_UTF_32LE_00toFF_offsets 562
    0, 255,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

/* range 0x00..0x10; 0x00 maps to entry 0, 0x01..0x10 to entry 1 */
#define from_UTF_32BE_00_offsets 820
    0, 16,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1,

/* range 0x80..0xBF, 64 entries, all 1 */
#define from_UTF_8_C2toDF_offsets 839
    128, 191,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

/* range 0xA0..0xBF, 32 entries, all 1 */
#define from_UTF_8_E0_offsets 905
    160, 191,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

/* range 0x80..0x9F, 32 entries, all 1 */
#define from_UTF_8_ED_offsets 939
    128, 159,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

/* range 0x90..0xBF, 48 entries, all 1 */
#define from_UTF_8_F0_offsets 973
    144, 191,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

/* range 0x80..0x8F, 16 entries, all 1 */
#define from_UTF_8_F4_offsets 1023
    128, 143,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

/*
 * range 0x00..0xF4, one entry per possible first byte:
 *   0x00..0x7F -> 0, 0x80..0xC1 -> 1, 0xC2..0xDF -> 2, 0xE0 -> 3,
 *   0xE1..0xEC and 0xEE..0xEF -> 4, 0xED -> 5, 0xF0 -> 6,
 *   0xF1..0xF3 -> 7, 0xF4 -> 8
 */
#define from_UTF_8_offsets 1041
    0, 244,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4,
    6, 7, 7, 7, 8,

};
00137 static const unsigned int
00138 utf_16_32_word_array[94] = {
00139 #define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0)
00140 INVALID, FUNso,
00141
00142 #define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2)
00143 from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
00144 from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
00145
00146 #define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4)
00147 from_UTF_16LE_00toFF_D8toDB_00toFF,
00148
00149 #define from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5)
00150 from_UTF_16LE_00toFF_D8toDB_offsets,
00151 from_UTF_16LE_00toFF_D8toDB_infos,
00152
00153 #define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7)
00154 FUNso, from_UTF_16LE_00toFF_D8toDB,
00155 INVALID,
00156
00157 #define from_UTF_16LE_00toFF WORDINDEX2INFO(10)
00158 from_UTF_16LE_00toFF_offsets,
00159 from_UTF_16LE_00toFF_infos,
00160
00161 #define from_UTF_16LE_infos WORDINDEX2INFO(12)
00162 from_UTF_16LE_00toFF,
00163
00164 #define from_UTF_16LE WORDINDEX2INFO(13)
00165 from_UTF_16LE_00toFF_D8toDB_offsets,
00166 from_UTF_16LE_infos,
00167
00168 #define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15)
00169 FUNso, INVALID,
00170
00171 #define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17)
00172 from_UTF_32LE_00toFF_00toD7_00_offsets,
00173 from_UTF_32LE_00toFF_00toD7_00_infos,
00174
00175 #define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19)
00176 from_UTF_32LE_00toFF_00toD7_00, INVALID,
00177
00178 #define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21)
00179 from_UTF_32LE_00toFF_00toD7_offsets,
00180 from_UTF_32LE_00toFF_00toD7_infos,
00181
00182 #define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23)
00183 INVALID, from_UTF_32LE_00toFF_00toD7_00,
00184
00185 #define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25)
00186 from_UTF_32LE_00toFF_D8toDF_offsets,
00187 from_UTF_32LE_00toFF_D8toDF_infos,
00188
00189 #define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27)
00190 from_UTF_32LE_00toFF_00toD7, from_UTF_32LE_00toFF_D8toDF,
00191
00192 #define from_UTF_32LE_00toFF WORDINDEX2INFO(29)
00193 from_UTF_32LE_00toFF_offsets,
00194 from_UTF_32LE_00toFF_infos,
00195
00196 #define from_UTF_32LE_infos WORDINDEX2INFO(31)
00197 from_UTF_32LE_00toFF,
00198
00199 #define from_UTF_32LE WORDINDEX2INFO(32)
00200 from_UTF_16LE_00toFF_D8toDB_offsets,
00201 from_UTF_32LE_infos,
00202
00203 #define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34)
00204 FUNso,
00205
00206 #define from_UTF_16BE_00toD7 WORDINDEX2INFO(35)
00207 from_UTF_16LE_00toFF_D8toDB_offsets,
00208 from_UTF_16BE_00toD7_infos,
00209
00210 #define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37)
00211 INVALID, from_UTF_16BE_00toD7,
00212
00213 #define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39)
00214 from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
00215 from_UTF_16BE_D8toDB_00toFF_infos,
00216
00217 #define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41)
00218 from_UTF_16BE_D8toDB_00toFF,
00219
00220 #define from_UTF_16BE_D8toDB WORDINDEX2INFO(42)
00221 from_UTF_16LE_00toFF_D8toDB_offsets,
00222 from_UTF_16BE_D8toDB_infos,
00223
00224 #define from_UTF_16BE_infos WORDINDEX2INFO(44)
00225 from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB,
00226 INVALID,
00227
00228 #define from_UTF_16BE WORDINDEX2INFO(47)
00229 from_UTF_16LE_00toFF_offsets,
00230 from_UTF_16BE_infos,
00231
00232 #define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49)
00233 from_UTF_16BE_00toD7, INVALID,
00234
00235 #define from_UTF_32BE_00_00 WORDINDEX2INFO(51)
00236 from_UTF_32LE_00toFF_offsets,
00237 from_UTF_32BE_00_00_infos,
00238
00239 #define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53)
00240 from_UTF_16BE_00toD7,
00241
00242 #define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54)
00243 from_UTF_16LE_00toFF_D8toDB_offsets,
00244 from_UTF_32BE_00_01to10_infos,
00245
00246 #define from_UTF_32BE_00_infos WORDINDEX2INFO(56)
00247 from_UTF_32BE_00_00, from_UTF_32BE_00_01to10,
00248 INVALID,
00249
00250 #define from_UTF_32BE_00 WORDINDEX2INFO(59)
00251 from_UTF_32BE_00_offsets,
00252 from_UTF_32BE_00_infos,
00253
00254 #define from_UTF_32BE_infos WORDINDEX2INFO(61)
00255 from_UTF_32BE_00, INVALID,
00256
00257 #define from_UTF_32BE WORDINDEX2INFO(63)
00258 from_UTF_32LE_00toFF_00toD7_00_offsets,
00259 from_UTF_32BE_infos,
00260
00261 #define from_UTF_8_C2toDF WORDINDEX2INFO(65)
00262 from_UTF_8_C2toDF_offsets,
00263 from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
00264
00265 #define from_UTF_8_E0_infos WORDINDEX2INFO(67)
00266 INVALID, from_UTF_8_C2toDF,
00267
00268 #define from_UTF_8_E0 WORDINDEX2INFO(69)
00269 from_UTF_8_E0_offsets,
00270 from_UTF_8_E0_infos,
00271
00272 #define from_UTF_8_E1toEC WORDINDEX2INFO(71)
00273 from_UTF_8_C2toDF_offsets,
00274 from_UTF_8_E0_infos,
00275
00276 #define from_UTF_8_ED WORDINDEX2INFO(73)
00277 from_UTF_8_ED_offsets,
00278 from_UTF_8_E0_infos,
00279
00280 #define from_UTF_8_F0_infos WORDINDEX2INFO(75)
00281 INVALID, from_UTF_8_E1toEC,
00282
00283 #define from_UTF_8_F0 WORDINDEX2INFO(77)
00284 from_UTF_8_F0_offsets,
00285 from_UTF_8_F0_infos,
00286
00287 #define from_UTF_8_F1toF3 WORDINDEX2INFO(79)
00288 from_UTF_8_C2toDF_offsets,
00289 from_UTF_8_F0_infos,
00290
00291 #define from_UTF_8_F4 WORDINDEX2INFO(81)
00292 from_UTF_8_F4_offsets,
00293 from_UTF_8_F0_infos,
00294
00295 #define from_UTF_8_infos WORDINDEX2INFO(83)
00296 FUNso, INVALID,
00297 from_UTF_8_C2toDF, from_UTF_8_E0,
00298 from_UTF_8_E1toEC, from_UTF_8_ED,
00299 from_UTF_8_F0, from_UTF_8_F1toF3,
00300 from_UTF_8_F4,
00301
00302 #define from_UTF_8 WORDINDEX2INFO(92)
00303 from_UTF_8_offsets,
00304 from_UTF_8_infos,
00305
00306 };
00307 #define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, utf_16_32_word_array, 94, ((int)sizeof(unsigned int))
00308
00309
/*
 * Encode one UTF-16BE character as UTF-8.
 *
 * s      - input bytes: one 16-bit unit (s[0] high byte, s[1] low byte), or
 *          a surrogate pair occupying s[0..3] when s[0] is 0xD8..0xDF.
 * o      - output buffer; up to 4 bytes are written.
 * statep, l, osize - unused.
 *
 * Returns the number of bytes stored in o (1 to 4).
 *
 * NOTE(review): no validation is done here; the input is presumably already
 * checked by the from_UTF_16BE lookup tables, and o presumably has room for
 * 4 bytes -- confirm against the caller.
 */
static ssize_t
fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    (void)statep;
    (void)l;
    (void)osize;

    if (!s[0] && s[1] < 0x80) {
        /* U+0000..U+007F: copied through unchanged */
        o[0] = s[1];
        return 1;
    }
    else if (s[0] < 0x08) {
        /* U+0080..U+07FF: 110xxxxx 10xxxxxx */
        o[0] = 0xC0 | (s[0] << 2) | (s[1] >> 6);
        o[1] = 0x80 | (s[1] & 0x3F);
        return 2;
    }
    else if ((s[0] & 0xF8) != 0xD8) {
        /* rest of the BMP: 1110xxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xE0 | (s[0] >> 4);
        o[1] = 0x80 | ((s[0] & 0x0F) << 2) | (s[1] >> 6);
        o[2] = 0x80 | (s[1] & 0x3F);
        return 3;
    }
    else {
        /* Surrogate pair: the four bits taken from the high surrogate,
         * plus one, give the top five bits of the code point. */
        unsigned int u = (((s[0] & 0x03) << 2) | (s[1] >> 6)) + 1;

        o[0] = 0xF0 | (u >> 2);
        o[1] = 0x80 | ((u & 0x03) << 4) | ((s[1] >> 2) & 0x0F);
        o[2] = 0x80 | ((s[1] & 0x03) << 4) | ((s[2] & 0x03) << 2) | (s[3] >> 6);
        o[3] = 0x80 | (s[3] & 0x3F);
        return 4;
    }
}
00337
/*
 * Encode one UTF-8 character as UTF-16BE.
 *
 * s      - input bytes: one UTF-8 sequence of 1 to 4 bytes; the length is
 *          derived from the lead byte s[0].
 * o      - output buffer; 2 bytes are written for 1- to 3-byte input and
 *          4 bytes (a surrogate pair) for 4-byte input.
 * statep, l, osize - unused.
 *
 * Returns the number of bytes stored in o (2 or 4).
 *
 * NOTE(review): no validation is done here; the input is presumably already
 * checked by the from_UTF_8 lookup tables -- confirm against the caller.
 */
static ssize_t
fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    (void)statep;
    (void)l;
    (void)osize;

    if (!(s[0] & 0x80)) {
        /* single byte: zero-extended to 16 bits */
        o[0] = 0x00;
        o[1] = s[0];
        return 2;
    }
    else if ((s[0] & 0xE0) == 0xC0) {
        /* 110xxxxx 10xxxxxx */
        o[0] = (s[0] >> 2) & 0x07;
        o[1] = (unsigned char)(((s[0] & 0x03) << 6) | (s[1] & 0x3F));
        return 2;
    }
    else if ((s[0] & 0xF0) == 0xE0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; the XORs clear the 10 marker of the
         * continuation bytes and the casts drop bits shifted past bit 7 */
        o[0] = (unsigned char)((s[0] << 4) | ((s[1] >> 2) ^ 0x20));
        o[1] = (unsigned char)((s[1] << 6) | (s[2] ^ 0x80));
        return 2;
    }
    else {
        /* 4-byte sequence -> surrogate pair.  w is the top five code point
         * bits minus one; unsigned so the shifts below stay well defined. */
        unsigned int w = (((s[0] & 0x07u) << 2) | ((s[1] >> 4) & 0x03u)) - 1u;

        o[0] = (unsigned char)(0xD8 | (w >> 2));
        o[1] = (unsigned char)((w << 6) | ((s[1] & 0x0Fu) << 2) | ((s[2] >> 4) - 8));
        o[2] = (unsigned char)(0xDC | ((s[2] >> 2) & 0x03));
        o[3] = (unsigned char)((s[2] << 6) | (s[3] & 0x7F));
        return 4;
    }
}
00365
/*
 * Encode one UTF-16LE character as UTF-8.
 *
 * s      - input bytes: one 16-bit unit (s[0] low byte, s[1] high byte), or
 *          a surrogate pair occupying s[0..3] when s[1] is 0xD8..0xDF.
 * o      - output buffer; up to 4 bytes are written.
 * statep, l, osize - unused.
 *
 * Returns the number of bytes stored in o (1 to 4).
 *
 * NOTE(review): no validation is done here; the input is presumably already
 * checked by the from_UTF_16LE lookup tables, and o presumably has room for
 * 4 bytes -- confirm against the caller.
 */
static ssize_t
fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    (void)statep;
    (void)l;
    (void)osize;

    if (!s[1] && s[0] < 0x80) {
        /* U+0000..U+007F: copied through unchanged */
        o[0] = s[0];
        return 1;
    }
    else if (s[1] < 0x08) {
        /* U+0080..U+07FF: 110xxxxx 10xxxxxx */
        o[0] = 0xC0 | (s[1] << 2) | (s[0] >> 6);
        o[1] = 0x80 | (s[0] & 0x3F);
        return 2;
    }
    else if ((s[1] & 0xF8) != 0xD8) {
        /* rest of the BMP: 1110xxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xE0 | (s[1] >> 4);
        o[1] = 0x80 | ((s[1] & 0x0F) << 2) | (s[0] >> 6);
        o[2] = 0x80 | (s[0] & 0x3F);
        return 3;
    }
    else {
        /* Surrogate pair: the four bits taken from the high surrogate,
         * plus one, give the top five bits of the code point. */
        unsigned int u = (((s[1] & 0x03) << 2) | (s[0] >> 6)) + 1;

        o[0] = 0xF0 | (u >> 2);
        o[1] = 0x80 | ((u & 0x03) << 4) | ((s[0] >> 2) & 0x0F);
        o[2] = 0x80 | ((s[0] & 0x03) << 4) | ((s[3] & 0x03) << 2) | (s[2] >> 6);
        o[3] = 0x80 | (s[2] & 0x3F);
        return 4;
    }
}
00393
/*
 * Encode one UTF-8 character as UTF-16LE.
 *
 * s      - input bytes: one UTF-8 sequence of 1 to 4 bytes; the length is
 *          derived from the lead byte s[0].
 * o      - output buffer; 2 bytes are written for 1- to 3-byte input and
 *          4 bytes (a surrogate pair) for 4-byte input.  Within each 16-bit
 *          unit the low byte is stored first.
 * statep, l, osize - unused.
 *
 * Returns the number of bytes stored in o (2 or 4).
 *
 * NOTE(review): no validation is done here; the input is presumably already
 * checked by the from_UTF_8 lookup tables -- confirm against the caller.
 */
static ssize_t
fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    (void)statep;
    (void)l;
    (void)osize;

    if (!(s[0] & 0x80)) {
        /* single byte: zero-extended to 16 bits */
        o[1] = 0x00;
        o[0] = s[0];
        return 2;
    }
    else if ((s[0] & 0xE0) == 0xC0) {
        /* 110xxxxx 10xxxxxx */
        o[1] = (s[0] >> 2) & 0x07;
        o[0] = (unsigned char)(((s[0] & 0x03) << 6) | (s[1] & 0x3F));
        return 2;
    }
    else if ((s[0] & 0xF0) == 0xE0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; the XORs clear the 10 marker of the
         * continuation bytes and the casts drop bits shifted past bit 7 */
        o[1] = (unsigned char)((s[0] << 4) | ((s[1] >> 2) ^ 0x20));
        o[0] = (unsigned char)((s[1] << 6) | (s[2] ^ 0x80));
        return 2;
    }
    else {
        /* 4-byte sequence -> surrogate pair.  w is the top five code point
         * bits minus one; unsigned so the shifts below stay well defined. */
        unsigned int w = (((s[0] & 0x07u) << 2) | ((s[1] >> 4) & 0x03u)) - 1u;

        o[1] = (unsigned char)(0xD8 | (w >> 2));
        o[0] = (unsigned char)((w << 6) | ((s[1] & 0x0Fu) << 2) | ((s[2] >> 4) - 8));
        o[3] = (unsigned char)(0xDC | ((s[2] >> 2) & 0x03));
        o[2] = (unsigned char)((s[2] << 6) | (s[3] & 0x7F));
        return 4;
    }
}
00421
/*
 * Encode one UTF-32BE character as UTF-8.
 *
 * s      - input bytes s[0..3], most significant byte first.  s[0] is never
 *          read; only s[1], s[2] and s[3] contribute to the output.
 * o      - output buffer; up to 4 bytes are written.
 * statep, l, osize - unused.
 *
 * Returns the number of bytes stored in o (1 to 4).
 *
 * NOTE(review): s[0] is presumably guaranteed to be 0 and the value
 * presumably range-checked by the from_UTF_32BE lookup tables before this
 * is called -- confirm against the caller.
 */
static ssize_t
fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    (void)statep;
    (void)l;
    (void)osize;

    if (s[1]) {
        /* above U+FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xF0 | (s[1] >> 2);
        o[1] = 0x80 | ((s[1] & 0x03) << 4) | (s[2] >> 4);
        o[2] = 0x80 | ((s[2] & 0x0F) << 2) | (s[3] >> 6);
        o[3] = 0x80 | (s[3] & 0x3F);
        return 4;
    }

    if (s[2] == 0 && s[3] < 0x80) {
        /* U+0000..U+007F: copied through unchanged */
        o[0] = s[3];
        return 1;
    }
    else if (s[2] < 0x08) {
        /* U+0080..U+07FF: 110xxxxx 10xxxxxx */
        o[0] = 0xC0 | (s[2] << 2) | (s[3] >> 6);
        o[1] = 0x80 | (s[3] & 0x3F);
        return 2;
    }
    else {
        /* U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xE0 | (s[2] >> 4);
        o[1] = 0x80 | ((s[2] & 0x0F) << 2) | (s[3] >> 6);
        o[2] = 0x80 | (s[3] & 0x3F);
        return 3;
    }
}
00450
/*
 * Encode one UTF-8 character as UTF-32BE.
 *
 * s      - input bytes: one UTF-8 sequence of 1 to 4 bytes; the length is
 *          derived from the lead byte s[0].
 * o      - output buffer; exactly 4 bytes are written, most significant
 *          byte first.  o[0] is always 0.
 * statep, l, osize - unused.
 *
 * Returns 4.
 *
 * NOTE(review): no validation is done here; the input is presumably already
 * checked by the from_UTF_8 lookup tables -- confirm against the caller.
 */
static ssize_t
fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    (void)statep;
    (void)l;
    (void)osize;

    o[0] = 0;
    if (!(s[0] & 0x80)) {
        /* single byte */
        o[1] = o[2] = 0x00;
        o[3] = s[0];
    }
    else if ((s[0] & 0xE0) == 0xC0) {
        /* 110xxxxx 10xxxxxx */
        o[1] = 0x00;
        o[2] = (s[0] >> 2) & 0x07;
        o[3] = (unsigned char)(((s[0] & 0x03) << 6) | (s[1] & 0x3F));
    }
    else if ((s[0] & 0xF0) == 0xE0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; the XORs clear the 10 marker of the
         * continuation bytes and the casts drop bits shifted past bit 7 */
        o[1] = 0x00;
        o[2] = (unsigned char)((s[0] << 4) | ((s[1] >> 2) ^ 0x20));
        o[3] = (unsigned char)((s[1] << 6) | (s[2] ^ 0x80));
    }
    else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        o[1] = (unsigned char)(((s[0] & 0x07) << 2) | ((s[1] >> 4) & 0x03));
        o[2] = (unsigned char)(((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F));
        o[3] = (unsigned char)(((s[2] & 0x03) << 6) | (s[3] & 0x3F));
    }
    return 4;
}
00476
/*
 * Encode one UTF-32LE character as UTF-8.
 *
 * s      - input bytes s[0..3], least significant byte first.  s[3] is never
 *          read; only s[0], s[1] and s[2] contribute to the output.
 * o      - output buffer; up to 4 bytes are written.
 * statep, l, osize - unused.
 *
 * Returns the number of bytes stored in o (1 to 4).
 *
 * NOTE(review): s[3] is presumably guaranteed to be 0 and the value
 * presumably range-checked by the from_UTF_32LE lookup tables before this
 * is called -- confirm against the caller.
 */
static ssize_t
fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    (void)statep;
    (void)l;
    (void)osize;

    if (s[2]) {
        /* above U+FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xF0 | (s[2] >> 2);
        o[1] = 0x80 | ((s[2] & 0x03) << 4) | (s[1] >> 4);
        o[2] = 0x80 | ((s[1] & 0x0F) << 2) | (s[0] >> 6);
        o[3] = 0x80 | (s[0] & 0x3F);
        return 4;
    }

    if (s[1] == 0 && s[0] < 0x80) {
        /* U+0000..U+007F: copied through unchanged */
        o[0] = s[0];
        return 1;
    }
    else if (s[1] < 0x08) {
        /* U+0080..U+07FF: 110xxxxx 10xxxxxx */
        o[0] = 0xC0 | (s[1] << 2) | (s[0] >> 6);
        o[1] = 0x80 | (s[0] & 0x3F);
        return 2;
    }
    else {
        /* U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xE0 | (s[1] >> 4);
        o[1] = 0x80 | ((s[1] & 0x0F) << 2) | (s[0] >> 6);
        o[2] = 0x80 | (s[0] & 0x3F);
        return 3;
    }
}
00505
/*
 * Encode one UTF-8 character as UTF-32LE.
 *
 * s      - input bytes: one UTF-8 sequence of 1 to 4 bytes; the length is
 *          derived from the lead byte s[0].
 * o      - output buffer; exactly 4 bytes are written, least significant
 *          byte first.  o[3] is always 0.
 * statep, l, osize - unused.
 *
 * Returns 4.
 *
 * NOTE(review): no validation is done here; the input is presumably already
 * checked by the from_UTF_8 lookup tables -- confirm against the caller.
 */
static ssize_t
fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    (void)statep;
    (void)l;
    (void)osize;

    o[3] = 0;
    if (!(s[0] & 0x80)) {
        /* single byte */
        o[2] = o[1] = 0x00;
        o[0] = s[0];
    }
    else if ((s[0] & 0xE0) == 0xC0) {
        /* 110xxxxx 10xxxxxx */
        o[2] = 0x00;
        o[1] = (s[0] >> 2) & 0x07;
        o[0] = (unsigned char)(((s[0] & 0x03) << 6) | (s[1] & 0x3F));
    }
    else if ((s[0] & 0xF0) == 0xE0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; the XORs clear the 10 marker of the
         * continuation bytes and the casts drop bits shifted past bit 7 */
        o[2] = 0x00;
        o[1] = (unsigned char)((s[0] << 4) | ((s[1] >> 2) ^ 0x20));
        o[0] = (unsigned char)((s[1] << 6) | (s[2] ^ 0x80));
    }
    else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        o[2] = (unsigned char)(((s[0] & 0x07) << 2) | ((s[1] >> 4) & 0x03));
        o[1] = (unsigned char)(((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F));
        o[0] = (unsigned char)(((s[2] & 0x03) << 6) | (s[3] & 0x3F));
    }
    return 4;
}
00531
00532 static const rb_transcoder
00533 rb_from_UTF_16BE = {
00534 "UTF-16BE", "UTF-8", from_UTF_16BE,
00535 TRANSCODE_TABLE_INFO,
00536 2,
00537 4,
00538 4,
00539 asciicompat_decoder,
00540 0, NULL, NULL,
00541 NULL, NULL, NULL, fun_so_from_utf_16be
00542 };
00543
00544 static const rb_transcoder
00545 rb_to_UTF_16BE = {
00546 "UTF-8", "UTF-16BE", from_UTF_8,
00547 TRANSCODE_TABLE_INFO,
00548 1,
00549 4,
00550 4,
00551 asciicompat_encoder,
00552 0, NULL, NULL,
00553 NULL, NULL, NULL, fun_so_to_utf_16be
00554 };
00555
00556 static const rb_transcoder
00557 rb_from_UTF_16LE = {
00558 "UTF-16LE", "UTF-8", from_UTF_16LE,
00559 TRANSCODE_TABLE_INFO,
00560 2,
00561 4,
00562 4,
00563 asciicompat_decoder,
00564 0, NULL, NULL,
00565 NULL, NULL, NULL, fun_so_from_utf_16le
00566 };
00567
00568 static const rb_transcoder
00569 rb_to_UTF_16LE = {
00570 "UTF-8", "UTF-16LE", from_UTF_8,
00571 TRANSCODE_TABLE_INFO,
00572 1,
00573 4,
00574 4,
00575 asciicompat_encoder,
00576 0, NULL, NULL,
00577 NULL, NULL, NULL, fun_so_to_utf_16le
00578 };
00579
00580 static const rb_transcoder
00581 rb_from_UTF_32BE = {
00582 "UTF-32BE", "UTF-8", from_UTF_32BE,
00583 TRANSCODE_TABLE_INFO,
00584 4,
00585 4,
00586 4,
00587 asciicompat_decoder,
00588 0, NULL, NULL,
00589 NULL, NULL, NULL, fun_so_from_utf_32be
00590 };
00591
00592 static const rb_transcoder
00593 rb_to_UTF_32BE = {
00594 "UTF-8", "UTF-32BE", from_UTF_8,
00595 TRANSCODE_TABLE_INFO,
00596 1,
00597 4,
00598 4,
00599 asciicompat_encoder,
00600 0, NULL, NULL,
00601 NULL, NULL, NULL, fun_so_to_utf_32be
00602 };
00603
00604 static const rb_transcoder
00605 rb_from_UTF_32LE = {
00606 "UTF-32LE", "UTF-8", from_UTF_32LE,
00607 TRANSCODE_TABLE_INFO,
00608 4,
00609 4,
00610 4,
00611 asciicompat_decoder,
00612 0, NULL, NULL,
00613 NULL, NULL, NULL, fun_so_from_utf_32le
00614 };
00615
00616 static const rb_transcoder
00617 rb_to_UTF_32LE = {
00618 "UTF-8", "UTF-32LE", from_UTF_8,
00619 TRANSCODE_TABLE_INFO,
00620 1,
00621 4,
00622 4,
00623 asciicompat_encoder,
00624 0, NULL, NULL,
00625 NULL, NULL, NULL, fun_so_to_utf_32le
00626 };
00627
00628 void
00629 Init_utf_16_32(void)
00630 {
00631 rb_register_transcoder(&rb_from_UTF_16BE);
00632 rb_register_transcoder(&rb_to_UTF_16BE);
00633 rb_register_transcoder(&rb_from_UTF_16LE);
00634 rb_register_transcoder(&rb_to_UTF_16LE);
00635 rb_register_transcoder(&rb_from_UTF_32BE);
00636 rb_register_transcoder(&rb_to_UTF_32BE);
00637 rb_register_transcoder(&rb_from_UTF_32LE);
00638 rb_register_transcoder(&rb_to_UTF_32LE);
00639 }
00640
00641