Line data Source code
1 : /* Copyright (C) 2000-2012 by George Williams */
2 : /*
3 : * Redistribution and use in source and binary forms, with or without
4 : * modification, are permitted provided that the following conditions are met:
5 :
6 : * Redistributions of source code must retain the above copyright notice, this
7 : * list of conditions and the following disclaimer.
8 :
9 : * Redistributions in binary form must reproduce the above copyright notice,
10 : * this list of conditions and the following disclaimer in the documentation
11 : * and/or other materials provided with the distribution.
12 :
13 : * The name of the author may not be used to endorse or promote products
14 : * derived from this software without specific prior written permission.
15 :
16 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 : * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 : * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 : * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 : * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 : * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 : * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 : * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 : * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 : * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 : */
27 :
28 : #include <fontforge-config.h>
29 : #include <gwwiconv.h>
30 : #include <stddef.h>
31 : #include <ustring.h>
32 : #include <utype.h>
33 : #include <charset.h>
34 : #include <chardata.h>
35 :
/* Encoding handed to encoding2u_strncpy()/u2encoding_strncpy() by the def2u/u2def */
/*  routines whenever the iconv path is not taken */
int local_encoding = e_iso8859_1;
#if HAVE_ICONV
/* When non-NULL, my_iconv_setup() opens iconv converters for this encoding name; */
/*  while it is NULL my_iconv_setup() reports that iconv is not to be used */
char *iconv_local_encoding_name = NULL;
#endif

/* Set once the "Unexpected encoding" message has been printed so that it is */
/*  only printed a single time */
static int bad_enc_warn = false;
42 :
43 : /* Does not handle conversions to Extended unix */
44 :
45 319 : unichar_t *encoding2u_strncpy(unichar_t *uto, const char *_from, int n, enum encoding cs) {
46 319 : unichar_t *upt=uto;
47 : const unichar_t *table;
48 : int offset;
49 319 : const unsigned char *from = (const unsigned char *) _from;
50 :
51 319 : if ( cs<e_first2byte ) {
52 319 : table = unicode_from_alphabets[cs];
53 319 : if ( table==NULL ) {
54 0 : while ( *from && n>0 ) {
55 0 : *upt++ = *(unsigned char *) (from++);
56 0 : --n;
57 : }
58 : } else {
59 4353 : while ( *from && n>0 ) {
60 3715 : *upt ++ = table[*(unsigned char *) (from++)];
61 3715 : --n;
62 : }
63 : }
64 0 : } else if ( cs<e_unicode ) {
65 0 : *uto = '\0';
66 0 : switch ( cs ) {
67 : default:
68 0 : if ( !bad_enc_warn ) {
69 0 : bad_enc_warn = true;
70 0 : fprintf( stderr, "Unexpected encoding %d, I'll pretend it's latin1\n", cs );
71 : }
72 0 : return( encoding2u_strncpy(uto,_from,n,e_iso8859_1));
73 : case e_johab: case e_big5: case e_big5hkscs:
74 0 : if ( cs==e_big5 ) {
75 0 : offset = 0xa100;
76 0 : table = unicode_from_big5;
77 0 : } else if ( cs==e_big5hkscs ) {
78 0 : offset = 0x8100;
79 0 : table = unicode_from_big5hkscs;
80 : } else {
81 0 : offset = 0x8400;
82 0 : table = unicode_from_johab;
83 : }
84 0 : while ( *from && n>0 ) {
85 0 : if ( *from>=(offset>>8) && from[1]!='\0' ) {
86 0 : *upt++ = table[ ((*from<<8) | from[1]) - offset ];
87 0 : from += 2;
88 : } else
89 0 : *upt++ = *from++;
90 0 : --n;
91 : }
92 0 : break;
93 : case e_wansung:
94 0 : while ( *from && n>0 ) {
95 0 : if ( *from>=0xa1 && from[1]>=0xa1 ) {
96 0 : *upt++ = unicode_from_ksc5601[ (*from-0xa1)*94+(from[1]-0xa1) ];
97 0 : from += 2;
98 : } else
99 0 : *upt++ = *from++;
100 0 : --n;
101 : }
102 0 : break;
103 : case e_jisgb:
104 0 : while ( *from && n>0 ) {
105 0 : if ( *from>=0xa1 && from[1]>=0xa1 ) {
106 0 : *upt++ = unicode_from_gb2312[ (*from-0xa1)*94+(from[1]-0xa1) ];
107 0 : from += 2;
108 : } else
109 0 : *upt++ = *from++;
110 0 : --n;
111 : }
112 0 : break;
113 : case e_sjis:
114 0 : while ( *from && n>0 ) {
115 0 : if ( *from<127 || ( *from>=161 && *from<=223 )) {
116 0 : *upt++ = unicode_from_jis201[*from++];
117 : } else {
118 0 : int ch1 = *from++;
119 0 : int ch2 = *from++;
120 0 : if ( ch1 >= 129 && ch1<= 159 )
121 0 : ch1 -= 112;
122 : else
123 0 : ch1 -= 176;
124 0 : ch1 <<= 1;
125 0 : if ( ch2>=159 )
126 0 : ch2-= 126;
127 0 : else if ( ch2>127 ) {
128 0 : --ch1;
129 0 : ch2 -= 32;
130 : } else {
131 0 : --ch1;
132 0 : ch2 -= 31;
133 : }
134 0 : *upt++ = unicode_from_jis208[(ch1-0x21)*94+(ch2-0x21)];
135 : }
136 0 : --n;
137 : }
138 0 : break;
139 : }
140 0 : } else if ( cs==e_unicode ) {
141 0 : unichar_t *ufrom = (unichar_t *) from;
142 0 : while ( *ufrom && n>0 ) {
143 0 : *upt++ = *ufrom++;
144 0 : --n;
145 : }
146 0 : } else if ( cs==e_unicode_backwards ) {
147 0 : unichar_t *ufrom = (unichar_t *) from;
148 0 : while ( *ufrom && n>0 ) {
149 0 : unichar_t ch = (*ufrom>>8)||((*ufrom&0xff)<<8);
150 0 : *upt++ = ch;
151 0 : ++ufrom;
152 0 : --n;
153 : }
154 0 : } else if ( cs==e_utf8 ) {
155 0 : while ( *from && n>0 ) {
156 0 : if ( *from<=127 )
157 0 : *upt = *from++;
158 0 : else if ( *from<=0xdf ) {
159 0 : if ( from[1]>=0x80 ) {
160 0 : *upt = ((*from&0x1f)<<6) | (from[1]&0x3f);
161 0 : from += 2;
162 : } else {
163 0 : ++from; /* Badly formed utf */
164 0 : *upt = 0xfffd;
165 : }
166 0 : } else if ( *from<=0xef ) {
167 0 : if ( from[1]>=0x80 && from[2]>=0x80 ) {
168 0 : *upt = ((*from&0xf)<<12) | ((from[1]&0x3f)<<6) | (from[2]&0x3f);
169 0 : from += 3;
170 : } else {
171 0 : ++from; /* Badly formed utf */
172 0 : *upt = 0xfffd;
173 : }
174 0 : } else if ( n>2 ) {
175 0 : if ( from[1]>=0x80 && from[2]>=0x80 && from[3]>=0x80 ) {
176 0 : int w = ( ((*from&0x7)<<2) | ((from[1]&0x30)>>4) )-1;
177 0 : *upt++ = 0xd800 | (w<<6) | ((from[1]&0xf)<<2) | ((from[2]&0x30)>>4);
178 0 : *upt = 0xdc00 | ((from[2]&0xf)<<6) | (from[3]&0x3f);
179 0 : from += 4;
180 : } else {
181 0 : ++from; /* Badly formed utf */
182 0 : *upt = 0xfffd;
183 : }
184 : } else {
185 : /* no space for surrogate */
186 0 : from += 4;
187 : }
188 0 : ++upt;
189 : }
190 : } else {
191 0 : if ( !bad_enc_warn ) {
192 0 : bad_enc_warn = true;
193 0 : fprintf( stderr, "Unexpected encoding %d, I'll pretend it's latin1\n", cs );
194 : }
195 0 : return( encoding2u_strncpy(uto,_from,n,e_iso8859_1));
196 : }
197 :
198 319 : if ( n>0 )
199 0 : *upt = '\0';
200 :
201 319 : return( uto );
202 : }
203 :
204 202396 : char *u2encoding_strncpy(char *to, const unichar_t *ufrom, size_t n, enum encoding cs) {
205 202396 : char *pt = to;
206 :
207 : /* we just ignore anything that doesn't fit in the encoding we look at */
208 202396 : if ( cs<e_first2byte ) {
209 202396 : struct charmap *table = NULL;
210 : unsigned char *plane;
211 202396 : table = alphabets_from_unicode[cs];
212 202396 : if ( table==NULL ) { /* ASCII */
213 0 : while ( *ufrom && n>0 ) {
214 0 : int ch = *ufrom;
215 0 : if ( ch<127 ) {
216 0 : *pt++ = ch;
217 0 : --n;
218 : }
219 0 : ++ufrom;
220 : }
221 : } else {
222 12133727 : while ( *ufrom && n>0 ) {
223 11728935 : int highch = *ufrom>>8, ch;
224 23457870 : if ( highch>=table->first && highch<=table->last &&
225 23457870 : (plane = table->table[highch])!=NULL &&
226 11728935 : (ch=plane[*ufrom&0xff])!=0 ) {
227 11728935 : *pt++ = ch;
228 11728935 : --n;
229 : }
230 11728935 : ++ufrom;
231 : }
232 : }
233 202396 : if ( n>0 )
234 0 : *pt = '\0';
235 0 : } else if ( cs<e_unicode ) {
236 : struct charmap2 *table;
237 : unsigned short *plane;
238 : unsigned char *plane1;
239 :
240 0 : *to = '\0';
241 0 : switch ( cs ) {
242 : default:
243 0 : if ( !bad_enc_warn ) {
244 0 : bad_enc_warn = true;
245 0 : fprintf( stderr, "Unexpected encoding %d, I'll pretend it's latin1\n", cs );
246 : }
247 0 : return( u2encoding_strncpy(to,ufrom,n,e_iso8859_1));
248 : case e_johab: case e_big5: case e_big5hkscs:
249 0 : table = cs==e_big5 ? &big5_from_unicode :
250 0 : cs==e_big5hkscs ? &big5hkscs_from_unicode :
251 : &johab_from_unicode;
252 0 : while ( *ufrom && n>0 ) {
253 0 : int highch = *ufrom>>8, ch;
254 0 : if ( *ufrom<0x80 ) {
255 0 : *pt++ = *ufrom;
256 0 : --n;
257 0 : } else if ( highch>=table->first && highch<=table->last &&
258 0 : (plane = table->table[highch-table->first])!=NULL &&
259 0 : (ch=plane[*ufrom&0xff])!=0 ) {
260 0 : *pt++ = ch>>8;
261 0 : *pt++ = ch&0xff;
262 0 : n -= 2;
263 : }
264 0 : ufrom ++;
265 : }
266 0 : break;
267 : case e_wansung:
268 0 : while ( *ufrom && n>0 ) {
269 0 : int highch = *ufrom>>8, ch;
270 0 : if ( *ufrom<0x80 ) {
271 0 : *pt++ = *ufrom;
272 0 : --n;
273 0 : } else if ( highch>=ksc5601_from_unicode.first && highch<=ksc5601_from_unicode.last &&
274 0 : (plane = ksc5601_from_unicode.table[highch-ksc5601_from_unicode.first])!=NULL &&
275 0 : (ch=plane[*ufrom&0xff])!=0 ) {
276 0 : *pt++ = (ch>>8) + 0x80;
277 0 : *pt++ = (ch&0xff) + 0x80;
278 0 : n -= 2;
279 : }
280 0 : ufrom ++;
281 : }
282 0 : break;
283 : case e_jisgb:
284 0 : while ( *ufrom && n>0 ) {
285 0 : int highch = *ufrom>>8, ch;
286 0 : if ( *ufrom<0x80 ) {
287 0 : *pt++ = *ufrom;
288 0 : --n;
289 0 : } else if ( highch>=gb2312_from_unicode.first && highch<=gb2312_from_unicode.last &&
290 0 : (plane = gb2312_from_unicode.table[highch-gb2312_from_unicode.first])!=NULL &&
291 0 : (ch=plane[*ufrom&0xff])!=0 ) {
292 0 : *pt++ = (ch>>8) + 0x80;
293 0 : *pt++ = (ch&0xff) + 0x80;
294 0 : n -= 2;
295 : }
296 0 : ufrom ++;
297 : }
298 0 : break;
299 : case e_sjis:
300 0 : while ( *ufrom && n>0 ) {
301 0 : int highch = *ufrom>>8, ch;
302 0 : if ( highch>=jis201_from_unicode.first && highch<=jis201_from_unicode.last &&
303 0 : (plane1 = jis201_from_unicode.table[highch-jis201_from_unicode.first])!=NULL &&
304 0 : (ch=plane1[*ufrom&0xff])!=0 ) {
305 0 : *pt++ = ch;
306 0 : --n;
307 0 : } else if ( *ufrom<' ' ) { /* control chars */
308 0 : *pt++ = *ufrom;
309 0 : --n;
310 0 : } else if ( highch>=jis_from_unicode.first && highch<=jis_from_unicode.last &&
311 0 : (plane = jis_from_unicode.table[highch-jis_from_unicode.first])!=NULL &&
312 0 : (ch=plane[*ufrom&0xff])!=0 && ch<0x8000 ) { /* no jis212 */
313 0 : int j1 = ch>>8, j2 = ch&0xff;
314 0 : int ro = j1<95 ? 112 : 176;
315 0 : int co = (j1&1) ? (j2>95?32:31) : 126;
316 0 : *pt++ = ((j1+1)>>1)+ro;
317 0 : *pt++ = j2+co;
318 0 : n -= 2;
319 : }
320 0 : ++ufrom;
321 : }
322 0 : break;
323 : }
324 0 : if ( n>0 )
325 0 : *pt = '\0';
326 0 : } else if ( cs==e_unicode ) {
327 0 : unichar_t *uto = (unichar_t *) to;
328 0 : while ( *ufrom && n>1 ) {
329 0 : *uto++ = *ufrom++;
330 0 : n-=sizeof(unichar_t);
331 : }
332 0 : if ( n>1 )
333 0 : *uto = '\0';
334 0 : } else if ( cs==e_unicode_backwards ) {
335 0 : unichar_t *uto = (unichar_t *) to;
336 0 : while ( *ufrom && n>sizeof(unichar_t)-1 ) {
337 0 : unichar_t ch = (*ufrom>>24)|((*ufrom>>8)&0xff00)|
338 0 : ((*ufrom<<8)&0xff0000)|(*ufrom<<24);
339 0 : *uto++ = ch;
340 0 : ++ufrom;
341 0 : n-=sizeof(unichar_t);
342 : }
343 0 : if ( n>1 )
344 0 : *uto = '\0';
345 0 : } else if ( cs==e_utf8 ) {
346 0 : while ( *ufrom ) {
347 0 : if ( *ufrom<0x80 ) {
348 0 : if ( n<=1 )
349 0 : break;
350 0 : *pt++ = *ufrom;
351 0 : --n;
352 0 : } else if ( *ufrom<0x800 ) {
353 0 : if ( n<=2 )
354 0 : break;
355 0 : *pt++ = 0xc0 | (*ufrom>>6);
356 0 : *pt++ = 0x80 | (*ufrom&0x3f);
357 0 : n -= 2;
358 0 : } else if ( *ufrom>=0xd800 && *ufrom<0xdc00 && ufrom[1]>=0xdc00 && ufrom[1]<0xe000 ) {
359 0 : int u = ((*ufrom>>6)&0xf)+1, y = ((*ufrom&3)<<4) | ((ufrom[1]>>6)&0xf);
360 0 : if ( n<=4 )
361 0 : break;
362 0 : *pt++ = 0xf0 | (u>>2);
363 0 : *pt++ = 0x80 | ((u&3)<<4) | ((*ufrom>>2)&0xf);
364 0 : *pt++ = 0x80 | y;
365 0 : *pt++ = 0x80 | (ufrom[1]&0x3f);
366 0 : n -= 4;
367 : } else {
368 0 : if ( n<=3 )
369 0 : break;
370 0 : *pt++ = 0xe0 | (*ufrom>>12);
371 0 : *pt++ = 0x80 | ((*ufrom>>6)&0x3f);
372 0 : *pt++ = 0x80 | (*ufrom&0x3f);
373 : }
374 0 : ++ufrom;
375 : }
376 0 : if ( n>1 )
377 0 : *pt = '\0';
378 : } else {
379 0 : if ( !bad_enc_warn ) {
380 0 : bad_enc_warn = true;
381 0 : fprintf( stderr, "Unexpected encoding %d, I'll pretend it's latin1\n", cs );
382 : }
383 0 : return( u2encoding_strncpy(to,ufrom,n,e_iso8859_1));
384 : }
385 :
386 202396 : return( to );
387 : }
388 :
389 : #if HAVE_ICONV
390 : static char *old_local_name=NULL;
391 : static iconv_t to_unicode=(iconv_t) (-1), from_unicode=(iconv_t) (-1);
392 : static iconv_t to_utf8=(iconv_t) (-1), from_utf8=(iconv_t) (-1);
393 : static const char (*names[]) = { "UCS-4-INTERNAL", "UCS-4", "UCS4", "ISO-10646-UCS-4", "UTF-32", NULL };
394 : static const char (*namesle[]) = { "UCS-4LE", "UTF-32LE", NULL };
395 : static const char (*namesbe[]) = { "UCS-4BE", "UTF-32BE", NULL };
396 : static const char *unicode_name = NULL;
397 : static int byteswapped = false;
398 :
399 0 : static int BytesNormal(iconv_t latin1_2_unicode) {
400 : union {
401 : int32 s;
402 : char c[4];
403 : } u[8];
404 0 : const char *from = "A";
405 0 : char *to = &u[0].c[0];
406 0 : size_t in_left = 1, out_left = sizeof(u);
407 0 : memset(u,0,sizeof(u));
408 0 : iconv( latin1_2_unicode, (char **) &from, &in_left, &to, &out_left);
409 0 : if ( u[0].s=='A' )
410 0 : return( true );
411 :
412 0 : return( false );
413 : }
414 :
415 405111 : static int my_iconv_setup(void) {
416 : const char **testnames;
417 : int i;
418 : union {
419 : short s;
420 : char c[2];
421 : } u;
422 : iconv_t test;
423 :
424 405111 : if ( iconv_local_encoding_name==NULL ) {
425 405111 : if ( to_unicode!=(iconv_t) (-1) ) {
426 0 : iconv_close(to_unicode);
427 0 : iconv_close(from_unicode);
428 0 : to_unicode = from_unicode = (iconv_t) (-1);
429 : }
430 405111 : return(false);
431 : }
432 0 : if ( old_local_name!=NULL && strcmp(old_local_name,iconv_local_encoding_name)==0 )
433 0 : return( to_unicode!=(iconv_t) (-1) );
434 :
435 0 : free(old_local_name);
436 0 : old_local_name = copy(iconv_local_encoding_name);
437 0 : to_utf8 = iconv_open("UTF-8",iconv_local_encoding_name);
438 0 : from_utf8 = iconv_open(iconv_local_encoding_name,"UTF-8");
439 :
440 0 : if ( unicode_name==NULL ) {
441 0 : u.c[0] = 0x1; u.c[1] = 0x2;
442 0 : if ( u.s==0x201 ) { /* Little endian */
443 0 : testnames = namesle;
444 : } else {
445 0 : testnames = namesbe;
446 : }
447 0 : for ( i=0; testnames[i]!=NULL; ++i ) {
448 0 : test = iconv_open(testnames[i],"ISO-8859-1");
449 0 : if ( test!=(iconv_t) -1 && test!=NULL ) {
450 0 : iconv_close(test);
451 0 : unicode_name = testnames[i];
452 0 : break;
453 : }
454 : }
455 0 : if ( unicode_name==NULL ) {
456 0 : for ( i=0; names[i]!=NULL; ++i ) {
457 0 : test = iconv_open(names[i],"ISO-8859-1");
458 0 : if ( test!=(iconv_t) -1 && test!=NULL ) {
459 0 : byteswapped = !BytesNormal(test);
460 0 : iconv_close(test);
461 0 : unicode_name = names[i];
462 0 : break;
463 : }
464 : }
465 : }
466 : }
467 0 : if ( unicode_name == NULL ) {
468 0 : fprintf( stderr, "Could not find a name for Unicode which iconv could understand.\n" );
469 0 : return( false );
470 0 : } else if ( byteswapped ) {
471 0 : fprintf( stderr, "The only name for Unicode that iconv understood produced unexpected results.\nPerhaps %s was byte swapped.\n", unicode_name );
472 0 : return( false );
473 : }
474 :
475 0 : to_unicode = iconv_open(unicode_name,iconv_local_encoding_name);
476 0 : from_unicode = iconv_open(iconv_local_encoding_name,unicode_name);
477 0 : if ( to_unicode == (iconv_t) (-1) || to_utf8 == (iconv_t) (-1) ) {
478 0 : fprintf( stderr, "iconv failed to understand encoding %s\n",
479 : iconv_local_encoding_name);
480 0 : return( false );
481 : }
482 0 : return( true );
483 : }
484 : #endif
485 :
486 0 : unichar_t *def2u_strncpy(unichar_t *uto, const char *from, size_t n) {
487 : #if HAVE_ICONV
488 0 : if ( my_iconv_setup() ) {
489 0 : size_t in_left = n, out_left = sizeof(unichar_t)*n;
490 0 : char *cto = (char *) uto;
491 0 : iconv(to_unicode, (char **) &from, &in_left, &cto, &out_left);
492 0 : if ( cto<((char *) uto)+2*n) *cto++ = '\0';
493 0 : if ( cto<((char *) uto)+2*n) *cto++ = '\0';
494 0 : if ( cto<((char *) uto)+4*n) *cto++ = '\0';
495 0 : if ( cto<((char *) uto)+4*n) *cto++ = '\0';
496 0 : return( uto );
497 : }
498 : #endif
499 0 : return( encoding2u_strncpy(uto,from,n,local_encoding));
500 : }
501 :
502 0 : char *u2def_strncpy(char *to, const unichar_t *ufrom, size_t n) {
503 : #if HAVE_ICONV
504 0 : if ( my_iconv_setup() ) {
505 0 : size_t in_left = sizeof(unichar_t)*n, out_left = n;
506 0 : char *cfrom = (char *) ufrom, *cto=to;
507 0 : iconv(from_unicode, (char **) &cfrom, &in_left, &cto, &out_left);
508 0 : if ( cto<to+n ) *cto++ = '\0';
509 0 : if ( cto<to+n ) *cto++ = '\0';
510 0 : if ( cto<to+n ) *cto++ = '\0';
511 0 : if ( cto<to+n ) *cto++ = '\0';
512 0 : return( to );
513 : }
514 : #endif
515 0 : return( u2encoding_strncpy(to,ufrom,n,local_encoding));
516 : }
517 :
518 0 : unichar_t *def2u_copy(const char *from) {
519 : int len;
520 : unichar_t *uto, *ret;
521 :
522 0 : if ( from==NULL ) return( NULL );
523 0 : len = strlen(from);
524 0 : uto = (unichar_t *) malloc((len+1)*sizeof(unichar_t));
525 0 : if ( uto==NULL ) return( NULL );
526 : #if HAVE_ICONV
527 0 : if ( my_iconv_setup() ) {
528 0 : size_t in_left = len, out_left = sizeof(unichar_t)*len;
529 0 : char *cto = (char *) uto;
530 0 : iconv(to_unicode, (char **) &from, &in_left, &cto, &out_left);
531 0 : *cto++ = '\0';
532 0 : *cto++ = '\0';
533 0 : *cto++ = '\0';
534 0 : *cto++ = '\0';
535 0 : return( uto );
536 : }
537 : #endif
538 0 : ret = encoding2u_strncpy(uto,from,len,local_encoding);
539 0 : if ( ret==NULL )
540 0 : free( uto );
541 : else
542 0 : uto[len] = '\0';
543 0 : return( ret );
544 : }
545 :
546 202396 : char *u2def_copy(const unichar_t *ufrom) {
547 : int len;
548 : char *to, *ret;
549 :
550 202396 : if ( ufrom==NULL ) return( NULL );
551 202396 : len = u_strlen(ufrom);
552 : #if HAVE_ICONV
553 202396 : if ( my_iconv_setup() ) {
554 0 : size_t in_left = sizeof(unichar_t)*len, out_left = 3*len;
555 0 : char *cfrom = (char *) ufrom, *cto;
556 0 : cto = to = (char *) malloc(3*len+2);
557 0 : if ( cto==NULL ) return( NULL );
558 0 : iconv(from_unicode, (char **) &cfrom, &in_left, &cto, &out_left);
559 0 : *cto++ = '\0';
560 0 : *cto++ = '\0';
561 0 : *cto++ = '\0';
562 0 : *cto++ = '\0';
563 0 : return( to );
564 : }
565 : #endif
566 202396 : if ( local_encoding==e_utf8 )
567 0 : len *= 3;
568 202396 : if ( local_encoding>=e_first2byte )
569 0 : len *= 2;
570 202396 : to = (char *) malloc(len+sizeof(unichar_t));
571 202396 : if ( to==NULL ) return( NULL );
572 202396 : ret = u2encoding_strncpy(to,ufrom,len,local_encoding);
573 202396 : if ( ret==NULL )
574 0 : free( to );
575 202396 : else if ( local_encoding<e_first2byte )
576 202396 : to[len] = '\0';
577 : else {
578 0 : to[len] = '\0';
579 0 : to[len+1] = '\0';
580 : }
581 202396 : return( ret );
582 : }
583 :
584 319 : char *def2utf8_copy(const char *from) {
585 : int len;
586 : char *ret;
587 : unichar_t *temp, *uto;
588 :
589 319 : if ( from==NULL ) return( NULL );
590 319 : len = strlen(from);
591 : #if HAVE_ICONV
592 319 : if ( my_iconv_setup() ) {
593 0 : size_t in_left = len, out_left = 3*(len+1);
594 0 : char *cto = (char *) malloc(3*(len+1)), *cret = cto;
595 0 : if ( cto==NULL ) return( NULL );
596 0 : iconv(to_utf8, (char **) &from, &in_left, &cto, &out_left);
597 0 : *cto++ = '\0';
598 0 : *cto++ = '\0';
599 0 : *cto++ = '\0';
600 0 : *cto++ = '\0';
601 0 : return( cret );
602 : }
603 : #endif
604 319 : uto = (unichar_t *) malloc(sizeof(unichar_t)*(len+1));
605 319 : if ( uto==NULL ) return( NULL );
606 319 : temp = encoding2u_strncpy(uto,from,len,local_encoding);
607 319 : if ( temp==NULL ) {
608 0 : free( uto );
609 0 : return( NULL );
610 : }
611 319 : uto[len] = '\0';
612 319 : ret = u2utf8_copy(uto);
613 319 : free(uto);
614 319 : return( ret );
615 : }
616 :
617 202396 : char *utf82def_copy(const char *ufrom) {
618 : int len;
619 : char *ret;
620 : unichar_t *u2from;
621 :
622 202396 : if ( ufrom==NULL ) return( NULL );
623 202396 : len = strlen(ufrom);
624 : #if HAVE_ICONV
625 202396 : if ( my_iconv_setup() ) {
626 0 : size_t in_left = len, out_left = 3*len;
627 0 : char *cfrom = (char *) ufrom, *cto, *to;
628 0 : cto = to = (char *) malloc(3*len+2);
629 0 : if ( cto==NULL ) return( NULL );
630 0 : iconv(from_utf8, (char **) &cfrom, &in_left, &cto, &out_left);
631 0 : *cto++ = '\0';
632 0 : *cto++ = '\0';
633 0 : *cto++ = '\0';
634 0 : *cto++ = '\0';
635 0 : return( to );
636 : }
637 : #endif
638 202396 : if ( local_encoding==e_utf8 ) return( copy( ufrom )); /* Well, that's easy */
639 202396 : u2from = utf82u_copy(ufrom);
640 202396 : ret = u2def_copy(u2from);
641 202396 : free(u2from);
642 202396 : return( ret );
643 : }
|