1/* winduni.c -- unicode support for the windres program.
2   Copyright (C) 1997-2020 Free Software Foundation, Inc.
3   Written by Ian Lance Taylor, Cygnus Support.
4   Rewritten by Kai Tietz, Onevision.
5
6   This file is part of GNU Binutils.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 3 of the License, or
11   (at your option) any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software
20   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21   02110-1301, USA.  */
22
23
24/* This file contains unicode support routines for the windres
25   program.  Ideally, we would have generic unicode support which
26   would work on all systems.  However, we don't.  Instead, on a
27   Windows host, we are prepared to call some Windows routines.  This
28   means that we will generate different output on Windows and Unix
29   hosts, but that seems better than not really supporting unicode at
30   all.  */
31
32#include "sysdep.h"
33#include "bfd.h"
34#include "libiberty.h" /* for xstrdup */
35#include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
37#if defined (_WIN32) || defined (__CYGWIN__)
38#include <windows.h>
39#include <winnls.h>
40#endif
41#include "winduni.h"
42#include "safe-ctype.h"
43
44#if HAVE_ICONV
45#include <iconv.h>
46#endif
47
/* Forward declarations of the file-local conversion primitives and the
   ASCII test; all three are defined later in this file.  */
static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
static int unichar_isascii (const unichar *, rc_uint_type);
51
/* On hosts other than Windows and Cygwin the codepage and language
   information is looked up in the static tables below.  */
54
55#if !defined (_WIN32) && !defined (__CYGWIN__)
56
/* Codepages mapped.  Each entry pairs a codepage number with the
   encoding name that wind_iconv_cp hands to iconv_open for it.  The
   table is terminated by an entry whose codepage is (rc_uint_type) -1;
   wind_find_codepage_info relies on that sentinel.
   NOTE(review): several DOS codepages (e.g. 437, 850) are mapped to
   Windows-family encoding names -- confirm this approximation is
   intended.  */
static local_iconv_map codepages[] =
{
  { 0, "cp1252" },
  { 1, "WINDOWS-1252" },
  { 437, "MS-ANSI" },
  { 737, "MS-GREEK" },
  { 775, "WINBALTRIM" },
  { 850, "MS-ANSI" },
  { 852, "MS-EE" },
  { 857, "MS-TURK" },
  { 862, "CP862" },
  { 864, "CP864" },
  { 866, "MS-CYRL" },
  { 874, "WINDOWS-874" },
  { 932, "CP932" },
  { 936, "CP936" },
  { 949, "CP949" },
  { 950, "CP950" },
  { 1250, "WINDOWS-1250" },
  { 1251, "WINDOWS-1251" },
  { 1252, "WINDOWS-1252" },
  { 1253, "WINDOWS-1253" },
  { 1254, "WINDOWS-1254" },
  { 1255, "WINDOWS-1255" },
  { 1256, "WINDOWS-1256" },
  { 1257, "WINDOWS-1257" },
  { 1258, "WINDOWS-1258" },
  { CP_UTF7, "UTF-7" },
  { CP_UTF8, "UTF-8" },
  { CP_UTF16, "UTF-16LE" },
  { (rc_uint_type) -1, NULL }
};
90
/* Languages supported.  Each entry starts with the language id that
   wind_find_language_by_id searches for; the remaining fields are
   presumably, in order, the DOS codepage, the Windows codepage, the
   language name and the country name (confirm against the
   wind_language_t declaration).  The table is terminated by an entry
   whose id is (unsigned) -1.
   NOTE(review): codepage 855 (used by 0x0C1A) has no entry in
   codepages[], so wind_find_codepage_info returns NULL for it.  */
static const wind_language_t languages[] =
{
  { 0x0000, 437, 1252, "Neutral", "Neutral" },
  { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
  { 0x0403, 850, 1252, "Catalan", "Spain" },	      { 0x0404, 950,  950, "Chinese", "Taiwan" },
  { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
  { 0x0407, 850, 1252, "German", "Germany" },	      { 0x0408, 737, 1253, "Greek", "Greece" },
  { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
  { 0x040B, 850, 1252, "Finnish", "Finland" },	      { 0x040C, 850, 1252, "French", "France" },
  { 0x040D, 862, 1255, "Hebrew", "Israel" },	      { 0x040E, 852, 1250, "Hungarian", "Hungary" },
  { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
  { 0x0411, 932,  932, "Japanese", "Japan" },	      { 0x0412, 949,  949, "Korean", "Korea (south)" },
  { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
  { 0x0415, 852, 1250, "Polish", "Poland" },	      { 0x0416, 850, 1252, "Portuguese", "Brazil" },
  { 0x0418, 852, 1250, "Romanian", "Romania" },	      { 0x0419, 866, 1251, "Russian", "Russia" },
  { 0x041A, 852, 1250, "Croatian", "Croatia" },	      { 0x041B, 852, 1250, "Slovak", "Slovakia" },
  { 0x041C, 852, 1250, "Albanian", "Albania" },	      { 0x041D, 850, 1252, "Swedish", "Sweden" },
  { 0x041E, 874,  874, "Thai", "Thailand" },	      { 0x041F, 857, 1254, "Turkish", "Turkey" },
  { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
  { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
  { 0x0425, 775, 1257, "Estonian", "Estonia" },	      { 0x0426, 775, 1257, "Latvian", "Latvia" },
  { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
  { 0x0429, 864, 1256, "Arabic", "Farsi" },	      { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
  { 0x042D, 850, 1252, "Basque", "Spain" },
  { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
  { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
  { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
  { 0x043C, 437, 1252, "Irish", "Ireland" },
  { 0x043E, 850, 1252, "Malay", "Malaysia" },
  { 0x0801, 864, 1256, "Arabic", "Iraq" },
  { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
  { 0x0807, 850, 1252, "German", "Switzerland" },
  { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
  { 0x080C, 850, 1252, "French", "Belgium" },
  { 0x0810, 850, 1252, "Italian", "Switzerland" },
  { 0x0813, 850, 1252, "Dutch", "Belgium" },	      { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
  { 0x0816, 850, 1252, "Portuguese", "Portugal" },
  { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
  { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
  { 0x0C01, 864, 1256, "Arabic", "Egypt" },
  { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
  { 0x0C07, 850, 1252, "German", "Austria" },
  { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
  { 0x0C0C, 850, 1252, "French", "Canada"},
  { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
  { 0x1001, 864, 1256, "Arabic", "Libya" },
  { 0x1004, 936,  936, "Chinese", "Singapore" },
  { 0x1007, 850, 1252, "German", "Luxembourg" },
  { 0x1009, 850, 1252, "English", "Canada" },
  { 0x100A, 850, 1252, "Spanish", "Guatemala" },
  { 0x100C, 850, 1252, "French", "Switzerland" },
  { 0x1401, 864, 1256, "Arabic", "Algeria" },
  { 0x1407, 850, 1252, "German", "Liechtenstein" },
  { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
  { 0x140C, 850, 1252, "French", "Luxembourg" },
  { 0x1801, 864, 1256, "Arabic", "Morocco" },
  { 0x1809, 850, 1252, "English", "Ireland" },	      { 0x180A, 850, 1252, "Spanish", "Panama" },
  { 0x180C, 850, 1252, "French", "Monaco" },
  { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
  { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
  { 0x2001, 864, 1256, "Arabic", "Oman" },
  { 0x2009, 850, 1252, "English", "Jamaica" },	      { 0x200A, 850, 1252, "Spanish", "Venezuela" },
  { 0x2401, 864, 1256, "Arabic", "Yemen" },
  { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
  { 0x2801, 864, 1256, "Arabic", "Syria" },
  { 0x2809, 850, 1252, "English", "Belize" },	      { 0x280A, 850, 1252, "Spanish", "Peru" },
  { 0x2C01, 864, 1256, "Arabic", "Jordan" },
  { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
  { 0x3001, 864, 1256, "Arabic", "Lebanon" },
  { 0x3009, 437, 1252, "English", "Zimbabwe" },	      { 0x300A, 850, 1252, "Spanish", "Ecuador" },
  { 0x3401, 864, 1256, "Arabic", "Kuwait" },
  { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
  { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
  { 0x380A, 850, 1252, "Spanish", "Uruguay" },
  { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
  { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
  { 0x4001, 864, 1256, "Arabic", "Qatar" },
  { 0x400A, 850, 1252, "Spanish", "Bolivia" },
  { 0x440A, 850, 1252, "Spanish", "El Salvador" },
  { 0x480A, 850, 1252, "Spanish", "Honduras" },
  { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
  { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
  { (unsigned) -1,  0,      0, NULL, NULL }
};
176
177#endif
178
/* Specifies the default codepage to be used for unicode
   transformations.  By default this is CP_ACP.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  By default this is CP_ACP.  This is the codepage
   that unicode_from_ascii, unicode_from_ascii_len and
   ascii_from_unicode convert with.  */
rc_uint_type wind_current_codepage = CP_ACP;
186
187/* Convert an ASCII string to a unicode string.  We just copy it,
188   expanding chars to shorts, rather than doing something intelligent.  */
189
190void
191unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
192{
193  unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
194}
195
/* Convert the A_LENGTH bytes at ASCII to a unicode string, using the
   codepage in wind_current_codepage.  Unlike unicode_from_ascii, this
   routine also converts \0 bytes embedded within the input.

   The result is stored in *UNICODE (NULL if A_LENGTH is zero).  If
   LENGTH is not NULL, *LENGTH receives the number of unichars
   produced.  */

void
unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
{
  char *tmp, *p;
  rc_uint_type tlen, elen, idx = 0;

  *unicode = NULL;

  if (!a_length)
    {
      if (length)
        *length = 0;
      return;
    }

  /* Make sure we have zero terminated string.  */
  p = tmp = (char *) xmalloc (a_length + 1);
  memcpy (tmp, ascii, a_length);
  tmp[a_length] = 0;

  /* Each iteration handles either one embedded NUL, one byte that
     could not be converted, or one whole NUL-free run.  IDX counts
     the unichars accumulated in *UNICODE so far.  */
  while (a_length > 0)
    {
      unichar *utmp, *up;

      /* Length of the NUL-free run starting at P.  */
      tlen = strlen (p);

      if (tlen > a_length)
        tlen = a_length;
      if (*p == 0)
        {
	  /* Make room for one more character.  */
	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
	  if (idx > 0)
	    {
	      memcpy (utmp, *unicode, idx * sizeof (unichar));
	    }
	  *unicode = utmp;
	  /* Embedded NUL: emit a single zero unichar.  */
	  utmp[idx++] = 0;
	  --a_length;
	  p++;
	  continue;
	}
      utmp = NULL;
      elen = 0;
      /* Measuring pass: the size comes back in bytes and zero means
	 the run could not be converted.  */
      elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
      if (elen)
	{
	  utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
	  wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
	  /* Turn the byte count into a unichar count and drop the
	     terminating zero.  */
	  elen /= sizeof (unichar);
	  elen --;
	}
      else
        {
	  /* Conversion failed: pass one byte through unchanged and
	     retry from the following byte.  */
	  /* Make room for one more character.  */
	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
	  if (idx > 0)
	    {
	      memcpy (utmp, *unicode, idx * sizeof (unichar));
	    }
	  *unicode = utmp;
	  utmp[idx++] = ((unichar) *p) & 0xff;
	  --a_length;
	  p++;
	  continue;
	}
      /* The whole run was consumed.  */
      p += tlen;
      a_length -= tlen;

      /* Append the converted run to what has been accumulated.  */
      up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
      if (idx > 0)
	memcpy (up, *unicode, idx * sizeof (unichar));

      *unicode = up;
      if (elen)
	memcpy (&up[idx], utmp, sizeof (unichar) * elen);

      idx += elen;
    }

  if (length)
    *length = idx;

  free (tmp);
}
285
286/* Convert an unicode string to an ASCII string.  We just copy it,
287   shrink shorts to chars, rather than doing something intelligent.
288   Shorts with not within the char range are replaced by '_'.  */
289
290void
291ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
292{
293  codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
294}
295
296/* Print the unicode string UNICODE to the file E.  LENGTH is the
297   number of characters to print, or -1 if we should print until the
298   end of the string.  FIXME: On a Windows host, we should be calling
299   some Windows function, probably WideCharToMultiByte.  */
300
301void
302unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
303{
304  while (1)
305    {
306      unichar ch;
307
308      if (length == 0)
309	return;
310      if ((bfd_signed_vma) length > 0)
311	--length;
312
313      ch = *unicode;
314
315      if (ch == 0 && (bfd_signed_vma) length < 0)
316	return;
317
318      ++unicode;
319
320      if ((ch & 0x7f) == ch)
321	{
322	  if (ch == '\\')
323	    fputs ("\\\\", e);
324	  else if (ch == '"')
325	    fputs ("\"\"", e);
326	  else if (ISPRINT (ch))
327	    putc (ch, e);
328	  else
329	    {
330	      switch (ch)
331		{
332		case ESCAPE_A:
333		  fputs ("\\a", e);
334		  break;
335
336		case ESCAPE_B:
337		  fputs ("\\b", e);
338		  break;
339
340		case ESCAPE_F:
341		  fputs ("\\f", e);
342		  break;
343
344		case ESCAPE_N:
345		  fputs ("\\n", e);
346		  break;
347
348		case ESCAPE_R:
349		  fputs ("\\r", e);
350		  break;
351
352		case ESCAPE_T:
353		  fputs ("\\t", e);
354		  break;
355
356		case ESCAPE_V:
357		  fputs ("\\v", e);
358		  break;
359
360		default:
361		  fprintf (e, "\\%03o", (unsigned int) ch);
362		  break;
363		}
364	    }
365	}
366      else if ((ch & 0xff) == ch)
367	fprintf (e, "\\%03o", (unsigned int) ch);
368      else
369	fprintf (e, "\\x%04x", (unsigned int) ch);
370    }
371}
372
373/* Print a unicode string to a file.  */
374
375void
376ascii_print (FILE *e, const char *s, rc_uint_type length)
377{
378  while (1)
379    {
380      char ch;
381
382      if (length == 0)
383	return;
384      if ((bfd_signed_vma) length > 0)
385	--length;
386
387      ch = *s;
388
389      if (ch == 0 && (bfd_signed_vma) length < 0)
390	return;
391
392      ++s;
393
394      if ((ch & 0x7f) == ch)
395	{
396	  if (ch == '\\')
397	    fputs ("\\\\", e);
398	  else if (ch == '"')
399	    fputs ("\"\"", e);
400	  else if (ISPRINT (ch))
401	    putc (ch, e);
402	  else
403	    {
404	      switch (ch)
405		{
406		case ESCAPE_A:
407		  fputs ("\\a", e);
408		  break;
409
410		case ESCAPE_B:
411		  fputs ("\\b", e);
412		  break;
413
414		case ESCAPE_F:
415		  fputs ("\\f", e);
416		  break;
417
418		case ESCAPE_N:
419		  fputs ("\\n", e);
420		  break;
421
422		case ESCAPE_R:
423		  fputs ("\\r", e);
424		  break;
425
426		case ESCAPE_T:
427		  fputs ("\\t", e);
428		  break;
429
430		case ESCAPE_V:
431		  fputs ("\\v", e);
432		  break;
433
434		default:
435		  fprintf (e, "\\%03o", (unsigned int) ch);
436		  break;
437		}
438	    }
439	}
440      else
441	fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
442    }
443}
444
445rc_uint_type
446unichar_len (const unichar *unicode)
447{
448  rc_uint_type r = 0;
449
450  if (unicode)
451    while (unicode[r] != 0)
452      r++;
453  else
454    --r;
455  return r;
456}
457
458unichar *
459unichar_dup (const unichar *unicode)
460{
461  unichar *r;
462  int len;
463
464  if (! unicode)
465    return NULL;
466  for (len = 0; unicode[len] != 0; ++len)
467    ;
468  ++len;
469  r = ((unichar *) res_alloc (len * sizeof (unichar)));
470  memcpy (r, unicode, len * sizeof (unichar));
471  return r;
472}
473
474unichar *
475unichar_dup_uppercase (const unichar *u)
476{
477  unichar *r = unichar_dup (u);
478  int i;
479
480  if (! r)
481    return NULL;
482
483  for (i = 0; r[i] != 0; ++i)
484    {
485      if (r[i] >= 'a' && r[i] <= 'z')
486	r[i] &= 0xdf;
487    }
488  return r;
489}
490
491static int
492unichar_isascii (const unichar *u, rc_uint_type len)
493{
494  rc_uint_type i;
495
496  if ((bfd_signed_vma) len < 0)
497    {
498      if (u)
499	len = (rc_uint_type) unichar_len (u);
500      else
501	len = 0;
502    }
503
504  for (i = 0; i < len; i++)
505    if ((u[i] & 0xff80) != 0)
506      return 0;
507  return 1;
508}
509
510void
511unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
512{
513  if (! unichar_isascii (u, len))
514    fputc ('L', e);
515  fputc ('"', e);
516  unicode_print (e, u, len);
517  fputc ('"', e);
518}
519
520int
521unicode_is_valid_codepage (rc_uint_type cp)
522{
523  if ((cp & 0xffff) != cp)
524    return 0;
525  if (cp == CP_UTF16 || cp == CP_ACP)
526    return 1;
527
528#if !defined (_WIN32) && !defined (__CYGWIN__)
529  if (! wind_find_codepage_info (cp))
530    return 0;
531  return 1;
532#else
533  return !! IsValidCodePage ((UINT) cp);
534#endif
535}
536
537#if defined (_WIN32) || defined (__CYGWIN__)
538
539#define max_cp_string_len 6
540
541static unsigned int
542codepage_from_langid (unsigned short langid)
543{
544  char cp_string [max_cp_string_len];
545  int c;
546
547  memset (cp_string, 0, max_cp_string_len);
548  /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
549     but is unavailable on Win95.  */
550  c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
551  		      LOCALE_IDEFAULTANSICODEPAGE,
552  		      cp_string, max_cp_string_len);
553  /* If codepage data for an LCID is not installed on users's system,
554     GetLocaleInfo returns an empty string.  Fall back to system ANSI
555     default. */
556  if (c == 0)
557    return CP_ACP;
558  return strtoul (cp_string, 0, 10);
559}
560
561static unsigned int
562wincodepage_from_langid (unsigned short langid)
563{
564  char cp_string [max_cp_string_len];
565  int c;
566
567  memset (cp_string, 0, max_cp_string_len);
568  /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
569     but is unavailable on Win95.  */
570  c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
571		      LOCALE_IDEFAULTCODEPAGE,
572		      cp_string, max_cp_string_len);
573  /* If codepage data for an LCID is not installed on users's system,
574     GetLocaleInfo returns an empty string.  Fall back to system ANSI
575     default. */
576  if (c == 0)
577    return CP_OEM;
578  return strtoul (cp_string, 0, 10);
579}
580
581static char *
582lang_from_langid (unsigned short langid)
583{
584  char cp_string[261];
585  int c;
586
587  memset (cp_string, 0, 261);
588  c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
589  		      LOCALE_SENGLANGUAGE,
590  		      cp_string, 260);
591  /* If codepage data for an LCID is not installed on users's system,
592     GetLocaleInfo returns an empty string.  Fall back to system ANSI
593     default. */
594  if (c == 0)
595    strcpy (cp_string, "Neutral");
596  return xstrdup (cp_string);
597}
598
599static char *
600country_from_langid (unsigned short langid)
601{
602  char cp_string[261];
603  int c;
604
605  memset (cp_string, 0, 261);
606  c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
607  		      LOCALE_SENGCOUNTRY,
608  		      cp_string, 260);
609  /* If codepage data for an LCID is not installed on users's system,
610     GetLocaleInfo returns an empty string.  Fall back to system ANSI
611     default. */
612  if (c == 0)
613    strcpy (cp_string, "Neutral");
614  return xstrdup (cp_string);
615}
616
617#endif
618
619const wind_language_t *
620wind_find_language_by_id (unsigned id)
621{
622#if !defined (_WIN32) && !defined (__CYGWIN__)
623  int i;
624
625  if (! id)
626    return NULL;
627  for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
628    ;
629  if (languages[i].id == id)
630    return &languages[i];
631  return NULL;
632#else
633  static wind_language_t wl;
634
635  wl.id = id;
636  wl.doscp = codepage_from_langid ((unsigned short) id);
637  wl.wincp = wincodepage_from_langid ((unsigned short) id);
638  wl.name = lang_from_langid ((unsigned short) id);
639  wl.country = country_from_langid ((unsigned short) id);
640
641  return & wl;
642#endif
643}
644
645const local_iconv_map *
646wind_find_codepage_info (unsigned cp)
647{
648#if !defined (_WIN32) && !defined (__CYGWIN__)
649  int i;
650
651  for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
652    ;
653  if (codepages[i].codepage == (rc_uint_type) -1)
654    return NULL;
655  return &codepages[i];
656#else
657  static local_iconv_map lim;
658  if (!unicode_is_valid_codepage (cp))
659  	return NULL;
660  lim.codepage = cp;
661  lim.iconv_name = "";
662  return & lim;
663#endif
664}
665
666/* Convert an Codepage string to a unicode string.  */
667
668void
669unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
670{
671  rc_uint_type len;
672
673  len = wind_MultiByteToWideChar (cp, src, NULL, 0);
674  if (len)
675    {
676      *u = ((unichar *) res_alloc (len));
677      wind_MultiByteToWideChar (cp, src, *u, len);
678    }
679  /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
680     this will set *length to -1.  */
681  len -= sizeof (unichar);
682
683  if (length != NULL)
684    *length = len / sizeof (unichar);
685}
686
687/* Convert an unicode string to an codepage string.  */
688
689void
690codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
691{
692  rc_uint_type len;
693
694  len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
695  if (len)
696    {
697      *ascii = (char *) res_alloc (len * sizeof (char));
698      wind_WideCharToMultiByte (cp, unicode, *ascii, len);
699    }
700  /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
701     this will set *length to -1.  */
702  len--;
703
704  if (length != NULL)
705    *length = len;
706}
707
708#if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
709static int
710iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
711{
712  int i;
713
714  for (i = 1; i <= 32; i++)
715    {
716      char *tmp_d = d;
717      ICONV_CONST char *tmp_s = s;
718      size_t ret;
719      size_t s_left = (size_t) i;
720      size_t d_left = (size_t) d_len;
721
722      ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
723
724      if (ret != (size_t) -1)
725	{
726	  *n_s = tmp_s;
727	  *n_d = tmp_d;
728	  return 0;
729	}
730    }
731
732  return 1;
733}
734
735static const char *
736wind_iconv_cp (rc_uint_type cp)
737{
738  const local_iconv_map *lim = wind_find_codepage_info (cp);
739
740  if (!lim)
741    return NULL;
742  return lim->iconv_name;
743}
744#endif /* HAVE_ICONV */
745
746static rc_uint_type
747wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
748			  unichar *u, rc_uint_type u_len)
749{
750  rc_uint_type ret = 0;
751
752#if defined (_WIN32) || defined (__CYGWIN__)
753  rc_uint_type conv_flags = MB_PRECOMPOSED;
754
755  /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
756     MultiByteToWideChar will set the last error to
757     ERROR_INVALID_FLAGS if we do. */
758  if (cp == CP_UTF8 || cp == CP_UTF7)
759    conv_flags = 0;
760
761  ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
762					    mb, -1, u, u_len);
763  /* Convert to bytes. */
764  ret *= sizeof (unichar);
765
766#elif defined (HAVE_ICONV)
767  int first = 1;
768  char tmp[32];
769  char *p_tmp;
770  const char *iconv_name = wind_iconv_cp (cp);
771
772  if (!mb || !iconv_name)
773    return 0;
774  iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
775
776  while (1)
777    {
778      int iret;
779      const char *n_mb = "";
780      char *n_tmp = "";
781
782      p_tmp = tmp;
783      iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
784      if (first)
785	{
786	  first = 0;
787	  continue;
788	}
789      if (!iret)
790	{
791	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
792
793	  if (u)
794	    {
795	      if ((size_t) u_len < l_tmp)
796		break;
797	      memcpy (u, tmp, l_tmp);
798	      u += l_tmp/2;
799	      u_len -= l_tmp;
800	    }
801	  ret += l_tmp;
802	}
803      else
804	break;
805      if (tmp[0] == 0 && tmp[1] == 0)
806	break;
807      mb = n_mb;
808    }
809  iconv_close (cd);
810#else
811  if (cp)
812    ret = 0;
813  ret = strlen (mb) + 1;
814  ret *= sizeof (unichar);
815  if (u != NULL && u_len != 0)
816    {
817      do
818	{
819	  *u++ = ((unichar) *mb) & 0xff;
820	  --u_len; mb++;
821	}
822      while (u_len != 0 && mb[-1] != 0);
823    }
824  if (u != NULL && u_len != 0)
825    *u = 0;
826#endif
827  return ret;
828}
829
830static rc_uint_type
831wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
832{
833  rc_uint_type ret = 0;
834#if defined (_WIN32) || defined (__CYGWIN__)
835  WINBOOL used_def = FALSE;
836
837  ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
838				      	    NULL, & used_def);
839#elif defined (HAVE_ICONV)
840  int first = 1;
841  char tmp[32];
842  char *p_tmp;
843  const char *iconv_name = wind_iconv_cp (cp);
844
845  if (!u || !iconv_name)
846    return 0;
847  iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
848
849  while (1)
850    {
851      int iret;
852      const char *n_u = "";
853      char *n_tmp = "";
854
855      p_tmp = tmp;
856      iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
857      if (first)
858	{
859	  first = 0;
860	  continue;
861	}
862      if (!iret)
863	{
864	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
865
866	  if (mb)
867	    {
868	      if ((size_t) mb_len < l_tmp)
869		break;
870	      memcpy (mb, tmp, l_tmp);
871	      mb += l_tmp;
872	      mb_len -= l_tmp;
873	    }
874	  ret += l_tmp;
875	}
876      else
877	break;
878      if (u[0] == 0)
879	break;
880      u = (const unichar *) n_u;
881    }
882  iconv_close (cd);
883#else
884  if (cp)
885    ret = 0;
886
887  while (u[ret] != 0)
888    ++ret;
889
890  ++ret;
891
892  if (mb)
893    {
894      while (*u != 0 && mb_len != 0)
895	{
896	  if (u[0] == (u[0] & 0x7f))
897	    *mb++ = (char) u[0];
898	  else
899	    *mb++ = '_';
900	  ++u; --mb_len;
901	}
902      if (mb_len != 0)
903	*mb = 0;
904    }
905#endif
906  return ret;
907}
908