1/* Character set conversion support for GDB.
2
3   Copyright 2001, 2003 Free Software Foundation, Inc.
4
5   This file is part of GDB.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 59 Temple Place - Suite 330,
20   Boston, MA 02111-1307, USA.  */
21
22#include "defs.h"
23#include "charset.h"
24#include "gdbcmd.h"
25#include "gdb_assert.h"
26
27#include <stddef.h>
28#include "gdb_string.h"
29#include <ctype.h>
30
31#ifdef HAVE_ICONV
32#include <iconv.h>
33#endif
34
35
36/* How GDB's character set support works
37
38   GDB has two global settings:
39
40   - The `current host character set' is the character set GDB should
41     use in talking to the user, and which (hopefully) the user's
42     terminal knows how to display properly.
43
44   - The `current target character set' is the character set the
45     program being debugged uses.
46
47   There are commands to set each of these, and mechanisms for
48   choosing reasonable default values.  GDB has a global list of
49   character sets that it can use as its host or target character
50   sets.
51
52   The header file `charset.h' declares various functions that
53   different pieces of GDB need to perform tasks like:
54
55   - printing target strings and characters to the user's terminal
56     (mostly target->host conversions),
57
58   - building target-appropriate representations of strings and
59     characters the user enters in expressions (mostly host->target
60     conversions),
61
62   and so on.
63
64   Now, many of these operations are specific to a particular
65   host/target character set pair.  If GDB supports N character sets,
66   there are N^2 possible pairs.  This means that, the larger GDB's
67   repertoire of character sets gets, the more expensive it gets to add
68   new character sets.
69
70   To make sure that GDB can do the right thing for every possible
71   pairing of host and target character set, while still allowing
72   GDB's repertoire to scale, we use a two-tiered approach:
73
74   - We maintain a global table of "translations" --- groups of
75     functions specific to a particular pair of character sets.
76
77   - However, a translation can be incomplete: some functions can be
78     omitted.  Where there is not a translation to specify exactly
79     what function to use, we provide reasonable defaults.  The
80     default behaviors try to use the "iconv" library functions, which
81     support a wide range of character sets.  However, even if iconv
82     is not available, there are fallbacks to support trivial
83     translations: when the host and target character sets are the
84     same.  */
85
86
87/* The character set and translation structures.  */
88
89
/* A character set GDB knows about.  GDB only supports character sets
   with stateless encodings, in which every character is one byte
   long.  Instances are chained together through NEXT and are found
   again by NAME.  */
struct charset {

  /* A singly-linked list of all known charsets.  */
  struct charset *next;

  /* The name of the character set.  Comparisons on character set
     names are case-sensitive.  */
  const char *name;

  /* Non-zero iff this character set can be used as a host character
     set.  At present, GDB basically assumes that the host character
     set is a superset of ASCII.  */
  int valid_host_charset;

  /* Pointers to charset-specific functions that depend only on a
     single character set, and data pointers to pass to them.  */

  /* Return non-zero if HOST_CHAR can be printed as it stands.  The
     matching baton is handed back as the first argument.  */
  int (*host_char_print_literally) (void *baton,
                                    int host_char);
  void *host_char_print_literally_baton;

  /* Store the control-character equivalent of TARGET_CHAR in
     *TARGET_CTRL_CHAR and return non-zero, or return zero if the
     character set has no such equivalent.  */
  int (*target_char_to_control_char) (void *baton,
                                      int target_char,
                                      int *target_ctrl_char);
  void *target_char_to_control_char_baton;
};
118
119
/* A translation from one character set to another.  Each instance
   bundles the callbacks for one ordered (FROM, TO) pair of character
   set names and is chained to the other translations through NEXT.  */
struct translation {

  /* A singly-linked list of all known translations.  */
  struct translation *next;

  /* This structure describes functions going from the FROM character
     set to the TO character set.  Comparisons on character set names
     are case-sensitive.  */
  const char *from, *to;

  /* Pointers to translation-specific functions, and data pointers to
     pass to them.  These pointers can be zero, indicating that GDB
     should fall back on the default behavior.  We hope the default
     behavior will be correct for many from/to pairs, reducing the
     number of translations that need to be registered explicitly.  */

  /* TARGET_CHAR is in the `from' charset.
     Returns a string in the `to' charset: the text that follows the
     backslash in the escape for TARGET_CHAR, or a null pointer if
     TARGET_CHAR has no backslash escape.  */
  const char *(*c_target_char_has_backslash_escape) (void *baton,
                                                     int target_char);
  void *c_target_char_has_backslash_escape_baton;

  /* HOST_CHAR is in the `from' charset.
     TARGET_CHAR points to a char in the `to' charset.
     Returns non-zero if HOST_CHAR names a backslash escape and its
     value was stored in *TARGET_CHAR, zero otherwise.  */
  int (*c_parse_backslash) (void *baton, int host_char, int *target_char);
  void *c_parse_backslash_baton;

  /* This is used for the host_char_to_target and target_char_to_host
     functions.  Returns non-zero and stores the result in *TO on
     success; returns zero if FROM has no equivalent.  */
  int (*convert_char) (void *baton, int from, int *to);
  void *convert_char_baton;
};
153
154
155
156/* The global lists of character sets and translations.  */
157
158
/* Compile-time defaults for the host and target character sets.
   Either macro may already be defined when this point is reached;
   if not, it falls back to ISO-8859-1.  */
#ifndef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#endif

#ifndef GDB_DEFAULT_TARGET_CHARSET
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

/* Names of the host and target character sets, starting out as the
   defaults above.  NOTE(review): presumably these hold the current
   user-visible settings; the code that updates them is not in this
   part of the file.  */
static const char *host_charset_name = GDB_DEFAULT_HOST_CHARSET;
static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
169
/* Zero-terminated list of character set names for the host side.
   NOTE(review): presumably the values a user may choose for the host
   character set; the consumer is not visible in this part of the
   file.  */
static const char *host_charset_enum[] =
{
  "ASCII",
  "ISO-8859-1",
  0
};
176
/* Zero-terminated list of character set names for the target side.
   It extends the host list with the two EBCDIC variants.
   NOTE(review): presumably the values a user may choose for the
   target character set; the consumer is not visible in this part of
   the file.  */
static const char *target_charset_enum[] =
{
  "ASCII",
  "ISO-8859-1",
  "EBCDIC-US",
  "IBM1047",
  0
};
185
/* The global list of all the charsets GDB knows about.  This is the
   head of a singly-linked list threaded through the `next' field of
   each charset; a null pointer means the list is empty.  */
static struct charset *all_charsets;
188
189
190static void
191register_charset (struct charset *cs)
192{
193  struct charset **ptr;
194
195  /* Put the new charset on the end, so that the list ends up in the
196     same order as the registrations in the _initialize function.  */
197  for (ptr = &all_charsets; *ptr; ptr = &(*ptr)->next)
198    ;
199
200  cs->next = 0;
201  *ptr = cs;
202}
203
204
205static struct charset *
206lookup_charset (const char *name)
207{
208  struct charset *cs;
209
210  for (cs = all_charsets; cs; cs = cs->next)
211    if (! strcmp (name, cs->name))
212      return cs;
213
214  return NULL;
215}
216
217
/* The global list of translations.  This is the head of a
   singly-linked list threaded through the `next' field of each
   translation; a null pointer means the list is empty.  */
static struct translation *all_translations;
220
221
222static void
223register_translation (struct translation *t)
224{
225  t->next = all_translations;
226  all_translations = t;
227}
228
229
230static struct translation *
231lookup_translation (const char *from, const char *to)
232{
233  struct translation *t;
234
235  for (t = all_translations; t; t = t->next)
236    if (! strcmp (from, t->from)
237        && ! strcmp (to, t->to))
238      return t;
239
240  return 0;
241}
242
243
244
245/* Constructing charsets.  */
246
247/* Allocate, initialize and return a straightforward charset.
248   Use this function, rather than creating the structures yourself,
249   so that we can add new fields to the structure in the future without
250   having to tweak all the old charset descriptions.  */
251static struct charset *
252simple_charset (const char *name,
253                int valid_host_charset,
254                int (*host_char_print_literally) (void *baton, int host_char),
255                void *host_char_print_literally_baton,
256                int (*target_char_to_control_char) (void *baton,
257                                                    int target_char,
258                                                    int *target_ctrl_char),
259                void *target_char_to_control_char_baton)
260{
261  struct charset *cs = xmalloc (sizeof (*cs));
262
263  memset (cs, 0, sizeof (*cs));
264  cs->name = name;
265  cs->valid_host_charset = valid_host_charset;
266  cs->host_char_print_literally = host_char_print_literally;
267  cs->host_char_print_literally_baton = host_char_print_literally_baton;
268  cs->target_char_to_control_char = target_char_to_control_char;
269  cs->target_char_to_control_char_baton = target_char_to_control_char_baton;
270
271  return cs;
272}
273
274
275
276/* ASCII functions.  */
277
/* Return non-zero if the low eight bits of C form a printable ASCII
   character, i.e. one in the range 0x20 through 0x7e.  BATON is
   unused.  */
static int
ascii_print_literally (void *baton, int c)
{
  int byte = c & 0xff;

  if (byte < 0x20)
    return 0;

  return byte <= 0x7e;
}
285
286
/* Store the ASCII control character corresponding to C in *CTRL_CHAR
   by keeping only the low five bits of C.  Always succeeds, so the
   return value is always 1.  BATON is unused.  */
static int
ascii_to_control (void *baton, int c, int *ctrl_char)
{
  const int low_five_bits = 0x1f;

  *ctrl_char = c & low_five_bits;
  return 1;
}
293
294
295/* ISO-8859 family functions.  */
296
297
298static int
299iso_8859_print_literally (void *baton, int c)
300{
301  c &= 0xff;
302
303  return ((0x20 <= c && c <= 0x7e) /* ascii printables */
304          || (! sevenbit_strings && 0xA0 <= c)); /* iso 8859 printables */
305}
306
307
/* Store the control character corresponding to C in *CTRL_CHAR.  The
   result keeps bit 7 of C and its low five bits, so a character from
   the upper half of the set maps to a control code in the upper half
   too.  Always succeeds, so the return value is always 1.  BATON is
   unused.  */
static int
iso_8859_to_control (void *baton, int c, int *ctrl_char)
{
  int high_bit = c & 0200;
  int low_bits = c & 037;

  *ctrl_char = high_bit | low_bits;
  return 1;
}
314
315
316/* Construct an ISO-8859-like character set.  */
317static struct charset *
318iso_8859_family_charset (const char *name)
319{
320  return simple_charset (name, 1,
321                         iso_8859_print_literally, 0,
322                         iso_8859_to_control, 0);
323}
324
325
326
327/* EBCDIC family functions.  */
328
329
/* Return non-zero if the low eight bits of C fall in the range 64
   through 254, which this file treats as the EBCDIC codes that can
   be printed as they stand.  BATON is unused.  */
static int
ebcdic_print_literally (void *baton, int c)
{
  int byte = c & 0xff;

  if (byte < 64)
    return 0;

  return byte <= 254;
}
337
338
/* Control-character lookup for the EBCDIC family.  This always
   returns zero and leaves *CTRL_CHAR untouched: EBCDIC has no
   control character equivalents, so numeric escapes are to be used
   instead.  BATON and C are unused.  */
static int
ebcdic_to_control (void *baton, int c, int *ctrl_char)
{
  const int no_equivalent = 0;

  return no_equivalent;
}
346
347
348/* Construct an EBCDIC-like character set.  */
349static struct charset *
350ebcdic_family_charset (const char *name)
351{
352  return simple_charset (name, 0,
353                         ebcdic_print_literally, 0,
354                         ebcdic_to_control, 0);
355}
356
357
358
359
360
361/* Fallback functions using iconv.  */
362
363#if defined(HAVE_ICONV)
364
/* A one-entry cache holding an open iconv descriptor.  I converts
   from the character set FROM to the character set TO.  The value
   (iconv_t) 0 in I means that no descriptor is currently open.  */
struct cached_iconv {
  struct charset *from, *to;
  iconv_t i;
};
369
370
371/* Make sure the iconv cache *CI contains an iconv descriptor
372   translating from FROM to TO.  If it already does, fine; otherwise,
373   close any existing descriptor, and open up a new one.  On success,
374   return zero; on failure, return -1 and set errno.  */
375static int
376check_iconv_cache (struct cached_iconv *ci,
377                   struct charset *from,
378                   struct charset *to)
379{
380  iconv_t i;
381
382  /* Does the cached iconv descriptor match the conversion we're trying
383     to do now?  */
384  if (ci->from == from
385      && ci->to == to
386      && ci->i != (iconv_t) 0)
387    return 0;
388
389  /* It doesn't.  If we actually had any iconv descriptor open at
390     all, close it now.  */
391  if (ci->i != (iconv_t) 0)
392    {
393      i = ci->i;
394      ci->i = (iconv_t) 0;
395
396      if (iconv_close (i) == -1)
397        error ("Error closing `iconv' descriptor for "
398               "`%s'-to-`%s' character conversion: %s",
399               ci->from->name, ci->to->name, safe_strerror (errno));
400    }
401
402  /* Open a new iconv descriptor for the required conversion.  */
403  i = iconv_open (to->name, from->name);
404  if (i == (iconv_t) -1)
405    return -1;
406
407  ci->i = i;
408  ci->from = from;
409  ci->to = to;
410
411  return 0;
412}
413
414
415/* Convert FROM_CHAR using the cached iconv conversion *CI.  Return
416   non-zero if the conversion was successful, zero otherwise.  */
417static int
418cached_iconv_convert (struct cached_iconv *ci, int from_char, int *to_char)
419{
420  char from;
421  ICONV_CONST char *from_ptr = &from;
422  char to, *to_ptr = &to;
423  size_t from_left = sizeof (from), to_left = sizeof (to);
424
425  gdb_assert (ci->i != (iconv_t) 0);
426
427  from = from_char;
428  if (iconv (ci->i, &from_ptr, &from_left, &to_ptr, &to_left)
429      == (size_t) -1)
430    {
431      /* These all suggest that the input or output character sets
432         have multi-byte encodings of some characters, which means
433         it's unsuitable for use as a GDB character set.  We should
434         never have selected it.  */
435      gdb_assert (errno != E2BIG && errno != EINVAL);
436
437      /* This suggests a bug in the code managing *CI.  */
438      gdb_assert (errno != EBADF);
439
440      /* This seems to mean that there is no equivalent character in
441         the `to' character set.  */
442      if (errno == EILSEQ)
443        return 0;
444
445      /* Anything else is mysterious.  */
446      internal_error (__FILE__, __LINE__,
447		      "Error converting character `%d' from `%s' to `%s' "
448                      "character set: %s",
449                      from_char, ci->from->name, ci->to->name,
450                      safe_strerror (errno));
451    }
452
453  /* If the pointers weren't advanced across the input, that also
454     suggests something was wrong.  */
455  gdb_assert (from_left == 0 && to_left == 0);
456
457  *to_char = (unsigned char) to;
458  return 1;
459}
460
461
/* Hook for registering character sets that are provided by iconv.
   At present it registers nothing; the comment in the body explains
   why.  */
static void
register_iconv_charsets (void)
{
  /* Here we should check whether various character sets were
     recognized by the local iconv implementation.

     The first implementation registered a bunch of character sets
     recognized by iconv, but then we discovered that iconv on Solaris
     and iconv on GNU/Linux had no character sets in common.  So we
     replaced them with the hard-coded tables that appear later in the
     file.  */
}
474
475#endif /* defined (HAVE_ICONV) */
476
477
478/* Fallback routines for systems without iconv.  */
479
480#if ! defined (HAVE_ICONV)
/* Placeholder definition for builds without iconv.  It gives
   `struct cached_iconv' a complete type but carries no real state.  */
struct cached_iconv { char nothing; };
482
/* Stand-in for builds without iconv.  No conversion descriptor can
   be opened, so this always fails: errno is set to EINVAL and -1 is
   returned.  All three arguments are ignored.  */
static int
check_iconv_cache (struct cached_iconv *ci,
                   struct charset *from,
                   struct charset *to)
{
  const int failure = -1;

  errno = EINVAL;
  return failure;
}
491
/* Stand-in for builds without iconv.  The check_iconv_cache defined
   for this configuration always fails, so presumably no caller ever
   gets as far as asking for a conversion.  Reaching this function
   therefore trips an assertion.  Returns zero, meaning "conversion
   failed", should control ever continue past that assertion.  */
static int
cached_iconv_convert (struct cached_iconv *ci, int from_char, int *to_char)
{
  /* This function should never be called.  */
  gdb_assert (0);

  /* Not reached as long as the assertion does not return.  It is here
     so that control cannot fall off the end of a function declared
     to return int.  */
  return 0;
}
498
/* Stand-in for builds without iconv: there are no iconv-provided
   character sets to register, so this does nothing.  */
static void
register_iconv_charsets (void)
{
}
503
504#endif /* ! defined(HAVE_ICONV) */
505
506
507/* Default trivial conversion functions.  */
508
/* Trivial conversion for use when the two character sets are the
   same: copy EITHER_CHAR to *OTHER_CHAR unchanged.  Always succeeds,
   so the return value is always 1.  BATON is unused.  */
static int
identity_either_char_to_other (void *baton, int either_char, int *other_char)
{
  const int converted = 1;

  *other_char = either_char;
  return converted;
}
515
516
517
518/* Default non-trivial conversion functions.  */
519
520
/* Parallel tables describing the standard C backslash escapes in the
   host character set.  For a given index I:

   - backslashable[I] is the letter written after the backslash;
   - backslashed[I] is that same letter as a string;
   - represented[I] is the character the escape stands for.

   backslashed has one entry more than there are letters.  Its final
   "0" lines up with the terminating null of `represented', which is
   the position strchr reports when asked to find character zero.  */
static char backslashable[] = "abfnrtv";
static char *backslashed[] = {"a", "b", "f", "n", "r", "t", "v", "0"};
static char represented[] = "\a\b\f\n\r\t\v";
524
525
526/* Translate TARGET_CHAR into the host character set, and see if it
527   matches any of our standard escape sequences.  */
528static const char *
529default_c_target_char_has_backslash_escape (void *baton, int target_char)
530{
531  int host_char;
532  const char *ix;
533
534  /* If target_char has no equivalent in the host character set,
535     assume it doesn't have a backslashed form.  */
536  if (! target_char_to_host (target_char, &host_char))
537    return NULL;
538
539  ix = strchr (represented, host_char);
540  if (ix)
541    return backslashed[ix - represented];
542  else
543    return NULL;
544}
545
546
547/* Translate the backslash the way we would in the host character set,
548   and then try to translate that into the target character set.  */
549static int
550default_c_parse_backslash (void *baton, int host_char, int *target_char)
551{
552  const char *ix;
553
554  ix = strchr (backslashable, host_char);
555
556  if (! ix)
557    return 0;
558  else
559    return host_char_to_target (represented[ix - backslashable],
560                                target_char);
561}
562
563
/* A convert_char-style callback backed by iconv.  BATON must point
   to the struct cached_iconv to use; the work is done by
   cached_iconv_convert, whose result is returned unchanged.  */
static int
iconv_convert (void *baton, int from_char, int *to_char)
{
  return cached_iconv_convert ((struct cached_iconv *) baton,
                               from_char, to_char);
}
571
572
573
574/* Conversion tables.  */
575
576
577/* I'd much rather fall back on iconv whenever possible.  But the
578   character set names you use with iconv aren't standardized at all,
579   a lot of platforms have really meager character set coverage, etc.
580   I wanted to have at least something we could use to exercise the
581   test suite on all platforms.
582
583   In the long run, we should have a configure-time process explore
584   somehow which character sets the host platform supports, and some
   arrangement that allows GDB users to use platform-independent names
586   for character sets.  */
587
588
589/* We generated these tables using iconv on a GNU/Linux machine.  */
590
591
/* ascii_to_iso_8859_1_table[C] is the ISO-8859-1 code for the ASCII
   character C, or -1 if C has no equivalent (-1 is the value that
   table_convert_char treats as "no conversion").  Only codes 0
   through 127 are mapped.  The comment ending each row gives the
   index just past that row.  */
static int ascii_to_iso_8859_1_table[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, /* 80 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, /* 96 */
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, /* 112 */
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, /* 128 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
};
610
611
/* ascii_to_ebcdic_us_table[C] is the EBCDIC-US code for the ASCII
   character C, or -1 if C has no equivalent (-1 is the value that
   table_convert_char treats as "no conversion").  The comment ending
   each row gives the index just past that row.  */
static int ascii_to_ebcdic_us_table[] = {
    0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */
   64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */
  240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */
  124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */
  215,216,217,226,227,228,229,230,231,232,233, -1,224, -1, -1,109, /* 96 */
  121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */
  151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7, /* 128 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
};
630
631
/* ascii_to_ibm1047_table[C] is the IBM1047 code for the ASCII
   character C, or -1 if C has no equivalent (-1 is the value that
   table_convert_char treats as "no conversion").  The comment ending
   each row gives the index just past that row.  */
static int ascii_to_ibm1047_table[] = {
    0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */
   64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */
  240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */
  124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */
  215,216,217,226,227,228,229,230,231,232,233,173,224,189, 95,109, /* 96 */
  121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */
  151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7, /* 128 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
};
650
651
/* iso_8859_1_to_ascii_table[C] is the ASCII code for the ISO-8859-1
   character C, or -1 if C has no equivalent (-1 is the value that
   table_convert_char treats as "no conversion").  Only codes 0
   through 127 are mapped.  The comment ending each row gives the
   index just past that row.  */
static int iso_8859_1_to_ascii_table[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, /* 80 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, /* 96 */
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, /* 112 */
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, /* 128 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
};
670
671
/* iso_8859_1_to_ebcdic_us_table[C] is the EBCDIC-US code for the
   ISO-8859-1 character C, or -1 if C has no equivalent (-1 is the
   value that table_convert_char treats as "no conversion").  The
   comment ending each row gives the index just past that row.  */
static int iso_8859_1_to_ebcdic_us_table[] = {
    0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */
   64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */
  240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */
  124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */
  215,216,217,226,227,228,229,230,231,232,233, -1,224, -1, -1,109, /* 96 */
  121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */
  151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7, /* 128 */
   32, 33, 34, 35, 36, 21,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27, /* 144 */
   48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,255, /* 160 */
   -1, -1, 74, -1, -1, -1,106, -1, -1, -1, -1, -1, 95, -1, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
};
690
691
/* iso_8859_1_to_ibm1047_table[C] is the IBM1047 code for the
   ISO-8859-1 character C.  Every one of the 256 entries is mapped;
   none holds -1.  The comment ending each row gives the index just
   past that row.  */
static int iso_8859_1_to_ibm1047_table[] = {
    0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */
   64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */
  240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */
  124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */
  215,216,217,226,227,228,229,230,231,232,233,173,224,189, 95,109, /* 96 */
  121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */
  151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7, /* 128 */
   32, 33, 34, 35, 36, 21,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27, /* 144 */
   48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,255, /* 160 */
   65,170, 74,177,159,178,106,181,187,180,154,138,176,202,175,188, /* 176 */
  144,143,234,250,190,160,182,179,157,218,155,139,183,184,185,171, /* 192 */
  100,101, 98,102, 99,103,158,104,116,113,114,115,120,117,118,119, /* 208 */
  172,105,237,238,235,239,236,191,128,253,254,251,252,186,174, 89, /* 224 */
   68, 69, 66, 70, 67, 71,156, 72, 84, 81, 82, 83, 88, 85, 86, 87, /* 240 */
  140, 73,205,206,203,207,204,225,112,221,222,219,220,141,142,223  /* 256 */
};
710
711
/* ebcdic_us_to_ascii_table[C] is the ASCII code for the EBCDIC-US
   character C, or -1 if C has no equivalent (-1 is the value that
   table_convert_char treats as "no conversion").  The comment ending
   each row gives the index just past that row.  */
static int ebcdic_us_to_ascii_table[] = {
    0,  1,  2,  3, -1,  9, -1,127, -1, -1, -1, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, -1, -1,  8, -1, 24, 25, -1, -1, 28, 29, 30, 31, /* 32 */
   -1, -1, -1, -1, -1, 10, 23, 27, -1, -1, -1, -1, -1,  5,  6,  7, /* 48 */
   -1, -1, 22, -1, -1, -1, -1,  4, -1, -1, -1, -1, 20, 21, -1, 26, /* 64 */
   32, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46, 60, 40, 43,124, /* 80 */
   38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59, -1, /* 96 */
   45, 47, -1, -1, -1, -1, -1, -1, -1, -1, -1, 44, 37, 95, 62, 63, /* 112 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */
   -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */
   -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */
   -1,126,115,116,117,118,119,120,121,122, -1, -1, -1, -1, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
  123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */
  125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */
   92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1, -1  /* 256 */
};
730
731
/* ebcdic_us_to_iso_8859_1_table[C] is the ISO-8859-1 code for the
   EBCDIC-US character C, or -1 if C has no equivalent (-1 is the
   value that table_convert_char treats as "no conversion").  The
   comment ending each row gives the index just past that row.  */
static int ebcdic_us_to_iso_8859_1_table[] = {
    0,  1,  2,  3,156,  9,134,127,151,141,142, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19,157,133,  8,135, 24, 25,146,143, 28, 29, 30, 31, /* 32 */
  128,129,130,131,132, 10, 23, 27,136,137,138,139,140,  5,  6,  7, /* 48 */
  144,145, 22,147,148,149,150,  4,152,153,154,155, 20, 21,158, 26, /* 64 */
   32, -1, -1, -1, -1, -1, -1, -1, -1, -1,162, 46, 60, 40, 43,124, /* 80 */
   38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59,172, /* 96 */
   45, 47, -1, -1, -1, -1, -1, -1, -1, -1,166, 44, 37, 95, 62, 63, /* 112 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */
   -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */
   -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */
   -1,126,115,116,117,118,119,120,121,122, -1, -1, -1, -1, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
  123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */
  125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */
   92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1,159  /* 256 */
};
750
751
/* ebcdic_us_to_ibm1047_table[C] is the IBM1047 code for the
   EBCDIC-US character C, or -1 if C has no equivalent (-1 is the
   value that table_convert_char treats as "no conversion").  The
   comment ending each row gives the index just past that row.  */
static int ebcdic_us_to_ibm1047_table[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */
   64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 74, 75, 76, 77, 78, 79, /* 80 */
   80, -1, -1, -1, -1, -1, -1, -1, -1, -1, 90, 91, 92, 93, 94,176, /* 96 */
   96, 97, -1, -1, -1, -1, -1, -1, -1, -1,106,107,108,109,110,111, /* 112 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1,121,122,123,124,125,126,127, /* 128 */
   -1,129,130,131,132,133,134,135,136,137, -1, -1, -1, -1, -1, -1, /* 144 */
   -1,145,146,147,148,149,150,151,152,153, -1, -1, -1, -1, -1, -1, /* 160 */
   -1,161,162,163,164,165,166,167,168,169, -1, -1, -1, -1, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
  192,193,194,195,196,197,198,199,200,201, -1, -1, -1, -1, -1, -1, /* 208 */
  208,209,210,211,212,213,214,215,216,217, -1, -1, -1, -1, -1, -1, /* 224 */
  224, -1,226,227,228,229,230,231,232,233, -1, -1, -1, -1, -1, -1, /* 240 */
  240,241,242,243,244,245,246,247,248,249, -1, -1, -1, -1, -1,255  /* 256 */
};
770
771
/* ibm1047_to_ascii_table[C] is the ASCII code for the IBM1047
   character C, or -1 if C has no equivalent (-1 is the value that
   table_convert_char treats as "no conversion").  The comment ending
   each row gives the index just past that row.  */
static int ibm1047_to_ascii_table[] = {
    0,  1,  2,  3, -1,  9, -1,127, -1, -1, -1, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19, -1, -1,  8, -1, 24, 25, -1, -1, 28, 29, 30, 31, /* 32 */
   -1, -1, -1, -1, -1, 10, 23, 27, -1, -1, -1, -1, -1,  5,  6,  7, /* 48 */
   -1, -1, 22, -1, -1, -1, -1,  4, -1, -1, -1, -1, 20, 21, -1, 26, /* 64 */
   32, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46, 60, 40, 43,124, /* 80 */
   38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59, 94, /* 96 */
   45, 47, -1, -1, -1, -1, -1, -1, -1, -1, -1, 44, 37, 95, 62, 63, /* 112 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */
   -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */
   -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */
   -1,126,115,116,117,118,119,120,121,122, -1, -1, -1, 91, -1, -1, /* 176 */
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 93, -1, -1, /* 192 */
  123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */
  125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */
   92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1, -1  /* 256 */
};
790
791
/* ibm1047_to_iso_8859_1_table[C] is the ISO-8859-1 code for the
   IBM1047 character C.  Every one of the 256 entries is mapped; none
   holds -1.  The comment ending each row gives the index just past
   that row.  */
static int ibm1047_to_iso_8859_1_table[] = {
    0,  1,  2,  3,156,  9,134,127,151,141,142, 11, 12, 13, 14, 15, /* 16 */
   16, 17, 18, 19,157,133,  8,135, 24, 25,146,143, 28, 29, 30, 31, /* 32 */
  128,129,130,131,132, 10, 23, 27,136,137,138,139,140,  5,  6,  7, /* 48 */
  144,145, 22,147,148,149,150,  4,152,153,154,155, 20, 21,158, 26, /* 64 */
   32,160,226,228,224,225,227,229,231,241,162, 46, 60, 40, 43,124, /* 80 */
   38,233,234,235,232,237,238,239,236,223, 33, 36, 42, 41, 59, 94, /* 96 */
   45, 47,194,196,192,193,195,197,199,209,166, 44, 37, 95, 62, 63, /* 112 */
  248,201,202,203,200,205,206,207,204, 96, 58, 35, 64, 39, 61, 34, /* 128 */
  216, 97, 98, 99,100,101,102,103,104,105,171,187,240,253,254,177, /* 144 */
  176,106,107,108,109,110,111,112,113,114,170,186,230,184,198,164, /* 160 */
  181,126,115,116,117,118,119,120,121,122,161,191,208, 91,222,174, /* 176 */
  172,163,165,183,169,167,182,188,189,190,221,168,175, 93,180,215, /* 192 */
  123, 65, 66, 67, 68, 69, 70, 71, 72, 73,173,244,246,242,243,245, /* 208 */
  125, 74, 75, 76, 77, 78, 79, 80, 81, 82,185,251,252,249,250,255, /* 224 */
   92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213, /* 240 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159  /* 256 */
};
810
811
/* Conversion table from IBM1047 to EBCDIC-US, indexed by the IBM1047
   character code.  An entry of -1 means table_convert_char reports
   the character as unconvertible.  The comment on each row gives the
   range of source codes the row covers.  */
static int ibm1047_to_ebcdic_us_table[256] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 0x00-0x0f */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 0x10-0x1f */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 0x20-0x2f */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 0x30-0x3f */
   64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 74, 75, 76, 77, 78, 79, /* 0x40-0x4f */
   80, -1, -1, -1, -1, -1, -1, -1, -1, -1, 90, 91, 92, 93, 94, -1, /* 0x50-0x5f */
   96, 97, -1, -1, -1, -1, -1, -1, -1, -1,106,107,108,109,110,111, /* 0x60-0x6f */
   -1, -1, -1, -1, -1, -1, -1, -1, -1,121,122,123,124,125,126,127, /* 0x70-0x7f */
   -1,129,130,131,132,133,134,135,136,137, -1, -1, -1, -1, -1, -1, /* 0x80-0x8f */
   -1,145,146,147,148,149,150,151,152,153, -1, -1, -1, -1, -1, -1, /* 0x90-0x9f */
   -1,161,162,163,164,165,166,167,168,169, -1, -1, -1, -1, -1, -1, /* 0xa0-0xaf */
   95, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xb0-0xbf */
  192,193,194,195,196,197,198,199,200,201, -1, -1, -1, -1, -1, -1, /* 0xc0-0xcf */
  208,209,210,211,212,213,214,215,216,217, -1, -1, -1, -1, -1, -1, /* 0xd0-0xdf */
  224, -1,226,227,228,229,230,231,232,233, -1, -1, -1, -1, -1, -1, /* 0xe0-0xef */
  240,241,242,243,244,245,246,247,248,249, -1, -1, -1, -1, -1,255  /* 0xf0-0xff */
};
830
831
/* Table-driven character conversion.  BATON points to an array of int
   indexed by source character code.  If FROM lies in 0..255 and its
   entry is not -1, store the entry in *TO and return 1.  Otherwise
   leave *TO untouched and return 0.  */
static int
table_convert_char (void *baton, int from, int *to)
{
  const int *map = baton;
  int mapped;

  /* Codes outside the table cannot be converted.  */
  if (from < 0 || from > 255)
    return 0;

  /* -1 marks a source character with no counterpart.  */
  mapped = map[from];
  if (mapped == -1)
    return 0;

  *to = mapped;
  return 1;
}
846
847
848static struct translation *
849table_translation (const char *from, const char *to, int *table,
850                   const char *(*c_target_char_has_backslash_escape)
851                   (void *baton, int target_char),
852                   void *c_target_char_has_backslash_escape_baton,
853                   int (*c_parse_backslash) (void *baton,
854                                             int host_char,
855                                             int *target_char),
856                   void *c_parse_backslash_baton)
857{
858  struct translation *t = xmalloc (sizeof (*t));
859
860  memset (t, 0, sizeof (*t));
861  t->from = from;
862  t->to = to;
863  t->c_target_char_has_backslash_escape = c_target_char_has_backslash_escape;
864  t->c_target_char_has_backslash_escape_baton
865    = c_target_char_has_backslash_escape_baton;
866  t->c_parse_backslash = c_parse_backslash;
867  t->c_parse_backslash_baton = c_parse_backslash_baton;
868  t->convert_char = table_convert_char;
869  t->convert_char_baton = (void *) table;
870
871  return t;
872}
873
874
/* Build a table-driven translation from FROM to TO that supplies only
   character conversion: all backslash-escape functions and batons are
   passed to table_translation as null.  */
static struct translation *
simple_table_translation (const char *from, const char *to, int *table)
{
  return table_translation (from, to, table, NULL, NULL, NULL, NULL);
}
880
881
882
883/* Setting and retrieving the host and target charsets.  */
884
885
/* The current host character set.  */
static struct charset *current_host_charset;

/* The current target character set.  */
static struct charset *current_target_charset;
888
/* Dispatch state for the functions declared in charset.h.  Each public
   entry point calls through one of these pointers and passes the baton
   stored next to it.  set_host_and_target_charsets chooses the
   values.  */

/* Backs c_target_char_has_backslash_escape.  */
static const char *
  (*c_target_char_has_backslash_escape_func) (void *baton, int target_char);
static void *c_target_char_has_backslash_escape_baton;

/* Backs c_parse_backslash.  */
static int
  (*c_parse_backslash_func) (void *baton, int host_char, int *target_char);
static void *c_parse_backslash_baton;

/* Backs host_char_to_target.  */
static int
  (*host_char_to_target_func) (void *baton, int host_char, int *target_char);
static void *host_char_to_target_baton;

/* Backs target_char_to_host.  */
static int
  (*target_char_to_host_func) (void *baton, int target_char, int *host_char);
static void *target_char_to_host_baton;
910
911
912/* Cached iconv conversions, that might be useful to fallback
913   routines.  */
914static struct cached_iconv cached_iconv_host_to_target;
915static struct cached_iconv cached_iconv_target_to_host;
916
917
918/* Charset structures manipulation functions.  */
919
/* Return the result of lookup_charset for NAME.  If that result is
   null, call error with a message that mentions NAME.  */
static struct charset *
lookup_charset_or_error (const char *name)
{
  struct charset *found;

  found = lookup_charset (name);
  if (found == NULL)
    error ("GDB doesn't know of any character set named `%s'.", name);

  return found;
}
930
931static void
932check_valid_host_charset (struct charset *cs)
933{
934  if (! cs->valid_host_charset)
935    error ("GDB can't use `%s' as its host character set.", cs->name);
936}
937
938/* Set the host and target character sets to HOST and TARGET.  */
939static void
940set_host_and_target_charsets (struct charset *host, struct charset *target)
941{
942  struct translation *h2t, *t2h;
943
944  /* If they're not both initialized yet, then just do nothing for
945     now.  As soon as we're done running our initialize function,
946     everything will be initialized.  */
947  if (! host || ! target)
948    {
949      current_host_charset = host;
950      current_target_charset = target;
951      return;
952    }
953
954  h2t = lookup_translation (host->name, target->name);
955  t2h = lookup_translation (target->name, host->name);
956
957  /* If the translations don't provide conversion functions, make sure
958     iconv can back them up.  Do this *before* modifying any state.  */
959  if (host != target)
960    {
961      if (! h2t || ! h2t->convert_char)
962        {
963          if (check_iconv_cache (&cached_iconv_host_to_target, host, target)
964              < 0)
965            error ("GDB can't convert from the `%s' character set to `%s'.",
966                   host->name, target->name);
967        }
968      if (! t2h || ! t2h->convert_char)
969        {
970          if (check_iconv_cache (&cached_iconv_target_to_host, target, host)
971              < 0)
972            error ("GDB can't convert from the `%s' character set to `%s'.",
973                   target->name, host->name);
974        }
975    }
976
977  if (t2h && t2h->c_target_char_has_backslash_escape)
978    {
979      c_target_char_has_backslash_escape_func
980        = t2h->c_target_char_has_backslash_escape;
981      c_target_char_has_backslash_escape_baton
982        = t2h->c_target_char_has_backslash_escape_baton;
983    }
984  else
985    c_target_char_has_backslash_escape_func
986      = default_c_target_char_has_backslash_escape;
987
988  if (h2t && h2t->c_parse_backslash)
989    {
990      c_parse_backslash_func = h2t->c_parse_backslash;
991      c_parse_backslash_baton = h2t->c_parse_backslash_baton;
992    }
993  else
994    c_parse_backslash_func = default_c_parse_backslash;
995
996  if (h2t && h2t->convert_char)
997    {
998      host_char_to_target_func = h2t->convert_char;
999      host_char_to_target_baton = h2t->convert_char_baton;
1000    }
1001  else if (host == target)
1002    host_char_to_target_func = identity_either_char_to_other;
1003  else
1004    {
1005      host_char_to_target_func = iconv_convert;
1006      host_char_to_target_baton = &cached_iconv_host_to_target;
1007    }
1008
1009  if (t2h && t2h->convert_char)
1010    {
1011      target_char_to_host_func = t2h->convert_char;
1012      target_char_to_host_baton = t2h->convert_char_baton;
1013    }
1014  else if (host == target)
1015    target_char_to_host_func = identity_either_char_to_other;
1016  else
1017    {
1018      target_char_to_host_func = iconv_convert;
1019      target_char_to_host_baton = &cached_iconv_target_to_host;
1020    }
1021
1022  current_host_charset = host;
1023  current_target_charset = target;
1024}
1025
1026/* Do the real work of setting the host charset.  */
1027static void
1028set_host_charset (const char *charset)
1029{
1030  struct charset *cs = lookup_charset_or_error (charset);
1031  check_valid_host_charset (cs);
1032  set_host_and_target_charsets (cs, current_target_charset);
1033}
1034
1035/* Do the real work of setting the target charset.  */
1036static void
1037set_target_charset (const char *charset)
1038{
1039  struct charset *cs = lookup_charset_or_error (charset);
1040
1041  set_host_and_target_charsets (current_host_charset, cs);
1042}
1043
1044
1045/* 'Set charset', 'set host-charset', 'set target-charset', 'show
1046   charset' sfunc's.  */
1047
1048/* This is the sfunc for the 'set charset' command.  */
1049static void
1050set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
1051{
1052  struct charset *cs = lookup_charset_or_error (host_charset_name);
1053  check_valid_host_charset (cs);
1054  /* CAREFUL: set the target charset here as well. */
1055  target_charset_name = host_charset_name;
1056  set_host_and_target_charsets (cs, cs);
1057}
1058
1059/* 'set host-charset' command sfunc.  We need a wrapper here because
1060   the function needs to have a specific signature.  */
1061static void
1062set_host_charset_sfunc (char *charset, int from_tty,
1063			  struct cmd_list_element *c)
1064{
1065  set_host_charset (host_charset_name);
1066}
1067
1068/* Wrapper for the 'set target-charset' command.  */
1069static void
1070set_target_charset_sfunc (char *charset, int from_tty,
1071			    struct cmd_list_element *c)
1072{
1073  set_target_charset (target_charset_name);
1074}
1075
1076/* sfunc for the 'show charset' command.  */
1077static void
1078show_charset (char *arg, int from_tty)
1079{
1080  if (current_host_charset == current_target_charset)
1081    {
1082      printf_filtered ("The current host and target character set is `%s'.\n",
1083                       host_charset ());
1084    }
1085  else
1086    {
1087      printf_filtered ("The current host character set is `%s'.\n",
1088                       host_charset ());
1089      printf_filtered ("The current target character set is `%s'.\n",
1090                       target_charset ());
1091    }
1092}
1093
1094
1095/* Accessor functions.  */
1096
1097const char *
1098host_charset (void)
1099{
1100  return current_host_charset->name;
1101}
1102
1103const char *
1104target_charset (void)
1105{
1106  return current_target_charset->name;
1107}
1108
1109
1110
1111/* Public character management functions.  */
1112
1113
1114const char *
1115c_target_char_has_backslash_escape (int target_char)
1116{
1117  return ((*c_target_char_has_backslash_escape_func)
1118          (c_target_char_has_backslash_escape_baton, target_char));
1119}
1120
1121
1122int
1123c_parse_backslash (int host_char, int *target_char)
1124{
1125  return (*c_parse_backslash_func) (c_parse_backslash_baton,
1126                                    host_char, target_char);
1127}
1128
1129
1130int
1131host_char_print_literally (int host_char)
1132{
1133  return ((*current_host_charset->host_char_print_literally)
1134          (current_host_charset->host_char_print_literally_baton,
1135           host_char));
1136}
1137
1138
1139int
1140target_char_to_control_char (int target_char, int *target_ctrl_char)
1141{
1142  return ((*current_target_charset->target_char_to_control_char)
1143          (current_target_charset->target_char_to_control_char_baton,
1144           target_char, target_ctrl_char));
1145}
1146
1147
1148int
1149host_char_to_target (int host_char, int *target_char)
1150{
1151  return ((*host_char_to_target_func)
1152          (host_char_to_target_baton, host_char, target_char));
1153}
1154
1155
1156int
1157target_char_to_host (int target_char, int *host_char)
1158{
1159  return ((*target_char_to_host_func)
1160          (target_char_to_host_baton, target_char, host_char));
1161}
1162
1163
1164
1165/* The charset.c module initialization function.  */
1166
1167extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
1168
1169void
1170_initialize_charset (void)
1171{
1172  struct cmd_list_element *new_cmd;
1173
1174  /* Register all the character set GDB knows about.
1175
1176     You should use the same names that iconv does, where possible, to
1177     take advantage of the iconv-based default behaviors.
1178
1179     CAUTION: if you register a character set, you must also register
1180     as many translations as are necessary to make that character set
1181     interoperate correctly with all the other character sets.  We do
1182     provide default behaviors when no translation is available, or
1183     when a translation's function pointer for a particular operation
1184     is zero.  Hopefully, these defaults will be correct often enough
1185     that we won't need to provide too many translations.  */
1186  register_charset (simple_charset ("ASCII", 1,
1187                                    ascii_print_literally, 0,
1188                                    ascii_to_control, 0));
1189  register_charset (iso_8859_family_charset ("ISO-8859-1"));
1190  register_charset (ebcdic_family_charset ("EBCDIC-US"));
1191  register_charset (ebcdic_family_charset ("IBM1047"));
1192  register_iconv_charsets ();
1193
1194  {
1195    struct { char *from; char *to; int *table; } tlist[] = {
1196      { "ASCII",      "ISO-8859-1", ascii_to_iso_8859_1_table },
1197      { "ASCII",      "EBCDIC-US",  ascii_to_ebcdic_us_table },
1198      { "ASCII",      "IBM1047",    ascii_to_ibm1047_table },
1199      { "ISO-8859-1", "ASCII",      iso_8859_1_to_ascii_table },
1200      { "ISO-8859-1", "EBCDIC-US",  iso_8859_1_to_ebcdic_us_table },
1201      { "ISO-8859-1", "IBM1047",    iso_8859_1_to_ibm1047_table },
1202      { "EBCDIC-US",  "ASCII",      ebcdic_us_to_ascii_table },
1203      { "EBCDIC-US",  "ISO-8859-1", ebcdic_us_to_iso_8859_1_table },
1204      { "EBCDIC-US",  "IBM1047",    ebcdic_us_to_ibm1047_table },
1205      { "IBM1047",    "ASCII",      ibm1047_to_ascii_table },
1206      { "IBM1047",    "ISO-8859-1", ibm1047_to_iso_8859_1_table },
1207      { "IBM1047",    "EBCDIC-US",  ibm1047_to_ebcdic_us_table }
1208    };
1209
1210    int i;
1211
1212    for (i = 0; i < (sizeof (tlist) / sizeof (tlist[0])); i++)
1213      register_translation (simple_table_translation (tlist[i].from,
1214                                                      tlist[i].to,
1215                                                      tlist[i].table));
1216  }
1217
1218  set_host_charset (host_charset_name);
1219  set_target_charset (target_charset_name);
1220
1221  new_cmd = add_set_enum_cmd ("charset",
1222			      class_support,
1223			      host_charset_enum,
1224			      &host_charset_name,
1225                              "Set the host and target character sets.\n"
1226                              "The `host character set' is the one used by the system GDB is running on.\n"
1227                              "The `target character set' is the one used by the program being debugged.\n"
1228                              "You may only use supersets of ASCII for your host character set; GDB does\n"
1229                              "not support any others.\n"
1230                              "To see a list of the character sets GDB supports, type `set charset <TAB>'.",
1231			      &setlist);
1232
1233  /* Note that the sfunc below needs to set target_charset_name, because
1234     the 'set charset' command sets two variables.  */
1235  set_cmd_sfunc (new_cmd, set_charset_sfunc);
1236  /* Don't use set_from_show - need to print some extra info. */
1237  add_cmd ("charset", class_support, show_charset,
1238	   "Show the host and target character sets.\n"
1239	   "The `host character set' is the one used by the system GDB is running on.\n"
1240	   "The `target character set' is the one used by the program being debugged.\n"
1241	   "You may only use supersets of ASCII for your host character set; GDB does\n"
1242	   "not support any others.\n"
1243	   "To see a list of the character sets GDB supports, type `set charset <TAB>'.",
1244	   &showlist);
1245
1246
1247  new_cmd = add_set_enum_cmd ("host-charset",
1248			      class_support,
1249			      host_charset_enum,
1250			      &host_charset_name,
1251			      "Set the host character set.\n"
1252			      "The `host character set' is the one used by the system GDB is running on.\n"
1253			      "You may only use supersets of ASCII for your host character set; GDB does\n"
1254			      "not support any others.\n"
1255			      "To see a list of the character sets GDB supports, type `set host-charset <TAB>'.",
1256			      &setlist);
1257
1258  set_cmd_sfunc (new_cmd, set_host_charset_sfunc);
1259
1260  add_show_from_set (new_cmd, &showlist);
1261
1262
1263
1264  new_cmd = add_set_enum_cmd ("target-charset",
1265			      class_support,
1266			      target_charset_enum,
1267			      &target_charset_name,
1268			      "Set the target character set.\n"
1269			      "The `target character set' is the one used by the program being debugged.\n"
1270			      "GDB translates characters and strings between the host and target\n"
1271			      "character sets as needed.\n"
1272			      "To see a list of the character sets GDB supports, type `set target-charset'<TAB>",
1273			      &setlist);
1274
1275  set_cmd_sfunc (new_cmd, set_target_charset_sfunc);
1276  add_show_from_set (new_cmd, &showlist);
1277}
1278