1/* Charset handling while reading PO files.
2   Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc.
3   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#ifndef _PO_CHARSET_H
20#define _PO_CHARSET_H
21
22#include <stdbool.h>
23#include <stddef.h>
24
25#if HAVE_ICONV
26#include <iconv.h>
27#endif
28
29
30#ifdef __cplusplus
31extern "C" {
32#endif
33
34
35/* Canonicalize an encoding name.
36   The results of this function are statically allocated, so two results
37   can be compared by pointer using ==.  */
38extern const char *po_charset_canonicalize (const char *charset);
39
40/* The canonicalized encoding name for ASCII.
     NOTE(review): presumably the same pointer that po_charset_canonicalize
     yields for ASCII names, so it can be compared using == -- confirm.  */
41extern DLL_VARIABLE const char *po_charset_ascii;
42
43/* The canonicalized encoding name for UTF-8.
     NOTE(review): presumably the same pointer that po_charset_canonicalize
     yields for UTF-8 names, so it can be compared using == -- confirm.  */
44extern DLL_VARIABLE const char *po_charset_utf8;
45
46/* Test for ASCII compatibility.
     NOTE(review): the parameter name suggests CANON_CHARSET must already be
     a canonicalized encoding name -- confirm against the implementation.  */
47extern bool po_charset_ascii_compatible (const char *canon_charset);
48
49/* Test for a weird encoding, i.e. an encoding which has double-byte
50   characters ending in 0x5C (the ASCII code of the backslash).  */
51extern bool po_is_charset_weird (const char *canon_charset);
52
53/* Test for a weird CJK encoding, i.e. a weird encoding with CJK structure.
54   An encoding has CJK structure if every valid character stream is composed
55   of single bytes in the range 0x{00..7F} and of byte pairs in the range
56   0x{80..FF}{30..FF}, i.e. first byte 0x80..0xFF, second byte 0x30..0xFF.  */
57extern bool po_is_charset_weird_cjk (const char *canon_charset);
58
59/* Returns a character iterator for a given encoding.
60   Given a pointer S into a string, the iterator returns the number of bytes
61   occupied by the next single character.  If the piece of string is not
62   valid or if *S == '\0', it returns 1.  */
63typedef size_t (*character_iterator_t) (const char *s);
64extern character_iterator_t po_charset_character_iterator (const char *canon_charset);
65
66
67/* The PO file's encoding, as specified in the header entry.  */
68extern DLL_VARIABLE const char *po_lex_charset;
69
70#if HAVE_ICONV
71/* Converter from the PO file's encoding to UTF-8.
     Only declared when HAVE_ICONV is set.  */
72extern DLL_VARIABLE iconv_t po_lex_iconv;
73#endif
74/* If no converter is available, some information about the structure of the
75   PO file's encoding.  Declared regardless of HAVE_ICONV.
     NOTE(review): presumably true when the encoding is a weird CJK encoding
     in the sense of po_is_charset_weird_cjk -- confirm.  */
76extern DLL_VARIABLE bool po_lex_weird_cjk;
77
78/* Initialize the PO file's encoding.  Takes no arguments and returns
     nothing.  */
79extern void po_lex_charset_init (void);
80
81/* Set the PO file's encoding from the header entry.
     HEADER_ENTRY is the header entry to take the encoding from.
     NOTE(review): FILENAME is presumably the name of the PO file being read,
     used for diagnostics -- confirm against the implementation.  */
82extern void po_lex_charset_set (const char *header_entry,
83				const char *filename);
84
85/* Finish up with the PO file's encoding.  Takes no arguments and returns
     nothing.  */
86extern void po_lex_charset_close (void);
87
88
89#ifdef __cplusplus
90}
91#endif
92
93
94#endif /* _PO_CHARSET_H */
95