/* Charset handling while reading PO files.
   Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef _PO_CHARSET_H
#define _PO_CHARSET_H

#include <stdbool.h>
#include <stddef.h>

#if HAVE_ICONV
#include <iconv.h>
#endif

/* DLL_VARIABLE decorates the variables exported below.  It is not defined
   in this header; presumably the build configuration supplies it (TODO
   confirm).  Fall back to an empty expansion so that the header is
   self-contained.  Translation units that define DLL_VARIABLE before
   including this header are unaffected.  */
#ifndef DLL_VARIABLE
# define DLL_VARIABLE
#endif


#ifdef __cplusplus
extern "C" {
#endif


/* Canonicalize an encoding name.
   The results of this function are statically allocated and can be
   compared using ==.  */
extern const char *po_charset_canonicalize (const char *charset);

/* The canonicalized encoding name for ASCII.  */
extern DLL_VARIABLE const char *po_charset_ascii;

/* The canonicalized encoding name for UTF-8.  */
extern DLL_VARIABLE const char *po_charset_utf8;

/* Test for ASCII compatibility.  */
extern bool po_charset_ascii_compatible (const char *canon_charset);

/* Test for a weird encoding, i.e. an encoding which has double-byte
   characters ending in 0x5C.  */
extern bool po_is_charset_weird (const char *canon_charset);

/* Test for a weird CJK encoding, i.e. a weird encoding with CJK structure.
   An encoding has CJK structure if every valid character stream is composed
   of single bytes in the range 0x{00..7F} and of byte pairs in the range
   0x{80..FF}{30..FF}.  */
extern bool po_is_charset_weird_cjk (const char *canon_charset);

/* Returns a character iterator for a given encoding.
   Given a pointer into a string, the iterator returns the number of bytes
   occupied by the next single character.  If the piece of string is not
   valid or if *s == '\0', it returns 1.  */
typedef size_t (*character_iterator_t) (const char *s);
extern character_iterator_t
       po_charset_character_iterator (const char *canon_charset);


/* The PO file's encoding, as specified in the header entry.  */
extern DLL_VARIABLE const char *po_lex_charset;

#if HAVE_ICONV
/* Converter from the PO file's encoding to UTF-8.  */
extern DLL_VARIABLE iconv_t po_lex_iconv;
#endif
/* If no converter is available, some information about the structure of the
   PO file's encoding.  */
extern DLL_VARIABLE bool po_lex_weird_cjk;

/* Initialize the PO file's encoding.  */
extern void po_lex_charset_init (void);

/* Set the PO file's encoding from the header entry.  */
extern void po_lex_charset_set (const char *header_entry,
                                const char *filename);

/* Finish up with the PO file's encoding.  */
extern void po_lex_charset_close (void);


#ifdef __cplusplus
}
#endif


#endif /* _PO_CHARSET_H */