/* Charset handling while reading PO files.
   Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifndef _PO_CHARSET_H
#define _PO_CHARSET_H

#include <stdbool.h>
#include <stddef.h>

/* HAVE_ICONV is not defined in this header; it is expected to be set by
   the build configuration before this header is included.  When it is
   unset or zero, the iconv-based declarations below are omitted.  */
#if HAVE_ICONV
#include <iconv.h>
#endif

/* DLL_VARIABLE is not defined in this header either; it is expected to
   come from the build configuration (presumably config.h).  Fall back to
   an empty expansion so that this header is self-contained and can be
   compiled on its own.  The fallback is inert whenever the macro has
   already been defined.  */
#ifndef DLL_VARIABLE
# define DLL_VARIABLE
#endif


#ifdef __cplusplus
extern "C" {
#endif


/* Canonicalize the encoding name CHARSET.
   The results of this function are statically allocated, so two
   canonical names can be compared with == instead of strcmp.  */
extern const char *po_charset_canonicalize (const char *charset);

/* The canonicalized encoding name for ASCII.  */
extern DLL_VARIABLE const char *po_charset_ascii;

/* The canonicalized encoding name for UTF-8.  */
extern DLL_VARIABLE const char *po_charset_utf8;

/* Test for ASCII compatibility.
   CANON_CHARSET is an encoding name in canonical form (see
   po_charset_canonicalize).  */
extern bool po_charset_ascii_compatible (const char *canon_charset);

/* Test for a weird encoding, i.e. an encoding which has double-byte
   characters ending in 0x5C.
   CANON_CHARSET is an encoding name in canonical form.  */
extern bool po_is_charset_weird (const char *canon_charset);

/* Test for a weird CJK encoding, i.e. a weird encoding with CJK structure.
   An encoding has CJK structure if every valid character stream is composed
   of single bytes in the range 0x{00..7F} and of byte pairs in the range
   0x{80..FF}{30..FF}.
   CANON_CHARSET is an encoding name in canonical form.  */
extern bool po_is_charset_weird_cjk (const char *canon_charset);

/* Type of a character iterator.
   Given a pointer S into a string, a character iterator returns the number
   of bytes occupied by the next single character.  If the piece of string
   is not valid or if *s == '\0', it returns 1.  */
typedef size_t (*character_iterator_t) (const char *s);

/* Returns a character iterator for the encoding CANON_CHARSET, which is
   an encoding name in canonical form.  */
extern character_iterator_t po_charset_character_iterator (const char *canon_charset);


/* The PO file's encoding, as specified in the header entry.  */
extern DLL_VARIABLE const char *po_lex_charset;

#if HAVE_ICONV
/* Converter from the PO file's encoding to UTF-8.  */
extern DLL_VARIABLE iconv_t po_lex_iconv;
#endif
/* If no converter is available, some information about the structure of the
   PO file's encoding.  */
extern DLL_VARIABLE bool po_lex_weird_cjk;

/* Initialize the PO file's encoding.  */
extern void po_lex_charset_init (void);

/* Set the PO file's encoding from the header entry.
   HEADER_ENTRY is the text of the PO file's header entry.
   FILENAME is the name of the PO file (presumably used for diagnostics;
   confirm against the implementation).  */
extern void po_lex_charset_set (const char *header_entry,
				const char *filename);

/* Finish up with the PO file's encoding.  */
extern void po_lex_charset_close (void);


#ifdef __cplusplus
}
#endif


#endif /* _PO_CHARSET_H */