1/* $Id: utf8.h,v 1.1 2003/06/04 00:25:44 marka Exp $ */
2/*
3 * Copyright (c) 2000 Japan Network Information Center.  All rights reserved.
4 *
5 * By using this file, you agree to the terms and conditions set forth bellow.
6 *
7 * 			LICENSE TERMS AND CONDITIONS
8 *
9 * The following License Terms and Conditions apply, unless a different
10 * license is obtained from Japan Network Information Center ("JPNIC"),
11 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
12 * Chiyoda-ku, Tokyo 101-0047, Japan.
13 *
14 * 1. Use, Modification and Redistribution (including distribution of any
15 *    modified or derived work) in source and/or binary forms is permitted
16 *    under this License Terms and Conditions.
17 *
18 * 2. Redistribution of source code must retain the copyright notices as they
19 *    appear in each source code file, this License Terms and Conditions.
20 *
21 * 3. Redistribution in binary form must reproduce the Copyright Notice,
22 *    this License Terms and Conditions, in the documentation and/or other
23 *    materials provided with the distribution.  For the purposes of binary
24 *    distribution the "Copyright Notice" refers to the following language:
25 *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
26 *
27 * 4. The name of JPNIC may not be used to endorse or promote products
28 *    derived from this Software without specific prior written approval of
29 *    JPNIC.
30 *
31 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
32 *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
34 *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
35 *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
39 *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
40 *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
42 */
43
44#ifndef IDN_UTF8_H
45#define IDN_UTF8_H 1
46
47#ifdef __cplusplus
48extern "C" {
49#endif
50
51/*
52 * UTF-8 encoded string facility.
53 */
54
55#include <idn/export.h>
56
57/*
58 * Get the length of a UTF-8 encoded character.
59 *
60 * Get the length (in bytes) of the character whose first byte is pointed
61 * to by 's'.  Since this function looks only at that first byte to determine
62 * the length, it is possible that some of the following bytes are invalid.
63 */
64IDN_EXPORT int
65idn_utf8_mblen(const char *s);
66
67/*
68 * Get one character.
69 *
70 * Get the first character of the string pointed to by 's', and copy it
71 * to 'buf', whose length is 'len'.  Returns the number of bytes copied,
72 * or zero if the encoding is invalid or 'len' is too small.  Note that
73 * the copied string is not NUL-terminated.
74 *
75 * The area pointed to by 'buf' must be large enough to store any UTF-8
76 * encoded character.  NOTE(review): that requirement suggests 'len' may
77 * actually bound the input 's' rather than 'buf' -- confirm in the source.
78 */
79IDN_EXPORT int
80idn_utf8_getmb(const char *s, size_t len, char *buf);
81
82/*
83 * Get one character in UCS-4.
84 *
85 * Similar to 'idn_utf8_getmb', except that the result is not UTF-8
86 * encoded, but in UCS-4 format (a plain 32-bit integer value).
87 */
88IDN_EXPORT int
89idn_utf8_getwc(const char *s, size_t len, unsigned long *vp);
90
91/*
92 * Put one character.
93 *
94 * This function is the opposite of 'idn_utf8_getwc'.  It takes a UCS-4
95 * value 'v', converts it to a UTF-8 encoded sequence, and stores it to 's',
96 * whose length is 'len'.  It returns the number of bytes written, or
97 * zero if 'v' is out of range or 'len' is too small.
98 */
99IDN_EXPORT int
100idn_utf8_putwc(char *s, size_t len, unsigned long v);
101
102/*
103 * Check the validity of a UTF-8 encoded character.
104 *
105 * Check if the character pointed to by 's' is a valid UTF-8 encoded
106 * character.  Returns the length of the character (in bytes) if it is
107 * valid, 0 otherwise.
108 */
109IDN_EXPORT int
110idn_utf8_isvalidchar(const char *s);
111
112/*
113 * Check the validity of a UTF-8 encoded string.
114 *
115 * Check if the NUL-terminated string 's' is valid as a UTF-8 encoded
116 * string.  Returns 1 if it is valid, 0 otherwise.
117 */
118IDN_EXPORT int
119idn_utf8_isvalidstring(const char *s);
120
121/*
122 * Find the first byte of a character.
123 *
124 * Find the first byte of the character 's' points to.  's' may point to
125 * the 2nd or a later byte of a character.  'known_top' is a pointer to
126 * a string which contains 's', and is known to be the first byte of
127 * a character.  If the first byte cannot be found between 'known_top'
128 * and 's', NULL is returned.
129 */
130IDN_EXPORT char *
131idn_utf8_findfirstbyte(const char *s, const char *known_top);
132
133#ifdef __cplusplus
134}
135#endif
136
137#endif /* IDN_UTF8_H */
138