/* Unicode character case mappings.
   Copyright (C) 2002, 2009, 2010 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

17#ifndef _UNICASE_H
18#define _UNICASE_H
19
20#include "unitypes.h"
21
22/* Get bool.  */
23#include <unistring/stdbool.h>
24
25/* Get size_t.  */
26#include <stddef.h>
27
28/* Get uninorm_t.  */
29#include "uninorm.h"
30
31#ifdef __cplusplus
32extern "C" {
33#endif
34
/* ========================================================================= */

/* Character case mappings.
   These mappings are locale and context independent.
   WARNING! These functions are not sufficient for languages such as German.
   Prefer the functions below that treat an entire string at once and are
   language aware.  */

43/* Return the uppercase mapping of a Unicode character.  */
44extern ucs4_t
45       uc_toupper (ucs4_t uc);
46
47/* Return the lowercase mapping of a Unicode character.  */
48extern ucs4_t
49       uc_tolower (ucs4_t uc);
50
51/* Return the titlecase mapping of a Unicode character.  */
52extern ucs4_t
53       uc_totitle (ucs4_t uc);
54
/* ========================================================================= */

/* String case mappings.  */

/* These functions are locale dependent.  The iso639_language argument
   identifies the language (e.g. "tr" for Turkish).  NULL means to use
   locale independent case mappings.  */

/* Return the ISO 639 language code of the current locale.
   Return "" if it is unknown, or in the "C" locale.
   The string case-mapping functions in this header take such a language
   code as their iso639_language argument.  */
extern const char *
       uc_locale_language (void);

/* Conventions:

   All functions prefixed with u8_ operate on UTF-8 encoded strings.
   Their unit is a uint8_t (1 byte).

   All functions prefixed with u16_ operate on UTF-16 encoded strings.
   Their unit is a uint16_t (a 2-byte word).

   All functions prefixed with u32_ operate on UCS-4 encoded strings.
   Their unit is a uint32_t (a 4-byte word).

   All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
   n units.

   Functions returning a string result take a (resultbuf, lengthp) argument
   pair.  If resultbuf is not NULL and the result fits into *lengthp units,
   it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
   allocated string is returned.  In both cases, *lengthp is set to the
   length (number of units) of the returned string.  In case of error,
   NULL is returned and errno is set.  */

89/* Return the uppercase mapping of a string.
90   The nf argument identifies the normalization form to apply after the
91   case-mapping.  It can also be NULL, for no normalization.  */
92extern uint8_t *
93       u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
94                   uninorm_t nf,
95                   uint8_t *resultbuf, size_t *lengthp);
96extern uint16_t *
97       u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
98                    uninorm_t nf,
99                    uint16_t *resultbuf, size_t *lengthp);
100extern uint32_t *
101       u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
102                    uninorm_t nf,
103                    uint32_t *resultbuf, size_t *lengthp);
104
105/* Return the lowercase mapping of a string.
106   The nf argument identifies the normalization form to apply after the
107   case-mapping.  It can also be NULL, for no normalization.  */
108extern uint8_t *
109       u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
110                   uninorm_t nf,
111                   uint8_t *resultbuf, size_t *lengthp);
112extern uint16_t *
113       u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
114                    uninorm_t nf,
115                    uint16_t *resultbuf, size_t *lengthp);
116extern uint32_t *
117       u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
118                    uninorm_t nf,
119                    uint32_t *resultbuf, size_t *lengthp);
120
121/* Return the titlecase mapping of a string.
122   The nf argument identifies the normalization form to apply after the
123   case-mapping.  It can also be NULL, for no normalization.  */
124extern uint8_t *
125       u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
126                   uninorm_t nf,
127                   uint8_t *resultbuf, size_t *lengthp);
128extern uint16_t *
129       u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
130                    uninorm_t nf,
131                    uint16_t *resultbuf, size_t *lengthp);
132extern uint32_t *
133       u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
134                    uninorm_t nf,
135                    uint32_t *resultbuf, size_t *lengthp);
136
/* The case-mapping context given by a prefix string, that is, by the text
   that precedes the string being case-mapped.
   Objects of this type are passed to and returned from functions by
   value.  */
typedef struct casing_prefix_context
        {
          /* These fields are private, undocumented.  Obtain values from
             unicase_empty_prefix_context or from the
             uN_casing_prefix_context / uN_casing_prefixes_context functions
             instead of filling them in by hand.  */
          uint32_t last_char_except_ignorable;
          uint32_t last_char_normal_or_above;
        }
        casing_prefix_context_t;
145/* The case-mapping context of the empty prefix string.  */
146extern LIBUNISTRING_DLL_VARIABLE const casing_prefix_context_t unicase_empty_prefix_context;
147/* Return the case-mapping context of a given prefix string.  */
148extern casing_prefix_context_t
149       u8_casing_prefix_context (const uint8_t *s, size_t n);
150extern casing_prefix_context_t
151       u16_casing_prefix_context (const uint16_t *s, size_t n);
152extern casing_prefix_context_t
153       u32_casing_prefix_context (const uint32_t *s, size_t n);
154/* Return the case-mapping context of the prefix concat(A, S), given the
155   case-mapping context of the prefix A.  */
156extern casing_prefix_context_t
157       u8_casing_prefixes_context (const uint8_t *s, size_t n,
158                                   casing_prefix_context_t a_context);
159extern casing_prefix_context_t
160       u16_casing_prefixes_context (const uint16_t *s, size_t n,
161                                    casing_prefix_context_t a_context);
162extern casing_prefix_context_t
163       u32_casing_prefixes_context (const uint32_t *s, size_t n,
164                                    casing_prefix_context_t a_context);
165
/* The case-mapping context given by a suffix string, that is, by the text
   that follows the string being case-mapped.
   Objects of this type are passed to and returned from functions by
   value.  */
typedef struct casing_suffix_context
        {
          /* These fields are private, undocumented.  Obtain values from
             unicase_empty_suffix_context or from the
             uN_casing_suffix_context / uN_casing_suffixes_context functions
             instead of filling them in by hand.  */
          uint32_t first_char_except_ignorable;
          uint32_t bits;
        }
        casing_suffix_context_t;
174/* The case-mapping context of the empty suffix string.  */
175extern LIBUNISTRING_DLL_VARIABLE const casing_suffix_context_t unicase_empty_suffix_context;
176/* Return the case-mapping context of a given suffix string.  */
177extern casing_suffix_context_t
178       u8_casing_suffix_context (const uint8_t *s, size_t n);
179extern casing_suffix_context_t
180       u16_casing_suffix_context (const uint16_t *s, size_t n);
181extern casing_suffix_context_t
182       u32_casing_suffix_context (const uint32_t *s, size_t n);
183/* Return the case-mapping context of the suffix concat(S, A), given the
184   case-mapping context of the suffix A.  */
185extern casing_suffix_context_t
186       u8_casing_suffixes_context (const uint8_t *s, size_t n,
187                                   casing_suffix_context_t a_context);
188extern casing_suffix_context_t
189       u16_casing_suffixes_context (const uint16_t *s, size_t n,
190                                    casing_suffix_context_t a_context);
191extern casing_suffix_context_t
192       u32_casing_suffixes_context (const uint32_t *s, size_t n,
193                                    casing_suffix_context_t a_context);
194
195/* Return the uppercase mapping of a string that is surrounded by a prefix
196   and a suffix.  */
197extern uint8_t *
198       u8_ct_toupper (const uint8_t *s, size_t n,
199                      casing_prefix_context_t prefix_context,
200                      casing_suffix_context_t suffix_context,
201                      const char *iso639_language,
202                      uninorm_t nf,
203                      uint8_t *resultbuf, size_t *lengthp);
204extern uint16_t *
205       u16_ct_toupper (const uint16_t *s, size_t n,
206                      casing_prefix_context_t prefix_context,
207                      casing_suffix_context_t suffix_context,
208                      const char *iso639_language,
209                      uninorm_t nf,
210                      uint16_t *resultbuf, size_t *lengthp);
211extern uint32_t *
212       u32_ct_toupper (const uint32_t *s, size_t n,
213                      casing_prefix_context_t prefix_context,
214                      casing_suffix_context_t suffix_context,
215                      const char *iso639_language,
216                      uninorm_t nf,
217                      uint32_t *resultbuf, size_t *lengthp);
218
219/* Return the lowercase mapping of a string that is surrounded by a prefix
220   and a suffix.  */
221extern uint8_t *
222       u8_ct_tolower (const uint8_t *s, size_t n,
223                      casing_prefix_context_t prefix_context,
224                      casing_suffix_context_t suffix_context,
225                      const char *iso639_language,
226                      uninorm_t nf,
227                      uint8_t *resultbuf, size_t *lengthp);
228extern uint16_t *
229       u16_ct_tolower (const uint16_t *s, size_t n,
230                      casing_prefix_context_t prefix_context,
231                      casing_suffix_context_t suffix_context,
232                      const char *iso639_language,
233                      uninorm_t nf,
234                      uint16_t *resultbuf, size_t *lengthp);
235extern uint32_t *
236       u32_ct_tolower (const uint32_t *s, size_t n,
237                      casing_prefix_context_t prefix_context,
238                      casing_suffix_context_t suffix_context,
239                      const char *iso639_language,
240                      uninorm_t nf,
241                      uint32_t *resultbuf, size_t *lengthp);
242
243/* Return the titlecase mapping of a string that is surrounded by a prefix
244   and a suffix.  */
245extern uint8_t *
246       u8_ct_totitle (const uint8_t *s, size_t n,
247                      casing_prefix_context_t prefix_context,
248                      casing_suffix_context_t suffix_context,
249                      const char *iso639_language,
250                      uninorm_t nf,
251                      uint8_t *resultbuf, size_t *lengthp);
252extern uint16_t *
253       u16_ct_totitle (const uint16_t *s, size_t n,
254                      casing_prefix_context_t prefix_context,
255                      casing_suffix_context_t suffix_context,
256                      const char *iso639_language,
257                      uninorm_t nf,
258                      uint16_t *resultbuf, size_t *lengthp);
259extern uint32_t *
260       u32_ct_totitle (const uint32_t *s, size_t n,
261                      casing_prefix_context_t prefix_context,
262                      casing_suffix_context_t suffix_context,
263                      const char *iso639_language,
264                      uninorm_t nf,
265                      uint32_t *resultbuf, size_t *lengthp);
266
267/* Return the case folded string.
268   Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
269   to comparing S1 and S2 with uN_casecmp().
270   The nf argument identifies the normalization form to apply after the
271   case-mapping.  It can also be NULL, for no normalization.  */
272extern uint8_t *
273       u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
274                    uninorm_t nf,
275                    uint8_t *resultbuf, size_t *lengthp);
276extern uint16_t *
277       u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
278                     uninorm_t nf,
279                     uint16_t *resultbuf, size_t *lengthp);
280extern uint32_t *
281       u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
282                     uninorm_t nf,
283                     uint32_t *resultbuf, size_t *lengthp);
284/* Likewise, for a string that is surrounded by a prefix and a suffix.  */
285extern uint8_t *
286       u8_ct_casefold (const uint8_t *s, size_t n,
287                       casing_prefix_context_t prefix_context,
288                       casing_suffix_context_t suffix_context,
289                       const char *iso639_language,
290                       uninorm_t nf,
291                       uint8_t *resultbuf, size_t *lengthp);
292extern uint16_t *
293       u16_ct_casefold (const uint16_t *s, size_t n,
294                        casing_prefix_context_t prefix_context,
295                        casing_suffix_context_t suffix_context,
296                        const char *iso639_language,
297                        uninorm_t nf,
298                        uint16_t *resultbuf, size_t *lengthp);
299extern uint32_t *
300       u32_ct_casefold (const uint32_t *s, size_t n,
301                        casing_prefix_context_t prefix_context,
302                        casing_suffix_context_t suffix_context,
303                        const char *iso639_language,
304                        uninorm_t nf,
305                        uint32_t *resultbuf, size_t *lengthp);
306
307/* Compare S1 and S2, ignoring differences in case and normalization.
308   The nf argument identifies the normalization form to apply after the
309   case-mapping.  It can also be NULL, for no normalization.
310   If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
311   return 0.  Upon failure, return -1 with errno set.  */
312extern int
313       u8_casecmp (const uint8_t *s1, size_t n1,
314                   const uint8_t *s2, size_t n2,
315                   const char *iso639_language, uninorm_t nf, int *resultp);
316extern int
317       u16_casecmp (const uint16_t *s1, size_t n1,
318                    const uint16_t *s2, size_t n2,
319                    const char *iso639_language, uninorm_t nf, int *resultp);
320extern int
321       u32_casecmp (const uint32_t *s1, size_t n1,
322                    const uint32_t *s2, size_t n2,
323                    const char *iso639_language, uninorm_t nf, int *resultp);
324extern int
325       ulc_casecmp (const char *s1, size_t n1,
326                    const char *s2, size_t n2,
327                    const char *iso639_language, uninorm_t nf, int *resultp);
328
329/* Convert the string S of length N to a NUL-terminated byte sequence, in such
330   a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
331   function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
332   NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization.  */
333extern char *
334       u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
335                    uninorm_t nf, char *resultbuf, size_t *lengthp);
336extern char *
337       u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
338                     uninorm_t nf, char *resultbuf, size_t *lengthp);
339extern char *
340       u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
341                     uninorm_t nf, char *resultbuf, size_t *lengthp);
342extern char *
343       ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
344                     uninorm_t nf, char *resultbuf, size_t *lengthp);
345
346/* Compare S1 and S2, ignoring differences in case and normalization, using the
347   collation rules of the current locale.
348   The nf argument identifies the normalization form to apply after the
349   case-mapping.  It must be either UNINORM_NFC or UNINORM_NFKC.  It can also
350   be NULL, for no normalization.
351   If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
352   return 0.  Upon failure, return -1 with errno set.  */
353extern int
354       u8_casecoll (const uint8_t *s1, size_t n1,
355                    const uint8_t *s2, size_t n2,
356                    const char *iso639_language, uninorm_t nf, int *resultp);
357extern int
358       u16_casecoll (const uint16_t *s1, size_t n1,
359                     const uint16_t *s2, size_t n2,
360                     const char *iso639_language, uninorm_t nf, int *resultp);
361extern int
362       u32_casecoll (const uint32_t *s1, size_t n1,
363                     const uint32_t *s2, size_t n2,
364                     const char *iso639_language, uninorm_t nf, int *resultp);
365extern int
366       ulc_casecoll (const char *s1, size_t n1,
367                     const char *s2, size_t n2,
368                     const char *iso639_language, uninorm_t nf, int *resultp);
369
370
/* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units.  ISO639_LANGUAGE identifies the language; NULL
   means to use locale independent case mappings.  */
extern int
       u8_is_uppercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_uppercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_uppercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);

/* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units.  ISO639_LANGUAGE identifies the language; NULL
   means to use locale independent case mappings.  */
extern int
       u8_is_lowercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_lowercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_lowercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);

/* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units.  ISO639_LANGUAGE identifies the language; NULL
   means to use locale independent case mappings.  */
extern int
       u8_is_titlecase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_titlecase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_titlecase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);

/* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
   false otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units.  ISO639_LANGUAGE identifies the language; NULL
   means to use locale independent case mappings.  */
extern int
       u8_is_casefolded (const uint8_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u16_is_casefolded (const uint16_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);
extern int
       u32_is_casefolded (const uint32_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);

/* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
   either upper case or lower case or title case is not a no-op.
   Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
   under the lower case mapping, and under the title case mapping; in other
   words, when NFD(S) consists entirely of caseless characters.
   Upon failure, return -1 with errno set.
   NOTE(review): the success return value is not stated here; presumably it
   is 0, as for the other uN_is_* functions declared in this header --
   confirm against the implementation.
   S is a string of N units.  ISO639_LANGUAGE identifies the language; NULL
   means to use locale independent case mappings.  */
extern int
       u8_is_cased (const uint8_t *s, size_t n,
                    const char *iso639_language,
                    bool *resultp);
extern int
       u16_is_cased (const uint16_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);
extern int
       u32_is_cased (const uint32_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);


451/* ========================================================================= */
452
453#ifdef __cplusplus
454}
455#endif
456
457#endif /* _UNICASE_H */
458