1/* Word breaks in Unicode strings. 2 Copyright (C) 2001-2003, 2005-2010 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2009. 4 5 This program is free software: you can redistribute it and/or modify it 6 under the terms of the GNU Lesser General Public License as published 7 by the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifndef _UNIWBRK_H 19#define _UNIWBRK_H 20 21/* Get size_t. */ 22#include <stddef.h> 23 24#include "unitypes.h" 25 26 27#ifdef __cplusplus 28extern "C" { 29#endif 30 31/* ========================================================================= */ 32 33/* Property defined in Unicode Standard Annex #29, section "Word Boundaries" 34 <http://www.unicode.org/reports/tr29/#Word_Boundaries> */ 35 36/* Possible values of the Word_Break property. 37 This enumeration may be extended in the future. */ 38enum 39{ 40 WBP_OTHER = 0, 41 WBP_CR = 11, 42 WBP_LF = 12, 43 WBP_NEWLINE = 10, 44 WBP_EXTEND = 8, 45 WBP_FORMAT = 9, 46 WBP_KATAKANA = 1, 47 WBP_ALETTER = 2, 48 WBP_MIDNUMLET = 3, 49 WBP_MIDLETTER = 4, 50 WBP_MIDNUM = 5, 51 WBP_NUMERIC = 6, 52 WBP_EXTENDNUMLET = 7 53}; 54 55/* Return the Word_Break property of a Unicode character. */ 56extern int 57 uc_wordbreak_property (ucs4_t uc); 58 59/* ========================================================================= */ 60 61/* Word breaks. */ 62 63/* Determine the word break points in S, and store the result at p[0..n-1]. 64 p[i] = 1 means that there is a word boundary between s[i-1] and s[i]. 65 p[i] = 0 means that s[i-1] and s[i] must not be separated. 66 */ 67extern void 68 u8_wordbreaks (const uint8_t *s, size_t n, char *p); 69extern void 70 u16_wordbreaks (const uint16_t *s, size_t n, char *p); 71extern void 72 u32_wordbreaks (const uint32_t *s, size_t n, char *p); 73extern void 74 ulc_wordbreaks (const char *s, size_t n, char *p); 75 76/* ========================================================================= */ 77 78#ifdef __cplusplus 79} 80#endif 81 82 83#endif /* _UNIWBRK_H */ 84