1/* Test whether a Unicode character is case-ignorable. 2 Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2009. 4 5 This program is free software: you can redistribute it and/or modify it 6 under the terms of the GNU Lesser General Public License as published 7 by the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#include <config.h> 19 20/* Specification. */ 21#include "caseprop.h" 22 23/* Quoting the Unicode standard: 24 Definition: A character is defined to be "case-ignorable" if it has the 25 value MidLetter {or the value MidNumLet} for the Word_Break property or 26 its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), 27 Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). 28 The text marked in braces was added in Unicode 5.1.0, see 29 <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of 30 Definition of case-ignorable". */ 31/* Since this predicate is only used for the "Before C" and "After C" 32 conditions of FINAL_SIGMA, we exclude the "cased" characters here. 33 This simplifies the evaluation of the regular expressions 34 \p{cased} (\p{case-ignorable})* C 35 and 36 C (\p{case-ignorable})* \p{cased} 37 */ 38 39#if 0 40 41#include "unictype.h" 42#include "uniwbrk.h" 43 44bool 45uc_is_case_ignorable (ucs4_t uc) 46{ 47 int wbp = uc_wordbreak_property (uc); 48 49 return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET 50 || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn 51 | UC_CATEGORY_MASK_Me 52 | UC_CATEGORY_MASK_Cf 53 | UC_CATEGORY_MASK_Lm 54 | UC_CATEGORY_MASK_Sk)) 55 && !uc_is_cased (uc); 56} 57 58#else 59 60#include "unictype/bitmap.h" 61 62/* Define u_casing_property_case_ignorable table. */ 63#include "ignorable.h" 64 65bool 66uc_is_case_ignorable (ucs4_t uc) 67{ 68 return bitmap_lookup (&u_casing_property_case_ignorable, uc); 69} 70 71#endif 72