1/* Test of decomposition of Unicode characters. 2 Copyright (C) 2009, 2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ 18 19#include <config.h> 20 21#include "uninorm.h" 22 23#include "macros.h" 24 25int 26main () 27{ 28 ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH]; 29 int tag; 30 int ret; 31 32 /* SPACE */ 33 ret = uc_decomposition (0x0020, &tag, decomposed); 34 ASSERT (ret == -1); 35 36 /* LATIN CAPITAL LETTER A WITH DIAERESIS */ 37 ret = uc_decomposition (0x00C4, &tag, decomposed); 38 ASSERT (ret == 2); 39 ASSERT (tag == UC_DECOMP_CANONICAL); 40 ASSERT (decomposed[0] == 0x0041); 41 ASSERT (decomposed[1] == 0x0308); 42 43 /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ 44 ret = uc_decomposition (0x01DE, &tag, decomposed); 45 ASSERT (ret == 2); 46 ASSERT (tag == UC_DECOMP_CANONICAL); 47 ASSERT (decomposed[0] == 0x00C4); 48 ASSERT (decomposed[1] == 0x0304); 49 50 /* GREEK DIALYTIKA AND PERISPOMENI */ 51 ret = uc_decomposition (0x1FC1, &tag, decomposed); 52 ASSERT (ret == 2); 53 ASSERT (tag == UC_DECOMP_CANONICAL); 54 ASSERT (decomposed[0] == 0x00A8); 55 ASSERT (decomposed[1] == 0x0342); 56 57 /* SCRIPT SMALL L */ 58 ret = uc_decomposition (0x2113, &tag, decomposed); 59 ASSERT (ret == 1); 60 ASSERT (tag == UC_DECOMP_FONT); 61 ASSERT (decomposed[0] == 0x006C); 62 63 /* NO-BREAK SPACE */ 64 ret = uc_decomposition (0x00A0, &tag, decomposed); 65 ASSERT (ret == 1); 66 ASSERT (tag == UC_DECOMP_NOBREAK); 67 ASSERT (decomposed[0] == 0x0020); 68 69 /* ARABIC LETTER VEH INITIAL FORM */ 70 ret = uc_decomposition (0xFB6C, &tag, decomposed); 71 ASSERT (ret == 1); 72 ASSERT (tag == UC_DECOMP_INITIAL); 73 ASSERT (decomposed[0] == 0x06A4); 74 75 /* ARABIC LETTER VEH MEDIAL FORM */ 76 ret = uc_decomposition (0xFB6D, &tag, decomposed); 77 ASSERT (ret == 1); 78 ASSERT (tag == UC_DECOMP_MEDIAL); 79 ASSERT (decomposed[0] == 0x06A4); 80 81 /* ARABIC LETTER VEH FINAL FORM */ 82 ret = uc_decomposition (0xFB6B, &tag, decomposed); 83 ASSERT (ret == 1); 84 ASSERT (tag == UC_DECOMP_FINAL); 85 ASSERT (decomposed[0] == 0x06A4); 86 87 /* ARABIC LETTER VEH ISOLATED FORM */ 88 ret = uc_decomposition (0xFB6A, &tag, decomposed); 89 ASSERT (ret == 1); 90 ASSERT (tag == UC_DECOMP_ISOLATED); 91 ASSERT (decomposed[0] == 0x06A4); 92 93 /* CIRCLED NUMBER FIFTEEN */ 94 ret = uc_decomposition (0x246E, &tag, decomposed); 95 ASSERT (ret == 2); 96 ASSERT (tag == UC_DECOMP_CIRCLE); 97 ASSERT (decomposed[0] == 0x0031); 98 ASSERT (decomposed[1] == 0x0035); 99 100 /* TRADE MARK SIGN */ 101 ret = uc_decomposition (0x2122, &tag, decomposed); 102 ASSERT (ret == 2); 103 ASSERT (tag == UC_DECOMP_SUPER); 104 ASSERT (decomposed[0] == 0x0054); 105 ASSERT (decomposed[1] == 0x004D); 106 107 /* LATIN SUBSCRIPT SMALL LETTER I */ 108 ret = uc_decomposition (0x1D62, &tag, decomposed); 109 ASSERT (ret == 1); 110 ASSERT (tag == UC_DECOMP_SUB); 111 ASSERT (decomposed[0] == 0x0069); 112 113 /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ 114 ret = uc_decomposition (0xFE35, &tag, decomposed); 115 ASSERT (ret == 1); 116 ASSERT (tag == UC_DECOMP_VERTICAL); 117 ASSERT (decomposed[0] == 0x0028); 118 119 /* FULLWIDTH LATIN CAPITAL LETTER A */ 120 ret = uc_decomposition (0xFF21, &tag, decomposed); 121 ASSERT (ret == 1); 122 ASSERT (tag == UC_DECOMP_WIDE); 123 ASSERT (decomposed[0] == 0x0041); 124 125 /* HALFWIDTH IDEOGRAPHIC COMMA */ 126 ret = uc_decomposition (0xFF64, &tag, decomposed); 127 ASSERT (ret == 1); 128 ASSERT (tag == UC_DECOMP_NARROW); 129 ASSERT (decomposed[0] == 0x3001); 130 131 /* SMALL IDEOGRAPHIC COMMA */ 132 ret = uc_decomposition (0xFE51, &tag, decomposed); 133 ASSERT (ret == 1); 134 ASSERT (tag == UC_DECOMP_SMALL); 135 ASSERT (decomposed[0] == 0x3001); 136 137 /* SQUARE MHZ */ 138 ret = uc_decomposition (0x3392, &tag, decomposed); 139 ASSERT (ret == 3); 140 ASSERT (tag == UC_DECOMP_SQUARE); 141 ASSERT (decomposed[0] == 0x004D); 142 ASSERT (decomposed[1] == 0x0048); 143 ASSERT (decomposed[2] == 0x007A); 144 145 /* VULGAR FRACTION THREE EIGHTHS */ 146 ret = uc_decomposition (0x215C, &tag, decomposed); 147 ASSERT (ret == 3); 148 ASSERT (tag == UC_DECOMP_FRACTION); 149 ASSERT (decomposed[0] == 0x0033); 150 ASSERT (decomposed[1] == 0x2044); 151 ASSERT (decomposed[2] == 0x0038); 152 153 /* MICRO SIGN */ 154 ret = uc_decomposition (0x00B5, &tag, decomposed); 155 ASSERT (ret == 1); 156 ASSERT (tag == UC_DECOMP_COMPAT); 157 ASSERT (decomposed[0] == 0x03BC); 158 159 /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ 160 ret = uc_decomposition (0xFDFA, &tag, decomposed); 161 ASSERT (ret == 18); 162 ASSERT (tag == UC_DECOMP_ISOLATED); 163 ASSERT (decomposed[0] == 0x0635); 164 ASSERT (decomposed[1] == 0x0644); 165 ASSERT (decomposed[2] == 0x0649); 166 ASSERT (decomposed[3] == 0x0020); 167 ASSERT (decomposed[4] == 0x0627); 168 ASSERT (decomposed[5] == 0x0644); 169 ASSERT (decomposed[6] == 0x0644); 170 ASSERT (decomposed[7] == 0x0647); 171 ASSERT (decomposed[8] == 0x0020); 172 ASSERT (decomposed[9] == 0x0639); 173 ASSERT (decomposed[10] == 0x0644); 174 ASSERT (decomposed[11] == 0x064A); 175 ASSERT (decomposed[12] == 0x0647); 176 ASSERT (decomposed[13] == 0x0020); 177 ASSERT (decomposed[14] == 0x0648); 178 ASSERT (decomposed[15] == 0x0633); 179 ASSERT (decomposed[16] == 0x0644); 180 ASSERT (decomposed[17] == 0x0645); 181 182 /* HANGUL SYLLABLE GEUL */ 183 ret = uc_decomposition (0xAE00, &tag, decomposed); 184 /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>, 185 section "Clarification of Hangul Jamo Handling". */ 186#if 1 187 ASSERT (ret == 2); 188 ASSERT (tag == UC_DECOMP_CANONICAL); 189 ASSERT (decomposed[0] == 0xADF8); 190 ASSERT (decomposed[1] == 0x11AF); 191#else 192 ASSERT (ret == 3); 193 ASSERT (tag == UC_DECOMP_CANONICAL); 194 ASSERT (decomposed[0] == 0x1100); 195 ASSERT (decomposed[1] == 0x1173); 196 ASSERT (decomposed[2] == 0x11AF); 197#endif 198 199 /* HANGUL SYLLABLE GEU */ 200 ret = uc_decomposition (0xADF8, &tag, decomposed); 201 ASSERT (ret == 2); 202 ASSERT (tag == UC_DECOMP_CANONICAL); 203 ASSERT (decomposed[0] == 0x1100); 204 ASSERT (decomposed[1] == 0x1173); 205 206 return 0; 207} 208