1/* Test of decomposition of Unicode characters.
2   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
18
19#include <config.h>
20
21#include "uninorm.h"
22
23#include "macros.h"
24
25int
26main ()
27{
28  ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
29  int tag;
30  int ret;
31
32  /* SPACE */
33  ret = uc_decomposition (0x0020, &tag, decomposed);
34  ASSERT (ret == -1);
35
36  /* LATIN CAPITAL LETTER A WITH DIAERESIS */
37  ret = uc_decomposition (0x00C4, &tag, decomposed);
38  ASSERT (ret == 2);
39  ASSERT (tag == UC_DECOMP_CANONICAL);
40  ASSERT (decomposed[0] == 0x0041);
41  ASSERT (decomposed[1] == 0x0308);
42
43  /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
44  ret = uc_decomposition (0x01DE, &tag, decomposed);
45  ASSERT (ret == 2);
46  ASSERT (tag == UC_DECOMP_CANONICAL);
47  ASSERT (decomposed[0] == 0x00C4);
48  ASSERT (decomposed[1] == 0x0304);
49
50  /* GREEK DIALYTIKA AND PERISPOMENI */
51  ret = uc_decomposition (0x1FC1, &tag, decomposed);
52  ASSERT (ret == 2);
53  ASSERT (tag == UC_DECOMP_CANONICAL);
54  ASSERT (decomposed[0] == 0x00A8);
55  ASSERT (decomposed[1] == 0x0342);
56
57  /* SCRIPT SMALL L */
58  ret = uc_decomposition (0x2113, &tag, decomposed);
59  ASSERT (ret == 1);
60  ASSERT (tag == UC_DECOMP_FONT);
61  ASSERT (decomposed[0] == 0x006C);
62
63  /* NO-BREAK SPACE */
64  ret = uc_decomposition (0x00A0, &tag, decomposed);
65  ASSERT (ret == 1);
66  ASSERT (tag == UC_DECOMP_NOBREAK);
67  ASSERT (decomposed[0] == 0x0020);
68
69  /* ARABIC LETTER VEH INITIAL FORM */
70  ret = uc_decomposition (0xFB6C, &tag, decomposed);
71  ASSERT (ret == 1);
72  ASSERT (tag == UC_DECOMP_INITIAL);
73  ASSERT (decomposed[0] == 0x06A4);
74
75  /* ARABIC LETTER VEH MEDIAL FORM */
76  ret = uc_decomposition (0xFB6D, &tag, decomposed);
77  ASSERT (ret == 1);
78  ASSERT (tag == UC_DECOMP_MEDIAL);
79  ASSERT (decomposed[0] == 0x06A4);
80
81  /* ARABIC LETTER VEH FINAL FORM */
82  ret = uc_decomposition (0xFB6B, &tag, decomposed);
83  ASSERT (ret == 1);
84  ASSERT (tag == UC_DECOMP_FINAL);
85  ASSERT (decomposed[0] == 0x06A4);
86
87  /* ARABIC LETTER VEH ISOLATED FORM */
88  ret = uc_decomposition (0xFB6A, &tag, decomposed);
89  ASSERT (ret == 1);
90  ASSERT (tag == UC_DECOMP_ISOLATED);
91  ASSERT (decomposed[0] == 0x06A4);
92
93  /* CIRCLED NUMBER FIFTEEN */
94  ret = uc_decomposition (0x246E, &tag, decomposed);
95  ASSERT (ret == 2);
96  ASSERT (tag == UC_DECOMP_CIRCLE);
97  ASSERT (decomposed[0] == 0x0031);
98  ASSERT (decomposed[1] == 0x0035);
99
100  /* TRADE MARK SIGN */
101  ret = uc_decomposition (0x2122, &tag, decomposed);
102  ASSERT (ret == 2);
103  ASSERT (tag == UC_DECOMP_SUPER);
104  ASSERT (decomposed[0] == 0x0054);
105  ASSERT (decomposed[1] == 0x004D);
106
107  /* LATIN SUBSCRIPT SMALL LETTER I */
108  ret = uc_decomposition (0x1D62, &tag, decomposed);
109  ASSERT (ret == 1);
110  ASSERT (tag == UC_DECOMP_SUB);
111  ASSERT (decomposed[0] == 0x0069);
112
113  /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
114  ret = uc_decomposition (0xFE35, &tag, decomposed);
115  ASSERT (ret == 1);
116  ASSERT (tag == UC_DECOMP_VERTICAL);
117  ASSERT (decomposed[0] == 0x0028);
118
119  /* FULLWIDTH LATIN CAPITAL LETTER A */
120  ret = uc_decomposition (0xFF21, &tag, decomposed);
121  ASSERT (ret == 1);
122  ASSERT (tag == UC_DECOMP_WIDE);
123  ASSERT (decomposed[0] == 0x0041);
124
125  /* HALFWIDTH IDEOGRAPHIC COMMA */
126  ret = uc_decomposition (0xFF64, &tag, decomposed);
127  ASSERT (ret == 1);
128  ASSERT (tag == UC_DECOMP_NARROW);
129  ASSERT (decomposed[0] == 0x3001);
130
131  /* SMALL IDEOGRAPHIC COMMA */
132  ret = uc_decomposition (0xFE51, &tag, decomposed);
133  ASSERT (ret == 1);
134  ASSERT (tag == UC_DECOMP_SMALL);
135  ASSERT (decomposed[0] == 0x3001);
136
137  /* SQUARE MHZ */
138  ret = uc_decomposition (0x3392, &tag, decomposed);
139  ASSERT (ret == 3);
140  ASSERT (tag == UC_DECOMP_SQUARE);
141  ASSERT (decomposed[0] == 0x004D);
142  ASSERT (decomposed[1] == 0x0048);
143  ASSERT (decomposed[2] == 0x007A);
144
145  /* VULGAR FRACTION THREE EIGHTHS */
146  ret = uc_decomposition (0x215C, &tag, decomposed);
147  ASSERT (ret == 3);
148  ASSERT (tag == UC_DECOMP_FRACTION);
149  ASSERT (decomposed[0] == 0x0033);
150  ASSERT (decomposed[1] == 0x2044);
151  ASSERT (decomposed[2] == 0x0038);
152
153  /* MICRO SIGN */
154  ret = uc_decomposition (0x00B5, &tag, decomposed);
155  ASSERT (ret == 1);
156  ASSERT (tag == UC_DECOMP_COMPAT);
157  ASSERT (decomposed[0] == 0x03BC);
158
159  /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
160  ret = uc_decomposition (0xFDFA, &tag, decomposed);
161  ASSERT (ret == 18);
162  ASSERT (tag == UC_DECOMP_ISOLATED);
163  ASSERT (decomposed[0] == 0x0635);
164  ASSERT (decomposed[1] == 0x0644);
165  ASSERT (decomposed[2] == 0x0649);
166  ASSERT (decomposed[3] == 0x0020);
167  ASSERT (decomposed[4] == 0x0627);
168  ASSERT (decomposed[5] == 0x0644);
169  ASSERT (decomposed[6] == 0x0644);
170  ASSERT (decomposed[7] == 0x0647);
171  ASSERT (decomposed[8] == 0x0020);
172  ASSERT (decomposed[9] == 0x0639);
173  ASSERT (decomposed[10] == 0x0644);
174  ASSERT (decomposed[11] == 0x064A);
175  ASSERT (decomposed[12] == 0x0647);
176  ASSERT (decomposed[13] == 0x0020);
177  ASSERT (decomposed[14] == 0x0648);
178  ASSERT (decomposed[15] == 0x0633);
179  ASSERT (decomposed[16] == 0x0644);
180  ASSERT (decomposed[17] == 0x0645);
181
182  /* HANGUL SYLLABLE GEUL */
183  ret = uc_decomposition (0xAE00, &tag, decomposed);
184  /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>,
185     section "Clarification of Hangul Jamo Handling".  */
186#if 1
187  ASSERT (ret == 2);
188  ASSERT (tag == UC_DECOMP_CANONICAL);
189  ASSERT (decomposed[0] == 0xADF8);
190  ASSERT (decomposed[1] == 0x11AF);
191#else
192  ASSERT (ret == 3);
193  ASSERT (tag == UC_DECOMP_CANONICAL);
194  ASSERT (decomposed[0] == 0x1100);
195  ASSERT (decomposed[1] == 0x1173);
196  ASSERT (decomposed[2] == 0x11AF);
197#endif
198
199  /* HANGUL SYLLABLE GEU */
200  ret = uc_decomposition (0xADF8, &tag, decomposed);
201  ASSERT (ret == 2);
202  ASSERT (tag == UC_DECOMP_CANONICAL);
203  ASSERT (decomposed[0] == 0x1100);
204  ASSERT (decomposed[1] == 0x1173);
205
206  return 0;
207}
208