/* test-utf8.c revision 226031 */
/*
 * Copyright (c) 2004 Kungliga Tekniska Högskolan
 * (Royal Institute of Technology, Stockholm, Sweden).
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Institute nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <err.h>
#include "windlocl.h"

/*
 * NULL-terminated list of byte strings that main() feeds to
 * wind_utf8ucs4() and expects to be rejected (non-zero return).
 *
 * The entries are grouped by the first byte of the sequence.
 */
static const char *failing_testcases[] = {
    /* bytes that cannot start a sequence: lone continuation byte, 0xFF */
    "\x80",
    "\xFF",
    /* a 2-, 3- or 4-byte lead byte with nothing after it */
    "\xC0",
    "\xDF",
    "\xE0",
    "\xEF",
    "\xF0",
    "\xF7",
    /* 2-byte lead: bad continuation byte, or a stray trailing byte */
    "\xC0\x01",
    "\xC0\x7F",
    "\xC0\xFF",
    "\xC0\x80\x80",
    /* 3-byte lead: bad continuation, truncated, or a stray trailing byte */
    "\xE0\x01",
    "\xE0\x7F",
    "\xE0\x80",
    "\xE0\xFF",
    "\xE0\x80\x20",
    "\xE0\x80\xFF",
    "\xE0\x80\x80\x80",
    /* 4-byte lead: bad continuation or truncated */
    "\xF0\x01",
    "\xF0\x80",
    "\xF0\x80\x01",
    "\xF0\x80\x80",
    "\xF0\x80\x80\x01",
    "\xF0\x80\x80\xFF",
    NULL
};

/* Capacity, in code points, of every UCS-4 buffer used by this test. */
#define MAX_LENGTH 10

/*
 * One positive test vector: a UTF-8 string together with the result
 * main() expects from decoding it.
 */
struct testcase {
    const char *utf8_str;	/* NUL-terminated input handed to the decoder */
    size_t len;			/* expected number of decoded code points */
    uint32_t u[MAX_LENGTH];	/* expected code points; first len are compared */
    int invalid_ucs2;		/* non-zero: main() skips the wind_ucs4utf8() step */
};

/*
 * Inputs that must decode successfully.  Entries are ordered by the
 * length of the encoded form (1 to 4 bytes).  Note that the table
 * contains several longer-than-necessary encodings of small values
 * (e.g. "\xC0\x80" for 0), which this test expects to be accepted.
 * Entries that omit the last initializer have invalid_ucs2 == 0.
 */
static const struct testcase testcases[] = {
    /* empty string and 1-byte forms */
    {"", 0, {0}},
    {"\x01", 1, {1}},
    {"\x7F", 1, {0x7F}},
    {"\x01\x7F", 2, {0x01, 0x7F}},
    /* 2-byte forms */
    {"\xC0\x80", 1, {0}},
    {"\xC0\x81", 1, {1}},
    {"\xC1\x80", 1, {0x40}},
    {"\xDF\xBF", 1, {0x7FF}},
    /* 3-byte forms */
    {"\xE0\x80\x80", 1, {0}},
    {"\xE0\x80\x81", 1, {1}},
    {"\xE0\x81\x80", 1, {0x40}},
    {"\xE1\x80\x80", 1, {0x1000}},
    {"\xEF\xBF\xBF", 1, {0xFFFF}},
    /* 4-byte forms */
    {"\xF0\x80\x80\x80", 1, {0}},
    {"\xF0\x80\x80\x81", 1, {1}},
    {"\xF0\x80\x81\x80", 1, {0x40}},
    {"\xF0\x81\x80\x80", 1, {0x1000}},
    {"\xF1\x80\x80\x80", 1, {0x40000}},
    {"\xF7\xBF\xBF\xBF", 1, {0x1FFFFF}, 1},
};

102219069Sdimint
103219069Sdimmain(void)
104219069Sdim{
105219069Sdim    unsigned failures = 0;
106219069Sdim    unsigned i;
107226633Sdim    const char **s;
108226633Sdim    int ret;
109226633Sdim    size_t len, len2;
110226633Sdim    uint32_t u[MAX_LENGTH];
111234353Sdim    char str[MAX_LENGTH * 4];
112226633Sdim
113226633Sdim    for (s = failing_testcases; *s != NULL; ++s) {
114226633Sdim	len = MAX_LENGTH;
115234353Sdim	ret = wind_utf8ucs4(*s, u, &len);
116234353Sdim	if (ret == 0) {
117219069Sdim	    printf("utf8 decode of \"%s\" should have failed\n", *s);
118234353Sdim	    ++failures;
119234353Sdim	}
120234353Sdim    }
121219069Sdim
122219069Sdim    for (i = 0; i < sizeof(testcases)/sizeof(testcases[0]); ++i) {
123234353Sdim	const struct testcase *t = &testcases[i];
124234353Sdim
125234353Sdim	ret = wind_utf8ucs4_length(t->utf8_str, &len);
126234353Sdim	if (ret) {
127234353Sdim	    printf("utf8ucs4 length of \"%s\" should have succeeded\n",
128234353Sdim		   t->utf8_str);
129234353Sdim	    ++failures;
130234353Sdim	    continue;
131234353Sdim	}
132234353Sdim	if (len != t->len) {
133219069Sdim	    printf("utf8ucs4_length of \"%s\" has wrong length: "
134219069Sdim		   "expected: %u, actual: %u\n",
135234353Sdim		   t->utf8_str, (unsigned int)t->len, (unsigned int)len);
136234353Sdim	    ++failures;
137234353Sdim	    continue;
138234353Sdim	}
139234353Sdim
140234353Sdim	len = MAX_LENGTH;
141234353Sdim	ret = wind_utf8ucs4(t->utf8_str, u, &len);
142234353Sdim	if (ret) {
143234353Sdim	    printf("utf8 decode of \"%s\" should have succeeded\n",
144234353Sdim		   t->utf8_str);
145234353Sdim	    ++failures;
146219069Sdim	    continue;
147219069Sdim	}
148234353Sdim	if (len != t->len) {
149234353Sdim	    printf("utf8 decode of \"%s\" has wrong length: "
150234353Sdim		   "expected: %u, actual: %u\n",
151234353Sdim		   t->utf8_str, (unsigned int)t->len, (unsigned int)len);
152219069Sdim	    ++failures;
153234353Sdim	    continue;
154234353Sdim	}
155234353Sdim	if (memcmp(t->u, u, len * sizeof(uint32_t)) != 0) {
156234353Sdim	    printf("utf8 decode of \"%s\" has wrong data\n",
157234353Sdim		   t->utf8_str);
158234353Sdim	    ++failures;
159234353Sdim	    continue;
160234353Sdim	}
161234353Sdim	if (t->invalid_ucs2 == 0) {
162234353Sdim	    len2 = sizeof(str);
163234353Sdim	    ret = wind_ucs4utf8(u, len, str, &len2);
164234353Sdim	    if (ret) {
165234353Sdim		printf("ucs4 decode of \"%s\" should have succeeded\n",
166234353Sdim		       t->utf8_str);
167234353Sdim		++failures;
168219069Sdim		continue;
169219069Sdim	    }
170234353Sdim	}
171234353Sdim    }
172234353Sdim
173234353Sdim    return failures != 0;
174234353Sdim}
175234353Sdim