test-utf8.c revision 226031
1226031Sstas/* 2226031Sstas * Copyright (c) 2004 Kungliga Tekniska H��gskolan 3226031Sstas * (Royal Institute of Technology, Stockholm, Sweden). 4226031Sstas * All rights reserved. 5226031Sstas * 6226031Sstas * Redistribution and use in source and binary forms, with or without 7226031Sstas * modification, are permitted provided that the following conditions 8226031Sstas * are met: 9226031Sstas * 10226031Sstas * 1. Redistributions of source code must retain the above copyright 11226031Sstas * notice, this list of conditions and the following disclaimer. 12226031Sstas * 13226031Sstas * 2. Redistributions in binary form must reproduce the above copyright 14226031Sstas * notice, this list of conditions and the following disclaimer in the 15226031Sstas * documentation and/or other materials provided with the distribution. 16226031Sstas * 17226031Sstas * 3. Neither the name of the Institute nor the names of its contributors 18226031Sstas * may be used to endorse or promote products derived from this software 19226031Sstas * without specific prior written permission. 20226031Sstas * 21226031Sstas * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND 22226031Sstas * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23226031Sstas * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24226031Sstas * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE 25226031Sstas * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26226031Sstas * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27226031Sstas * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28226031Sstas * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29226031Sstas * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30226031Sstas * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31226031Sstas * SUCH DAMAGE. 32226031Sstas */ 33226031Sstas 34226031Sstas#ifdef HAVE_CONFIG_H 35226031Sstas#include <config.h> 36226031Sstas#endif 37226031Sstas#include <stdio.h> 38226031Sstas#include <string.h> 39226031Sstas#include <err.h> 40226031Sstas#include "windlocl.h" 41226031Sstas 42226031Sstasstatic const char *failing_testcases[] = { 43226031Sstas "\x80", 44226031Sstas "\xFF", 45226031Sstas "\xC0", 46226031Sstas "\xDF", 47226031Sstas "\xE0", 48226031Sstas "\xEF", 49226031Sstas "\xF0", 50226031Sstas "\xF7", 51226031Sstas "\xC0\x01", 52226031Sstas "\xC0\x7F", 53226031Sstas "\xC0\xFF", 54226031Sstas "\xC0\x80\x80", 55226031Sstas "\xE0\x01", 56226031Sstas "\xE0\x7F", 57226031Sstas "\xE0\x80", 58226031Sstas "\xE0\xFF", 59226031Sstas "\xE0\x80\x20", 60226031Sstas "\xE0\x80\xFF", 61226031Sstas "\xE0\x80\x80\x80", 62226031Sstas "\xF0\x01", 63226031Sstas "\xF0\x80", 64226031Sstas "\xF0\x80\x01", 65226031Sstas "\xF0\x80\x80", 66226031Sstas "\xF0\x80\x80\x01", 67226031Sstas "\xF0\x80\x80\xFF", 68226031Sstas NULL 69226031Sstas}; 70226031Sstas 71226031Sstas#define MAX_LENGTH 10 72226031Sstas 73226031Sstasstruct testcase { 74226031Sstas const char *utf8_str; 75226031Sstas size_t len; 76226031Sstas uint32_t u[MAX_LENGTH]; 77226031Sstas int invalid_ucs2; 78226031Sstas}; 79226031Sstas 80226031Sstasstatic const struct testcase testcases[] = { 81226031Sstas {"", 0, {0}}, 82226031Sstas {"\x01", 1, {1}}, 83226031Sstas {"\x7F", 1, {0x7F}}, 84226031Sstas {"\x01\x7F", 2, {0x01, 0x7F}}, 85226031Sstas {"\xC0\x80", 1, {0}}, 86226031Sstas {"\xC0\x81", 1, {1}}, 87226031Sstas {"\xC1\x80", 1, {0x40}}, 88226031Sstas {"\xDF\xBF", 1, {0x7FF}}, 89226031Sstas {"\xE0\x80\x80", 1, {0}}, 90226031Sstas {"\xE0\x80\x81", 1, {1}}, 91226031Sstas {"\xE0\x81\x80", 1, {0x40}}, 92226031Sstas {"\xE1\x80\x80", 1, {0x1000}}, 93226031Sstas {"\xEF\xBF\xBF", 1, {0xFFFF}}, 94226031Sstas {"\xF0\x80\x80\x80", 1, {0}}, 95226031Sstas {"\xF0\x80\x80\x81", 1, {1}}, 96226031Sstas {"\xF0\x80\x81\x80", 1, {0x40}}, 97226031Sstas {"\xF0\x81\x80\x80", 1, {0x1000}}, 98226031Sstas {"\xF1\x80\x80\x80", 1, {0x40000}}, 99226031Sstas {"\xF7\xBF\xBF\xBF", 1, {0X1FFFFF}, 1}, 100226031Sstas}; 101226031Sstas 102226031Sstasint 103226031Sstasmain(void) 104226031Sstas{ 105226031Sstas unsigned failures = 0; 106226031Sstas unsigned i; 107226031Sstas const char **s; 108226031Sstas int ret; 109226031Sstas size_t len, len2; 110226031Sstas uint32_t u[MAX_LENGTH]; 111226031Sstas char str[MAX_LENGTH * 4]; 112226031Sstas 113226031Sstas for (s = failing_testcases; *s != NULL; ++s) { 114226031Sstas len = MAX_LENGTH; 115226031Sstas ret = wind_utf8ucs4(*s, u, &len); 116226031Sstas if (ret == 0) { 117226031Sstas printf("utf8 decode of \"%s\" should have failed\n", *s); 118226031Sstas ++failures; 119226031Sstas } 120226031Sstas } 121226031Sstas 122226031Sstas for (i = 0; i < sizeof(testcases)/sizeof(testcases[0]); ++i) { 123226031Sstas const struct testcase *t = &testcases[i]; 124226031Sstas 125226031Sstas ret = wind_utf8ucs4_length(t->utf8_str, &len); 126226031Sstas if (ret) { 127226031Sstas printf("utf8ucs4 length of \"%s\" should have succeeded\n", 128226031Sstas t->utf8_str); 129226031Sstas ++failures; 130226031Sstas continue; 131226031Sstas } 132226031Sstas if (len != t->len) { 133226031Sstas printf("utf8ucs4_length of \"%s\" has wrong length: " 134226031Sstas "expected: %u, actual: %u\n", 135226031Sstas t->utf8_str, (unsigned int)t->len, (unsigned int)len); 136226031Sstas ++failures; 137226031Sstas continue; 138226031Sstas } 139226031Sstas 140226031Sstas len = MAX_LENGTH; 141226031Sstas ret = wind_utf8ucs4(t->utf8_str, u, &len); 142226031Sstas if (ret) { 143226031Sstas printf("utf8 decode of \"%s\" should have succeeded\n", 144226031Sstas t->utf8_str); 145226031Sstas ++failures; 146226031Sstas continue; 147226031Sstas } 148226031Sstas if (len != t->len) { 149226031Sstas printf("utf8 decode of \"%s\" has wrong length: " 150226031Sstas "expected: %u, actual: %u\n", 151226031Sstas t->utf8_str, (unsigned int)t->len, (unsigned int)len); 152226031Sstas ++failures; 153226031Sstas continue; 154226031Sstas } 155226031Sstas if (memcmp(t->u, u, len * sizeof(uint32_t)) != 0) { 156226031Sstas printf("utf8 decode of \"%s\" has wrong data\n", 157226031Sstas t->utf8_str); 158226031Sstas ++failures; 159226031Sstas continue; 160226031Sstas } 161226031Sstas if (t->invalid_ucs2 == 0) { 162226031Sstas len2 = sizeof(str); 163226031Sstas ret = wind_ucs4utf8(u, len, str, &len2); 164226031Sstas if (ret) { 165226031Sstas printf("ucs4 decode of \"%s\" should have succeeded\n", 166226031Sstas t->utf8_str); 167226031Sstas ++failures; 168226031Sstas continue; 169226031Sstas } 170226031Sstas } 171226031Sstas } 172226031Sstas 173226031Sstas return failures != 0; 174226031Sstas} 175