1/* Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 2 This file is part of the GNU C Library. 3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, write to the Free 17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307 USA. */ 19 20/* We always want assert to be fully defined. */ 21#undef NDEBUG 22#include <assert.h> 23#include <locale.h> 24#include <stdio.h> 25#include <stdlib.h> 26#include <string.h> 27#include <wchar.h> 28 29static int check_ascii(const char *locname); 30 31 32/* UTF-8 single byte feeding test for mbrtowc(), 33 contributed by Markus Kuhn <mkuhn@acm.org>. 
/* UTF-8 single byte feeding test for mbrtowc(),
   contributed by Markus Kuhn <mkuhn@acm.org>.

   utf8_test() selects a UTF-8 LC_CTYPE locale before calling this.
   Every expectation is an assert(), so a mismatch aborts the program;
   0 is returned when all of them hold.  */
static int
utf8_test_1(void)
{
	static const char u0800[] = "\xe0\xa0\x80";	/* UTF-8 for U+0800 */
	const wchar_t sentinel = 42;			/* arbitrary number */
	mbstate_t state;
	wchar_t out;
	size_t rc;

	/* Feed E2 89 A0 (U+2260, "not equal") one byte per call.  */
	out = sentinel;
	memset(&state, 0, sizeof state);	/* initial conversion state */

	rc = mbrtowc(&out, "\xE2", 1, &state);
	assert(rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&out, "\x89", 1, &state);
	assert(rc == (size_t) -2);		/* 2nd byte processed */
	assert(out == sentinel);		/* nothing stored into out yet */

	rc = mbrtowc(&out, "\xA0", 1, &state);
	assert(rc == 1);			/* 3rd byte completes the char */
	assert(out == 0x2260);			/* decoded correctly */

	rc = mbrtowc(&out, "", 1, &state);
	assert(rc == 0);			/* terminating NUL is processed */
	assert(out == 0);			/* ... and decoded as L'\0' */

	/* The following test is by Al Viro <aviro@redhat.com>:
	   one byte first, then the remaining two in a single call.  */
	out = sentinel;
	memset(&state, 0, sizeof state);

	rc = mbrtowc(&out, u0800, 1, &state);
	assert(rc == (size_t) -2);
	rc = mbrtowc(&out, u0800 + 1, 2, &state);
	assert(rc == 2);
	assert(out == 0x800);

	/* The same character, all three bytes at once.  */
	out = sentinel;
	memset(&state, 0, sizeof state);

	rc = mbrtowc(&out, u0800, 3, &state);
	assert(rc == 3);
	assert(out == 0x800);

	return 0;
}
/* Test for NUL byte processing via empty string.

   Checks that mbrtowc (NULL, "", 1, &state) returns 0 and leaves the
   state initial when no character is pending, and returns (size_t) -1
   when it interrupts a partially fed UTF-8 sequence.  utf8_test()
   selects a UTF-8 LC_CTYPE locale before calling this.  Expectations
   are assert()s; 0 is returned when all of them hold.  */
static int
utf8_test_2(void)
{
	const wchar_t sentinel = 42;	/* arbitrary number */
	mbstate_t state;
	wchar_t out;
	size_t rc;

	/* Nothing pending: NUL is a valid terminator.  */
	out = sentinel;
	memset(&state, 0, sizeof state);	/* initial conversion state */
	rc = mbrtowc(NULL, "", 1, &state);
	assert(rc == 0);
	assert(mbsinit(&state));

	/* One of three bytes pending: NUL is an invalid terminator.  */
	out = sentinel;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&out, "\xE2", 1, &state);
	assert(rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(NULL, "", 1, &state);
	assert(rc == (size_t) -1);

	/* Two of three bytes pending: NUL is still invalid.  */
	out = sentinel;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&out, "\xE2", 1, &state);
	assert(rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&out, "\x89", 1, &state);
	assert(rc == (size_t) -2);		/* 2nd byte processed */
	rc = mbrtowc(NULL, "", 1, &state);
	assert(rc == (size_t) -1);

	/* Complete character consumed: NUL is valid again.  */
	out = sentinel;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&out, "\xE2", 1, &state);
	assert(rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&out, "\x89", 1, &state);
	assert(rc == (size_t) -2);		/* 2nd byte processed */
	rc = mbrtowc(&out, "\xA0", 1, &state);
	assert(rc == 1);			/* 3rd byte processed */
	rc = mbrtowc(NULL, "", 1, &state);
	assert(rc == 0);
	assert(mbsinit(&state));

	return 0;
}
/* Test for NUL byte processing via NULL string.

   Checks that mbrtowc (NULL, NULL, 0, &state) returns 0 and leaves the
   state initial when no character is pending, and returns (size_t) -1
   when a UTF-8 sequence has only been partially fed.  utf8_test()
   selects a UTF-8 LC_CTYPE locale before calling this.  Expectations
   are assert()s; 0 is returned when all of them hold.  */
static int
utf8_test_3(void)
{
	const wchar_t sentinel = 42;	/* arbitrary number */
	mbstate_t state;
	wchar_t out;
	size_t rc;

	/* Nothing pending: valid terminator.  */
	out = sentinel;
	memset(&state, 0, sizeof state);	/* initial conversion state */
	rc = mbrtowc(NULL, NULL, 0, &state);
	assert(rc == 0);
	assert(mbsinit(&state));

	/* One of three bytes pending: invalid terminator.  */
	out = sentinel;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&out, "\xE2", 1, &state);
	assert(rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(NULL, NULL, 0, &state);
	assert(rc == (size_t) -1);

	/* Two of three bytes pending: invalid terminator.  */
	out = sentinel;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&out, "\xE2", 1, &state);
	assert(rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&out, "\x89", 1, &state);
	assert(rc == (size_t) -2);		/* 2nd byte processed */
	rc = mbrtowc(NULL, NULL, 0, &state);
	assert(rc == (size_t) -1);

	/* Complete character consumed: valid terminator again.  */
	out = sentinel;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&out, "\xE2", 1, &state);
	assert(rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&out, "\x89", 1, &state);
	assert(rc == (size_t) -2);		/* 2nd byte processed */
	rc = mbrtowc(&out, "\xA0", 1, &state);
	assert(rc == 1);			/* 3rd byte processed */
	rc = mbrtowc(NULL, NULL, 0, &state);
	assert(rc == 0);
	assert(mbsinit(&state));

	return 0;
}


/* Run the three UTF-8 tests with LC_CTYPE set to de_DE.UTF-8.
   Exits with status 1 if that locale cannot be selected; otherwise
   returns the OR of the individual test results (0 on success).  */
static int
utf8_test(void)
{
	const char *locale = "de_DE.UTF-8";
	int error = 0;

	if (!setlocale(LC_CTYPE, locale)) {
		fprintf(stderr, "locale '%s' not available!\n", locale);
		exit(1);
	}

	error |= utf8_test_1();
	error |= utf8_test_2();
	error |= utf8_test_3();

	return error;
}


/* Convert every byte value 0..127 with mbrtowc() in the current locale
   and verify that it maps to the wide character of the same value.
   LOCNAME is only used to label the output.  A line is printed for each
   failing byte, followed by the error count.
   Returns 0 if every byte converted as expected, 1 otherwise.  */
static int
check_ascii(const char *locname)
{
	const size_t mb_max = MB_CUR_MAX;	/* constant while the locale is */
	int res = 0;
	int c;

	printf("Testing locale \"%s\":\n", locname);

	for (c = 0; c <= 127; ++c) {
		char buf[mb_max];
		wchar_t wc = (wchar_t) 0xffffffff;
		mbstate_t s;
		size_t n, i;

		/* C followed by consecutive byte values, so that all
		   mb_max bytes handed to mbrtowc() are defined.  */
		for (i = 0; i < mb_max; ++i)
			buf[i] = (char) (c + i);

		memset(&s, '\0', sizeof(s));

		n = mbrtowc(&wc, buf, mb_max, &s);
		if (n == (size_t) -1) {
			printf("%s: '\\x%x': encoding error\n",
			       locname, (unsigned int) c);
			++res;
		} else if (n == (size_t) -2) {
			printf("%s: '\\x%x': incomplete character\n",
			       locname, (unsigned int) c);
			++res;
		} else if (n == 0 && c != 0) {
			printf("%s: '\\x%x': 0 returned\n",
			       locname, (unsigned int) c);
			++res;
		} else if (n != 0 && c == 0) {
			printf("%s: '\\x%x': not 0 returned\n",
			       locname, (unsigned int) c);
			++res;
		} else if (c != 0 && n != 1) {
			printf("%s: '\\x%x': not 1 returned\n",
			       locname, (unsigned int) c);
			++res;
		} else if (wc != (wchar_t) c) {
			/* %x takes an unsigned int; wchar_t need not be
			   int-sized, so convert explicitly.  */
			printf("%s: '\\x%x' != wc != L'\\x%x'\n",
			       locname, (unsigned int) c, (unsigned int) wc);
			++res;
		}
	}

	printf(res == 1 ? "%d error\n" : "%d errors\n", res);

	return res != 0;
}


/* Select NAME for all locale categories.  Returns nonzero on success.
   On failure a diagnostic is written to stderr, 0 is returned and the
   previously selected locale stays in effect.  */
static int
switch_locale(const char *name)
{
	if (setlocale(LC_ALL, name) != NULL)
		return 1;

	fprintf(stderr, "locale '%s' not available!\n", name);
	return 0;
}


/* Test driver.  Returns 0 if every check passed and nonzero otherwise;
   a locale that cannot be selected counts as a failure instead of
   silently re-testing whichever locale was active before.  */
int
main(void)
{
	int result = 0;

	/* Check mapping of ASCII range for some character sets which have
	   ASCII as a subset.  For those the wide char generated must have
	   the same value.  */
	if (switch_locale("C"))
		result |= check_ascii(setlocale(LC_ALL, NULL));
	else
		result = 1;

	if (switch_locale("de_DE.UTF-8")) {
		result |= check_ascii(setlocale(LC_ALL, NULL));
		result |= utf8_test();
	} else {
		result = 1;
	}

	if (switch_locale("ja_JP.EUC-JP"))
		result |= check_ascii(setlocale(LC_ALL, NULL));
	else
		result = 1;

	return result;
}