1/*-
2 * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org>
3 * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us>
4 * Copyright 2017 Nexenta Systems, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/lib/libc/tests/string/wcscoll_test.c 317647 2017-05-01 12:42:06Z bapt $");
31
#include <errno.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>
37
38#include <atf-c.h>
39
/*
 * qsort(3) comparator for arrays of wchar_t.
 *
 * Each element is copied into its own one-character, NUL-terminated wide
 * string so that the pair can be ordered with wcscoll(), i.e. according to
 * the LC_COLLATE category of the current locale.
 */
static int
cmp(const void *a, const void *b)
{
	wchar_t lhs[2], rhs[2];

	lhs[0] = *(const wchar_t *)a;
	lhs[1] = L'\0';
	rhs[0] = *(const wchar_t *)b;
	rhs[1] = L'\0';

	return (wcscoll(lhs, rhs));
}
48
49ATF_TC_WITHOUT_HEAD(russian_collation);
50ATF_TC_BODY(russian_collation, tc)
51{
52	wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz������������������������������������������������������������������������������������������������������������������������������������";
53	wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ������������������������������������������������������������������������������������������������������������������������������������";
54
55	ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL,
56	    "Fail to set locale to \"ru_RU.UTF-8\"");
57	qsort(c, wcslen(c), sizeof(wchar_t), cmp);
58	ATF_CHECK_MSG(wcscmp(c, res) == 0,
59	    "Bad collation, expected: '%ls' got '%ls'", res, c);
60}
61
/*
 * Sizing for the strcoll_vs_strxfrm test case:
 *   NSTRINGS   - number of random strings generated and compared pairwise
 *   MAXSTRLEN  - size in bytes of each source string buffer, including the
 *                terminating NUL
 *   MAXXFRMLEN - size in bytes of each buffer handed to strxfrm()
 */
#define	NSTRINGS	2000
#define	MAXSTRLEN	20
#define	MAXXFRMLEN	(20 * MAXSTRLEN)

/* One test string together with its strxfrm() transformation. */
struct cstr {
	char	sval[MAXSTRLEN];	/* source string */
	char	xval[MAXXFRMLEN];	/* strxfrm() output for sval */
};
typedef struct cstr cstr;
70
71ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
72ATF_TC_BODY(strcoll_vs_strxfrm, tc)
73{
74	cstr	data[NSTRINGS];
75	char	*curloc;
76	int	i, j;
77
78	curloc = setlocale(LC_ALL, "en_US.UTF-8");
79	ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
80
81	/* Ensure new random() values on every run */
82	srandom((unsigned int) time(NULL));
83
84	/* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
85	for (i = 0; i < NSTRINGS; i++) {
86		char	*p;
87		int	len;
88
89again:
90		p = data[i].sval;
91		len = 1 + (random() % (MAXSTRLEN - 1));
92		while (len > 0) {
93			int c;
94			/*
95			 * Generate random printable char in ISO8859-1 range.
96			 * Bias towards producing a lot of spaces.
97			 */
98
99			if ((random() % 16) < 3) {
100				c = ' ';
101			} else {
102				do {
103					c = random() & 0xFF;
104				} while (!((c >= ' ' && c <= 127) ||
105				    (c >= 0xA0 && c <= 0xFF)));
106			}
107
108			if (c <= 127) {
109				*p++ = c;
110				len--;
111			} else {
112				if (len < 2)
113					break;
114				/* Poor man's utf8-ification */
115				*p++ = 0xC0 + (c >> 6);
116				len--;
117				*p++ = 0x80 + (c & 0x3F);
118				len--;
119			}
120		}
121		*p = '\0';
122		/* strxfrm() each string as we produce it */
123		errno = 0;
124		ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
125		    MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
126		    " string exceeded %d bytes", (int)strlen(data[i].sval),
127		    MAXXFRMLEN);
128
129		/*
130		 * Amend strxfrm() failing on certain characters to be fixed and
131		 * test later
132		 */
133		if (errno != 0)
134			goto again;
135	}
136
137	for (i = 0; i < NSTRINGS; i++) {
138		for (j = 0; j < NSTRINGS; j++) {
139			int sr = strcoll(data[i].sval, data[j].sval);
140			int sx = strcmp(data[i].xval, data[j].xval);
141
142			ATF_CHECK_MSG(!((sr * sx < 0) ||
143			    (sr * sx == 0 && sr + sx != 0)),
144			    "%s: diff for \"%s\" and \"%s\"",
145			    curloc, data[i].sval, data[j].sval);
146		}
147	}
148}
149
150ATF_TP_ADD_TCS(tp)
151{
152	ATF_TP_ADD_TC(tp, russian_collation);
153	ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);
154
155	return (atf_no_error());
156}
157