1/*	$NetBSD: strcasecmp_utf8.c,v 1.2 2017/02/14 01:16:49 christos Exp $	*/
2
3/*++
4/* NAME
5/*	strcasecmp_utf8 3
6/* SUMMARY
7/*	caseless string comparison
8/* SYNOPSIS
9/*	#include <stringops.h>
10/*
11/*	int	strcasecmp_utf8(
12/*	const char *s1,
13/*	const char *s2)
14/*
15/*	int	strncasecmp_utf8(
16/*	const char *s1,
17/*	const char *s2,
18/*	ssize_t	len)
19/* AUXILIARY FUNCTIONS
20/*	int	strcasecmp_utf8x(
21/*	int	flags,
22/*	const char *s1,
23/*	const char *s2)
24/*
25/*	int	strncasecmp_utf8x(
26/*	int	flags,
27/*	const char *s1,
28/*	const char *s2,
29/*	ssize_t	len)
30/* DESCRIPTION
31/*	strcasecmp_utf8() implements caseless string comparison for
32/*	UTF-8 text, with an API similar to strcasecmp(). Only ASCII
33/*	characters are casefolded when the code is compiled without
34/*	EAI support or when util_utf8_enable is zero.
35/*
36/*	strncasecmp_utf8() implements caseless string comparison
37/*	for UTF-8 text, with an API similar to strncasecmp(). Only
38/*	ASCII characters are casefolded when the code is compiled
39/*	without EAI support or when util_utf8_enable is zero.
40/*
41/*	strcasecmp_utf8x() and strncasecmp_utf8x() implement a more
42/*	complex API that provides the above functionality and more.
43/*
44/*	Arguments:
45/* .IP "s1, s2"
46/*	Null-terminated strings to be compared.
47/* .IP len
48/*	String length before casefolding.
49/* .IP flags
50/*	Zero or CASEF_FLAG_UTF8. The latter flag enables UTF-8 case
51/*	folding instead of folding only ASCII characters. This flag
52/*	is ignored when compiled without EAI support.
53/* SEE ALSO
54/*	casefold(), casefold text for caseless comparison.
55/* LICENSE
56/* .ad
57/* .fi
58/*	The Secure Mailer license must be distributed with this software.
59/* AUTHOR(S)
60/*	Wietse Venema
61/*	IBM T.J. Watson Research
62/*	P.O. Box 704
63/*	Yorktown Heights, NY 10598, USA
64/*
65/*	Wietse Venema
66/*	Google, Inc.
67/*	111 8th Avenue
68/*	New York, NY 10011, USA
69/*--*/
70
71 /*
72  * System library.
73  */
74#include <sys_defs.h>
75#include <string.h>
76
77#ifdef STRCASECMP_IN_STRINGS_H
78#include <strings.h>
79#endif
80
81 /*
82  * Utility library.
83  */
84#include <stringops.h>
85
86#define STR(x)	vstring_str(x)
87
88static VSTRING *f1;			/* casefold result for s1 */
89static VSTRING *f2;			/* casefold result for s2 */
90
91/* strcasecmp_utf8_init - initialize */
92
93static void strcasecmp_utf8_init(void)
94{
95    f1 = vstring_alloc(100);
96    f2 = vstring_alloc(100);
97}
98
99/* strcasecmp_utf8x - caseless string comparison */
100
101int     strcasecmp_utf8x(int flags, const char *s1, const char *s2)
102{
103
104    /*
105     * Short-circuit optimization for ASCII-only text. This may be slower
106     * than using a cache for all results. We must not expose strcasecmp(3)
107     * to non-ASCII text.
108     */
109    if (allascii(s1) && allascii(s2))
110	return (strcasecmp(s1, s2));
111
112    if (f1 == 0)
113	strcasecmp_utf8_init();
114
115    /*
116     * Cross our fingers and hope that strcmp() remains agnostic of
117     * charactersets and locales.
118     */
119    flags &= CASEF_FLAG_UTF8;
120    casefoldx(flags, f1, s1, -1);
121    casefoldx(flags, f2, s2, -1);
122    return (strcmp(STR(f1), STR(f2)));
123}
124
125/* strncasecmp_utf8x - caseless string comparison */
126
127int     strncasecmp_utf8x(int flags, const char *s1, const char *s2,
128			          ssize_t len)
129{
130
131    /*
132     * Consider using a cache for all results.
133     */
134    if (f1 == 0)
135	strcasecmp_utf8_init();
136
137    /*
138     * Short-circuit optimization for ASCII-only text. This may be slower
139     * than using a cache for all results. See comments above for limitations
140     * of strcasecmp().
141     */
142    if (allascii_len(s1, len) && allascii_len(s2, len))
143	return (strncasecmp(s1, s2, len));
144
145    /*
146     * Caution: casefolding may change the number of bytes. See comments
147     * above for concerns about strcmp().
148     */
149    flags &= CASEF_FLAG_UTF8;
150    casefoldx(flags, f1, s1, len);
151    casefoldx(flags, f2, s2, len);
152    return (strcmp(STR(f1), STR(f2)));
153}
154
155#ifdef TEST
156#include <stdio.h>
157#include <stdlib.h>
158#include <vstream.h>
159#include <vstring_vstream.h>
160#include <msg_vstream.h>
161#include <argv.h>
162
163int     main(int argc, char **argv)
164{
165    VSTRING *buffer = vstring_alloc(1);
166    ARGV   *cmd;
167    char  **args;
168    int     len;
169    int     flags;
170    int     res;
171
172    msg_vstream_init(argv[0], VSTREAM_ERR);
173    flags = CASEF_FLAG_UTF8;
174    util_utf8_enable = 1;
175    while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
176	vstream_printf("> %s\n", STR(buffer));
177	cmd = argv_split(STR(buffer), CHARS_SPACE);
178	if (cmd->argc == 0 || cmd->argv[0][0] == '#')
179	    continue;
180	args = cmd->argv;
181
182	/*
183	 * Compare two strings.
184	 */
185	if (strcmp(args[0], "compare") == 0 && cmd->argc == 3) {
186	    res = strcasecmp_utf8x(flags, args[1], args[2]);
187	    vstream_printf("\"%s\" %s \"%s\"\n",
188			   args[1],
189			   res < 0 ? "<" : res == 0 ? "==" : ">",
190			   args[2]);
191	}
192
193	/*
194	 * Compare two substrings.
195	 */
196	else if (strcmp(args[0], "compare-len") == 0 && cmd->argc == 4
197		 && sscanf(args[3], "%d", &len) == 1 && len >= 0) {
198	    res = strncasecmp_utf8x(flags, args[1], args[2], len);
199	    vstream_printf("\"%.*s\" %s \"%.*s\"\n",
200			   len, args[1],
201			   res < 0 ? "<" : res == 0 ? "==" : ">",
202			   len, args[2]);
203	}
204
205	/*
206	 * Usage.
207	 */
208	else {
209	    vstream_printf("Usage: %s compare <s1> <s2> | compare-len <s1> <s2> <len>\n",
210			   argv[0]);
211	}
212	vstream_fflush(VSTREAM_OUT);
213	argv_free(cmd);
214    }
215    exit(0);
216}
217
218#endif					/* TEST */
219