1/*-
2 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3 *		at Electronni Visti IA, Kiev, Ukraine.
4 *			All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $");
30
31#include "xlocale_private.h"
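/*
 * Accessor macros for the collation tables of a locale.  Each one expands
 * to a member of loc->__lc_collate, so any function using them must have
 * a locale_t variable named `loc' in scope.
 */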
33#define __collate_chain_equiv_table	(loc->__lc_collate->__chain_equiv_table)
34#define __collate_chain_pri_table	(loc->__lc_collate->__chain_pri_table)
35#define __collate_char_pri_table	(loc->__lc_collate->__char_pri_table)
36#define __collate_info			(&loc->__lc_collate->__info)
37#define __collate_large_char_pri_table	(loc->__lc_collate->__large_char_pri_table)
38#define __collate_substitute_table	(loc->__lc_collate->__substitute_table)
39
40#include "namespace.h"
41#include <arpa/inet.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <stddef.h>
45#include <string.h>
46#include <wchar.h>
47#include <errno.h>
48#include <unistd.h>
49#include <sysexits.h>
50#include <ctype.h>
51#include "un-namespace.h"
52
53#include "collate.h"
54#include "setlocale.h"
55#include "ldpart.h"
56
57#include "libc_private.h"
58
59#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
60static void wntohl(wchar_t *, int);
61#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
62void __collate_err(int ex, const char *f) __dead2;
63
64/*
65 * Normally, the __collate_* routines should all be __private_extern__,
66 * but grep is using them (3715846).  Until we can provide an alternative,
67 * we leave them public, and provide a read-only __collate_load_error variable
68 */
69#undef __collate_load_error
70int __collate_load_error = 1;
71
72__private_extern__ int
73__collate_load_tables(const char *encoding, locale_t loc)
74{
75	FILE *fp;
76	int i, saverr, chains, z;
77	char strbuf[STR_LEN], buf[PATH_MAX];
78	struct __xlocale_st_collate *TMP;
79	static struct __xlocale_st_collate *cache = NULL;
80	struct __collate_st_info info;
81	void *vp;
82
83	/* 'encoding' must be already checked. */
84	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
85		loc->__collate_load_error = 1;
86		if (loc == &__global_locale)
87			__collate_load_error = 1;
88		XL_RELEASE(loc->__lc_collate);
89		loc->__lc_collate = NULL;
90		return (_LDP_CACHE);
91	}
92
93	/*
94	 * If the locale name is the same as our cache, use the cache.
95	 */
96	if (cache && strcmp(encoding, cache->__encoding) == 0) {
97		loc->__collate_load_error = 0;
98		if (loc == &__global_locale)
99			__collate_load_error = 0;
100		XL_RELEASE(loc->__lc_collate);
101		loc->__lc_collate = cache;
102		XL_RETAIN(loc->__lc_collate);
103		return (_LDP_CACHE);
104	}
105
106	/*
107	 * Slurp the locale file into the cache.
108	 */
109
110	/* 'PathLocale' must be already set & checked. */
111	/* Range checking not needed, encoding has fixed size */
112	(void)strcpy(buf, _PathLocale);
113	(void)strcat(buf, "/");
114	(void)strcat(buf, encoding);
115	(void)strcat(buf, "/LC_COLLATE");
116	if ((fp = fopen(buf, "r")) == NULL)
117		return (_LDP_ERROR);
118
119	if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
120		saverr = errno;
121		(void)fclose(fp);
122		errno = saverr;
123		return (_LDP_ERROR);
124	}
125	chains = -1;
126	if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0)
127		chains = 1;
128	if (chains < 0) {
129		(void)fclose(fp);
130		errno = EFTYPE;
131		return (_LDP_ERROR);
132	}
133	if (chains) {
134		if (fread(&info, sizeof(info), 1, fp) != 1) {
135			saverr = errno;
136			(void)fclose(fp);
137			errno = saverr;
138			return (_LDP_ERROR);
139		}
140#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
141		for(z = 0; z < info.directive_count; z++) {
142			info.undef_pri[z] = ntohl(info.undef_pri[z]);
143			info.subst_count[z] = ntohl(info.subst_count[z]);
144		}
145		info.chain_count = ntohl(info.chain_count);
146		info.large_pri_count = ntohl(info.large_pri_count);
147#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
148		if ((chains = info.chain_count) < 0) {
149			(void)fclose(fp);
150			errno = EFTYPE;
151			return (_LDP_ERROR);
152		}
153	} else
154		chains = TABLE_SIZE;
155
156	i = sizeof(struct __xlocale_st_collate)
157	    + sizeof(struct __collate_st_chain_pri) * chains
158	    + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
159	for(z = 0; z < info.directive_count; z++)
160		i += sizeof(struct __collate_st_subst) * info.subst_count[z];
161	if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) {
162		saverr = errno;
163		(void)fclose(fp);
164		errno = saverr;
165		return (_LDP_ERROR);
166	}
167	TMP->__refcount = 2; /* one for the locale, one for the cache */
168	TMP->__free_extra = NULL;
169
170#define FREAD(a, b, c, d) \
171{ \
172	if (fread(a, b, c, d) != c) { \
173		saverr = errno; \
174		free(TMP); \
175		(void)fclose(d); \
176		errno = saverr; \
177		return (_LDP_ERROR); \
178	} \
179}
180
181	/* adjust size to read the remaining in one chunk */
182	i -= offsetof(struct __xlocale_st_collate, __char_pri_table);
183	FREAD(TMP->__char_pri_table, i, 1, fp);
184	(void)fclose(fp);
185
186	vp = (void *)(TMP + 1);
187
188	/* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
189	if (info.subst_count[0] > 0) {
190		TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
191		vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
192	} else
193		TMP->__substitute_table[0] = NULL;
194	if (info.flags & COLLATE_SUBST_DUP)
195		TMP->__substitute_table[1] = TMP->__substitute_table[0];
196	else if (info.subst_count[1] > 0) {
197		TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
198		vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
199	} else
200		TMP->__substitute_table[1] = NULL;
201
202	if (chains > 0) {
203		TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
204		vp += chains * sizeof(struct __collate_st_chain_pri);
205	} else
206		TMP->__chain_pri_table = NULL;
207	if (info.large_pri_count > 0)
208		TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
209	else
210		TMP->__large_char_pri_table = NULL;
211
212#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
213	{
214		struct __collate_st_char_pri *p = TMP->__char_pri_table;
215		for(i = UCHAR_MAX + 1; i-- > 0; p++) {
216			for(z = 0; z < info.directive_count; z++)
217				p->pri[z] = ntohl(p->pri[z]);
218		}
219	}
220	for(z = 0; z < info.directive_count; z++)
221		if (info.subst_count[z] > 0) {
222			struct __collate_st_subst *p = TMP->__substitute_table[z];
223			for(i = info.subst_count[z]; i-- > 0; p++) {
224				p->val = ntohl(p->val);
225				wntohl(p->str, STR_LEN);
226			}
227		}
228	{
229		struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
230		for(i = chains; i-- > 0; p++) {
231			wntohl(p->str, STR_LEN);
232			for(z = 0; z < info.directive_count; z++)
233				p->pri[z] = ntohl(p->pri[z]);
234		}
235	}
236	if (info.large_pri_count > 0) {
237		struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
238		for(i = info.large_pri_count; i-- > 0; p++) {
239			p->val = ntohl(p->val);
240			for(z = 0; z < info.directive_count; z++)
241				p->pri.pri[z] = ntohl(p->pri.pri[z]);
242		}
243	}
244#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
245	(void)strcpy(TMP->__encoding, encoding);
246	(void)memcpy(&TMP->__info, &info, sizeof(info));
247	XL_RELEASE(cache);
248	cache = TMP;
249	XL_RELEASE(loc->__lc_collate);
250	loc->__lc_collate = cache;
251	/* no need to retain, since we set __refcount to 2 above */
252
253	loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
254	loc->__collate_load_error = 0;
255	if (loc == &__global_locale)
256		__collate_load_error = 0;
257
258	return (_LDP_LOADED);
259}
260
/*
 * Returns the number of wide characters in s that precede the first null
 * wide character, looking at no more than len elements.  The table strings
 * this is used on are fixed-size arrays that need not be null-terminated
 * when full, so the bound is tested before each element is read.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	/* bound first: s[len] must never be read */
	while (n < len && s[n] != 0)
		n++;
	return n;
}
271
272static struct __collate_st_subst *
273substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
274{
275	int low = 0;
276	int high = n - 1;
277	int next, compar;
278	struct __collate_st_subst *p;
279
280	while (low <= high) {
281		next = (low + high) / 2;
282		p = tab + next;
283		compar = key - p->val;
284		if (compar == 0)
285			return p;
286		if (compar > 0)
287			low = next + 1;
288		else
289			high = next - 1;
290	}
291	return NULL;
292}
293
294__private_extern__ wchar_t *
295__collate_substitute(const wchar_t *s, int which, locale_t loc)
296{
297	int dest_len, len, nlen;
298	int n, delta, nsubst;
299	wchar_t *dest_str = NULL;
300	const wchar_t *fp;
301	struct __collate_st_subst *subst, *match;
302
303	if (s == NULL || *s == '\0')
304		return (__collate_wcsdup(L""));
305	dest_len = wcslen(s);
306	nsubst = __collate_info->subst_count[which];
307	if (nsubst <= 0)
308		return __collate_wcsdup(s);
309	subst = __collate_substitute_table[which];
310	delta = dest_len / 4;
311	if (delta < 2)
312		delta = 2;
313	dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
314	if (dest_str == NULL)
315		__collate_err(EX_OSERR, __func__);
316	len = 0;
317	while (*s) {
318		if ((match = substsearch(*s, subst, nsubst)) != NULL) {
319			fp = match->str;
320			n = __collate_wcsnlen(fp, STR_LEN);
321		} else {
322			fp = s;
323			n = 1;
324		}
325		nlen = len + n;
326		if (dest_len <= nlen) {
327			dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
328			if (dest_str == NULL)
329				__collate_err(EX_OSERR, __func__);
330		}
331		wcsncpy(dest_str + len, fp, n);
332		len += n;
333		s++;
334	}
335	dest_str[len] = 0;
336	return (dest_str);
337}
338
339static struct __collate_st_chain_pri *
340chainsearch(const wchar_t *key, int *len, locale_t loc)
341{
342	int low = 0;
343	int high = __collate_info->chain_count - 1;
344	int next, compar, l;
345	struct __collate_st_chain_pri *p;
346	struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
347
348	while (low <= high) {
349		next = (low + high) / 2;
350		p = tab + next;
351		compar = *key - *p->str;
352		if (compar == 0) {
353			l = __collate_wcsnlen(p->str, STR_LEN);
354			compar = wcsncmp(key, p->str, l);
355			if (compar == 0) {
356				*len = l;
357				return p;
358			}
359		}
360		if (compar > 0)
361			low = next + 1;
362		else
363			high = next - 1;
364	}
365	return NULL;
366}
367
368static struct __collate_st_large_char_pri *
369largesearch(const wchar_t key, locale_t loc)
370{
371	int low = 0;
372	int high = __collate_info->large_pri_count - 1;
373	int next, compar;
374	struct __collate_st_large_char_pri *p;
375	struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
376
377	while (low <= high) {
378		next = (low + high) / 2;
379		p = tab + next;
380		compar = key - p->val;
381		if (compar == 0)
382			return p;
383		if (compar > 0)
384			low = next + 1;
385		else
386			high = next - 1;
387	}
388	return NULL;
389}
390
391__private_extern__ void
392__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
393{
394	struct __collate_st_chain_pri *p2;
395	int l;
396
397	*len = 1;
398	*prim = *sec = 0;
399	p2 = chainsearch(t, &l, loc);
400	/* use the chain if prim >= 0 */
401	if (p2 && p2->pri[0] >= 0) {
402		*len = l;
403		*prim = p2->pri[0];
404		*sec = p2->pri[1];
405		return;
406	}
407	if (*t <= UCHAR_MAX) {
408		*prim = __collate_char_pri_table[*t].pri[0];
409		*sec = __collate_char_pri_table[*t].pri[1];
410		return;
411	}
412	if (__collate_info->large_pri_count > 0) {
413		struct __collate_st_large_char_pri *match;
414		match = largesearch(*t, loc);
415		if (match) {
416			*prim = match->pri.pri[0];
417			*sec = match->pri.pri[1];
418			return;
419		}
420	}
421	*prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
422	*sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
423}
424
425/*
426 * This is only provided for programs (like grep) that are calling this
427 * private function.  This will go away eventually.
428 */
429void
430__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
431{
432	locale_t loc = __current_locale();
433	wchar_t *w = __collate_mbstowcs((const char *)t, loc);
434	int sverrno;
435
436	__collate_lookup_l(w, len, prim, sec, loc);
437	sverrno = errno;
438	free(w);
439	errno = sverrno;
440}
441
442__private_extern__ void
443__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
444{
445	struct __collate_st_chain_pri *p2;
446	int p, l;
447
448	*len = 1;
449	*pri = 0;
450	p2 = chainsearch(t, &l, loc);
451	if (p2) {
452		p = p2->pri[which];
453		/* use the chain if pri >= 0 */
454		if (p >= 0) {
455			*len = l;
456			*pri = p;
457			return;
458		}
459	}
460	if (*t <= UCHAR_MAX) {
461		*pri = __collate_char_pri_table[*t].pri[which];
462		return;
463	}
464	if (__collate_info->large_pri_count > 0) {
465		struct __collate_st_large_char_pri *match;
466		match = largesearch(*t, loc);
467		if (match) {
468			*pri = match->pri.pri[which];
469			return;
470		}
471	}
472	*pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
473}
474
475__private_extern__ wchar_t *
476__collate_mbstowcs(const char *s, locale_t loc)
477{
478	static const mbstate_t initial;
479	mbstate_t st;
480	size_t len;
481	const char *ss;
482	wchar_t *wcs;
483
484	ss = s;
485	st = initial;
486	if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1)
487		return NULL;
488	if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
489		__collate_err(EX_OSERR, __func__);
490	st = initial;
491	mbsrtowcs_l(wcs, &s, len, &st, loc);
492	wcs[len] = 0;
493
494	return (wcs);
495}
496
497__private_extern__ wchar_t *
498__collate_wcsdup(const wchar_t *s)
499{
500	size_t len = wcslen(s) + 1;
501	wchar_t *wcs;
502
503	if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
504		__collate_err(EX_OSERR, __func__);
505	wcscpy(wcs, s);
506	return (wcs);
507}
508
509__private_extern__ void
510__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
511{
512	int pri, len;
513	size_t slen;
514	const wchar_t *t;
515	wchar_t *tt = NULL, *tr = NULL;
516	int direc, pass;
517	wchar_t *xfp;
518	struct __collate_st_info *info = __collate_info;
519	int sverrno;
520
521	for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
522		xf[pass] = NULL;
523	for(pass = 0; pass < info->directive_count; pass++) {
524		direc = info->directive[pass];
525		if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
526			sverrno = errno;
527			free(tt);
528			errno = sverrno;
529			tt = __collate_substitute(src, pass, loc);
530		}
531		if (direc & DIRECTIVE_BACKWARD) {
532			wchar_t *bp, *fp, c;
533			sverrno = errno;
534			free(tr);
535			errno = sverrno;
536			tr = __collate_wcsdup(tt ? tt : src);
537			bp = tr;
538			fp = tr + wcslen(tr) - 1;
539			while(bp < fp) {
540				c = *bp;
541				*bp++ = *fp;
542				*fp-- = c;
543			}
544			t = (const wchar_t *)tr;
545		} else if (tt)
546			t = (const wchar_t *)tt;
547		else
548			t = (const wchar_t *)src;
549		sverrno = errno;
550		if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
551			errno = sverrno;
552			slen = 0;
553			goto end;
554		}
555		errno = sverrno;
556		xfp = xf[pass];
557		if (direc & DIRECTIVE_POSITION) {
558			while(*t) {
559				__collate_lookup_which(t, &len, &pri, pass, loc);
560				t += len;
561				if (pri <= 0) {
562					if (pri < 0) {
563						errno = EINVAL;
564						slen = 0;
565						goto end;
566					}
567					pri = COLLATE_MAX_PRIORITY;
568				}
569				*xfp++ = pri;
570			}
571		} else {
572			while(*t) {
573				__collate_lookup_which(t, &len, &pri, pass, loc);
574				t += len;
575				if (pri <= 0) {
576					if (pri < 0) {
577						errno = EINVAL;
578						slen = 0;
579						goto end;
580					}
581					continue;
582				}
583				*xfp++ = pri;
584			}
585 		}
586		*xfp = 0;
587	}
588  end:
589	sverrno = errno;
590	free(tt);
591	free(tr);
592	errno = sverrno;
593}
594
595__private_extern__ void
596__collate_err(int ex, const char *f)
597{
598	const char *s;
599	int serrno = errno;
600
601	s = _getprogname();
602	_write(STDERR_FILENO, s, strlen(s));
603	_write(STDERR_FILENO, ": ", 2);
604	s = f;
605	_write(STDERR_FILENO, s, strlen(s));
606	_write(STDERR_FILENO, ": ", 2);
607	s = strerror(serrno);
608	_write(STDERR_FILENO, s, strlen(s));
609	_write(STDERR_FILENO, "\n", 1);
610	exit(ex);
611}
612
613/*
614 * __collate_collating_symbol takes the multibyte string specified by
615 * src and slen, and using ps, converts that to a wide character.  Then
616 * it is checked to verify it is a collating symbol, and then copies
617 * it to the wide character string specified by dst and dlen (the
618 * results are not null terminated).  The length of the wide characters
619 * copied to dst is returned if successful.  Zero is returned if no such
620 * collating symbol exists.  (size_t)-1 is returned if there are wide-character
621 * conversion errors, if the length of the converted string is greater that
622 * STR_LEN or if dlen is too small.  It is up to the calling routine to
623 * preserve the mbstate_t structure as needed.
624 */
625__private_extern__ size_t
626__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
627{
628	wchar_t wname[STR_LEN];
629	wchar_t w, *wp;
630	size_t len, l;
631
632	/* POSIX locale */
633	if (loc->__collate_load_error) {
634		if (dlen < 1)
635			return (size_t)-1;
636		if (slen != 1 || !isascii(*src))
637			return 0;
638		*dst = *src;
639		return 1;
640	}
641	for(wp = wname, len = 0; slen > 0; len++) {
642		l = mbrtowc_l(&w, src, slen, ps, loc);
643		if (l == (size_t)-1 || l == (size_t)-2)
644			return (size_t)-1;
645		if (l == 0)
646			break;
647		if (len >= STR_LEN)
648			return -1;
649		*wp++ = w;
650		src += l;
651		slen = (long)slen - (long)l;
652	}
653	if (len == 0 || len > dlen)
654		return (size_t)-1;
655	if (len == 1) {
656		if (*wname <= UCHAR_MAX) {
657			if (__collate_char_pri_table[*wname].pri[0] >= 0) {
658				if (dlen > 0)
659					*dst = *wname;
660				return 1;
661			}
662			return 0;
663		} else if (__collate_info->large_pri_count > 0) {
664			struct __collate_st_large_char_pri *match;
665			match = largesearch(*wname, loc);
666			if (match && match->pri.pri[0] >= 0) {
667				if (dlen > 0)
668					*dst = *wname;
669				return 1;
670			}
671		}
672		return 0;
673	}
674	*wp = 0;
675	if (__collate_info->chain_count > 0) {
676		struct __collate_st_chain_pri *match;
677		int ll;
678		match = chainsearch(wname, &ll, loc);
679		if (match) {
680			if (ll < dlen)
681				dlen = ll;
682			wcsncpy(dst, wname, dlen);
683			return ll;
684		}
685	}
686	return 0;
687}
688
689/*
690 * __collate_equiv_class returns the equivalence class number for the symbol
691 * specified by src and slen, using ps to convert from multi-byte to wide
692 * character.  Zero is returned if the symbol is not in an equivalence
693 * class.  -1 is returned if there are wide character conversion error,
694 * if there are any greater-than-8-bit characters or if a multi-byte symbol
695 * is greater or equal to STR_LEN in length.  It is up to the calling
696 * routine to preserve the mbstate_t structure as needed.
697 */
698__private_extern__ int
699__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
700{
701	wchar_t wname[STR_LEN];
702	wchar_t w, *wp;
703	size_t len, l;
704	int e;
705
706	/* POSIX locale */
707	if (loc->__collate_load_error)
708		return 0;
709	for(wp = wname, len = 0; slen > 0; len++) {
710		l = mbrtowc_l(&w, src, slen, ps, loc);
711		if (l == (size_t)-1 || l == (size_t)-2)
712			return -1;
713		if (l == 0)
714			break;
715		if (len >= STR_LEN)
716			return -1;
717		*wp++ = w;
718		src += l;
719		slen = (long)slen - (long)l;
720	}
721	if (len == 0)
722		return -1;
723	if (len == 1) {
724		e = -1;
725		if (*wname <= UCHAR_MAX)
726			e = __collate_char_pri_table[*wname].pri[0];
727		else if (__collate_info->large_pri_count > 0) {
728			struct __collate_st_large_char_pri *match;
729			match = largesearch(*wname, loc);
730			if (match)
731				e = match->pri.pri[0];
732		}
733		if (e == 0)
734			return IGNORE_EQUIV_CLASS;
735		return e > 0 ? e : 0;
736	}
737	*wp = 0;
738	if (__collate_info->chain_count > 0) {
739		struct __collate_st_chain_pri *match;
740		int ll;
741		match = chainsearch(wname, &ll, loc);
742		if (match) {
743			e = match->pri[0];
744			if (e == 0)
745				return IGNORE_EQUIV_CLASS;
746			return e < 0 ? -e : e;
747		}
748	}
749	return 0;
750}
751
752/*
753 * __collate_equiv_match tries to match any single or multi-character symbol
754 * in equivalence class equiv_class in the multi-byte string specified by src
755 * and slen.  If start is non-zero, it is taken to be the first (pre-converted)
756 * wide character.  Subsequence wide characters, if needed, will use ps in
757 * the conversion.  On a successful match, the length of the matched string
758 * is returned (including the start character).  If dst is non-NULL, the
759 * matched wide-character string is copied to dst, a wide character array of
760 * length dlen (the results are not zero-terminated).  If rlen is non-NULL,
761 * the number of character in src actually used is returned.  Zero is
762 * returned by __collate_equiv_match if there is no match.  (size_t)-1 is
763 * returned on error: if there were conversion errors or if dlen is too small
764 * to accept the results.  On no match or error, ps is restored to its incoming
765 * state.
766 */
767size_t
768__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
769{
770	wchar_t w;
771	size_t len, l, clen;
772	int i;
773	wchar_t buf[STR_LEN], *wp;
774	mbstate_t save;
775	const char *s = src;
776	size_t sl = slen;
777	struct __collate_st_chain_pri *ch = NULL;
778
779	/* POSIX locale */
780	if (loc->__collate_load_error)
781		return (size_t)-1;
782	if (equiv_class == IGNORE_EQUIV_CLASS)
783		equiv_class = 0;
784	if (ps)
785		save = *ps;
786	wp = buf;
787	len = clen = 0;
788	if (start) {
789		*wp++ = start;
790		len = 1;
791	}
792	/* convert up to the max chain length */
793	while(sl > 0 && len < __collate_info->chain_max_len) {
794		l = mbrtowc_l(&w, s, sl, ps, loc);
795		if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
796			break;
797		*wp++ = w;
798		s += l;
799		clen += l;
800		sl -= l;
801		len++;
802	}
803	*wp = 0;
804	if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
805		int e = ch->pri[0];
806		if (e < 0)
807			e = -e;
808		if (e == equiv_class)
809			goto found;
810	}
811	/* try single character */
812	i = 1;
813	if (*buf <= UCHAR_MAX) {
814		if (equiv_class == __collate_char_pri_table[*buf].pri[0])
815			goto found;
816	} else if (__collate_info->large_pri_count > 0) {
817		struct __collate_st_large_char_pri *match;
818		match = largesearch(*buf, loc);
819		if (match && equiv_class == match->pri.pri[0])
820			goto found;
821	}
822	/* no match */
823	if (ps)
824		*ps = save;
825	return 0;
826found:
827	/* if we converted more than we used, restore to initial and reconvert
828	 * up to what did match */
829	if (i < len) {
830		len = i;
831		if (ps)
832			*ps = save;
833		if (start)
834			i--;
835		clen = 0;
836		while(i-- > 0) {
837			l = mbrtowc_l(&w, src, slen, ps, loc);
838			src += l;
839			clen += l;
840			slen -= l;
841		}
842	}
843	if (dst) {
844		if (dlen < len) {
845			if (ps)
846				*ps = save;
847			return (size_t)-1;
848		}
849		for(wp = buf; len > 0; len--)
850		    *dst++ = *wp++;
851	}
852	if (rlen)
853		*rlen = clen;
854	return len;
855}
856
857/*
858 * __collate_equiv_value returns the primary collation value for the given
859 * collating symbol specified by str and len.  Zero or negative is return
860 * if the collating symbol was not found.  (Use by the bracket code in TRE.)
861 */
862__private_extern__ int
863__collate_equiv_value(locale_t loc, const wchar_t *str, size_t len)
864{
865	int e;
866
867	if (len < 1 || len >= STR_LEN)
868		return -1;
869
870	/* POSIX locale */
871	if (loc->__collate_load_error)
872		return (len == 1 && *str <= UCHAR_MAX) ? *str : -1;
873
874	if (len == 1) {
875		e = -1;
876		if (*str <= UCHAR_MAX)
877			e = __collate_char_pri_table[*str].pri[0];
878		else if (__collate_info->large_pri_count > 0) {
879			struct __collate_st_large_char_pri *match;
880			match = largesearch(*str, loc);
881			if (match)
882				e = match->pri.pri[0];
883		}
884		if (e == 0)
885			return IGNORE_EQUIV_CLASS;
886		return e > 0 ? e : 0;
887	}
888	if (__collate_info->chain_count > 0) {
889		wchar_t name[STR_LEN];
890		struct __collate_st_chain_pri *match;
891		int ll;
892
893		wcsncpy(name, str, len);
894		name[len] = 0;
895		match = chainsearch(name, &ll, loc);
896		if (match) {
897			e = match->pri[0];
898			if (e == 0)
899				return IGNORE_EQUIV_CLASS;
900			return e < 0 ? -e : e;
901		}
902	}
903	return 0;
904}
905
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/*
 * Converts the wide string `str' from network to host byte order in place.
 * Conversion stops at the first null element or after `len' elements,
 * whichever comes first.  The bound is tested before the element is read
 * so that a full, unterminated array of `len' elements is never overrun.
 */
static void
wntohl(wchar_t *str, int len)
{
	for(; len > 0 && *str; str++, len--)
		*str = ntohl(*str);
}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
914
915#ifdef COLLATE_DEBUG
/*
 * Debug helper: formats the character code `c' for display.  Printable
 * ASCII (32..126) is shown as 'c' followed by a space; anything else as
 * \x{hh...} in hexadecimal.  The result lives in a static buffer that is
 * overwritten by the next call.
 */
static char *
show(int c)
{
	/* "\x{" + up to 8 hex digits + "}" + NUL needs 12 bytes */
	static char buf[16];

	if (c >=32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
927
/*
 * Debug helper: formats up to `len' elements of the wide string `t' for
 * display, stopping early at a null element.  Printable ASCII (32..126) is
 * copied as is; anything else is shown as \x{hh...} in hexadecimal.  Output
 * that would not fit in the static result buffer is dropped at an element
 * boundary.  The buffer is overwritten by the next call.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[128];
	char *cp = buf;
	char *const end = buf + sizeof(buf);

	/* test the bound first: t need not be terminated when full */
	for(; len > 0 && *t; len--, t++) {
		size_t room = (size_t)(end - cp);

		if (*t >=32 && *t <= 126) {
			/* need one byte for the char and one for the NUL */
			if (room < 2)
				break;
			*cp++ = (char)*t;
		} else {
			int n = snprintf(cp, room, "\\x{%02x}", (unsigned int)*t);

			/* on truncation discard the partial escape */
			if (n < 0 || (size_t)n >= room)
				break;
			cp += n;
		}
	}
	*cp = 0;
	return buf;
}
945
946void
947__collate_print_tables()
948{
949	int i, z;
950	locale_t loc = __current_locale();
951
952	printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
953	    __collate_info->directive[0], __collate_info->directive[1],
954	    __collate_info->flags, __collate_info->chain_max_len,
955	    __collate_info->directive_count,
956	    __collate_info->undef_pri[0], __collate_info->undef_pri[1],
957	    __collate_info->subst_count[0], __collate_info->subst_count[1],
958	    __collate_info->chain_count, __collate_info->large_pri_count);
959	for(z = 0; z < __collate_info->directive_count; z++) {
960		if (__collate_info->subst_count[z] > 0) {
961			struct __collate_st_subst *p2 = __collate_substitute_table[z];
962			if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
963				printf("Both substitute tables:\n");
964			else
965				printf("Substitute table %d:\n", z);
966			for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
967				printf("\t%s --> \"%s\"\n",
968					show(p2->val),
969					showwcs(p2->str, STR_LEN));
970		}
971	}
972	if (__collate_info->chain_count > 0) {
973		printf("Chain priority table:\n");
974		struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
975		for (i = __collate_info->chain_count; i-- > 0; p2++) {
976			printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
977			for(z = 0; z < __collate_info->directive_count; z++)
978				printf(" %d", p2->pri[z]);
979			putchar('\n');
980		}
981	}
982	printf("Char priority table:\n");
983	{
984		struct __collate_st_char_pri *p2 = __collate_char_pri_table;
985		for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
986			printf("\t%s :", show(i));
987			for(z = 0; z < __collate_info->directive_count; z++)
988				printf(" %d", p2->pri[z]);
989			putchar('\n');
990		}
991	}
992	if (__collate_info->large_pri_count > 0) {
993		struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
994		printf("Large priority table:\n");
995		for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
996			printf("\t%s :", show(p2->val));
997			for(z = 0; z < __collate_info->directive_count; z++)
998				printf(" %d", p2->pri.pri[z]);
999			putchar('\n');
1000		}
1001	}
1002}
1003#endif
1004