1/*- 2 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> 3 * at Electronni Visti IA, Kiev, Ukraine. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $"); 30 31#include "xlocale_private.h" 32/* assumes the locale_t variable is named loc */ 33#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table) 34#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table) 35#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table) 36#define __collate_info (&loc->__lc_collate->__info) 37#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table) 38#define __collate_substitute_table (loc->__lc_collate->__substitute_table) 39 40#include "namespace.h" 41#include <arpa/inet.h> 42#include <stdio.h> 43#include <stdlib.h> 44#include <stddef.h> 45#include <string.h> 46#include <wchar.h> 47#include <errno.h> 48#include <unistd.h> 49#include <sysexits.h> 50#include <ctype.h> 51#include "un-namespace.h" 52 53#include "collate.h" 54#include "setlocale.h" 55#include "ldpart.h" 56 57#include "libc_private.h" 58 59#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN 60static void wntohl(wchar_t *, int); 61#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ 62void __collate_err(int ex, const char *f) __dead2; 63 64/* 65 * Normally, the __collate_* routines should all be __private_extern__, 66 * but grep is using them (3715846). Until we can provide an alternative, 67 * we leave them public, and provide a read-only __collate_load_error variable 68 */ 69#undef __collate_load_error 70int __collate_load_error = 1; 71 72__private_extern__ int 73__collate_load_tables(const char *encoding, locale_t loc) 74{ 75 FILE *fp; 76 int i, saverr, chains, z; 77 char strbuf[STR_LEN], buf[PATH_MAX]; 78 struct __xlocale_st_collate *TMP; 79 static struct __xlocale_st_collate *cache = NULL; 80 struct __collate_st_info info; 81 void *vp; 82 83 /* 'encoding' must be already checked. */ 84 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 85 loc->__collate_load_error = 1; 86 if (loc == &__global_locale) 87 __collate_load_error = 1; 88 XL_RELEASE(loc->__lc_collate); 89 loc->__lc_collate = NULL; 90 return (_LDP_CACHE); 91 } 92 93 /* 94 * If the locale name is the same as our cache, use the cache. 95 */ 96 if (cache && strcmp(encoding, cache->__encoding) == 0) { 97 loc->__collate_load_error = 0; 98 if (loc == &__global_locale) 99 __collate_load_error = 0; 100 XL_RELEASE(loc->__lc_collate); 101 loc->__lc_collate = cache; 102 XL_RETAIN(loc->__lc_collate); 103 return (_LDP_CACHE); 104 } 105 106 /* 107 * Slurp the locale file into the cache. 108 */ 109 110 /* 'PathLocale' must be already set & checked. */ 111 /* Range checking not needed, encoding has fixed size */ 112 (void)strcpy(buf, _PathLocale); 113 (void)strcat(buf, "/"); 114 (void)strcat(buf, encoding); 115 (void)strcat(buf, "/LC_COLLATE"); 116 if ((fp = fopen(buf, "r")) == NULL) 117 return (_LDP_ERROR); 118 119 if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) { 120 saverr = errno; 121 (void)fclose(fp); 122 errno = saverr; 123 return (_LDP_ERROR); 124 } 125 chains = -1; 126 if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0) 127 chains = 1; 128 if (chains < 0) { 129 (void)fclose(fp); 130 errno = EFTYPE; 131 return (_LDP_ERROR); 132 } 133 if (chains) { 134 if (fread(&info, sizeof(info), 1, fp) != 1) { 135 saverr = errno; 136 (void)fclose(fp); 137 errno = saverr; 138 return (_LDP_ERROR); 139 } 140#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN 141 for(z = 0; z < info.directive_count; z++) { 142 info.undef_pri[z] = ntohl(info.undef_pri[z]); 143 info.subst_count[z] = ntohl(info.subst_count[z]); 144 } 145 info.chain_count = ntohl(info.chain_count); 146 info.large_pri_count = ntohl(info.large_pri_count); 147#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ 148 if ((chains = info.chain_count) < 0) { 149 (void)fclose(fp); 150 errno = EFTYPE; 151 return (_LDP_ERROR); 152 } 153 } else 154 chains = TABLE_SIZE; 155 156 i = sizeof(struct __xlocale_st_collate) 157 + sizeof(struct __collate_st_chain_pri) * chains 158 + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count; 159 for(z = 0; z < info.directive_count; z++) 160 i += sizeof(struct __collate_st_subst) * info.subst_count[z]; 161 if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) { 162 saverr = errno; 163 (void)fclose(fp); 164 errno = saverr; 165 return (_LDP_ERROR); 166 } 167 TMP->__refcount = 2; /* one for the locale, one for the cache */ 168 TMP->__free_extra = NULL; 169 170#define FREAD(a, b, c, d) \ 171{ \ 172 if (fread(a, b, c, d) != c) { \ 173 saverr = errno; \ 174 free(TMP); \ 175 (void)fclose(d); \ 176 errno = saverr; \ 177 return (_LDP_ERROR); \ 178 } \ 179} 180 181 /* adjust size to read the remaining in one chunk */ 182 i -= offsetof(struct __xlocale_st_collate, __char_pri_table); 183 FREAD(TMP->__char_pri_table, i, 1, fp); 184 (void)fclose(fp); 185 186 vp = (void *)(TMP + 1); 187 188 /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */ 189 if (info.subst_count[0] > 0) { 190 TMP->__substitute_table[0] = (struct __collate_st_subst *)vp; 191 vp += info.subst_count[0] * sizeof(struct __collate_st_subst); 192 } else 193 TMP->__substitute_table[0] = NULL; 194 if (info.flags & COLLATE_SUBST_DUP) 195 TMP->__substitute_table[1] = TMP->__substitute_table[0]; 196 else if (info.subst_count[1] > 0) { 197 TMP->__substitute_table[1] = (struct __collate_st_subst *)vp; 198 vp += info.subst_count[1] * sizeof(struct __collate_st_subst); 199 } else 200 TMP->__substitute_table[1] = NULL; 201 202 if (chains > 0) { 203 TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp; 204 vp += chains * sizeof(struct __collate_st_chain_pri); 205 } else 206 TMP->__chain_pri_table = NULL; 207 if (info.large_pri_count > 0) 208 TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp; 209 else 210 TMP->__large_char_pri_table = NULL; 211 212#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN 213 { 214 struct __collate_st_char_pri *p = TMP->__char_pri_table; 215 for(i = UCHAR_MAX + 1; i-- > 0; p++) { 216 for(z = 0; z < info.directive_count; z++) 217 p->pri[z] = ntohl(p->pri[z]); 218 } 219 } 220 for(z = 0; z < info.directive_count; z++) 221 if (info.subst_count[z] > 0) { 222 struct __collate_st_subst *p = TMP->__substitute_table[z]; 223 for(i = info.subst_count[z]; i-- > 0; p++) { 224 p->val = ntohl(p->val); 225 wntohl(p->str, STR_LEN); 226 } 227 } 228 { 229 struct __collate_st_chain_pri *p = TMP->__chain_pri_table; 230 for(i = chains; i-- > 0; p++) { 231 wntohl(p->str, STR_LEN); 232 for(z = 0; z < info.directive_count; z++) 233 p->pri[z] = ntohl(p->pri[z]); 234 } 235 } 236 if (info.large_pri_count > 0) { 237 struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table; 238 for(i = info.large_pri_count; i-- > 0; p++) { 239 p->val = ntohl(p->val); 240 for(z = 0; z < info.directive_count; z++) 241 p->pri.pri[z] = ntohl(p->pri.pri[z]); 242 } 243 } 244#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ 245 (void)strcpy(TMP->__encoding, encoding); 246 (void)memcpy(&TMP->__info, &info, sizeof(info)); 247 XL_RELEASE(cache); 248 cache = TMP; 249 XL_RELEASE(loc->__lc_collate); 250 loc->__lc_collate = cache; 251 /* no need to retain, since we set __refcount to 2 above */ 252 253 loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0); 254 loc->__collate_load_error = 0; 255 if (loc == &__global_locale) 256 __collate_load_error = 0; 257 258 return (_LDP_LOADED); 259} 260 261static int 262__collate_wcsnlen(const wchar_t *s, int len) 263{ 264 int n = 0; 265 while (*s && n < len) { 266 s++; 267 n++; 268 } 269 return n; 270} 271 272static struct __collate_st_subst * 273substsearch(const wchar_t key, struct __collate_st_subst *tab, int n) 274{ 275 int low = 0; 276 int high = n - 1; 277 int next, compar; 278 struct __collate_st_subst *p; 279 280 while (low <= high) { 281 next = (low + high) / 2; 282 p = tab + next; 283 compar = key - p->val; 284 if (compar == 0) 285 return p; 286 if (compar > 0) 287 low = next + 1; 288 else 289 high = next - 1; 290 } 291 return NULL; 292} 293 294__private_extern__ wchar_t * 295__collate_substitute(const wchar_t *s, int which, locale_t loc) 296{ 297 int dest_len, len, nlen; 298 int n, delta, nsubst; 299 wchar_t *dest_str = NULL; 300 const wchar_t *fp; 301 struct __collate_st_subst *subst, *match; 302 303 if (s == NULL || *s == '\0') 304 return (__collate_wcsdup(L"")); 305 dest_len = wcslen(s); 306 nsubst = __collate_info->subst_count[which]; 307 if (nsubst <= 0) 308 return __collate_wcsdup(s); 309 subst = __collate_substitute_table[which]; 310 delta = dest_len / 4; 311 if (delta < 2) 312 delta = 2; 313 dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t)); 314 if (dest_str == NULL) 315 __collate_err(EX_OSERR, __func__); 316 len = 0; 317 while (*s) { 318 if ((match = substsearch(*s, subst, nsubst)) != NULL) { 319 fp = match->str; 320 n = __collate_wcsnlen(fp, STR_LEN); 321 } else { 322 fp = s; 323 n = 1; 324 } 325 nlen = len + n; 326 if (dest_len <= nlen) { 327 dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t)); 328 if (dest_str == NULL) 329 __collate_err(EX_OSERR, __func__); 330 } 331 wcsncpy(dest_str + len, fp, n); 332 len += n; 333 s++; 334 } 335 dest_str[len] = 0; 336 return (dest_str); 337} 338 339static struct __collate_st_chain_pri * 340chainsearch(const wchar_t *key, int *len, locale_t loc) 341{ 342 int low = 0; 343 int high = __collate_info->chain_count - 1; 344 int next, compar, l; 345 struct __collate_st_chain_pri *p; 346 struct __collate_st_chain_pri *tab = __collate_chain_pri_table; 347 348 while (low <= high) { 349 next = (low + high) / 2; 350 p = tab + next; 351 compar = *key - *p->str; 352 if (compar == 0) { 353 l = __collate_wcsnlen(p->str, STR_LEN); 354 compar = wcsncmp(key, p->str, l); 355 if (compar == 0) { 356 *len = l; 357 return p; 358 } 359 } 360 if (compar > 0) 361 low = next + 1; 362 else 363 high = next - 1; 364 } 365 return NULL; 366} 367 368static struct __collate_st_large_char_pri * 369largesearch(const wchar_t key, locale_t loc) 370{ 371 int low = 0; 372 int high = __collate_info->large_pri_count - 1; 373 int next, compar; 374 struct __collate_st_large_char_pri *p; 375 struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table; 376 377 while (low <= high) { 378 next = (low + high) / 2; 379 p = tab + next; 380 compar = key - p->val; 381 if (compar == 0) 382 return p; 383 if (compar > 0) 384 low = next + 1; 385 else 386 high = next - 1; 387 } 388 return NULL; 389} 390 391__private_extern__ void 392__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc) 393{ 394 struct __collate_st_chain_pri *p2; 395 int l; 396 397 *len = 1; 398 *prim = *sec = 0; 399 p2 = chainsearch(t, &l, loc); 400 /* use the chain if prim >= 0 */ 401 if (p2 && p2->pri[0] >= 0) { 402 *len = l; 403 *prim = p2->pri[0]; 404 *sec = p2->pri[1]; 405 return; 406 } 407 if (*t <= UCHAR_MAX) { 408 *prim = __collate_char_pri_table[*t].pri[0]; 409 *sec = __collate_char_pri_table[*t].pri[1]; 410 return; 411 } 412 if (__collate_info->large_pri_count > 0) { 413 struct __collate_st_large_char_pri *match; 414 match = largesearch(*t, loc); 415 if (match) { 416 *prim = match->pri.pri[0]; 417 *sec = match->pri.pri[1]; 418 return; 419 } 420 } 421 *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l; 422 *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l; 423} 424 425/* 426 * This is only provided for programs (like grep) that are calling this 427 * private function. This will go away eventually. 428 */ 429void 430__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec) 431{ 432 locale_t loc = __current_locale(); 433 wchar_t *w = __collate_mbstowcs((const char *)t, loc); 434 int sverrno; 435 436 __collate_lookup_l(w, len, prim, sec, loc); 437 sverrno = errno; 438 free(w); 439 errno = sverrno; 440} 441 442__private_extern__ void 443__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc) 444{ 445 struct __collate_st_chain_pri *p2; 446 int p, l; 447 448 *len = 1; 449 *pri = 0; 450 p2 = chainsearch(t, &l, loc); 451 if (p2) { 452 p = p2->pri[which]; 453 /* use the chain if pri >= 0 */ 454 if (p >= 0) { 455 *len = l; 456 *pri = p; 457 return; 458 } 459 } 460 if (*t <= UCHAR_MAX) { 461 *pri = __collate_char_pri_table[*t].pri[which]; 462 return; 463 } 464 if (__collate_info->large_pri_count > 0) { 465 struct __collate_st_large_char_pri *match; 466 match = largesearch(*t, loc); 467 if (match) { 468 *pri = match->pri.pri[which]; 469 return; 470 } 471 } 472 *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l; 473} 474 475__private_extern__ wchar_t * 476__collate_mbstowcs(const char *s, locale_t loc) 477{ 478 static const mbstate_t initial; 479 mbstate_t st; 480 size_t len; 481 const char *ss; 482 wchar_t *wcs; 483 484 ss = s; 485 st = initial; 486 if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1) 487 return NULL; 488 if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL) 489 __collate_err(EX_OSERR, __func__); 490 st = initial; 491 mbsrtowcs_l(wcs, &s, len, &st, loc); 492 wcs[len] = 0; 493 494 return (wcs); 495} 496 497__private_extern__ wchar_t * 498__collate_wcsdup(const wchar_t *s) 499{ 500 size_t len = wcslen(s) + 1; 501 wchar_t *wcs; 502 503 if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL) 504 __collate_err(EX_OSERR, __func__); 505 wcscpy(wcs, s); 506 return (wcs); 507} 508 509__private_extern__ void 510__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc) 511{ 512 int pri, len; 513 size_t slen; 514 const wchar_t *t; 515 wchar_t *tt = NULL, *tr = NULL; 516 int direc, pass; 517 wchar_t *xfp; 518 struct __collate_st_info *info = __collate_info; 519 int sverrno; 520 521 for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++) 522 xf[pass] = NULL; 523 for(pass = 0; pass < info->directive_count; pass++) { 524 direc = info->directive[pass]; 525 if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { 526 sverrno = errno; 527 free(tt); 528 errno = sverrno; 529 tt = __collate_substitute(src, pass, loc); 530 } 531 if (direc & DIRECTIVE_BACKWARD) { 532 wchar_t *bp, *fp, c; 533 sverrno = errno; 534 free(tr); 535 errno = sverrno; 536 tr = __collate_wcsdup(tt ? tt : src); 537 bp = tr; 538 fp = tr + wcslen(tr) - 1; 539 while(bp < fp) { 540 c = *bp; 541 *bp++ = *fp; 542 *fp-- = c; 543 } 544 t = (const wchar_t *)tr; 545 } else if (tt) 546 t = (const wchar_t *)tt; 547 else 548 t = (const wchar_t *)src; 549 sverrno = errno; 550 if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) { 551 errno = sverrno; 552 slen = 0; 553 goto end; 554 } 555 errno = sverrno; 556 xfp = xf[pass]; 557 if (direc & DIRECTIVE_POSITION) { 558 while(*t) { 559 __collate_lookup_which(t, &len, &pri, pass, loc); 560 t += len; 561 if (pri <= 0) { 562 if (pri < 0) { 563 errno = EINVAL; 564 slen = 0; 565 goto end; 566 } 567 pri = COLLATE_MAX_PRIORITY; 568 } 569 *xfp++ = pri; 570 } 571 } else { 572 while(*t) { 573 __collate_lookup_which(t, &len, &pri, pass, loc); 574 t += len; 575 if (pri <= 0) { 576 if (pri < 0) { 577 errno = EINVAL; 578 slen = 0; 579 goto end; 580 } 581 continue; 582 } 583 *xfp++ = pri; 584 } 585 } 586 *xfp = 0; 587 } 588 end: 589 sverrno = errno; 590 free(tt); 591 free(tr); 592 errno = sverrno; 593} 594 595__private_extern__ void 596__collate_err(int ex, const char *f) 597{ 598 const char *s; 599 int serrno = errno; 600 601 s = _getprogname(); 602 _write(STDERR_FILENO, s, strlen(s)); 603 _write(STDERR_FILENO, ": ", 2); 604 s = f; 605 _write(STDERR_FILENO, s, strlen(s)); 606 _write(STDERR_FILENO, ": ", 2); 607 s = strerror(serrno); 608 _write(STDERR_FILENO, s, strlen(s)); 609 _write(STDERR_FILENO, "\n", 1); 610 exit(ex); 611} 612 613/* 614 * __collate_collating_symbol takes the multibyte string specified by 615 * src and slen, and using ps, converts that to a wide character. Then 616 * it is checked to verify it is a collating symbol, and then copies 617 * it to the wide character string specified by dst and dlen (the 618 * results are not null terminated). The length of the wide characters 619 * copied to dst is returned if successful. Zero is returned if no such 620 * collating symbol exists. (size_t)-1 is returned if there are wide-character 621 * conversion errors, if the length of the converted string is greater that 622 * STR_LEN or if dlen is too small. It is up to the calling routine to 623 * preserve the mbstate_t structure as needed. 624 */ 625__private_extern__ size_t 626__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc) 627{ 628 wchar_t wname[STR_LEN]; 629 wchar_t w, *wp; 630 size_t len, l; 631 632 /* POSIX locale */ 633 if (loc->__collate_load_error) { 634 if (dlen < 1) 635 return (size_t)-1; 636 if (slen != 1 || !isascii(*src)) 637 return 0; 638 *dst = *src; 639 return 1; 640 } 641 for(wp = wname, len = 0; slen > 0; len++) { 642 l = mbrtowc_l(&w, src, slen, ps, loc); 643 if (l == (size_t)-1 || l == (size_t)-2) 644 return (size_t)-1; 645 if (l == 0) 646 break; 647 if (len >= STR_LEN) 648 return -1; 649 *wp++ = w; 650 src += l; 651 slen = (long)slen - (long)l; 652 } 653 if (len == 0 || len > dlen) 654 return (size_t)-1; 655 if (len == 1) { 656 if (*wname <= UCHAR_MAX) { 657 if (__collate_char_pri_table[*wname].pri[0] >= 0) { 658 if (dlen > 0) 659 *dst = *wname; 660 return 1; 661 } 662 return 0; 663 } else if (__collate_info->large_pri_count > 0) { 664 struct __collate_st_large_char_pri *match; 665 match = largesearch(*wname, loc); 666 if (match && match->pri.pri[0] >= 0) { 667 if (dlen > 0) 668 *dst = *wname; 669 return 1; 670 } 671 } 672 return 0; 673 } 674 *wp = 0; 675 if (__collate_info->chain_count > 0) { 676 struct __collate_st_chain_pri *match; 677 int ll; 678 match = chainsearch(wname, &ll, loc); 679 if (match) { 680 if (ll < dlen) 681 dlen = ll; 682 wcsncpy(dst, wname, dlen); 683 return ll; 684 } 685 } 686 return 0; 687} 688 689/* 690 * __collate_equiv_class returns the equivalence class number for the symbol 691 * specified by src and slen, using ps to convert from multi-byte to wide 692 * character. Zero is returned if the symbol is not in an equivalence 693 * class. -1 is returned if there are wide character conversion error, 694 * if there are any greater-than-8-bit characters or if a multi-byte symbol 695 * is greater or equal to STR_LEN in length. It is up to the calling 696 * routine to preserve the mbstate_t structure as needed. 697 */ 698__private_extern__ int 699__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc) 700{ 701 wchar_t wname[STR_LEN]; 702 wchar_t w, *wp; 703 size_t len, l; 704 int e; 705 706 /* POSIX locale */ 707 if (loc->__collate_load_error) 708 return 0; 709 for(wp = wname, len = 0; slen > 0; len++) { 710 l = mbrtowc_l(&w, src, slen, ps, loc); 711 if (l == (size_t)-1 || l == (size_t)-2) 712 return -1; 713 if (l == 0) 714 break; 715 if (len >= STR_LEN) 716 return -1; 717 *wp++ = w; 718 src += l; 719 slen = (long)slen - (long)l; 720 } 721 if (len == 0) 722 return -1; 723 if (len == 1) { 724 e = -1; 725 if (*wname <= UCHAR_MAX) 726 e = __collate_char_pri_table[*wname].pri[0]; 727 else if (__collate_info->large_pri_count > 0) { 728 struct __collate_st_large_char_pri *match; 729 match = largesearch(*wname, loc); 730 if (match) 731 e = match->pri.pri[0]; 732 } 733 if (e == 0) 734 return IGNORE_EQUIV_CLASS; 735 return e > 0 ? e : 0; 736 } 737 *wp = 0; 738 if (__collate_info->chain_count > 0) { 739 struct __collate_st_chain_pri *match; 740 int ll; 741 match = chainsearch(wname, &ll, loc); 742 if (match) { 743 e = match->pri[0]; 744 if (e == 0) 745 return IGNORE_EQUIV_CLASS; 746 return e < 0 ? -e : e; 747 } 748 } 749 return 0; 750} 751 752/* 753 * __collate_equiv_match tries to match any single or multi-character symbol 754 * in equivalence class equiv_class in the multi-byte string specified by src 755 * and slen. If start is non-zero, it is taken to be the first (pre-converted) 756 * wide character. Subsequence wide characters, if needed, will use ps in 757 * the conversion. On a successful match, the length of the matched string 758 * is returned (including the start character). If dst is non-NULL, the 759 * matched wide-character string is copied to dst, a wide character array of 760 * length dlen (the results are not zero-terminated). If rlen is non-NULL, 761 * the number of character in src actually used is returned. Zero is 762 * returned by __collate_equiv_match if there is no match. (size_t)-1 is 763 * returned on error: if there were conversion errors or if dlen is too small 764 * to accept the results. On no match or error, ps is restored to its incoming 765 * state. 766 */ 767size_t 768__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc) 769{ 770 wchar_t w; 771 size_t len, l, clen; 772 int i; 773 wchar_t buf[STR_LEN], *wp; 774 mbstate_t save; 775 const char *s = src; 776 size_t sl = slen; 777 struct __collate_st_chain_pri *ch = NULL; 778 779 /* POSIX locale */ 780 if (loc->__collate_load_error) 781 return (size_t)-1; 782 if (equiv_class == IGNORE_EQUIV_CLASS) 783 equiv_class = 0; 784 if (ps) 785 save = *ps; 786 wp = buf; 787 len = clen = 0; 788 if (start) { 789 *wp++ = start; 790 len = 1; 791 } 792 /* convert up to the max chain length */ 793 while(sl > 0 && len < __collate_info->chain_max_len) { 794 l = mbrtowc_l(&w, s, sl, ps, loc); 795 if (l == (size_t)-1 || l == (size_t)-2 || l == 0) 796 break; 797 *wp++ = w; 798 s += l; 799 clen += l; 800 sl -= l; 801 len++; 802 } 803 *wp = 0; 804 if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) { 805 int e = ch->pri[0]; 806 if (e < 0) 807 e = -e; 808 if (e == equiv_class) 809 goto found; 810 } 811 /* try single character */ 812 i = 1; 813 if (*buf <= UCHAR_MAX) { 814 if (equiv_class == __collate_char_pri_table[*buf].pri[0]) 815 goto found; 816 } else if (__collate_info->large_pri_count > 0) { 817 struct __collate_st_large_char_pri *match; 818 match = largesearch(*buf, loc); 819 if (match && equiv_class == match->pri.pri[0]) 820 goto found; 821 } 822 /* no match */ 823 if (ps) 824 *ps = save; 825 return 0; 826found: 827 /* if we converted more than we used, restore to initial and reconvert 828 * up to what did match */ 829 if (i < len) { 830 len = i; 831 if (ps) 832 *ps = save; 833 if (start) 834 i--; 835 clen = 0; 836 while(i-- > 0) { 837 l = mbrtowc_l(&w, src, slen, ps, loc); 838 src += l; 839 clen += l; 840 slen -= l; 841 } 842 } 843 if (dst) { 844 if (dlen < len) { 845 if (ps) 846 *ps = save; 847 return (size_t)-1; 848 } 849 for(wp = buf; len > 0; len--) 850 *dst++ = *wp++; 851 } 852 if (rlen) 853 *rlen = clen; 854 return len; 855} 856 857/* 858 * __collate_equiv_value returns the primary collation value for the given 859 * collating symbol specified by str and len. Zero or negative is return 860 * if the collating symbol was not found. (Use by the bracket code in TRE.) 861 */ 862__private_extern__ int 863__collate_equiv_value(locale_t loc, const wchar_t *str, size_t len) 864{ 865 int e; 866 867 if (len < 1 || len >= STR_LEN) 868 return -1; 869 870 /* POSIX locale */ 871 if (loc->__collate_load_error) 872 return (len == 1 && *str <= UCHAR_MAX) ? *str : -1; 873 874 if (len == 1) { 875 e = -1; 876 if (*str <= UCHAR_MAX) 877 e = __collate_char_pri_table[*str].pri[0]; 878 else if (__collate_info->large_pri_count > 0) { 879 struct __collate_st_large_char_pri *match; 880 match = largesearch(*str, loc); 881 if (match) 882 e = match->pri.pri[0]; 883 } 884 if (e == 0) 885 return IGNORE_EQUIV_CLASS; 886 return e > 0 ? e : 0; 887 } 888 if (__collate_info->chain_count > 0) { 889 wchar_t name[STR_LEN]; 890 struct __collate_st_chain_pri *match; 891 int ll; 892 893 wcsncpy(name, str, len); 894 name[len] = 0; 895 match = chainsearch(name, &ll, loc); 896 if (match) { 897 e = match->pri[0]; 898 if (e == 0) 899 return IGNORE_EQUIV_CLASS; 900 return e < 0 ? -e : e; 901 } 902 } 903 return 0; 904} 905 906#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN 907static void 908wntohl(wchar_t *str, int len) 909{ 910 for(; *str && len > 0; str++, len--) 911 *str = ntohl(*str); 912} 913#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ 914 915#ifdef COLLATE_DEBUG 916static char * 917show(int c) 918{ 919 static char buf[5]; 920 921 if (c >=32 && c <= 126) 922 sprintf(buf, "'%c' ", c); 923 else 924 sprintf(buf, "\\x{%02x}", c); 925 return buf; 926} 927 928static char * 929showwcs(const wchar_t *t, int len) 930{ 931 static char buf[64]; 932 char *cp = buf; 933 934 for(; *t && len > 0; len--, t++) { 935 if (*t >=32 && *t <= 126) 936 *cp++ = *t; 937 else { 938 sprintf(cp, "\\x{%02x}", *t); 939 cp += strlen(cp); 940 } 941 } 942 *cp = 0; 943 return buf; 944} 945 946void 947__collate_print_tables() 948{ 949 int i, z; 950 locale_t loc = __current_locale(); 951 952 printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n", 953 __collate_info->directive[0], __collate_info->directive[1], 954 __collate_info->flags, __collate_info->chain_max_len, 955 __collate_info->directive_count, 956 __collate_info->undef_pri[0], __collate_info->undef_pri[1], 957 __collate_info->subst_count[0], __collate_info->subst_count[1], 958 __collate_info->chain_count, __collate_info->large_pri_count); 959 for(z = 0; z < __collate_info->directive_count; z++) { 960 if (__collate_info->subst_count[z] > 0) { 961 struct __collate_st_subst *p2 = __collate_substitute_table[z]; 962 if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP)) 963 printf("Both substitute tables:\n"); 964 else 965 printf("Substitute table %d:\n", z); 966 for (i = __collate_info->subst_count[z]; i-- > 0; p2++) 967 printf("\t%s --> \"%s\"\n", 968 show(p2->val), 969 showwcs(p2->str, STR_LEN)); 970 } 971 } 972 if (__collate_info->chain_count > 0) { 973 printf("Chain priority table:\n"); 974 struct __collate_st_chain_pri *p2 = __collate_chain_pri_table; 975 for (i = __collate_info->chain_count; i-- > 0; p2++) { 976 printf("\t\"%s\" :", showwcs(p2->str, STR_LEN)); 977 for(z = 0; z < __collate_info->directive_count; z++) 978 printf(" %d", p2->pri[z]); 979 putchar('\n'); 980 } 981 } 982 printf("Char priority table:\n"); 983 { 984 struct __collate_st_char_pri *p2 = __collate_char_pri_table; 985 for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { 986 printf("\t%s :", show(i)); 987 for(z = 0; z < __collate_info->directive_count; z++) 988 printf(" %d", p2->pri[z]); 989 putchar('\n'); 990 } 991 } 992 if (__collate_info->large_pri_count > 0) { 993 struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table; 994 printf("Large priority table:\n"); 995 for (i = __collate_info->large_pri_count; i-- > 0; p2++) { 996 printf("\t%s :", show(p2->val)); 997 for(z = 0; z < __collate_info->directive_count; z++) 998 printf(" %d", p2->pri.pri[z]); 999 putchar('\n'); 1000 } 1001 } 1002} 1003#endif 1004