bwstring.c revision 281125
1/*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/usr.bin/sort/bwstring.c 281125 2015-04-05 23:06:42Z pfg $"); 30 31#include <ctype.h> 32#include <errno.h> 33#include <err.h> 34#include <langinfo.h> 35#include <math.h> 36#include <stdlib.h> 37#include <string.h> 38#include <wchar.h> 39#include <wctype.h> 40 41#include "bwstring.h" 42#include "sort.h" 43 44bool byte_sort; 45 46static wchar_t **wmonths; 47static unsigned char **cmonths; 48 49/* initialise months */ 50 51void 52initialise_months(void) 53{ 54 const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 55 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 56 ABMON_11, ABMON_12 }; 57 unsigned char *tmp; 58 size_t len; 59 60 if (MB_CUR_MAX == 1) { 61 if (cmonths == NULL) { 62 unsigned char *m; 63 64 cmonths = sort_malloc(sizeof(unsigned char*) * 12); 65 for (int i = 0; i < 12; i++) { 66 cmonths[i] = NULL; 67 tmp = (unsigned char *) nl_langinfo(item[i]); 68 if (tmp == NULL) 69 continue; 70 if (debug_sort) 71 printf("month[%d]=%s\n", i, tmp); 72 len = strlen((char*)tmp); 73 if (len < 1) 74 continue; 75 while (isblank(*tmp)) 76 ++tmp; 77 m = sort_malloc(len + 1); 78 memcpy(m, tmp, len + 1); 79 m[len] = '\0'; 80 for (unsigned int j = 0; j < len; j++) 81 m[j] = toupper(m[j]); 82 cmonths[i] = m; 83 } 84 } 85 86 } else { 87 if (wmonths == NULL) { 88 wchar_t *m; 89 90 wmonths = sort_malloc(sizeof(wchar_t *) * 12); 91 for (int i = 0; i < 12; i++) { 92 wmonths[i] = NULL; 93 tmp = (unsigned char *) nl_langinfo(item[i]); 94 if (tmp == NULL) 95 continue; 96 if (debug_sort) 97 printf("month[%d]=%s\n", i, tmp); 98 len = strlen((char*)tmp); 99 if (len < 1) 100 continue; 101 while (isblank(*tmp)) 102 ++tmp; 103 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); 104 if (mbstowcs(m, (char*)tmp, len) == ((size_t) -1)) 105 continue; 106 m[len] = L'\0'; 107 for (unsigned int j = 0; j < len; j++) 108 m[j] = towupper(m[j]); 109 wmonths[i] = m; 110 } 111 } 112 } 113} 114 115/* 116 * Compare two wide-character strings 117 */ 118static int 119wide_str_coll(const wchar_t *s1, const wchar_t *s2) 120{ 121 int ret = 0; 122 123 errno = 0; 124 ret = wcscoll(s1, s2); 125 if (errno == EILSEQ) { 126 errno = 0; 127 ret = wcscmp(s1, s2); 128 if (errno != 0) { 129 for (size_t i = 0; ; ++i) { 130 wchar_t c1 = s1[i]; 131 wchar_t c2 = s2[i]; 132 if (c1 == L'\0') 133 return ((c2 == L'\0') ? 0 : -1); 134 if (c2 == L'\0') 135 return (+1); 136 if (c1 == c2) 137 continue; 138 return ((int)(c1 - c2)); 139 } 140 } 141 } 142 return (ret); 143} 144 145/* counterparts of wcs functions */ 146 147void 148bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 149{ 150 if (MB_CUR_MAX == 1) 151 fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); 152 else 153 fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); 154} 155 156const void* bwsrawdata(const struct bwstring *bws) 157{ 158 return (&(bws->data)); 159} 160 161size_t bwsrawlen(const struct bwstring *bws) 162{ 163 return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len)); 164} 165 166size_t 167bws_memsize(const struct bwstring *bws) 168{ 169 return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : 170 (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring))); 171} 172 173void 174bws_setlen(struct bwstring *bws, size_t newlen) 175{ 176 if (bws && newlen != bws->len && newlen <= bws->len) { 177 bws->len = newlen; 178 if (MB_CUR_MAX == 1) 179 bws->data.cstr[newlen] = '\0'; 180 else 181 bws->data.wstr[newlen] = L'\0'; 182 } 183} 184 185/* 186 * Allocate a new binary string of specified size 187 */ 188struct bwstring * 189bwsalloc(size_t sz) 190{ 191 struct bwstring *ret; 192 193 if (MB_CUR_MAX == 1) 194 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 195 else 196 ret = sort_malloc(sizeof(struct bwstring) + 197 SIZEOF_WCHAR_STRING(sz + 1)); 198 ret->len = sz; 199 200 if (MB_CUR_MAX == 1) 201 ret->data.cstr[ret->len] = '\0'; 202 else 203 ret->data.wstr[ret->len] = L'\0'; 204 205 return (ret); 206} 207 208/* 209 * Create a copy of binary string. 210 * New string size equals the length of the old string. 211 */ 212struct bwstring * 213bwsdup(const struct bwstring *s) 214{ 215 if (s == NULL) 216 return (NULL); 217 else { 218 struct bwstring *ret = bwsalloc(s->len); 219 220 if (MB_CUR_MAX == 1) 221 memcpy(ret->data.cstr, s->data.cstr, (s->len)); 222 else 223 memcpy(ret->data.wstr, s->data.wstr, 224 SIZEOF_WCHAR_STRING(s->len)); 225 226 return (ret); 227 } 228} 229 230/* 231 * Create a new binary string from a wide character buffer. 232 */ 233struct bwstring * 234bwssbdup(const wchar_t *str, size_t len) 235{ 236 if (str == NULL) 237 return ((len == 0) ? bwsalloc(0) : NULL); 238 else { 239 struct bwstring *ret; 240 241 ret = bwsalloc(len); 242 243 if (MB_CUR_MAX == 1) 244 for (size_t i = 0; i < len; ++i) 245 ret->data.cstr[i] = (unsigned char) str[i]; 246 else 247 memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); 248 249 return (ret); 250 } 251} 252 253/* 254 * Create a new binary string from a raw binary buffer. 255 */ 256struct bwstring * 257bwscsbdup(const unsigned char *str, size_t len) 258{ 259 struct bwstring *ret; 260 261 ret = bwsalloc(len); 262 263 if (str) { 264 if (MB_CUR_MAX == 1) 265 memcpy(ret->data.cstr, str, len); 266 else { 267 mbstate_t mbs; 268 const char *s; 269 size_t charlen, chars, cptr; 270 271 charlen = chars = 0; 272 cptr = 0; 273 s = (const char *) str; 274 275 memset(&mbs, 0, sizeof(mbs)); 276 277 while (cptr < len) { 278 size_t n = MB_CUR_MAX; 279 280 if (n > len - cptr) 281 n = len - cptr; 282 charlen = mbrlen(s + cptr, n, &mbs); 283 switch (charlen) { 284 case 0: 285 /* FALLTHROUGH */ 286 case (size_t) -1: 287 /* FALLTHROUGH */ 288 case (size_t) -2: 289 ret->data.wstr[chars++] = 290 (unsigned char) s[cptr]; 291 ++cptr; 292 break; 293 default: 294 n = mbrtowc(ret->data.wstr + (chars++), 295 s + cptr, charlen, &mbs); 296 if ((n == (size_t)-1) || (n == (size_t)-2)) 297 /* NOTREACHED */ 298 err(2, "mbrtowc error"); 299 cptr += charlen; 300 }; 301 } 302 303 ret->len = chars; 304 ret->data.wstr[ret->len] = L'\0'; 305 } 306 } 307 return (ret); 308} 309 310/* 311 * De-allocate object memory 312 */ 313void 314bwsfree(const struct bwstring *s) 315{ 316 if (s) 317 sort_free(s); 318} 319 320/* 321 * Copy content of src binary string to dst. 322 * If the capacity of the dst string is not sufficient, 323 * then the data is truncated. 324 */ 325size_t 326bwscpy(struct bwstring *dst, const struct bwstring *src) 327{ 328 size_t nums = src->len; 329 330 if (nums > dst->len) 331 nums = dst->len; 332 dst->len = nums; 333 334 if (MB_CUR_MAX == 1) { 335 memcpy(dst->data.cstr, src->data.cstr, nums); 336 dst->data.cstr[dst->len] = '\0'; 337 } else { 338 memcpy(dst->data.wstr, src->data.wstr, 339 SIZEOF_WCHAR_STRING(nums + 1)); 340 dst->data.wstr[dst->len] = L'\0'; 341 } 342 343 return (nums); 344} 345 346/* 347 * Copy content of src binary string to dst, 348 * with specified number of symbols to be copied. 349 * If the capacity of the dst string is not sufficient, 350 * then the data is truncated. 351 */ 352struct bwstring * 353bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) 354{ 355 size_t nums = src->len; 356 357 if (nums > dst->len) 358 nums = dst->len; 359 if (nums > size) 360 nums = size; 361 dst->len = nums; 362 363 if (MB_CUR_MAX == 1) { 364 memcpy(dst->data.cstr, src->data.cstr, nums); 365 dst->data.cstr[dst->len] = '\0'; 366 } else { 367 memcpy(dst->data.wstr, src->data.wstr, 368 SIZEOF_WCHAR_STRING(nums + 1)); 369 dst->data.wstr[dst->len] = L'\0'; 370 } 371 372 return (dst); 373} 374 375/* 376 * Copy content of src binary string to dst, 377 * with specified number of symbols to be copied. 378 * An offset value can be specified, from the start of src string. 379 * If the capacity of the dst string is not sufficient, 380 * then the data is truncated. 381 */ 382struct bwstring * 383bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 384 size_t size) 385{ 386 if (offset >= src->len) { 387 dst->data.wstr[0] = 0; 388 dst->len = 0; 389 } else { 390 size_t nums = src->len - offset; 391 392 if (nums > dst->len) 393 nums = dst->len; 394 if (nums > size) 395 nums = size; 396 dst->len = nums; 397 if (MB_CUR_MAX == 1) { 398 memcpy(dst->data.cstr, src->data.cstr + offset, 399 (nums)); 400 dst->data.cstr[dst->len] = '\0'; 401 } else { 402 memcpy(dst->data.wstr, src->data.wstr + offset, 403 SIZEOF_WCHAR_STRING(nums)); 404 dst->data.wstr[dst->len] = L'\0'; 405 } 406 } 407 return (dst); 408} 409 410/* 411 * Write binary string to the file. 412 * The output is ended either with '\n' (nl == true) 413 * or '\0' (nl == false). 414 */ 415size_t 416bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 417{ 418 if (MB_CUR_MAX == 1) { 419 size_t len = bws->len; 420 421 if (!zero_ended) { 422 bws->data.cstr[len] = '\n'; 423 424 if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 425 err(2, NULL); 426 427 bws->data.cstr[len] = '\0'; 428 } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 429 err(2, NULL); 430 431 return (len + 1); 432 433 } else { 434 wchar_t eols; 435 size_t printed = 0; 436 437 eols = zero_ended ? btowc('\0') : btowc('\n'); 438 439 while (printed < BWSLEN(bws)) { 440 const wchar_t *s = bws->data.wstr + printed; 441 442 if (*s == L'\0') { 443 int nums; 444 445 nums = fwprintf(f, L"%lc", *s); 446 447 if (nums != 1) 448 err(2, NULL); 449 ++printed; 450 } else { 451 int nums; 452 453 nums = fwprintf(f, L"%ls", s); 454 455 if (nums < 1) 456 err(2, NULL); 457 printed += nums; 458 } 459 } 460 fwprintf(f, L"%lc", eols); 461 return (printed + 1); 462 } 463} 464 465/* 466 * Allocate and read a binary string from file. 467 * The strings are nl-ended or zero-ended, depending on the sort setting. 468 */ 469struct bwstring * 470bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) 471{ 472 wint_t eols; 473 474 eols = zero_ended ? btowc('\0') : btowc('\n'); 475 476 if (!zero_ended && (MB_CUR_MAX > 1)) { 477 wchar_t *ret; 478 479 ret = fgetwln(f, len); 480 481 if (ret == NULL) { 482 if (!feof(f)) 483 err(2, NULL); 484 return (NULL); 485 } 486 if (*len > 0) { 487 if (ret[*len - 1] == (wchar_t)eols) 488 --(*len); 489 } 490 return (bwssbdup(ret, *len)); 491 492 } else if (!zero_ended && (MB_CUR_MAX == 1)) { 493 char *ret; 494 495 ret = fgetln(f, len); 496 497 if (ret == NULL) { 498 if (!feof(f)) 499 err(2, NULL); 500 return (NULL); 501 } 502 if (*len > 0) { 503 if (ret[*len - 1] == '\n') 504 --(*len); 505 } 506 return (bwscsbdup((unsigned char*)ret, *len)); 507 508 } else { 509 *len = 0; 510 511 if (feof(f)) 512 return (NULL); 513 514 if (2 >= rb->fgetwln_z_buffer_size) { 515 rb->fgetwln_z_buffer_size += 256; 516 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 517 sizeof(wchar_t) * rb->fgetwln_z_buffer_size); 518 } 519 rb->fgetwln_z_buffer[*len] = 0; 520 521 if (MB_CUR_MAX == 1) 522 while (!feof(f)) { 523 int c; 524 525 c = fgetc(f); 526 527 if (c == EOF) { 528 if (*len == 0) 529 return (NULL); 530 goto line_read_done; 531 } 532 if (c == eols) 533 goto line_read_done; 534 535 if (*len + 1 >= rb->fgetwln_z_buffer_size) { 536 rb->fgetwln_z_buffer_size += 256; 537 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 538 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); 539 } 540 541 rb->fgetwln_z_buffer[*len] = c; 542 rb->fgetwln_z_buffer[++(*len)] = 0; 543 } 544 else 545 while (!feof(f)) { 546 wint_t c = 0; 547 548 c = fgetwc(f); 549 550 if (c == WEOF) { 551 if (*len == 0) 552 return (NULL); 553 goto line_read_done; 554 } 555 if (c == eols) 556 goto line_read_done; 557 558 if (*len + 1 >= rb->fgetwln_z_buffer_size) { 559 rb->fgetwln_z_buffer_size += 256; 560 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 561 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); 562 } 563 564 rb->fgetwln_z_buffer[*len] = c; 565 rb->fgetwln_z_buffer[++(*len)] = 0; 566 } 567 568line_read_done: 569 /* we do not count the last 0 */ 570 return (bwssbdup(rb->fgetwln_z_buffer, *len)); 571 } 572} 573 574int 575bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 576 size_t offset, size_t len) 577{ 578 size_t cmp_len, len1, len2; 579 int res = 0; 580 581 cmp_len = 0; 582 len1 = bws1->len; 583 len2 = bws2->len; 584 585 if (len1 <= offset) { 586 return ((len2 <= offset) ? 0 : -1); 587 } else { 588 if (len2 <= offset) 589 return (+1); 590 else { 591 len1 -= offset; 592 len2 -= offset; 593 594 cmp_len = len1; 595 596 if (len2 < cmp_len) 597 cmp_len = len2; 598 599 if (len < cmp_len) 600 cmp_len = len; 601 602 if (MB_CUR_MAX == 1) { 603 const unsigned char *s1, *s2; 604 605 s1 = bws1->data.cstr + offset; 606 s2 = bws2->data.cstr + offset; 607 608 res = memcmp(s1, s2, cmp_len); 609 610 } else { 611 const wchar_t *s1, *s2; 612 613 s1 = bws1->data.wstr + offset; 614 s2 = bws2->data.wstr + offset; 615 616 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 617 } 618 } 619 } 620 621 if (res == 0) { 622 if (len1 < cmp_len && len1 < len2) 623 res = -1; 624 else if (len2 < cmp_len && len2 < len1) 625 res = +1; 626 } 627 628 return (res); 629} 630 631int 632bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 633{ 634 size_t len1, len2, cmp_len; 635 int res; 636 637 len1 = bws1->len; 638 len2 = bws2->len; 639 640 len1 -= offset; 641 len2 -= offset; 642 643 cmp_len = len1; 644 645 if (len2 < cmp_len) 646 cmp_len = len2; 647 648 res = bwsncmp(bws1, bws2, offset, cmp_len); 649 650 if (res == 0) { 651 if( len1 < len2) 652 res = -1; 653 else if (len2 < len1) 654 res = +1; 655 } 656 657 return (res); 658} 659 660int 661bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 662{ 663 wchar_t c1, c2; 664 size_t i = 0; 665 666 for (i = 0; i < len; ++i) { 667 c1 = bws_get_iter_value(iter1); 668 c2 = bws_get_iter_value(iter2); 669 if (c1 != c2) 670 return (c1 - c2); 671 iter1 = bws_iterator_inc(iter1, 1); 672 iter2 = bws_iterator_inc(iter2, 1); 673 } 674 675 return (0); 676} 677 678int 679bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 680{ 681 size_t len1, len2; 682 683 len1 = bws1->len; 684 len2 = bws2->len; 685 686 if (len1 <= offset) 687 return ((len2 <= offset) ? 0 : -1); 688 else { 689 if (len2 <= offset) 690 return (+1); 691 else { 692 len1 -= offset; 693 len2 -= offset; 694 695 if (MB_CUR_MAX == 1) { 696 const unsigned char *s1, *s2; 697 698 s1 = bws1->data.cstr + offset; 699 s2 = bws2->data.cstr + offset; 700 701 if (byte_sort) { 702 int res = 0; 703 704 if (len1 > len2) { 705 res = memcmp(s1, s2, len2); 706 if (!res) 707 res = +1; 708 } else if (len1 < len2) { 709 res = memcmp(s1, s2, len1); 710 if (!res) 711 res = -1; 712 } else 713 res = memcmp(s1, s2, len1); 714 715 return (res); 716 717 } else { 718 int res = 0; 719 size_t i, maxlen; 720 721 i = 0; 722 maxlen = len1; 723 724 if (maxlen > len2) 725 maxlen = len2; 726 727 while (i < maxlen) { 728 /* goto next non-zero part: */ 729 while ((i < maxlen) && 730 !s1[i] && !s2[i]) 731 ++i; 732 733 if (i >= maxlen) 734 break; 735 736 if (s1[i] == 0) { 737 if (s2[i] == 0) 738 /* NOTREACHED */ 739 err(2, "bwscoll error 01"); 740 else 741 return (-1); 742 } else if (s2[i] == 0) 743 return (+1); 744 745 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); 746 if (res) 747 return (res); 748 749 while ((i < maxlen) && 750 s1[i] && s2[i]) 751 ++i; 752 753 if (i >= maxlen) 754 break; 755 756 if (s1[i] == 0) { 757 if (s2[i] == 0) { 758 ++i; 759 continue; 760 } else 761 return (-1); 762 } else if (s2[i] == 0) 763 return (+1); 764 else 765 /* NOTREACHED */ 766 err(2, "bwscoll error 02"); 767 } 768 769 if (len1 < len2) 770 return (-1); 771 else if (len1 > len2) 772 return (+1); 773 774 return (0); 775 } 776 } else { 777 const wchar_t *s1, *s2; 778 size_t i, maxlen; 779 int res = 0; 780 781 s1 = bws1->data.wstr + offset; 782 s2 = bws2->data.wstr + offset; 783 784 i = 0; 785 maxlen = len1; 786 787 if (maxlen > len2) 788 maxlen = len2; 789 790 while (i < maxlen) { 791 792 /* goto next non-zero part: */ 793 while ((i < maxlen) && 794 !s1[i] && !s2[i]) 795 ++i; 796 797 if (i >= maxlen) 798 break; 799 800 if (s1[i] == 0) { 801 if (s2[i] == 0) 802 /* NOTREACHED */ 803 err(2, "bwscoll error 1"); 804 else 805 return (-1); 806 } else if (s2[i] == 0) 807 return (+1); 808 809 res = wide_str_coll(s1 + i, s2 + i); 810 if (res) 811 return (res); 812 813 while ((i < maxlen) && s1[i] && s2[i]) 814 ++i; 815 816 if (i >= maxlen) 817 break; 818 819 if (s1[i] == 0) { 820 if (s2[i] == 0) { 821 ++i; 822 continue; 823 } else 824 return (-1); 825 } else if (s2[i] == 0) 826 return (+1); 827 else 828 /* NOTREACHED */ 829 err(2, "bwscoll error 2"); 830 } 831 832 if (len1 < len2) 833 return (-1); 834 else if (len1 > len2) 835 return (+1); 836 837 return (0); 838 } 839 } 840 } 841} 842 843/* 844 * Correction of the system API 845 */ 846double 847bwstod(struct bwstring *s0, bool *empty) 848{ 849 double ret = 0; 850 851 if (MB_CUR_MAX == 1) { 852 unsigned char *end, *s; 853 char *ep; 854 855 s = s0->data.cstr; 856 end = s + s0->len; 857 ep = NULL; 858 859 while (isblank(*s) && s < end) 860 ++s; 861 862 if (!isprint(*s)) { 863 *empty = true; 864 return (0); 865 } 866 867 ret = strtod((char*)s, &ep); 868 if ((unsigned char*) ep == s) { 869 *empty = true; 870 return (0); 871 } 872 } else { 873 wchar_t *end, *ep, *s; 874 875 s = s0->data.wstr; 876 end = s + s0->len; 877 ep = NULL; 878 879 while (iswblank(*s) && s < end) 880 ++s; 881 882 if (!iswprint(*s)) { 883 *empty = true; 884 return (0); 885 } 886 887 ret = wcstod(s, &ep); 888 if (ep == s) { 889 *empty = true; 890 return (0); 891 } 892 } 893 894 *empty = false; 895 return (ret); 896} 897 898/* 899 * A helper function for monthcoll. If a line matches 900 * a month name, it returns (number of the month - 1), 901 * while if there is no match, it just return -1. 902 */ 903 904int 905bws_month_score(const struct bwstring *s0) 906{ 907 if (MB_CUR_MAX == 1) { 908 const unsigned char *end, *s; 909 size_t len; 910 911 s = s0->data.cstr; 912 end = s + s0->len; 913 914 while (isblank(*s) && s < end) 915 ++s; 916 917 len = strlen((const char*)s); 918 919 for (int i = 11; i >= 0; --i) { 920 if (cmonths[i] && 921 (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i])))) 922 return (i); 923 } 924 925 } else { 926 const wchar_t *end, *s; 927 size_t len; 928 929 s = s0->data.wstr; 930 end = s + s0->len; 931 932 while (iswblank(*s) && s < end) 933 ++s; 934 935 len = wcslen(s); 936 937 for (int i = 11; i >= 0; --i) { 938 if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) 939 return (i); 940 } 941 } 942 943 return (-1); 944} 945 946/* 947 * Rips out leading blanks (-b). 948 */ 949struct bwstring * 950ignore_leading_blanks(struct bwstring *str) 951{ 952 953 if (MB_CUR_MAX == 1) { 954 unsigned char *dst, *end, *src; 955 956 src = str->data.cstr; 957 dst = src; 958 end = src + str->len; 959 960 while (src < end && isblank(*src)) 961 ++src; 962 963 if (src != dst) { 964 size_t newlen; 965 966 newlen = BWSLEN(str) - (src - dst); 967 968 while (src < end) { 969 *dst = *src; 970 ++dst; 971 ++src; 972 } 973 bws_setlen(str, newlen); 974 } 975 } else { 976 wchar_t *dst, *end, *src; 977 978 src = str->data.wstr; 979 dst = src; 980 end = src + str->len; 981 982 while (src < end && iswblank(*src)) 983 ++src; 984 985 if (src != dst) { 986 987 size_t newlen = BWSLEN(str) - (src - dst); 988 989 while (src < end) { 990 *dst = *src; 991 ++dst; 992 ++src; 993 } 994 bws_setlen(str, newlen); 995 996 } 997 } 998 return (str); 999} 1000 1001/* 1002 * Rips out nonprinting characters (-i). 1003 */ 1004struct bwstring * 1005ignore_nonprinting(struct bwstring *str) 1006{ 1007 size_t newlen = str->len; 1008 1009 if (MB_CUR_MAX == 1) { 1010 unsigned char *dst, *end, *src; 1011 unsigned char c; 1012 1013 src = str->data.cstr; 1014 dst = src; 1015 end = src + str->len; 1016 1017 while (src < end) { 1018 c = *src; 1019 if (isprint(c)) { 1020 *dst = c; 1021 ++dst; 1022 ++src; 1023 } else { 1024 ++src; 1025 --newlen; 1026 } 1027 } 1028 } else { 1029 wchar_t *dst, *end, *src; 1030 wchar_t c; 1031 1032 src = str->data.wstr; 1033 dst = src; 1034 end = src + str->len; 1035 1036 while (src < end) { 1037 c = *src; 1038 if (iswprint(c)) { 1039 *dst = c; 1040 ++dst; 1041 ++src; 1042 } else { 1043 ++src; 1044 --newlen; 1045 } 1046 } 1047 } 1048 bws_setlen(str, newlen); 1049 1050 return (str); 1051} 1052 1053/* 1054 * Rips out any characters that are not alphanumeric characters 1055 * nor blanks (-d). 1056 */ 1057struct bwstring * 1058dictionary_order(struct bwstring *str) 1059{ 1060 size_t newlen = str->len; 1061 1062 if (MB_CUR_MAX == 1) { 1063 unsigned char *dst, *end, *src; 1064 unsigned char c; 1065 1066 src = str->data.cstr; 1067 dst = src; 1068 end = src + str->len; 1069 1070 while (src < end) { 1071 c = *src; 1072 if (isalnum(c) || isblank(c)) { 1073 *dst = c; 1074 ++dst; 1075 ++src; 1076 } else { 1077 ++src; 1078 --newlen; 1079 } 1080 } 1081 } else { 1082 wchar_t *dst, *end, *src; 1083 wchar_t c; 1084 1085 src = str->data.wstr; 1086 dst = src; 1087 end = src + str->len; 1088 1089 while (src < end) { 1090 c = *src; 1091 if (iswalnum(c) || iswblank(c)) { 1092 *dst = c; 1093 ++dst; 1094 ++src; 1095 } else { 1096 ++src; 1097 --newlen; 1098 } 1099 } 1100 } 1101 bws_setlen(str, newlen); 1102 1103 return (str); 1104} 1105 1106/* 1107 * Converts string to lower case(-f). 1108 */ 1109struct bwstring * 1110ignore_case(struct bwstring *str) 1111{ 1112 if (MB_CUR_MAX == 1) { 1113 unsigned char *end, *s; 1114 1115 s = str->data.cstr; 1116 end = s + str->len; 1117 1118 while (s < end) { 1119 *s = toupper(*s); 1120 ++s; 1121 } 1122 } else { 1123 wchar_t *end, *s; 1124 1125 s = str->data.wstr; 1126 end = s + str->len; 1127 1128 while (s < end) { 1129 *s = towupper(*s); 1130 ++s; 1131 } 1132 } 1133 return (str); 1134} 1135 1136void 1137bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 1138{ 1139 if (MB_CUR_MAX == 1) 1140 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); 1141 else 1142 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); 1143} 1144