1/*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/10/usr.bin/sort/coll.c 318152 2017-05-10 20:29:01Z marius $"); 30 31#include <sys/types.h> 32 33#include <errno.h> 34#include <err.h> 35#include <langinfo.h> 36#include <limits.h> 37#include <math.h> 38#include <md5.h> 39#include <stdlib.h> 40#include <string.h> 41#include <wchar.h> 42#include <wctype.h> 43 44#include "coll.h" 45#include "vsort.h" 46 47struct key_specs *keys; 48size_t keys_num = 0; 49 50wint_t symbol_decimal_point = L'.'; 51/* there is no default thousands separator in collate rules: */ 52wint_t symbol_thousands_sep = 0; 53wint_t symbol_negative_sign = L'-'; 54wint_t symbol_positive_sign = L'+'; 55 56static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 57static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 58static int monthcoll(struct key_value*, struct key_value *, size_t offset); 59static int numcoll(struct key_value*, struct key_value *, size_t offset); 60static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 61static int randomcoll(struct key_value*, struct key_value *, size_t offset); 62static int versioncoll(struct key_value*, struct key_value *, size_t offset); 63 64/* 65 * Allocate keys array 66 */ 67struct keys_array * 68keys_array_alloc(void) 69{ 70 struct keys_array *ka; 71 size_t sz; 72 73 sz = keys_array_size(); 74 ka = sort_malloc(sz); 75 memset(ka, 0, sz); 76 77 return (ka); 78} 79 80/* 81 * Calculate whether we need key hint space 82 */ 83static size_t 84key_hint_size(void) 85{ 86 87 return (need_hint ? sizeof(struct key_hint) : 0); 88} 89 90/* 91 * Calculate keys array size 92 */ 93size_t 94keys_array_size(void) 95{ 96 97 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 98} 99 100/* 101 * Clean data of keys array 102 */ 103void 104clean_keys_array(const struct bwstring *s, struct keys_array *ka) 105{ 106 107 if (ka) { 108 for (size_t i = 0; i < keys_num; ++i) { 109 const struct key_value *kv; 110 111 kv = get_key_from_keys_array(ka, i); 112 if (kv->k && kv->k != s) 113 bwsfree(kv->k); 114 } 115 memset(ka, 0, keys_array_size()); 116 } 117} 118 119/* 120 * Get pointer to a key value in the keys set 121 */ 122struct key_value * 123get_key_from_keys_array(struct keys_array *ka, size_t ind) 124{ 125 126 return ((struct key_value *)((caddr_t)ka->key + 127 ind * (sizeof(struct key_value) + key_hint_size()))); 128} 129 130/* 131 * Set value of a key in the keys set 132 */ 133void 134set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 135{ 136 137 if (ka && keys_num > ind) { 138 struct key_value *kv; 139 140 kv = get_key_from_keys_array(ka, ind); 141 142 if (kv->k && kv->k != s) 143 bwsfree(kv->k); 144 kv->k = s; 145 } 146} 147 148/* 149 * Initialize a sort list item 150 */ 151struct sort_list_item * 152sort_list_item_alloc(void) 153{ 154 struct sort_list_item *si; 155 size_t sz; 156 157 sz = sizeof(struct sort_list_item) + keys_array_size(); 158 si = sort_malloc(sz); 159 memset(si, 0, sz); 160 161 return (si); 162} 163 164size_t 165sort_list_item_size(struct sort_list_item *si) 166{ 167 size_t ret = 0; 168 169 if (si) { 170 ret = sizeof(struct sort_list_item) + keys_array_size(); 171 if (si->str) 172 ret += bws_memsize(si->str); 173 for (size_t i = 0; i < keys_num; ++i) { 174 const struct key_value *kv; 175 176 kv = get_key_from_keys_array(&si->ka, i); 177 178 if (kv->k != si->str) 179 ret += bws_memsize(kv->k); 180 } 181 } 182 return (ret); 183} 184 185/* 186 * Calculate key for a sort list item 187 */ 188static void 189sort_list_item_make_key(struct sort_list_item *si) 190{ 191 192 preproc(si->str, &(si->ka)); 193} 194 195/* 196 * Set value of a sort list item. 197 * Return combined string and keys memory size. 198 */ 199void 200sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 201{ 202 203 if (si) { 204 clean_keys_array(si->str, &(si->ka)); 205 if (si->str) { 206 if (si->str == str) { 207 /* we are trying to reset the same string */ 208 return; 209 } else { 210 bwsfree(si->str); 211 si->str = NULL; 212 } 213 } 214 si->str = str; 215 sort_list_item_make_key(si); 216 } 217} 218 219/* 220 * De-allocate a sort list item object memory 221 */ 222void 223sort_list_item_clean(struct sort_list_item *si) 224{ 225 226 if (si) { 227 clean_keys_array(si->str, &(si->ka)); 228 if (si->str) { 229 bwsfree(si->str); 230 si->str = NULL; 231 } 232 } 233} 234 235/* 236 * Skip columns according to specs 237 */ 238static size_t 239skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 240 bool skip_blanks, bool *empty_key) 241{ 242 if (cols < 1) 243 return (BWSLEN(s) + 1); 244 245 if (skip_blanks) 246 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 247 ++start; 248 249 while (start < BWSLEN(s) && cols > 1) { 250 --cols; 251 ++start; 252 } 253 254 if (start >= BWSLEN(s)) 255 *empty_key = true; 256 257 return (start); 258} 259 260/* 261 * Skip fields according to specs 262 */ 263static size_t 264skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 265{ 266 267 if (fields < 2) { 268 if (BWSLEN(s) == 0) 269 *empty_field = true; 270 return (0); 271 } else if (!(sort_opts_vals.tflag)) { 272 size_t cpos = 0; 273 bool pb = true; 274 275 while (cpos < BWSLEN(s)) { 276 bool isblank; 277 278 isblank = iswblank(BWS_GET(s, cpos)); 279 280 if (isblank && !pb) { 281 --fields; 282 if (fields <= 1) 283 return (cpos); 284 } 285 pb = isblank; 286 ++cpos; 287 } 288 if (fields > 1) 289 *empty_field = true; 290 return (cpos); 291 } else { 292 size_t cpos = 0; 293 294 while (cpos < BWSLEN(s)) { 295 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 296 --fields; 297 if (fields <= 1) 298 return (cpos + 1); 299 } 300 ++cpos; 301 } 302 if (fields > 1) 303 *empty_field = true; 304 return (cpos); 305 } 306} 307 308/* 309 * Find fields start 310 */ 311static void 312find_field_start(const struct bwstring *s, struct key_specs *ks, 313 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 314{ 315 316 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 317 if (!*empty_field) 318 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 319 ks->pos1b, empty_key); 320 else 321 *empty_key = true; 322} 323 324/* 325 * Find end key position 326 */ 327static size_t 328find_field_end(const struct bwstring *s, struct key_specs *ks) 329{ 330 size_t f2, next_field_start, pos_end; 331 bool empty_field, empty_key; 332 333 pos_end = 0; 334 next_field_start = 0; 335 empty_field = false; 336 empty_key = false; 337 f2 = ks->f2; 338 339 if (f2 == 0) 340 return (BWSLEN(s) + 1); 341 else { 342 if (ks->c2 == 0) { 343 next_field_start = skip_fields_to_start(s, f2 + 1, 344 &empty_field); 345 if ((next_field_start > 0) && sort_opts_vals.tflag && 346 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 347 next_field_start - 1))) 348 --next_field_start; 349 } else 350 next_field_start = skip_fields_to_start(s, f2, 351 &empty_field); 352 } 353 354 if (empty_field || (next_field_start >= BWSLEN(s))) 355 return (BWSLEN(s) + 1); 356 357 if (ks->c2) { 358 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 359 ks->pos2b, &empty_key); 360 if (pos_end < BWSLEN(s)) 361 ++pos_end; 362 } else 363 pos_end = next_field_start; 364 365 return (pos_end); 366} 367 368/* 369 * Cut a field according to the key specs 370 */ 371static struct bwstring * 372cut_field(const struct bwstring *s, struct key_specs *ks) 373{ 374 struct bwstring *ret = NULL; 375 376 if (s && ks) { 377 size_t field_start, key_end, key_start, sz; 378 bool empty_field, empty_key; 379 380 field_start = 0; 381 key_start = 0; 382 empty_field = false; 383 empty_key = false; 384 385 find_field_start(s, ks, &field_start, &key_start, 386 &empty_field, &empty_key); 387 388 if (empty_key) 389 sz = 0; 390 else { 391 key_end = find_field_end(s, ks); 392 sz = (key_end < key_start) ? 0 : (key_end - key_start); 393 } 394 395 ret = bwsalloc(sz); 396 if (sz) 397 bwsnocpy(ret, s, key_start, sz); 398 } else 399 ret = bwsalloc(0); 400 401 return (ret); 402} 403 404/* 405 * Preprocesses a line applying the necessary transformations 406 * specified by command line options and returns the preprocessed 407 * string, which can be used to compare. 408 */ 409int 410preproc(struct bwstring *s, struct keys_array *ka) 411{ 412 413 if (sort_opts_vals.kflag) 414 for (size_t i = 0; i < keys_num; i++) { 415 struct bwstring *key; 416 struct key_specs *kspecs; 417 struct sort_mods *sm; 418 419 kspecs = &(keys[i]); 420 key = cut_field(s, kspecs); 421 422 sm = &(kspecs->sm); 423 if (sm->dflag) 424 key = dictionary_order(key); 425 else if (sm->iflag) 426 key = ignore_nonprinting(key); 427 if (sm->fflag || sm->Mflag) 428 key = ignore_case(key); 429 430 set_key_on_keys_array(ka, key, i); 431 } 432 else { 433 struct bwstring *ret = NULL; 434 struct sort_mods *sm = default_sort_mods; 435 436 if (sm->bflag) { 437 if (ret == NULL) 438 ret = bwsdup(s); 439 ret = ignore_leading_blanks(ret); 440 } 441 if (sm->dflag) { 442 if (ret == NULL) 443 ret = bwsdup(s); 444 ret = dictionary_order(ret); 445 } else if (sm->iflag) { 446 if (ret == NULL) 447 ret = bwsdup(s); 448 ret = ignore_nonprinting(ret); 449 } 450 if (sm->fflag || sm->Mflag) { 451 if (ret == NULL) 452 ret = bwsdup(s); 453 ret = ignore_case(ret); 454 } 455 if (ret == NULL) 456 set_key_on_keys_array(ka, s, 0); 457 else 458 set_key_on_keys_array(ka, ret, 0); 459 } 460 461 return 0; 462} 463 464cmpcoll_t 465get_sort_func(struct sort_mods *sm) 466{ 467 468 if (sm->nflag) 469 return (numcoll); 470 else if (sm->hflag) 471 return (hnumcoll); 472 else if (sm->gflag) 473 return (gnumcoll); 474 else if (sm->Mflag) 475 return (monthcoll); 476 else if (sm->Rflag) 477 return (randomcoll); 478 else if (sm->Vflag) 479 return (versioncoll); 480 else 481 return (wstrcoll); 482} 483 484/* 485 * Compares the given strings. Returns a positive number if 486 * the first precedes the second, a negative number if the second is 487 * the preceding one, and zero if they are equal. This function calls 488 * the underlying collate functions, which done the actual comparison. 489 */ 490int 491key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 492{ 493 struct key_value *kv1, *kv2; 494 struct sort_mods *sm; 495 int res = 0; 496 497 for (size_t i = 0; i < keys_num; ++i) { 498 kv1 = get_key_from_keys_array(ps1, i); 499 kv2 = get_key_from_keys_array(ps2, i); 500 sm = &(keys[i].sm); 501 502 if (sm->rflag) 503 res = sm->func(kv2, kv1, offset); 504 else 505 res = sm->func(kv1, kv2, offset); 506 507 if (res) 508 break; 509 510 /* offset applies to only the first key */ 511 offset = 0; 512 } 513 return (res); 514} 515 516/* 517 * Compare two strings. 518 * Plain symbol-by-symbol comparison. 519 */ 520int 521top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 522{ 523 524 if (default_sort_mods->rflag) { 525 const struct bwstring *tmp; 526 527 tmp = s1; 528 s1 = s2; 529 s2 = tmp; 530 } 531 532 return (bwscoll(s1, s2, 0)); 533} 534 535/* 536 * Compare a string and a sort list item, according to the sort specs. 537 */ 538int 539str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 540{ 541 struct keys_array *ka1; 542 int ret = 0; 543 544 ka1 = keys_array_alloc(); 545 546 preproc(str1, ka1); 547 548 sort_list_item_make_key(*ss2); 549 550 if (debug_sort) { 551 bwsprintf(stdout, str1, "; s1=<", ">"); 552 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 553 } 554 555 ret = key_coll(ka1, &((*ss2)->ka), 0); 556 557 if (debug_sort) 558 printf("; cmp1=%d", ret); 559 560 clean_keys_array(str1, ka1); 561 sort_free(ka1); 562 563 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 564 ret = top_level_str_coll(str1, ((*ss2)->str)); 565 if (debug_sort) 566 printf("; cmp2=%d", ret); 567 } 568 569 if (debug_sort) 570 printf("\n"); 571 572 return (ret); 573} 574 575/* 576 * Compare two sort list items, according to the sort specs. 577 */ 578int 579list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 580 size_t offset) 581{ 582 int ret; 583 584 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 585 586 if (debug_sort) { 587 if (offset) 588 printf("; offset=%d", (int) offset); 589 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 590 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 591 printf("; cmp1=%d\n", ret); 592 } 593 594 if (ret) 595 return (ret); 596 597 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 598 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 599 if (debug_sort) 600 printf("; cmp2=%d\n", ret); 601 } 602 603 return (ret); 604} 605 606/* 607 * Compare two sort list items, according to the sort specs. 608 */ 609int 610list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 611{ 612 613 return (list_coll_offset(ss1, ss2, 0)); 614} 615 616#define LSCDEF(N) \ 617static int \ 618list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 619{ \ 620 \ 621 return (list_coll_offset(ss1, ss2, N)); \ 622} 623 624LSCDEF(1) 625LSCDEF(2) 626LSCDEF(3) 627LSCDEF(4) 628LSCDEF(5) 629LSCDEF(6) 630LSCDEF(7) 631LSCDEF(8) 632LSCDEF(9) 633LSCDEF(10) 634LSCDEF(11) 635LSCDEF(12) 636LSCDEF(13) 637LSCDEF(14) 638LSCDEF(15) 639LSCDEF(16) 640LSCDEF(17) 641LSCDEF(18) 642LSCDEF(19) 643LSCDEF(20) 644 645listcoll_t 646get_list_call_func(size_t offset) 647{ 648 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 649 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 650 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 651 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 652 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 653 list_coll_18, list_coll_19, list_coll_20 }; 654 655 if (offset <= 20) 656 return (lsarray[offset]); 657 658 return (list_coll); 659} 660 661/* 662 * Compare two sort list items, only by their original string. 663 */ 664int 665list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 666{ 667 668 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 669} 670 671/* 672 * Maximum size of a number in the string (before or after decimal point) 673 */ 674#define MAX_NUM_SIZE (128) 675 676/* 677 * Set suffix value 678 */ 679static void setsuffix(wchar_t c, unsigned char *si) 680{ 681 switch (c){ 682 case L'k': 683 case L'K': 684 *si = 1; 685 break; 686 case L'M': 687 *si = 2; 688 break; 689 case L'G': 690 *si = 3; 691 break; 692 case L'T': 693 *si = 4; 694 break; 695 case L'P': 696 *si = 5; 697 break; 698 case L'E': 699 *si = 6; 700 break; 701 case L'Z': 702 *si = 7; 703 break; 704 case L'Y': 705 *si = 8; 706 break; 707 default: 708 *si = 0; 709 }; 710} 711 712/* 713 * Read string s and parse the string into a fixed-decimal-point number. 714 * sign equals -1 if the number is negative (explicit plus is not allowed, 715 * according to GNU sort's "info sort". 716 * The number part before decimal point is in the smain, after the decimal 717 * point is in sfrac, tail is the pointer to the remainder of the string. 718 */ 719static int 720read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 721{ 722 bwstring_iterator s; 723 724 s = bws_begin(s0); 725 726 /* always end the fraction with zero, even if we have no fraction */ 727 sfrac[0] = 0; 728 729 while (iswblank(bws_get_iter_value(s))) 730 s = bws_iterator_inc(s, 1); 731 732 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 733 *sign = -1; 734 s = bws_iterator_inc(s, 1); 735 } 736 737 // This is '0', not '\0', do not change this 738 while (iswdigit(bws_get_iter_value(s)) && 739 (bws_get_iter_value(s) == L'0')) 740 s = bws_iterator_inc(s, 1); 741 742 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 743 if (iswdigit(bws_get_iter_value(s))) { 744 smain[*main_len] = bws_get_iter_value(s); 745 s = bws_iterator_inc(s, 1); 746 *main_len += 1; 747 } else if (symbol_thousands_sep && 748 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 749 s = bws_iterator_inc(s, 1); 750 else 751 break; 752 } 753 754 smain[*main_len] = 0; 755 756 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 757 s = bws_iterator_inc(s, 1); 758 while (iswdigit(bws_get_iter_value(s)) && 759 *frac_len < MAX_NUM_SIZE) { 760 sfrac[*frac_len] = bws_get_iter_value(s); 761 s = bws_iterator_inc(s, 1); 762 *frac_len += 1; 763 } 764 sfrac[*frac_len] = 0; 765 766 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 767 --(*frac_len); 768 sfrac[*frac_len] = L'\0'; 769 } 770 } 771 772 setsuffix(bws_get_iter_value(s),si); 773 774 if ((*main_len + *frac_len) == 0) 775 *sign = 0; 776 777 return (0); 778} 779 780/* 781 * Implements string sort. 782 */ 783static int 784wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 785{ 786 787 if (debug_sort) { 788 if (offset) 789 printf("; offset=%d\n", (int) offset); 790 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 791 printf("(%zu)", BWSLEN(kv1->k)); 792 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 793 printf("(%zu)", BWSLEN(kv2->k)); 794 } 795 796 return (bwscoll(kv1->k, kv2->k, offset)); 797} 798 799/* 800 * Compare two suffixes 801 */ 802static inline int 803cmpsuffix(unsigned char si1, unsigned char si2) 804{ 805 806 return ((char)si1 - (char)si2); 807} 808 809/* 810 * Implements numeric sort for -n and -h. 811 */ 812static int 813numcoll_impl(struct key_value *kv1, struct key_value *kv2, 814 size_t offset __unused, bool use_suffix) 815{ 816 struct bwstring *s1, *s2; 817 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 818 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 819 int cmp_res, sign1, sign2; 820 size_t frac1, frac2, main1, main2; 821 unsigned char SI1, SI2; 822 bool e1, e2, key1_read, key2_read; 823 824 s1 = kv1->k; 825 s2 = kv2->k; 826 sign1 = sign2 = 0; 827 main1 = main2 = 0; 828 frac1 = frac2 = 0; 829 830 cmp_res = 0; 831 key1_read = key2_read = false; 832 833 if (debug_sort) { 834 bwsprintf(stdout, s1, "; k1=<", ">"); 835 bwsprintf(stdout, s2, ", k2=<", ">"); 836 } 837 838 if (s1 == s2) 839 return (0); 840 841 if (kv1->hint->status == HS_UNINITIALIZED) { 842 /* read the number from the string */ 843 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 844 key1_read = true; 845 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 846 if(main1 < 1 && frac1 < 1) 847 kv1->hint->v.nh.empty=true; 848 kv1->hint->v.nh.si = SI1; 849 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 850 HS_INITIALIZED : HS_ERROR; 851 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 852 } 853 854 if (kv2->hint->status == HS_UNINITIALIZED) { 855 /* read the number from the string */ 856 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 857 key2_read = true; 858 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 859 if(main2 < 1 && frac2 < 1) 860 kv2->hint->v.nh.empty=true; 861 kv2->hint->v.nh.si = SI2; 862 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 863 HS_INITIALIZED : HS_ERROR; 864 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 865 } 866 867 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 868 HS_INITIALIZED) { 869 unsigned long long n1, n2; 870 bool neg1, neg2; 871 872 e1 = kv1->hint->v.nh.empty; 873 e2 = kv2->hint->v.nh.empty; 874 875 if (e1 && e2) 876 return (0); 877 878 neg1 = kv1->hint->v.nh.neg; 879 neg2 = kv2->hint->v.nh.neg; 880 881 if (neg1 && !neg2) 882 return (-1); 883 if (neg2 && !neg1) 884 return (+1); 885 886 if (e1) 887 return (neg2 ? +1 : -1); 888 else if (e2) 889 return (neg1 ? -1 : +1); 890 891 892 if (use_suffix) { 893 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 894 if (cmp_res) 895 return (neg1 ? -cmp_res : cmp_res); 896 } 897 898 n1 = kv1->hint->v.nh.n1; 899 n2 = kv2->hint->v.nh.n1; 900 if (n1 < n2) 901 return (neg1 ? +1 : -1); 902 else if (n1 > n2) 903 return (neg1 ? -1 : +1); 904 } 905 906 /* read the numbers from the strings */ 907 if (!key1_read) 908 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 909 if (!key2_read) 910 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 911 912 e1 = ((main1 + frac1) == 0); 913 e2 = ((main2 + frac2) == 0); 914 915 if (e1 && e2) 916 return (0); 917 918 /* we know the result if the signs are different */ 919 if (sign1 < 0 && sign2 >= 0) 920 return (-1); 921 if (sign1 >= 0 && sign2 < 0) 922 return (+1); 923 924 if (e1) 925 return ((sign2 < 0) ? +1 : -1); 926 else if (e2) 927 return ((sign1 < 0) ? -1 : +1); 928 929 if (use_suffix) { 930 cmp_res = cmpsuffix(SI1, SI2); 931 if (cmp_res) 932 return ((sign1 < 0) ? -cmp_res : cmp_res); 933 } 934 935 /* if both numbers are empty assume that the strings are equal */ 936 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 937 return (0); 938 939 /* 940 * if the main part is of different size, we know the result 941 * (because the leading zeros are removed) 942 */ 943 if (main1 < main2) 944 cmp_res = -1; 945 else if (main1 > main2) 946 cmp_res = +1; 947 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 948 else 949 cmp_res = wcscmp(smain1, smain2); 950 951 /* check fraction */ 952 if (!cmp_res) 953 cmp_res = wcscmp(sfrac1, sfrac2); 954 955 if (!cmp_res) 956 return (0); 957 958 /* reverse result if the signs are negative */ 959 if (sign1 < 0 && sign2 < 0) 960 cmp_res = -cmp_res; 961 962 return (cmp_res); 963} 964 965/* 966 * Implements numeric sort (-n). 967 */ 968static int 969numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 970{ 971 972 return (numcoll_impl(kv1, kv2, offset, false)); 973} 974 975/* 976 * Implements 'human' numeric sort (-h). 977 */ 978static int 979hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 980{ 981 982 return (numcoll_impl(kv1, kv2, offset, true)); 983} 984 985/* 986 * Implements random sort (-R). 987 */ 988static int 989randomcoll(struct key_value *kv1, struct key_value *kv2, 990 size_t offset __unused) 991{ 992 struct bwstring *s1, *s2; 993 MD5_CTX ctx1, ctx2; 994 char *b1, *b2; 995 996 s1 = kv1->k; 997 s2 = kv2->k; 998 999 if (debug_sort) { 1000 bwsprintf(stdout, s1, "; k1=<", ">"); 1001 bwsprintf(stdout, s2, ", k2=<", ">"); 1002 } 1003 1004 if (s1 == s2) 1005 return (0); 1006 1007 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 1008 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 1009 1010 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1011 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1012 b1 = MD5End(&ctx1, NULL); 1013 b2 = MD5End(&ctx2, NULL); 1014 if (b1 == NULL) { 1015 if (b2 == NULL) 1016 return (0); 1017 else { 1018 sort_free(b2); 1019 return (-1); 1020 } 1021 } else if (b2 == NULL) { 1022 sort_free(b1); 1023 return (+1); 1024 } else { 1025 int cmp_res; 1026 1027 cmp_res = strcmp(b1,b2); 1028 sort_free(b1); 1029 sort_free(b2); 1030 1031 if (!cmp_res) 1032 cmp_res = bwscoll(s1, s2, 0); 1033 1034 return (cmp_res); 1035 } 1036} 1037 1038/* 1039 * Implements version sort (-V). 1040 */ 1041static int 1042versioncoll(struct key_value *kv1, struct key_value *kv2, 1043 size_t offset __unused) 1044{ 1045 struct bwstring *s1, *s2; 1046 1047 s1 = kv1->k; 1048 s2 = kv2->k; 1049 1050 if (debug_sort) { 1051 bwsprintf(stdout, s1, "; k1=<", ">"); 1052 bwsprintf(stdout, s2, ", k2=<", ">"); 1053 } 1054 1055 if (s1 == s2) 1056 return (0); 1057 1058 return (vcmp(s1, s2)); 1059} 1060 1061/* 1062 * Check for minus infinity 1063 */ 1064static inline bool 1065huge_minus(double d, int err1) 1066{ 1067 1068 if (err1 == ERANGE) 1069 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1070 return (+1); 1071 1072 return (0); 1073} 1074 1075/* 1076 * Check for plus infinity 1077 */ 1078static inline bool 1079huge_plus(double d, int err1) 1080{ 1081 1082 if (err1 == ERANGE) 1083 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1084 return (+1); 1085 1086 return (0); 1087} 1088 1089/* 1090 * Check whether a function is a NAN 1091 */ 1092static bool 1093is_nan(double d) 1094{ 1095 1096 return ((d == NAN) || (isnan(d))); 1097} 1098 1099/* 1100 * Compare two NANs 1101 */ 1102static int 1103cmp_nans(double d1, double d2) 1104{ 1105 1106 if (d1 < d2) 1107 return (-1); 1108 if (d1 > d2) 1109 return (+1); 1110 return (0); 1111} 1112 1113/* 1114 * Implements general numeric sort (-g). 1115 */ 1116static int 1117gnumcoll(struct key_value *kv1, struct key_value *kv2, 1118 size_t offset __unused) 1119{ 1120 double d1, d2; 1121 int err1, err2; 1122 bool empty1, empty2, key1_read, key2_read; 1123 1124 d1 = d2 = 0; 1125 err1 = err2 = 0; 1126 key1_read = key2_read = false; 1127 1128 if (debug_sort) { 1129 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1130 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1131 } 1132 1133 if (kv1->hint->status == HS_UNINITIALIZED) { 1134 errno = 0; 1135 d1 = bwstod(kv1->k, &empty1); 1136 err1 = errno; 1137 1138 if (empty1) 1139 kv1->hint->v.gh.notnum = true; 1140 else if (err1 == 0) { 1141 kv1->hint->v.gh.d = d1; 1142 kv1->hint->v.gh.nan = is_nan(d1); 1143 kv1->hint->status = HS_INITIALIZED; 1144 } else 1145 kv1->hint->status = HS_ERROR; 1146 1147 key1_read = true; 1148 } 1149 1150 if (kv2->hint->status == HS_UNINITIALIZED) { 1151 errno = 0; 1152 d2 = bwstod(kv2->k, &empty2); 1153 err2 = errno; 1154 1155 if (empty2) 1156 kv2->hint->v.gh.notnum = true; 1157 else if (err2 == 0) { 1158 kv2->hint->v.gh.d = d2; 1159 kv2->hint->v.gh.nan = is_nan(d2); 1160 kv2->hint->status = HS_INITIALIZED; 1161 } else 1162 kv2->hint->status = HS_ERROR; 1163 1164 key2_read = true; 1165 } 1166 1167 if (kv1->hint->status == HS_INITIALIZED && 1168 kv2->hint->status == HS_INITIALIZED) { 1169 if (kv1->hint->v.gh.notnum) 1170 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1171 else if (kv2->hint->v.gh.notnum) 1172 return (+1); 1173 1174 if (kv1->hint->v.gh.nan) 1175 return ((kv2->hint->v.gh.nan) ? 1176 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1177 -1); 1178 else if (kv2->hint->v.gh.nan) 1179 return (+1); 1180 1181 d1 = kv1->hint->v.gh.d; 1182 d2 = kv2->hint->v.gh.d; 1183 1184 if (d1 < d2) 1185 return (-1); 1186 else if (d1 > d2) 1187 return (+1); 1188 else 1189 return (0); 1190 } 1191 1192 if (!key1_read) { 1193 errno = 0; 1194 d1 = bwstod(kv1->k, &empty1); 1195 err1 = errno; 1196 } 1197 1198 if (!key2_read) { 1199 errno = 0; 1200 d2 = bwstod(kv2->k, &empty2); 1201 err2 = errno; 1202 } 1203 1204 /* Non-value case: */ 1205 if (empty1) 1206 return (empty2 ? 0 : -1); 1207 else if (empty2) 1208 return (+1); 1209 1210 /* NAN case */ 1211 if (is_nan(d1)) 1212 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1213 else if (is_nan(d2)) 1214 return (+1); 1215 1216 /* Infinities */ 1217 if (err1 == ERANGE || err2 == ERANGE) { 1218 /* Minus infinity case */ 1219 if (huge_minus(d1, err1)) { 1220 if (huge_minus(d2, err2)) { 1221 if (d1 < d2) 1222 return (-1); 1223 if (d1 > d2) 1224 return (+1); 1225 return (0); 1226 } else 1227 return (-1); 1228 1229 } else if (huge_minus(d2, err2)) { 1230 if (huge_minus(d1, err1)) { 1231 if (d1 < d2) 1232 return (-1); 1233 if (d1 > d2) 1234 return (+1); 1235 return (0); 1236 } else 1237 return (+1); 1238 } 1239 1240 /* Plus infinity case */ 1241 if (huge_plus(d1, err1)) { 1242 if (huge_plus(d2, err2)) { 1243 if (d1 < d2) 1244 return (-1); 1245 if (d1 > d2) 1246 return (+1); 1247 return (0); 1248 } else 1249 return (+1); 1250 } else if (huge_plus(d2, err2)) { 1251 if (huge_plus(d1, err1)) { 1252 if (d1 < d2) 1253 return (-1); 1254 if (d1 > d2) 1255 return (+1); 1256 return (0); 1257 } else 1258 return (-1); 1259 } 1260 } 1261 1262 if (d1 < d2) 1263 return (-1); 1264 if (d1 > d2) 1265 return (+1); 1266 1267 return (0); 1268} 1269 1270/* 1271 * Implements month sort (-M). 1272 */ 1273static int 1274monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1275{ 1276 int val1, val2; 1277 bool key1_read, key2_read; 1278 1279 val1 = val2 = 0; 1280 key1_read = key2_read = false; 1281 1282 if (debug_sort) { 1283 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1284 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1285 } 1286 1287 if (kv1->hint->status == HS_UNINITIALIZED) { 1288 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1289 key1_read = true; 1290 kv1->hint->status = HS_INITIALIZED; 1291 } 1292 1293 if (kv2->hint->status == HS_UNINITIALIZED) { 1294 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1295 key2_read = true; 1296 kv2->hint->status = HS_INITIALIZED; 1297 } 1298 1299 if (kv1->hint->status == HS_INITIALIZED) { 1300 val1 = kv1->hint->v.Mh.m; 1301 key1_read = true; 1302 } 1303 1304 if (kv2->hint->status == HS_INITIALIZED) { 1305 val2 = kv2->hint->v.Mh.m; 1306 key2_read = true; 1307 } 1308 1309 if (!key1_read) 1310 val1 = bws_month_score(kv1->k); 1311 if (!key2_read) 1312 val2 = bws_month_score(kv2->k); 1313 1314 if (val1 == val2) { 1315 return (0); 1316 } 1317 if (val1 < val2) 1318 return (-1); 1319 return (+1); 1320} 1321