1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD$"); 32 33#include <sys/types.h> 34 35#include <errno.h> 36#include <err.h> 37#include <langinfo.h> 38#include <limits.h> 39#include <math.h> 40#include <md5.h> 41#include <stdlib.h> 42#include <string.h> 43#include <wchar.h> 44#include <wctype.h> 45 46#include "coll.h" 47#include "vsort.h" 48 49struct key_specs *keys; 50size_t keys_num = 0; 51 52wint_t symbol_decimal_point = L'.'; 53/* there is no default thousands separator in collate rules: */ 54wint_t symbol_thousands_sep = 0; 55wint_t symbol_negative_sign = L'-'; 56wint_t symbol_positive_sign = L'+'; 57 58static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 59static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 60static int monthcoll(struct key_value*, struct key_value *, size_t offset); 61static int numcoll(struct key_value*, struct key_value *, size_t offset); 62static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 63static int randomcoll(struct key_value*, struct key_value *, size_t offset); 64static int versioncoll(struct key_value*, struct key_value *, size_t offset); 65 66/* 67 * Allocate keys array 68 */ 69struct keys_array * 70keys_array_alloc(void) 71{ 72 struct keys_array *ka; 73 size_t sz; 74 75 sz = keys_array_size(); 76 ka = sort_malloc(sz); 77 memset(ka, 0, sz); 78 79 return (ka); 80} 81 82/* 83 * Calculate whether we need key hint space 84 */ 85static size_t 86key_hint_size(void) 87{ 88 89 return (need_hint ? sizeof(struct key_hint) : 0); 90} 91 92/* 93 * Calculate keys array size 94 */ 95size_t 96keys_array_size(void) 97{ 98 99 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 100} 101 102/* 103 * Clean data of keys array 104 */ 105void 106clean_keys_array(const struct bwstring *s, struct keys_array *ka) 107{ 108 109 if (ka) { 110 for (size_t i = 0; i < keys_num; ++i) { 111 const struct key_value *kv; 112 113 kv = get_key_from_keys_array(ka, i); 114 if (kv->k && kv->k != s) 115 bwsfree(kv->k); 116 } 117 memset(ka, 0, keys_array_size()); 118 } 119} 120 121/* 122 * Get pointer to a key value in the keys set 123 */ 124struct key_value * 125get_key_from_keys_array(struct keys_array *ka, size_t ind) 126{ 127 128 return ((struct key_value *)((caddr_t)ka->key + 129 ind * (sizeof(struct key_value) + key_hint_size()))); 130} 131 132/* 133 * Set value of a key in the keys set 134 */ 135void 136set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 137{ 138 139 if (ka && keys_num > ind) { 140 struct key_value *kv; 141 142 kv = get_key_from_keys_array(ka, ind); 143 144 if (kv->k && kv->k != s) 145 bwsfree(kv->k); 146 kv->k = s; 147 } 148} 149 150/* 151 * Initialize a sort list item 152 */ 153struct sort_list_item * 154sort_list_item_alloc(void) 155{ 156 struct sort_list_item *si; 157 size_t sz; 158 159 sz = sizeof(struct sort_list_item) + keys_array_size(); 160 si = sort_malloc(sz); 161 memset(si, 0, sz); 162 163 return (si); 164} 165 166size_t 167sort_list_item_size(struct sort_list_item *si) 168{ 169 size_t ret = 0; 170 171 if (si) { 172 ret = sizeof(struct sort_list_item) + keys_array_size(); 173 if (si->str) 174 ret += bws_memsize(si->str); 175 for (size_t i = 0; i < keys_num; ++i) { 176 const struct key_value *kv; 177 178 kv = get_key_from_keys_array(&si->ka, i); 179 180 if (kv->k != si->str) 181 ret += bws_memsize(kv->k); 182 } 183 } 184 return (ret); 185} 186 187/* 188 * Calculate key for a sort list item 189 */ 190static void 191sort_list_item_make_key(struct sort_list_item *si) 192{ 193 194 preproc(si->str, &(si->ka)); 195} 196 197/* 198 * Set value of a sort list item. 199 * Return combined string and keys memory size. 200 */ 201void 202sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 203{ 204 205 if (si) { 206 clean_keys_array(si->str, &(si->ka)); 207 if (si->str) { 208 if (si->str == str) { 209 /* we are trying to reset the same string */ 210 return; 211 } else { 212 bwsfree(si->str); 213 si->str = NULL; 214 } 215 } 216 si->str = str; 217 sort_list_item_make_key(si); 218 } 219} 220 221/* 222 * De-allocate a sort list item object memory 223 */ 224void 225sort_list_item_clean(struct sort_list_item *si) 226{ 227 228 if (si) { 229 clean_keys_array(si->str, &(si->ka)); 230 if (si->str) { 231 bwsfree(si->str); 232 si->str = NULL; 233 } 234 } 235} 236 237/* 238 * Skip columns according to specs 239 */ 240static size_t 241skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 242 bool skip_blanks, bool *empty_key) 243{ 244 if (cols < 1) 245 return (BWSLEN(s) + 1); 246 247 if (skip_blanks) 248 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 249 ++start; 250 251 while (start < BWSLEN(s) && cols > 1) { 252 --cols; 253 ++start; 254 } 255 256 if (start >= BWSLEN(s)) 257 *empty_key = true; 258 259 return (start); 260} 261 262/* 263 * Skip fields according to specs 264 */ 265static size_t 266skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 267{ 268 269 if (fields < 2) { 270 if (BWSLEN(s) == 0) 271 *empty_field = true; 272 return (0); 273 } else if (!(sort_opts_vals.tflag)) { 274 size_t cpos = 0; 275 bool pb = true; 276 277 while (cpos < BWSLEN(s)) { 278 bool isblank; 279 280 isblank = iswblank(BWS_GET(s, cpos)); 281 282 if (isblank && !pb) { 283 --fields; 284 if (fields <= 1) 285 return (cpos); 286 } 287 pb = isblank; 288 ++cpos; 289 } 290 if (fields > 1) 291 *empty_field = true; 292 return (cpos); 293 } else { 294 size_t cpos = 0; 295 296 while (cpos < BWSLEN(s)) { 297 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 298 --fields; 299 if (fields <= 1) 300 return (cpos + 1); 301 } 302 ++cpos; 303 } 304 if (fields > 1) 305 *empty_field = true; 306 return (cpos); 307 } 308} 309 310/* 311 * Find fields start 312 */ 313static void 314find_field_start(const struct bwstring *s, struct key_specs *ks, 315 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 316{ 317 318 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 319 if (!*empty_field) 320 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 321 ks->pos1b, empty_key); 322 else 323 *empty_key = true; 324} 325 326/* 327 * Find end key position 328 */ 329static size_t 330find_field_end(const struct bwstring *s, struct key_specs *ks) 331{ 332 size_t f2, next_field_start, pos_end; 333 bool empty_field, empty_key; 334 335 empty_field = false; 336 empty_key = false; 337 f2 = ks->f2; 338 339 if (f2 == 0) 340 return (BWSLEN(s) + 1); 341 else { 342 if (ks->c2 == 0) { 343 next_field_start = skip_fields_to_start(s, f2 + 1, 344 &empty_field); 345 if ((next_field_start > 0) && sort_opts_vals.tflag && 346 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 347 next_field_start - 1))) 348 --next_field_start; 349 } else 350 next_field_start = skip_fields_to_start(s, f2, 351 &empty_field); 352 } 353 354 if (empty_field || (next_field_start >= BWSLEN(s))) 355 return (BWSLEN(s) + 1); 356 357 if (ks->c2) { 358 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 359 ks->pos2b, &empty_key); 360 if (pos_end < BWSLEN(s)) 361 ++pos_end; 362 } else 363 pos_end = next_field_start; 364 365 return (pos_end); 366} 367 368/* 369 * Cut a field according to the key specs 370 */ 371static struct bwstring * 372cut_field(const struct bwstring *s, struct key_specs *ks) 373{ 374 struct bwstring *ret = NULL; 375 376 if (s && ks) { 377 size_t field_start, key_end, key_start, sz; 378 bool empty_field, empty_key; 379 380 field_start = 0; 381 key_start = 0; 382 empty_field = false; 383 empty_key = false; 384 385 find_field_start(s, ks, &field_start, &key_start, 386 &empty_field, &empty_key); 387 388 if (empty_key) 389 sz = 0; 390 else { 391 key_end = find_field_end(s, ks); 392 sz = (key_end < key_start) ? 0 : (key_end - key_start); 393 } 394 395 ret = bwsalloc(sz); 396 if (sz) 397 bwsnocpy(ret, s, key_start, sz); 398 } else 399 ret = bwsalloc(0); 400 401 return (ret); 402} 403 404/* 405 * Preprocesses a line applying the necessary transformations 406 * specified by command line options and returns the preprocessed 407 * string, which can be used to compare. 408 */ 409int 410preproc(struct bwstring *s, struct keys_array *ka) 411{ 412 413 if (sort_opts_vals.kflag) 414 for (size_t i = 0; i < keys_num; i++) { 415 struct bwstring *key; 416 struct key_specs *kspecs; 417 struct sort_mods *sm; 418 419 kspecs = &(keys[i]); 420 key = cut_field(s, kspecs); 421 422 sm = &(kspecs->sm); 423 if (sm->dflag) 424 key = dictionary_order(key); 425 else if (sm->iflag) 426 key = ignore_nonprinting(key); 427 if (sm->fflag || sm->Mflag) 428 key = ignore_case(key); 429 430 set_key_on_keys_array(ka, key, i); 431 } 432 else { 433 struct bwstring *ret = NULL; 434 struct sort_mods *sm = default_sort_mods; 435 436 if (sm->bflag) { 437 if (ret == NULL) 438 ret = bwsdup(s); 439 ret = ignore_leading_blanks(ret); 440 } 441 if (sm->dflag) { 442 if (ret == NULL) 443 ret = bwsdup(s); 444 ret = dictionary_order(ret); 445 } else if (sm->iflag) { 446 if (ret == NULL) 447 ret = bwsdup(s); 448 ret = ignore_nonprinting(ret); 449 } 450 if (sm->fflag || sm->Mflag) { 451 if (ret == NULL) 452 ret = bwsdup(s); 453 ret = ignore_case(ret); 454 } 455 if (ret == NULL) 456 set_key_on_keys_array(ka, s, 0); 457 else 458 set_key_on_keys_array(ka, ret, 0); 459 } 460 461 return 0; 462} 463 464cmpcoll_t 465get_sort_func(struct sort_mods *sm) 466{ 467 468 if (sm->nflag) 469 return (numcoll); 470 else if (sm->hflag) 471 return (hnumcoll); 472 else if (sm->gflag) 473 return (gnumcoll); 474 else if (sm->Mflag) 475 return (monthcoll); 476 else if (sm->Rflag) 477 return (randomcoll); 478 else if (sm->Vflag) 479 return (versioncoll); 480 else 481 return (wstrcoll); 482} 483 484/* 485 * Compares the given strings. Returns a positive number if 486 * the first precedes the second, a negative number if the second is 487 * the preceding one, and zero if they are equal. This function calls 488 * the underlying collate functions, which done the actual comparison. 489 */ 490int 491key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 492{ 493 struct key_value *kv1, *kv2; 494 struct sort_mods *sm; 495 int res = 0; 496 497 for (size_t i = 0; i < keys_num; ++i) { 498 kv1 = get_key_from_keys_array(ps1, i); 499 kv2 = get_key_from_keys_array(ps2, i); 500 sm = &(keys[i].sm); 501 502 if (sm->rflag) 503 res = sm->func(kv2, kv1, offset); 504 else 505 res = sm->func(kv1, kv2, offset); 506 507 if (res) 508 break; 509 510 /* offset applies to only the first key */ 511 offset = 0; 512 } 513 return (res); 514} 515 516/* 517 * Compare two strings. 518 * Plain symbol-by-symbol comparison. 519 */ 520int 521top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 522{ 523 524 if (default_sort_mods->rflag) { 525 const struct bwstring *tmp; 526 527 tmp = s1; 528 s1 = s2; 529 s2 = tmp; 530 } 531 532 return (bwscoll(s1, s2, 0)); 533} 534 535/* 536 * Compare a string and a sort list item, according to the sort specs. 537 */ 538int 539str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 540{ 541 struct keys_array *ka1; 542 int ret = 0; 543 544 ka1 = keys_array_alloc(); 545 546 preproc(str1, ka1); 547 548 sort_list_item_make_key(*ss2); 549 550 if (debug_sort) { 551 bwsprintf(stdout, str1, "; s1=<", ">"); 552 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 553 } 554 555 ret = key_coll(ka1, &((*ss2)->ka), 0); 556 557 if (debug_sort) 558 printf("; cmp1=%d", ret); 559 560 clean_keys_array(str1, ka1); 561 sort_free(ka1); 562 563 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 564 ret = top_level_str_coll(str1, ((*ss2)->str)); 565 if (debug_sort) 566 printf("; cmp2=%d", ret); 567 } 568 569 if (debug_sort) 570 printf("\n"); 571 572 return (ret); 573} 574 575/* 576 * Compare two sort list items, according to the sort specs. 577 */ 578int 579list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 580 size_t offset) 581{ 582 int ret; 583 584 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 585 586 if (debug_sort) { 587 if (offset) 588 printf("; offset=%d", (int) offset); 589 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 590 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 591 printf("; cmp1=%d\n", ret); 592 } 593 594 if (ret) 595 return (ret); 596 597 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 598 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 599 if (debug_sort) 600 printf("; cmp2=%d\n", ret); 601 } 602 603 return (ret); 604} 605 606/* 607 * Compare two sort list items, according to the sort specs. 608 */ 609int 610list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 611{ 612 613 return (list_coll_offset(ss1, ss2, 0)); 614} 615 616#define LSCDEF(N) \ 617static int \ 618list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 619{ \ 620 \ 621 return (list_coll_offset(ss1, ss2, N)); \ 622} 623 624LSCDEF(1) 625LSCDEF(2) 626LSCDEF(3) 627LSCDEF(4) 628LSCDEF(5) 629LSCDEF(6) 630LSCDEF(7) 631LSCDEF(8) 632LSCDEF(9) 633LSCDEF(10) 634LSCDEF(11) 635LSCDEF(12) 636LSCDEF(13) 637LSCDEF(14) 638LSCDEF(15) 639LSCDEF(16) 640LSCDEF(17) 641LSCDEF(18) 642LSCDEF(19) 643LSCDEF(20) 644 645listcoll_t 646get_list_call_func(size_t offset) 647{ 648 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 649 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 650 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 651 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 652 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 653 list_coll_18, list_coll_19, list_coll_20 }; 654 655 if (offset <= 20) 656 return (lsarray[offset]); 657 658 return (list_coll); 659} 660 661/* 662 * Compare two sort list items, only by their original string. 663 */ 664int 665list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 666{ 667 668 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 669} 670 671/* 672 * Maximum size of a number in the string (before or after decimal point) 673 */ 674#define MAX_NUM_SIZE (128) 675 676/* 677 * Set suffix value 678 */ 679static void setsuffix(wchar_t c, unsigned char *si) 680{ 681 switch (c){ 682 case L'k': 683 case L'K': 684 *si = 1; 685 break; 686 case L'M': 687 *si = 2; 688 break; 689 case L'G': 690 *si = 3; 691 break; 692 case L'T': 693 *si = 4; 694 break; 695 case L'P': 696 *si = 5; 697 break; 698 case L'E': 699 *si = 6; 700 break; 701 case L'Z': 702 *si = 7; 703 break; 704 case L'Y': 705 *si = 8; 706 break; 707 default: 708 *si = 0; 709 } 710} 711 712/* 713 * Read string s and parse the string into a fixed-decimal-point number. 714 * sign equals -1 if the number is negative (explicit plus is not allowed, 715 * according to GNU sort's "info sort". 716 * The number part before decimal point is in the smain, after the decimal 717 * point is in sfrac, tail is the pointer to the remainder of the string. 718 */ 719static int 720read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 721{ 722 bwstring_iterator s; 723 724 s = bws_begin(s0); 725 726 /* always end the fraction with zero, even if we have no fraction */ 727 sfrac[0] = 0; 728 729 while (iswblank(bws_get_iter_value(s))) 730 s = bws_iterator_inc(s, 1); 731 732 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 733 *sign = -1; 734 s = bws_iterator_inc(s, 1); 735 } 736 737 // This is '0', not '\0', do not change this 738 while (iswdigit(bws_get_iter_value(s)) && 739 (bws_get_iter_value(s) == L'0')) 740 s = bws_iterator_inc(s, 1); 741 742 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 743 if (iswdigit(bws_get_iter_value(s))) { 744 smain[*main_len] = bws_get_iter_value(s); 745 s = bws_iterator_inc(s, 1); 746 *main_len += 1; 747 } else if (symbol_thousands_sep && 748 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 749 s = bws_iterator_inc(s, 1); 750 else 751 break; 752 } 753 754 smain[*main_len] = 0; 755 756 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 757 s = bws_iterator_inc(s, 1); 758 while (iswdigit(bws_get_iter_value(s)) && 759 *frac_len < MAX_NUM_SIZE) { 760 sfrac[*frac_len] = bws_get_iter_value(s); 761 s = bws_iterator_inc(s, 1); 762 *frac_len += 1; 763 } 764 sfrac[*frac_len] = 0; 765 766 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 767 --(*frac_len); 768 sfrac[*frac_len] = L'\0'; 769 } 770 } 771 772 setsuffix(bws_get_iter_value(s),si); 773 774 if ((*main_len + *frac_len) == 0) 775 *sign = 0; 776 777 return (0); 778} 779 780/* 781 * Implements string sort. 782 */ 783static int 784wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 785{ 786 787 if (debug_sort) { 788 if (offset) 789 printf("; offset=%d\n", (int) offset); 790 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 791 printf("(%zu)", BWSLEN(kv1->k)); 792 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 793 printf("(%zu)", BWSLEN(kv2->k)); 794 } 795 796 return (bwscoll(kv1->k, kv2->k, offset)); 797} 798 799/* 800 * Compare two suffixes 801 */ 802static inline int 803cmpsuffix(unsigned char si1, unsigned char si2) 804{ 805 806 return ((char)si1 - (char)si2); 807} 808 809/* 810 * Implements numeric sort for -n and -h. 811 */ 812static int 813numcoll_impl(struct key_value *kv1, struct key_value *kv2, 814 size_t offset __unused, bool use_suffix) 815{ 816 struct bwstring *s1, *s2; 817 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 818 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 819 int cmp_res, sign1, sign2; 820 size_t frac1, frac2, main1, main2; 821 unsigned char SI1, SI2; 822 bool e1, e2, key1_read, key2_read; 823 824 s1 = kv1->k; 825 s2 = kv2->k; 826 sign1 = sign2 = 0; 827 main1 = main2 = 0; 828 frac1 = frac2 = 0; 829 830 key1_read = key2_read = false; 831 832 if (debug_sort) { 833 bwsprintf(stdout, s1, "; k1=<", ">"); 834 bwsprintf(stdout, s2, ", k2=<", ">"); 835 } 836 837 if (s1 == s2) 838 return (0); 839 840 if (kv1->hint->status == HS_UNINITIALIZED) { 841 /* read the number from the string */ 842 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 843 key1_read = true; 844 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 845 if(main1 < 1 && frac1 < 1) 846 kv1->hint->v.nh.empty=true; 847 kv1->hint->v.nh.si = SI1; 848 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 849 HS_INITIALIZED : HS_ERROR; 850 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 851 } 852 853 if (kv2->hint->status == HS_UNINITIALIZED) { 854 /* read the number from the string */ 855 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 856 key2_read = true; 857 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 858 if(main2 < 1 && frac2 < 1) 859 kv2->hint->v.nh.empty=true; 860 kv2->hint->v.nh.si = SI2; 861 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 862 HS_INITIALIZED : HS_ERROR; 863 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 864 } 865 866 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 867 HS_INITIALIZED) { 868 unsigned long long n1, n2; 869 bool neg1, neg2; 870 871 e1 = kv1->hint->v.nh.empty; 872 e2 = kv2->hint->v.nh.empty; 873 874 if (e1 && e2) 875 return (0); 876 877 neg1 = kv1->hint->v.nh.neg; 878 neg2 = kv2->hint->v.nh.neg; 879 880 if (neg1 && !neg2) 881 return (-1); 882 if (neg2 && !neg1) 883 return (+1); 884 885 if (e1) 886 return (neg2 ? +1 : -1); 887 else if (e2) 888 return (neg1 ? -1 : +1); 889 890 891 if (use_suffix) { 892 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 893 if (cmp_res) 894 return (neg1 ? -cmp_res : cmp_res); 895 } 896 897 n1 = kv1->hint->v.nh.n1; 898 n2 = kv2->hint->v.nh.n1; 899 if (n1 < n2) 900 return (neg1 ? +1 : -1); 901 else if (n1 > n2) 902 return (neg1 ? -1 : +1); 903 } 904 905 /* read the numbers from the strings */ 906 if (!key1_read) 907 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 908 if (!key2_read) 909 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 910 911 e1 = ((main1 + frac1) == 0); 912 e2 = ((main2 + frac2) == 0); 913 914 if (e1 && e2) 915 return (0); 916 917 /* we know the result if the signs are different */ 918 if (sign1 < 0 && sign2 >= 0) 919 return (-1); 920 if (sign1 >= 0 && sign2 < 0) 921 return (+1); 922 923 if (e1) 924 return ((sign2 < 0) ? +1 : -1); 925 else if (e2) 926 return ((sign1 < 0) ? -1 : +1); 927 928 if (use_suffix) { 929 cmp_res = cmpsuffix(SI1, SI2); 930 if (cmp_res) 931 return ((sign1 < 0) ? -cmp_res : cmp_res); 932 } 933 934 /* if both numbers are empty assume that the strings are equal */ 935 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 936 return (0); 937 938 /* 939 * if the main part is of different size, we know the result 940 * (because the leading zeros are removed) 941 */ 942 if (main1 < main2) 943 cmp_res = -1; 944 else if (main1 > main2) 945 cmp_res = +1; 946 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 947 else 948 cmp_res = wcscmp(smain1, smain2); 949 950 /* check fraction */ 951 if (!cmp_res) 952 cmp_res = wcscmp(sfrac1, sfrac2); 953 954 if (!cmp_res) 955 return (0); 956 957 /* reverse result if the signs are negative */ 958 if (sign1 < 0 && sign2 < 0) 959 cmp_res = -cmp_res; 960 961 return (cmp_res); 962} 963 964/* 965 * Implements numeric sort (-n). 966 */ 967static int 968numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 969{ 970 971 return (numcoll_impl(kv1, kv2, offset, false)); 972} 973 974/* 975 * Implements 'human' numeric sort (-h). 976 */ 977static int 978hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 979{ 980 981 return (numcoll_impl(kv1, kv2, offset, true)); 982} 983 984/* Use hint space to memoize md5 computations, at least. */ 985static void 986randomcoll_init_hint(struct key_value *kv, void *hash) 987{ 988 989 memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached)); 990 kv->hint->status = HS_INITIALIZED; 991} 992 993/* 994 * Implements random sort (-R). 995 */ 996static int 997randomcoll(struct key_value *kv1, struct key_value *kv2, 998 size_t offset __unused) 999{ 1000 struct bwstring *s1, *s2; 1001 MD5_CTX ctx1, ctx2; 1002 unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH]; 1003 int cmp; 1004 1005 s1 = kv1->k; 1006 s2 = kv2->k; 1007 1008 if (debug_sort) { 1009 bwsprintf(stdout, s1, "; k1=<", ">"); 1010 bwsprintf(stdout, s2, ", k2=<", ">"); 1011 } 1012 1013 if (s1 == s2) 1014 return (0); 1015 1016 if (kv1->hint->status == HS_INITIALIZED && 1017 kv2->hint->status == HS_INITIALIZED) { 1018 cmp = memcmp(kv1->hint->v.Rh.cached, 1019 kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached)); 1020 if (cmp != 0) 1021 return (cmp); 1022 } 1023 1024 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); 1025 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); 1026 1027 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1028 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1029 1030 MD5Final(hash1, &ctx1); 1031 MD5Final(hash2, &ctx2); 1032 1033 if (kv1->hint->status == HS_UNINITIALIZED) 1034 randomcoll_init_hint(kv1, hash1); 1035 if (kv2->hint->status == HS_UNINITIALIZED) 1036 randomcoll_init_hint(kv2, hash2); 1037 1038 return (memcmp(hash1, hash2, sizeof(hash1))); 1039} 1040 1041/* 1042 * Implements version sort (-V). 1043 */ 1044static int 1045versioncoll(struct key_value *kv1, struct key_value *kv2, 1046 size_t offset __unused) 1047{ 1048 struct bwstring *s1, *s2; 1049 1050 s1 = kv1->k; 1051 s2 = kv2->k; 1052 1053 if (debug_sort) { 1054 bwsprintf(stdout, s1, "; k1=<", ">"); 1055 bwsprintf(stdout, s2, ", k2=<", ">"); 1056 } 1057 1058 if (s1 == s2) 1059 return (0); 1060 1061 return (vcmp(s1, s2)); 1062} 1063 1064/* 1065 * Check for minus infinity 1066 */ 1067static inline bool 1068huge_minus(double d, int err1) 1069{ 1070 1071 if (err1 == ERANGE) 1072 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1073 return (+1); 1074 1075 return (0); 1076} 1077 1078/* 1079 * Check for plus infinity 1080 */ 1081static inline bool 1082huge_plus(double d, int err1) 1083{ 1084 1085 if (err1 == ERANGE) 1086 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1087 return (+1); 1088 1089 return (0); 1090} 1091 1092/* 1093 * Check whether a function is a NAN 1094 */ 1095static bool 1096is_nan(double d) 1097{ 1098 1099 return ((d == NAN) || (isnan(d))); 1100} 1101 1102/* 1103 * Compare two NANs 1104 */ 1105static int 1106cmp_nans(double d1, double d2) 1107{ 1108 1109 if (d1 < d2) 1110 return (-1); 1111 if (d1 > d2) 1112 return (+1); 1113 return (0); 1114} 1115 1116/* 1117 * Implements general numeric sort (-g). 1118 */ 1119static int 1120gnumcoll(struct key_value *kv1, struct key_value *kv2, 1121 size_t offset __unused) 1122{ 1123 double d1, d2; 1124 int err1, err2; 1125 bool empty1, empty2, key1_read, key2_read; 1126 1127 d1 = d2 = 0; 1128 err1 = err2 = 0; 1129 key1_read = key2_read = false; 1130 1131 if (debug_sort) { 1132 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1133 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1134 } 1135 1136 if (kv1->hint->status == HS_UNINITIALIZED) { 1137 errno = 0; 1138 d1 = bwstod(kv1->k, &empty1); 1139 err1 = errno; 1140 1141 if (empty1) 1142 kv1->hint->v.gh.notnum = true; 1143 else if (err1 == 0) { 1144 kv1->hint->v.gh.d = d1; 1145 kv1->hint->v.gh.nan = is_nan(d1); 1146 kv1->hint->status = HS_INITIALIZED; 1147 } else 1148 kv1->hint->status = HS_ERROR; 1149 1150 key1_read = true; 1151 } 1152 1153 if (kv2->hint->status == HS_UNINITIALIZED) { 1154 errno = 0; 1155 d2 = bwstod(kv2->k, &empty2); 1156 err2 = errno; 1157 1158 if (empty2) 1159 kv2->hint->v.gh.notnum = true; 1160 else if (err2 == 0) { 1161 kv2->hint->v.gh.d = d2; 1162 kv2->hint->v.gh.nan = is_nan(d2); 1163 kv2->hint->status = HS_INITIALIZED; 1164 } else 1165 kv2->hint->status = HS_ERROR; 1166 1167 key2_read = true; 1168 } 1169 1170 if (kv1->hint->status == HS_INITIALIZED && 1171 kv2->hint->status == HS_INITIALIZED) { 1172 if (kv1->hint->v.gh.notnum) 1173 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1174 else if (kv2->hint->v.gh.notnum) 1175 return (+1); 1176 1177 if (kv1->hint->v.gh.nan) 1178 return ((kv2->hint->v.gh.nan) ? 1179 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1180 -1); 1181 else if (kv2->hint->v.gh.nan) 1182 return (+1); 1183 1184 d1 = kv1->hint->v.gh.d; 1185 d2 = kv2->hint->v.gh.d; 1186 1187 if (d1 < d2) 1188 return (-1); 1189 else if (d1 > d2) 1190 return (+1); 1191 else 1192 return (0); 1193 } 1194 1195 if (!key1_read) { 1196 errno = 0; 1197 d1 = bwstod(kv1->k, &empty1); 1198 err1 = errno; 1199 } 1200 1201 if (!key2_read) { 1202 errno = 0; 1203 d2 = bwstod(kv2->k, &empty2); 1204 err2 = errno; 1205 } 1206 1207 /* Non-value case: */ 1208 if (empty1) 1209 return (empty2 ? 0 : -1); 1210 else if (empty2) 1211 return (+1); 1212 1213 /* NAN case */ 1214 if (is_nan(d1)) 1215 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1216 else if (is_nan(d2)) 1217 return (+1); 1218 1219 /* Infinities */ 1220 if (err1 == ERANGE || err2 == ERANGE) { 1221 /* Minus infinity case */ 1222 if (huge_minus(d1, err1)) { 1223 if (huge_minus(d2, err2)) { 1224 if (d1 < d2) 1225 return (-1); 1226 if (d1 > d2) 1227 return (+1); 1228 return (0); 1229 } else 1230 return (-1); 1231 1232 } else if (huge_minus(d2, err2)) { 1233 if (huge_minus(d1, err1)) { 1234 if (d1 < d2) 1235 return (-1); 1236 if (d1 > d2) 1237 return (+1); 1238 return (0); 1239 } else 1240 return (+1); 1241 } 1242 1243 /* Plus infinity case */ 1244 if (huge_plus(d1, err1)) { 1245 if (huge_plus(d2, err2)) { 1246 if (d1 < d2) 1247 return (-1); 1248 if (d1 > d2) 1249 return (+1); 1250 return (0); 1251 } else 1252 return (+1); 1253 } else if (huge_plus(d2, err2)) { 1254 if (huge_plus(d1, err1)) { 1255 if (d1 < d2) 1256 return (-1); 1257 if (d1 > d2) 1258 return (+1); 1259 return (0); 1260 } else 1261 return (-1); 1262 } 1263 } 1264 1265 if (d1 < d2) 1266 return (-1); 1267 if (d1 > d2) 1268 return (+1); 1269 1270 return (0); 1271} 1272 1273/* 1274 * Implements month sort (-M). 1275 */ 1276static int 1277monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1278{ 1279 int val1, val2; 1280 bool key1_read, key2_read; 1281 1282 val1 = val2 = 0; 1283 key1_read = key2_read = false; 1284 1285 if (debug_sort) { 1286 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1287 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1288 } 1289 1290 if (kv1->hint->status == HS_UNINITIALIZED) { 1291 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1292 key1_read = true; 1293 kv1->hint->status = HS_INITIALIZED; 1294 } 1295 1296 if (kv2->hint->status == HS_UNINITIALIZED) { 1297 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1298 key2_read = true; 1299 kv2->hint->status = HS_INITIALIZED; 1300 } 1301 1302 if (kv1->hint->status == HS_INITIALIZED) { 1303 val1 = kv1->hint->v.Mh.m; 1304 key1_read = true; 1305 } 1306 1307 if (kv2->hint->status == HS_INITIALIZED) { 1308 val2 = kv2->hint->v.Mh.m; 1309 key2_read = true; 1310 } 1311 1312 if (!key1_read) 1313 val1 = bws_month_score(kv1->k); 1314 if (!key2_read) 1315 val2 = bws_month_score(kv2->k); 1316 1317 if (val1 == val2) { 1318 return (0); 1319 } 1320 if (val1 < val2) 1321 return (-1); 1322 return (+1); 1323} 1324