1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: stable/11/usr.bin/sort/coll.c 330449 2018-03-05 07:26:05Z eadler $"); 32 33#include <sys/types.h> 34 35#include <errno.h> 36#include <err.h> 37#include <langinfo.h> 38#include <limits.h> 39#include <math.h> 40#include <md5.h> 41#include <stdlib.h> 42#include <string.h> 43#include <wchar.h> 44#include <wctype.h> 45 46#include "coll.h" 47#include "vsort.h" 48 49struct key_specs *keys; 50size_t keys_num = 0; 51 52wint_t symbol_decimal_point = L'.'; 53/* there is no default thousands separator in collate rules: */ 54wint_t symbol_thousands_sep = 0; 55wint_t symbol_negative_sign = L'-'; 56wint_t symbol_positive_sign = L'+'; 57 58static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 59static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 60static int monthcoll(struct key_value*, struct key_value *, size_t offset); 61static int numcoll(struct key_value*, struct key_value *, size_t offset); 62static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 63static int randomcoll(struct key_value*, struct key_value *, size_t offset); 64static int versioncoll(struct key_value*, struct key_value *, size_t offset); 65 66/* 67 * Allocate keys array 68 */ 69struct keys_array * 70keys_array_alloc(void) 71{ 72 struct keys_array *ka; 73 size_t sz; 74 75 sz = keys_array_size(); 76 ka = sort_malloc(sz); 77 memset(ka, 0, sz); 78 79 return (ka); 80} 81 82/* 83 * Calculate whether we need key hint space 84 */ 85static size_t 86key_hint_size(void) 87{ 88 89 return (need_hint ? sizeof(struct key_hint) : 0); 90} 91 92/* 93 * Calculate keys array size 94 */ 95size_t 96keys_array_size(void) 97{ 98 99 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 100} 101 102/* 103 * Clean data of keys array 104 */ 105void 106clean_keys_array(const struct bwstring *s, struct keys_array *ka) 107{ 108 109 if (ka) { 110 for (size_t i = 0; i < keys_num; ++i) { 111 const struct key_value *kv; 112 113 kv = get_key_from_keys_array(ka, i); 114 if (kv->k && kv->k != s) 115 bwsfree(kv->k); 116 } 117 memset(ka, 0, keys_array_size()); 118 } 119} 120 121/* 122 * Get pointer to a key value in the keys set 123 */ 124struct key_value * 125get_key_from_keys_array(struct keys_array *ka, size_t ind) 126{ 127 128 return ((struct key_value *)((caddr_t)ka->key + 129 ind * (sizeof(struct key_value) + key_hint_size()))); 130} 131 132/* 133 * Set value of a key in the keys set 134 */ 135void 136set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 137{ 138 139 if (ka && keys_num > ind) { 140 struct key_value *kv; 141 142 kv = get_key_from_keys_array(ka, ind); 143 144 if (kv->k && kv->k != s) 145 bwsfree(kv->k); 146 kv->k = s; 147 } 148} 149 150/* 151 * Initialize a sort list item 152 */ 153struct sort_list_item * 154sort_list_item_alloc(void) 155{ 156 struct sort_list_item *si; 157 size_t sz; 158 159 sz = sizeof(struct sort_list_item) + keys_array_size(); 160 si = sort_malloc(sz); 161 memset(si, 0, sz); 162 163 return (si); 164} 165 166size_t 167sort_list_item_size(struct sort_list_item *si) 168{ 169 size_t ret = 0; 170 171 if (si) { 172 ret = sizeof(struct sort_list_item) + keys_array_size(); 173 if (si->str) 174 ret += bws_memsize(si->str); 175 for (size_t i = 0; i < keys_num; ++i) { 176 const struct key_value *kv; 177 178 kv = get_key_from_keys_array(&si->ka, i); 179 180 if (kv->k != si->str) 181 ret += bws_memsize(kv->k); 182 } 183 } 184 return (ret); 185} 186 187/* 188 * Calculate key for a sort list item 189 */ 190static void 191sort_list_item_make_key(struct sort_list_item *si) 192{ 193 194 preproc(si->str, &(si->ka)); 195} 196 197/* 198 * Set value of a sort list item. 199 * Return combined string and keys memory size. 200 */ 201void 202sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 203{ 204 205 if (si) { 206 clean_keys_array(si->str, &(si->ka)); 207 if (si->str) { 208 if (si->str == str) { 209 /* we are trying to reset the same string */ 210 return; 211 } else { 212 bwsfree(si->str); 213 si->str = NULL; 214 } 215 } 216 si->str = str; 217 sort_list_item_make_key(si); 218 } 219} 220 221/* 222 * De-allocate a sort list item object memory 223 */ 224void 225sort_list_item_clean(struct sort_list_item *si) 226{ 227 228 if (si) { 229 clean_keys_array(si->str, &(si->ka)); 230 if (si->str) { 231 bwsfree(si->str); 232 si->str = NULL; 233 } 234 } 235} 236 237/* 238 * Skip columns according to specs 239 */ 240static size_t 241skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 242 bool skip_blanks, bool *empty_key) 243{ 244 if (cols < 1) 245 return (BWSLEN(s) + 1); 246 247 if (skip_blanks) 248 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 249 ++start; 250 251 while (start < BWSLEN(s) && cols > 1) { 252 --cols; 253 ++start; 254 } 255 256 if (start >= BWSLEN(s)) 257 *empty_key = true; 258 259 return (start); 260} 261 262/* 263 * Skip fields according to specs 264 */ 265static size_t 266skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 267{ 268 269 if (fields < 2) { 270 if (BWSLEN(s) == 0) 271 *empty_field = true; 272 return (0); 273 } else if (!(sort_opts_vals.tflag)) { 274 size_t cpos = 0; 275 bool pb = true; 276 277 while (cpos < BWSLEN(s)) { 278 bool isblank; 279 280 isblank = iswblank(BWS_GET(s, cpos)); 281 282 if (isblank && !pb) { 283 --fields; 284 if (fields <= 1) 285 return (cpos); 286 } 287 pb = isblank; 288 ++cpos; 289 } 290 if (fields > 1) 291 *empty_field = true; 292 return (cpos); 293 } else { 294 size_t cpos = 0; 295 296 while (cpos < BWSLEN(s)) { 297 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 298 --fields; 299 if (fields <= 1) 300 return (cpos + 1); 301 } 302 ++cpos; 303 } 304 if (fields > 1) 305 *empty_field = true; 306 return (cpos); 307 } 308} 309 310/* 311 * Find fields start 312 */ 313static void 314find_field_start(const struct bwstring *s, struct key_specs *ks, 315 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 316{ 317 318 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 319 if (!*empty_field) 320 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 321 ks->pos1b, empty_key); 322 else 323 *empty_key = true; 324} 325 326/* 327 * Find end key position 328 */ 329static size_t 330find_field_end(const struct bwstring *s, struct key_specs *ks) 331{ 332 size_t f2, next_field_start, pos_end; 333 bool empty_field, empty_key; 334 335 pos_end = 0; 336 next_field_start = 0; 337 empty_field = false; 338 empty_key = false; 339 f2 = ks->f2; 340 341 if (f2 == 0) 342 return (BWSLEN(s) + 1); 343 else { 344 if (ks->c2 == 0) { 345 next_field_start = skip_fields_to_start(s, f2 + 1, 346 &empty_field); 347 if ((next_field_start > 0) && sort_opts_vals.tflag && 348 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 349 next_field_start - 1))) 350 --next_field_start; 351 } else 352 next_field_start = skip_fields_to_start(s, f2, 353 &empty_field); 354 } 355 356 if (empty_field || (next_field_start >= BWSLEN(s))) 357 return (BWSLEN(s) + 1); 358 359 if (ks->c2) { 360 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 361 ks->pos2b, &empty_key); 362 if (pos_end < BWSLEN(s)) 363 ++pos_end; 364 } else 365 pos_end = next_field_start; 366 367 return (pos_end); 368} 369 370/* 371 * Cut a field according to the key specs 372 */ 373static struct bwstring * 374cut_field(const struct bwstring *s, struct key_specs *ks) 375{ 376 struct bwstring *ret = NULL; 377 378 if (s && ks) { 379 size_t field_start, key_end, key_start, sz; 380 bool empty_field, empty_key; 381 382 field_start = 0; 383 key_start = 0; 384 empty_field = false; 385 empty_key = false; 386 387 find_field_start(s, ks, &field_start, &key_start, 388 &empty_field, &empty_key); 389 390 if (empty_key) 391 sz = 0; 392 else { 393 key_end = find_field_end(s, ks); 394 sz = (key_end < key_start) ? 0 : (key_end - key_start); 395 } 396 397 ret = bwsalloc(sz); 398 if (sz) 399 bwsnocpy(ret, s, key_start, sz); 400 } else 401 ret = bwsalloc(0); 402 403 return (ret); 404} 405 406/* 407 * Preprocesses a line applying the necessary transformations 408 * specified by command line options and returns the preprocessed 409 * string, which can be used to compare. 410 */ 411int 412preproc(struct bwstring *s, struct keys_array *ka) 413{ 414 415 if (sort_opts_vals.kflag) 416 for (size_t i = 0; i < keys_num; i++) { 417 struct bwstring *key; 418 struct key_specs *kspecs; 419 struct sort_mods *sm; 420 421 kspecs = &(keys[i]); 422 key = cut_field(s, kspecs); 423 424 sm = &(kspecs->sm); 425 if (sm->dflag) 426 key = dictionary_order(key); 427 else if (sm->iflag) 428 key = ignore_nonprinting(key); 429 if (sm->fflag || sm->Mflag) 430 key = ignore_case(key); 431 432 set_key_on_keys_array(ka, key, i); 433 } 434 else { 435 struct bwstring *ret = NULL; 436 struct sort_mods *sm = default_sort_mods; 437 438 if (sm->bflag) { 439 if (ret == NULL) 440 ret = bwsdup(s); 441 ret = ignore_leading_blanks(ret); 442 } 443 if (sm->dflag) { 444 if (ret == NULL) 445 ret = bwsdup(s); 446 ret = dictionary_order(ret); 447 } else if (sm->iflag) { 448 if (ret == NULL) 449 ret = bwsdup(s); 450 ret = ignore_nonprinting(ret); 451 } 452 if (sm->fflag || sm->Mflag) { 453 if (ret == NULL) 454 ret = bwsdup(s); 455 ret = ignore_case(ret); 456 } 457 if (ret == NULL) 458 set_key_on_keys_array(ka, s, 0); 459 else 460 set_key_on_keys_array(ka, ret, 0); 461 } 462 463 return 0; 464} 465 466cmpcoll_t 467get_sort_func(struct sort_mods *sm) 468{ 469 470 if (sm->nflag) 471 return (numcoll); 472 else if (sm->hflag) 473 return (hnumcoll); 474 else if (sm->gflag) 475 return (gnumcoll); 476 else if (sm->Mflag) 477 return (monthcoll); 478 else if (sm->Rflag) 479 return (randomcoll); 480 else if (sm->Vflag) 481 return (versioncoll); 482 else 483 return (wstrcoll); 484} 485 486/* 487 * Compares the given strings. Returns a positive number if 488 * the first precedes the second, a negative number if the second is 489 * the preceding one, and zero if they are equal. This function calls 490 * the underlying collate functions, which done the actual comparison. 491 */ 492int 493key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 494{ 495 struct key_value *kv1, *kv2; 496 struct sort_mods *sm; 497 int res = 0; 498 499 for (size_t i = 0; i < keys_num; ++i) { 500 kv1 = get_key_from_keys_array(ps1, i); 501 kv2 = get_key_from_keys_array(ps2, i); 502 sm = &(keys[i].sm); 503 504 if (sm->rflag) 505 res = sm->func(kv2, kv1, offset); 506 else 507 res = sm->func(kv1, kv2, offset); 508 509 if (res) 510 break; 511 512 /* offset applies to only the first key */ 513 offset = 0; 514 } 515 return (res); 516} 517 518/* 519 * Compare two strings. 520 * Plain symbol-by-symbol comparison. 521 */ 522int 523top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 524{ 525 526 if (default_sort_mods->rflag) { 527 const struct bwstring *tmp; 528 529 tmp = s1; 530 s1 = s2; 531 s2 = tmp; 532 } 533 534 return (bwscoll(s1, s2, 0)); 535} 536 537/* 538 * Compare a string and a sort list item, according to the sort specs. 539 */ 540int 541str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 542{ 543 struct keys_array *ka1; 544 int ret = 0; 545 546 ka1 = keys_array_alloc(); 547 548 preproc(str1, ka1); 549 550 sort_list_item_make_key(*ss2); 551 552 if (debug_sort) { 553 bwsprintf(stdout, str1, "; s1=<", ">"); 554 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 555 } 556 557 ret = key_coll(ka1, &((*ss2)->ka), 0); 558 559 if (debug_sort) 560 printf("; cmp1=%d", ret); 561 562 clean_keys_array(str1, ka1); 563 sort_free(ka1); 564 565 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 566 ret = top_level_str_coll(str1, ((*ss2)->str)); 567 if (debug_sort) 568 printf("; cmp2=%d", ret); 569 } 570 571 if (debug_sort) 572 printf("\n"); 573 574 return (ret); 575} 576 577/* 578 * Compare two sort list items, according to the sort specs. 579 */ 580int 581list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 582 size_t offset) 583{ 584 int ret; 585 586 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 587 588 if (debug_sort) { 589 if (offset) 590 printf("; offset=%d", (int) offset); 591 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 592 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 593 printf("; cmp1=%d\n", ret); 594 } 595 596 if (ret) 597 return (ret); 598 599 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 600 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 601 if (debug_sort) 602 printf("; cmp2=%d\n", ret); 603 } 604 605 return (ret); 606} 607 608/* 609 * Compare two sort list items, according to the sort specs. 610 */ 611int 612list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 613{ 614 615 return (list_coll_offset(ss1, ss2, 0)); 616} 617 618#define LSCDEF(N) \ 619static int \ 620list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 621{ \ 622 \ 623 return (list_coll_offset(ss1, ss2, N)); \ 624} 625 626LSCDEF(1) 627LSCDEF(2) 628LSCDEF(3) 629LSCDEF(4) 630LSCDEF(5) 631LSCDEF(6) 632LSCDEF(7) 633LSCDEF(8) 634LSCDEF(9) 635LSCDEF(10) 636LSCDEF(11) 637LSCDEF(12) 638LSCDEF(13) 639LSCDEF(14) 640LSCDEF(15) 641LSCDEF(16) 642LSCDEF(17) 643LSCDEF(18) 644LSCDEF(19) 645LSCDEF(20) 646 647listcoll_t 648get_list_call_func(size_t offset) 649{ 650 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 651 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 652 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 653 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 654 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 655 list_coll_18, list_coll_19, list_coll_20 }; 656 657 if (offset <= 20) 658 return (lsarray[offset]); 659 660 return (list_coll); 661} 662 663/* 664 * Compare two sort list items, only by their original string. 665 */ 666int 667list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 668{ 669 670 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 671} 672 673/* 674 * Maximum size of a number in the string (before or after decimal point) 675 */ 676#define MAX_NUM_SIZE (128) 677 678/* 679 * Set suffix value 680 */ 681static void setsuffix(wchar_t c, unsigned char *si) 682{ 683 switch (c){ 684 case L'k': 685 case L'K': 686 *si = 1; 687 break; 688 case L'M': 689 *si = 2; 690 break; 691 case L'G': 692 *si = 3; 693 break; 694 case L'T': 695 *si = 4; 696 break; 697 case L'P': 698 *si = 5; 699 break; 700 case L'E': 701 *si = 6; 702 break; 703 case L'Z': 704 *si = 7; 705 break; 706 case L'Y': 707 *si = 8; 708 break; 709 default: 710 *si = 0; 711 } 712} 713 714/* 715 * Read string s and parse the string into a fixed-decimal-point number. 716 * sign equals -1 if the number is negative (explicit plus is not allowed, 717 * according to GNU sort's "info sort". 718 * The number part before decimal point is in the smain, after the decimal 719 * point is in sfrac, tail is the pointer to the remainder of the string. 720 */ 721static int 722read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 723{ 724 bwstring_iterator s; 725 726 s = bws_begin(s0); 727 728 /* always end the fraction with zero, even if we have no fraction */ 729 sfrac[0] = 0; 730 731 while (iswblank(bws_get_iter_value(s))) 732 s = bws_iterator_inc(s, 1); 733 734 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 735 *sign = -1; 736 s = bws_iterator_inc(s, 1); 737 } 738 739 // This is '0', not '\0', do not change this 740 while (iswdigit(bws_get_iter_value(s)) && 741 (bws_get_iter_value(s) == L'0')) 742 s = bws_iterator_inc(s, 1); 743 744 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 745 if (iswdigit(bws_get_iter_value(s))) { 746 smain[*main_len] = bws_get_iter_value(s); 747 s = bws_iterator_inc(s, 1); 748 *main_len += 1; 749 } else if (symbol_thousands_sep && 750 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 751 s = bws_iterator_inc(s, 1); 752 else 753 break; 754 } 755 756 smain[*main_len] = 0; 757 758 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 759 s = bws_iterator_inc(s, 1); 760 while (iswdigit(bws_get_iter_value(s)) && 761 *frac_len < MAX_NUM_SIZE) { 762 sfrac[*frac_len] = bws_get_iter_value(s); 763 s = bws_iterator_inc(s, 1); 764 *frac_len += 1; 765 } 766 sfrac[*frac_len] = 0; 767 768 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 769 --(*frac_len); 770 sfrac[*frac_len] = L'\0'; 771 } 772 } 773 774 setsuffix(bws_get_iter_value(s),si); 775 776 if ((*main_len + *frac_len) == 0) 777 *sign = 0; 778 779 return (0); 780} 781 782/* 783 * Implements string sort. 784 */ 785static int 786wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 787{ 788 789 if (debug_sort) { 790 if (offset) 791 printf("; offset=%d\n", (int) offset); 792 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 793 printf("(%zu)", BWSLEN(kv1->k)); 794 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 795 printf("(%zu)", BWSLEN(kv2->k)); 796 } 797 798 return (bwscoll(kv1->k, kv2->k, offset)); 799} 800 801/* 802 * Compare two suffixes 803 */ 804static inline int 805cmpsuffix(unsigned char si1, unsigned char si2) 806{ 807 808 return ((char)si1 - (char)si2); 809} 810 811/* 812 * Implements numeric sort for -n and -h. 813 */ 814static int 815numcoll_impl(struct key_value *kv1, struct key_value *kv2, 816 size_t offset __unused, bool use_suffix) 817{ 818 struct bwstring *s1, *s2; 819 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 820 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 821 int cmp_res, sign1, sign2; 822 size_t frac1, frac2, main1, main2; 823 unsigned char SI1, SI2; 824 bool e1, e2, key1_read, key2_read; 825 826 s1 = kv1->k; 827 s2 = kv2->k; 828 sign1 = sign2 = 0; 829 main1 = main2 = 0; 830 frac1 = frac2 = 0; 831 832 cmp_res = 0; 833 key1_read = key2_read = false; 834 835 if (debug_sort) { 836 bwsprintf(stdout, s1, "; k1=<", ">"); 837 bwsprintf(stdout, s2, ", k2=<", ">"); 838 } 839 840 if (s1 == s2) 841 return (0); 842 843 if (kv1->hint->status == HS_UNINITIALIZED) { 844 /* read the number from the string */ 845 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 846 key1_read = true; 847 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 848 if(main1 < 1 && frac1 < 1) 849 kv1->hint->v.nh.empty=true; 850 kv1->hint->v.nh.si = SI1; 851 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 852 HS_INITIALIZED : HS_ERROR; 853 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 854 } 855 856 if (kv2->hint->status == HS_UNINITIALIZED) { 857 /* read the number from the string */ 858 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 859 key2_read = true; 860 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 861 if(main2 < 1 && frac2 < 1) 862 kv2->hint->v.nh.empty=true; 863 kv2->hint->v.nh.si = SI2; 864 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 865 HS_INITIALIZED : HS_ERROR; 866 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 867 } 868 869 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 870 HS_INITIALIZED) { 871 unsigned long long n1, n2; 872 bool neg1, neg2; 873 874 e1 = kv1->hint->v.nh.empty; 875 e2 = kv2->hint->v.nh.empty; 876 877 if (e1 && e2) 878 return (0); 879 880 neg1 = kv1->hint->v.nh.neg; 881 neg2 = kv2->hint->v.nh.neg; 882 883 if (neg1 && !neg2) 884 return (-1); 885 if (neg2 && !neg1) 886 return (+1); 887 888 if (e1) 889 return (neg2 ? +1 : -1); 890 else if (e2) 891 return (neg1 ? -1 : +1); 892 893 894 if (use_suffix) { 895 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 896 if (cmp_res) 897 return (neg1 ? -cmp_res : cmp_res); 898 } 899 900 n1 = kv1->hint->v.nh.n1; 901 n2 = kv2->hint->v.nh.n1; 902 if (n1 < n2) 903 return (neg1 ? +1 : -1); 904 else if (n1 > n2) 905 return (neg1 ? -1 : +1); 906 } 907 908 /* read the numbers from the strings */ 909 if (!key1_read) 910 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 911 if (!key2_read) 912 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 913 914 e1 = ((main1 + frac1) == 0); 915 e2 = ((main2 + frac2) == 0); 916 917 if (e1 && e2) 918 return (0); 919 920 /* we know the result if the signs are different */ 921 if (sign1 < 0 && sign2 >= 0) 922 return (-1); 923 if (sign1 >= 0 && sign2 < 0) 924 return (+1); 925 926 if (e1) 927 return ((sign2 < 0) ? +1 : -1); 928 else if (e2) 929 return ((sign1 < 0) ? -1 : +1); 930 931 if (use_suffix) { 932 cmp_res = cmpsuffix(SI1, SI2); 933 if (cmp_res) 934 return ((sign1 < 0) ? -cmp_res : cmp_res); 935 } 936 937 /* if both numbers are empty assume that the strings are equal */ 938 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 939 return (0); 940 941 /* 942 * if the main part is of different size, we know the result 943 * (because the leading zeros are removed) 944 */ 945 if (main1 < main2) 946 cmp_res = -1; 947 else if (main1 > main2) 948 cmp_res = +1; 949 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 950 else 951 cmp_res = wcscmp(smain1, smain2); 952 953 /* check fraction */ 954 if (!cmp_res) 955 cmp_res = wcscmp(sfrac1, sfrac2); 956 957 if (!cmp_res) 958 return (0); 959 960 /* reverse result if the signs are negative */ 961 if (sign1 < 0 && sign2 < 0) 962 cmp_res = -cmp_res; 963 964 return (cmp_res); 965} 966 967/* 968 * Implements numeric sort (-n). 969 */ 970static int 971numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 972{ 973 974 return (numcoll_impl(kv1, kv2, offset, false)); 975} 976 977/* 978 * Implements 'human' numeric sort (-h). 979 */ 980static int 981hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 982{ 983 984 return (numcoll_impl(kv1, kv2, offset, true)); 985} 986 987/* 988 * Implements random sort (-R). 989 */ 990static int 991randomcoll(struct key_value *kv1, struct key_value *kv2, 992 size_t offset __unused) 993{ 994 struct bwstring *s1, *s2; 995 MD5_CTX ctx1, ctx2; 996 char *b1, *b2; 997 998 s1 = kv1->k; 999 s2 = kv2->k; 1000 1001 if (debug_sort) { 1002 bwsprintf(stdout, s1, "; k1=<", ">"); 1003 bwsprintf(stdout, s2, ", k2=<", ">"); 1004 } 1005 1006 if (s1 == s2) 1007 return (0); 1008 1009 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 1010 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 1011 1012 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1013 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1014 b1 = MD5End(&ctx1, NULL); 1015 b2 = MD5End(&ctx2, NULL); 1016 if (b1 == NULL) { 1017 if (b2 == NULL) 1018 return (0); 1019 else { 1020 sort_free(b2); 1021 return (-1); 1022 } 1023 } else if (b2 == NULL) { 1024 sort_free(b1); 1025 return (+1); 1026 } else { 1027 int cmp_res; 1028 1029 cmp_res = strcmp(b1,b2); 1030 sort_free(b1); 1031 sort_free(b2); 1032 1033 if (!cmp_res) 1034 cmp_res = bwscoll(s1, s2, 0); 1035 1036 return (cmp_res); 1037 } 1038} 1039 1040/* 1041 * Implements version sort (-V). 1042 */ 1043static int 1044versioncoll(struct key_value *kv1, struct key_value *kv2, 1045 size_t offset __unused) 1046{ 1047 struct bwstring *s1, *s2; 1048 1049 s1 = kv1->k; 1050 s2 = kv2->k; 1051 1052 if (debug_sort) { 1053 bwsprintf(stdout, s1, "; k1=<", ">"); 1054 bwsprintf(stdout, s2, ", k2=<", ">"); 1055 } 1056 1057 if (s1 == s2) 1058 return (0); 1059 1060 return (vcmp(s1, s2)); 1061} 1062 1063/* 1064 * Check for minus infinity 1065 */ 1066static inline bool 1067huge_minus(double d, int err1) 1068{ 1069 1070 if (err1 == ERANGE) 1071 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1072 return (+1); 1073 1074 return (0); 1075} 1076 1077/* 1078 * Check for plus infinity 1079 */ 1080static inline bool 1081huge_plus(double d, int err1) 1082{ 1083 1084 if (err1 == ERANGE) 1085 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1086 return (+1); 1087 1088 return (0); 1089} 1090 1091/* 1092 * Check whether a function is a NAN 1093 */ 1094static bool 1095is_nan(double d) 1096{ 1097 1098 return ((d == NAN) || (isnan(d))); 1099} 1100 1101/* 1102 * Compare two NANs 1103 */ 1104static int 1105cmp_nans(double d1, double d2) 1106{ 1107 1108 if (d1 < d2) 1109 return (-1); 1110 if (d1 > d2) 1111 return (+1); 1112 return (0); 1113} 1114 1115/* 1116 * Implements general numeric sort (-g). 1117 */ 1118static int 1119gnumcoll(struct key_value *kv1, struct key_value *kv2, 1120 size_t offset __unused) 1121{ 1122 double d1, d2; 1123 int err1, err2; 1124 bool empty1, empty2, key1_read, key2_read; 1125 1126 d1 = d2 = 0; 1127 err1 = err2 = 0; 1128 key1_read = key2_read = false; 1129 1130 if (debug_sort) { 1131 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1132 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1133 } 1134 1135 if (kv1->hint->status == HS_UNINITIALIZED) { 1136 errno = 0; 1137 d1 = bwstod(kv1->k, &empty1); 1138 err1 = errno; 1139 1140 if (empty1) 1141 kv1->hint->v.gh.notnum = true; 1142 else if (err1 == 0) { 1143 kv1->hint->v.gh.d = d1; 1144 kv1->hint->v.gh.nan = is_nan(d1); 1145 kv1->hint->status = HS_INITIALIZED; 1146 } else 1147 kv1->hint->status = HS_ERROR; 1148 1149 key1_read = true; 1150 } 1151 1152 if (kv2->hint->status == HS_UNINITIALIZED) { 1153 errno = 0; 1154 d2 = bwstod(kv2->k, &empty2); 1155 err2 = errno; 1156 1157 if (empty2) 1158 kv2->hint->v.gh.notnum = true; 1159 else if (err2 == 0) { 1160 kv2->hint->v.gh.d = d2; 1161 kv2->hint->v.gh.nan = is_nan(d2); 1162 kv2->hint->status = HS_INITIALIZED; 1163 } else 1164 kv2->hint->status = HS_ERROR; 1165 1166 key2_read = true; 1167 } 1168 1169 if (kv1->hint->status == HS_INITIALIZED && 1170 kv2->hint->status == HS_INITIALIZED) { 1171 if (kv1->hint->v.gh.notnum) 1172 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1173 else if (kv2->hint->v.gh.notnum) 1174 return (+1); 1175 1176 if (kv1->hint->v.gh.nan) 1177 return ((kv2->hint->v.gh.nan) ? 1178 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1179 -1); 1180 else if (kv2->hint->v.gh.nan) 1181 return (+1); 1182 1183 d1 = kv1->hint->v.gh.d; 1184 d2 = kv2->hint->v.gh.d; 1185 1186 if (d1 < d2) 1187 return (-1); 1188 else if (d1 > d2) 1189 return (+1); 1190 else 1191 return (0); 1192 } 1193 1194 if (!key1_read) { 1195 errno = 0; 1196 d1 = bwstod(kv1->k, &empty1); 1197 err1 = errno; 1198 } 1199 1200 if (!key2_read) { 1201 errno = 0; 1202 d2 = bwstod(kv2->k, &empty2); 1203 err2 = errno; 1204 } 1205 1206 /* Non-value case: */ 1207 if (empty1) 1208 return (empty2 ? 0 : -1); 1209 else if (empty2) 1210 return (+1); 1211 1212 /* NAN case */ 1213 if (is_nan(d1)) 1214 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1215 else if (is_nan(d2)) 1216 return (+1); 1217 1218 /* Infinities */ 1219 if (err1 == ERANGE || err2 == ERANGE) { 1220 /* Minus infinity case */ 1221 if (huge_minus(d1, err1)) { 1222 if (huge_minus(d2, err2)) { 1223 if (d1 < d2) 1224 return (-1); 1225 if (d1 > d2) 1226 return (+1); 1227 return (0); 1228 } else 1229 return (-1); 1230 1231 } else if (huge_minus(d2, err2)) { 1232 if (huge_minus(d1, err1)) { 1233 if (d1 < d2) 1234 return (-1); 1235 if (d1 > d2) 1236 return (+1); 1237 return (0); 1238 } else 1239 return (+1); 1240 } 1241 1242 /* Plus infinity case */ 1243 if (huge_plus(d1, err1)) { 1244 if (huge_plus(d2, err2)) { 1245 if (d1 < d2) 1246 return (-1); 1247 if (d1 > d2) 1248 return (+1); 1249 return (0); 1250 } else 1251 return (+1); 1252 } else if (huge_plus(d2, err2)) { 1253 if (huge_plus(d1, err1)) { 1254 if (d1 < d2) 1255 return (-1); 1256 if (d1 > d2) 1257 return (+1); 1258 return (0); 1259 } else 1260 return (-1); 1261 } 1262 } 1263 1264 if (d1 < d2) 1265 return (-1); 1266 if (d1 > d2) 1267 return (+1); 1268 1269 return (0); 1270} 1271 1272/* 1273 * Implements month sort (-M). 1274 */ 1275static int 1276monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1277{ 1278 int val1, val2; 1279 bool key1_read, key2_read; 1280 1281 val1 = val2 = 0; 1282 key1_read = key2_read = false; 1283 1284 if (debug_sort) { 1285 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1286 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1287 } 1288 1289 if (kv1->hint->status == HS_UNINITIALIZED) { 1290 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1291 key1_read = true; 1292 kv1->hint->status = HS_INITIALIZED; 1293 } 1294 1295 if (kv2->hint->status == HS_UNINITIALIZED) { 1296 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1297 key2_read = true; 1298 kv2->hint->status = HS_INITIALIZED; 1299 } 1300 1301 if (kv1->hint->status == HS_INITIALIZED) { 1302 val1 = kv1->hint->v.Mh.m; 1303 key1_read = true; 1304 } 1305 1306 if (kv2->hint->status == HS_INITIALIZED) { 1307 val2 = kv2->hint->v.Mh.m; 1308 key2_read = true; 1309 } 1310 1311 if (!key1_read) 1312 val1 = bws_month_score(kv1->k); 1313 if (!key2_read) 1314 val2 = bws_month_score(kv2->k); 1315 1316 if (val1 == val2) { 1317 return (0); 1318 } 1319 if (val1 < val2) 1320 return (-1); 1321 return (+1); 1322} 1323