1/*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD$"); 30 31#include <sys/types.h> 32 33#include <errno.h> 34#include <err.h> 35#include <langinfo.h> 36#include <limits.h> 37#include <math.h> 38#include <md5.h> 39#include <stdlib.h> 40#include <string.h> 41#include <wchar.h> 42#include <wctype.h> 43 44#include "coll.h" 45#include "vsort.h" 46 47struct key_specs *keys; 48size_t keys_num = 0; 49 50wint_t symbol_decimal_point = L'.'; 51/* there is no default thousands separator in collate rules: */ 52wint_t symbol_thousands_sep = 0; 53wint_t symbol_negative_sign = L'-'; 54wint_t symbol_positive_sign = L'+'; 55 56static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 57static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 58static int monthcoll(struct key_value*, struct key_value *, size_t offset); 59static int numcoll(struct key_value*, struct key_value *, size_t offset); 60static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 61static int randomcoll(struct key_value*, struct key_value *, size_t offset); 62static int versioncoll(struct key_value*, struct key_value *, size_t offset); 63 64/* 65 * Allocate keys array 66 */ 67struct keys_array * 68keys_array_alloc(void) 69{ 70 struct keys_array *ka; 71 size_t sz; 72 73 sz = keys_array_size(); 74 ka = sort_malloc(sz); 75 memset(ka, 0, sz); 76 77 return (ka); 78} 79 80/* 81 * Calculate whether we need key hint space 82 */ 83static size_t 84key_hint_size(void) 85{ 86 87 return (need_hint ? sizeof(struct key_hint) : 0); 88} 89 90/* 91 * Calculate keys array size 92 */ 93size_t 94keys_array_size(void) 95{ 96 97 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 98} 99 100/* 101 * Clean data of keys array 102 */ 103void 104clean_keys_array(const struct bwstring *s, struct keys_array *ka) 105{ 106 107 if (ka) { 108 for (size_t i = 0; i < keys_num; ++i) 109 if (ka->key[i].k && ka->key[i].k != s) 110 bwsfree(ka->key[i].k); 111 memset(ka, 0, keys_array_size()); 112 } 113} 114 115/* 116 * Set value of a key in the keys set 117 */ 118void 119set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 120{ 121 122 if (ka && keys_num > ind) { 123 struct key_value *kv; 124 125 kv = &(ka->key[ind]); 126 127 if (kv->k && kv->k != s) 128 bwsfree(kv->k); 129 kv->k = s; 130 } 131} 132 133/* 134 * Initialize a sort list item 135 */ 136struct sort_list_item * 137sort_list_item_alloc(void) 138{ 139 struct sort_list_item *si; 140 size_t sz; 141 142 sz = sizeof(struct sort_list_item) + keys_array_size(); 143 si = sort_malloc(sz); 144 memset(si, 0, sz); 145 146 return (si); 147} 148 149size_t 150sort_list_item_size(struct sort_list_item *si) 151{ 152 size_t ret = 0; 153 154 if (si) { 155 ret = sizeof(struct sort_list_item) + keys_array_size(); 156 if (si->str) 157 ret += bws_memsize(si->str); 158 for (size_t i = 0; i < keys_num; ++i) { 159 struct key_value *kv; 160 161 kv = &(si->ka.key[i]); 162 163 if (kv->k != si->str) 164 ret += bws_memsize(kv->k); 165 } 166 } 167 return (ret); 168} 169 170/* 171 * Calculate key for a sort list item 172 */ 173static void 174sort_list_item_make_key(struct sort_list_item *si) 175{ 176 177 preproc(si->str, &(si->ka)); 178} 179 180/* 181 * Set value of a sort list item. 182 * Return combined string and keys memory size. 183 */ 184void 185sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 186{ 187 188 if (si) { 189 clean_keys_array(si->str, &(si->ka)); 190 if (si->str) { 191 if (si->str == str) { 192 /* we are trying to reset the same string */ 193 return; 194 } else { 195 bwsfree(si->str); 196 si->str = NULL; 197 } 198 } 199 si->str = str; 200 sort_list_item_make_key(si); 201 } 202} 203 204/* 205 * De-allocate a sort list item object memory 206 */ 207void 208sort_list_item_clean(struct sort_list_item *si) 209{ 210 211 if (si) { 212 clean_keys_array(si->str, &(si->ka)); 213 if (si->str) { 214 bwsfree(si->str); 215 si->str = NULL; 216 } 217 } 218} 219 220/* 221 * Skip columns according to specs 222 */ 223static size_t 224skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 225 bool skip_blanks, bool *empty_key) 226{ 227 if (cols < 1) 228 return (BWSLEN(s) + 1); 229 230 if (skip_blanks) 231 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 232 ++start; 233 234 while (start < BWSLEN(s) && cols > 1) { 235 --cols; 236 ++start; 237 } 238 239 if (start >= BWSLEN(s)) 240 *empty_key = true; 241 242 return (start); 243} 244 245/* 246 * Skip fields according to specs 247 */ 248static size_t 249skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 250{ 251 252 if (fields < 2) { 253 if (BWSLEN(s) == 0) 254 *empty_field = true; 255 return (0); 256 } else if (!(sort_opts_vals.tflag)) { 257 size_t cpos = 0; 258 bool pb = true; 259 260 while (cpos < BWSLEN(s)) { 261 bool isblank; 262 263 isblank = iswblank(BWS_GET(s, cpos)); 264 265 if (isblank && !pb) { 266 --fields; 267 if (fields <= 1) 268 return (cpos); 269 } 270 pb = isblank; 271 ++cpos; 272 } 273 if (fields > 1) 274 *empty_field = true; 275 return (cpos); 276 } else { 277 size_t cpos = 0; 278 279 while (cpos < BWSLEN(s)) { 280 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 281 --fields; 282 if (fields <= 1) 283 return (cpos + 1); 284 } 285 ++cpos; 286 } 287 if (fields > 1) 288 *empty_field = true; 289 return (cpos); 290 } 291} 292 293/* 294 * Find fields start 295 */ 296static void 297find_field_start(const struct bwstring *s, struct key_specs *ks, 298 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 299{ 300 301 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 302 if (!*empty_field) 303 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 304 ks->pos1b, empty_key); 305 else 306 *empty_key = true; 307} 308 309/* 310 * Find end key position 311 */ 312static size_t 313find_field_end(const struct bwstring *s, struct key_specs *ks) 314{ 315 size_t f2, next_field_start, pos_end; 316 bool empty_field, empty_key; 317 318 pos_end = 0; 319 next_field_start = 0; 320 empty_field = false; 321 empty_key = false; 322 f2 = ks->f2; 323 324 if (f2 == 0) 325 return (BWSLEN(s) + 1); 326 else { 327 if (ks->c2 == 0) { 328 next_field_start = skip_fields_to_start(s, f2 + 1, 329 &empty_field); 330 if ((next_field_start > 0) && sort_opts_vals.tflag && 331 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 332 next_field_start - 1))) 333 --next_field_start; 334 } else 335 next_field_start = skip_fields_to_start(s, f2, 336 &empty_field); 337 } 338 339 if (empty_field || (next_field_start >= BWSLEN(s))) 340 return (BWSLEN(s) + 1); 341 342 if (ks->c2) { 343 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 344 ks->pos2b, &empty_key); 345 if (pos_end < BWSLEN(s)) 346 ++pos_end; 347 } else 348 pos_end = next_field_start; 349 350 return (pos_end); 351} 352 353/* 354 * Cut a field according to the key specs 355 */ 356static struct bwstring * 357cut_field(const struct bwstring *s, struct key_specs *ks) 358{ 359 struct bwstring *ret = NULL; 360 361 if (s && ks) { 362 size_t field_start, key_end, key_start, sz; 363 bool empty_field, empty_key; 364 365 field_start = 0; 366 key_start = 0; 367 empty_field = false; 368 empty_key = false; 369 370 find_field_start(s, ks, &field_start, &key_start, 371 &empty_field, &empty_key); 372 373 if (empty_key) 374 sz = 0; 375 else { 376 key_end = find_field_end(s, ks); 377 sz = (key_end < key_start) ? 0 : (key_end - key_start); 378 } 379 380 ret = bwsalloc(sz); 381 if (sz) 382 bwsnocpy(ret, s, key_start, sz); 383 } else 384 ret = bwsalloc(0); 385 386 return (ret); 387} 388 389/* 390 * Preprocesses a line applying the necessary transformations 391 * specified by command line options and returns the preprocessed 392 * string, which can be used to compare. 393 */ 394int 395preproc(struct bwstring *s, struct keys_array *ka) 396{ 397 398 if (sort_opts_vals.kflag) 399 for (size_t i = 0; i < keys_num; i++) { 400 struct bwstring *key; 401 struct key_specs *kspecs; 402 struct sort_mods *sm; 403 404 kspecs = &(keys[i]); 405 key = cut_field(s, kspecs); 406 407 sm = &(kspecs->sm); 408 if (sm->dflag) 409 key = dictionary_order(key); 410 else if (sm->iflag) 411 key = ignore_nonprinting(key); 412 if (sm->fflag || sm->Mflag) 413 key = ignore_case(key); 414 415 set_key_on_keys_array(ka, key, i); 416 } 417 else { 418 struct bwstring *ret = NULL; 419 struct sort_mods *sm = default_sort_mods; 420 421 if (sm->bflag) { 422 if (ret == NULL) 423 ret = bwsdup(s); 424 ret = ignore_leading_blanks(ret); 425 } 426 if (sm->dflag) { 427 if (ret == NULL) 428 ret = bwsdup(s); 429 ret = dictionary_order(ret); 430 } else if (sm->iflag) { 431 if (ret == NULL) 432 ret = bwsdup(s); 433 ret = ignore_nonprinting(ret); 434 } 435 if (sm->fflag || sm->Mflag) { 436 if (ret == NULL) 437 ret = bwsdup(s); 438 ret = ignore_case(ret); 439 } 440 if (ret == NULL) 441 set_key_on_keys_array(ka, s, 0); 442 else 443 set_key_on_keys_array(ka, ret, 0); 444 } 445 446 return 0; 447} 448 449cmpcoll_t 450get_sort_func(struct sort_mods *sm) 451{ 452 453 if (sm->nflag) 454 return (numcoll); 455 else if (sm->hflag) 456 return (hnumcoll); 457 else if (sm->gflag) 458 return (gnumcoll); 459 else if (sm->Mflag) 460 return (monthcoll); 461 else if (sm->Rflag) 462 return (randomcoll); 463 else if (sm->Vflag) 464 return (versioncoll); 465 else 466 return (wstrcoll); 467} 468 469/* 470 * Compares the given strings. Returns a positive number if 471 * the first precedes the second, a negative number if the second is 472 * the preceding one, and zero if they are equal. This function calls 473 * the underlying collate functions, which done the actual comparison. 474 */ 475int 476key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 477{ 478 struct sort_mods *sm; 479 int res = 0; 480 481 for (size_t i = 0; i < keys_num; ++i) { 482 sm = &(keys[i].sm); 483 484 if (sm->rflag) 485 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); 486 else 487 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); 488 489 if (res) 490 break; 491 492 /* offset applies to only the first key */ 493 offset = 0; 494 } 495 return (res); 496} 497 498/* 499 * Compare two strings. 500 * Plain symbol-by-symbol comparison. 501 */ 502int 503top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 504{ 505 506 if (default_sort_mods->rflag) { 507 const struct bwstring *tmp; 508 509 tmp = s1; 510 s1 = s2; 511 s2 = tmp; 512 } 513 514 return (bwscoll(s1, s2, 0)); 515} 516 517/* 518 * Compare a string and a sort list item, according to the sort specs. 519 */ 520int 521str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 522{ 523 struct keys_array *ka1; 524 int ret = 0; 525 526 ka1 = keys_array_alloc(); 527 528 preproc(str1, ka1); 529 530 sort_list_item_make_key(*ss2); 531 532 if (debug_sort) { 533 bwsprintf(stdout, str1, "; s1=<", ">"); 534 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 535 } 536 537 ret = key_coll(ka1, &((*ss2)->ka), 0); 538 539 if (debug_sort) 540 printf("; cmp1=%d", ret); 541 542 clean_keys_array(str1, ka1); 543 sort_free(ka1); 544 545 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 546 ret = top_level_str_coll(str1, ((*ss2)->str)); 547 if (debug_sort) 548 printf("; cmp2=%d", ret); 549 } 550 551 if (debug_sort) 552 printf("\n"); 553 554 return (ret); 555} 556 557/* 558 * Compare two sort list items, according to the sort specs. 559 */ 560int 561list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 562 size_t offset) 563{ 564 int ret; 565 566 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 567 568 if (debug_sort) { 569 if (offset) 570 printf("; offset=%d", (int) offset); 571 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 572 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 573 printf("; cmp1=%d\n", ret); 574 } 575 576 if (ret) 577 return (ret); 578 579 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 580 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 581 if (debug_sort) 582 printf("; cmp2=%d\n", ret); 583 } 584 585 return (ret); 586} 587 588/* 589 * Compare two sort list items, according to the sort specs. 590 */ 591int 592list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 593{ 594 595 return (list_coll_offset(ss1, ss2, 0)); 596} 597 598#define LSCDEF(N) \ 599static int \ 600list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 601{ \ 602 \ 603 return (list_coll_offset(ss1, ss2, N)); \ 604} 605 606LSCDEF(1) 607LSCDEF(2) 608LSCDEF(3) 609LSCDEF(4) 610LSCDEF(5) 611LSCDEF(6) 612LSCDEF(7) 613LSCDEF(8) 614LSCDEF(9) 615LSCDEF(10) 616LSCDEF(11) 617LSCDEF(12) 618LSCDEF(13) 619LSCDEF(14) 620LSCDEF(15) 621LSCDEF(16) 622LSCDEF(17) 623LSCDEF(18) 624LSCDEF(19) 625LSCDEF(20) 626 627listcoll_t 628get_list_call_func(size_t offset) 629{ 630 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 631 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 632 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 633 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 634 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 635 list_coll_18, list_coll_19, list_coll_20 }; 636 637 if (offset <= 20) 638 return (lsarray[offset]); 639 640 return (list_coll); 641} 642 643/* 644 * Compare two sort list items, only by their original string. 645 */ 646int 647list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 648{ 649 650 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 651} 652 653/* 654 * Maximum size of a number in the string (before or after decimal point) 655 */ 656#define MAX_NUM_SIZE (128) 657 658/* 659 * Set suffix value 660 */ 661static void setsuffix(wchar_t c, unsigned char *si) 662{ 663 switch (c){ 664 case L'k': 665 case L'K': 666 *si = 1; 667 break; 668 case L'M': 669 *si = 2; 670 break; 671 case L'G': 672 *si = 3; 673 break; 674 case L'T': 675 *si = 4; 676 break; 677 case L'P': 678 *si = 5; 679 break; 680 case L'E': 681 *si = 6; 682 break; 683 case L'Z': 684 *si = 7; 685 break; 686 case L'Y': 687 *si = 8; 688 break; 689 default: 690 *si = 0; 691 }; 692} 693 694/* 695 * Read string s and parse the string into a fixed-decimal-point number. 696 * sign equals -1 if the number is negative (explicit plus is not allowed, 697 * according to GNU sort's "info sort". 698 * The number part before decimal point is in the smain, after the decimal 699 * point is in sfrac, tail is the pointer to the remainder of the string. 700 */ 701static int 702read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 703{ 704 bwstring_iterator s; 705 706 s = bws_begin(s0); 707 708 /* always end the fraction with zero, even if we have no fraction */ 709 sfrac[0] = 0; 710 711 while (iswblank(bws_get_iter_value(s))) 712 s = bws_iterator_inc(s, 1); 713 714 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 715 *sign = -1; 716 s = bws_iterator_inc(s, 1); 717 } 718 719 // This is '0', not '\0', do not change this 720 while (iswdigit(bws_get_iter_value(s)) && 721 (bws_get_iter_value(s) == L'0')) 722 s = bws_iterator_inc(s, 1); 723 724 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 725 if (iswdigit(bws_get_iter_value(s))) { 726 smain[*main_len] = bws_get_iter_value(s); 727 s = bws_iterator_inc(s, 1); 728 *main_len += 1; 729 } else if (symbol_thousands_sep && 730 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 731 s = bws_iterator_inc(s, 1); 732 else 733 break; 734 } 735 736 smain[*main_len] = 0; 737 738 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 739 s = bws_iterator_inc(s, 1); 740 while (iswdigit(bws_get_iter_value(s)) && 741 *frac_len < MAX_NUM_SIZE) { 742 sfrac[*frac_len] = bws_get_iter_value(s); 743 s = bws_iterator_inc(s, 1); 744 *frac_len += 1; 745 } 746 sfrac[*frac_len] = 0; 747 748 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 749 --(*frac_len); 750 sfrac[*frac_len] = L'\0'; 751 } 752 } 753 754 setsuffix(bws_get_iter_value(s),si); 755 756 if ((*main_len + *frac_len) == 0) 757 *sign = 0; 758 759 return (0); 760} 761 762/* 763 * Implements string sort. 764 */ 765static int 766wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 767{ 768 769 if (debug_sort) { 770 if (offset) 771 printf("; offset=%d\n", (int) offset); 772 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 773 printf("(%zu)", BWSLEN(kv1->k)); 774 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 775 printf("(%zu)", BWSLEN(kv2->k)); 776 } 777 778 return (bwscoll(kv1->k, kv2->k, offset)); 779} 780 781/* 782 * Compare two suffixes 783 */ 784static inline int 785cmpsuffix(unsigned char si1, unsigned char si2) 786{ 787 788 return ((char)si1 - (char)si2); 789} 790 791/* 792 * Implements numeric sort for -n and -h. 793 */ 794static int 795numcoll_impl(struct key_value *kv1, struct key_value *kv2, 796 size_t offset __unused, bool use_suffix) 797{ 798 struct bwstring *s1, *s2; 799 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 800 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 801 int cmp_res, sign1, sign2; 802 size_t frac1, frac2, main1, main2; 803 unsigned char SI1, SI2; 804 bool e1, e2, key1_read, key2_read; 805 806 s1 = kv1->k; 807 s2 = kv2->k; 808 sign1 = sign2 = 0; 809 main1 = main2 = 0; 810 frac1 = frac2 = 0; 811 812 cmp_res = 0; 813 key1_read = key2_read = false; 814 815 if (debug_sort) { 816 bwsprintf(stdout, s1, "; k1=<", ">"); 817 bwsprintf(stdout, s2, ", k2=<", ">"); 818 } 819 820 if (s1 == s2) 821 return (0); 822 823 if (kv1->hint->status == HS_UNINITIALIZED) { 824 /* read the number from the string */ 825 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 826 key1_read = true; 827 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 828 if(main1 < 1 && frac1 < 1) 829 kv1->hint->v.nh.empty=true; 830 kv1->hint->v.nh.si = SI1; 831 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 832 HS_INITIALIZED : HS_ERROR; 833 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 834 } 835 836 if (kv2->hint->status == HS_UNINITIALIZED) { 837 /* read the number from the string */ 838 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 839 key2_read = true; 840 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 841 if(main2 < 1 && frac2 < 1) 842 kv2->hint->v.nh.empty=true; 843 kv2->hint->v.nh.si = SI2; 844 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 845 HS_INITIALIZED : HS_ERROR; 846 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 847 } 848 849 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 850 HS_INITIALIZED) { 851 unsigned long long n1, n2; 852 bool neg1, neg2; 853 854 e1 = kv1->hint->v.nh.empty; 855 e2 = kv2->hint->v.nh.empty; 856 857 if (e1 && e2) 858 return (0); 859 860 neg1 = kv1->hint->v.nh.neg; 861 neg2 = kv2->hint->v.nh.neg; 862 863 if (neg1 && !neg2) 864 return (-1); 865 if (neg2 && !neg1) 866 return (+1); 867 868 if (e1) 869 return (neg2 ? +1 : -1); 870 else if (e2) 871 return (neg1 ? -1 : +1); 872 873 874 if (use_suffix) { 875 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 876 if (cmp_res) 877 return (neg1 ? -cmp_res : cmp_res); 878 } 879 880 n1 = kv1->hint->v.nh.n1; 881 n2 = kv2->hint->v.nh.n1; 882 if (n1 < n2) 883 return (neg1 ? +1 : -1); 884 else if (n1 > n2) 885 return (neg1 ? -1 : +1); 886 } 887 888 /* read the numbers from the strings */ 889 if (!key1_read) 890 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 891 if (!key2_read) 892 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 893 894 e1 = ((main1 + frac1) == 0); 895 e2 = ((main2 + frac2) == 0); 896 897 if (e1 && e2) 898 return (0); 899 900 /* we know the result if the signs are different */ 901 if (sign1 < 0 && sign2 >= 0) 902 return (-1); 903 if (sign1 >= 0 && sign2 < 0) 904 return (+1); 905 906 if (e1) 907 return ((sign2 < 0) ? +1 : -1); 908 else if (e2) 909 return ((sign1 < 0) ? -1 : +1); 910 911 if (use_suffix) { 912 cmp_res = cmpsuffix(SI1, SI2); 913 if (cmp_res) 914 return ((sign1 < 0) ? -cmp_res : cmp_res); 915 } 916 917 /* if both numbers are empty assume that the strings are equal */ 918 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 919 return (0); 920 921 /* 922 * if the main part is of different size, we know the result 923 * (because the leading zeros are removed) 924 */ 925 if (main1 < main2) 926 cmp_res = -1; 927 else if (main1 > main2) 928 cmp_res = +1; 929 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 930 else 931 cmp_res = wcscmp(smain1, smain2); 932 933 /* check fraction */ 934 if (!cmp_res) 935 cmp_res = wcscmp(sfrac1, sfrac2); 936 937 if (!cmp_res) 938 return (0); 939 940 /* reverse result if the signs are negative */ 941 if (sign1 < 0 && sign2 < 0) 942 cmp_res = -cmp_res; 943 944 return (cmp_res); 945} 946 947/* 948 * Implements numeric sort (-n). 949 */ 950static int 951numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 952{ 953 954 return (numcoll_impl(kv1, kv2, offset, false)); 955} 956 957/* 958 * Implements 'human' numeric sort (-h). 959 */ 960static int 961hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 962{ 963 964 return (numcoll_impl(kv1, kv2, offset, true)); 965} 966 967/* 968 * Implements random sort (-R). 969 */ 970static int 971randomcoll(struct key_value *kv1, struct key_value *kv2, 972 size_t offset __unused) 973{ 974 struct bwstring *s1, *s2; 975 MD5_CTX ctx1, ctx2; 976 char *b1, *b2; 977 978 s1 = kv1->k; 979 s2 = kv2->k; 980 981 if (debug_sort) { 982 bwsprintf(stdout, s1, "; k1=<", ">"); 983 bwsprintf(stdout, s2, ", k2=<", ">"); 984 } 985 986 if (s1 == s2) 987 return (0); 988 989 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 990 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 991 992 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 993 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 994 b1 = MD5End(&ctx1, NULL); 995 b2 = MD5End(&ctx2, NULL); 996 if (b1 == NULL) { 997 if (b2 == NULL) 998 return (0); 999 else { 1000 sort_free(b2); 1001 return (-1); 1002 } 1003 } else if (b2 == NULL) { 1004 sort_free(b1); 1005 return (+1); 1006 } else { 1007 int cmp_res; 1008 1009 cmp_res = strcmp(b1,b2); 1010 sort_free(b1); 1011 sort_free(b2); 1012 1013 if (!cmp_res) 1014 cmp_res = bwscoll(s1, s2, 0); 1015 1016 return (cmp_res); 1017 } 1018} 1019 1020/* 1021 * Implements version sort (-V). 1022 */ 1023static int 1024versioncoll(struct key_value *kv1, struct key_value *kv2, 1025 size_t offset __unused) 1026{ 1027 struct bwstring *s1, *s2; 1028 1029 s1 = kv1->k; 1030 s2 = kv2->k; 1031 1032 if (debug_sort) { 1033 bwsprintf(stdout, s1, "; k1=<", ">"); 1034 bwsprintf(stdout, s2, ", k2=<", ">"); 1035 } 1036 1037 if (s1 == s2) 1038 return (0); 1039 1040 return (vcmp(s1, s2)); 1041} 1042 1043/* 1044 * Check for minus infinity 1045 */ 1046static inline bool 1047huge_minus(double d, int err1) 1048{ 1049 1050 if (err1 == ERANGE) 1051 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1052 return (+1); 1053 1054 return (0); 1055} 1056 1057/* 1058 * Check for plus infinity 1059 */ 1060static inline bool 1061huge_plus(double d, int err1) 1062{ 1063 1064 if (err1 == ERANGE) 1065 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1066 return (+1); 1067 1068 return (0); 1069} 1070 1071/* 1072 * Check whether a function is a NAN 1073 */ 1074static bool 1075is_nan(double d) 1076{ 1077 1078 return ((d == NAN) || (isnan(d))); 1079} 1080 1081/* 1082 * Compare two NANs 1083 */ 1084static int 1085cmp_nans(double d1, double d2) 1086{ 1087 1088 if (d1 < d2) 1089 return (-1); 1090 if (d2 > d2) 1091 return (+1); 1092 return (0); 1093} 1094 1095/* 1096 * Implements general numeric sort (-g). 1097 */ 1098static int 1099gnumcoll(struct key_value *kv1, struct key_value *kv2, 1100 size_t offset __unused) 1101{ 1102 double d1, d2; 1103 int err1, err2; 1104 bool empty1, empty2, key1_read, key2_read; 1105 1106 d1 = d2 = 0; 1107 err1 = err2 = 0; 1108 key1_read = key2_read = false; 1109 1110 if (debug_sort) { 1111 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1112 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1113 } 1114 1115 if (kv1->hint->status == HS_UNINITIALIZED) { 1116 errno = 0; 1117 d1 = bwstod(kv1->k, &empty1); 1118 err1 = errno; 1119 1120 if (empty1) 1121 kv1->hint->v.gh.notnum = true; 1122 else if (err1 == 0) { 1123 kv1->hint->v.gh.d = d1; 1124 kv1->hint->v.gh.nan = is_nan(d1); 1125 kv1->hint->status = HS_INITIALIZED; 1126 } else 1127 kv1->hint->status = HS_ERROR; 1128 1129 key1_read = true; 1130 } 1131 1132 if (kv2->hint->status == HS_UNINITIALIZED) { 1133 errno = 0; 1134 d2 = bwstod(kv2->k, &empty2); 1135 err2 = errno; 1136 1137 if (empty2) 1138 kv2->hint->v.gh.notnum = true; 1139 else if (err2 == 0) { 1140 kv2->hint->v.gh.d = d2; 1141 kv2->hint->v.gh.nan = is_nan(d2); 1142 kv2->hint->status = HS_INITIALIZED; 1143 } else 1144 kv2->hint->status = HS_ERROR; 1145 1146 key2_read = true; 1147 } 1148 1149 if (kv1->hint->status == HS_INITIALIZED && 1150 kv2->hint->status == HS_INITIALIZED) { 1151 if (kv1->hint->v.gh.notnum) 1152 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1153 else if (kv2->hint->v.gh.notnum) 1154 return (+1); 1155 1156 if (kv1->hint->v.gh.nan) 1157 return ((kv2->hint->v.gh.nan) ? 1158 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1159 -1); 1160 else if (kv2->hint->v.gh.nan) 1161 return (+1); 1162 1163 d1 = kv1->hint->v.gh.d; 1164 d2 = kv2->hint->v.gh.d; 1165 1166 if (d1 < d2) 1167 return (-1); 1168 else if (d1 > d2) 1169 return (+1); 1170 else 1171 return (0); 1172 } 1173 1174 if (!key1_read) { 1175 errno = 0; 1176 d1 = bwstod(kv1->k, &empty1); 1177 err1 = errno; 1178 } 1179 1180 if (!key2_read) { 1181 errno = 0; 1182 d2 = bwstod(kv2->k, &empty2); 1183 err2 = errno; 1184 } 1185 1186 /* Non-value case: */ 1187 if (empty1) 1188 return (empty2 ? 0 : -1); 1189 else if (empty2) 1190 return (+1); 1191 1192 /* NAN case */ 1193 if (is_nan(d1)) 1194 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1195 else if (is_nan(d2)) 1196 return (+1); 1197 1198 /* Infinities */ 1199 if (err1 == ERANGE || err2 == ERANGE) { 1200 /* Minus infinity case */ 1201 if (huge_minus(d1, err1)) { 1202 if (huge_minus(d2, err2)) { 1203 if (d1 < d2) 1204 return (-1); 1205 if (d1 > d2) 1206 return (+1); 1207 return (0); 1208 } else 1209 return (-1); 1210 1211 } else if (huge_minus(d2, err2)) { 1212 if (huge_minus(d1, err1)) { 1213 if (d1 < d2) 1214 return (-1); 1215 if (d1 > d2) 1216 return (+1); 1217 return (0); 1218 } else 1219 return (+1); 1220 } 1221 1222 /* Plus infinity case */ 1223 if (huge_plus(d1, err1)) { 1224 if (huge_plus(d2, err2)) { 1225 if (d1 < d2) 1226 return (-1); 1227 if (d1 > d2) 1228 return (+1); 1229 return (0); 1230 } else 1231 return (+1); 1232 } else if (huge_plus(d2, err2)) { 1233 if (huge_plus(d1, err1)) { 1234 if (d1 < d2) 1235 return (-1); 1236 if (d1 > d2) 1237 return (+1); 1238 return (0); 1239 } else 1240 return (-1); 1241 } 1242 } 1243 1244 if (d1 < d2) 1245 return (-1); 1246 if (d1 > d2) 1247 return (+1); 1248 1249 return (0); 1250} 1251 1252/* 1253 * Implements month sort (-M). 1254 */ 1255static int 1256monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1257{ 1258 int val1, val2; 1259 bool key1_read, key2_read; 1260 1261 val1 = val2 = 0; 1262 key1_read = key2_read = false; 1263 1264 if (debug_sort) { 1265 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1266 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1267 } 1268 1269 if (kv1->hint->status == HS_UNINITIALIZED) { 1270 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1271 key1_read = true; 1272 kv1->hint->status = HS_INITIALIZED; 1273 } 1274 1275 if (kv2->hint->status == HS_UNINITIALIZED) { 1276 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1277 key2_read = true; 1278 kv2->hint->status = HS_INITIALIZED; 1279 } 1280 1281 if (kv1->hint->status == HS_INITIALIZED) { 1282 val1 = kv1->hint->v.Mh.m; 1283 key1_read = true; 1284 } 1285 1286 if (kv2->hint->status == HS_INITIALIZED) { 1287 val2 = kv2->hint->v.Mh.m; 1288 key2_read = true; 1289 } 1290 1291 if (!key1_read) 1292 val1 = bws_month_score(kv1->k); 1293 if (!key2_read) 1294 val2 = bws_month_score(kv2->k); 1295 1296 if (val1 == val2) { 1297 return (0); 1298 } 1299 if (val1 < val2) 1300 return (-1); 1301 return (+1); 1302} 1303