coll.c revision 236764
1/*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/usr.bin/sort/coll.c 236764 2012-06-08 19:21:49Z gabor $"); 30 31#include <sys/types.h> 32 33#include <errno.h> 34#include <err.h> 35#include <langinfo.h> 36#include <limits.h> 37#include <math.h> 38#include <md5.h> 39#include <stdlib.h> 40#include <string.h> 41#include <wchar.h> 42#include <wctype.h> 43 44#include "coll.h" 45#include "vsort.h" 46 47struct key_specs *keys; 48size_t keys_num = 0; 49 50wchar_t symbol_decimal_point = L'.'; 51/* there is no default thousands separator in collate rules: */ 52wchar_t symbol_thousands_sep = 0; 53wchar_t symbol_negative_sign = L'-'; 54wchar_t symbol_positive_sign = L'+'; 55 56static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 57static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 58static int monthcoll(struct key_value*, struct key_value *, size_t offset); 59static int numcoll(struct key_value*, struct key_value *, size_t offset); 60static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 61static int randomcoll(struct key_value*, struct key_value *, size_t offset); 62static int versioncoll(struct key_value*, struct key_value *, size_t offset); 63 64/* 65 * Allocate keys array 66 */ 67struct keys_array * 68keys_array_alloc(void) 69{ 70 struct keys_array *ka; 71 size_t sz; 72 73 sz = keys_array_size(); 74 ka = sort_malloc(sz); 75 memset(ka, 0, sz); 76 77 return (ka); 78} 79 80/* 81 * Calculate whether we need key hint space 82 */ 83static size_t 84key_hint_size(void) 85{ 86 87 return (need_hint ? sizeof(struct key_hint) : 0); 88} 89 90/* 91 * Calculate keys array size 92 */ 93size_t 94keys_array_size(void) 95{ 96 97 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 98} 99 100/* 101 * Clean data of keys array 102 */ 103void 104clean_keys_array(const struct bwstring *s, struct keys_array *ka) 105{ 106 107 if (ka) { 108 for (size_t i = 0; i < keys_num; ++i) 109 if (ka->key[i].k && ka->key[i].k != s) 110 bwsfree(ka->key[i].k); 111 memset(ka, 0, keys_array_size()); 112 } 113} 114 115/* 116 * Set value of a key in the keys set 117 */ 118void 119set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 120{ 121 122 if (ka && keys_num > ind) { 123 struct key_value *kv; 124 125 kv = &(ka->key[ind]); 126 127 if (kv->k && kv->k != s) 128 bwsfree(kv->k); 129 kv->k = s; 130 } 131} 132 133/* 134 * Initialize a sort list item 135 */ 136struct sort_list_item * 137sort_list_item_alloc(void) 138{ 139 struct sort_list_item *si; 140 size_t sz; 141 142 sz = sizeof(struct sort_list_item) + keys_array_size(); 143 si = sort_malloc(sz); 144 memset(si, 0, sz); 145 146 return (si); 147} 148 149size_t 150sort_list_item_size(struct sort_list_item *si) 151{ 152 size_t ret = 0; 153 154 if (si) { 155 ret = sizeof(struct sort_list_item) + keys_array_size(); 156 if (si->str) 157 ret += bws_memsize(si->str); 158 for (size_t i = 0; i < keys_num; ++i) { 159 struct key_value *kv; 160 161 kv = &(si->ka.key[i]); 162 163 if (kv->k != si->str) 164 ret += bws_memsize(kv->k); 165 } 166 } 167 return (ret); 168} 169 170/* 171 * Calculate key for a sort list item 172 */ 173static void 174sort_list_item_make_key(struct sort_list_item *si) 175{ 176 177 preproc(si->str, &(si->ka)); 178} 179 180/* 181 * Set value of a sort list item. 182 * Return combined string and keys memory size. 183 */ 184void 185sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 186{ 187 188 if (si) { 189 clean_keys_array(si->str, &(si->ka)); 190 if (si->str) { 191 if (si->str == str) { 192 /* we are trying to reset the same string */ 193 return; 194 } else { 195 bwsfree(si->str); 196 si->str = NULL; 197 } 198 } 199 si->str = str; 200 sort_list_item_make_key(si); 201 } 202} 203 204/* 205 * De-allocate a sort list item object memory 206 */ 207void 208sort_list_item_clean(struct sort_list_item *si) 209{ 210 211 if (si) { 212 clean_keys_array(si->str, &(si->ka)); 213 if (si->str) { 214 bwsfree(si->str); 215 si->str = NULL; 216 } 217 } 218} 219 220/* 221 * Skip columns according to specs 222 */ 223static size_t 224skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 225 bool skip_blanks, bool *empty_key) 226{ 227 if (cols < 1) 228 return (BWSLEN(s) + 1); 229 230 if (skip_blanks) 231 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 232 ++start; 233 234 while (start < BWSLEN(s) && cols > 1) { 235 --cols; 236 ++start; 237 } 238 239 if (start >= BWSLEN(s)) 240 *empty_key = true; 241 242 return (start); 243} 244 245/* 246 * Skip fields according to specs 247 */ 248static size_t 249skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 250{ 251 252 if (fields < 2) { 253 if (BWSLEN(s) == 0) 254 *empty_field = true; 255 return (0); 256 } else if (!(sort_opts_vals.tflag)) { 257 size_t cpos = 0; 258 bool pb = true; 259 260 while (cpos < BWSLEN(s)) { 261 bool isblank; 262 263 isblank = iswblank(BWS_GET(s,cpos)); 264 265 if (isblank && !pb) { 266 --fields; 267 if (fields <= 1) 268 return (cpos); 269 } 270 pb = isblank; 271 ++cpos; 272 } 273 if (fields > 1) 274 *empty_field = true; 275 return (cpos); 276 } else { 277 size_t cpos = 0; 278 279 while (cpos < BWSLEN(s)) { 280 if (BWS_GET(s,cpos) == sort_opts_vals.field_sep) { 281 --fields; 282 if (fields <= 1) 283 return (cpos + 1); 284 } 285 ++cpos; 286 } 287 if (fields > 1) 288 *empty_field = true; 289 return (cpos); 290 } 291} 292 293/* 294 * Find fields start 295 */ 296static void 297find_field_start(const struct bwstring *s, struct key_specs *ks, 298 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 299{ 300 301 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 302 if (!*empty_field) 303 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 304 ks->pos1b, empty_key); 305 else 306 *empty_key = true; 307} 308 309/* 310 * Find end key position 311 */ 312static size_t 313find_field_end(const struct bwstring *s, struct key_specs *ks) 314{ 315 size_t f2, next_field_start, pos_end; 316 bool empty_field, empty_key; 317 318 pos_end = 0; 319 next_field_start = 0; 320 empty_field = false; 321 empty_key = false; 322 f2 = ks->f2; 323 324 if (f2 == 0) 325 return (BWSLEN(s) + 1); 326 else { 327 if (ks->c2 == 0) { 328 next_field_start = skip_fields_to_start(s, f2 + 1, 329 &empty_field); 330 if ((next_field_start > 0) && sort_opts_vals.tflag && 331 (sort_opts_vals.field_sep == BWS_GET(s, 332 next_field_start - 1))) 333 --next_field_start; 334 } else 335 next_field_start = skip_fields_to_start(s, f2, 336 &empty_field); 337 } 338 339 if (empty_field || (next_field_start >= BWSLEN(s))) 340 return (BWSLEN(s) + 1); 341 342 if (ks->c2) { 343 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 344 ks->pos2b, &empty_key); 345 if (pos_end < BWSLEN(s)) 346 ++pos_end; 347 } else 348 pos_end = next_field_start; 349 350 return (pos_end); 351} 352 353/* 354 * Cut a field according to the key specs 355 */ 356static struct bwstring * 357cut_field(const struct bwstring *s, struct key_specs *ks) 358{ 359 struct bwstring *ret = NULL; 360 361 if (s && ks) { 362 size_t field_start, key_end, key_start, sz; 363 bool empty_field, empty_key; 364 365 field_start = 0; 366 key_start = 0; 367 empty_field = false; 368 empty_key = false; 369 370 find_field_start(s, ks, &field_start, &key_start, 371 &empty_field, &empty_key); 372 373 if (empty_key) 374 sz = 0; 375 else { 376 key_end = find_field_end(s, ks); 377 sz = (key_end < key_start) ? 0 : (key_end - key_start); 378 } 379 380 ret = bwsalloc(sz); 381 if (sz) 382 bwsnocpy(ret, s, key_start, sz); 383 } else 384 ret = bwsalloc(0); 385 386 return (ret); 387} 388 389/* 390 * Preprocesses a line applying the necessary transformations 391 * specified by command line options and returns the preprocessed 392 * string, which can be used to compare. 393 */ 394int 395preproc(struct bwstring *s, struct keys_array *ka) 396{ 397 398 if (sort_opts_vals.kflag) 399 for (size_t i = 0; i < keys_num; i++) { 400 struct bwstring *key; 401 struct key_specs *kspecs; 402 struct sort_mods *sm; 403 404 kspecs = &(keys[i]); 405 key = cut_field(s, kspecs); 406 407 sm = &(kspecs->sm); 408 if (sm->dflag) 409 key = dictionary_order(key); 410 else if (sm->iflag) 411 key = ignore_nonprinting(key); 412 if (sm->fflag || sm->Mflag) 413 key = ignore_case(key); 414 415 set_key_on_keys_array(ka, key, i); 416 } 417 else { 418 struct bwstring *ret = NULL; 419 struct sort_mods *sm = default_sort_mods; 420 421 if (sm->bflag) { 422 if (ret == NULL) 423 ret = bwsdup(s); 424 ret = ignore_leading_blanks(ret); 425 } 426 if (sm->dflag) { 427 if (ret == NULL) 428 ret = bwsdup(s); 429 ret = dictionary_order(ret); 430 } else if (sm->iflag) { 431 if (ret == NULL) 432 ret = bwsdup(s); 433 ret = ignore_nonprinting(ret); 434 } 435 if (sm->fflag || sm->Mflag) { 436 if (ret == NULL) 437 ret = bwsdup(s); 438 ret = ignore_case(ret); 439 } 440 if (ret == NULL) 441 set_key_on_keys_array(ka, s, 0); 442 else 443 set_key_on_keys_array(ka, ret, 0); 444 } 445 446 return 0; 447} 448 449cmpcoll_t 450get_sort_func(struct sort_mods *sm) 451{ 452 453 if (sm->nflag) 454 return (numcoll); 455 else if (sm->hflag) 456 return (hnumcoll); 457 else if (sm->gflag) 458 return (gnumcoll); 459 else if (sm->Mflag) 460 return (monthcoll); 461 else if (sm->Rflag) 462 return (randomcoll); 463 else if (sm->Vflag) 464 return (versioncoll); 465 else 466 return (wstrcoll); 467} 468 469/* 470 * Compares the given strings. Returns a positive number if 471 * the first precedes the second, a negative number if the second is 472 * the preceding one, and zero if they are equal. This function calls 473 * the underlying collate functions, which done the actual comparison. 474 */ 475int 476key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 477{ 478 struct sort_mods *sm; 479 int res = 0; 480 481 for (size_t i = 0; i < keys_num; ++i) { 482 sm = &(keys[i].sm); 483 484 if (sm->rflag) 485 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); 486 else 487 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); 488 489 if (res) 490 break; 491 492 /* offset applies to only the first key */ 493 offset = 0; 494 } 495 return (res); 496} 497 498/* 499 * Compare two strings. 500 * Plain symbol-by-symbol comparison. 501 */ 502int 503top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 504{ 505 506 if (default_sort_mods->rflag) { 507 const struct bwstring *tmp; 508 509 tmp = s1; 510 s1 = s2; 511 s2 = tmp; 512 } 513 514 return (bwscoll(s1, s2, 0)); 515} 516 517/* 518 * Compare a string and a sort list item, according to the sort specs. 519 */ 520int 521str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 522{ 523 struct keys_array *ka1; 524 int ret = 0; 525 526 ka1 = keys_array_alloc(); 527 528 preproc(str1, ka1); 529 530 sort_list_item_make_key(*ss2); 531 532 if (debug_sort) { 533 bwsprintf(stdout, str1, "; s1=<", ">"); 534 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 535 } 536 537 ret = key_coll(ka1, &((*ss2)->ka), 0); 538 539 if (debug_sort) 540 printf("; cmp1=%d", ret); 541 542 clean_keys_array(str1, ka1); 543 sort_free(ka1); 544 545 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 546 ret = top_level_str_coll(str1, ((*ss2)->str)); 547 if (debug_sort) 548 printf("; cmp2=%d", ret); 549 } 550 551 if (debug_sort) 552 printf("\n"); 553 554 return (ret); 555} 556 557/* 558 * Compare two sort list items, according to the sort specs. 559 */ 560int 561list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 562 size_t offset) 563{ 564 int ret; 565 566 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 567 568 if (debug_sort) { 569 if (offset) 570 printf("; offset=%d", (int) offset); 571 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 572 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 573 printf("; cmp1=%d\n", ret); 574 } 575 576 if (ret) 577 return (ret); 578 579 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 580 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 581 if (debug_sort) 582 printf("; cmp2=%d\n", ret); 583 } 584 585 return (ret); 586} 587 588/* 589 * Compare two sort list items, according to the sort specs. 590 */ 591int 592list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 593{ 594 595 return (list_coll_offset(ss1, ss2, 0)); 596} 597 598#define LSCDEF(N) \ 599static int \ 600list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 601{ \ 602 \ 603 return (list_coll_offset(ss1, ss2, N)); \ 604} 605 606LSCDEF(1) 607LSCDEF(2) 608LSCDEF(3) 609LSCDEF(4) 610LSCDEF(5) 611LSCDEF(6) 612LSCDEF(7) 613LSCDEF(8) 614LSCDEF(9) 615LSCDEF(10) 616LSCDEF(11) 617LSCDEF(12) 618LSCDEF(13) 619LSCDEF(14) 620LSCDEF(15) 621LSCDEF(16) 622LSCDEF(17) 623LSCDEF(18) 624LSCDEF(19) 625LSCDEF(20) 626 627listcoll_t 628get_list_call_func(size_t offset) 629{ 630 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 631 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 632 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 633 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 634 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 635 list_coll_18, list_coll_19, list_coll_20 }; 636 637 if (offset <= 20) 638 return (lsarray[offset]); 639 640 return (list_coll); 641} 642 643/* 644 * Compare two sort list items, only by their original string. 645 */ 646int 647list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 648{ 649 650 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 651} 652 653/* 654 * Maximum size of a number in the string (before or after decimal point) 655 */ 656#define MAX_NUM_SIZE (128) 657 658/* 659 * Set suffix value 660 */ 661static void setsuffix(wchar_t c, unsigned char *si) 662{ 663 switch (c){ 664 case L'k': 665 case L'K': 666 *si = 1; 667 break; 668 case L'M': 669 *si = 2; 670 break; 671 case L'G': 672 *si = 3; 673 break; 674 case L'T': 675 *si = 4; 676 break; 677 case L'P': 678 *si = 5; 679 break; 680 case L'E': 681 *si = 6; 682 break; 683 case L'Z': 684 *si = 7; 685 break; 686 case L'Y': 687 *si = 8; 688 break; 689 default: 690 *si = 0; 691 }; 692} 693 694/* 695 * Read string s and parse the string into a fixed-decimal-point number. 696 * sign equals -1 if the number is negative (explicit plus is not allowed, 697 * according to GNU sort's "info sort". 698 * The number part before decimal point is in the smain, after the decimal 699 * point is in sfrac, tail is the pointer to the remainder of the string. 700 */ 701static int 702read_number(struct bwstring *s0, int *sign, wchar_t *smain, int *main_len, wchar_t *sfrac, int *frac_len, unsigned char *si) 703{ 704 bwstring_iterator s; 705 706 s = bws_begin(s0); 707 708 /* always end the fraction with zero, even if we have no fraction */ 709 sfrac[0] = 0; 710 711 while (iswblank(bws_get_iter_value(s))) 712 s = bws_iterator_inc(s, 1); 713 714 if (bws_get_iter_value(s) == symbol_negative_sign) { 715 *sign = -1; 716 s = bws_iterator_inc(s, 1); 717 } 718 719 // This is '0', not '\0', do not change this 720 while (iswdigit(bws_get_iter_value(s)) && 721 (bws_get_iter_value(s) == L'0')) 722 s = bws_iterator_inc(s, 1); 723 724 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 725 if (iswdigit(bws_get_iter_value(s))) { 726 smain[*main_len] = bws_get_iter_value(s); 727 s = bws_iterator_inc(s, 1); 728 *main_len += 1; 729 } else if (symbol_thousands_sep && 730 (bws_get_iter_value(s) == symbol_thousands_sep)) 731 s = bws_iterator_inc(s, 1); 732 else 733 break; 734 } 735 736 smain[*main_len] = 0; 737 738 if (bws_get_iter_value(s) == symbol_decimal_point) { 739 s = bws_iterator_inc(s, 1); 740 while (iswdigit(bws_get_iter_value(s)) && 741 *frac_len < MAX_NUM_SIZE) { 742 sfrac[*frac_len] = bws_get_iter_value(s); 743 s = bws_iterator_inc(s, 1); 744 *frac_len += 1; 745 } 746 sfrac[*frac_len] = 0; 747 748 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 749 --(*frac_len); 750 sfrac[*frac_len] = L'\0'; 751 } 752 } 753 754 setsuffix(bws_get_iter_value(s),si); 755 756 if ((*main_len + *frac_len) == 0) 757 *sign = 0; 758 759 return (0); 760} 761 762/* 763 * Implements string sort. 764 */ 765static int 766wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 767{ 768 769 if (debug_sort) { 770 if (offset) 771 printf("; offset=%d\n", (int) offset); 772 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 773 printf("(%zu)", BWSLEN(kv1->k)); 774 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 775 printf("(%zu)", BWSLEN(kv2->k)); 776 } 777 778 return (bwscoll(kv1->k, kv2->k, offset)); 779} 780 781/* 782 * Compare two suffixes 783 */ 784static inline int 785cmpsuffix(unsigned char si1, unsigned char si2) 786{ 787 788 return ((char)si1 - (char)si2); 789} 790 791/* 792 * Implements numeric sort for -n and -h. 793 */ 794static int 795numcoll_impl(struct key_value *kv1, struct key_value *kv2, 796 size_t offset __unused, bool use_suffix) 797{ 798 struct bwstring *s1, *s2; 799 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 800 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 801 int cmp_res, frac1, frac2, main1, main2, sign1, sign2; 802 unsigned char SI1, SI2; 803 bool e1, e2, key1_read, key2_read; 804 805 s1 = kv1->k; 806 s2 = kv2->k; 807 sign1 = sign2 = 0; 808 main1 = main2 = 0; 809 frac1 = frac2 = 0; 810 811 cmp_res = 0; 812 key1_read = key2_read = false; 813 814 if (debug_sort) { 815 bwsprintf(stdout, s1, "; k1=<", ">"); 816 bwsprintf(stdout, s2, ", k2=<", ">"); 817 } 818 819 if (s1 == s2) 820 return (0); 821 822 if (kv1->hint->status == HS_UNINITIALIZED) { 823 /* read the number from the string */ 824 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 825 key1_read = true; 826 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 827 if(main1 < 1 && frac1 < 1) 828 kv1->hint->v.nh.empty=true; 829 kv1->hint->v.nh.si = SI1; 830 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 831 HS_INITIALIZED : HS_ERROR; 832 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 833 } 834 835 if (kv2->hint->status == HS_UNINITIALIZED) { 836 /* read the number from the string */ 837 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 838 key2_read = true; 839 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 840 if(main2 < 1 && frac2 < 1) 841 kv2->hint->v.nh.empty=true; 842 kv2->hint->v.nh.si = SI2; 843 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 844 HS_INITIALIZED : HS_ERROR; 845 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 846 } 847 848 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 849 HS_INITIALIZED) { 850 unsigned long long n1, n2; 851 bool neg1, neg2; 852 853 e1 = kv1->hint->v.nh.empty; 854 e2 = kv2->hint->v.nh.empty; 855 856 if (e1 && e2) 857 return (0); 858 859 neg1 = kv1->hint->v.nh.neg; 860 neg2 = kv2->hint->v.nh.neg; 861 862 if (neg1 && !neg2) 863 return (-1); 864 if (neg2 && !neg1) 865 return (+1); 866 867 if (e1) 868 return (neg2 ? +1 : -1); 869 else if (e2) 870 return (neg1 ? -1 : +1); 871 872 873 if (use_suffix) { 874 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 875 if (cmp_res) 876 return (neg1 ? -cmp_res : cmp_res); 877 } 878 879 n1 = kv1->hint->v.nh.n1; 880 n2 = kv2->hint->v.nh.n1; 881 if (n1 < n2) 882 return (neg1 ? +1 : -1); 883 else if (n1 > n2) 884 return (neg1 ? -1 : +1); 885 } 886 887 /* read the numbers from the strings */ 888 if (!key1_read) 889 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 890 if (!key2_read) 891 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 892 893 e1 = ((main1 + frac1) == 0); 894 e2 = ((main2 + frac2) == 0); 895 896 if (e1 && e2) 897 return (0); 898 899 /* we know the result if the signs are different */ 900 if (sign1 < 0 && sign2 >= 0) 901 return (-1); 902 if (sign1 >= 0 && sign2 < 0) 903 return (+1); 904 905 if (e1) 906 return ((sign2 < 0) ? +1 : -1); 907 else if (e2) 908 return ((sign1 < 0) ? -1 : +1); 909 910 if (use_suffix) { 911 cmp_res = cmpsuffix(SI1, SI2); 912 if (cmp_res) 913 return ((sign1 < 0) ? -cmp_res : cmp_res); 914 } 915 916 /* if both numbers are empty assume that the strings are equal */ 917 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 918 return (0); 919 920 /* 921 * if the main part is of different size, we know the result 922 * (because the leading zeros are removed) 923 */ 924 if (main1 < main2) 925 cmp_res = -1; 926 else if (main1 > main2) 927 cmp_res = +1; 928 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 929 else 930 cmp_res = wcscmp(smain1, smain2); 931 932 /* check fraction */ 933 if (!cmp_res) 934 cmp_res = wcscmp(sfrac1, sfrac2); 935 936 if (!cmp_res) 937 return (0); 938 939 /* reverse result if the signs are negative */ 940 if (sign1 < 0 && sign2 < 0) 941 cmp_res = -cmp_res; 942 943 return (cmp_res); 944} 945 946/* 947 * Implements numeric sort (-n). 948 */ 949static int 950numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 951{ 952 953 return (numcoll_impl(kv1, kv2, offset, false)); 954} 955 956/* 957 * Implements 'human' numeric sort (-h). 958 */ 959static int 960hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 961{ 962 963 return (numcoll_impl(kv1, kv2, offset, true)); 964} 965 966/* 967 * Implements random sort (-R). 968 */ 969static int 970randomcoll(struct key_value *kv1, struct key_value *kv2, 971 size_t offset __unused) 972{ 973 struct bwstring *s1, *s2; 974 MD5_CTX ctx1, ctx2; 975 char *b1, *b2; 976 977 s1 = kv1->k; 978 s2 = kv2->k; 979 980 if (debug_sort) { 981 bwsprintf(stdout, s1, "; k1=<", ">"); 982 bwsprintf(stdout, s2, ", k2=<", ">"); 983 } 984 985 if (s1 == s2) 986 return (0); 987 988 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 989 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 990 991 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 992 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 993 b1 = MD5End(&ctx1, NULL); 994 b2 = MD5End(&ctx2, NULL); 995 if (b1 == NULL) { 996 if (b2 == NULL) 997 return (0); 998 else { 999 sort_free(b2); 1000 return (-1); 1001 } 1002 } else if (b2 == NULL) { 1003 sort_free(b1); 1004 return (+1); 1005 } else { 1006 int cmp_res; 1007 1008 cmp_res = strcmp(b1,b2); 1009 sort_free(b1); 1010 sort_free(b2); 1011 1012 if (!cmp_res) 1013 cmp_res = bwscoll(s1, s2, 0); 1014 1015 return (cmp_res); 1016 } 1017} 1018 1019/* 1020 * Implements version sort (-V). 1021 */ 1022static int 1023versioncoll(struct key_value *kv1, struct key_value *kv2, 1024 size_t offset __unused) 1025{ 1026 struct bwstring *s1, *s2; 1027 1028 s1 = kv1->k; 1029 s2 = kv2->k; 1030 1031 if (debug_sort) { 1032 bwsprintf(stdout, s1, "; k1=<", ">"); 1033 bwsprintf(stdout, s2, ", k2=<", ">"); 1034 } 1035 1036 if (s1 == s2) 1037 return (0); 1038 1039 return (vcmp(s1, s2)); 1040} 1041 1042/* 1043 * Check for minus infinity 1044 */ 1045static inline bool 1046huge_minus(double d, int err1) 1047{ 1048 1049 if (err1 == ERANGE) 1050 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1051 return (+1); 1052 1053 return (0); 1054} 1055 1056/* 1057 * Check for plus infinity 1058 */ 1059static inline bool 1060huge_plus(double d, int err1) 1061{ 1062 1063 if (err1 == ERANGE) 1064 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1065 return (+1); 1066 1067 return (0); 1068} 1069 1070/* 1071 * Check whether a function is a NAN 1072 */ 1073static bool 1074is_nan(double d) 1075{ 1076 1077 return ((d == NAN) || (isnan(d))); 1078} 1079 1080/* 1081 * Compare two NANs 1082 */ 1083static int 1084cmp_nans(double d1, double d2) 1085{ 1086 1087 if (d1 < d2) 1088 return (-1); 1089 if (d2 > d2) 1090 return (+1); 1091 return (0); 1092} 1093 1094/* 1095 * Implements general numeric sort (-g). 1096 */ 1097static int 1098gnumcoll(struct key_value *kv1, struct key_value *kv2, 1099 size_t offset __unused) 1100{ 1101 double d1, d2; 1102 int err1, err2; 1103 bool empty1, empty2, key1_read, key2_read; 1104 1105 d1 = d2 = 0; 1106 err1 = err2 = 0; 1107 key1_read = key2_read = false; 1108 1109 if (debug_sort) { 1110 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1111 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1112 } 1113 1114 if (kv1->hint->status == HS_UNINITIALIZED) { 1115 errno = 0; 1116 d1 = bwstod(kv1->k, &empty1); 1117 err1 = errno; 1118 1119 if (empty1) 1120 kv1->hint->v.gh.notnum = true; 1121 else if (err1 == 0) { 1122 kv1->hint->v.gh.d = d1; 1123 kv1->hint->v.gh.nan = is_nan(d1); 1124 kv1->hint->status = HS_INITIALIZED; 1125 } else 1126 kv1->hint->status = HS_ERROR; 1127 1128 key1_read = true; 1129 } 1130 1131 if (kv2->hint->status == HS_UNINITIALIZED) { 1132 errno = 0; 1133 d2 = bwstod(kv2->k, &empty2); 1134 err2 = errno; 1135 1136 if (empty2) 1137 kv2->hint->v.gh.notnum = true; 1138 else if (err2 == 0) { 1139 kv2->hint->v.gh.d = d2; 1140 kv2->hint->v.gh.nan = is_nan(d2); 1141 kv2->hint->status = HS_INITIALIZED; 1142 } else 1143 kv2->hint->status = HS_ERROR; 1144 1145 key2_read = true; 1146 } 1147 1148 if (kv1->hint->status == HS_INITIALIZED && 1149 kv2->hint->status == HS_INITIALIZED) { 1150 if (kv1->hint->v.gh.notnum) 1151 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1152 else if (kv2->hint->v.gh.notnum) 1153 return (+1); 1154 1155 if (kv1->hint->v.gh.nan) 1156 return ((kv2->hint->v.gh.nan) ? 1157 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1158 -1); 1159 else if (kv2->hint->v.gh.nan) 1160 return (+1); 1161 1162 d1 = kv1->hint->v.gh.d; 1163 d2 = kv2->hint->v.gh.d; 1164 1165 if (d1 < d2) 1166 return (-1); 1167 else if (d1 > d2) 1168 return (+1); 1169 else 1170 return (0); 1171 } 1172 1173 if (!key1_read) { 1174 errno = 0; 1175 d1 = bwstod(kv1->k, &empty1); 1176 err1 = errno; 1177 } 1178 1179 if (!key2_read) { 1180 errno = 0; 1181 d2 = bwstod(kv2->k, &empty2); 1182 err2 = errno; 1183 } 1184 1185 /* Non-value case: */ 1186 if (empty1) 1187 return (empty2 ? 0 : -1); 1188 else if (empty2) 1189 return (+1); 1190 1191 /* NAN case */ 1192 if (is_nan(d1)) 1193 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1194 else if (is_nan(d2)) 1195 return (+1); 1196 1197 /* Infinities */ 1198 if (err1 == ERANGE || err2 == ERANGE) { 1199 /* Minus infinity case */ 1200 if (huge_minus(d1, err1)) { 1201 if (huge_minus(d2, err2)) { 1202 if (d1 < d2) 1203 return (-1); 1204 if (d1 > d2) 1205 return (+1); 1206 return (0); 1207 } else 1208 return (-1); 1209 1210 } else if (huge_minus(d2, err2)) { 1211 if (huge_minus(d1, err1)) { 1212 if (d1 < d2) 1213 return (-1); 1214 if (d1 > d2) 1215 return (+1); 1216 return (0); 1217 } else 1218 return (+1); 1219 } 1220 1221 /* Plus infinity case */ 1222 if (huge_plus(d1, err1)) { 1223 if (huge_plus(d2, err2)) { 1224 if (d1 < d2) 1225 return (-1); 1226 if (d1 > d2) 1227 return (+1); 1228 return (0); 1229 } else 1230 return (+1); 1231 } else if (huge_plus(d2, err2)) { 1232 if (huge_plus(d1, err1)) { 1233 if (d1 < d2) 1234 return (-1); 1235 if (d1 > d2) 1236 return (+1); 1237 return (0); 1238 } else 1239 return (-1); 1240 } 1241 } 1242 1243 if (d1 < d2) 1244 return (-1); 1245 if (d1 > d2) 1246 return (+1); 1247 1248 return (0); 1249} 1250 1251/* 1252 * Implements month sort (-M). 1253 */ 1254static int 1255monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1256{ 1257 int val1, val2; 1258 bool key1_read, key2_read; 1259 1260 val1 = val2 = 0; 1261 key1_read = key2_read = false; 1262 1263 if (debug_sort) { 1264 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1265 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1266 } 1267 1268 if (kv1->hint->status == HS_UNINITIALIZED) { 1269 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1270 key1_read = true; 1271 kv1->hint->status = HS_INITIALIZED; 1272 } 1273 1274 if (kv2->hint->status == HS_UNINITIALIZED) { 1275 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1276 key2_read = true; 1277 kv2->hint->status = HS_INITIALIZED; 1278 } 1279 1280 if (kv1->hint->status == HS_INITIALIZED) { 1281 val1 = kv1->hint->v.Mh.m; 1282 key1_read = true; 1283 } 1284 1285 if (kv2->hint->status == HS_INITIALIZED) { 1286 val2 = kv2->hint->v.Mh.m; 1287 key2_read = true; 1288 } 1289 1290 if (!key1_read) 1291 val1 = bws_month_score(kv1->k); 1292 if (!key2_read) 1293 val2 = bws_month_score(kv2->k); 1294 1295 if (val1 == val2) { 1296 return (0); 1297 } 1298 if (val1 < val2) 1299 return (-1); 1300 return (+1); 1301} 1302