1/* $NetBSD: idl.c,v 1.2 2021/08/14 16:15:02 christos Exp $ */ 2 3/* OpenLDAP WiredTiger backend */ 4/* $OpenLDAP$ */ 5/* This work is part of OpenLDAP Software <http://www.openldap.org/>. 6 * 7 * Copyright 2002-2021 The OpenLDAP Foundation. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted only as authorized by the OpenLDAP 12 * Public License. 13 * 14 * A copy of this license is available in the file LICENSE in the 15 * top-level directory of the distribution or, alternatively, at 16 * <http://www.OpenLDAP.org/license.html>. 17 */ 18/* ACKNOWLEDGEMENTS: 19 * This work was developed by HAMANO Tsukasa <hamano@osstech.co.jp> 20 * based on back-bdb for inclusion in OpenLDAP Software. 21 * WiredTiger is a product of MongoDB Inc. 22 */ 23 24#include <sys/cdefs.h> 25__RCSID("$NetBSD: idl.c,v 1.2 2021/08/14 16:15:02 christos Exp $"); 26 27#include "portable.h" 28 29#include <stdio.h> 30#include <ac/string.h> 31 32#include "back-wt.h" 33#include "idl.h" 34 35#define IDL_MAX(x,y) ( (x) > (y) ? (x) : (y) ) 36#define IDL_MIN(x,y) ( (x) < (y) ? (x) : (y) ) 37#define IDL_CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) ) 38 39#if IDL_DEBUG > 0 40static void idl_check( ID *ids ) 41{ 42 if( WT_IDL_IS_RANGE( ids ) ) { 43 assert( WT_IDL_RANGE_FIRST(ids) <= WT_IDL_RANGE_LAST(ids) ); 44 } else { 45 ID i; 46 for( i=1; i < ids[0]; i++ ) { 47 assert( ids[i+1] > ids[i] ); 48 } 49 } 50} 51 52#if IDL_DEBUG > 1 53static void idl_dump( ID *ids ) 54{ 55 if( WT_IDL_IS_RANGE( ids ) ) { 56 Debug( LDAP_DEBUG_ANY, 57 "IDL: range ( %ld - %ld )\n", 58 (long) WT_IDL_RANGE_FIRST( ids ), 59 (long) WT_IDL_RANGE_LAST( ids ) ); 60 61 } else { 62 ID i; 63 Debug( LDAP_DEBUG_ANY, "IDL: size %ld", (long) ids[0] ); 64 65 for( i=1; i<=ids[0]; i++ ) { 66 if( i % 16 == 1 ) { 67 Debug( LDAP_DEBUG_ANY, "\n" ); 68 } 69 Debug( LDAP_DEBUG_ANY, " %02lx", (long) ids[i] ); 70 } 71 72 Debug( LDAP_DEBUG_ANY, "\n" ); 73 } 74 75 idl_check( ids ); 76} 77#endif /* IDL_DEBUG > 1 */ 78#endif /* IDL_DEBUG > 0 */ 79 80unsigned wt_idl_search( ID *ids, ID id ) 81{ 82#define IDL_BINARY_SEARCH 1 83#ifdef IDL_BINARY_SEARCH 84 /* 85 * binary search of id in ids 86 * if found, returns position of id 87 * if not found, returns first position greater than id 88 */ 89 unsigned base = 0; 90 unsigned cursor = 1; 91 int val = 0; 92 unsigned n = ids[0]; 93 94#if IDL_DEBUG > 0 95 idl_check( ids ); 96#endif 97 98 while( 0 < n ) { 99 unsigned pivot = n >> 1; 100 cursor = base + pivot + 1; 101 val = IDL_CMP( id, ids[cursor] ); 102 103 if( val < 0 ) { 104 n = pivot; 105 106 } else if ( val > 0 ) { 107 base = cursor; 108 n -= pivot + 1; 109 110 } else { 111 return cursor; 112 } 113 } 114 115 if( val > 0 ) { 116 ++cursor; 117 } 118 return cursor; 119 120#else 121 /* (reverse) linear search */ 122 int i; 123 124#if IDL_DEBUG > 0 125 idl_check( ids ); 126#endif 127 128 for( i=ids[0]; i; i-- ) { 129 if( id > ids[i] ) { 130 break; 131 } 132 } 133 134 return i+1; 135#endif 136} 137 138int wt_idl_insert( ID *ids, ID id ) 139{ 140 unsigned x; 141 142#if IDL_DEBUG > 1 143 Debug( LDAP_DEBUG_ANY, "insert: %04lx at %d\n", (long) id, x ); 144 idl_dump( ids ); 145#elif IDL_DEBUG > 0 146 idl_check( ids ); 147#endif 148 149 if (WT_IDL_IS_RANGE( ids )) { 150 /* if already in range, treat as a dup */ 151 if (id >= WT_IDL_RANGE_FIRST(ids) && id <= WT_IDL_RANGE_LAST(ids)) 152 return -1; 153 if (id < WT_IDL_RANGE_FIRST(ids)) 154 ids[1] = id; 155 else if (id > WT_IDL_RANGE_LAST(ids)) 156 ids[2] = id; 157 return 0; 158 } 159 160 x = wt_idl_search( ids, id ); 161 assert( x > 0 ); 162 163 if( x < 1 ) { 164 /* internal error */ 165 return -2; 166 } 167 168 if ( x <= ids[0] && ids[x] == id ) { 169 /* duplicate */ 170 return -1; 171 } 172 173 if ( ++ids[0] >= WT_IDL_DB_MAX ) { 174 if( id < ids[1] ) { 175 ids[1] = id; 176 ids[2] = ids[ids[0]-1]; 177 } else if ( ids[ids[0]-1] < id ) { 178 ids[2] = id; 179 } else { 180 ids[2] = ids[ids[0]-1]; 181 } 182 ids[0] = NOID; 183 184 } else { 185 /* insert id */ 186 AC_MEMCPY( &ids[x+1], &ids[x], (ids[0]-x) * sizeof(ID) ); 187 ids[x] = id; 188 } 189 190#if IDL_DEBUG > 1 191 idl_dump( ids ); 192#elif IDL_DEBUG > 0 193 idl_check( ids ); 194#endif 195 196 return 0; 197} 198 199static int wt_idl_delete( ID *ids, ID id ) 200{ 201 unsigned x; 202 203#if IDL_DEBUG > 1 204 Debug( LDAP_DEBUG_ANY, "delete: %04lx at %d\n", (long) id, x ); 205 idl_dump( ids ); 206#elif IDL_DEBUG > 0 207 idl_check( ids ); 208#endif 209 210 if (WT_IDL_IS_RANGE( ids )) { 211 /* If deleting a range boundary, adjust */ 212 if ( ids[1] == id ) 213 ids[1]++; 214 else if ( ids[2] == id ) 215 ids[2]--; 216 /* deleting from inside a range is a no-op */ 217 218 /* If the range has collapsed, re-adjust */ 219 if ( ids[1] > ids[2] ) 220 ids[0] = 0; 221 else if ( ids[1] == ids[2] ) 222 ids[1] = 1; 223 return 0; 224 } 225 226 x = wt_idl_search( ids, id ); 227 assert( x > 0 ); 228 229 if( x <= 0 ) { 230 /* internal error */ 231 return -2; 232 } 233 234 if( x > ids[0] || ids[x] != id ) { 235 /* not found */ 236 return -1; 237 238 } else if ( --ids[0] == 0 ) { 239 if( x != 1 ) { 240 return -3; 241 } 242 243 } else { 244 AC_MEMCPY( &ids[x], &ids[x+1], (1+ids[0]-x) * sizeof(ID) ); 245 } 246 247#if IDL_DEBUG > 1 248 idl_dump( ids ); 249#elif IDL_DEBUG > 0 250 idl_check( ids ); 251#endif 252 253 return 0; 254} 255 256static char * 257wt_show_key( 258 char *buf, 259 void *val, 260 size_t len ) 261{ 262 if ( len == 4 /* LUTIL_HASH_BYTES */ ) { 263 unsigned char *c = val; 264 sprintf( buf, "[%02x%02x%02x%02x]", c[0], c[1], c[2], c[3] ); 265 return buf; 266 } else { 267 return val; 268 } 269} 270 271/* 272 * idl_intersection - return a = a intersection b 273 */ 274int 275wt_idl_intersection( 276 ID *a, 277 ID *b ) 278{ 279 ID ida, idb; 280 ID idmax, idmin; 281 ID cursora = 0, cursorb = 0, cursorc; 282 int swap = 0; 283 284 if ( WT_IDL_IS_ZERO( a ) || WT_IDL_IS_ZERO( b ) ) { 285 a[0] = 0; 286 return 0; 287 } 288 289 idmin = IDL_MAX( WT_IDL_FIRST(a), WT_IDL_FIRST(b) ); 290 idmax = IDL_MIN( WT_IDL_LAST(a), WT_IDL_LAST(b) ); 291 if ( idmin > idmax ) { 292 a[0] = 0; 293 return 0; 294 } else if ( idmin == idmax ) { 295 a[0] = 1; 296 a[1] = idmin; 297 return 0; 298 } 299 300 if ( WT_IDL_IS_RANGE( a ) ) { 301 if ( WT_IDL_IS_RANGE(b) ) { 302 /* If both are ranges, just shrink the boundaries */ 303 a[1] = idmin; 304 a[2] = idmax; 305 return 0; 306 } else { 307 /* Else swap so that b is the range, a is a list */ 308 ID *tmp = a; 309 a = b; 310 b = tmp; 311 swap = 1; 312 } 313 } 314 315 /* If a range completely covers the list, the result is 316 * just the list. If idmin to idmax is contiguous, just 317 * turn it into a range. 318 */ 319 if ( WT_IDL_IS_RANGE( b ) 320 && WT_IDL_RANGE_FIRST( b ) <= WT_IDL_FIRST( a ) 321 && WT_IDL_RANGE_LAST( b ) >= WT_IDL_LLAST( a ) ) { 322 if (idmax - idmin + 1 == a[0]) 323 { 324 a[0] = NOID; 325 a[1] = idmin; 326 a[2] = idmax; 327 } 328 goto done; 329 } 330 331 /* Fine, do the intersection one element at a time. 332 * First advance to idmin in both IDLs. 333 */ 334 cursora = cursorb = idmin; 335 ida = wt_idl_first( a, &cursora ); 336 idb = wt_idl_first( b, &cursorb ); 337 cursorc = 0; 338 339 while( ida <= idmax || idb <= idmax ) { 340 if( ida == idb ) { 341 a[++cursorc] = ida; 342 ida = wt_idl_next( a, &cursora ); 343 idb = wt_idl_next( b, &cursorb ); 344 } else if ( ida < idb ) { 345 ida = wt_idl_next( a, &cursora ); 346 } else { 347 idb = wt_idl_next( b, &cursorb ); 348 } 349 } 350 a[0] = cursorc; 351done: 352 if (swap) 353 WT_IDL_CPY( b, a ); 354 355 return 0; 356} 357 358 359/* 360 * idl_union - return a = a union b 361 */ 362int 363wt_idl_union( 364 ID *a, 365 ID *b ) 366{ 367 ID ida, idb; 368 ID cursora = 0, cursorb = 0, cursorc; 369 370 if ( WT_IDL_IS_ZERO( b ) ) { 371 return 0; 372 } 373 374 if ( WT_IDL_IS_ZERO( a ) ) { 375 WT_IDL_CPY( a, b ); 376 return 0; 377 } 378 379 if ( WT_IDL_IS_RANGE( a ) || WT_IDL_IS_RANGE(b) ) { 380over: ida = IDL_MIN( WT_IDL_FIRST(a), WT_IDL_FIRST(b) ); 381 idb = IDL_MAX( WT_IDL_LAST(a), WT_IDL_LAST(b) ); 382 a[0] = NOID; 383 a[1] = ida; 384 a[2] = idb; 385 return 0; 386 } 387 388 ida = wt_idl_first( a, &cursora ); 389 idb = wt_idl_first( b, &cursorb ); 390 391 cursorc = b[0]; 392 393 /* The distinct elements of a are cat'd to b */ 394 while( ida != NOID || idb != NOID ) { 395 if ( ida < idb ) { 396 if( ++cursorc > WT_IDL_UM_MAX ) { 397 goto over; 398 } 399 b[cursorc] = ida; 400 ida = wt_idl_next( a, &cursora ); 401 402 } else { 403 if ( ida == idb ) 404 ida = wt_idl_next( a, &cursora ); 405 idb = wt_idl_next( b, &cursorb ); 406 } 407 } 408 409 /* b is copied back to a in sorted order */ 410 a[0] = cursorc; 411 cursora = 1; 412 cursorb = 1; 413 cursorc = b[0]+1; 414 while (cursorb <= b[0] || cursorc <= a[0]) { 415 if (cursorc > a[0]) 416 idb = NOID; 417 else 418 idb = b[cursorc]; 419 if (cursorb <= b[0] && b[cursorb] < idb) 420 a[cursora++] = b[cursorb++]; 421 else { 422 a[cursora++] = idb; 423 cursorc++; 424 } 425 } 426 427 return 0; 428} 429 430 431#if 0 432/* 433 * wt_idl_notin - return a intersection ~b (or a minus b) 434 */ 435int 436wt_idl_notin( 437 ID *a, 438 ID *b, 439 ID *ids ) 440{ 441 ID ida, idb; 442 ID cursora = 0, cursorb = 0; 443 444 if( WT_IDL_IS_ZERO( a ) || 445 WT_IDL_IS_ZERO( b ) || 446 WT_IDL_IS_RANGE( b ) ) 447 { 448 WT_IDL_CPY( ids, a ); 449 return 0; 450 } 451 452 if( WT_IDL_IS_RANGE( a ) ) { 453 WT_IDL_CPY( ids, a ); 454 return 0; 455 } 456 457 ida = wt_idl_first( a, &cursora ), 458 idb = wt_idl_first( b, &cursorb ); 459 460 ids[0] = 0; 461 462 while( ida != NOID ) { 463 if ( idb == NOID ) { 464 /* we could shortcut this */ 465 ids[++ids[0]] = ida; 466 ida = wt_idl_next( a, &cursora ); 467 468 } else if ( ida < idb ) { 469 ids[++ids[0]] = ida; 470 ida = wt_idl_next( a, &cursora ); 471 472 } else if ( ida > idb ) { 473 idb = wt_idl_next( b, &cursorb ); 474 475 } else { 476 ida = wt_idl_next( a, &cursora ); 477 idb = wt_idl_next( b, &cursorb ); 478 } 479 } 480 481 return 0; 482} 483#endif 484 485ID wt_idl_first( ID *ids, ID *cursor ) 486{ 487 ID pos; 488 489 if ( ids[0] == 0 ) { 490 *cursor = NOID; 491 return NOID; 492 } 493 494 if ( WT_IDL_IS_RANGE( ids ) ) { 495 if( *cursor < ids[1] ) { 496 *cursor = ids[1]; 497 } 498 return *cursor; 499 } 500 501 if ( *cursor == 0 ) 502 pos = 1; 503 else 504 pos = wt_idl_search( ids, *cursor ); 505 506 if( pos > ids[0] ) { 507 return NOID; 508 } 509 510 *cursor = pos; 511 return ids[pos]; 512} 513 514ID wt_idl_next( ID *ids, ID *cursor ) 515{ 516 if ( WT_IDL_IS_RANGE( ids ) ) { 517 if( ids[2] < ++(*cursor) ) { 518 return NOID; 519 } 520 return *cursor; 521 } 522 523 if ( ++(*cursor) <= ids[0] ) { 524 return ids[*cursor]; 525 } 526 527 return NOID; 528} 529 530/* Add one ID to an unsorted list. We ensure that the first element is the 531 * minimum and the last element is the maximum, for fast range compaction. 532 * this means IDLs up to length 3 are always sorted... 533 */ 534int wt_idl_append_one( ID *ids, ID id ) 535{ 536 if (WT_IDL_IS_RANGE( ids )) { 537 /* if already in range, treat as a dup */ 538 if (id >= WT_IDL_RANGE_FIRST(ids) && id <= WT_IDL_RANGE_LAST(ids)) 539 return -1; 540 if (id < WT_IDL_RANGE_FIRST(ids)) 541 ids[1] = id; 542 else if (id > WT_IDL_RANGE_LAST(ids)) 543 ids[2] = id; 544 return 0; 545 } 546 if ( ids[0] ) { 547 ID tmp; 548 549 if (id < ids[1]) { 550 tmp = ids[1]; 551 ids[1] = id; 552 id = tmp; 553 } 554 if ( ids[0] > 1 && id < ids[ids[0]] ) { 555 tmp = ids[ids[0]]; 556 ids[ids[0]] = id; 557 id = tmp; 558 } 559 } 560 ids[0]++; 561 if ( ids[0] >= WT_IDL_UM_MAX ) { 562 ids[0] = NOID; 563 ids[2] = id; 564 } else { 565 ids[ids[0]] = id; 566 } 567 return 0; 568} 569 570/* Append sorted list b to sorted list a. The result is unsorted but 571 * a[1] is the min of the result and a[a[0]] is the max. 572 */ 573int wt_idl_append( ID *a, ID *b ) 574{ 575 ID ida, idb, tmp, swap = 0; 576 577 if ( WT_IDL_IS_ZERO( b ) ) { 578 return 0; 579 } 580 581 if ( WT_IDL_IS_ZERO( a ) ) { 582 WT_IDL_CPY( a, b ); 583 return 0; 584 } 585 586 ida = WT_IDL_LAST( a ); 587 idb = WT_IDL_LAST( b ); 588 if ( WT_IDL_IS_RANGE( a ) || WT_IDL_IS_RANGE(b) || 589 a[0] + b[0] >= WT_IDL_UM_MAX ) { 590 a[2] = IDL_MAX( ida, idb ); 591 a[1] = IDL_MIN( a[1], b[1] ); 592 a[0] = NOID; 593 return 0; 594 } 595 596 if ( b[0] > 1 && ida > idb ) { 597 swap = idb; 598 a[a[0]] = idb; 599 b[b[0]] = ida; 600 } 601 602 if ( b[1] < a[1] ) { 603 tmp = a[1]; 604 a[1] = b[1]; 605 } else { 606 tmp = b[1]; 607 } 608 a[0]++; 609 a[a[0]] = tmp; 610 611 if ( b[0] > 1 ) { 612 int i = b[0] - 1; 613 AC_MEMCPY(a+a[0]+1, b+2, i * sizeof(ID)); 614 a[0] += i; 615 } 616 if ( swap ) { 617 b[b[0]] = swap; 618 } 619 return 0; 620} 621 622#if 1 623 624/* Quicksort + Insertion sort for small arrays */ 625 626#define SMALL 8 627#define SWAP(a,b) itmp=(a);(a)=(b);(b)=itmp 628 629void 630wt_idl_sort( ID *ids, ID *tmp ) 631{ 632 int *istack = (int *)tmp; /* Private stack, not used by caller */ 633 int i,j,k,l,ir,jstack; 634 ID a, itmp; 635 636 if ( WT_IDL_IS_RANGE( ids )) 637 return; 638 639 ir = ids[0]; 640 l = 1; 641 jstack = 0; 642 for(;;) { 643 if (ir - l < SMALL) { /* Insertion sort */ 644 for (j=l+1;j<=ir;j++) { 645 a = ids[j]; 646 for (i=j-1;i>=1;i--) { 647 if (ids[i] <= a) break; 648 ids[i+1] = ids[i]; 649 } 650 ids[i+1] = a; 651 } 652 if (jstack == 0) break; 653 ir = istack[jstack--]; 654 l = istack[jstack--]; 655 } else { 656 k = (l + ir) >> 1; /* Choose median of left, center, right */ 657 SWAP(ids[k], ids[l+1]); 658 if (ids[l] > ids[ir]) { 659 SWAP(ids[l], ids[ir]); 660 } 661 if (ids[l+1] > ids[ir]) { 662 SWAP(ids[l+1], ids[ir]); 663 } 664 if (ids[l] > ids[l+1]) { 665 SWAP(ids[l], ids[l+1]); 666 } 667 i = l+1; 668 j = ir; 669 a = ids[l+1]; 670 for(;;) { 671 do i++; while(ids[i] < a); 672 do j--; while(ids[j] > a); 673 if (j < i) break; 674 SWAP(ids[i],ids[j]); 675 } 676 ids[l+1] = ids[j]; 677 ids[j] = a; 678 jstack += 2; 679 if (ir-i+1 >= j-l) { 680 istack[jstack] = ir; 681 istack[jstack-1] = i; 682 ir = j-1; 683 } else { 684 istack[jstack] = j-1; 685 istack[jstack-1] = l; 686 l = i; 687 } 688 } 689 } 690} 691 692#else 693 694/* 8 bit Radix sort + insertion sort 695 * 696 * based on code from http://www.cubic.org/docs/radix.htm 697 * with improvements by ebackes@symas.com and hyc@symas.com 698 * 699 * This code is O(n) but has a relatively high constant factor. For lists 700 * up to ~50 Quicksort is slightly faster; up to ~100 they are even. 701 * Much faster than quicksort for lists longer than ~100. Insertion 702 * sort is actually superior for lists <50. 703 */ 704 705#define BUCKETS (1<<8) 706#define SMALL 50 707 708void 709wt_idl_sort( ID *ids, ID *tmp ) 710{ 711 int count, soft_limit, phase = 0, size = ids[0]; 712 ID *idls[2]; 713 unsigned char *maxv = (unsigned char *)&ids[size]; 714 715 if ( WT_IDL_IS_RANGE( ids )) 716 return; 717 718 /* Use insertion sort for small lists */ 719 if ( size <= SMALL ) { 720 int i,j; 721 ID a; 722 723 for (j=1;j<=size;j++) { 724 a = ids[j]; 725 for (i=j-1;i>=1;i--) { 726 if (ids[i] <= a) break; 727 ids[i+1] = ids[i]; 728 } 729 ids[i+1] = a; 730 } 731 return; 732 } 733 734 tmp[0] = size; 735 idls[0] = ids; 736 idls[1] = tmp; 737 738#if BYTE_ORDER == BIG_ENDIAN 739 for (soft_limit = 0; !maxv[soft_limit]; soft_limit++); 740#else 741 for (soft_limit = sizeof(ID)-1; !maxv[soft_limit]; soft_limit--); 742#endif 743 744 for ( 745#if BYTE_ORDER == BIG_ENDIAN 746 count = sizeof(ID)-1; count >= soft_limit; --count 747#else 748 count = 0; count <= soft_limit; ++count 749#endif 750 ) { 751 unsigned int num[BUCKETS], * np, n, sum; 752 int i; 753 ID *sp, *source, *dest; 754 unsigned char *bp, *source_start; 755 756 source = idls[phase]+1; 757 dest = idls[phase^1]+1; 758 source_start = ((unsigned char *) source) + count; 759 760 np = num; 761 for ( i = BUCKETS; i > 0; --i ) *np++ = 0; 762 763 /* count occurrences of every byte value */ 764 bp = source_start; 765 for ( i = size; i > 0; --i, bp += sizeof(ID) ) 766 num[*bp]++; 767 768 /* transform count into index by summing elements and storing 769 * into same array 770 */ 771 sum = 0; 772 np = num; 773 for ( i = BUCKETS; i > 0; --i ) { 774 n = *np; 775 *np++ = sum; 776 sum += n; 777 } 778 779 /* fill dest with the right values in the right place */ 780 bp = source_start; 781 sp = source; 782 for ( i = size; i > 0; --i, bp += sizeof(ID) ) { 783 np = num + *bp; 784 dest[*np] = *sp++; 785 ++(*np); 786 } 787 phase ^= 1; 788 } 789 790 /* copy back from temp if needed */ 791 if ( phase ) { 792 ids++; tmp++; 793 for ( count = 0; count < size; ++count ) 794 *ids++ = *tmp++; 795 } 796} 797#endif /* Quick vs Radix */ 798 799