1/*	$NetBSD: idl.c,v 1.2 2021/08/14 16:15:02 christos Exp $	*/
2
3/* OpenLDAP WiredTiger backend */
4/* $OpenLDAP$ */
5/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
6 *
7 * Copyright 2002-2021 The OpenLDAP Foundation.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted only as authorized by the OpenLDAP
12 * Public License.
13 *
14 * A copy of this license is available in the file LICENSE in the
15 * top-level directory of the distribution or, alternatively, at
16 * <http://www.OpenLDAP.org/license.html>.
17 */
18/* ACKNOWLEDGEMENTS:
19 * This work was developed by HAMANO Tsukasa <hamano@osstech.co.jp>
20 * based on back-bdb for inclusion in OpenLDAP Software.
21 * WiredTiger is a product of MongoDB Inc.
22 */
23
24#include <sys/cdefs.h>
25__RCSID("$NetBSD: idl.c,v 1.2 2021/08/14 16:15:02 christos Exp $");
26
27#include "portable.h"
28
29#include <stdio.h>
30#include <ac/string.h>
31
32#include "back-wt.h"
33#include "idl.h"
34
35#define IDL_MAX(x,y)	( (x) > (y) ? (x) : (y) )
36#define IDL_MIN(x,y)	( (x) < (y) ? (x) : (y) )
37#define IDL_CMP(x,y)	( (x) < (y) ? -1 : (x) > (y) )
38
39#if IDL_DEBUG > 0
40static void idl_check( ID *ids )
41{
42	if( WT_IDL_IS_RANGE( ids ) ) {
43		assert( WT_IDL_RANGE_FIRST(ids) <= WT_IDL_RANGE_LAST(ids) );
44	} else {
45		ID i;
46		for( i=1; i < ids[0]; i++ ) {
47			assert( ids[i+1] > ids[i] );
48		}
49	}
50}
51
52#if IDL_DEBUG > 1
53static void idl_dump( ID *ids )
54{
55	if( WT_IDL_IS_RANGE( ids ) ) {
56		Debug( LDAP_DEBUG_ANY,
57               "IDL: range ( %ld - %ld )\n",
58               (long) WT_IDL_RANGE_FIRST( ids ),
59               (long) WT_IDL_RANGE_LAST( ids ) );
60
61	} else {
62		ID i;
63		Debug( LDAP_DEBUG_ANY, "IDL: size %ld", (long) ids[0] );
64
65		for( i=1; i<=ids[0]; i++ ) {
66			if( i % 16 == 1 ) {
67				Debug( LDAP_DEBUG_ANY, "\n" );
68			}
69			Debug( LDAP_DEBUG_ANY, "  %02lx", (long) ids[i] );
70		}
71
72		Debug( LDAP_DEBUG_ANY, "\n" );
73	}
74
75	idl_check( ids );
76}
77#endif /* IDL_DEBUG > 1 */
78#endif /* IDL_DEBUG > 0 */
79
80unsigned wt_idl_search( ID *ids, ID id )
81{
82#define IDL_BINARY_SEARCH 1
83#ifdef IDL_BINARY_SEARCH
84	/*
85	 * binary search of id in ids
86	 * if found, returns position of id
87	 * if not found, returns first position greater than id
88	 */
89	unsigned base = 0;
90	unsigned cursor = 1;
91	int val = 0;
92	unsigned n = ids[0];
93
94#if IDL_DEBUG > 0
95	idl_check( ids );
96#endif
97
98	while( 0 < n ) {
99		unsigned pivot = n >> 1;
100		cursor = base + pivot + 1;
101		val = IDL_CMP( id, ids[cursor] );
102
103		if( val < 0 ) {
104			n = pivot;
105
106		} else if ( val > 0 ) {
107			base = cursor;
108			n -= pivot + 1;
109
110		} else {
111			return cursor;
112		}
113	}
114
115	if( val > 0 ) {
116		++cursor;
117	}
118	return cursor;
119
120#else
121	/* (reverse) linear search */
122	int i;
123
124#if IDL_DEBUG > 0
125	idl_check( ids );
126#endif
127
128	for( i=ids[0]; i; i-- ) {
129		if( id > ids[i] ) {
130			break;
131		}
132	}
133
134	return i+1;
135#endif
136}
137
138int wt_idl_insert( ID *ids, ID id )
139{
140	unsigned x;
141
142#if IDL_DEBUG > 1
143	Debug( LDAP_DEBUG_ANY, "insert: %04lx at %d\n", (long) id, x );
144	idl_dump( ids );
145#elif IDL_DEBUG > 0
146	idl_check( ids );
147#endif
148
149	if (WT_IDL_IS_RANGE( ids )) {
150		/* if already in range, treat as a dup */
151		if (id >= WT_IDL_RANGE_FIRST(ids) && id <= WT_IDL_RANGE_LAST(ids))
152			return -1;
153		if (id < WT_IDL_RANGE_FIRST(ids))
154			ids[1] = id;
155		else if (id > WT_IDL_RANGE_LAST(ids))
156			ids[2] = id;
157		return 0;
158	}
159
160	x = wt_idl_search( ids, id );
161	assert( x > 0 );
162
163	if( x < 1 ) {
164		/* internal error */
165		return -2;
166	}
167
168	if ( x <= ids[0] && ids[x] == id ) {
169		/* duplicate */
170		return -1;
171	}
172
173	if ( ++ids[0] >= WT_IDL_DB_MAX ) {
174		if( id < ids[1] ) {
175			ids[1] = id;
176			ids[2] = ids[ids[0]-1];
177		} else if ( ids[ids[0]-1] < id ) {
178			ids[2] = id;
179		} else {
180			ids[2] = ids[ids[0]-1];
181		}
182		ids[0] = NOID;
183
184	} else {
185		/* insert id */
186		AC_MEMCPY( &ids[x+1], &ids[x], (ids[0]-x) * sizeof(ID) );
187		ids[x] = id;
188	}
189
190#if IDL_DEBUG > 1
191	idl_dump( ids );
192#elif IDL_DEBUG > 0
193	idl_check( ids );
194#endif
195
196	return 0;
197}
198
199static int wt_idl_delete( ID *ids, ID id )
200{
201	unsigned x;
202
203#if IDL_DEBUG > 1
204	Debug( LDAP_DEBUG_ANY, "delete: %04lx at %d\n", (long) id, x );
205	idl_dump( ids );
206#elif IDL_DEBUG > 0
207	idl_check( ids );
208#endif
209
210	if (WT_IDL_IS_RANGE( ids )) {
211		/* If deleting a range boundary, adjust */
212		if ( ids[1] == id )
213			ids[1]++;
214		else if ( ids[2] == id )
215			ids[2]--;
216		/* deleting from inside a range is a no-op */
217
218		/* If the range has collapsed, re-adjust */
219		if ( ids[1] > ids[2] )
220			ids[0] = 0;
221		else if ( ids[1] == ids[2] )
222			ids[1] = 1;
223		return 0;
224	}
225
226	x = wt_idl_search( ids, id );
227	assert( x > 0 );
228
229	if( x <= 0 ) {
230		/* internal error */
231		return -2;
232	}
233
234	if( x > ids[0] || ids[x] != id ) {
235		/* not found */
236		return -1;
237
238	} else if ( --ids[0] == 0 ) {
239		if( x != 1 ) {
240			return -3;
241		}
242
243	} else {
244		AC_MEMCPY( &ids[x], &ids[x+1], (1+ids[0]-x) * sizeof(ID) );
245	}
246
247#if IDL_DEBUG > 1
248	idl_dump( ids );
249#elif IDL_DEBUG > 0
250	idl_check( ids );
251#endif
252
253	return 0;
254}
255
256static char *
257wt_show_key(
258	char		*buf,
259	void		*val,
260	size_t		len )
261{
262	if ( len == 4 /* LUTIL_HASH_BYTES */ ) {
263		unsigned char *c = val;
264		sprintf( buf, "[%02x%02x%02x%02x]", c[0], c[1], c[2], c[3] );
265		return buf;
266	} else {
267		return val;
268	}
269}
270
271/*
272 * idl_intersection - return a = a intersection b
273 */
274int
275wt_idl_intersection(
276	ID *a,
277	ID *b )
278{
279	ID ida, idb;
280	ID idmax, idmin;
281	ID cursora = 0, cursorb = 0, cursorc;
282	int swap = 0;
283
284	if ( WT_IDL_IS_ZERO( a ) || WT_IDL_IS_ZERO( b ) ) {
285		a[0] = 0;
286		return 0;
287	}
288
289	idmin = IDL_MAX( WT_IDL_FIRST(a), WT_IDL_FIRST(b) );
290	idmax = IDL_MIN( WT_IDL_LAST(a), WT_IDL_LAST(b) );
291	if ( idmin > idmax ) {
292		a[0] = 0;
293		return 0;
294	} else if ( idmin == idmax ) {
295		a[0] = 1;
296		a[1] = idmin;
297		return 0;
298	}
299
300	if ( WT_IDL_IS_RANGE( a ) ) {
301		if ( WT_IDL_IS_RANGE(b) ) {
302		/* If both are ranges, just shrink the boundaries */
303			a[1] = idmin;
304			a[2] = idmax;
305			return 0;
306		} else {
307		/* Else swap so that b is the range, a is a list */
308			ID *tmp = a;
309			a = b;
310			b = tmp;
311			swap = 1;
312		}
313	}
314
315	/* If a range completely covers the list, the result is
316	 * just the list. If idmin to idmax is contiguous, just
317	 * turn it into a range.
318	 */
319	if ( WT_IDL_IS_RANGE( b )
320		&& WT_IDL_RANGE_FIRST( b ) <= WT_IDL_FIRST( a )
321		&& WT_IDL_RANGE_LAST( b ) >= WT_IDL_LLAST( a ) ) {
322		if (idmax - idmin + 1 == a[0])
323		{
324			a[0] = NOID;
325			a[1] = idmin;
326			a[2] = idmax;
327		}
328		goto done;
329	}
330
331	/* Fine, do the intersection one element at a time.
332	 * First advance to idmin in both IDLs.
333	 */
334	cursora = cursorb = idmin;
335	ida = wt_idl_first( a, &cursora );
336	idb = wt_idl_first( b, &cursorb );
337	cursorc = 0;
338
339	while( ida <= idmax || idb <= idmax ) {
340		if( ida == idb ) {
341			a[++cursorc] = ida;
342			ida = wt_idl_next( a, &cursora );
343			idb = wt_idl_next( b, &cursorb );
344		} else if ( ida < idb ) {
345			ida = wt_idl_next( a, &cursora );
346		} else {
347			idb = wt_idl_next( b, &cursorb );
348		}
349	}
350	a[0] = cursorc;
351done:
352	if (swap)
353		WT_IDL_CPY( b, a );
354
355	return 0;
356}
357
358
359/*
360 * idl_union - return a = a union b
361 */
362int
363wt_idl_union(
364	ID	*a,
365	ID	*b )
366{
367	ID ida, idb;
368	ID cursora = 0, cursorb = 0, cursorc;
369
370	if ( WT_IDL_IS_ZERO( b ) ) {
371		return 0;
372	}
373
374	if ( WT_IDL_IS_ZERO( a ) ) {
375		WT_IDL_CPY( a, b );
376		return 0;
377	}
378
379	if ( WT_IDL_IS_RANGE( a ) || WT_IDL_IS_RANGE(b) ) {
380over:		ida = IDL_MIN( WT_IDL_FIRST(a), WT_IDL_FIRST(b) );
381		idb = IDL_MAX( WT_IDL_LAST(a), WT_IDL_LAST(b) );
382		a[0] = NOID;
383		a[1] = ida;
384		a[2] = idb;
385		return 0;
386	}
387
388	ida = wt_idl_first( a, &cursora );
389	idb = wt_idl_first( b, &cursorb );
390
391	cursorc = b[0];
392
393	/* The distinct elements of a are cat'd to b */
394	while( ida != NOID || idb != NOID ) {
395		if ( ida < idb ) {
396			if( ++cursorc > WT_IDL_UM_MAX ) {
397				goto over;
398			}
399			b[cursorc] = ida;
400			ida = wt_idl_next( a, &cursora );
401
402		} else {
403			if ( ida == idb )
404				ida = wt_idl_next( a, &cursora );
405			idb = wt_idl_next( b, &cursorb );
406		}
407	}
408
409	/* b is copied back to a in sorted order */
410	a[0] = cursorc;
411	cursora = 1;
412	cursorb = 1;
413	cursorc = b[0]+1;
414	while (cursorb <= b[0] || cursorc <= a[0]) {
415		if (cursorc > a[0])
416			idb = NOID;
417		else
418			idb = b[cursorc];
419		if (cursorb <= b[0] && b[cursorb] < idb)
420			a[cursora++] = b[cursorb++];
421		else {
422			a[cursora++] = idb;
423			cursorc++;
424		}
425	}
426
427	return 0;
428}
429
430
431#if 0
432/*
433 * wt_idl_notin - return a intersection ~b (or a minus b)
434 */
435int
436wt_idl_notin(
437	ID	*a,
438	ID	*b,
439	ID *ids )
440{
441	ID ida, idb;
442	ID cursora = 0, cursorb = 0;
443
444	if( WT_IDL_IS_ZERO( a ) ||
445		WT_IDL_IS_ZERO( b ) ||
446		WT_IDL_IS_RANGE( b ) )
447	{
448		WT_IDL_CPY( ids, a );
449		return 0;
450	}
451
452	if( WT_IDL_IS_RANGE( a ) ) {
453		WT_IDL_CPY( ids, a );
454		return 0;
455	}
456
457	ida = wt_idl_first( a, &cursora ),
458	idb = wt_idl_first( b, &cursorb );
459
460	ids[0] = 0;
461
462	while( ida != NOID ) {
463		if ( idb == NOID ) {
464			/* we could shortcut this */
465			ids[++ids[0]] = ida;
466			ida = wt_idl_next( a, &cursora );
467
468		} else if ( ida < idb ) {
469			ids[++ids[0]] = ida;
470			ida = wt_idl_next( a, &cursora );
471
472		} else if ( ida > idb ) {
473			idb = wt_idl_next( b, &cursorb );
474
475		} else {
476			ida = wt_idl_next( a, &cursora );
477			idb = wt_idl_next( b, &cursorb );
478		}
479	}
480
481	return 0;
482}
483#endif
484
485ID wt_idl_first( ID *ids, ID *cursor )
486{
487	ID pos;
488
489	if ( ids[0] == 0 ) {
490		*cursor = NOID;
491		return NOID;
492	}
493
494	if ( WT_IDL_IS_RANGE( ids ) ) {
495		if( *cursor < ids[1] ) {
496			*cursor = ids[1];
497		}
498		return *cursor;
499	}
500
501	if ( *cursor == 0 )
502		pos = 1;
503	else
504		pos = wt_idl_search( ids, *cursor );
505
506	if( pos > ids[0] ) {
507		return NOID;
508	}
509
510	*cursor = pos;
511	return ids[pos];
512}
513
514ID wt_idl_next( ID *ids, ID *cursor )
515{
516	if ( WT_IDL_IS_RANGE( ids ) ) {
517		if( ids[2] < ++(*cursor) ) {
518			return NOID;
519		}
520		return *cursor;
521	}
522
523	if ( ++(*cursor) <= ids[0] ) {
524		return ids[*cursor];
525	}
526
527	return NOID;
528}
529
530/* Add one ID to an unsorted list. We ensure that the first element is the
531 * minimum and the last element is the maximum, for fast range compaction.
532 *   this means IDLs up to length 3 are always sorted...
533 */
534int wt_idl_append_one( ID *ids, ID id )
535{
536	if (WT_IDL_IS_RANGE( ids )) {
537		/* if already in range, treat as a dup */
538		if (id >= WT_IDL_RANGE_FIRST(ids) && id <= WT_IDL_RANGE_LAST(ids))
539			return -1;
540		if (id < WT_IDL_RANGE_FIRST(ids))
541			ids[1] = id;
542		else if (id > WT_IDL_RANGE_LAST(ids))
543			ids[2] = id;
544		return 0;
545	}
546	if ( ids[0] ) {
547		ID tmp;
548
549		if (id < ids[1]) {
550			tmp = ids[1];
551			ids[1] = id;
552			id = tmp;
553		}
554		if ( ids[0] > 1 && id < ids[ids[0]] ) {
555			tmp = ids[ids[0]];
556			ids[ids[0]] = id;
557			id = tmp;
558		}
559	}
560	ids[0]++;
561	if ( ids[0] >= WT_IDL_UM_MAX ) {
562		ids[0] = NOID;
563		ids[2] = id;
564	} else {
565		ids[ids[0]] = id;
566	}
567	return 0;
568}
569
570/* Append sorted list b to sorted list a. The result is unsorted but
571 * a[1] is the min of the result and a[a[0]] is the max.
572 */
573int wt_idl_append( ID *a, ID *b )
574{
575	ID ida, idb, tmp, swap = 0;
576
577	if ( WT_IDL_IS_ZERO( b ) ) {
578		return 0;
579	}
580
581	if ( WT_IDL_IS_ZERO( a ) ) {
582		WT_IDL_CPY( a, b );
583		return 0;
584	}
585
586	ida = WT_IDL_LAST( a );
587	idb = WT_IDL_LAST( b );
588	if ( WT_IDL_IS_RANGE( a ) || WT_IDL_IS_RANGE(b) ||
589		a[0] + b[0] >= WT_IDL_UM_MAX ) {
590		a[2] = IDL_MAX( ida, idb );
591		a[1] = IDL_MIN( a[1], b[1] );
592		a[0] = NOID;
593		return 0;
594	}
595
596	if ( b[0] > 1 && ida > idb ) {
597		swap = idb;
598		a[a[0]] = idb;
599		b[b[0]] = ida;
600	}
601
602	if ( b[1] < a[1] ) {
603		tmp = a[1];
604		a[1] = b[1];
605	} else {
606		tmp = b[1];
607	}
608	a[0]++;
609	a[a[0]] = tmp;
610
611	if ( b[0] > 1 ) {
612		int i = b[0] - 1;
613		AC_MEMCPY(a+a[0]+1, b+2, i * sizeof(ID));
614		a[0] += i;
615	}
616	if ( swap ) {
617		b[b[0]] = swap;
618	}
619	return 0;
620}
621
622#if 1
623
624/* Quicksort + Insertion sort for small arrays */
625
626#define SMALL	8
627#define	SWAP(a,b)	itmp=(a);(a)=(b);(b)=itmp
628
629void
630wt_idl_sort( ID *ids, ID *tmp )
631{
632	int *istack = (int *)tmp; /* Private stack, not used by caller */
633	int i,j,k,l,ir,jstack;
634	ID a, itmp;
635
636	if ( WT_IDL_IS_RANGE( ids ))
637		return;
638
639	ir = ids[0];
640	l = 1;
641	jstack = 0;
642	for(;;) {
643		if (ir - l < SMALL) {	/* Insertion sort */
644			for (j=l+1;j<=ir;j++) {
645				a = ids[j];
646				for (i=j-1;i>=1;i--) {
647					if (ids[i] <= a) break;
648					ids[i+1] = ids[i];
649				}
650				ids[i+1] = a;
651			}
652			if (jstack == 0) break;
653			ir = istack[jstack--];
654			l = istack[jstack--];
655		} else {
656			k = (l + ir) >> 1;	/* Choose median of left, center, right */
657			SWAP(ids[k], ids[l+1]);
658			if (ids[l] > ids[ir]) {
659				SWAP(ids[l], ids[ir]);
660			}
661			if (ids[l+1] > ids[ir]) {
662				SWAP(ids[l+1], ids[ir]);
663			}
664			if (ids[l] > ids[l+1]) {
665				SWAP(ids[l], ids[l+1]);
666			}
667			i = l+1;
668			j = ir;
669			a = ids[l+1];
670			for(;;) {
671				do i++; while(ids[i] < a);
672				do j--; while(ids[j] > a);
673				if (j < i) break;
674				SWAP(ids[i],ids[j]);
675			}
676			ids[l+1] = ids[j];
677			ids[j] = a;
678			jstack += 2;
679			if (ir-i+1 >= j-l) {
680				istack[jstack] = ir;
681				istack[jstack-1] = i;
682				ir = j-1;
683			} else {
684				istack[jstack] = j-1;
685				istack[jstack-1] = l;
686				l = i;
687			}
688		}
689	}
690}
691
692#else
693
694/* 8 bit Radix sort + insertion sort
695 *
696 * based on code from http://www.cubic.org/docs/radix.htm
697 * with improvements by ebackes@symas.com and hyc@symas.com
698 *
699 * This code is O(n) but has a relatively high constant factor. For lists
700 * up to ~50 Quicksort is slightly faster; up to ~100 they are even.
701 * Much faster than quicksort for lists longer than ~100. Insertion
702 * sort is actually superior for lists <50.
703 */
704
705#define BUCKETS	(1<<8)
706#define SMALL	50
707
708void
709wt_idl_sort( ID *ids, ID *tmp )
710{
711	int count, soft_limit, phase = 0, size = ids[0];
712	ID *idls[2];
713	unsigned char *maxv = (unsigned char *)&ids[size];
714
715	if ( WT_IDL_IS_RANGE( ids ))
716		return;
717
718	/* Use insertion sort for small lists */
719	if ( size <= SMALL ) {
720		int i,j;
721		ID a;
722
723		for (j=1;j<=size;j++) {
724			a = ids[j];
725			for (i=j-1;i>=1;i--) {
726				if (ids[i] <= a) break;
727				ids[i+1] = ids[i];
728			}
729			ids[i+1] = a;
730		}
731		return;
732	}
733
734	tmp[0] = size;
735	idls[0] = ids;
736	idls[1] = tmp;
737
738#if BYTE_ORDER == BIG_ENDIAN
739    for (soft_limit = 0; !maxv[soft_limit]; soft_limit++);
740#else
741    for (soft_limit = sizeof(ID)-1; !maxv[soft_limit]; soft_limit--);
742#endif
743
744	for (
745#if BYTE_ORDER == BIG_ENDIAN
746	count = sizeof(ID)-1; count >= soft_limit; --count
747#else
748	count = 0; count <= soft_limit; ++count
749#endif
750	) {
751		unsigned int num[BUCKETS], * np, n, sum;
752		int i;
753        ID *sp, *source, *dest;
754        unsigned char *bp, *source_start;
755
756		source = idls[phase]+1;
757		dest = idls[phase^1]+1;
758		source_start =  ((unsigned char *) source) + count;
759
760        np = num;
761        for ( i = BUCKETS; i > 0; --i ) *np++ = 0;
762
763		/* count occurrences of every byte value */
764		bp = source_start;
765        for ( i = size; i > 0; --i, bp += sizeof(ID) )
766				num[*bp]++;
767
768		/* transform count into index by summing elements and storing
769		 * into same array
770		 */
771        sum = 0;
772        np = num;
773        for ( i = BUCKETS; i > 0; --i ) {
774                n = *np;
775                *np++ = sum;
776                sum += n;
777        }
778
779		/* fill dest with the right values in the right place */
780		bp = source_start;
781        sp = source;
782        for ( i = size; i > 0; --i, bp += sizeof(ID) ) {
783                np = num + *bp;
784                dest[*np] = *sp++;
785                ++(*np);
786        }
787		phase ^= 1;
788	}
789
790	/* copy back from temp if needed */
791	if ( phase ) {
792		ids++; tmp++;
793		for ( count = 0; count < size; ++count )
794			*ids++ = *tmp++;
795	}
796}
797#endif	/* Quick vs Radix */
798
799