1/*	$NetBSD$	*/
2
3/* zn_malloc.c - zone-based malloc routines */
4/* OpenLDAP: pkg/ldap/servers/slapd/zn_malloc.c,v 1.11.2.7 2010/04/19 20:58:45 quanah Exp*/
5/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
6 *
7 * Copyright 2003-2010 The OpenLDAP Foundation.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted only as authorized by the OpenLDAP
12 * Public License.
13 *
14 * A copy of this license is available in the file LICENSE in the
15 * top-level directory of the distribution or, alternatively, at
16 * <http://www.OpenLDAP.org/license.html>.
17 */
18/* Portions Copyright 2004 IBM Corporation
19 * All rights reserved.
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted only as authorized by the OpenLDAP
22 * Public License.
23 */
24/* ACKNOWLEDGEMENTS
25 * This work originally developed by Jong-Hyuk Choi for inclusion in
26 * OpenLDAP Software.
27 */
28
29#include "portable.h"
30
#include <stdio.h>
#include <ac/string.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
35
36#include "slap.h"
37
38#ifdef SLAP_ZONE_ALLOC
39
40#include <sys/mman.h>
41
42static int slap_zone_cmp(const void *v1, const void *v2);
43void * slap_replenish_zopool(void *ctx);
44
45static void
46slap_zo_release(void *data)
47{
48	struct zone_object *zo = (struct zone_object *)data;
49	ch_free( zo );
50}
51
52void
53slap_zn_mem_destroy(
54	void *ctx
55)
56{
57	struct zone_heap *zh = ctx;
58	int pad = 2*sizeof(int)-1, pad_shift;
59	int order_start = -1, i, j;
60	struct zone_object *zo;
61
62	pad_shift = pad - 1;
63	do {
64		order_start++;
65	} while (pad_shift >>= 1);
66
67	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
68	for (i = 0; i < zh->zh_zoneorder - order_start + 1; i++) {
69		zo = LDAP_LIST_FIRST(&zh->zh_free[i]);
70		while (zo) {
71			struct zone_object *zo_tmp = zo;
72			zo = LDAP_LIST_NEXT(zo, zo_link);
73			LDAP_LIST_REMOVE(zo_tmp, zo_link);
74			LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, zo_tmp, zo_link);
75		}
76	}
77	ch_free(zh->zh_free);
78
79	for (i = 0; i < zh->zh_numzones; i++) {
80		for (j = 0; j < zh->zh_zoneorder - order_start + 1; j++) {
81			ch_free(zh->zh_maps[i][j]);
82		}
83		ch_free(zh->zh_maps[i]);
84		munmap(zh->zh_zones[i], zh->zh_zonesize);
85		ldap_pvt_thread_rdwr_destroy(&zh->zh_znlock[i]);
86	}
87	ch_free(zh->zh_maps);
88	ch_free(zh->zh_zones);
89	ch_free(zh->zh_seqno);
90	ch_free(zh->zh_znlock);
91
92	avl_free(zh->zh_zonetree, slap_zo_release);
93
94	zo = LDAP_LIST_FIRST(&zh->zh_zopool);
95	while (zo) {
96		struct zone_object *zo_tmp = zo;
97		zo = LDAP_LIST_NEXT(zo, zo_link);
98		if (!zo_tmp->zo_blockhead) {
99			LDAP_LIST_REMOVE(zo_tmp, zo_link);
100		}
101	}
102	zo = LDAP_LIST_FIRST(&zh->zh_zopool);
103	while (zo) {
104		struct zone_object *zo_tmp = zo;
105		zo = LDAP_LIST_NEXT(zo, zo_link);
106		ch_free(zo_tmp);
107	}
108	ldap_pvt_thread_mutex_unlock(&zh->zh_mutex);
109	ldap_pvt_thread_rdwr_destroy(&zh->zh_lock);
110	ldap_pvt_thread_mutex_destroy(&zh->zh_mutex);
111	ch_free(zh);
112}
113
114void *
115slap_zn_mem_create(
116	ber_len_t initsize,
117	ber_len_t maxsize,
118	ber_len_t deltasize,
119	ber_len_t zonesize
120)
121{
122	struct zone_heap *zh = NULL;
123	ber_len_t zpad;
124	int pad = 2*sizeof(int)-1, pad_shift;
125	int size_shift;
126	int order = -1, order_start = -1, order_end = -1;
127	int i, j;
128	struct zone_object *zo;
129
130	Debug(LDAP_DEBUG_NONE,
131		"--> slap_zn_mem_create: initsize=%d, maxsize=%d\n",
132		initsize, maxsize, 0);
133	Debug(LDAP_DEBUG_NONE,
134		"++> slap_zn_mem_create: deltasize=%d, zonesize=%d\n",
135		deltasize, zonesize, 0);
136
137	zh = (struct zone_heap *)ch_calloc(1, sizeof(struct zone_heap));
138
139	zh->zh_fd = open("/dev/zero", O_RDWR);
140
141	if ( zonesize ) {
142		zh->zh_zonesize = zonesize;
143	} else {
144		zh->zh_zonesize = SLAP_ZONE_SIZE;
145	}
146
147	zpad = zh->zh_zonesize - 1;
148	zh->zh_numzones = ((initsize + zpad) & ~zpad) / zh->zh_zonesize;
149
150	if ( maxsize && maxsize >= initsize ) {
151		zh->zh_maxzones = ((maxsize + zpad) & ~zpad) / zh->zh_zonesize;
152	} else {
153		zh->zh_maxzones = ((initsize + zpad) & ~zpad) / zh->zh_zonesize;
154	}
155
156	if ( deltasize ) {
157		zh->zh_deltazones = ((deltasize + zpad) & ~zpad) / zh->zh_zonesize;
158	} else {
159		zh->zh_deltazones = ((SLAP_ZONE_DELTA+zpad) & ~zpad) / zh->zh_zonesize;
160	}
161
162	size_shift = zh->zh_zonesize - 1;
163	do {
164		order_end++;
165	} while (size_shift >>= 1);
166
167	pad_shift = pad - 1;
168	do {
169		order_start++;
170	} while (pad_shift >>= 1);
171
172	order = order_end - order_start + 1;
173
174	zh->zh_zones = (void **)ch_malloc(zh->zh_maxzones * sizeof(void*));
175	zh->zh_znlock = (ldap_pvt_thread_rdwr_t *)ch_malloc(
176						zh->zh_maxzones * sizeof(ldap_pvt_thread_rdwr_t *));
177	zh->zh_maps = (unsigned char ***)ch_malloc(
178					zh->zh_maxzones * sizeof(unsigned char**));
179
180	zh->zh_zoneorder = order_end;
181	zh->zh_free = (struct zh_freelist *)
182					ch_malloc(order * sizeof(struct zh_freelist));
183	zh->zh_seqno = (unsigned long *)ch_calloc(zh->zh_maxzones,
184											sizeof(unsigned long));
185	for (i = 0; i < order; i++) {
186		LDAP_LIST_INIT(&zh->zh_free[i]);
187	}
188	LDAP_LIST_INIT(&zh->zh_zopool);
189
190	for (i = 0; i < zh->zh_numzones; i++) {
191		zh->zh_zones[i] = mmap(0, zh->zh_zonesize, PROT_READ | PROT_WRITE,
192							MAP_PRIVATE, zh->zh_fd, 0);
193		zh->zh_maps[i] = (unsigned char **)
194					ch_malloc(order * sizeof(unsigned char *));
195		for (j = 0; j < order; j++) {
196			int shiftamt = order_start + 1 + j;
197			int nummaps = zh->zh_zonesize >> shiftamt;
198			assert(nummaps);
199			nummaps >>= 3;
200			if (!nummaps) nummaps = 1;
201			zh->zh_maps[i][j] = (unsigned char *)ch_malloc(nummaps);
202			memset(zh->zh_maps[i][j], 0, nummaps);
203		}
204
205		if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
206			slap_replenish_zopool(zh);
207		}
208		zo = LDAP_LIST_FIRST(&zh->zh_zopool);
209		LDAP_LIST_REMOVE(zo, zo_link);
210		zo->zo_ptr = zh->zh_zones[i];
211		zo->zo_idx = i;
212		LDAP_LIST_INSERT_HEAD(&zh->zh_free[order-1], zo, zo_link);
213
214		if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
215			slap_replenish_zopool(zh);
216		}
217		zo = LDAP_LIST_FIRST(&zh->zh_zopool);
218		LDAP_LIST_REMOVE(zo, zo_link);
219		zo->zo_ptr = zh->zh_zones[i];
220		zo->zo_siz = zh->zh_zonesize;
221		zo->zo_idx = i;
222		avl_insert(&zh->zh_zonetree, zo, slap_zone_cmp, avl_dup_error);
223		ldap_pvt_thread_rdwr_init(&zh->zh_znlock[i]);
224	}
225
226	LDAP_STAILQ_INIT(&zh->zh_latency_history_queue);
227	ldap_pvt_thread_mutex_init(&zh->zh_mutex);
228	ldap_pvt_thread_rdwr_init(&zh->zh_lock);
229
230	return zh;
231}
232
233void *
234slap_zn_malloc(
235    ber_len_t	size,
236	void *ctx
237)
238{
239	struct zone_heap *zh = ctx;
240	ber_len_t size_shift;
241	int pad = 2*sizeof(int)-1, pad_shift;
242	int order = -1, order_start = -1;
243	struct zone_object *zo, *zo_new, *zo_left, *zo_right;
244	ber_len_t *ptr, *new;
245	int idx;
246	unsigned long diff;
247	int i, j, k;
248
249	Debug(LDAP_DEBUG_NONE,
250		"--> slap_zn_malloc: size=%d\n", size, 0, 0);
251
252	if (!zh) return ber_memalloc_x(size, NULL);
253
254	/* round up to doubleword boundary */
255	size += 2*sizeof(ber_len_t) + pad;
256	size &= ~pad;
257
258	size_shift = size - 1;
259	do {
260		order++;
261	} while (size_shift >>= 1);
262
263	pad_shift = pad - 1;
264	do {
265		order_start++;
266	} while (pad_shift >>= 1);
267
268retry:
269
270	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
271	for (i = order; i <= zh->zh_zoneorder &&
272			LDAP_LIST_EMPTY(&zh->zh_free[i-order_start]); i++);
273
274	if (i == order) {
275		zo_new = LDAP_LIST_FIRST(&zh->zh_free[i-order_start]);
276		LDAP_LIST_REMOVE(zo_new, zo_link);
277		ptr = zo_new->zo_ptr;
278		idx = zo_new->zo_idx;
279		diff = (unsigned long)((char*)ptr -
280				(char*)zh->zh_zones[idx]) >> (order + 1);
281		zh->zh_maps[idx][order-order_start][diff>>3] |= (1 << (diff & 0x7));
282		*ptr++ = zh->zh_seqno[idx];
283		*ptr++ = size - 2*sizeof(ber_len_t);
284		zo_new->zo_ptr = NULL;
285		zo_new->zo_idx = -1;
286		LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, zo_new, zo_link);
287		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
288		Debug(LDAP_DEBUG_NONE, "slap_zn_malloc: returning 0x%x, 0x%x\n",
289				ptr, (int)ptr>>(zh->zh_zoneorder+1), 0);
290		return((void*)ptr);
291	} else if (i <= zh->zh_zoneorder) {
292		for (j = i; j > order; j--) {
293			zo_left = LDAP_LIST_FIRST(&zh->zh_free[j-order_start]);
294			LDAP_LIST_REMOVE(zo_left, zo_link);
295			if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
296				slap_replenish_zopool(zh);
297			}
298			zo_right = LDAP_LIST_FIRST(&zh->zh_zopool);
299			LDAP_LIST_REMOVE(zo_right, zo_link);
300			zo_right->zo_ptr = zo_left->zo_ptr + (1 << j);
301			zo_right->zo_idx = zo_left->zo_idx;
302			Debug(LDAP_DEBUG_NONE,
303				"slap_zn_malloc: split (left=0x%x, right=0x%x)\n",
304				zo_left->zo_ptr, zo_right->zo_ptr, 0);
305			if (j == order + 1) {
306				ptr = zo_left->zo_ptr;
307				diff = (unsigned long)((char*)ptr -
308						(char*)zh->zh_zones[zo_left->zo_idx]) >> (order+1);
309				zh->zh_maps[zo_left->zo_idx][order-order_start][diff>>3] |=
310						(1 << (diff & 0x7));
311				*ptr++ = zh->zh_seqno[zo_left->zo_idx];
312				*ptr++ = size - 2*sizeof(ber_len_t);
313				LDAP_LIST_INSERT_HEAD(
314						&zh->zh_free[j-1-order_start], zo_right, zo_link);
315				LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, zo_left, zo_link);
316				ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
317				Debug(LDAP_DEBUG_NONE,
318					"slap_zn_malloc: returning 0x%x, 0x%x\n",
319					ptr, (int)ptr>>(zh->zh_zoneorder+1), 0);
320				return((void*)ptr);
321			} else {
322				LDAP_LIST_INSERT_HEAD(
323						&zh->zh_free[j-1-order_start], zo_right, zo_link);
324				LDAP_LIST_INSERT_HEAD(
325						&zh->zh_free[j-1-order_start], zo_left, zo_link);
326			}
327		}
328		assert(0);
329	} else {
330
331		if ( zh->zh_maxzones < zh->zh_numzones + zh->zh_deltazones ) {
332			ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
333			Debug( LDAP_DEBUG_TRACE,
334				"zn_malloc %lu: ch_malloc\n\n",
335				(long)size, 0, 0);
336			Debug(LDAP_DEBUG_NONE,
337				"slap_zn_malloc: returning 0x%x, 0x%x\n",
338				ptr, (int)ptr>>(zh->zh_zoneorder+1), 0);
339			return (void*)ch_malloc(size);
340		}
341
342		for (i = zh->zh_numzones; i < zh->zh_numzones+zh->zh_deltazones; i++) {
343			zh->zh_zones[i] = mmap(0, zh->zh_zonesize, PROT_READ | PROT_WRITE,
344								MAP_PRIVATE, zh->zh_fd, 0);
345			zh->zh_maps[i] = (unsigned char **)
346						ch_malloc((zh->zh_zoneorder - order_start + 1) *
347						sizeof(unsigned char *));
348			for (j = 0; j < zh->zh_zoneorder-order_start+1; j++) {
349				int shiftamt = order_start + 1 + j;
350				int nummaps = zh->zh_zonesize >> shiftamt;
351				assert(nummaps);
352				nummaps >>= 3;
353				if (!nummaps) nummaps = 1;
354				zh->zh_maps[i][j] = (unsigned char *)ch_malloc(nummaps);
355				memset(zh->zh_maps[i][j], 0, nummaps);
356			}
357
358			if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
359				slap_replenish_zopool(zh);
360			}
361			zo = LDAP_LIST_FIRST(&zh->zh_zopool);
362			LDAP_LIST_REMOVE(zo, zo_link);
363			zo->zo_ptr = zh->zh_zones[i];
364			zo->zo_idx = i;
365			LDAP_LIST_INSERT_HEAD(&zh->
366						zh_free[zh->zh_zoneorder-order_start],zo,zo_link);
367
368			if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
369				slap_replenish_zopool(zh);
370			}
371			zo = LDAP_LIST_FIRST(&zh->zh_zopool);
372			LDAP_LIST_REMOVE(zo, zo_link);
373			zo->zo_ptr = zh->zh_zones[i];
374			zo->zo_siz = zh->zh_zonesize;
375			zo->zo_idx = i;
376			avl_insert(&zh->zh_zonetree, zo, slap_zone_cmp, avl_dup_error);
377			ldap_pvt_thread_rdwr_init(&zh->zh_znlock[i]);
378		}
379		zh->zh_numzones += zh->zh_deltazones;
380		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
381		goto retry;
382	}
383}
384
385void *
386slap_zn_calloc( ber_len_t n, ber_len_t size, void *ctx )
387{
388	void *new;
389
390	new = slap_zn_malloc( n*size, ctx );
391	if ( new ) {
392		memset( new, 0, n*size );
393	}
394	return new;
395}
396
397void *
398slap_zn_realloc(void *ptr, ber_len_t size, void *ctx)
399{
400	struct zone_heap *zh = ctx;
401	int pad = 2*sizeof(int)-1, pad_shift;
402	int order_start = -1, order = -1;
403	struct zone_object zoi, *zoo;
404	ber_len_t *p = (ber_len_t *)ptr, *new;
405	unsigned long diff;
406	int i;
407	void *newptr = NULL;
408	struct zone_heap *zone = NULL;
409
410	Debug(LDAP_DEBUG_NONE,
411		"--> slap_zn_realloc: ptr=0x%x, size=%d\n", ptr, size, 0);
412
413	if (ptr == NULL)
414		return slap_zn_malloc(size, zh);
415
416	zoi.zo_ptr = p;
417	zoi.zo_idx = -1;
418
419	if (zh) {
420		ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
421		zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
422		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
423	}
424
425	/* Not our memory? */
426	if (!zoo) {
427		/* duplicate of realloc behavior, oh well */
428		new = ber_memrealloc_x(ptr, size, NULL);
429		if (new) {
430			return new;
431		}
432		Debug(LDAP_DEBUG_ANY, "ch_realloc of %lu bytes failed\n",
433				(long) size, 0, 0);
434		assert(0);
435		exit( EXIT_FAILURE );
436	}
437
438	assert(zoo->zo_idx != -1);
439
440	zone = zh->zh_zones[zoo->zo_idx];
441
442	if (size == 0) {
443		slap_zn_free(ptr, zh);
444		return NULL;
445	}
446
447	newptr = slap_zn_malloc(size, zh);
448	if (size < p[-1]) {
449		AC_MEMCPY(newptr, ptr, size);
450	} else {
451		AC_MEMCPY(newptr, ptr, p[-1]);
452	}
453	slap_zn_free(ptr, zh);
454	return newptr;
455}
456
457void
458slap_zn_free(void *ptr, void *ctx)
459{
460	struct zone_heap *zh = ctx;
461	int size, size_shift, order_size;
462	int pad = 2*sizeof(int)-1, pad_shift;
463	ber_len_t *p = (ber_len_t *)ptr, *tmpp;
464	int order_start = -1, order = -1;
465	struct zone_object zoi, *zoo, *zo;
466	unsigned long diff;
467	int i, k, inserted = 0, idx;
468	struct zone_heap *zone = NULL;
469
470	zoi.zo_ptr = p;
471	zoi.zo_idx = -1;
472
473	Debug(LDAP_DEBUG_NONE, "--> slap_zn_free: ptr=0x%x\n", ptr, 0, 0);
474
475	if (zh) {
476		ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
477		zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
478		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
479	}
480
481	if (!zoo) {
482		ber_memfree_x(ptr, NULL);
483	} else {
484		idx = zoo->zo_idx;
485		assert(idx != -1);
486		zone = zh->zh_zones[idx];
487
488		size = *(--p);
489		size_shift = size + 2*sizeof(ber_len_t) - 1;
490		do {
491			order++;
492		} while (size_shift >>= 1);
493
494		pad_shift = pad - 1;
495		do {
496			order_start++;
497		} while (pad_shift >>= 1);
498
499		ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
500		for (i = order, tmpp = p; i <= zh->zh_zoneorder; i++) {
501			order_size = 1 << (i+1);
502			diff = (unsigned long)((char*)tmpp - (char*)zone) >> (i+1);
503			zh->zh_maps[idx][i-order_start][diff>>3] &= (~(1 << (diff & 0x7)));
504			if (diff == ((diff>>1)<<1)) {
505				if (!(zh->zh_maps[idx][i-order_start][(diff+1)>>3] &
506						(1<<((diff+1)&0x7)))) {
507					zo = LDAP_LIST_FIRST(&zh->zh_free[i-order_start]);
508					while (zo) {
509						if ((char*)zo->zo_ptr == (char*)tmpp) {
510							LDAP_LIST_REMOVE( zo, zo_link );
511						} else if ((char*)zo->zo_ptr ==
512								(char*)tmpp + order_size) {
513							LDAP_LIST_REMOVE(zo, zo_link);
514							break;
515						}
516						zo = LDAP_LIST_NEXT(zo, zo_link);
517					}
518					if (zo) {
519						if (i < zh->zh_zoneorder) {
520							inserted = 1;
521							zo->zo_ptr = tmpp;
522							Debug(LDAP_DEBUG_NONE,
523								"slap_zn_free: merging 0x%x\n",
524								zo->zo_ptr, 0, 0);
525							LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start+1],
526									zo, zo_link);
527						}
528						continue;
529					} else {
530						if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
531							slap_replenish_zopool(zh);
532						}
533						zo = LDAP_LIST_FIRST(&zh->zh_zopool);
534						LDAP_LIST_REMOVE(zo, zo_link);
535						zo->zo_ptr = tmpp;
536						zo->zo_idx = idx;
537						Debug(LDAP_DEBUG_NONE,
538							"slap_zn_free: merging 0x%x\n",
539							zo->zo_ptr, 0, 0);
540						LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start],
541								zo, zo_link);
542						break;
543
544						Debug(LDAP_DEBUG_ANY, "slap_zn_free: "
545							"free object not found while bit is clear.\n",
546							0, 0, 0);
547						assert(zo != NULL);
548
549					}
550				} else {
551					if (!inserted) {
552						if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
553							slap_replenish_zopool(zh);
554						}
555						zo = LDAP_LIST_FIRST(&zh->zh_zopool);
556						LDAP_LIST_REMOVE(zo, zo_link);
557						zo->zo_ptr = tmpp;
558						zo->zo_idx = idx;
559						Debug(LDAP_DEBUG_NONE,
560							"slap_zn_free: merging 0x%x\n",
561							zo->zo_ptr, 0, 0);
562						LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start],
563								zo, zo_link);
564					}
565					break;
566				}
567			} else {
568				if (!(zh->zh_maps[idx][i-order_start][(diff-1)>>3] &
569						(1<<((diff-1)&0x7)))) {
570					zo = LDAP_LIST_FIRST(&zh->zh_free[i-order_start]);
571					while (zo) {
572						if ((char*)zo->zo_ptr == (char*)tmpp) {
573							LDAP_LIST_REMOVE(zo, zo_link);
574						} else if ((char*)tmpp == zo->zo_ptr + order_size) {
575							LDAP_LIST_REMOVE(zo, zo_link);
576							tmpp = zo->zo_ptr;
577							break;
578						}
579						zo = LDAP_LIST_NEXT(zo, zo_link);
580					}
581					if (zo) {
582						if (i < zh->zh_zoneorder) {
583							inserted = 1;
584							Debug(LDAP_DEBUG_NONE,
585								"slap_zn_free: merging 0x%x\n",
586								zo->zo_ptr, 0, 0);
587							LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start+1],
588									zo, zo_link);
589							continue;
590						}
591					} else {
592						if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
593							slap_replenish_zopool(zh);
594						}
595						zo = LDAP_LIST_FIRST(&zh->zh_zopool);
596						LDAP_LIST_REMOVE(zo, zo_link);
597						zo->zo_ptr = tmpp;
598						zo->zo_idx = idx;
599						Debug(LDAP_DEBUG_NONE,
600							"slap_zn_free: merging 0x%x\n",
601							zo->zo_ptr, 0, 0);
602						LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start],
603								zo, zo_link);
604						break;
605
606						Debug(LDAP_DEBUG_ANY, "slap_zn_free: "
607							"free object not found while bit is clear.\n",
608							0, 0, 0 );
609						assert(zo != NULL);
610
611					}
612				} else {
613					if ( !inserted ) {
614						if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
615							slap_replenish_zopool(zh);
616						}
617						zo = LDAP_LIST_FIRST(&zh->zh_zopool);
618						LDAP_LIST_REMOVE(zo, zo_link);
619						zo->zo_ptr = tmpp;
620						zo->zo_idx = idx;
621						Debug(LDAP_DEBUG_NONE,
622							"slap_zn_free: merging 0x%x\n",
623							zo->zo_ptr, 0, 0);
624						LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start],
625								zo, zo_link);
626					}
627					break;
628				}
629			}
630		}
631		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
632	}
633}
634
635static int
636slap_zone_cmp(const void *v1, const void *v2)
637{
638	const struct zone_object *zo1 = v1;
639	const struct zone_object *zo2 = v2;
640	char *ptr1;
641	char *ptr2;
642	ber_len_t zpad;
643
644	zpad = zo2->zo_siz - 1;
645	ptr1 = (char*)(((unsigned long)zo1->zo_ptr + zpad) & ~zpad);
646	ptr2 = (char*)zo2->zo_ptr + ((char*)ptr1 - (char*)zo1->zo_ptr);
647	ptr2 = (char*)(((unsigned long)ptr2 + zpad) & ~zpad);
648	return (int)((char*)ptr1 - (char*)ptr2);
649}
650
651void *
652slap_replenish_zopool(
653	void *ctx
654)
655{
656	struct zone_heap* zh = ctx;
657	struct zone_object *zo_block;
658	int i;
659
660	zo_block = (struct zone_object *)ch_malloc(
661					SLAP_ZONE_ZOBLOCK * sizeof(struct zone_object));
662
663	if ( zo_block == NULL ) {
664		return NULL;
665	}
666
667	zo_block[0].zo_blockhead = 1;
668	LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, &zo_block[0], zo_link);
669	for (i = 1; i < SLAP_ZONE_ZOBLOCK; i++) {
670		zo_block[i].zo_blockhead = 0;
671		LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, &zo_block[i], zo_link );
672	}
673
674	return zo_block;
675}
676
677int
678slap_zn_invalidate(
679	void *ctx,
680	void *ptr
681)
682{
683	struct zone_heap* zh = ctx;
684	struct zone_object zoi, *zoo;
685	struct zone_heap *zone = NULL;
686	int seqno = *((ber_len_t*)ptr - 2);
687	int idx = -1, rc = 0;
688	int pad = 2*sizeof(int)-1, pad_shift;
689	int order_start = -1, i;
690	struct zone_object *zo;
691
692	pad_shift = pad - 1;
693	do {
694		order_start++;
695	} while (pad_shift >>= 1);
696
697	zoi.zo_ptr = ptr;
698	zoi.zo_idx = -1;
699
700	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
701	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
702
703	if (zoo) {
704		idx = zoo->zo_idx;
705		assert(idx != -1);
706		madvise(zh->zh_zones[idx], zh->zh_zonesize, MADV_DONTNEED);
707		for (i = 0; i < zh->zh_zoneorder - order_start + 1; i++) {
708			int shiftamt = order_start + 1 + i;
709			int nummaps = zh->zh_zonesize >> shiftamt;
710			assert(nummaps);
711			nummaps >>= 3;
712			if (!nummaps) nummaps = 1;
713			memset(zh->zh_maps[idx][i], 0, nummaps);
714			zo = LDAP_LIST_FIRST(&zh->zh_free[i]);
715			while (zo) {
716				struct zone_object *zo_tmp = zo;
717				zo = LDAP_LIST_NEXT(zo, zo_link);
718				if (zo_tmp && zo_tmp->zo_idx == idx) {
719					LDAP_LIST_REMOVE(zo_tmp, zo_link);
720					LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, zo_tmp, zo_link);
721				}
722			}
723		}
724		if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
725			slap_replenish_zopool(zh);
726		}
727		zo = LDAP_LIST_FIRST(&zh->zh_zopool);
728		LDAP_LIST_REMOVE(zo, zo_link);
729		zo->zo_ptr = zh->zh_zones[idx];
730		zo->zo_idx = idx;
731		LDAP_LIST_INSERT_HEAD(&zh->zh_free[zh->zh_zoneorder-order_start],
732								zo, zo_link);
733		zh->zh_seqno[idx]++;
734	} else {
735		Debug(LDAP_DEBUG_NONE, "zone not found for (ctx=0x%x, ptr=0x%x) !\n",
736				ctx, ptr, 0);
737	}
738
739	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
740	Debug(LDAP_DEBUG_NONE, "zone %d invalidate\n", idx, 0, 0);
741	return rc;
742}
743
744int
745slap_zn_validate(
746	void *ctx,
747	void *ptr,
748	int seqno
749)
750{
751	struct zone_heap* zh = ctx;
752	struct zone_object zoi, *zoo;
753	struct zone_heap *zone = NULL;
754	int idx, rc = 0;
755
756	zoi.zo_ptr = ptr;
757	zoi.zo_idx = -1;
758
759	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
760
761	if (zoo) {
762		idx = zoo->zo_idx;
763		assert(idx != -1);
764		assert(seqno <= zh->zh_seqno[idx]);
765		rc = (seqno == zh->zh_seqno[idx]);
766	}
767
768	return rc;
769}
770
771int slap_zh_rlock(
772	void *ctx
773)
774{
775	struct zone_heap* zh = ctx;
776	ldap_pvt_thread_rdwr_rlock(&zh->zh_lock);
777}
778
779int slap_zh_runlock(
780	void *ctx
781)
782{
783	struct zone_heap* zh = ctx;
784	ldap_pvt_thread_rdwr_runlock(&zh->zh_lock);
785}
786
787int slap_zh_wlock(
788	void *ctx
789)
790{
791	struct zone_heap* zh = ctx;
792	ldap_pvt_thread_rdwr_wlock(&zh->zh_lock);
793}
794
795int slap_zh_wunlock(
796	void *ctx
797)
798{
799	struct zone_heap* zh = ctx;
800	ldap_pvt_thread_rdwr_wunlock(&zh->zh_lock);
801}
802
803int slap_zn_rlock(
804	void *ctx,
805	void *ptr
806)
807{
808	struct zone_heap* zh = ctx;
809	struct zone_object zoi, *zoo;
810	struct zone_heap *zone = NULL;
811	int idx;
812
813	zoi.zo_ptr = ptr;
814	zoi.zo_idx = -1;
815
816	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
817	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
818	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
819
820	if (zoo) {
821		idx = zoo->zo_idx;
822		assert(idx != -1);
823		ldap_pvt_thread_rdwr_rlock(&zh->zh_znlock[idx]);
824	}
825}
826
827int slap_zn_runlock(
828	void *ctx,
829	void *ptr
830)
831{
832	struct zone_heap* zh = ctx;
833	struct zone_object zoi, *zoo;
834	struct zone_heap *zone = NULL;
835	int idx;
836
837	zoi.zo_ptr = ptr;
838	zoi.zo_idx = -1;
839
840	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
841	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
842	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
843
844	if (zoo) {
845		idx = zoo->zo_idx;
846		assert(idx != -1);
847		ldap_pvt_thread_rdwr_runlock(&zh->zh_znlock[idx]);
848	}
849}
850
851int slap_zn_wlock(
852	void *ctx,
853	void *ptr
854)
855{
856	struct zone_heap* zh = ctx;
857	struct zone_object zoi, *zoo;
858	struct zone_heap *zone = NULL;
859	int idx;
860
861	zoi.zo_ptr = ptr;
862	zoi.zo_idx = -1;
863
864	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
865	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
866	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
867
868	if (zoo) {
869		idx = zoo->zo_idx;
870		assert(idx != -1);
871		ldap_pvt_thread_rdwr_wlock(&zh->zh_znlock[idx]);
872	}
873}
874
875int slap_zn_wunlock(
876	void *ctx,
877	void *ptr
878)
879{
880	struct zone_heap* zh = ctx;
881	struct zone_object zoi, *zoo;
882	struct zone_heap *zone = NULL;
883	int idx;
884
885	zoi.zo_ptr = ptr;
886	zoi.zo_idx = -1;
887
888	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
889	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
890	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
891
892	if (zoo) {
893		idx = zoo->zo_idx;
894		assert(idx != -1);
895		ldap_pvt_thread_rdwr_wunlock(&zh->zh_znlock[idx]);
896	}
897}
898
#define T_SEC_IN_USEC 1000000

/*
 * Microseconds elapsed from *tv_begin to *tv_end.
 *
 * Returns 0 when tv_end lies before tv_begin.  The result is narrowed to
 * int, so intervals that do not fit in an int are not represented
 * faithfully.
 */
static int
slap_timediff(struct timeval *tv_begin, struct timeval *tv_end)
{
	/* signed, so that a negative interval can actually be detected */
	int64_t t_begin, t_end, t_diff;

	t_begin = (int64_t)T_SEC_IN_USEC * tv_begin->tv_sec + tv_begin->tv_usec;
	t_end  = (int64_t)T_SEC_IN_USEC * tv_end->tv_sec  + tv_end->tv_usec;
	t_diff  = t_end - t_begin;

	if ( t_diff < 0 )
		t_diff = 0;

	return (int)t_diff;
}
915
/*
 * Store the current time of day in *tv_set, as reported by
 * gettimeofday() with no timezone argument.
 */
void
slap_set_timing(struct timeval *tv_set)
{
	struct timezone *no_tz = NULL;

	gettimeofday(tv_set, no_tz);
}
921
/*
 * Store the current time of day in *tv_measure and return what
 * slap_timediff() computes for the interval from *tv_set to it.
 */
int
slap_measure_timing(struct timeval *tv_set, struct timeval *tv_measure)
{
	int elapsed;

	gettimeofday(tv_measure, (struct timezone *)NULL);
	elapsed = slap_timediff(tv_set, tv_measure);

	return elapsed;
}
928
929#define EMA_WEIGHT 0.999000
930#define SLAP_ZN_LATENCY_HISTORY_QLEN 500
931int
932slap_zn_latency_history(void* ctx, int ea_latency)
933{
934/* TODO: monitor /proc/stat (swap) as well */
935	struct zone_heap* zh = ctx;
936	double t_diff = 0.0;
937
938	zh->zh_ema_latency = (double)ea_latency * (1.0 - EMA_WEIGHT)
939					+ zh->zh_ema_latency * EMA_WEIGHT;
940	if (!zh->zh_swapping && zh->zh_ema_samples++ % 100 == 99) {
941		struct zone_latency_history *zlh_entry;
942		zlh_entry = ch_calloc(1, sizeof(struct zone_latency_history));
943		zlh_entry->zlh_latency = zh->zh_ema_latency;
944		LDAP_STAILQ_INSERT_TAIL(
945				&zh->zh_latency_history_queue, zlh_entry, zlh_next);
946		zh->zh_latency_history_qlen++;
947		while (zh->zh_latency_history_qlen > SLAP_ZN_LATENCY_HISTORY_QLEN) {
948			struct zone_latency_history *zlh;
949			zlh = LDAP_STAILQ_FIRST(&zh->zh_latency_history_queue);
950			LDAP_STAILQ_REMOVE_HEAD(
951					&zh->zh_latency_history_queue, zlh_next);
952			zh->zh_latency_history_qlen--;
953			ch_free(zlh);
954		}
955		if (zh->zh_latency_history_qlen == SLAP_ZN_LATENCY_HISTORY_QLEN) {
956			struct zone_latency_history *zlh_first, *zlh_last;
957			zlh_first = LDAP_STAILQ_FIRST(&zh->zh_latency_history_queue);
958			zlh_last = LDAP_STAILQ_LAST(&zh->zh_latency_history_queue,
959						zone_latency_history, zlh_next);
960			t_diff = zlh_last->zlh_latency - zlh_first->zlh_latency;
961		}
962		if (t_diff >= 2000) {
963			zh->zh_latency_jump++;
964		} else {
965			zh->zh_latency_jump = 0;
966		}
967		if (zh->zh_latency_jump > 3) {
968			zh->zh_latency_jump = 0;
969			zh->zh_swapping = 1;
970		}
971	}
972	return zh->zh_swapping;
973}
974#endif /* SLAP_ZONE_ALLOC */
975