1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 2006,2008 Oracle.  All rights reserved.
5 *
6 * $Id: mp_mvcc.c,v 12.42 2008/01/31 18:40:45 bostic Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/log.h"
13#include "dbinc/mp.h"
14#include "dbinc/txn.h"
15
16static int __pgno_cmp __P((const void *, const void *));
17
18/*
19 * __memp_bh_priority --
20 *	Get the the aggregate priority of a chain of buffer headers.
21 *
22 * PUBLIC: u_int32_t __memp_bh_priority __P((BH *));
23 */
24u_int32_t
25__memp_bh_priority(bhp)
26	BH *bhp;
27{
28	u_int32_t priority;
29
30	while (SH_CHAIN_HASNEXT(bhp, vc))
31		bhp = SH_CHAIN_NEXT(bhp, vc, __bh);
32
33	priority = bhp->priority;
34
35	while ((bhp = SH_CHAIN_PREV(bhp, vc, __bh)) != NULL)
36		if (bhp->priority < priority)
37			priority = bhp->priority;
38
39	return (priority);
40}
41
42/*
43 * __memp_bh_settxn --
44 *	Set the transaction that owns the given buffer.
45 *
46 * PUBLIC: int __memp_bh_settxn __P((DB_MPOOL *, MPOOLFILE *mfp, BH *, void *));
47 */
48int
49__memp_bh_settxn(dbmp, mfp, bhp, vtd)
50	DB_MPOOL *dbmp;
51	MPOOLFILE *mfp;
52	BH *bhp;
53	void *vtd;
54{
55	ENV *env;
56	TXN_DETAIL *td;
57
58	env = dbmp->env;
59	td = (TXN_DETAIL *)vtd;
60
61	if (td == NULL) {
62		__db_errx(env,
63		      "%s: non-transactional update to a multiversion file",
64		    __memp_fns(dbmp, mfp));
65		return (EINVAL);
66	}
67
68	if (bhp->td_off != INVALID_ROFF) {
69		DB_ASSERT(env, BH_OWNER(env, bhp) == td);
70		return (0);
71	}
72
73	bhp->td_off = R_OFFSET(&env->tx_handle->reginfo, td);
74	return (__txn_add_buffer(env, td));
75}
76
77/*
78 * __memp_skip_curadj --
79 *	Indicate whether a cursor adjustment can be skipped for a snapshot
80 *	cursor.
81 *
82 * PUBLIC: int __memp_skip_curadj __P((DBC *, db_pgno_t));
83 */
84int
85__memp_skip_curadj(dbc, pgno)
86	DBC * dbc;
87	db_pgno_t pgno;
88{
89	BH *bhp;
90	DB_MPOOL *dbmp;
91	DB_MPOOLFILE *dbmfp;
92	DB_MPOOL_HASH *hp;
93	DB_TXN *txn;
94	ENV *env;
95	MPOOLFILE *mfp;
96	REGINFO *infop;
97	roff_t mf_offset;
98	int ret, skip;
99
100	env = dbc->env;
101	dbmp = env->mp_handle;
102	dbmfp = dbc->dbp->mpf;
103	mfp = dbmfp->mfp;
104	mf_offset = R_OFFSET(dbmp->reginfo, mfp);
105	skip = 0;
106
107	for (txn = dbc->txn; txn->parent != NULL; txn = txn->parent)
108		;
109
110	/*
111	 * Determine the cache and hash bucket where this page lives and get
112	 * local pointers to them.  Reset on each pass through this code, the
113	 * page number can change.
114	 */
115	MP_GET_BUCKET(env, mfp, pgno, &infop, hp, ret);
116	if (ret != 0) {
117		/* Panic: there is no way to return the error. */
118		(void)__env_panic(env, ret);
119		return (0);
120	}
121
122	SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) {
123		if (bhp->pgno != pgno || bhp->mf_offset != mf_offset)
124			continue;
125
126		if (!BH_OWNED_BY(env, bhp, txn))
127			skip = 1;
128		break;
129	}
130	MUTEX_UNLOCK(env, hp->mtx_hash);
131
132	return (skip);
133}
134
135#define	DB_FREEZER_MAGIC 0x06102002
136
/*
 * __memp_bh_freeze --
 *	Save a buffer header to temporary storage in case it is needed later by
 *	a snapshot transaction.  This function should be called with the hash
 *	bucket locked and will exit with it locked, as it inserts a frozen
 *	buffer after writing the data.
 *
 *	dbmp:		per-process mpool handle.
 *	infop:		cache region containing the buffer.
 *	hp:		hash bucket holding the buffer (locked on entry/exit).
 *	bhp:		the buffer to freeze; must be clean, unlocked and
 *			unreferenced on entry (asserted below).
 *	need_frozenp:	set non-zero when the free-frozen header list is
 *			exhausted (or nearly so), telling the caller to
 *			allocate more frozen buffer headers.
 *
 *	Returns 0 on success, ENOMEM if no frozen header is available, or
 *	another error from the freezer-file I/O (mapped to EIO if no specific
 *	error code was set).
 *
 * PUBLIC: int __memp_bh_freeze __P((DB_MPOOL *, REGINFO *, DB_MPOOL_HASH *,
 * PUBLIC:     BH *, int *));
 */
int
__memp_bh_freeze(dbmp, infop, hp, bhp, need_frozenp)
	DB_MPOOL *dbmp;
	REGINFO *infop;
	DB_MPOOL_HASH *hp;
	BH *bhp;
	int *need_frozenp;
{
	BH *frozen_bhp;
	BH_FROZEN_ALLOC *frozen_alloc;
	DB_FH *fhp;
	ENV *env;
	MPOOL *c_mp;
	MPOOLFILE *bh_mfp;
	db_pgno_t maxpgno, newpgno, nextfree;
	size_t nio;
	int ret, t_ret;
	u_int32_t magic, nbucket, ncache, pagesize;
	char filename[100], *real_name;

	env = dbmp->env;
	c_mp = infop->primary;
	ret = 0;
	/* Find the associated MPOOLFILE. */
	bh_mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
	pagesize = bh_mfp->stat.st_pagesize;
	real_name = NULL;
	fhp = NULL;

	DB_ASSERT(env, bhp->ref == 0);
	DB_ASSERT(env, !F_ISSET(bhp, BH_DIRTY | BH_FROZEN | BH_LOCKED));

	/*
	 * Pin and lock the buffer so no other thread can reuse it while we
	 * drop the bucket mutex below to allocate a header and do file I/O.
	 */
	++bhp->ref;
	F_SET(bhp, BH_LOCKED);
	MVCC_MPROTECT(bhp->buf, pagesize, PROT_READ | PROT_WRITE);

	MUTEX_UNLOCK(env, hp->mtx_hash);

	/*
	 * Try to take a frozen buffer header from the region's free list;
	 * failing that, try to carve one out of any unallocated region
	 * space.  In either case, tell the caller when more headers need
	 * to be allocated.
	 */
	MPOOL_REGION_LOCK(env, infop);
	frozen_bhp = SH_TAILQ_FIRST(&c_mp->free_frozen, __bh);
	if (frozen_bhp != NULL) {
		SH_TAILQ_REMOVE(&c_mp->free_frozen, frozen_bhp, hq, __bh);
		*need_frozenp = SH_TAILQ_EMPTY(&c_mp->free_frozen);
	} else {
		*need_frozenp = 1;

		/* There might be a small amount of unallocated space. */
		if (__env_alloc(infop,
		    sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE),
		    &frozen_alloc) == 0) {
			frozen_bhp = (BH *)(frozen_alloc + 1);
			SH_TAILQ_INSERT_TAIL(&c_mp->alloc_frozen,
			    frozen_alloc, links);
		}
	}
	MPOOL_REGION_UNLOCK(env, infop);
	MUTEX_LOCK(env, hp->mtx_hash);

	/*
	 * If we can't get a frozen buffer header, return ENOMEM immediately:
	 * we don't want to call __memp_alloc recursively.  __memp_alloc will
	 * turn the next free page it finds into frozen buffer headers.
	 */
	if (frozen_bhp == NULL) {
		ret = ENOMEM;
		goto err;
	}

	/*
	 * For now, keep things simple and have one file per page size per
	 * hash bucket.  This improves concurrency but can mean lots of files
	 * if there is lots of freezing.
	 */
	ncache = (u_int32_t)(infop - dbmp->reginfo);
	nbucket = (u_int32_t)(hp - (DB_MPOOL_HASH *)R_ADDR(infop, c_mp->htab));
	snprintf(filename, sizeof(filename), "__db.freezer.%lu.%lu.%luK",
	    (u_long)ncache, (u_long)nbucket, (u_long)pagesize / 1024);

	if ((ret = __db_appname(env, DB_APP_NONE, filename,
	    0, NULL, &real_name)) != 0)
		goto err;
	/*
	 * The freezer file layout is: a magic number, the page number of
	 * the first free page (0 if none), the largest allocated page
	 * number, then fixed-size data pages.  Free pages are threaded
	 * into a linked list through the first db_pgno_t of each page.
	 */
	if ((ret = __os_open(env, real_name, pagesize,
	    DB_OSO_CREATE | DB_OSO_EXCL, env->db_mode, &fhp)) == 0) {
		/* We're creating the file -- initialize the metadata page. */
		magic = DB_FREEZER_MAGIC;
		maxpgno = newpgno = 0;
		if ((ret = __os_write(env, fhp, &magic, sizeof(u_int32_t),
		    &nio)) < 0 || nio == 0 ||
		    (ret = __os_write(env, fhp, &newpgno, sizeof(db_pgno_t),
		    &nio)) < 0 || nio == 0 ||
		    (ret = __os_write(env, fhp, &maxpgno, sizeof(db_pgno_t),
		    &nio)) < 0 || nio == 0 ||
		    (ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
			goto err;
	} else if (ret == EEXIST)
		/* Another thread created the file first -- just open it. */
		ret = __os_open(
		    env, real_name, pagesize, 0, env->db_mode, &fhp);
	if (ret != 0)
		goto err;
	/* Read back and validate the metadata. */
	if ((ret = __os_read(env, fhp, &magic, sizeof(u_int32_t),
	    &nio)) < 0 || nio == 0 ||
	    (ret = __os_read(env, fhp, &newpgno, sizeof(db_pgno_t),
	    &nio)) < 0 || nio == 0 ||
	    (ret = __os_read(env, fhp, &maxpgno, sizeof(db_pgno_t),
	    &nio)) < 0 || nio == 0)
		goto err;
	if (magic != DB_FREEZER_MAGIC) {
		ret = EINVAL;
		goto err;
	}
	/*
	 * Pick a destination page: extend the file if the free list is
	 * empty, otherwise pop the head of the free list and record its
	 * successor as the new first-free page.
	 */
	if (newpgno == 0) {
		newpgno = ++maxpgno;
		if ((ret = __os_seek(env,
		    fhp, 0, 0, sizeof(u_int32_t) + sizeof(db_pgno_t))) != 0 ||
		    (ret = __os_write(env, fhp, &maxpgno, sizeof(db_pgno_t),
		    &nio)) < 0 || nio == 0)
			goto err;
	} else {
		if ((ret = __os_seek(env, fhp, newpgno, pagesize, 0)) != 0 ||
		    (ret = __os_read(env, fhp, &nextfree, sizeof(db_pgno_t),
		    &nio)) < 0 || nio == 0)
			goto err;
		if ((ret =
		    __os_seek(env, fhp, 0, 0, sizeof(u_int32_t))) != 0 ||
		    (ret = __os_write(env, fhp, &nextfree, sizeof(db_pgno_t),
		    &nio)) < 0 || nio == 0)
			goto err;
	}

	/* Write the buffer to the allocated page. */
	if ((ret = __os_io(env, DB_IO_WRITE, fhp, newpgno, pagesize, 0,
	    pagesize, bhp->buf, &nio)) != 0 || nio == 0)
		goto err;

	/*
	 * Set up the frozen_bhp with the freezer page number.  The original
	 * buffer header is about to be freed, so transfer resources to the
	 * frozen header here.
	 */
#ifdef DIAG_MVCC
	memcpy(frozen_bhp, bhp, SSZ(BH, align_off));
#else
	memcpy(frozen_bhp, bhp, SSZA(BH, buf));
#endif
	frozen_bhp->ref = frozen_bhp->ref_sync = 0;
	F_SET(frozen_bhp, BH_FROZEN);
	F_CLR(frozen_bhp, BH_LOCKED);
	/* Frozen buffers are the last candidates for (re)eviction. */
	frozen_bhp->priority = UINT32_MAX;
	((BH_FROZEN_PAGE *)frozen_bhp)->spgno = newpgno;

	/*
	 * We're about to add the frozen buffer header to the version chain, so
	 * we have temporarily created another buffer for the owning
	 * transaction.
	 */
	if (frozen_bhp->td_off != INVALID_ROFF &&
	    (ret = __txn_add_buffer(env, BH_OWNER(env, frozen_bhp))) != 0) {
		(void)__env_panic(env, ret);
		goto err;
	}

	/*
	 * Add the frozen buffer to the version chain and update the hash
	 * bucket if this is the head revision.  The original buffer will be
	 * freed by __memp_alloc calling __memp_bhfree (assuming no other
	 * thread has blocked waiting for it while we were freezing).
	 */
	SH_CHAIN_INSERT_AFTER(bhp, frozen_bhp, vc, __bh);
	if (!SH_CHAIN_HASNEXT(frozen_bhp, vc)) {
		SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket,
		    bhp, frozen_bhp, hq, __bh);
		SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
	}

	/*
	 * Increment the file's block count -- freeing the original buffer will
	 * decrement it.
	 */
	MUTEX_LOCK(env, bh_mfp->mutex);
	++bh_mfp->block_cnt;
	MUTEX_UNLOCK(env, bh_mfp->mutex);

	STAT(++hp->hash_frozen);

	/*
	 * Error path (the "if (0)" keeps it out of the success flow): make
	 * sure a failure reports an error code, and return any frozen
	 * header we acquired to the region free list, respecting the
	 * region-then-bucket lock order used above.
	 */
	if (0) {
err:		if (ret == 0)
			ret = EIO;
		if (frozen_bhp != NULL) {
			MUTEX_UNLOCK(env, hp->mtx_hash);
			MPOOL_REGION_LOCK(env, infop);
			SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen,
			    frozen_bhp, hq);
			MPOOL_REGION_UNLOCK(env, infop);
			MUTEX_LOCK(env, hp->mtx_hash);
		}
	}
	/* Common cleanup for both success and error paths. */
	if (real_name != NULL)
		__os_free(env, real_name);
	if (fhp != NULL &&
	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
		ret = t_ret;
	if (ret != 0 && ret != ENOMEM)
		__db_err(env, ret, "__memp_bh_freeze");
	/* Drop the pin and lock taken at the top of the function. */
	F_CLR(bhp, BH_LOCKED);
	--bhp->ref;

	/*
	 * If a thread of control is waiting on this buffer, wake it up.
	 */
	if (F_ISSET(hp, IO_WAITER)) {
		F_CLR(hp, IO_WAITER);
		MUTEX_UNLOCK(env, hp->mtx_io);
	}
	return (ret);
}
362
363static int
364__pgno_cmp(a, b)
365	const void *a, *b;
366{
367	db_pgno_t *ap, *bp;
368
369	ap = (db_pgno_t *)a;
370	bp = (db_pgno_t *)b;
371
372	return (int)(*ap - *bp);
373}
374
/*
 * __memp_bh_thaw --
 *	Free a buffer header in temporary storage.  Optionally restore the
 *	buffer (if alloc_bhp != NULL).  This function should be
 *	called with the hash bucket locked and will return with it locked.
 *
 *	dbmp:		per-process mpool handle.
 *	infop:		cache region containing the buffer.
 *	hp:		hash bucket holding the buffer (locked on entry/exit).
 *	frozen_bhp:	the frozen buffer header to thaw or discard.
 *	alloc_bhp:	destination header for the restored page, or NULL to
 *			discard the frozen version (legal only when it is
 *			obsolete -- asserted below).
 *
 * PUBLIC: int __memp_bh_thaw __P((DB_MPOOL *, REGINFO *,
 * PUBLIC:	DB_MPOOL_HASH *, BH *, BH *));
 */
int
__memp_bh_thaw(dbmp, infop, hp, frozen_bhp, alloc_bhp)
	DB_MPOOL *dbmp;
	REGINFO *infop;
	DB_MPOOL_HASH *hp;
	BH *frozen_bhp, *alloc_bhp;
{
	BH *next_bhp;
	DB_FH *fhp;
	ENV *env;
#ifdef DIAGNOSTIC
	DB_LSN vlsn;
#endif
	MPOOL *c_mp;
	MPOOLFILE *bh_mfp;
	db_pgno_t *freelist, *ppgno, freepgno, maxpgno, spgno;
	size_t nio;
	u_int32_t listsize, magic, nbucket, ncache, ntrunc, nfree, pagesize;
#ifdef HAVE_FTRUNCATE
	int i;
#endif
	int needfree, ret, t_ret;
	char filename[100], *real_name;

	env = dbmp->env;
	fhp = NULL;
	c_mp = infop->primary;
	bh_mfp = R_ADDR(dbmp->reginfo, frozen_bhp->mf_offset);
	freelist = NULL;
	pagesize = bh_mfp->stat.st_pagesize;
	ret = 0;
	real_name = NULL;

	DB_ASSERT(env, F_ISSET(frozen_bhp, BH_FROZEN));
	DB_ASSERT(env, !F_ISSET(frozen_bhp, BH_LOCKED));
	DB_ASSERT(env, alloc_bhp != NULL ||
	    BH_OBSOLETE(frozen_bhp, hp->old_reader, vlsn));

	/* The freezer-file page number where this buffer was saved. */
	spgno = ((BH_FROZEN_PAGE *)frozen_bhp)->spgno;

	/*
	 * Copy the frozen header's metadata into the new header, which
	 * starts out pinned (ref == 1) and unfrozen.
	 */
	if (alloc_bhp != NULL) {
#ifdef DIAG_MVCC
		memcpy(alloc_bhp, frozen_bhp, SSZ(BH, align_off));
#else
		memcpy(alloc_bhp, frozen_bhp, SSZA(BH, buf));
#endif
		alloc_bhp->ref = 1;
		alloc_bhp->ref_sync = 0;
		F_CLR(alloc_bhp, BH_FROZEN);
	}

	/* Lock the frozen header while we operate on it. */
	F_SET(frozen_bhp, BH_LOCKED);

	/*
	 * For now, keep things simple and have one file per page size per
	 * hash bucket.  This improves concurrency but can mean lots of files
	 * if there is lots of freezing.
	 */
	ncache = (u_int32_t)(infop - dbmp->reginfo);
	nbucket = (u_int32_t)(hp - (DB_MPOOL_HASH *)R_ADDR(infop, c_mp->htab));
	snprintf(filename, sizeof(filename), "__db.freezer.%lu.%lu.%luK",
	    (u_long)ncache, (u_long)nbucket, (u_long)pagesize / 1024);

	if ((ret = __db_appname(
	    env, DB_APP_NONE, filename, 0, NULL, &real_name)) != 0)
		goto err;

	if ((ret = __os_open(
	    env, real_name, pagesize, 0, env->db_mode, &fhp)) != 0)
		goto err;

	/*
	 * Read the first free page number -- we're about to free the page
	 * after we read it.
	 */
	if ((ret = __os_read(env, fhp, &magic, sizeof(u_int32_t),
	    &nio)) < 0 || nio == 0 ||
	    (ret = __os_read(env, fhp, &freepgno, sizeof(db_pgno_t),
	    &nio)) < 0 || nio == 0 ||
	    (ret = __os_read(env, fhp, &maxpgno, sizeof(db_pgno_t),
	    &nio)) < 0 || nio == 0)
		goto err;

	if (magic != DB_FREEZER_MAGIC) {
		ret = EINVAL;
		goto err;
	}

	/* Read the buffer from the frozen page. */
	if (alloc_bhp != NULL &&
	    ((ret = __os_io(env, DB_IO_READ, fhp, spgno, pagesize,
	    0, pagesize, alloc_bhp->buf, &nio)) != 0 || nio == 0))
		goto err;

	/*
	 * Free the page from the file.  If it's the last page, truncate.
	 * Otherwise, update free page linked list.
	 */
	needfree = 1;
	if (spgno == maxpgno) {
		/*
		 * Collect the whole free list in memory so we can find
		 * the contiguous run of free pages at the end of the file
		 * (which can then be truncated away).
		 */
		listsize = 100;
		if ((ret = __os_malloc(env,
		    listsize * sizeof(db_pgno_t), &freelist)) != 0)
			goto err;
		nfree = 0;
		while (freepgno != 0) {
			/* Grow the list, keeping room for spgno below. */
			if (nfree == listsize - 1) {
				listsize *= 2;
				if ((ret = __os_realloc(env,
				    listsize * sizeof(db_pgno_t),
				    &freelist)) != 0)
					goto err;
			}
			freelist[nfree++] = freepgno;
			/* Follow the on-disk link to the next free page. */
			if ((ret = __os_seek(
			    env, fhp, freepgno, pagesize, 0)) != 0 ||
			    (ret = __os_read(env, fhp, &freepgno,
			    sizeof(db_pgno_t), &nio)) < 0 || nio == 0)
				goto err;
		}
		freelist[nfree++] = spgno;
		/*
		 * Sort the free pages and find the start of the maximal
		 * run of consecutive page numbers ending at maxpgno.
		 */
		qsort(freelist, nfree, sizeof(db_pgno_t), __pgno_cmp);
		for (ppgno = &freelist[nfree - 1]; ppgno > freelist; ppgno--)
			if (*(ppgno - 1) != *ppgno - 1)
				break;
		ntrunc = (u_int32_t)(&freelist[nfree] - ppgno);
		if (ntrunc == (u_int32_t)maxpgno) {
			/* Every page is free -- remove the file entirely. */
			needfree = 0;
			ret = __os_closehandle(env, fhp);
			fhp = NULL;
			if (ret != 0 ||
			    (ret = __os_unlink(env, real_name, 0)) != 0)
				goto err;
		}
#ifdef HAVE_FTRUNCATE
		else {
			/* Truncate the trailing run of free pages. */
			maxpgno -= (db_pgno_t)ntrunc;
			if ((ret = __os_truncate(env, fhp,
			    maxpgno + 1, pagesize)) != 0)
				goto err;

			/* Fix up the linked list */
			freelist[nfree - ntrunc] = 0;
			if ((ret = __os_seek(
			    env, fhp, 0, 0, sizeof(u_int32_t))) != 0 ||
			    (ret = __os_write(env, fhp, &freelist[0],
			    sizeof(db_pgno_t), &nio)) < 0 || nio == 0 ||
			    (ret = __os_write(env, fhp, &maxpgno,
			    sizeof(db_pgno_t), &nio)) < 0 || nio == 0)
				goto err;

			/* Rewrite the surviving free-page links in order. */
			for (i = 0; i < (int)(nfree - ntrunc); i++)
				if ((ret = __os_seek(env,
				    fhp, freelist[i], pagesize, 0)) != 0 ||
				    (ret = __os_write(env, fhp,
				    &freelist[i + 1], sizeof(db_pgno_t),
				    &nio)) < 0 || nio == 0)
					goto err;
			needfree = 0;
		}
#endif
	}
	/* Push spgno onto the head of the on-disk free list. */
	if (needfree &&
	    ((ret = __os_seek(env, fhp, spgno, pagesize, 0)) != 0 ||
	    (ret = __os_write(env, fhp, &freepgno, sizeof(db_pgno_t),
	    &nio)) < 0 || nio == 0 ||
	    (ret = __os_seek(env, fhp, 0, 0, sizeof(u_int32_t))) != 0 ||
	    (ret = __os_write(env, fhp, &spgno, sizeof(db_pgno_t),
	    &nio)) < 0 || nio == 0))
		goto err;

	/*
	 * Add the thawed buffer (if any) to the version chain.  We can't
	 * do this any earlier, because we can't guarantee that another thread
	 * won't be waiting for it, which means we can't clean up if there are
	 * errors reading from the freezer.  We can't do it any later, because
	 * we're about to free frozen_bhp, and without it we would need to do
	 * another cache lookup to find out where the new page should live.
	 */
	if (alloc_bhp != NULL) {
		alloc_bhp->priority = c_mp->lru_count;

		SH_CHAIN_INSERT_AFTER(frozen_bhp, alloc_bhp, vc, __bh);
		if (!SH_CHAIN_HASNEXT(alloc_bhp, vc)) {
			SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket,
			    frozen_bhp, alloc_bhp, hq, __bh);
			SH_TAILQ_REMOVE(&hp->hash_bucket, frozen_bhp, hq, __bh);
		}
	}

	/*
	 * Unlink the frozen header from the hash bucket (promoting the next
	 * older version, if any, when it was the head) and from its version
	 * chain.
	 */
	if ((next_bhp = SH_CHAIN_NEXT(frozen_bhp, vc, __bh)) == NULL) {
		if ((next_bhp = SH_CHAIN_PREV(frozen_bhp, vc, __bh)) != NULL)
			SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, frozen_bhp,
			    next_bhp, hq, __bh);
		SH_TAILQ_REMOVE(&hp->hash_bucket, frozen_bhp, hq, __bh);
	}
	SH_CHAIN_REMOVE(frozen_bhp, vc, __bh);

	/*
	 * If other threads are waiting for this buffer as well, they will have
	 * incremented the reference count and will be waiting on the I/O mutex.
	 * For that reason, we can't unconditionally free the memory here.
	 */
	if (--frozen_bhp->ref == 0) {
		MUTEX_UNLOCK(env, hp->mtx_hash);

		/*
		 * Discarding the last reference to an owned buffer: drop it
		 * from the owning transaction's buffer count.
		 */
		if (alloc_bhp == NULL && frozen_bhp->td_off != INVALID_ROFF &&
		    (ret = __txn_remove_buffer(env,
		    BH_OWNER(env, frozen_bhp), MUTEX_INVALID)) != 0) {
			(void)__env_panic(env, ret);
			goto err;
		}

		/*
		 * We need to be careful in the error case, because our caller
		 * will attempt to free frozen_bhp.
		 */
		MPOOL_REGION_LOCK(env, infop);
		SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen, frozen_bhp, hq);
		MPOOL_REGION_UNLOCK(env, infop);
		MUTEX_LOCK(env, hp->mtx_hash);
	} else {
		/* Leave the header for the last waiter to clean up. */
		F_SET(frozen_bhp, BH_THAWED);
		F_CLR(frozen_bhp, BH_LOCKED);
	}

#ifdef HAVE_STATISTICS
	if (alloc_bhp != NULL)
		++hp->hash_thawed;
	else
		++hp->hash_frozen_freed;
#endif

	/* Error path; guarantee a non-zero return on failure. */
	if (0) {
err:		if (ret == 0)
			ret = EIO;
	}
	/* Common cleanup for both success and error paths. */
	if (real_name != NULL)
		__os_free(env, real_name);
	if (freelist != NULL)
		__os_free(env, freelist);
	if (fhp != NULL &&
	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
		ret = t_ret;
	if (ret != 0)
		__db_err(env, ret, "__memp_bh_thaw");

	/*
	 * If a thread of control is waiting on this buffer, wake it up.
	 */
	if (F_ISSET(hp, IO_WAITER)) {
		F_CLR(hp, IO_WAITER);
		MUTEX_UNLOCK(env, hp->mtx_io);
	}

	return (ret);
}
641