1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 2006-2009 Oracle.  All rights reserved.
5 *
6 * $Id$
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/log.h"
13#include "dbinc/mp.h"
14#include "dbinc/txn.h"
15
16static int __pgno_cmp __P((const void *, const void *));
17
18/*
19 * __memp_bh_settxn --
20 *	Set the transaction that owns the given buffer.
21 *
22 * PUBLIC: int __memp_bh_settxn __P((DB_MPOOL *, MPOOLFILE *mfp, BH *, void *));
23 */
24int
25__memp_bh_settxn(dbmp, mfp, bhp, vtd)
26	DB_MPOOL *dbmp;
27	MPOOLFILE *mfp;
28	BH *bhp;
29	void *vtd;
30{
31	ENV *env;
32	TXN_DETAIL *td;
33
34	env = dbmp->env;
35	td = (TXN_DETAIL *)vtd;
36
37	if (td == NULL) {
38		__db_errx(env,
39		      "%s: non-transactional update to a multiversion file",
40		    __memp_fns(dbmp, mfp));
41		return (EINVAL);
42	}
43
44	if (bhp->td_off != INVALID_ROFF) {
45		DB_ASSERT(env, BH_OWNER(env, bhp) == td);
46		return (0);
47	}
48
49	bhp->td_off = R_OFFSET(&env->tx_handle->reginfo, td);
50	return (__txn_add_buffer(env, td));
51}
52
53/*
54 * __memp_skip_curadj --
55 *	Indicate whether a cursor adjustment can be skipped for a snapshot
56 *	cursor.
57 *
58 * PUBLIC: int __memp_skip_curadj __P((DBC *, db_pgno_t));
59 */
60int
61__memp_skip_curadj(dbc, pgno)
62	DBC * dbc;
63	db_pgno_t pgno;
64{
65	BH *bhp;
66	DB_MPOOL *dbmp;
67	DB_MPOOLFILE *dbmfp;
68	DB_MPOOL_HASH *hp;
69	DB_TXN *txn;
70	ENV *env;
71	MPOOLFILE *mfp;
72	REGINFO *infop;
73	roff_t mf_offset;
74	int ret, skip;
75	u_int32_t bucket;
76
77	env = dbc->env;
78	dbmp = env->mp_handle;
79	dbmfp = dbc->dbp->mpf;
80	mfp = dbmfp->mfp;
81	mf_offset = R_OFFSET(dbmp->reginfo, mfp);
82	skip = 0;
83
84	for (txn = dbc->txn; txn->parent != NULL; txn = txn->parent)
85		;
86
87	/*
88	 * Determine the cache and hash bucket where this page lives and get
89	 * local pointers to them.  Reset on each pass through this code, the
90	 * page number can change.
91	 */
92	MP_GET_BUCKET(env, mfp, pgno, &infop, hp, bucket, ret);
93	if (ret != 0) {
94		/* Panic: there is no way to return the error. */
95		(void)__env_panic(env, ret);
96		return (0);
97	}
98
99	SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) {
100		if (bhp->pgno != pgno || bhp->mf_offset != mf_offset)
101			continue;
102
103		if (!BH_OWNED_BY(env, bhp, txn))
104			skip = 1;
105		break;
106	}
107	MUTEX_UNLOCK(env, hp->mtx_hash);
108
109	return (skip);
110}
111
112#define	DB_FREEZER_MAGIC 0x06102002
113
114/*
115 * __memp_bh_freeze --
116 *	Save a buffer to temporary storage in case it is needed later by
117 *	a snapshot transaction.  This function should be called with the buffer
118 *	locked and will exit with it locked.  A BH_FROZEN buffer header is
119 *	allocated to represent the frozen data in mpool.
120 *
121 * PUBLIC: int __memp_bh_freeze __P((DB_MPOOL *, REGINFO *, DB_MPOOL_HASH *,
122 * PUBLIC:     BH *, int *));
123 */
int
__memp_bh_freeze(dbmp, infop, hp, bhp, need_frozenp)
	DB_MPOOL *dbmp;
	REGINFO *infop;
	DB_MPOOL_HASH *hp;
	BH *bhp;
	int *need_frozenp;
{
	BH *frozen_bhp;
	BH_FROZEN_ALLOC *frozen_alloc;
	DB_FH *fhp;
	ENV *env;
	MPOOL *c_mp;
	MPOOLFILE *mfp;
	db_mutex_t mutex;
	db_pgno_t maxpgno, newpgno, nextfree;
	size_t nio;
	int created, h_locked, ret, t_ret;
	u_int32_t magic, nbucket, ncache, pagesize;
	char filename[100], *real_name;

	env = dbmp->env;
	c_mp = infop->primary;
	created = h_locked = ret = 0;
	/* Find the associated MPOOLFILE. */
	mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
	pagesize = mfp->stat.st_pagesize;
	real_name = NULL;
	fhp = NULL;

	/* Make sure the buffer is accessible before copying it out. */
	MVCC_MPROTECT(bhp->buf, pagesize, PROT_READ | PROT_WRITE);

	/*
	 * Get a frozen buffer header, reusing one from the region's free
	 * list when possible.  *need_frozenp tells the caller whether the
	 * supply of headers is (nearly) exhausted and more are needed.
	 */
	MPOOL_REGION_LOCK(env, infop);
	frozen_bhp = SH_TAILQ_FIRST(&c_mp->free_frozen, __bh);
	if (frozen_bhp != NULL) {
		SH_TAILQ_REMOVE(&c_mp->free_frozen, frozen_bhp, hq, __bh);
		*need_frozenp = SH_TAILQ_EMPTY(&c_mp->free_frozen);
	} else {
		*need_frozenp = 1;

		/* There might be a small amount of unallocated space. */
		if (__env_alloc(infop,
		    sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE),
		    &frozen_alloc) == 0) {
			frozen_bhp = (BH *)(frozen_alloc + 1);
			frozen_bhp->mtx_buf = MUTEX_INVALID;
			SH_TAILQ_INSERT_TAIL(&c_mp->alloc_frozen,
			    frozen_alloc, links);
		}
	}
	MPOOL_REGION_UNLOCK(env, infop);

	/*
	 * If we can't get a frozen buffer header, return ENOMEM immediately:
	 * we don't want to call __memp_alloc recursively.  __memp_alloc will
	 * turn the next free page it finds into frozen buffer headers.
	 */
	if (frozen_bhp == NULL) {
		ret = ENOMEM;
		goto err;
	}

	/*
	 * For now, keep things simple and have one file per page size per
	 * hash bucket.  This improves concurrency but can mean lots of files
	 * if there is lots of freezing.
	 */
	ncache = (u_int32_t)(infop - dbmp->reginfo);
	nbucket = (u_int32_t)(hp - (DB_MPOOL_HASH *)R_ADDR(infop, c_mp->htab));
	snprintf(filename, sizeof(filename), "__db.freezer.%lu.%lu.%luK",
	    (u_long)ncache, (u_long)nbucket, (u_long)pagesize / 1024);

	if ((ret = __db_appname(env,
	    DB_APP_NONE, filename, NULL, &real_name)) != 0)
		goto err;

	MUTEX_LOCK(env, hp->mtx_hash);
	h_locked = 1;
	DB_ASSERT(env, F_ISSET(bhp, BH_EXCLUSIVE) && !F_ISSET(bhp, BH_FROZEN));

	/* Only clean buffers with no other references can be frozen. */
	if (BH_REFCOUNT(bhp) > 1 || F_ISSET(bhp, BH_DIRTY)) {
		ret = EBUSY;
		goto err;
	}

	/*
	 * Open (or create) the freezer file.  Its header holds a magic
	 * number, the first free page number (0 == no free page) and the
	 * maximum page number; frozen pages follow, starting at page 1.
	 */
	if ((ret = __os_open(env, real_name, pagesize,
	    DB_OSO_CREATE | DB_OSO_EXCL, env->db_mode, &fhp)) == 0) {
		/* We're creating the file -- initialize the metadata page. */
		created = 1;
		magic = DB_FREEZER_MAGIC;
		maxpgno = newpgno = 0;
		if ((ret = __os_write(env, fhp,
		    &magic, sizeof(u_int32_t), &nio)) != 0 ||
		    (ret = __os_write(env, fhp,
		    &newpgno, sizeof(db_pgno_t), &nio)) != 0 ||
		    (ret = __os_write(env, fhp,
		    &maxpgno, sizeof(db_pgno_t), &nio)) != 0 ||
		    (ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
			goto err;
	} else if (ret == EEXIST)
		ret = __os_open(env,
		    real_name, pagesize, 0, env->db_mode, &fhp);
	if (ret != 0)
		goto err;
	if ((ret = __os_read(env, fhp,
	    &magic, sizeof(u_int32_t), &nio)) != 0 ||
	    (ret = __os_read(env, fhp,
	    &newpgno, sizeof(db_pgno_t), &nio)) != 0 ||
	    (ret = __os_read(env, fhp,
	    &maxpgno, sizeof(db_pgno_t), &nio)) != 0)
		goto err;
	if (magic != DB_FREEZER_MAGIC) {
		ret = EINVAL;
		goto err;
	}
	if (newpgno == 0) {
		/* No free page available: extend the file by one page. */
		newpgno = ++maxpgno;
		if ((ret = __os_seek(env,
		    fhp, 0, 0, sizeof(u_int32_t) + sizeof(db_pgno_t))) != 0 ||
		    (ret = __os_write(env, fhp, &maxpgno, sizeof(db_pgno_t),
		    &nio)) != 0)
			goto err;
	} else {
		/*
		 * Reuse the first free page.  Each free page stores the
		 * number of the next free page, so unlink it from the list
		 * by making its successor the new list head.
		 */
		if ((ret = __os_seek(env, fhp, newpgno, pagesize, 0)) != 0 ||
		    (ret = __os_read(env, fhp, &nextfree, sizeof(db_pgno_t),
		    &nio)) != 0)
			goto err;
		if ((ret =
		    __os_seek(env, fhp, 0, 0, sizeof(u_int32_t))) != 0 ||
		    (ret = __os_write(env, fhp, &nextfree, sizeof(db_pgno_t),
		    &nio)) != 0)
			goto err;
	}

	/* Write the buffer to the allocated page. */
	if ((ret = __os_io(env, DB_IO_WRITE, fhp, newpgno, pagesize, 0,
	    pagesize, bhp->buf, &nio)) != 0)
		goto err;

	ret = __os_closehandle(env, fhp);
	fhp = NULL;
	if (ret != 0)
		goto err;

	/*
	 * Set up the frozen_bhp with the freezer page number.  The original
	 * buffer header is about to be freed, so transfer resources to the
	 * frozen header here.
	 */
	mutex = frozen_bhp->mtx_buf;
#ifdef DIAG_MVCC
	memcpy(frozen_bhp, bhp, SSZ(BH, align_off));
#else
	memcpy(frozen_bhp, bhp, SSZA(BH, buf));
#endif
	atomic_init(&frozen_bhp->ref, 0);
	/* Keep a previously allocated buffer mutex; allocate otherwise. */
	if (mutex != MUTEX_INVALID)
		frozen_bhp->mtx_buf = mutex;
	else if ((ret = __mutex_alloc(env, MTX_MPOOL_BH,
	    DB_MUTEX_SHARED, &frozen_bhp->mtx_buf)) != 0)
		goto err;
	F_SET(frozen_bhp, BH_FROZEN);
	F_CLR(frozen_bhp, BH_EXCLUSIVE);
	((BH_FROZEN_PAGE *)frozen_bhp)->spgno = newpgno;

	/*
	 * We're about to add the frozen buffer header to the version chain, so
	 * we have temporarily created another buffer for the owning
	 * transaction.
	 */
	if (frozen_bhp->td_off != INVALID_ROFF &&
	    (ret = __txn_add_buffer(env, BH_OWNER(env, frozen_bhp))) != 0) {
		(void)__env_panic(env, ret);
		goto err;
	}

	/*
	 * Add the frozen buffer to the version chain and update the hash
	 * bucket if this is the head revision.  The original buffer will be
	 * freed by __memp_alloc calling __memp_bhfree (assuming no other
	 * thread has blocked waiting for it while we were freezing).
	 */
	SH_CHAIN_INSERT_AFTER(bhp, frozen_bhp, vc, __bh);
	if (!SH_CHAIN_HASNEXT(frozen_bhp, vc)) {
		SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket,
		    bhp, frozen_bhp, hq, __bh);
		SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
	}
	MUTEX_UNLOCK(env, hp->mtx_hash);
	h_locked = 0;

	/*
	 * Increment the file's block count -- freeing the original buffer will
	 * decrement it.
	 */
	MUTEX_LOCK(env, mfp->mutex);
	++mfp->block_cnt;
	MUTEX_UNLOCK(env, mfp->mutex);

	STAT(++hp->hash_frozen);

	/*
	 * Error cleanup, reached only via "goto err": close the handle,
	 * remove a file we created, drop the hash bucket lock and return the
	 * frozen header to the region free list.
	 */
	if (0) {
err:		if (fhp != NULL &&
		    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
			ret = t_ret;
		if (created) {
			DB_ASSERT(env, h_locked);
			if ((t_ret = __os_unlink(env, real_name, 0)) != 0 &&
			    ret == 0)
				ret = t_ret;
		}
		if (h_locked)
			MUTEX_UNLOCK(env, hp->mtx_hash);
		if (ret == 0)
			ret = EIO;
		if (frozen_bhp != NULL) {
			MPOOL_REGION_LOCK(env, infop);
			SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen,
			    frozen_bhp, hq);
			MPOOL_REGION_UNLOCK(env, infop);
		}
	}
	if (real_name != NULL)
		__os_free(env, real_name);
	/* EBUSY and ENOMEM are expected outcomes, not worth logging. */
	if (ret != 0 && ret != EBUSY && ret != ENOMEM)
		__db_err(env, ret, "__memp_bh_freeze");

	return (ret);
}
353
354static int
355__pgno_cmp(a, b)
356	const void *a, *b;
357{
358	db_pgno_t *ap, *bp;
359
360	ap = (db_pgno_t *)a;
361	bp = (db_pgno_t *)b;
362
363	return (int)(*ap - *bp);
364}
365
366/*
367 * __memp_bh_thaw --
368 *	Free a buffer header in temporary storage.  Optionally restore the
369 *	buffer (if alloc_bhp != NULL).  This function should be
370 *	called with the hash bucket locked and will return with it unlocked.
371 *
372 * PUBLIC: int __memp_bh_thaw __P((DB_MPOOL *, REGINFO *,
373 * PUBLIC:	DB_MPOOL_HASH *, BH *, BH *));
374 */
int
__memp_bh_thaw(dbmp, infop, hp, frozen_bhp, alloc_bhp)
	DB_MPOOL *dbmp;
	REGINFO *infop;
	DB_MPOOL_HASH *hp;
	BH *frozen_bhp, *alloc_bhp;
{
	DB_FH *fhp;
	ENV *env;
#ifdef DIAGNOSTIC
	DB_LSN vlsn;
#endif
	MPOOL *c_mp;
	MPOOLFILE *mfp;
	db_mutex_t mutex;
	db_pgno_t *freelist, *ppgno, freepgno, maxpgno, spgno;
	size_t nio;
	u_int32_t listsize, magic, nbucket, ncache, ntrunc, nfree, pagesize;
#ifdef HAVE_FTRUNCATE
	int i;
#endif
	int h_locked, needfree, ret, t_ret;
	char filename[100], *real_name;

	env = dbmp->env;
	fhp = NULL;
	c_mp = infop->primary;
	mfp = R_ADDR(dbmp->reginfo, frozen_bhp->mf_offset);
	freelist = NULL;
	pagesize = mfp->stat.st_pagesize;
	ret = 0;
	real_name = NULL;

	MUTEX_REQUIRED(env, hp->mtx_hash);
	DB_ASSERT(env, F_ISSET(frozen_bhp, BH_EXCLUSIVE) || alloc_bhp == NULL);
	h_locked = 1;

	DB_ASSERT(env, F_ISSET(frozen_bhp, BH_FROZEN) &&
	    !F_ISSET(frozen_bhp, BH_THAWED));
	DB_ASSERT(env, alloc_bhp != NULL ||
	    SH_CHAIN_SINGLETON(frozen_bhp, vc) ||
	    (SH_CHAIN_HASNEXT(frozen_bhp, vc) &&
	    BH_OBSOLETE(frozen_bhp, hp->old_reader, vlsn)));
	DB_ASSERT(env, alloc_bhp == NULL || !F_ISSET(alloc_bhp, BH_FROZEN));

	/* The freezer file page holding this buffer's frozen contents. */
	spgno = ((BH_FROZEN_PAGE *)frozen_bhp)->spgno;

	/*
	 * If restoring the buffer, copy the header fields from the frozen
	 * header into the newly allocated one, preserving the new header's
	 * own mutex, and take a reference on behalf of the caller.
	 */
	if (alloc_bhp != NULL) {
		mutex = alloc_bhp->mtx_buf;
#ifdef DIAG_MVCC
		memcpy(alloc_bhp, frozen_bhp, SSZ(BH, align_off));
#else
		memcpy(alloc_bhp, frozen_bhp, SSZA(BH, buf));
#endif
		alloc_bhp->mtx_buf = mutex;
		MUTEX_LOCK(env, alloc_bhp->mtx_buf);
		atomic_init(&alloc_bhp->ref, 1);
		F_CLR(alloc_bhp, BH_FROZEN);
	}

	/*
	 * For now, keep things simple and have one file per page size per
	 * hash bucket.  This improves concurrency but can mean lots of files
	 * if there is lots of freezing.
	 */
	ncache = (u_int32_t)(infop - dbmp->reginfo);
	nbucket = (u_int32_t)(hp - (DB_MPOOL_HASH *)R_ADDR(infop, c_mp->htab));
	snprintf(filename, sizeof(filename), "__db.freezer.%lu.%lu.%luK",
	    (u_long)ncache, (u_long)nbucket, (u_long)pagesize / 1024);

	if ((ret = __db_appname(env,
	    DB_APP_NONE, filename, NULL, &real_name)) != 0)
		goto err;
	if ((ret = __os_open(env,
	    real_name, pagesize, 0, env->db_mode, &fhp)) != 0)
		goto err;

	/*
	 * Read the first free page number -- we're about to free the page
	 * after we read it.
	 */
	if ((ret = __os_read(env, fhp, &magic, sizeof(u_int32_t), &nio)) != 0 ||
	    (ret =
	    __os_read(env, fhp, &freepgno, sizeof(db_pgno_t), &nio)) != 0 ||
	    (ret = __os_read(env, fhp, &maxpgno, sizeof(db_pgno_t), &nio)) != 0)
		goto err;

	if (magic != DB_FREEZER_MAGIC) {
		ret = EINVAL;
		goto err;
	}

	/* Read the buffer from the frozen page. */
	if (alloc_bhp != NULL) {
		DB_ASSERT(env, !F_ISSET(frozen_bhp, BH_FREED));
		if ((ret = __os_io(env, DB_IO_READ, fhp,
		    spgno, pagesize, 0, pagesize, alloc_bhp->buf, &nio)) != 0)
			goto err;
	}

	/*
	 * Free the page from the file.  If it's the last page, truncate.
	 * Otherwise, update free page linked list.
	 */
	needfree = 1;
	if (spgno == maxpgno) {
		/*
		 * Collect the free list into memory so we can find how many
		 * consecutive free pages sit at the end of the file.
		 */
		listsize = 100;
		if ((ret = __os_malloc(env,
		    listsize * sizeof(db_pgno_t), &freelist)) != 0)
			goto err;
		nfree = 0;
		while (freepgno != 0) {
			if (nfree == listsize - 1) {
				listsize *= 2;
				if ((ret = __os_realloc(env,
				    listsize * sizeof(db_pgno_t),
				    &freelist)) != 0)
					goto err;
			}
			freelist[nfree++] = freepgno;
			if ((ret = __os_seek(env, fhp,
			    freepgno, pagesize, 0)) != 0 ||
			    (ret = __os_read(env, fhp, &freepgno,
			    sizeof(db_pgno_t), &nio)) != 0)
				goto err;
		}
		freelist[nfree++] = spgno;
		qsort(freelist, nfree, sizeof(db_pgno_t), __pgno_cmp);
		/* Find the start of the contiguous run ending at maxpgno. */
		for (ppgno = &freelist[nfree - 1]; ppgno > freelist; ppgno--)
			if (*(ppgno - 1) != *ppgno - 1)
				break;
		ntrunc = (u_int32_t)(&freelist[nfree] - ppgno);
		if (ntrunc == (u_int32_t)maxpgno) {
			/* Every page is now free: remove the whole file. */
			needfree = 0;
			ret = __os_closehandle(env, fhp);
			fhp = NULL;
			if (ret != 0 ||
			    (ret = __os_unlink(env, real_name, 0)) != 0)
				goto err;
		}
#ifdef HAVE_FTRUNCATE
		else {
			/* Cut the trailing run of free pages off the file. */
			maxpgno -= (db_pgno_t)ntrunc;
			if ((ret = __os_truncate(env, fhp,
			    maxpgno + 1, pagesize)) != 0)
				goto err;

			/* Fix up the linked list */
			freelist[nfree - ntrunc] = 0;
			if ((ret = __os_seek(env, fhp,
			    0, 0, sizeof(u_int32_t))) != 0 ||
			    (ret = __os_write(env, fhp, &freelist[0],
			    sizeof(db_pgno_t), &nio)) != 0 ||
			    (ret = __os_write(env, fhp, &maxpgno,
			    sizeof(db_pgno_t), &nio)) != 0)
				goto err;

			/* Rewrite the remaining free pages in sorted order. */
			for (i = 0; i < (int)(nfree - ntrunc); i++)
				if ((ret = __os_seek(env,
				    fhp, freelist[i], pagesize, 0)) != 0 ||
				    (ret = __os_write(env, fhp,
				    &freelist[i + 1], sizeof(db_pgno_t),
				    &nio)) != 0)
					goto err;
			needfree = 0;
		}
#endif
	}
	if (needfree) {
		/* Push spgno onto the head of the free page list. */
		if ((ret = __os_seek(env, fhp, spgno, pagesize, 0)) != 0 ||
		    (ret = __os_write(env, fhp,
		    &freepgno, sizeof(db_pgno_t), &nio)) != 0 ||
		    (ret = __os_seek(env, fhp, 0, 0, sizeof(u_int32_t))) != 0 ||
		    (ret = __os_write(env, fhp,
		    &spgno, sizeof(db_pgno_t), &nio)) != 0)
			goto err;

		ret = __os_closehandle(env, fhp);
		fhp = NULL;
		if (ret != 0)
			goto err;
	}

	/*
	 * Add the thawed buffer (if any) to the version chain.  We can't
	 * do this any earlier, because we can't guarantee that another thread
	 * won't be waiting for it, which means we can't clean up if there are
	 * errors reading from the freezer.  We can't do it any later, because
	 * we're about to free frozen_bhp, and without it we would need to do
	 * another cache lookup to find out where the new page should live.
	 */
	MUTEX_REQUIRED(env, hp->mtx_hash);
	if (alloc_bhp != NULL) {
		alloc_bhp->priority = c_mp->lru_count;

		SH_CHAIN_INSERT_AFTER(frozen_bhp, alloc_bhp, vc, __bh);
		if (!SH_CHAIN_HASNEXT(alloc_bhp, vc)) {
			SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, frozen_bhp,
			    alloc_bhp, hq, __bh);
			SH_TAILQ_REMOVE(&hp->hash_bucket, frozen_bhp, hq, __bh);
		}
	} else if (!SH_CHAIN_HASNEXT(frozen_bhp, vc)) {
		/* Freeing a head revision: promote its predecessor, if any. */
		if (SH_CHAIN_HASPREV(frozen_bhp, vc))
			SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, frozen_bhp,
			    SH_CHAIN_PREV(frozen_bhp, vc, __bh), hq, __bh);
		SH_TAILQ_REMOVE(&hp->hash_bucket, frozen_bhp, hq, __bh);
	}
	SH_CHAIN_REMOVE(frozen_bhp, vc, __bh);

	/*
	 * If the buffer is being discarded rather than restored, drop the
	 * owning transaction's buffer count.
	 */
	if (alloc_bhp == NULL && frozen_bhp->td_off != INVALID_ROFF &&
	    (ret = __txn_remove_buffer(env,
	    BH_OWNER(env, frozen_bhp), MUTEX_INVALID)) != 0) {
		(void)__env_panic(env, ret);
		goto err;
	}
	frozen_bhp->td_off = INVALID_ROFF;

	/*
	 * If other threads are waiting for this buffer as well, they will have
	 * incremented the reference count and will be waiting on the mutex.
	 * For that reason, we can't unconditionally free the memory here.
	 */
	needfree = (atomic_dec(env, &frozen_bhp->ref) == 0);
	if (!needfree)
		F_SET(frozen_bhp, BH_THAWED);
	MUTEX_UNLOCK(env, hp->mtx_hash);
	if (F_ISSET(frozen_bhp, BH_EXCLUSIVE))
		MUTEX_UNLOCK(env, frozen_bhp->mtx_buf);
	h_locked = 0;
	/* Return the now-unused frozen header to the region free list. */
	if (needfree) {
		MPOOL_REGION_LOCK(env, infop);
		SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen, frozen_bhp, hq);
		MPOOL_REGION_UNLOCK(env, infop);
	}

#ifdef HAVE_STATISTICS
	if (alloc_bhp != NULL)
		++hp->hash_thawed;
	else
		++hp->hash_frozen_freed;
#endif

	/* Error cleanup, reached only via "goto err" above. */
	if (0) {
err:		if (h_locked)
			MUTEX_UNLOCK(env, hp->mtx_hash);
		if (ret == 0)
			ret = EIO;
	}
	if (real_name != NULL)
		__os_free(env, real_name);
	if (freelist != NULL)
		__os_free(env, freelist);
	if (fhp != NULL &&
	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
		ret = t_ret;
	if (ret != 0)
		__db_err(env, ret, "__memp_bh_thaw");

	return (ret);
}
635