ufs_dirhash.c revision 187474
1139825Simp/*-
2107868Siedowse * Copyright (c) 2001, 2002 Ian Dowse.  All rights reserved.
379561Siedowse *
479561Siedowse * Redistribution and use in source and binary forms, with or without
579561Siedowse * modification, are permitted provided that the following conditions
679561Siedowse * are met:
779561Siedowse * 1. Redistributions of source code must retain the above copyright
879561Siedowse *    notice, this list of conditions and the following disclaimer.
979561Siedowse * 2. Redistributions in binary form must reproduce the above copyright
1079561Siedowse *    notice, this list of conditions and the following disclaimer in the
1179561Siedowse *    documentation and/or other materials provided with the distribution.
1279561Siedowse *
1379561Siedowse * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1479561Siedowse * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1579561Siedowse * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1679561Siedowse * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1779561Siedowse * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1879561Siedowse * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1979561Siedowse * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2079561Siedowse * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2179561Siedowse * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2279561Siedowse * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2379561Siedowse * SUCH DAMAGE.
2479561Siedowse */
25116192Sobrien
2679561Siedowse/*
2779561Siedowse * This implements a hash-based lookup scheme for UFS directories.
2879561Siedowse */
2979561Siedowse
30116192Sobrien#include <sys/cdefs.h>
31116192Sobrien__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_dirhash.c 187474 2009-01-20 16:35:34Z jhb $");
32116192Sobrien
3379561Siedowse#include "opt_ufs.h"
3479561Siedowse
3579561Siedowse#ifdef UFS_DIRHASH
3679561Siedowse
3779561Siedowse#include <sys/param.h>
3879561Siedowse#include <sys/systm.h>
3979561Siedowse#include <sys/kernel.h>
4084811Sjhb#include <sys/lock.h>
4179561Siedowse#include <sys/mutex.h>
4279561Siedowse#include <sys/malloc.h>
4379561Siedowse#include <sys/fnv_hash.h>
4479561Siedowse#include <sys/proc.h>
4579561Siedowse#include <sys/bio.h>
4679561Siedowse#include <sys/buf.h>
4779561Siedowse#include <sys/vnode.h>
4879561Siedowse#include <sys/mount.h>
49183080Sjhb#include <sys/refcount.h>
5079561Siedowse#include <sys/sysctl.h>
51183080Sjhb#include <sys/sx.h>
5292768Sjeff#include <vm/uma.h>
5379561Siedowse
5479561Siedowse#include <ufs/ufs/quota.h>
5579561Siedowse#include <ufs/ufs/inode.h>
5679561Siedowse#include <ufs/ufs/dir.h>
5779561Siedowse#include <ufs/ufs/dirhash.h>
5879561Siedowse#include <ufs/ufs/extattr.h>
5979561Siedowse#include <ufs/ufs/ufsmount.h>
6079561Siedowse#include <ufs/ufs/ufs_extern.h>
6179561Siedowse
6279561Siedowse#define WRAPINCR(val, limit)	(((val) + 1 == (limit)) ? 0 : ((val) + 1))
6392807Sdwmalone#define WRAPDECR(val, limit)	(((val) == 0) ? ((limit) - 1) : ((val) - 1))
6479561Siedowse#define OFSFMT(vp)		((vp)->v_mount->mnt_maxsymlinklen <= 0)
6592098Siedowse#define BLKFREE2IDX(n)		((n) > DH_NFSTATS ? DH_NFSTATS : (n))
6679561Siedowse
67151897Srwatsonstatic MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables");
6879561Siedowse
69141631Sphkstatic SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
7079561Siedowse
7179561Siedowsestatic int ufs_mindirhashsize = DIRBLKSIZ * 5;
7279561SiedowseSYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW,
7379561Siedowse    &ufs_mindirhashsize,
7479561Siedowse    0, "minimum directory size in bytes for which to use hashed lookup");
7579561Siedowsestatic int ufs_dirhashmaxmem = 2 * 1024 * 1024;
7679561SiedowseSYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem,
7779561Siedowse    0, "maximum allowed dirhash memory usage");
7879561Siedowsestatic int ufs_dirhashmem;
7979561SiedowseSYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem,
8079561Siedowse    0, "current dirhash memory usage");
8185512Siedowsestatic int ufs_dirhashcheck = 0;
8279561SiedowseSYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck,
8379561Siedowse    0, "enable extra sanity tests");
8479561Siedowse
8579561Siedowse
8679561Siedowsestatic int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
8779561Siedowsestatic void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
8879561Siedowsestatic void ufsdirhash_delslot(struct dirhash *dh, int slot);
8979561Siedowsestatic int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
9079561Siedowse	   doff_t offset);
9179561Siedowsestatic doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
9279561Siedowsestatic int ufsdirhash_recycle(int wanted);
93178110Sjeffstatic void ufsdirhash_free_locked(struct inode *ip);
9479561Siedowse
9592768Sjeffstatic uma_zone_t	ufsdirhash_zone;
9679561Siedowse
97125854Sdwmalone#define DIRHASHLIST_LOCK() 		mtx_lock(&ufsdirhash_mtx)
98125854Sdwmalone#define DIRHASHLIST_UNLOCK() 		mtx_unlock(&ufsdirhash_mtx)
99125854Sdwmalone#define DIRHASH_BLKALLOC_WAITOK() 	uma_zalloc(ufsdirhash_zone, M_WAITOK)
100125854Sdwmalone#define DIRHASH_BLKFREE(ptr) 		uma_zfree(ufsdirhash_zone, (ptr))
101178110Sjeff#define	DIRHASH_ASSERT_LOCKED(dh)					\
102183080Sjhb    sx_assert(&(dh)->dh_lock, SA_LOCKED)
103125854Sdwmalone
10479561Siedowse/* Dirhash list; recently-used entries are near the tail. */
10579561Siedowsestatic TAILQ_HEAD(, dirhash) ufsdirhash_list;
10679561Siedowse
10779561Siedowse/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
10879561Siedowsestatic struct mtx	ufsdirhash_mtx;
10979561Siedowse
11079561Siedowse/*
111178110Sjeff * Locking:
11279561Siedowse *
113178110Sjeff * The relationship between inode and dirhash is protected either by an
114178110Sjeff * exclusive vnode lock or the vnode interlock where a shared vnode lock
115183080Sjhb * may be used.  The dirhash_mtx is acquired after the dirhash lock.  To
116183080Sjhb * handle teardown races, code wishing to lock the dirhash for an inode
117183080Sjhb * when using a shared vnode lock must obtain a private reference on the
118183080Sjhb * dirhash while holding the vnode interlock.  They can drop it once they
119183080Sjhb * have obtained the dirhash lock and verified that the dirhash wasn't
120183080Sjhb * recycled while they waited for the dirhash lock.
121178110Sjeff *
122178110Sjeff * ufsdirhash_build() acquires a shared lock on the dirhash when it is
123178110Sjeff * successful.  This lock is released after a call to ufsdirhash_lookup().
124178110Sjeff *
125178110Sjeff * Functions requiring exclusive access use ufsdirhash_acquire() which may
126178110Sjeff * free a dirhash structure that was recycled by ufsdirhash_recycle().
127178110Sjeff *
128178110Sjeff * The dirhash lock may be held across io operations.
129187474Sjhb *
130187474Sjhb * WITNESS reports a lock order reversal between the "bufwait" lock
131187474Sjhb * and the "dirhash" lock.  However, this specific reversal will not
132187474Sjhb * cause a deadlock.  To get a deadlock, one would have to lock a
133187474Sjhb * buffer followed by the dirhash while a second thread locked a
134187474Sjhb * buffer while holding the dirhash lock.  The second order can happen
135187474Sjhb * under a shared or exclusive vnode lock for the associated directory
136187474Sjhb * in lookup().  The first order, however, can only happen under an
137187474Sjhb * exclusive vnode lock (e.g. unlink(), rename(), etc.).  Thus, for
138187474Sjhb * a thread to be doing a "bufwait" -> "dirhash" order, it has to hold
139187474Sjhb * an exclusive vnode lock.  That exclusive vnode lock will prevent
140187474Sjhb * any other threads from doing a "dirhash" -> "bufwait" order.
14179561Siedowse */
14279561Siedowse
143183080Sjhbstatic void
144183080Sjhbufsdirhash_hold(struct dirhash *dh)
145183080Sjhb{
146183080Sjhb
147183080Sjhb	refcount_acquire(&dh->dh_refcount);
148183080Sjhb}
149183080Sjhb
150183080Sjhbstatic void
151183080Sjhbufsdirhash_drop(struct dirhash *dh)
152183080Sjhb{
153183080Sjhb
154183080Sjhb	if (refcount_release(&dh->dh_refcount)) {
155183080Sjhb		sx_destroy(&dh->dh_lock);
156183080Sjhb		free(dh, M_DIRHASH);
157183080Sjhb	}
158183080Sjhb}
159183080Sjhb
16079561Siedowse/*
161178110Sjeff * Release the lock on a dirhash.
162178110Sjeff */
163178110Sjeffstatic void
164178110Sjeffufsdirhash_release(struct dirhash *dh)
165178110Sjeff{
166178110Sjeff
167183080Sjhb	sx_unlock(&dh->dh_lock);
168178110Sjeff}
169178110Sjeff
170178110Sjeff/*
171178110Sjeff * Either acquire an existing hash locked shared or create a new hash and
172178110Sjeff * return it exclusively locked.  May return NULL if the allocation fails.
173178110Sjeff *
174178110Sjeff * The vnode interlock is used to protect the i_dirhash pointer from
175178110Sjeff * simultaneous access while only a shared vnode lock is held.
176178110Sjeff */
177178110Sjeffstatic struct dirhash *
178178110Sjeffufsdirhash_create(struct inode *ip)
179178110Sjeff{
180178110Sjeff	struct dirhash *ndh;
181178110Sjeff	struct dirhash *dh;
182178110Sjeff	struct vnode *vp;
183178110Sjeff	int error;
184178110Sjeff
185178110Sjeff	error = 0;
186178110Sjeff	ndh = dh = NULL;
187178110Sjeff	vp = ip->i_vnode;
188178110Sjeff	for (;;) {
189185102Sjhb		/* Racy check for i_dirhash to prefetch a dirhash structure. */
190178110Sjeff		if (ip->i_dirhash == NULL && ndh == NULL) {
191184205Sdes			ndh = malloc(sizeof *dh, M_DIRHASH,
192178110Sjeff			    M_NOWAIT | M_ZERO);
193178110Sjeff			if (ndh == NULL)
194178110Sjeff				return (NULL);
195183080Sjhb			refcount_init(&ndh->dh_refcount, 1);
196184651Sjhb
197184651Sjhb			/*
198184651Sjhb			 * The DUPOK is to prevent warnings from the
199184651Sjhb			 * sx_slock() a few lines down which is safe
200184651Sjhb			 * since the duplicate lock in that case is
201184651Sjhb			 * the one for this dirhash we are creating
202184651Sjhb			 * now which has no external references until
203184651Sjhb			 * after this function returns.
204184651Sjhb			 */
205184651Sjhb			sx_init_flags(&ndh->dh_lock, "dirhash", SX_DUPOK);
206183080Sjhb			sx_xlock(&ndh->dh_lock);
207178110Sjeff		}
208178110Sjeff		/*
209178110Sjeff		 * Check i_dirhash.  If it's NULL just try to use a
210178110Sjeff		 * preallocated structure.  If none exists loop and try again.
211178110Sjeff		 */
212178110Sjeff		VI_LOCK(vp);
213178110Sjeff		dh = ip->i_dirhash;
214178110Sjeff		if (dh == NULL) {
215178110Sjeff			ip->i_dirhash = ndh;
216178110Sjeff			VI_UNLOCK(vp);
217178110Sjeff			if (ndh == NULL)
218178110Sjeff				continue;
219178110Sjeff			return (ndh);
220178110Sjeff		}
221183080Sjhb		ufsdirhash_hold(dh);
222183080Sjhb		VI_UNLOCK(vp);
223183080Sjhb
224183080Sjhb		/* Acquire a shared lock on existing hashes. */
225183080Sjhb		sx_slock(&dh->dh_lock);
226183080Sjhb
227178110Sjeff		/* The hash could've been recycled while we were waiting. */
228183080Sjhb		VI_LOCK(vp);
229178110Sjeff		if (ip->i_dirhash != dh) {
230183080Sjhb			VI_UNLOCK(vp);
231178110Sjeff			ufsdirhash_release(dh);
232183080Sjhb			ufsdirhash_drop(dh);
233178110Sjeff			continue;
234178110Sjeff		}
235183080Sjhb		VI_UNLOCK(vp);
236183080Sjhb		ufsdirhash_drop(dh);
237183080Sjhb
238178110Sjeff		/* If the hash is still valid we've succeeded. */
239178110Sjeff		if (dh->dh_hash != NULL)
240178110Sjeff			break;
241178110Sjeff		/*
242178110Sjeff		 * If the hash is NULL it has been recycled.  Try to upgrade
243183080Sjhb		 * so we can recreate it.  If we fail the upgrade, drop our
244183080Sjhb		 * lock and try again.
245178110Sjeff		 */
246183080Sjhb		if (sx_try_upgrade(&dh->dh_lock))
247178110Sjeff			break;
248183080Sjhb		sx_sunlock(&dh->dh_lock);
249178110Sjeff	}
250178110Sjeff	/* Free the preallocated structure if it was not necessary. */
251178110Sjeff	if (ndh) {
252183080Sjhb		ufsdirhash_release(ndh);
253183080Sjhb		ufsdirhash_drop(ndh);
254178110Sjeff	}
255178110Sjeff	return (dh);
256178110Sjeff}
257178110Sjeff
258178110Sjeff/*
259178110Sjeff * Acquire an exclusive lock on an existing hash.  Requires an exclusive
260178110Sjeff * vnode lock to protect the i_dirhash pointer.  hashes that have been
261178110Sjeff * recycled are reclaimed here and NULL is returned.
262178110Sjeff */
263178110Sjeffstatic struct dirhash *
264178110Sjeffufsdirhash_acquire(struct inode *ip)
265178110Sjeff{
266178110Sjeff	struct dirhash *dh;
267178110Sjeff	struct vnode *vp;
268178110Sjeff
269178110Sjeff	ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);
270178110Sjeff
271178110Sjeff	vp = ip->i_vnode;
272178110Sjeff	dh = ip->i_dirhash;
273178110Sjeff	if (dh == NULL)
274178110Sjeff		return (NULL);
275183080Sjhb	sx_xlock(&dh->dh_lock);
276178110Sjeff	if (dh->dh_hash != NULL)
277178110Sjeff		return (dh);
278178110Sjeff	ufsdirhash_free_locked(ip);
279178110Sjeff	return (NULL);
280178110Sjeff}
281178110Sjeff
282178110Sjeff/*
283178110Sjeff * Acquire exclusively and free the hash pointed to by ip.  Works with a
284178110Sjeff * shared or exclusive vnode lock.
285178110Sjeff */
286178110Sjeffvoid
287178110Sjeffufsdirhash_free(struct inode *ip)
288178110Sjeff{
289178110Sjeff	struct dirhash *dh;
290178110Sjeff	struct vnode *vp;
291178110Sjeff
292178110Sjeff	vp = ip->i_vnode;
293178110Sjeff	for (;;) {
294183080Sjhb		/* Grab a reference on this inode's dirhash if it has one. */
295178110Sjeff		VI_LOCK(vp);
296178110Sjeff		dh = ip->i_dirhash;
297178110Sjeff		if (dh == NULL) {
298178110Sjeff			VI_UNLOCK(vp);
299178110Sjeff			return;
300178110Sjeff		}
301183080Sjhb		ufsdirhash_hold(dh);
302183080Sjhb		VI_UNLOCK(vp);
303183080Sjhb
304183080Sjhb		/* Exclusively lock the dirhash. */
305183080Sjhb		sx_xlock(&dh->dh_lock);
306183080Sjhb
307183080Sjhb		/* If this dirhash still belongs to this inode, then free it. */
308183080Sjhb		VI_LOCK(vp);
309183080Sjhb		if (ip->i_dirhash == dh) {
310183080Sjhb			VI_UNLOCK(vp);
311183080Sjhb			ufsdirhash_drop(dh);
312178110Sjeff			break;
313183080Sjhb		}
314183080Sjhb		VI_UNLOCK(vp);
315183080Sjhb
316183080Sjhb		/*
317183080Sjhb		 * This inode's dirhash has changed while we were
318183080Sjhb		 * waiting for the dirhash lock, so try again.
319183080Sjhb		 */
320178110Sjeff		ufsdirhash_release(dh);
321183080Sjhb		ufsdirhash_drop(dh);
322178110Sjeff	}
323178110Sjeff	ufsdirhash_free_locked(ip);
324178110Sjeff}
325178110Sjeff
326178110Sjeff/*
32779561Siedowse * Attempt to build up a hash table for the directory contents in
32879561Siedowse * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
32979561Siedowse */
33079561Siedowseint
33179561Siedowseufsdirhash_build(struct inode *ip)
33279561Siedowse{
33379561Siedowse	struct dirhash *dh;
33479561Siedowse	struct buf *bp = NULL;
33579561Siedowse	struct direct *ep;
33679561Siedowse	struct vnode *vp;
33779561Siedowse	doff_t bmask, pos;
33879561Siedowse	int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
33979561Siedowse
340178110Sjeff	/* Take care of a decreased sysctl value. */
341178110Sjeff	while (ufs_dirhashmem > ufs_dirhashmaxmem)
342178110Sjeff		if (ufsdirhash_recycle(0) != 0)
343178110Sjeff			return (-1);
344178110Sjeff
34579561Siedowse	/* Check if we can/should use dirhash. */
346178110Sjeff	if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode) ||
347178110Sjeff	    ip->i_effnlink == 0) {
348178110Sjeff		if (ip->i_dirhash)
34979561Siedowse			ufsdirhash_free(ip);
350178110Sjeff		return (-1);
35179561Siedowse	}
352178110Sjeff	dh = ufsdirhash_create(ip);
353178110Sjeff	if (dh == NULL)
35482364Siedowse		return (-1);
355178110Sjeff	if (dh->dh_hash != NULL)
356178110Sjeff		return (0);
35782364Siedowse
35879561Siedowse	vp = ip->i_vnode;
35979561Siedowse	/* Allocate 50% more entries than this dir size could ever need. */
36079561Siedowse	KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size"));
36179561Siedowse	nslots = ip->i_size / DIRECTSIZ(1);
36279561Siedowse	nslots = (nslots * 3 + 1) / 2;
36379561Siedowse	narrays = howmany(nslots, DH_NBLKOFF);
36479561Siedowse	nslots = narrays * DH_NBLKOFF;
36579561Siedowse	dirblocks = howmany(ip->i_size, DIRBLKSIZ);
36679561Siedowse	nblocks = (dirblocks * 3 + 1) / 2;
36779561Siedowse	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
36879561Siedowse	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
36979561Siedowse	    nblocks * sizeof(*dh->dh_blkfree);
370125854Sdwmalone	DIRHASHLIST_LOCK();
37179561Siedowse	if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
372125854Sdwmalone		DIRHASHLIST_UNLOCK();
37379561Siedowse		if (memreqd > ufs_dirhashmaxmem / 2)
374178110Sjeff			goto fail;
37579561Siedowse		/* Try to free some space. */
37679561Siedowse		if (ufsdirhash_recycle(memreqd) != 0)
377178110Sjeff			goto fail;
378125854Sdwmalone		/* Enough was freed, and list has been locked. */
37979561Siedowse	}
38079561Siedowse	ufs_dirhashmem += memreqd;
381125854Sdwmalone	DIRHASHLIST_UNLOCK();
38279561Siedowse
383178110Sjeff	/* Initialise the hash table and block statistics. */
384178110Sjeff	dh->dh_memreq = memreqd;
385178110Sjeff	dh->dh_narrays = narrays;
386178110Sjeff	dh->dh_hlen = nslots;
387178110Sjeff	dh->dh_nblk = nblocks;
388178110Sjeff	dh->dh_dirblks = dirblocks;
389178110Sjeff	for (i = 0; i < DH_NFSTATS; i++)
390178110Sjeff		dh->dh_firstfree[i] = -1;
391178110Sjeff	dh->dh_firstfree[DH_NFSTATS] = 0;
392178110Sjeff	dh->dh_hused = 0;
393178110Sjeff	dh->dh_seqopt = 0;
394178110Sjeff	dh->dh_seqoff = 0;
395178110Sjeff	dh->dh_score = DH_SCOREINIT;
396178110Sjeff
39779561Siedowse	/*
39879561Siedowse	 * Use non-blocking mallocs so that we will revert to a linear
39979561Siedowse	 * lookup on failure rather than potentially blocking forever.
40079561Siedowse	 */
401184205Sdes	dh->dh_hash = malloc(narrays * sizeof(dh->dh_hash[0]),
40279561Siedowse	    M_DIRHASH, M_NOWAIT | M_ZERO);
403178110Sjeff	if (dh->dh_hash == NULL)
404178110Sjeff		goto fail;
405184205Sdes	dh->dh_blkfree = malloc(nblocks * sizeof(dh->dh_blkfree[0]),
40679561Siedowse	    M_DIRHASH, M_NOWAIT);
407178110Sjeff	if (dh->dh_blkfree == NULL)
40879561Siedowse		goto fail;
40979561Siedowse	for (i = 0; i < narrays; i++) {
410125854Sdwmalone		if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
41179561Siedowse			goto fail;
41279561Siedowse		for (j = 0; j < DH_NBLKOFF; j++)
41379561Siedowse			dh->dh_hash[i][j] = DIRHASH_EMPTY;
41479561Siedowse	}
41579561Siedowse	for (i = 0; i < dirblocks; i++)
41679561Siedowse		dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
41779561Siedowse	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
41879561Siedowse	pos = 0;
41979561Siedowse	while (pos < ip->i_size) {
42079561Siedowse		/* If necessary, get the next directory block. */
42179561Siedowse		if ((pos & bmask) == 0) {
42279561Siedowse			if (bp != NULL)
42379561Siedowse				brelse(bp);
42479561Siedowse			if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0)
42579561Siedowse				goto fail;
42679561Siedowse		}
42779561Siedowse
42879561Siedowse		/* Add this entry to the hash. */
42979561Siedowse		ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
43079561Siedowse		if (ep->d_reclen == 0 || ep->d_reclen >
43179561Siedowse		    DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
43279561Siedowse			/* Corrupted directory. */
43379561Siedowse			brelse(bp);
43479561Siedowse			goto fail;
43579561Siedowse		}
43679561Siedowse		if (ep->d_ino != 0) {
43779561Siedowse			/* Add the entry (simplified ufsdirhash_add). */
43879561Siedowse			slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
43979561Siedowse			while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
44079561Siedowse				slot = WRAPINCR(slot, dh->dh_hlen);
44179561Siedowse			dh->dh_hused++;
44279561Siedowse			DH_ENTRY(dh, slot) = pos;
44379561Siedowse			ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep));
44479561Siedowse		}
44579561Siedowse		pos += ep->d_reclen;
44679561Siedowse	}
44779561Siedowse
44879561Siedowse	if (bp != NULL)
44979561Siedowse		brelse(bp);
450125854Sdwmalone	DIRHASHLIST_LOCK();
45179561Siedowse	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
45279561Siedowse	dh->dh_onlist = 1;
453125854Sdwmalone	DIRHASHLIST_UNLOCK();
454183080Sjhb	sx_downgrade(&dh->dh_lock);
45579561Siedowse	return (0);
45679561Siedowse
45779561Siedowsefail:
458178110Sjeff	ufsdirhash_free_locked(ip);
45979561Siedowse	return (-1);
46079561Siedowse}
46179561Siedowse
46279561Siedowse/*
46379561Siedowse * Free any hash table associated with inode 'ip'.
46479561Siedowse */
465178110Sjeffstatic void
466178110Sjeffufsdirhash_free_locked(struct inode *ip)
46779561Siedowse{
46879561Siedowse	struct dirhash *dh;
469178110Sjeff	struct vnode *vp;
470178110Sjeff	int i;
47179561Siedowse
472178110Sjeff	DIRHASH_ASSERT_LOCKED(ip->i_dirhash);
473183080Sjhb
474178110Sjeff	/*
475178110Sjeff	 * Clear the pointer in the inode to prevent new threads from
476178110Sjeff	 * finding the dead structure.
477178110Sjeff	 */
478178110Sjeff	vp = ip->i_vnode;
479178110Sjeff	VI_LOCK(vp);
480178110Sjeff	dh = ip->i_dirhash;
481178110Sjeff	ip->i_dirhash = NULL;
482178110Sjeff	VI_UNLOCK(vp);
483183080Sjhb
484178110Sjeff	/*
485183280Sjhb	 * Remove the hash from the list since we are going to free its
486183280Sjhb	 * memory.
487183280Sjhb	 */
488183280Sjhb	DIRHASHLIST_LOCK();
489183280Sjhb	if (dh->dh_onlist)
490183280Sjhb		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
491183280Sjhb	ufs_dirhashmem -= dh->dh_memreq;
492183280Sjhb	DIRHASHLIST_UNLOCK();
493183280Sjhb
494183280Sjhb	/*
495183080Sjhb	 * At this point, any waiters for the lock should hold their
496183080Sjhb	 * own reference on the dirhash structure.  They will drop
497183080Sjhb	 * that reference once they grab the vnode interlock and see
498183080Sjhb	 * that ip->i_dirhash is NULL.
499178110Sjeff	 */
500183080Sjhb	sx_xunlock(&dh->dh_lock);
501183080Sjhb
502178110Sjeff	/*
503178110Sjeff	 * Handle partially recycled as well as fully constructed hashes.
504178110Sjeff	 */
505178110Sjeff	if (dh->dh_hash != NULL) {
506178110Sjeff		for (i = 0; i < dh->dh_narrays; i++)
507178110Sjeff			if (dh->dh_hash[i] != NULL)
508178110Sjeff				DIRHASH_BLKFREE(dh->dh_hash[i]);
509184205Sdes		free(dh->dh_hash, M_DIRHASH);
510178110Sjeff		if (dh->dh_blkfree != NULL)
511184205Sdes			free(dh->dh_blkfree, M_DIRHASH);
512178110Sjeff	}
513183080Sjhb
514178110Sjeff	/*
515183080Sjhb	 * Drop the inode's reference to the data structure.
516178110Sjeff	 */
517183080Sjhb	ufsdirhash_drop(dh);
51879561Siedowse}
51979561Siedowse
52079561Siedowse/*
52179561Siedowse * Find the offset of the specified name within the given inode.
52279561Siedowse * Returns 0 on success, ENOENT if the entry does not exist, or
52379561Siedowse * EJUSTRETURN if the caller should revert to a linear search.
52479561Siedowse *
52579690Siedowse * If successful, the directory offset is stored in *offp, and a
52679690Siedowse * pointer to a struct buf containing the entry is stored in *bpp. If
52779561Siedowse * prevoffp is non-NULL, the offset of the previous entry within
52879561Siedowse * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
52979561Siedowse * is the first in a block, the start of the block is used).
530178110Sjeff *
531178110Sjeff * Must be called with the hash locked.  Returns with the hash unlocked.
53279561Siedowse */
53379561Siedowseint
53479561Siedowseufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
53579690Siedowse    struct buf **bpp, doff_t *prevoffp)
53679561Siedowse{
53779561Siedowse	struct dirhash *dh, *dh_next;
53879561Siedowse	struct direct *dp;
53979561Siedowse	struct vnode *vp;
54079561Siedowse	struct buf *bp;
54179561Siedowse	doff_t blkoff, bmask, offset, prevoff;
54279561Siedowse	int i, slot;
543178110Sjeff	int error;
54479561Siedowse
545178110Sjeff	dh = ip->i_dirhash;
546178110Sjeff	KASSERT(dh != NULL && dh->dh_hash != NULL,
547178110Sjeff	    ("ufsdirhash_lookup: Invalid dirhash %p\n", dh));
548178110Sjeff	DIRHASH_ASSERT_LOCKED(dh);
54979561Siedowse	/*
55079561Siedowse	 * Move this dirhash towards the end of the list if it has a
551178110Sjeff	 * score higher than the next entry, and acquire the dh_lock.
55279561Siedowse	 */
553178110Sjeff	DIRHASHLIST_LOCK();
55479561Siedowse	if (TAILQ_NEXT(dh, dh_list) != NULL) {
55579561Siedowse		/*
55679561Siedowse		 * If the new score will be greater than that of the next
55779561Siedowse		 * entry, then move this entry past it. With both mutexes
55879561Siedowse		 * held, dh_next won't go away, but its dh_score could
55979561Siedowse		 * change; that's not important since it is just a hint.
56079561Siedowse		 */
561178110Sjeff		if ((dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
56279561Siedowse		    dh->dh_score >= dh_next->dh_score) {
56379561Siedowse			KASSERT(dh->dh_onlist, ("dirhash: not on list"));
56479561Siedowse			TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
56579561Siedowse			TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
56679561Siedowse			    dh_list);
56779561Siedowse		}
56879561Siedowse	}
56979561Siedowse	/* Update the score. */
57079561Siedowse	if (dh->dh_score < DH_SCOREMAX)
57179561Siedowse		dh->dh_score++;
572178110Sjeff	DIRHASHLIST_UNLOCK();
57379561Siedowse
57479561Siedowse	vp = ip->i_vnode;
57579561Siedowse	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
57679561Siedowse	blkoff = -1;
57779561Siedowse	bp = NULL;
57879561Siedowserestart:
57979561Siedowse	slot = ufsdirhash_hash(dh, name, namelen);
58079561Siedowse
58179561Siedowse	if (dh->dh_seqopt) {
58279561Siedowse		/*
58379561Siedowse		 * Sequential access optimisation. dh_seqoff contains the
58479561Siedowse		 * offset of the directory entry immediately following
58579561Siedowse		 * the last entry that was looked up. Check if this offset
58679561Siedowse		 * appears in the hash chain for the name we are looking for.
58779561Siedowse		 */
58879561Siedowse		for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
58979561Siedowse		    i = WRAPINCR(i, dh->dh_hlen))
59086350Siedowse			if (offset == dh->dh_seqoff)
59179561Siedowse				break;
59279561Siedowse		if (offset == dh->dh_seqoff) {
59379561Siedowse			/*
59479561Siedowse			 * We found an entry with the expected offset. This
59579561Siedowse			 * is probably the entry we want, but if not, the
596149178Siedowse			 * code below will turn off seqopt and retry.
59779561Siedowse			 */
59879561Siedowse			slot = i;
59979561Siedowse		} else
60079561Siedowse			dh->dh_seqopt = 0;
60179561Siedowse	}
60279561Siedowse
60379561Siedowse	for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
60479561Siedowse	    slot = WRAPINCR(slot, dh->dh_hlen)) {
60579561Siedowse		if (offset == DIRHASH_DEL)
60679561Siedowse			continue;
60779561Siedowse		if (offset < 0 || offset >= ip->i_size)
60879561Siedowse			panic("ufsdirhash_lookup: bad offset in hash array");
60979561Siedowse		if ((offset & ~bmask) != blkoff) {
61079561Siedowse			if (bp != NULL)
61179561Siedowse				brelse(bp);
61279561Siedowse			blkoff = offset & ~bmask;
613178110Sjeff			if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) {
614178110Sjeff				error = EJUSTRETURN;
615178110Sjeff				goto fail;
616178110Sjeff			}
61779561Siedowse		}
61879561Siedowse		dp = (struct direct *)(bp->b_data + (offset & bmask));
61979561Siedowse		if (dp->d_reclen == 0 || dp->d_reclen >
62079561Siedowse		    DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
62179561Siedowse			/* Corrupted directory. */
622178110Sjeff			error = EJUSTRETURN;
623178110Sjeff			goto fail;
62479561Siedowse		}
62579561Siedowse		if (dp->d_namlen == namelen &&
62679561Siedowse		    bcmp(dp->d_name, name, namelen) == 0) {
62779561Siedowse			/* Found. Get the prev offset if needed. */
62879561Siedowse			if (prevoffp != NULL) {
62979561Siedowse				if (offset & (DIRBLKSIZ - 1)) {
63079561Siedowse					prevoff = ufsdirhash_getprev(dp,
63179561Siedowse					    offset);
63279561Siedowse					if (prevoff == -1) {
633178110Sjeff						error = EJUSTRETURN;
634178110Sjeff						goto fail;
63579561Siedowse					}
63679561Siedowse				} else
63779561Siedowse					prevoff = offset;
63879561Siedowse				*prevoffp = prevoff;
63979561Siedowse			}
64079561Siedowse
64179561Siedowse			/* Check for sequential access, and update offset. */
64279561Siedowse			if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
64379561Siedowse				dh->dh_seqopt = 1;
64479561Siedowse			dh->dh_seqoff = offset + DIRSIZ(0, dp);
64579690Siedowse			*bpp = bp;
64679561Siedowse			*offp = offset;
647178110Sjeff			ufsdirhash_release(dh);
64879561Siedowse			return (0);
64979561Siedowse		}
65079561Siedowse
65179561Siedowse		/*
65279561Siedowse		 * When the name doesn't match in the seqopt case, go back
65379561Siedowse		 * and search normally.
65479561Siedowse		 */
65579561Siedowse		if (dh->dh_seqopt) {
65679561Siedowse			dh->dh_seqopt = 0;
65779561Siedowse			goto restart;
65879561Siedowse		}
65979561Siedowse	}
660178110Sjeff	error = ENOENT;
661178110Sjefffail:
662178110Sjeff	ufsdirhash_release(dh);
66379561Siedowse	if (bp != NULL)
66479561Siedowse		brelse(bp);
665178110Sjeff	return (error);
66679561Siedowse}
66779561Siedowse
66879561Siedowse/*
66979561Siedowse * Find a directory block with room for 'slotneeded' bytes. Returns
67079561Siedowse * the offset of the directory entry that begins the free space.
67179561Siedowse * This will either be the offset of an existing entry that has free
67279561Siedowse * space at the end, or the offset of an entry with d_ino == 0 at
67379561Siedowse * the start of a DIRBLKSIZ block.
67479561Siedowse *
67579561Siedowse * To use the space, the caller may need to compact existing entries in
67679561Siedowse * the directory. The total number of bytes in all of the entries involved
67779561Siedowse * in the compaction is stored in *slotsize. In other words, all of
67879561Siedowse * the entries that must be compacted are exactly contained in the
67979561Siedowse * region beginning at the returned offset and spanning *slotsize bytes.
68079561Siedowse *
68179561Siedowse * Returns -1 if no space was found, indicating that the directory
68279561Siedowse * must be extended.
68379561Siedowse */
68479561Siedowsedoff_t
68579561Siedowseufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
68679561Siedowse{
68779561Siedowse	struct direct *dp;
68879561Siedowse	struct dirhash *dh;
68979561Siedowse	struct buf *bp;
69079561Siedowse	doff_t pos, slotstart;
69179561Siedowse	int dirblock, error, freebytes, i;
69279561Siedowse
693178110Sjeff	dh = ip->i_dirhash;
694178110Sjeff	KASSERT(dh != NULL && dh->dh_hash != NULL,
695178110Sjeff	    ("ufsdirhash_findfree: Invalid dirhash %p\n", dh));
696178110Sjeff	DIRHASH_ASSERT_LOCKED(dh);
69779561Siedowse
69879561Siedowse	/* Find a directory block with the desired free space. */
69979561Siedowse	dirblock = -1;
70079561Siedowse	for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
70179561Siedowse		if ((dirblock = dh->dh_firstfree[i]) != -1)
70279561Siedowse			break;
703178110Sjeff	if (dirblock == -1)
70479561Siedowse		return (-1);
70579561Siedowse
70679561Siedowse	KASSERT(dirblock < dh->dh_nblk &&
70779561Siedowse	    dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
70879561Siedowse	    ("ufsdirhash_findfree: bad stats"));
70979561Siedowse	pos = dirblock * DIRBLKSIZ;
71079561Siedowse	error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp);
71179561Siedowse	if (error)
71279561Siedowse		return (-1);
71379561Siedowse
71479561Siedowse	/* Find the first entry with free space. */
71579561Siedowse	for (i = 0; i < DIRBLKSIZ; ) {
71679561Siedowse		if (dp->d_reclen == 0) {
71779561Siedowse			brelse(bp);
71879561Siedowse			return (-1);
71979561Siedowse		}
72079561Siedowse		if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp))
72179561Siedowse			break;
72279561Siedowse		i += dp->d_reclen;
72379561Siedowse		dp = (struct direct *)((char *)dp + dp->d_reclen);
72479561Siedowse	}
72579561Siedowse	if (i > DIRBLKSIZ) {
72679561Siedowse		brelse(bp);
72779561Siedowse		return (-1);
72879561Siedowse	}
72979561Siedowse	slotstart = pos + i;
73079561Siedowse
73179561Siedowse	/* Find the range of entries needed to get enough space */
73279561Siedowse	freebytes = 0;
73379561Siedowse	while (i < DIRBLKSIZ && freebytes < slotneeded) {
73479561Siedowse		freebytes += dp->d_reclen;
73579561Siedowse		if (dp->d_ino != 0)
73679561Siedowse			freebytes -= DIRSIZ(0, dp);
73779561Siedowse		if (dp->d_reclen == 0) {
73879561Siedowse			brelse(bp);
73979561Siedowse			return (-1);
74079561Siedowse		}
74179561Siedowse		i += dp->d_reclen;
74279561Siedowse		dp = (struct direct *)((char *)dp + dp->d_reclen);
74379561Siedowse	}
74479561Siedowse	if (i > DIRBLKSIZ) {
74579561Siedowse		brelse(bp);
74679561Siedowse		return (-1);
74779561Siedowse	}
74879561Siedowse	if (freebytes < slotneeded)
74979561Siedowse		panic("ufsdirhash_findfree: free mismatch");
75079561Siedowse	brelse(bp);
75179561Siedowse	*slotsize = pos + i - slotstart;
75279561Siedowse	return (slotstart);
75379561Siedowse}
75479561Siedowse
75579561Siedowse/*
75679561Siedowse * Return the start of the unused space at the end of a directory, or
75779561Siedowse * -1 if there are no trailing unused blocks.
75879561Siedowse */
75979561Siedowsedoff_t
76079561Siedowseufsdirhash_enduseful(struct inode *ip)
76179561Siedowse{
76279561Siedowse
76379561Siedowse	struct dirhash *dh;
76479561Siedowse	int i;
76579561Siedowse
766178110Sjeff	dh = ip->i_dirhash;
767178110Sjeff	DIRHASH_ASSERT_LOCKED(dh);
768178110Sjeff	KASSERT(dh != NULL && dh->dh_hash != NULL,
769178110Sjeff	    ("ufsdirhash_enduseful: Invalid dirhash %p\n", dh));
77079561Siedowse
771178110Sjeff	if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN)
77279561Siedowse		return (-1);
77379561Siedowse
77479561Siedowse	for (i = dh->dh_dirblks - 1; i >= 0; i--)
77579561Siedowse		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
77679561Siedowse			break;
777178110Sjeff
77879561Siedowse	return ((doff_t)(i + 1) * DIRBLKSIZ);
77979561Siedowse}
78079561Siedowse
78179561Siedowse/*
78279561Siedowse * Insert information into the hash about a new directory entry. dirp
78379561Siedowse * points to a struct direct containing the entry, and offset specifies
78479561Siedowse * the offset of this entry.
78579561Siedowse */
78679561Siedowsevoid
78779561Siedowseufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
78879561Siedowse{
78979561Siedowse	struct dirhash *dh;
79079561Siedowse	int slot;
79179561Siedowse
792178110Sjeff	if ((dh = ufsdirhash_acquire(ip)) == NULL)
79379561Siedowse		return;
794178110Sjeff
79579561Siedowse	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
79679561Siedowse	    ("ufsdirhash_add: bad offset"));
79779561Siedowse	/*
79879561Siedowse	 * Normal hash usage is < 66%. If the usage gets too high then
79979561Siedowse	 * remove the hash entirely and let it be rebuilt later.
80079561Siedowse	 */
80179561Siedowse	if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
802178110Sjeff		ufsdirhash_free_locked(ip);
80379561Siedowse		return;
80479561Siedowse	}
80579561Siedowse
80679561Siedowse	/* Find a free hash slot (empty or deleted), and add the entry. */
80779561Siedowse	slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
80879561Siedowse	while (DH_ENTRY(dh, slot) >= 0)
80979561Siedowse		slot = WRAPINCR(slot, dh->dh_hlen);
81079561Siedowse	if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
81179561Siedowse		dh->dh_hused++;
81279561Siedowse	DH_ENTRY(dh, slot) = offset;
81379561Siedowse
81479561Siedowse	/* Update the per-block summary info. */
81579561Siedowse	ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp));
816178110Sjeff	ufsdirhash_release(dh);
81779561Siedowse}
81879561Siedowse
81979561Siedowse/*
82079561Siedowse * Remove the specified directory entry from the hash. The entry to remove
82179561Siedowse * is defined by the name in `dirp', which must exist at the specified
82279561Siedowse * `offset' within the directory.
82379561Siedowse */
82479561Siedowsevoid
82579561Siedowseufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
82679561Siedowse{
82779561Siedowse	struct dirhash *dh;
82879561Siedowse	int slot;
82979561Siedowse
830178110Sjeff	if ((dh = ufsdirhash_acquire(ip)) == NULL)
83179561Siedowse		return;
83279561Siedowse
83379561Siedowse	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
83479561Siedowse	    ("ufsdirhash_remove: bad offset"));
83579561Siedowse	/* Find the entry */
83679561Siedowse	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
83779561Siedowse
83879561Siedowse	/* Remove the hash entry. */
83979561Siedowse	ufsdirhash_delslot(dh, slot);
84079561Siedowse
84179561Siedowse	/* Update the per-block summary info. */
84279561Siedowse	ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp));
843178110Sjeff	ufsdirhash_release(dh);
84479561Siedowse}
84579561Siedowse
84679561Siedowse/*
84779561Siedowse * Change the offset associated with a directory entry in the hash. Used
84879561Siedowse * when compacting directory blocks.
84979561Siedowse */
85079561Siedowsevoid
85179561Siedowseufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
85279561Siedowse    doff_t newoff)
85379561Siedowse{
85479561Siedowse	struct dirhash *dh;
85579561Siedowse	int slot;
85679561Siedowse
857178110Sjeff	if ((dh = ufsdirhash_acquire(ip)) == NULL)
85879561Siedowse		return;
85979561Siedowse
86079561Siedowse	KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
86179561Siedowse	    newoff < dh->dh_dirblks * DIRBLKSIZ,
86279561Siedowse	    ("ufsdirhash_move: bad offset"));
86379561Siedowse	/* Find the entry, and update the offset. */
86479561Siedowse	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
86579561Siedowse	DH_ENTRY(dh, slot) = newoff;
866178110Sjeff	ufsdirhash_release(dh);
86779561Siedowse}
86879561Siedowse
86979561Siedowse/*
87079561Siedowse * Inform dirhash that the directory has grown by one block that
87179561Siedowse * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
87279561Siedowse */
87379561Siedowsevoid
87479561Siedowseufsdirhash_newblk(struct inode *ip, doff_t offset)
87579561Siedowse{
87679561Siedowse	struct dirhash *dh;
87779561Siedowse	int block;
87879561Siedowse
879178110Sjeff	if ((dh = ufsdirhash_acquire(ip)) == NULL)
88079561Siedowse		return;
88179561Siedowse
88279561Siedowse	KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
88379561Siedowse	    ("ufsdirhash_newblk: bad offset"));
88479561Siedowse	block = offset / DIRBLKSIZ;
88579561Siedowse	if (block >= dh->dh_nblk) {
88679561Siedowse		/* Out of space; must rebuild. */
887178110Sjeff		ufsdirhash_free_locked(ip);
88879561Siedowse		return;
88979561Siedowse	}
89079561Siedowse	dh->dh_dirblks = block + 1;
89179561Siedowse
89279561Siedowse	/* Account for the new free block. */
89379561Siedowse	dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
89479561Siedowse	if (dh->dh_firstfree[DH_NFSTATS] == -1)
89579561Siedowse		dh->dh_firstfree[DH_NFSTATS] = block;
896178110Sjeff	ufsdirhash_release(dh);
89779561Siedowse}
89879561Siedowse
89979561Siedowse/*
90079561Siedowse * Inform dirhash that the directory is being truncated.
90179561Siedowse */
90279561Siedowsevoid
90379561Siedowseufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
90479561Siedowse{
90579561Siedowse	struct dirhash *dh;
90679561Siedowse	int block, i;
90779561Siedowse
908178110Sjeff	if ((dh = ufsdirhash_acquire(ip)) == NULL)
90979561Siedowse		return;
91079561Siedowse
91179561Siedowse	KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
91279561Siedowse	    ("ufsdirhash_dirtrunc: bad offset"));
91379561Siedowse	block = howmany(offset, DIRBLKSIZ);
91479561Siedowse	/*
91579561Siedowse	 * If the directory shrinks to less than 1/8 of dh_nblk blocks
91679561Siedowse	 * (about 20% of its original size due to the 50% extra added in
91779561Siedowse	 * ufsdirhash_build) then free it, and let the caller rebuild
91879561Siedowse	 * if necessary.
91979561Siedowse	 */
92079561Siedowse	if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
921178110Sjeff		ufsdirhash_free_locked(ip);
92279561Siedowse		return;
92379561Siedowse	}
92479561Siedowse
92579561Siedowse	/*
92679561Siedowse	 * Remove any `first free' information pertaining to the
92779561Siedowse	 * truncated blocks. All blocks we're removing should be
92879561Siedowse	 * completely unused.
92979561Siedowse	 */
93079561Siedowse	if (dh->dh_firstfree[DH_NFSTATS] >= block)
93179561Siedowse		dh->dh_firstfree[DH_NFSTATS] = -1;
93279561Siedowse	for (i = block; i < dh->dh_dirblks; i++)
93379561Siedowse		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
93479561Siedowse			panic("ufsdirhash_dirtrunc: blocks in use");
93579561Siedowse	for (i = 0; i < DH_NFSTATS; i++)
93679561Siedowse		if (dh->dh_firstfree[i] >= block)
93779561Siedowse			panic("ufsdirhash_dirtrunc: first free corrupt");
93879561Siedowse	dh->dh_dirblks = block;
939178110Sjeff	ufsdirhash_release(dh);
94079561Siedowse}
94179561Siedowse
94279561Siedowse/*
94379561Siedowse * Debugging function to check that the dirhash information about
94479561Siedowse * a directory block matches its actual contents. Panics if a mismatch
94579561Siedowse * is detected.
94679561Siedowse *
94779561Siedowse * On entry, `buf' should point to the start of an in-core
94879561Siedowse * DIRBLKSIZ-sized directory block, and `offset' should contain the
94979561Siedowse * offset from the start of the directory of that block.
95079561Siedowse */
95179561Siedowsevoid
95279561Siedowseufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
95379561Siedowse{
95479561Siedowse	struct dirhash *dh;
95579561Siedowse	struct direct *dp;
95679561Siedowse	int block, ffslot, i, nfree;
95779561Siedowse
95879561Siedowse	if (!ufs_dirhashcheck)
95979561Siedowse		return;
960178110Sjeff	if ((dh = ufsdirhash_acquire(ip)) == NULL)
96179561Siedowse		return;
96279561Siedowse
96379561Siedowse	block = offset / DIRBLKSIZ;
96479561Siedowse	if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
96579561Siedowse		panic("ufsdirhash_checkblock: bad offset");
96679561Siedowse
96779561Siedowse	nfree = 0;
96879561Siedowse	for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
96979561Siedowse		dp = (struct direct *)(buf + i);
97079561Siedowse		if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
97179561Siedowse			panic("ufsdirhash_checkblock: bad dir");
97279561Siedowse
97379561Siedowse		if (dp->d_ino == 0) {
97480456Siedowse#if 0
97580456Siedowse			/*
97680456Siedowse			 * XXX entries with d_ino == 0 should only occur
97780456Siedowse			 * at the start of a DIRBLKSIZ block. However the
97880456Siedowse			 * ufs code is tolerant of such entries at other
97980456Siedowse			 * offsets, and fsck does not fix them.
98080456Siedowse			 */
98179561Siedowse			if (i != 0)
98279561Siedowse				panic("ufsdirhash_checkblock: bad dir inode");
98380456Siedowse#endif
98479561Siedowse			nfree += dp->d_reclen;
98579561Siedowse			continue;
98679561Siedowse		}
98779561Siedowse
98879561Siedowse		/* Check that the entry	exists (will panic if it doesn't). */
98979561Siedowse		ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
99079561Siedowse
99179561Siedowse		nfree += dp->d_reclen - DIRSIZ(0, dp);
99279561Siedowse	}
99379561Siedowse	if (i != DIRBLKSIZ)
99479561Siedowse		panic("ufsdirhash_checkblock: bad dir end");
99579561Siedowse
99679561Siedowse	if (dh->dh_blkfree[block] * DIRALIGN != nfree)
99779561Siedowse		panic("ufsdirhash_checkblock: bad free count");
99879561Siedowse
99992098Siedowse	ffslot = BLKFREE2IDX(nfree / DIRALIGN);
100079561Siedowse	for (i = 0; i <= DH_NFSTATS; i++)
100179561Siedowse		if (dh->dh_firstfree[i] == block && i != ffslot)
100279561Siedowse			panic("ufsdirhash_checkblock: bad first-free");
100392098Siedowse	if (dh->dh_firstfree[ffslot] == -1)
100492098Siedowse		panic("ufsdirhash_checkblock: missing first-free entry");
1005178110Sjeff	ufsdirhash_release(dh);
100679561Siedowse}
100779561Siedowse
100879561Siedowse/*
100979561Siedowse * Hash the specified filename into a dirhash slot.
101079561Siedowse */
101179561Siedowsestatic int
101279561Siedowseufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
101379561Siedowse{
101492807Sdwmalone	u_int32_t hash;
101592807Sdwmalone
101692807Sdwmalone	/*
1017107868Siedowse	 * We hash the name and then some other bit of data that is
1018107868Siedowse	 * invariant over the dirhash's lifetime. Otherwise names
101992807Sdwmalone	 * differing only in the last byte are placed close to one
102092807Sdwmalone	 * another in the table, which is bad for linear probing.
102192807Sdwmalone	 */
102292807Sdwmalone	hash = fnv_32_buf(name, namelen, FNV1_32_INIT);
1023133837Sdwmalone	hash = fnv_32_buf(&dh, sizeof(dh), hash);
102492807Sdwmalone	return (hash % dh->dh_hlen);
102579561Siedowse}
102679561Siedowse
102779561Siedowse/*
102879561Siedowse * Adjust the number of free bytes in the block containing `offset'
102979561Siedowse * by the value specified by `diff'.
103079561Siedowse *
103179561Siedowse * The caller must ensure we have exclusive access to `dh'; normally
1032178110Sjeff * that means that dh_lock should be held, but this is also called
103379561Siedowse * from ufsdirhash_build() where exclusive access can be assumed.
103479561Siedowse */
103579561Siedowsestatic void
103679561Siedowseufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
103779561Siedowse{
103879561Siedowse	int block, i, nfidx, ofidx;
103979561Siedowse
104079561Siedowse	/* Update the per-block summary info. */
104179561Siedowse	block = offset / DIRBLKSIZ;
104279561Siedowse	KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
104379561Siedowse	     ("dirhash bad offset"));
104492098Siedowse	ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
104579561Siedowse	dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
104692098Siedowse	nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
104779561Siedowse
104879561Siedowse	/* Update the `first free' list if necessary. */
104979561Siedowse	if (ofidx != nfidx) {
105079561Siedowse		/* If removing, scan forward for the next block. */
105179561Siedowse		if (dh->dh_firstfree[ofidx] == block) {
105279561Siedowse			for (i = block + 1; i < dh->dh_dirblks; i++)
105392098Siedowse				if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
105479561Siedowse					break;
105579561Siedowse			dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
105679561Siedowse		}
105779561Siedowse
105879561Siedowse		/* Make this the new `first free' if necessary */
105979561Siedowse		if (dh->dh_firstfree[nfidx] > block ||
106079561Siedowse		    dh->dh_firstfree[nfidx] == -1)
106179561Siedowse			dh->dh_firstfree[nfidx] = block;
106279561Siedowse	}
106379561Siedowse}
106479561Siedowse
106579561Siedowse/*
106679561Siedowse * Find the specified name which should have the specified offset.
106779561Siedowse * Returns a slot number, and panics on failure.
106879561Siedowse *
106979561Siedowse * `dh' must be locked on entry and remains so on return.
107079561Siedowse */
107179561Siedowsestatic int
107279561Siedowseufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
107379561Siedowse{
107479561Siedowse	int slot;
107579561Siedowse
1076178110Sjeff	DIRHASH_ASSERT_LOCKED(dh);
107779561Siedowse
107879561Siedowse	/* Find the entry. */
107979561Siedowse	KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
108079561Siedowse	slot = ufsdirhash_hash(dh, name, namelen);
108179561Siedowse	while (DH_ENTRY(dh, slot) != offset &&
108279561Siedowse	    DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
108379561Siedowse		slot = WRAPINCR(slot, dh->dh_hlen);
108479561Siedowse	if (DH_ENTRY(dh, slot) != offset)
108579561Siedowse		panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
108679561Siedowse
108779561Siedowse	return (slot);
108879561Siedowse}
108979561Siedowse
109079561Siedowse/*
109179561Siedowse * Remove the entry corresponding to the specified slot from the hash array.
109279561Siedowse *
109379561Siedowse * `dh' must be locked on entry and remains so on return.
109479561Siedowse */
109579561Siedowsestatic void
109679561Siedowseufsdirhash_delslot(struct dirhash *dh, int slot)
109779561Siedowse{
109879561Siedowse	int i;
109979561Siedowse
1100178110Sjeff	DIRHASH_ASSERT_LOCKED(dh);
110179561Siedowse
110279561Siedowse	/* Mark the entry as deleted. */
110379561Siedowse	DH_ENTRY(dh, slot) = DIRHASH_DEL;
110479561Siedowse
110579561Siedowse	/* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
110679561Siedowse	for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
110779561Siedowse		i = WRAPINCR(i, dh->dh_hlen);
110879561Siedowse	if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
110992807Sdwmalone		i = WRAPDECR(i, dh->dh_hlen);
111092807Sdwmalone		while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
111179561Siedowse			DH_ENTRY(dh, i) = DIRHASH_EMPTY;
111279561Siedowse			dh->dh_hused--;
111392807Sdwmalone			i = WRAPDECR(i, dh->dh_hlen);
111479561Siedowse		}
111579561Siedowse		KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
111679561Siedowse	}
111779561Siedowse}
111879561Siedowse
111979561Siedowse/*
112079561Siedowse * Given a directory entry and its offset, find the offset of the
112179561Siedowse * previous entry in the same DIRBLKSIZ-sized block. Returns an
112279561Siedowse * offset, or -1 if there is no previous entry in the block or some
112379561Siedowse * other problem occurred.
112479561Siedowse */
112579561Siedowsestatic doff_t
112679561Siedowseufsdirhash_getprev(struct direct *dirp, doff_t offset)
112779561Siedowse{
112879561Siedowse	struct direct *dp;
112979561Siedowse	char *blkbuf;
113079561Siedowse	doff_t blkoff, prevoff;
113179561Siedowse	int entrypos, i;
113279561Siedowse
113379561Siedowse	blkoff = offset & ~(DIRBLKSIZ - 1);	/* offset of start of block */
113479561Siedowse	entrypos = offset & (DIRBLKSIZ - 1);	/* entry relative to block */
113579561Siedowse	blkbuf = (char *)dirp - entrypos;
113679561Siedowse	prevoff = blkoff;
113779561Siedowse
113879561Siedowse	/* If `offset' is the start of a block, there is no previous entry. */
113979561Siedowse	if (entrypos == 0)
114079561Siedowse		return (-1);
114179561Siedowse
114279561Siedowse	/* Scan from the start of the block until we get to the entry. */
114379561Siedowse	for (i = 0; i < entrypos; i += dp->d_reclen) {
114479561Siedowse		dp = (struct direct *)(blkbuf + i);
114579561Siedowse		if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
114679561Siedowse			return (-1);	/* Corrupted directory. */
114779561Siedowse		prevoff = blkoff + i;
114879561Siedowse	}
114979561Siedowse	return (prevoff);
115079561Siedowse}
115179561Siedowse
115279561Siedowse/*
115379561Siedowse * Try to free up `wanted' bytes by stealing memory from existing
1154125854Sdwmalone * dirhashes. Returns zero with list locked if successful.
115579561Siedowse */
115679561Siedowsestatic int
115779561Siedowseufsdirhash_recycle(int wanted)
115879561Siedowse{
115979561Siedowse	struct dirhash *dh;
116079561Siedowse	doff_t **hash;
116179561Siedowse	u_int8_t *blkfree;
116279561Siedowse	int i, mem, narrays;
116379561Siedowse
1164125854Sdwmalone	DIRHASHLIST_LOCK();
1165178110Sjeff	dh = TAILQ_FIRST(&ufsdirhash_list);
116679561Siedowse	while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
1167178110Sjeff		/* Decrement the score; only recycle if it becomes zero. */
1168178110Sjeff		if (dh == NULL || --dh->dh_score > 0) {
1169125854Sdwmalone			DIRHASHLIST_UNLOCK();
117079561Siedowse			return (-1);
117179561Siedowse		}
1172178110Sjeff		/*
1173178110Sjeff		 * If we can't lock it it's in use and we don't want to
1174178110Sjeff		 * recycle it anyway.
1175178110Sjeff		 */
1176183080Sjhb		if (!sx_try_xlock(&dh->dh_lock)) {
1177178110Sjeff			dh = TAILQ_NEXT(dh, dh_list);
1178178110Sjeff			continue;
1179178110Sjeff		}
118079561Siedowse		KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
118179561Siedowse
118279561Siedowse		/* Remove it from the list and detach its memory. */
118379561Siedowse		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
118479561Siedowse		dh->dh_onlist = 0;
118579561Siedowse		hash = dh->dh_hash;
118679561Siedowse		dh->dh_hash = NULL;
118779561Siedowse		blkfree = dh->dh_blkfree;
118879561Siedowse		dh->dh_blkfree = NULL;
118979561Siedowse		narrays = dh->dh_narrays;
1190178110Sjeff		mem = dh->dh_memreq;
1191178110Sjeff		dh->dh_memreq = 0;
119279561Siedowse
119379561Siedowse		/* Unlock everything, free the detached memory. */
1194178110Sjeff		ufsdirhash_release(dh);
1195125854Sdwmalone		DIRHASHLIST_UNLOCK();
119679561Siedowse		for (i = 0; i < narrays; i++)
1197125854Sdwmalone			DIRHASH_BLKFREE(hash[i]);
1198184205Sdes		free(hash, M_DIRHASH);
1199184205Sdes		free(blkfree, M_DIRHASH);
120079561Siedowse
120179561Siedowse		/* Account for the returned memory, and repeat if necessary. */
1202125854Sdwmalone		DIRHASHLIST_LOCK();
120379561Siedowse		ufs_dirhashmem -= mem;
1204178110Sjeff		dh = TAILQ_FIRST(&ufsdirhash_list);
120579561Siedowse	}
1206125854Sdwmalone	/* Success; return with list locked. */
120779561Siedowse	return (0);
120879561Siedowse}
120979561Siedowse
121079561Siedowse
121199101Siedowsevoid
121279561Siedowseufsdirhash_init()
121379561Siedowse{
121496874Siedowse	ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t),
121592768Sjeff	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
121693818Sjhb	mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF);
121779561Siedowse	TAILQ_INIT(&ufsdirhash_list);
121879561Siedowse}
121979561Siedowse
122099101Siedowsevoid
122199101Siedowseufsdirhash_uninit()
122299101Siedowse{
122399101Siedowse	KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit"));
122499101Siedowse	uma_zdestroy(ufsdirhash_zone);
122599101Siedowse	mtx_destroy(&ufsdirhash_mtx);
122699101Siedowse}
122779561Siedowse
122879561Siedowse#endif /* UFS_DIRHASH */
1229