ufs_dirhash.c revision 302408
1/*-
2 * Copyright (c) 2001, 2002 Ian Dowse.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/*
27 * This implements a hash-based lookup scheme for UFS directories.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/11/sys/ufs/ufs/ufs_dirhash.c 298433 2016-04-21 19:57:40Z pfg $");
32
33#include "opt_ufs.h"
34
35#ifdef UFS_DIRHASH
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/mutex.h>
42#include <sys/malloc.h>
43#include <sys/fnv_hash.h>
44#include <sys/proc.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/vnode.h>
48#include <sys/mount.h>
49#include <sys/refcount.h>
50#include <sys/sysctl.h>
51#include <sys/sx.h>
52#include <sys/eventhandler.h>
53#include <sys/time.h>
54#include <vm/uma.h>
55
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/inode.h>
58#include <ufs/ufs/dir.h>
59#include <ufs/ufs/dirhash.h>
60#include <ufs/ufs/extattr.h>
61#include <ufs/ufs/ufsmount.h>
62#include <ufs/ufs/ufs_extern.h>
63
/* Advance/retreat a value by one with wraparound within [0, limit). */
#define WRAPINCR(val, limit)	(((val) + 1 == (limit)) ? 0 : ((val) + 1))
#define WRAPDECR(val, limit)	(((val) == 0) ? ((limit) - 1) : ((val) - 1))
/* True if the mount uses the old directory format (no dirhash support). */
#define OFSFMT(vp)		((vp)->v_mount->mnt_maxsymlinklen <= 0)
/* Clamp a per-block free-space count to a valid dh_firstfree[] index. */
#define BLKFREE2IDX(n)		((n) > DH_NFSTATS ? DH_NFSTATS : (n))
68
69static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables");
70
71static int ufs_mindirhashsize = DIRBLKSIZ * 5;
72SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW,
73    &ufs_mindirhashsize,
74    0, "minimum directory size in bytes for which to use hashed lookup");
75static int ufs_dirhashmaxmem = 2 * 1024 * 1024;	/* NOTE: initial value. It is
76						   tuned in ufsdirhash_init() */
77SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem,
78    0, "maximum allowed dirhash memory usage");
79static int ufs_dirhashmem;
80SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem,
81    0, "current dirhash memory usage");
82static int ufs_dirhashcheck = 0;
83SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck,
84    0, "enable extra sanity tests");
85static int ufs_dirhashlowmemcount = 0;
86SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_lowmemcount, CTLFLAG_RD,
87    &ufs_dirhashlowmemcount, 0, "number of times low memory hook called");
88static int ufs_dirhashreclaimpercent = 10;
89static int ufsdirhash_set_reclaimpercent(SYSCTL_HANDLER_ARGS);
90SYSCTL_PROC(_vfs_ufs, OID_AUTO, dirhash_reclaimpercent,
91    CTLTYPE_INT | CTLFLAG_RW, 0, 0, ufsdirhash_set_reclaimpercent, "I",
92    "set percentage of dirhash cache to be removed in low VM events");
93
94
95static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
96static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
97static void ufsdirhash_delslot(struct dirhash *dh, int slot);
98static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
99	   doff_t offset);
100static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
101static int ufsdirhash_recycle(int wanted);
102static void ufsdirhash_lowmem(void);
103static void ufsdirhash_free_locked(struct inode *ip);
104
105static uma_zone_t	ufsdirhash_zone;
106
107#define DIRHASHLIST_LOCK() 		mtx_lock(&ufsdirhash_mtx)
108#define DIRHASHLIST_UNLOCK() 		mtx_unlock(&ufsdirhash_mtx)
109#define DIRHASH_BLKALLOC_WAITOK() 	uma_zalloc(ufsdirhash_zone, M_WAITOK)
110#define DIRHASH_BLKFREE(ptr) 		uma_zfree(ufsdirhash_zone, (ptr))
111#define	DIRHASH_ASSERT_LOCKED(dh)					\
112    sx_assert(&(dh)->dh_lock, SA_LOCKED)
113
114/* Dirhash list; recently-used entries are near the tail. */
115static TAILQ_HEAD(, dirhash) ufsdirhash_list;
116
117/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
118static struct mtx	ufsdirhash_mtx;
119
120/*
121 * Locking:
122 *
123 * The relationship between inode and dirhash is protected either by an
124 * exclusive vnode lock or the vnode interlock where a shared vnode lock
125 * may be used.  The dirhash_mtx is acquired after the dirhash lock.  To
126 * handle teardown races, code wishing to lock the dirhash for an inode
127 * when using a shared vnode lock must obtain a private reference on the
128 * dirhash while holding the vnode interlock.  They can drop it once they
129 * have obtained the dirhash lock and verified that the dirhash wasn't
130 * recycled while they waited for the dirhash lock.
131 *
132 * ufsdirhash_build() acquires a shared lock on the dirhash when it is
133 * successful.  This lock is released after a call to ufsdirhash_lookup().
134 *
135 * Functions requiring exclusive access use ufsdirhash_acquire() which may
136 * free a dirhash structure that was recycled by ufsdirhash_recycle().
137 *
138 * The dirhash lock may be held across io operations.
139 *
140 * WITNESS reports a lock order reversal between the "bufwait" lock
141 * and the "dirhash" lock.  However, this specific reversal will not
142 * cause a deadlock.  To get a deadlock, one would have to lock a
143 * buffer followed by the dirhash while a second thread locked a
144 * buffer while holding the dirhash lock.  The second order can happen
145 * under a shared or exclusive vnode lock for the associated directory
146 * in lookup().  The first order, however, can only happen under an
147 * exclusive vnode lock (e.g. unlink(), rename(), etc.).  Thus, for
148 * a thread to be doing a "bufwait" -> "dirhash" order, it has to hold
149 * an exclusive vnode lock.  That exclusive vnode lock will prevent
150 * any other threads from doing a "dirhash" -> "bufwait" order.
151 */
152
/*
 * Take a private reference on a dirhash structure.  Callers use this to
 * keep the structure alive while they sleep for its sx lock; see the
 * locking notes above.
 */
static void
ufsdirhash_hold(struct dirhash *dh)
{

	refcount_acquire(&dh->dh_refcount);
}
159
160static void
161ufsdirhash_drop(struct dirhash *dh)
162{
163
164	if (refcount_release(&dh->dh_refcount)) {
165		sx_destroy(&dh->dh_lock);
166		free(dh, M_DIRHASH);
167	}
168}
169
/*
 * Release the lock on a dirhash.  Works for both shared and exclusive
 * holders, since sx_unlock() handles either mode.
 */
static void
ufsdirhash_release(struct dirhash *dh)
{

	sx_unlock(&dh->dh_lock);
}
179
/*
 * Either acquire an existing hash locked shared or create a new hash and
 * return it exclusively locked.  May return NULL if the allocation fails.
 *
 * The vnode interlock is used to protect the i_dirhash pointer from
 * simultaneous access while only a shared vnode lock is held.
 */
static struct dirhash *
ufsdirhash_create(struct inode *ip)
{
	struct dirhash *ndh;	/* speculatively preallocated structure */
	struct dirhash *dh;	/* existing hash found on the inode */
	struct vnode *vp;

	ndh = dh = NULL;
	vp = ip->i_vnode;
	for (;;) {
		/* Racy check for i_dirhash to prefetch a dirhash structure. */
		if (ip->i_dirhash == NULL && ndh == NULL) {
			ndh = malloc(sizeof *dh, M_DIRHASH,
			    M_NOWAIT | M_ZERO);
			if (ndh == NULL)
				return (NULL);
			refcount_init(&ndh->dh_refcount, 1);

			/*
			 * The DUPOK is to prevent warnings from the
			 * sx_slock() a few lines down which is safe
			 * since the duplicate lock in that case is
			 * the one for this dirhash we are creating
			 * now which has no external references until
			 * after this function returns.
			 */
			sx_init_flags(&ndh->dh_lock, "dirhash", SX_DUPOK);
			sx_xlock(&ndh->dh_lock);
		}
		/*
		 * Check i_dirhash.  If it's NULL just try to use a
		 * preallocated structure.  If none exists loop and try again.
		 */
		VI_LOCK(vp);
		dh = ip->i_dirhash;
		if (dh == NULL) {
			/*
			 * Install our preallocated hash (still exclusively
			 * locked); if we had none, loop to allocate one.
			 */
			ip->i_dirhash = ndh;
			VI_UNLOCK(vp);
			if (ndh == NULL)
				continue;
			return (ndh);
		}
		ufsdirhash_hold(dh);
		VI_UNLOCK(vp);

		/* Acquire a shared lock on existing hashes. */
		sx_slock(&dh->dh_lock);

		/* The hash could've been recycled while we were waiting. */
		VI_LOCK(vp);
		if (ip->i_dirhash != dh) {
			VI_UNLOCK(vp);
			ufsdirhash_release(dh);
			ufsdirhash_drop(dh);
			continue;
		}
		VI_UNLOCK(vp);
		/*
		 * Drop only our private hold; the inode's own reference
		 * keeps dh alive from here on.
		 */
		ufsdirhash_drop(dh);

		/* If the hash is still valid we've succeeded. */
		if (dh->dh_hash != NULL)
			break;
		/*
		 * If the hash is NULL it has been recycled.  Try to upgrade
		 * so we can recreate it.  If we fail the upgrade, drop our
		 * lock and try again.
		 */
		if (sx_try_upgrade(&dh->dh_lock))
			break;
		sx_sunlock(&dh->dh_lock);
	}
	/* Free the preallocated structure if it was not necessary. */
	if (ndh) {
		ufsdirhash_release(ndh);
		ufsdirhash_drop(ndh);
	}
	return (dh);
}
265
266/*
267 * Acquire an exclusive lock on an existing hash.  Requires an exclusive
268 * vnode lock to protect the i_dirhash pointer.  hashes that have been
269 * recycled are reclaimed here and NULL is returned.
270 */
271static struct dirhash *
272ufsdirhash_acquire(struct inode *ip)
273{
274	struct dirhash *dh;
275
276	ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);
277
278	dh = ip->i_dirhash;
279	if (dh == NULL)
280		return (NULL);
281	sx_xlock(&dh->dh_lock);
282	if (dh->dh_hash != NULL)
283		return (dh);
284	ufsdirhash_free_locked(ip);
285	return (NULL);
286}
287
/*
 * Acquire exclusively and free the hash pointed to by ip.  Works with a
 * shared or exclusive vnode lock.
 */
void
ufsdirhash_free(struct inode *ip)
{
	struct dirhash *dh;
	struct vnode *vp;

	vp = ip->i_vnode;
	for (;;) {
		/* Grab a reference on this inode's dirhash if it has one. */
		VI_LOCK(vp);
		dh = ip->i_dirhash;
		if (dh == NULL) {
			VI_UNLOCK(vp);
			return;
		}
		ufsdirhash_hold(dh);
		VI_UNLOCK(vp);

		/* Exclusively lock the dirhash. */
		sx_xlock(&dh->dh_lock);

		/* If this dirhash still belongs to this inode, then free it. */
		VI_LOCK(vp);
		if (ip->i_dirhash == dh) {
			VI_UNLOCK(vp);
			/*
			 * Drop our private hold; the inode's reference is
			 * released by ufsdirhash_free_locked() below.
			 */
			ufsdirhash_drop(dh);
			break;
		}
		VI_UNLOCK(vp);

		/*
		 * This inode's dirhash has changed while we were
		 * waiting for the dirhash lock, so try again.
		 */
		ufsdirhash_release(dh);
		ufsdirhash_drop(dh);
	}
	ufsdirhash_free_locked(ip);
}
331
/*
 * Attempt to build up a hash table for the directory contents in
 * inode 'ip'. Returns 0 on success, or -1 if the operation failed.
 */
int
ufsdirhash_build(struct inode *ip)
{
	struct dirhash *dh;
	struct buf *bp = NULL;
	struct direct *ep;
	struct vnode *vp;
	doff_t bmask, pos;
	int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;

	/* Take care of a decreased sysctl value. */
	while (ufs_dirhashmem > ufs_dirhashmaxmem) {
		if (ufsdirhash_recycle(0) != 0)
			return (-1);
		/* Recycled enough memory, so unlock the list. */
		DIRHASHLIST_UNLOCK();
	}

	/* Check if we can/should use dirhash. */
	if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode) ||
	    ip->i_effnlink == 0) {
		if (ip->i_dirhash)
			ufsdirhash_free(ip);
		return (-1);
	}
	dh = ufsdirhash_create(ip);
	if (dh == NULL)
		return (-1);
	/* An already-populated hash comes back shared-locked; we're done. */
	if (dh->dh_hash != NULL)
		return (0);

	/* From here on dh is a fresh/recycled hash held exclusively. */
	vp = ip->i_vnode;
	/* Allocate 50% more entries than this dir size could ever need. */
	KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size"));
	nslots = ip->i_size / DIRECTSIZ(1);
	nslots = (nslots * 3 + 1) / 2;
	narrays = howmany(nslots, DH_NBLKOFF);
	nslots = narrays * DH_NBLKOFF;
	dirblocks = howmany(ip->i_size, DIRBLKSIZ);
	nblocks = (dirblocks * 3 + 1) / 2;
	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
	    nblocks * sizeof(*dh->dh_blkfree);
	DIRHASHLIST_LOCK();
	if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
		DIRHASHLIST_UNLOCK();
		/* Never let one directory consume over half the budget. */
		if (memreqd > ufs_dirhashmaxmem / 2)
			goto fail;
		/* Try to free some space. */
		if (ufsdirhash_recycle(memreqd) != 0)
			goto fail;
		/* Enough was freed, and list has been locked. */
	}
	ufs_dirhashmem += memreqd;
	DIRHASHLIST_UNLOCK();

	/* Initialise the hash table and block statistics. */
	dh->dh_memreq = memreqd;
	dh->dh_narrays = narrays;
	dh->dh_hlen = nslots;
	dh->dh_nblk = nblocks;
	dh->dh_dirblks = dirblocks;
	for (i = 0; i < DH_NFSTATS; i++)
		dh->dh_firstfree[i] = -1;
	/* Sentinel: the "fully free" slot always terminates scans. */
	dh->dh_firstfree[DH_NFSTATS] = 0;
	dh->dh_hused = 0;
	dh->dh_seqoff = -1;
	dh->dh_score = DH_SCOREINIT;
	dh->dh_lastused = time_second;

	/*
	 * Use non-blocking mallocs so that we will revert to a linear
	 * lookup on failure rather than potentially blocking forever.
	 */
	dh->dh_hash = malloc(narrays * sizeof(dh->dh_hash[0]),
	    M_DIRHASH, M_NOWAIT | M_ZERO);
	if (dh->dh_hash == NULL)
		goto fail;
	dh->dh_blkfree = malloc(nblocks * sizeof(dh->dh_blkfree[0]),
	    M_DIRHASH, M_NOWAIT);
	if (dh->dh_blkfree == NULL)
		goto fail;
	for (i = 0; i < narrays; i++) {
		if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
			goto fail;
		for (j = 0; j < DH_NBLKOFF; j++)
			dh->dh_hash[i][j] = DIRHASH_EMPTY;
	}
	for (i = 0; i < dirblocks; i++)
		dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
	bmask = vp->v_mount->mnt_stat.f_iosize - 1;
	pos = 0;
	/* Walk every entry in the directory and insert it into the hash. */
	while (pos < ip->i_size) {
		/* If necessary, get the next directory block. */
		if ((pos & bmask) == 0) {
			if (bp != NULL)
				brelse(bp);
			if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0)
				goto fail;
		}

		/* Add this entry to the hash. */
		ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
		if (ep->d_reclen == 0 || ep->d_reclen >
		    DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
			/* Corrupted directory. */
			brelse(bp);
			goto fail;
		}
		if (ep->d_ino != 0) {
			/* Add the entry (simplified ufsdirhash_add). */
			slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
			while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
				slot = WRAPINCR(slot, dh->dh_hlen);
			dh->dh_hused++;
			DH_ENTRY(dh, slot) = pos;
			ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep));
		}
		pos += ep->d_reclen;
	}

	if (bp != NULL)
		brelse(bp);
	DIRHASHLIST_LOCK();
	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
	dh->dh_onlist = 1;
	DIRHASHLIST_UNLOCK();
	/* Hand back a shared lock; ufsdirhash_lookup() releases it. */
	sx_downgrade(&dh->dh_lock);
	return (0);

fail:
	/* Tears down the partially built hash; requires the xlock we hold. */
	ufsdirhash_free_locked(ip);
	return (-1);
}
470
/*
 * Free any hash table associated with inode 'ip'.  The dirhash must be
 * held exclusively locked on entry; the lock is released (and the
 * structure possibly freed) before return.
 */
static void
ufsdirhash_free_locked(struct inode *ip)
{
	struct dirhash *dh;
	struct vnode *vp;
	int i;

	DIRHASH_ASSERT_LOCKED(ip->i_dirhash);

	/*
	 * Clear the pointer in the inode to prevent new threads from
	 * finding the dead structure.
	 */
	vp = ip->i_vnode;
	VI_LOCK(vp);
	dh = ip->i_dirhash;
	ip->i_dirhash = NULL;
	VI_UNLOCK(vp);

	/*
	 * Remove the hash from the list since we are going to free its
	 * memory.
	 */
	DIRHASHLIST_LOCK();
	if (dh->dh_onlist)
		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
	ufs_dirhashmem -= dh->dh_memreq;
	DIRHASHLIST_UNLOCK();

	/*
	 * At this point, any waiters for the lock should hold their
	 * own reference on the dirhash structure.  They will drop
	 * that reference once they grab the vnode interlock and see
	 * that ip->i_dirhash is NULL.
	 */
	sx_xunlock(&dh->dh_lock);

	/*
	 * Handle partially recycled as well as fully constructed hashes.
	 * Touching dh after the unlock is safe: we still hold the inode's
	 * reference, and no one else can reach dh through i_dirhash.
	 */
	if (dh->dh_hash != NULL) {
		for (i = 0; i < dh->dh_narrays; i++)
			if (dh->dh_hash[i] != NULL)
				DIRHASH_BLKFREE(dh->dh_hash[i]);
		free(dh->dh_hash, M_DIRHASH);
		if (dh->dh_blkfree != NULL)
			free(dh->dh_blkfree, M_DIRHASH);
	}

	/*
	 * Drop the inode's reference to the data structure.
	 */
	ufsdirhash_drop(dh);
}
528
/*
 * Find the offset of the specified name within the given inode.
 * Returns 0 on success, ENOENT if the entry does not exist, or
 * EJUSTRETURN if the caller should revert to a linear search.
 *
 * If successful, the directory offset is stored in *offp, and a
 * pointer to a struct buf containing the entry is stored in *bpp. If
 * prevoffp is non-NULL, the offset of the previous entry within
 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
 * is the first in a block, the start of the block is used).
 *
 * Must be called with the hash locked.  Returns with the hash unlocked.
 */
int
ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
    struct buf **bpp, doff_t *prevoffp)
{
	struct dirhash *dh, *dh_next;
	struct direct *dp;
	struct vnode *vp;
	struct buf *bp;
	doff_t blkoff, bmask, offset, prevoff, seqoff;
	int i, slot;
	int error;

	dh = ip->i_dirhash;
	KASSERT(dh != NULL && dh->dh_hash != NULL,
	    ("ufsdirhash_lookup: Invalid dirhash %p\n", dh));
	DIRHASH_ASSERT_LOCKED(dh);
	/*
	 * Move this dirhash towards the end of the list if it has a
	 * score higher than the next entry, and acquire the dh_lock.
	 */
	DIRHASHLIST_LOCK();
	if (TAILQ_NEXT(dh, dh_list) != NULL) {
		/*
		 * If the new score will be greater than that of the next
		 * entry, then move this entry past it. With both mutexes
		 * held, dh_next won't go away, but its dh_score could
		 * change; that's not important since it is just a hint.
		 */
		if ((dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
		    dh->dh_score >= dh_next->dh_score) {
			KASSERT(dh->dh_onlist, ("dirhash: not on list"));
			TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
			TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
			    dh_list);
		}
	}
	/* Update the score. */
	if (dh->dh_score < DH_SCOREMAX)
		dh->dh_score++;

	/* Update last used time. */
	dh->dh_lastused = time_second;
	DIRHASHLIST_UNLOCK();

	vp = ip->i_vnode;
	bmask = vp->v_mount->mnt_stat.f_iosize - 1;
	blkoff = -1;
	bp = NULL;
	seqoff = dh->dh_seqoff;
restart:
	slot = ufsdirhash_hash(dh, name, namelen);

	if (seqoff != -1) {
		/*
		 * Sequential access optimisation. seqoff contains the
		 * offset of the directory entry immediately following
		 * the last entry that was looked up. Check if this offset
		 * appears in the hash chain for the name we are looking for.
		 */
		for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
		    i = WRAPINCR(i, dh->dh_hlen))
			if (offset == seqoff)
				break;
		if (offset == seqoff) {
			/*
			 * We found an entry with the expected offset. This
			 * is probably the entry we want, but if not, the
			 * code below will retry.
			 */
			slot = i;
		} else
			seqoff = -1;
	}

	/* Linear-probe the chain starting at 'slot' for a name match. */
	for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
	    slot = WRAPINCR(slot, dh->dh_hlen)) {
		if (offset == DIRHASH_DEL)
			continue;
		if (offset < 0 || offset >= ip->i_size)
			panic("ufsdirhash_lookup: bad offset in hash array");
		/* Switch buffers when the candidate lies in another block. */
		if ((offset & ~bmask) != blkoff) {
			if (bp != NULL)
				brelse(bp);
			blkoff = offset & ~bmask;
			if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) {
				error = EJUSTRETURN;
				goto fail;
			}
		}
		KASSERT(bp != NULL, ("no buffer allocated"));
		dp = (struct direct *)(bp->b_data + (offset & bmask));
		if (dp->d_reclen == 0 || dp->d_reclen >
		    DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
			/* Corrupted directory. */
			error = EJUSTRETURN;
			goto fail;
		}
		if (dp->d_namlen == namelen &&
		    bcmp(dp->d_name, name, namelen) == 0) {
			/* Found. Get the prev offset if needed. */
			if (prevoffp != NULL) {
				if (offset & (DIRBLKSIZ - 1)) {
					prevoff = ufsdirhash_getprev(dp,
					    offset);
					if (prevoff == -1) {
						error = EJUSTRETURN;
						goto fail;
					}
				} else
					prevoff = offset;
				*prevoffp = prevoff;
			}

			/* Update offset. */
			dh->dh_seqoff = offset + DIRSIZ(0, dp);
			/* Note: bp is handed to the caller, not released. */
			*bpp = bp;
			*offp = offset;
			ufsdirhash_release(dh);
			return (0);
		}

		/*
		 * When the name doesn't match in the sequential
		 * optimization case, go back and search normally.
		 */
		if (seqoff != -1) {
			seqoff = -1;
			goto restart;
		}
	}
	error = ENOENT;
fail:
	ufsdirhash_release(dh);
	if (bp != NULL)
		brelse(bp);
	return (error);
}
679
/*
 * Find a directory block with room for 'slotneeded' bytes. Returns
 * the offset of the directory entry that begins the free space.
 * This will either be the offset of an existing entry that has free
 * space at the end, or the offset of an entry with d_ino == 0 at
 * the start of a DIRBLKSIZ block.
 *
 * To use the space, the caller may need to compact existing entries in
 * the directory. The total number of bytes in all of the entries involved
 * in the compaction is stored in *slotsize. In other words, all of
 * the entries that must be compacted are exactly contained in the
 * region beginning at the returned offset and spanning *slotsize bytes.
 *
 * Returns -1 if no space was found, indicating that the directory
 * must be extended.
 */
doff_t
ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
{
	struct direct *dp;
	struct dirhash *dh;
	struct buf *bp;
	doff_t pos, slotstart;
	int dirblock, error, freebytes, i;

	dh = ip->i_dirhash;
	KASSERT(dh != NULL && dh->dh_hash != NULL,
	    ("ufsdirhash_findfree: Invalid dirhash %p\n", dh));
	DIRHASH_ASSERT_LOCKED(dh);

	/*
	 * Find a directory block with the desired free space using the
	 * firstfree summary: index i lists blocks with i*DIRALIGN free
	 * bytes; the DH_NFSTATS slot catches everything larger.
	 */
	dirblock = -1;
	for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
		if ((dirblock = dh->dh_firstfree[i]) != -1)
			break;
	if (dirblock == -1)
		return (-1);

	KASSERT(dirblock < dh->dh_nblk &&
	    dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
	    ("ufsdirhash_findfree: bad stats"));
	pos = dirblock * DIRBLKSIZ;
	error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp);
	if (error)
		return (-1);

	/* Find the first entry with free space. */
	for (i = 0; i < DIRBLKSIZ; ) {
		if (dp->d_reclen == 0) {
			/* Corrupted block: give up and force a linear path. */
			brelse(bp);
			return (-1);
		}
		if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp))
			break;
		i += dp->d_reclen;
		dp = (struct direct *)((char *)dp + dp->d_reclen);
	}
	if (i > DIRBLKSIZ) {
		brelse(bp);
		return (-1);
	}
	slotstart = pos + i;

	/* Find the range of entries needed to get enough space */
	freebytes = 0;
	while (i < DIRBLKSIZ && freebytes < slotneeded) {
		/* Count the whole record, minus the live entry (if any). */
		freebytes += dp->d_reclen;
		if (dp->d_ino != 0)
			freebytes -= DIRSIZ(0, dp);
		if (dp->d_reclen == 0) {
			brelse(bp);
			return (-1);
		}
		i += dp->d_reclen;
		dp = (struct direct *)((char *)dp + dp->d_reclen);
	}
	if (i > DIRBLKSIZ) {
		brelse(bp);
		return (-1);
	}
	if (freebytes < slotneeded)
		panic("ufsdirhash_findfree: free mismatch");
	brelse(bp);
	*slotsize = pos + i - slotstart;
	return (slotstart);
}
766
767/*
768 * Return the start of the unused space at the end of a directory, or
769 * -1 if there are no trailing unused blocks.
770 */
771doff_t
772ufsdirhash_enduseful(struct inode *ip)
773{
774
775	struct dirhash *dh;
776	int i;
777
778	dh = ip->i_dirhash;
779	DIRHASH_ASSERT_LOCKED(dh);
780	KASSERT(dh != NULL && dh->dh_hash != NULL,
781	    ("ufsdirhash_enduseful: Invalid dirhash %p\n", dh));
782
783	if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN)
784		return (-1);
785
786	for (i = dh->dh_dirblks - 1; i >= 0; i--)
787		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
788			break;
789
790	return ((doff_t)(i + 1) * DIRBLKSIZ);
791}
792
/*
 * Insert information into the hash about a new directory entry. dirp
 * points to a struct direct containing the entry, and offset specifies
 * the offset of this entry.
 */
void
ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
{
	struct dirhash *dh;
	int slot;

	/* Silently do nothing if there is no (valid) hash to update. */
	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_add: bad offset"));
	/*
	 * Normal hash usage is < 66%. If the usage gets too high then
	 * remove the hash entirely and let it be rebuilt later.
	 */
	if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
		ufsdirhash_free_locked(ip);
		return;
	}

	/* Find a free hash slot (empty or deleted), and add the entry. */
	slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
	while (DH_ENTRY(dh, slot) >= 0)
		slot = WRAPINCR(slot, dh->dh_hlen);
	/* Reusing a DIRHASH_DEL tombstone does not change dh_hused. */
	if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
		dh->dh_hused++;
	DH_ENTRY(dh, slot) = offset;

	/* Update last used time. */
	dh->dh_lastused = time_second;

	/* Update the per-block summary info. */
	ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp));
	ufsdirhash_release(dh);
}
833
/*
 * Remove the specified directory entry from the hash. The entry to remove
 * is defined by the name in `dirp', which must exist at the specified
 * `offset' within the directory.
 */
void
ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
{
	struct dirhash *dh;
	int slot;

	/* Silently do nothing if there is no (valid) hash to update. */
	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_remove: bad offset"));
	/* Find the entry (panics via findslot if it is missing). */
	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);

	/* Remove the hash entry. */
	ufsdirhash_delslot(dh, slot);

	/* Update the per-block summary info. */
	ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp));
	ufsdirhash_release(dh);
}
860
861/*
862 * Change the offset associated with a directory entry in the hash. Used
863 * when compacting directory blocks.
864 */
865void
866ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
867    doff_t newoff)
868{
869	struct dirhash *dh;
870	int slot;
871
872	if ((dh = ufsdirhash_acquire(ip)) == NULL)
873		return;
874
875	KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
876	    newoff < dh->dh_dirblks * DIRBLKSIZ,
877	    ("ufsdirhash_move: bad offset"));
878	/* Find the entry, and update the offset. */
879	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
880	DH_ENTRY(dh, slot) = newoff;
881	ufsdirhash_release(dh);
882}
883
/*
 * Inform dirhash that the directory has grown by one block that
 * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
 */
void
ufsdirhash_newblk(struct inode *ip, doff_t offset)
{
	struct dirhash *dh;
	int block;

	/* Silently do nothing if there is no (valid) hash to update. */
	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_newblk: bad offset"));
	block = offset / DIRBLKSIZ;
	if (block >= dh->dh_nblk) {
		/* Out of space; must rebuild. */
		ufsdirhash_free_locked(ip);
		return;
	}
	dh->dh_dirblks = block + 1;

	/* Account for the new free block. */
	dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
	if (dh->dh_firstfree[DH_NFSTATS] == -1)
		dh->dh_firstfree[DH_NFSTATS] = block;
	ufsdirhash_release(dh);
}
913
/*
 * Inform dirhash that the directory is being truncated to 'offset'
 * bytes (a multiple of DIRBLKSIZ).
 */
void
ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
{
	struct dirhash *dh;
	int block, i;

	/* Silently do nothing if there is no (valid) hash to update. */
	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_dirtrunc: bad offset"));
	block = howmany(offset, DIRBLKSIZ);
	/*
	 * If the directory shrinks to less than 1/8 of dh_nblk blocks
	 * (about 20% of its original size due to the 50% extra added in
	 * ufsdirhash_build) then free it, and let the caller rebuild
	 * if necessary.
	 */
	if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
		ufsdirhash_free_locked(ip);
		return;
	}

	/*
	 * Remove any `first free' information pertaining to the
	 * truncated blocks. All blocks we're removing should be
	 * completely unused.
	 */
	if (dh->dh_firstfree[DH_NFSTATS] >= block)
		dh->dh_firstfree[DH_NFSTATS] = -1;
	for (i = block; i < dh->dh_dirblks; i++)
		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
			panic("ufsdirhash_dirtrunc: blocks in use");
	for (i = 0; i < DH_NFSTATS; i++)
		if (dh->dh_firstfree[i] >= block)
			panic("ufsdirhash_dirtrunc: first free corrupt");
	dh->dh_dirblks = block;
	ufsdirhash_release(dh);
}
956
/*
 * Debugging function to check that the dirhash information about
 * a directory block matches its actual contents. Panics if a mismatch
 * is detected.
 *
 * On entry, `buf' should point to the start of an in-core
 * DIRBLKSIZ-sized directory block, and `offset' should contain the
 * offset from the start of the directory of that block.
 *
 * Only active when the vfs.ufs.dirhash_docheck sysctl is set.
 */
void
ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
{
	struct dirhash *dh;
	struct direct *dp;
	int block, ffslot, i, nfree;

	if (!ufs_dirhashcheck)
		return;
	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	block = offset / DIRBLKSIZ;
	if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
		panic("ufsdirhash_checkblock: bad offset");

	/* Walk the block, verifying each entry and totalling free space. */
	nfree = 0;
	for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
		dp = (struct direct *)(buf + i);
		if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
			panic("ufsdirhash_checkblock: bad dir");

		if (dp->d_ino == 0) {
#if 0
			/*
			 * XXX entries with d_ino == 0 should only occur
			 * at the start of a DIRBLKSIZ block. However the
			 * ufs code is tolerant of such entries at other
			 * offsets, and fsck does not fix them.
			 */
			if (i != 0)
				panic("ufsdirhash_checkblock: bad dir inode");
#endif
			nfree += dp->d_reclen;
			continue;
		}

		/* Check that the entry	exists (will panic if it doesn't). */
		ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);

		/* Slack at the end of a live record counts as free. */
		nfree += dp->d_reclen - DIRSIZ(0, dp);
	}
	if (i != DIRBLKSIZ)
		panic("ufsdirhash_checkblock: bad dir end");

	if (dh->dh_blkfree[block] * DIRALIGN != nfree)
		panic("ufsdirhash_checkblock: bad free count");

	/* Cross-check the firstfree summary against the recomputed total. */
	ffslot = BLKFREE2IDX(nfree / DIRALIGN);
	for (i = 0; i <= DH_NFSTATS; i++)
		if (dh->dh_firstfree[i] == block && i != ffslot)
			panic("ufsdirhash_checkblock: bad first-free");
	if (dh->dh_firstfree[ffslot] == -1)
		panic("ufsdirhash_checkblock: missing first-free entry");
	ufsdirhash_release(dh);
}
1022
1023/*
1024 * Hash the specified filename into a dirhash slot.
1025 */
1026static int
1027ufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
1028{
1029	u_int32_t hash;
1030
1031	/*
1032	 * We hash the name and then some other bit of data that is
1033	 * invariant over the dirhash's lifetime. Otherwise names
1034	 * differing only in the last byte are placed close to one
1035	 * another in the table, which is bad for linear probing.
1036	 */
1037	hash = fnv_32_buf(name, namelen, FNV1_32_INIT);
1038	hash = fnv_32_buf(&dh, sizeof(dh), hash);
1039	return (hash % dh->dh_hlen);
1040}
1041
1042/*
1043 * Adjust the number of free bytes in the block containing `offset'
1044 * by the value specified by `diff'.
1045 *
1046 * The caller must ensure we have exclusive access to `dh'; normally
1047 * that means that dh_lock should be held, but this is also called
1048 * from ufsdirhash_build() where exclusive access can be assumed.
1049 */
1050static void
1051ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
1052{
1053	int block, i, nfidx, ofidx;
1054
1055	/* Update the per-block summary info. */
1056	block = offset / DIRBLKSIZ;
1057	KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
1058	     ("dirhash bad offset"));
1059	ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
1060	dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
1061	nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
1062
1063	/* Update the `first free' list if necessary. */
1064	if (ofidx != nfidx) {
1065		/* If removing, scan forward for the next block. */
1066		if (dh->dh_firstfree[ofidx] == block) {
1067			for (i = block + 1; i < dh->dh_dirblks; i++)
1068				if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
1069					break;
1070			dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
1071		}
1072
1073		/* Make this the new `first free' if necessary */
1074		if (dh->dh_firstfree[nfidx] > block ||
1075		    dh->dh_firstfree[nfidx] == -1)
1076			dh->dh_firstfree[nfidx] = block;
1077	}
1078}
1079
1080/*
1081 * Find the specified name which should have the specified offset.
1082 * Returns a slot number, and panics on failure.
1083 *
1084 * `dh' must be locked on entry and remains so on return.
1085 */
1086static int
1087ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
1088{
1089	int slot;
1090
1091	DIRHASH_ASSERT_LOCKED(dh);
1092
1093	/* Find the entry. */
1094	KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
1095	slot = ufsdirhash_hash(dh, name, namelen);
1096	while (DH_ENTRY(dh, slot) != offset &&
1097	    DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
1098		slot = WRAPINCR(slot, dh->dh_hlen);
1099	if (DH_ENTRY(dh, slot) != offset)
1100		panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
1101
1102	return (slot);
1103}
1104
1105/*
1106 * Remove the entry corresponding to the specified slot from the hash array.
1107 *
1108 * `dh' must be locked on entry and remains so on return.
1109 */
static void
ufsdirhash_delslot(struct dirhash *dh, int slot)
{
	int i;

	DIRHASH_ASSERT_LOCKED(dh);

	/* Mark the entry as deleted. */
	DH_ENTRY(dh, slot) = DIRHASH_DEL;

	/* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
	for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
		i = WRAPINCR(i, dh->dh_hlen);
	if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
		/*
		 * The run of DEL markers ending at `slot' is followed by
		 * an EMPTY slot, so no probe chain can ever need to walk
		 * through these tombstones again.  Walk backwards through
		 * the run converting DEL to EMPTY; dh_hused is decremented
		 * for each reclaimed slot, which shows that DEL slots are
		 * counted as "used" (they still lengthen probe chains).
		 */
		i = WRAPDECR(i, dh->dh_hlen);
		while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
			DH_ENTRY(dh, i) = DIRHASH_EMPTY;
			dh->dh_hused--;
			i = WRAPDECR(i, dh->dh_hlen);
		}
		/*
		 * NOTE(review): the message says "neg hlen" but the
		 * assertion actually guards dh_hused going negative.
		 */
		KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
	}
}
1133
1134/*
1135 * Given a directory entry and its offset, find the offset of the
1136 * previous entry in the same DIRBLKSIZ-sized block. Returns an
1137 * offset, or -1 if there is no previous entry in the block or some
1138 * other problem occurred.
1139 */
1140static doff_t
1141ufsdirhash_getprev(struct direct *dirp, doff_t offset)
1142{
1143	struct direct *dp;
1144	char *blkbuf;
1145	doff_t blkoff, prevoff;
1146	int entrypos, i;
1147
1148	blkoff = rounddown2(offset, DIRBLKSIZ);	/* offset of start of block */
1149	entrypos = offset & (DIRBLKSIZ - 1);	/* entry relative to block */
1150	blkbuf = (char *)dirp - entrypos;
1151	prevoff = blkoff;
1152
1153	/* If `offset' is the start of a block, there is no previous entry. */
1154	if (entrypos == 0)
1155		return (-1);
1156
1157	/* Scan from the start of the block until we get to the entry. */
1158	for (i = 0; i < entrypos; i += dp->d_reclen) {
1159		dp = (struct direct *)(blkbuf + i);
1160		if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
1161			return (-1);	/* Corrupted directory. */
1162		prevoff = blkoff + i;
1163	}
1164	return (prevoff);
1165}
1166
1167/*
1168 * Delete the given dirhash and reclaim its memory. Assumes that
1169 * ufsdirhash_list is locked, and leaves it locked. Also assumes
1170 * that dh is locked. Returns the amount of memory freed.
1171 */
static int
ufsdirhash_destroy(struct dirhash *dh)
{
	doff_t **hash;
	u_int8_t *blkfree;
	int i, mem, narrays;

	KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));

	/*
	 * Remove it from the list and detach its memory.  The pointers
	 * are cleared while the dirhash is still locked so that the
	 * actual freeing can be done after the lock is dropped.
	 */
	TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
	dh->dh_onlist = 0;
	hash = dh->dh_hash;
	dh->dh_hash = NULL;
	blkfree = dh->dh_blkfree;
	dh->dh_blkfree = NULL;
	narrays = dh->dh_narrays;
	mem = dh->dh_memreq;
	dh->dh_memreq = 0;

	/* Unlock dirhash and free the detached memory. */
	ufsdirhash_release(dh);
	/* dh_hash was an array of `narrays' separately allocated blocks. */
	for (i = 0; i < narrays; i++)
		DIRHASH_BLKFREE(hash[i]);
	free(hash, M_DIRHASH);
	free(blkfree, M_DIRHASH);

	/* Account for the returned memory. */
	ufs_dirhashmem -= mem;

	/* Report the number of bytes released so callers can total it. */
	return (mem);
}
1204
1205/*
1206 * Try to free up `wanted' bytes by stealing memory from existing
1207 * dirhashes. Returns zero with list locked if successful.
1208 */
static int
ufsdirhash_recycle(int wanted)
{
	struct dirhash *dh;

	DIRHASHLIST_LOCK();
	dh = TAILQ_FIRST(&ufsdirhash_list);
	/* Keep destroying hashes until `wanted' more bytes would fit. */
	while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
		/* Decrement the score; only recycle if it becomes zero. */
		if (dh == NULL || --dh->dh_score > 0) {
			/* Failure path: the list lock is dropped here. */
			DIRHASHLIST_UNLOCK();
			return (-1);
		}
		/*
		 * If we can't lock it it's in use and we don't want to
		 * recycle it anyway.
		 */
		if (!sx_try_xlock(&dh->dh_lock)) {
			dh = TAILQ_NEXT(dh, dh_list);
			continue;
		}

		/* Destroy unlocks `dh'; the list lock remains held. */
		ufsdirhash_destroy(dh);

		/* Repeat if necessary. */
		dh = TAILQ_FIRST(&ufsdirhash_list);
	}
	/* Success; return with list locked. */
	return (0);
}
1239
1240/*
1241 * Callback that frees some dirhashes when the system is low on virtual memory.
1242 */
static void
ufsdirhash_lowmem()
{
	struct dirhash *dh, *dh_temp;
	int memfreed, memwanted;

	/* Statistics counter for the number of lowmem invocations. */
	ufs_dirhashlowmemcount++;
	memfreed = 0;
	/* Aim to reclaim a fixed percentage of the memory currently used. */
	memwanted = ufs_dirhashmem * ufs_dirhashreclaimpercent / 100;

	DIRHASHLIST_LOCK();

	/*
	 * Reclaim up to memwanted from the oldest dirhashes.  This allows
	 * us to make some progress when the system is running out of
	 * memory without compromising the dynamic behaviour of the
	 * maximum age.  If the situation does not improve, lowmem will
	 * eventually be retriggered and free some other entry in the
	 * cache.  The entries on the head of the list should be the
	 * oldest.  If during list traversal we can't get a lock on a
	 * dirhash, it is skipped.
	 */
	TAILQ_FOREACH_SAFE(dh, &ufsdirhash_list, dh_list, dh_temp) {
		if (sx_try_xlock(&dh->dh_lock))
			memfreed += ufsdirhash_destroy(dh);
		if (memfreed >= memwanted)
			break;
	}
	DIRHASHLIST_UNLOCK();
}
1272
1273static int
1274ufsdirhash_set_reclaimpercent(SYSCTL_HANDLER_ARGS)
1275{
1276	int error, v;
1277
1278	v = ufs_dirhashreclaimpercent;
1279	error = sysctl_handle_int(oidp, &v, v, req);
1280	if (error)
1281		return (error);
1282	if (req->newptr == NULL)
1283		return (error);
1284	if (v == ufs_dirhashreclaimpercent)
1285		return (0);
1286
1287	/* Refuse invalid percentages */
1288	if (v < 0 || v > 100)
1289		return (EINVAL);
1290	ufs_dirhashreclaimpercent = v;
1291	return (0);
1292}
1293
1294void
1295ufsdirhash_init()
1296{
1297	ufs_dirhashmaxmem = lmax(roundup(hibufspace / 64, PAGE_SIZE),
1298	    2 * 1024 * 1024);
1299
1300	ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t),
1301	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1302	mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF);
1303	TAILQ_INIT(&ufsdirhash_list);
1304
1305	/* Register a callback function to handle low memory signals */
1306	EVENTHANDLER_REGISTER(vm_lowmem, ufsdirhash_lowmem, NULL,
1307	    EVENTHANDLER_PRI_FIRST);
1308}
1309
1310void
1311ufsdirhash_uninit()
1312{
1313	KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit"));
1314	uma_zdestroy(ufsdirhash_zone);
1315	mtx_destroy(&ufsdirhash_mtx);
1316}
1317
1318#endif /* UFS_DIRHASH */
1319