1/* $OpenBSD: ufs_dirhash.c,v 1.43 2024/01/09 03:15:59 guenther Exp $	*/
2/*
3 * Copyright (c) 2001, 2002 Ian Dowse.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * This implements a hash-based lookup scheme for UFS directories.
29 */
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/lock.h>
34#include <sys/malloc.h>
35#include <sys/pool.h>
36#include <sys/buf.h>
37#include <sys/vnode.h>
38#include <sys/mount.h>
39#include <sys/sysctl.h>
40#include <sys/mutex.h>
41
42#include <crypto/siphash.h>
43
44#include <ufs/ufs/quota.h>
45#include <ufs/ufs/inode.h>
46#include <ufs/ufs/dir.h>
47#include <ufs/ufs/dirhash.h>
48#include <ufs/ufs/ufsmount.h>
49#include <ufs/ufs/ufs_extern.h>
50
51#define WRAPINCR(val, limit)	(((val) + 1 == (limit)) ? 0 : ((val) + 1))
52#define WRAPDECR(val, limit)	(((val) == 0) ? ((limit) - 1) : ((val) - 1))
53#define BLKFREE2IDX(n)		((n) > DH_NFSTATS ? DH_NFSTATS : (n))
54
55int ufs_mindirhashsize;
56int ufs_dirhashmaxmem;
57int ufs_dirhashmem;
58int ufs_dirhashcheck;
59
60SIPHASH_KEY ufsdirhash_key;
61
62int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
63void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
64void ufsdirhash_delslot(struct dirhash *dh, int slot);
65int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
66   doff_t offset);
67doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
68int ufsdirhash_recycle(int wanted);
69
70struct pool		ufsdirhash_pool;
71
72#define	DIRHASHLIST_LOCK()	rw_enter_write(&ufsdirhash_mtx)
73#define	DIRHASHLIST_UNLOCK()	rw_exit_write(&ufsdirhash_mtx)
74#define	DIRHASH_LOCK(dh)	rw_enter_write(&(dh)->dh_mtx)
75#define	DIRHASH_UNLOCK(dh)	rw_exit_write(&(dh)->dh_mtx)
76#define	DIRHASH_BLKALLOC_WAITOK()	pool_get(&ufsdirhash_pool, PR_WAITOK)
77#define	DIRHASH_BLKFREE(v)		pool_put(&ufsdirhash_pool, v)
78
79#define	mtx_assert(l, f)	/* nothing */
80#define DIRHASH_ASSERT(e, m)	KASSERT((e))
81
82/* Dirhash list; recently-used entries are near the tail. */
83TAILQ_HEAD(, dirhash) ufsdirhash_list;
84
85/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
86struct rwlock		ufsdirhash_mtx;
87
88/*
89 * Locking order:
90 *	ufsdirhash_mtx
91 *	dh_mtx
92 *
93 * The dh_mtx mutex should be acquired either via the inode lock, or via
94 * ufsdirhash_mtx. Only the owner of the inode may free the associated
95 * dirhash, but anything can steal its memory and set dh_hash to NULL.
96 */
97
98/*
99 * Attempt to build up a hash table for the directory contents in
100 * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
101 */
102int
103ufsdirhash_build(struct inode *ip)
104{
105	struct dirhash *dh;
106	struct buf *bp = NULL;
107	struct direct *ep;
108	struct vnode *vp;
109	doff_t bmask, pos;
110	int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
111
112	/* Check if we can/should use dirhash. */
113	if (ip->i_dirhash == NULL) {
114		if (DIP(ip, size) < ufs_mindirhashsize)
115			return (-1);
116	} else {
117		/* Hash exists, but sysctls could have changed. */
118		if (DIP(ip, size) < ufs_mindirhashsize ||
119		    ufs_dirhashmem > ufs_dirhashmaxmem) {
120			ufsdirhash_free(ip);
121			return (-1);
122		}
123		/* Check if hash exists and is intact (note: unlocked read). */
124		if (ip->i_dirhash->dh_hash != NULL)
125			return (0);
126		/* Free the old, recycled hash and build a new one. */
127		ufsdirhash_free(ip);
128	}
129
130	/* Don't hash removed directories. */
131	if (ip->i_effnlink == 0)
132		return (-1);
133
134	vp = ip->i_vnode;
135	/* Allocate 50% more entries than this dir size could ever need. */
136	DIRHASH_ASSERT(DIP(ip, size) >= DIRBLKSIZ, ("ufsdirhash_build size"));
137	nslots = DIP(ip, size) / DIRECTSIZ(1);
138	nslots = (nslots * 3 + 1) / 2;
139	narrays = howmany(nslots, DH_NBLKOFF);
140	nslots = narrays * DH_NBLKOFF;
141	dirblocks = howmany(DIP(ip, size), DIRBLKSIZ);
142	nblocks = (dirblocks * 3 + 1) / 2;
143
144	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
145	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
146	    nblocks * sizeof(*dh->dh_blkfree);
147	DIRHASHLIST_LOCK();
148	if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
149		DIRHASHLIST_UNLOCK();
150		if (memreqd > ufs_dirhashmaxmem / 2)
151			return (-1);
152
153		/* Try to free some space. */
154		if (ufsdirhash_recycle(memreqd) != 0)
155			return (-1);
156		/* Enough was freed, and list has been locked. */
157	}
158	ufs_dirhashmem += memreqd;
159	DIRHASHLIST_UNLOCK();
160
161	/*
162	 * Use non-blocking mallocs so that we will revert to a linear
163	 * lookup on failure rather than potentially blocking forever.
164	 */
165	dh = malloc(sizeof(*dh), M_DIRHASH, M_NOWAIT|M_ZERO);
166	if (dh == NULL) {
167		DIRHASHLIST_LOCK();
168		ufs_dirhashmem -= memreqd;
169		DIRHASHLIST_UNLOCK();
170		return (-1);
171	}
172	dh->dh_hash = mallocarray(narrays, sizeof(dh->dh_hash[0]),
173	    M_DIRHASH, M_NOWAIT|M_ZERO);
174	dh->dh_blkfree = mallocarray(nblocks, sizeof(dh->dh_blkfree[0]),
175	    M_DIRHASH, M_NOWAIT | M_ZERO);
176	if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
177		goto fail;
178	for (i = 0; i < narrays; i++) {
179		if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
180			goto fail;
181		for (j = 0; j < DH_NBLKOFF; j++)
182			dh->dh_hash[i][j] = DIRHASH_EMPTY;
183	}
184
185	/* Initialise the hash table and block statistics. */
186	rw_init(&dh->dh_mtx, "dirhash");
187	dh->dh_narrays = narrays;
188	dh->dh_hlen = nslots;
189	dh->dh_nblk = nblocks;
190	dh->dh_dirblks = dirblocks;
191	for (i = 0; i < dirblocks; i++)
192		dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
193	for (i = 0; i < DH_NFSTATS; i++)
194		dh->dh_firstfree[i] = -1;
195	dh->dh_firstfree[DH_NFSTATS] = 0;
196	dh->dh_seqopt = 0;
197	dh->dh_seqoff = 0;
198	dh->dh_score = DH_SCOREINIT;
199	ip->i_dirhash = dh;
200
201	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
202	pos = 0;
203	while (pos < DIP(ip, size)) {
204		/* If necessary, get the next directory block. */
205		if ((pos & bmask) == 0) {
206			if (bp != NULL)
207				brelse(bp);
208			if (UFS_BUFATOFF(ip, (off_t)pos, NULL, &bp) != 0)
209				goto fail;
210		}
211		/* Add this entry to the hash. */
212		ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
213		if (ep->d_reclen == 0 || ep->d_reclen >
214		    DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
215			/* Corrupted directory. */
216			brelse(bp);
217			goto fail;
218		}
219		if (ep->d_ino != 0) {
220			/* Add the entry (simplified ufsdirhash_add). */
221			slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
222			while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
223				slot = WRAPINCR(slot, dh->dh_hlen);
224			dh->dh_hused++;
225			DH_ENTRY(dh, slot) = pos;
226			ufsdirhash_adjfree(dh, pos, -DIRSIZ(ep));
227		}
228		pos += ep->d_reclen;
229	}
230
231	if (bp != NULL)
232		brelse(bp);
233	DIRHASHLIST_LOCK();
234	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
235	dh->dh_onlist = 1;
236	DIRHASHLIST_UNLOCK();
237	return (0);
238
239fail:
240	if (dh->dh_hash != NULL) {
241		for (i = 0; i < narrays; i++)
242			if (dh->dh_hash[i] != NULL)
243				DIRHASH_BLKFREE(dh->dh_hash[i]);
244		free(dh->dh_hash, M_DIRHASH,
245		    narrays * sizeof(dh->dh_hash[0]));
246	}
247	if (dh->dh_blkfree != NULL)
248		free(dh->dh_blkfree, M_DIRHASH,
249		    nblocks * sizeof(dh->dh_blkfree[0]));
250	free(dh, M_DIRHASH, sizeof(*dh));
251	ip->i_dirhash = NULL;
252	DIRHASHLIST_LOCK();
253	ufs_dirhashmem -= memreqd;
254	DIRHASHLIST_UNLOCK();
255	return (-1);
256}
257
258/*
259 * Free any hash table associated with inode 'ip'.
260 */
261void
262ufsdirhash_free(struct inode *ip)
263{
264	struct dirhash *dh;
265	int i, mem;
266
267	if ((dh = ip->i_dirhash) == NULL)
268		return;
269	DIRHASHLIST_LOCK();
270	DIRHASH_LOCK(dh);
271	if (dh->dh_onlist)
272		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
273	DIRHASH_UNLOCK(dh);
274	DIRHASHLIST_UNLOCK();
275
276	/* The dirhash pointed to by 'dh' is exclusively ours now. */
277
278	mem = sizeof(*dh);
279	if (dh->dh_hash != NULL) {
280		for (i = 0; i < dh->dh_narrays; i++)
281			DIRHASH_BLKFREE(dh->dh_hash[i]);
282		free(dh->dh_hash, M_DIRHASH,
283		    dh->dh_narrays * sizeof(dh->dh_hash[0]));
284		free(dh->dh_blkfree, M_DIRHASH,
285		    dh->dh_nblk * sizeof(dh->dh_blkfree[0]));
286		mem += dh->dh_narrays * sizeof(*dh->dh_hash) +
287		    dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
288		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
289	}
290	free(dh, M_DIRHASH, sizeof(*dh));
291	ip->i_dirhash = NULL;
292
293	DIRHASHLIST_LOCK();
294	ufs_dirhashmem -= mem;
295	DIRHASHLIST_UNLOCK();
296}
297
298/*
299 * Find the offset of the specified name within the given inode.
300 * Returns 0 on success, ENOENT if the entry does not exist, or
301 * EJUSTRETURN if the caller should revert to a linear search.
302 *
303 * If successful, the directory offset is stored in *offp, and a
304 * pointer to a struct buf containing the entry is stored in *bpp. If
305 * prevoffp is non-NULL, the offset of the previous entry within
306 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
307 * is the first in a block, the start of the block is used).
308 */
309int
310ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
311    struct buf **bpp, doff_t *prevoffp)
312{
313	struct dirhash *dh, *dh_next;
314	struct direct *dp;
315	struct vnode *vp;
316	struct buf *bp;
317	doff_t blkoff, bmask, offset, prevoff;
318	int i, slot;
319
320	if ((dh = ip->i_dirhash) == NULL)
321		return (EJUSTRETURN);
322	/*
323	 * Move this dirhash towards the end of the list if it has a
324	 * score higher than the next entry, and acquire the dh_mtx.
325	 * Optimise the case where it's already the last by performing
326	 * an unlocked read of the TAILQ_NEXT pointer.
327	 *
328	 * In both cases, end up holding just dh_mtx.
329	 */
330	if (TAILQ_NEXT(dh, dh_list) != NULL) {
331		DIRHASHLIST_LOCK();
332		DIRHASH_LOCK(dh);
333		/*
334		 * If the new score will be greater than that of the next
335		 * entry, then move this entry past it. With both mutexes
336		 * held, dh_next won't go away, but its dh_score could
337		 * change; that's not important since it is just a hint.
338		 */
339		if (dh->dh_hash != NULL &&
340		    (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
341		    dh->dh_score >= dh_next->dh_score) {
342			DIRHASH_ASSERT(dh->dh_onlist, ("dirhash: not on list"));
343			TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
344			TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
345			    dh_list);
346		}
347		DIRHASHLIST_UNLOCK();
348	} else {
349		/* Already the last, though that could change as we wait. */
350		DIRHASH_LOCK(dh);
351	}
352	if (dh->dh_hash == NULL) {
353		DIRHASH_UNLOCK(dh);
354		ufsdirhash_free(ip);
355		return (EJUSTRETURN);
356	}
357
358	/* Update the score. */
359	if (dh->dh_score < DH_SCOREMAX)
360		dh->dh_score++;
361
362	vp = ip->i_vnode;
363	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
364	blkoff = -1;
365	bp = NULL;
366restart:
367	slot = ufsdirhash_hash(dh, name, namelen);
368
369	if (dh->dh_seqopt) {
370		/*
371		 * Sequential access optimisation. dh_seqoff contains the
372		 * offset of the directory entry immediately following
373		 * the last entry that was looked up. Check if this offset
374		 * appears in the hash chain for the name we are looking for.
375		 */
376		for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
377		    i = WRAPINCR(i, dh->dh_hlen))
378			if (offset == dh->dh_seqoff)
379				break;
380		if (offset == dh->dh_seqoff) {
381			/*
382			 * We found an entry with the expected offset. This
383			 * is probably the entry we want, but if not, the
384			 * code below will turn off seqopt and retry.
385			 */
386			slot = i;
387		} else
388			dh->dh_seqopt = 0;
389	}
390
391	for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
392	    slot = WRAPINCR(slot, dh->dh_hlen)) {
393		if (offset == DIRHASH_DEL)
394			continue;
395		DIRHASH_UNLOCK(dh);
396
397		if (offset < 0 || offset >= DIP(ip, size))
398			panic("ufsdirhash_lookup: bad offset in hash array");
399		if ((offset & ~bmask) != blkoff) {
400			if (bp != NULL)
401				brelse(bp);
402			blkoff = offset & ~bmask;
403			if (UFS_BUFATOFF(ip, (off_t)blkoff, NULL, &bp) != 0)
404				return (EJUSTRETURN);
405		}
406		dp = (struct direct *)(bp->b_data + (offset & bmask));
407		if (dp->d_reclen == 0 || dp->d_reclen >
408		    DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
409			/* Corrupted directory. */
410			brelse(bp);
411			return (EJUSTRETURN);
412		}
413		if (dp->d_namlen == namelen &&
414		    memcmp(dp->d_name, name, namelen) == 0) {
415			/* Found. Get the prev offset if needed. */
416			if (prevoffp != NULL) {
417				if (offset & (DIRBLKSIZ - 1)) {
418					prevoff = ufsdirhash_getprev(dp,
419					    offset);
420					if (prevoff == -1) {
421						brelse(bp);
422						return (EJUSTRETURN);
423					}
424				} else
425					prevoff = offset;
426				*prevoffp = prevoff;
427			}
428
429			/* Check for sequential access, and update offset. */
430			if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
431				dh->dh_seqopt = 1;
432			dh->dh_seqoff = offset + DIRSIZ(dp);
433
434			*bpp = bp;
435			*offp = offset;
436			return (0);
437		}
438
439		DIRHASH_LOCK(dh);
440		if (dh->dh_hash == NULL) {
441			DIRHASH_UNLOCK(dh);
442			if (bp != NULL)
443				brelse(bp);
444			ufsdirhash_free(ip);
445			return (EJUSTRETURN);
446		}
447		/*
448		 * When the name doesn't match in the seqopt case, go back
449		 * and search normally.
450		 */
451		if (dh->dh_seqopt) {
452			dh->dh_seqopt = 0;
453			goto restart;
454		}
455	}
456	DIRHASH_UNLOCK(dh);
457	if (bp != NULL)
458		brelse(bp);
459	return (ENOENT);
460}
461
462/*
463 * Find a directory block with room for 'slotneeded' bytes. Returns
464 * the offset of the directory entry that begins the free space.
465 * This will either be the offset of an existing entry that has free
466 * space at the end, or the offset of an entry with d_ino == 0 at
467 * the start of a DIRBLKSIZ block.
468 *
469 * To use the space, the caller may need to compact existing entries in
470 * the directory. The total number of bytes in all of the entries involved
471 * in the compaction is stored in *slotsize. In other words, all of
472 * the entries that must be compacted are exactly contained in the
473 * region beginning at the returned offset and spanning *slotsize bytes.
474 *
475 * Returns -1 if no space was found, indicating that the directory
476 * must be extended.
477 */
478doff_t
479ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
480{
481	struct direct *dp;
482	struct dirhash *dh;
483	struct buf *bp;
484	doff_t pos, slotstart;
485	int dirblock, error, freebytes, i;
486
487	if ((dh = ip->i_dirhash) == NULL)
488		return (-1);
489	DIRHASH_LOCK(dh);
490	if (dh->dh_hash == NULL) {
491		DIRHASH_UNLOCK(dh);
492		ufsdirhash_free(ip);
493		return (-1);
494	}
495
496	/* Find a directory block with the desired free space. */
497	dirblock = -1;
498	for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
499		if ((dirblock = dh->dh_firstfree[i]) != -1)
500			break;
501	if (dirblock == -1) {
502		DIRHASH_UNLOCK(dh);
503		return (-1);
504	}
505
506	DIRHASH_ASSERT(dirblock < dh->dh_nblk &&
507	    dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
508	    ("ufsdirhash_findfree: bad stats"));
509	DIRHASH_UNLOCK(dh);
510	pos = dirblock * DIRBLKSIZ;
511	error = UFS_BUFATOFF(ip, (off_t)pos, (char **)&dp, &bp);
512	if (error)
513		return (-1);
514
515	/* Find the first entry with free space. */
516	for (i = 0; i < DIRBLKSIZ; ) {
517		if (dp->d_reclen == 0) {
518			brelse(bp);
519			return (-1);
520		}
521		if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(dp))
522			break;
523		i += dp->d_reclen;
524		dp = (struct direct *)((char *)dp + dp->d_reclen);
525	}
526	if (i > DIRBLKSIZ) {
527		brelse(bp);
528		return (-1);
529	}
530	slotstart = pos + i;
531
532	/* Find the range of entries needed to get enough space */
533	freebytes = 0;
534	while (i < DIRBLKSIZ && freebytes < slotneeded) {
535		freebytes += dp->d_reclen;
536		if (dp->d_ino != 0)
537			freebytes -= DIRSIZ(dp);
538		if (dp->d_reclen == 0) {
539			brelse(bp);
540			return (-1);
541		}
542		i += dp->d_reclen;
543		dp = (struct direct *)((char *)dp + dp->d_reclen);
544	}
545	if (i > DIRBLKSIZ) {
546		brelse(bp);
547		return (-1);
548	}
549	if (freebytes < slotneeded)
550		panic("ufsdirhash_findfree: free mismatch");
551	brelse(bp);
552	*slotsize = pos + i - slotstart;
553	return (slotstart);
554}
555
556/*
557 * Return the start of the unused space at the end of a directory, or
558 * -1 if there are no trailing unused blocks.
559 */
560doff_t
561ufsdirhash_enduseful(struct inode *ip)
562{
563
564	struct dirhash *dh;
565	int i;
566
567	if ((dh = ip->i_dirhash) == NULL)
568		return (-1);
569	DIRHASH_LOCK(dh);
570	if (dh->dh_hash == NULL) {
571		DIRHASH_UNLOCK(dh);
572		ufsdirhash_free(ip);
573		return (-1);
574	}
575
576	if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) {
577		DIRHASH_UNLOCK(dh);
578		return (-1);
579	}
580
581	for (i = dh->dh_dirblks - 1; i >= 0; i--)
582		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
583			break;
584	DIRHASH_UNLOCK(dh);
585	return ((doff_t)(i + 1) * DIRBLKSIZ);
586}
587
588/*
589 * Insert information into the hash about a new directory entry. dirp
590 * points to a struct direct containing the entry, and offset specifies
591 * the offset of this entry.
592 */
593void
594ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
595{
596	struct dirhash *dh;
597	int slot;
598
599	if ((dh = ip->i_dirhash) == NULL)
600		return;
601	DIRHASH_LOCK(dh);
602	if (dh->dh_hash == NULL) {
603		DIRHASH_UNLOCK(dh);
604		ufsdirhash_free(ip);
605		return;
606	}
607
608	DIRHASH_ASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
609	    ("ufsdirhash_add: bad offset"));
610	/*
611	 * Normal hash usage is < 66%. If the usage gets too high then
612	 * remove the hash entirely and let it be rebuilt later.
613	 */
614	if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
615		DIRHASH_UNLOCK(dh);
616		ufsdirhash_free(ip);
617		return;
618	}
619
620	/* Find a free hash slot (empty or deleted), and add the entry. */
621	slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
622	while (DH_ENTRY(dh, slot) >= 0)
623		slot = WRAPINCR(slot, dh->dh_hlen);
624	if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
625		dh->dh_hused++;
626	DH_ENTRY(dh, slot) = offset;
627
628	/* Update the per-block summary info. */
629	ufsdirhash_adjfree(dh, offset, -DIRSIZ(dirp));
630	DIRHASH_UNLOCK(dh);
631}
632
633/*
634 * Remove the specified directory entry from the hash. The entry to remove
635 * is defined by the name in `dirp', which must exist at the specified
636 * `offset' within the directory.
637 */
638void
639ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
640{
641	struct dirhash *dh;
642	int slot;
643
644	if ((dh = ip->i_dirhash) == NULL)
645		return;
646	DIRHASH_LOCK(dh);
647	if (dh->dh_hash == NULL) {
648		DIRHASH_UNLOCK(dh);
649		ufsdirhash_free(ip);
650		return;
651	}
652
653	DIRHASH_ASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
654	    ("ufsdirhash_remove: bad offset"));
655	/* Find the entry */
656	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
657
658	/* Remove the hash entry. */
659	ufsdirhash_delslot(dh, slot);
660
661	/* Update the per-block summary info. */
662	ufsdirhash_adjfree(dh, offset, DIRSIZ(dirp));
663	DIRHASH_UNLOCK(dh);
664}
665
666/*
667 * Change the offset associated with a directory entry in the hash. Used
668 * when compacting directory blocks.
669 */
670void
671ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
672    doff_t newoff)
673{
674	struct dirhash *dh;
675	int slot;
676
677	if ((dh = ip->i_dirhash) == NULL)
678		return;
679	DIRHASH_LOCK(dh);
680	if (dh->dh_hash == NULL) {
681		DIRHASH_UNLOCK(dh);
682		ufsdirhash_free(ip);
683		return;
684	}
685
686	DIRHASH_ASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
687	    newoff < dh->dh_dirblks * DIRBLKSIZ,
688	    ("ufsdirhash_move: bad offset"));
689	/* Find the entry, and update the offset. */
690	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
691	DH_ENTRY(dh, slot) = newoff;
692	DIRHASH_UNLOCK(dh);
693}
694
695/*
696 * Inform dirhash that the directory has grown by one block that
697 * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
698 */
699void
700ufsdirhash_newblk(struct inode *ip, doff_t offset)
701{
702	struct dirhash *dh;
703	int block;
704
705	if ((dh = ip->i_dirhash) == NULL)
706		return;
707	DIRHASH_LOCK(dh);
708	if (dh->dh_hash == NULL) {
709		DIRHASH_UNLOCK(dh);
710		ufsdirhash_free(ip);
711		return;
712	}
713
714	DIRHASH_ASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
715	    ("ufsdirhash_newblk: bad offset"));
716	block = offset / DIRBLKSIZ;
717	if (block >= dh->dh_nblk) {
718		/* Out of space; must rebuild. */
719		DIRHASH_UNLOCK(dh);
720		ufsdirhash_free(ip);
721		return;
722	}
723	dh->dh_dirblks = block + 1;
724
725	/* Account for the new free block. */
726	dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
727	if (dh->dh_firstfree[DH_NFSTATS] == -1)
728		dh->dh_firstfree[DH_NFSTATS] = block;
729	DIRHASH_UNLOCK(dh);
730}
731
732/*
733 * Inform dirhash that the directory is being truncated.
734 */
735void
736ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
737{
738	struct dirhash *dh;
739	int block, i;
740
741	if ((dh = ip->i_dirhash) == NULL)
742		return;
743	DIRHASH_LOCK(dh);
744	if (dh->dh_hash == NULL) {
745		DIRHASH_UNLOCK(dh);
746		ufsdirhash_free(ip);
747		return;
748	}
749
750	DIRHASH_ASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
751	    ("ufsdirhash_dirtrunc: bad offset"));
752	block = howmany(offset, DIRBLKSIZ);
753	/*
754	 * If the directory shrinks to less than 1/8 of dh_nblk blocks
755	 * (about 20% of its original size due to the 50% extra added in
756	 * ufsdirhash_build) then free it, and let the caller rebuild
757	 * if necessary.
758	 */
759	if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
760		DIRHASH_UNLOCK(dh);
761		ufsdirhash_free(ip);
762		return;
763	}
764
765	/*
766	 * Remove any `first free' information pertaining to the
767	 * truncated blocks. All blocks we're removing should be
768	 * completely unused.
769	 */
770	if (dh->dh_firstfree[DH_NFSTATS] >= block)
771		dh->dh_firstfree[DH_NFSTATS] = -1;
772	for (i = block; i < dh->dh_dirblks; i++)
773		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
774			panic("ufsdirhash_dirtrunc: blocks in use");
775	for (i = 0; i < DH_NFSTATS; i++)
776		if (dh->dh_firstfree[i] >= block)
777			panic("ufsdirhash_dirtrunc: first free corrupt");
778	dh->dh_dirblks = block;
779	DIRHASH_UNLOCK(dh);
780}
781
782/*
783 * Debugging function to check that the dirhash information about
784 * a directory block matches its actual contents. Panics if a mismatch
785 * is detected.
786 *
787 * On entry, `buf' should point to the start of an in-core
788 * DIRBLKSIZ-sized directory block, and `offset' should contain the
789 * offset from the start of the directory of that block.
790 */
791void
792ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
793{
794	struct dirhash *dh;
795	struct direct *dp;
796	int block, ffslot, i, nfree;
797
798	if (!ufs_dirhashcheck)
799		return;
800	if ((dh = ip->i_dirhash) == NULL)
801		return;
802	DIRHASH_LOCK(dh);
803	if (dh->dh_hash == NULL) {
804		DIRHASH_UNLOCK(dh);
805		ufsdirhash_free(ip);
806		return;
807	}
808
809	block = offset / DIRBLKSIZ;
810	if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
811		panic("ufsdirhash_checkblock: bad offset");
812
813	nfree = 0;
814	for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
815		dp = (struct direct *)(buf + i);
816		if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
817			panic("ufsdirhash_checkblock: bad dir");
818
819		if (dp->d_ino == 0) {
820#if 0
821			/*
822			 * XXX entries with d_ino == 0 should only occur
823			 * at the start of a DIRBLKSIZ block. However the
824			 * ufs code is tolerant of such entries at other
825			 * offsets, and fsck does not fix them.
826			 */
827			if (i != 0)
828				panic("ufsdirhash_checkblock: bad dir inode");
829#endif
830			nfree += dp->d_reclen;
831			continue;
832		}
833
834		/* Check that the entry	exists (will panic if it doesn't). */
835		ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
836
837		nfree += dp->d_reclen - DIRSIZ(dp);
838	}
839	if (i != DIRBLKSIZ)
840		panic("ufsdirhash_checkblock: bad dir end");
841
842	if (dh->dh_blkfree[block] * DIRALIGN != nfree)
843		panic("ufsdirhash_checkblock: bad free count");
844
845	ffslot = BLKFREE2IDX(nfree / DIRALIGN);
846	for (i = 0; i <= DH_NFSTATS; i++)
847		if (dh->dh_firstfree[i] == block && i != ffslot)
848			panic("ufsdirhash_checkblock: bad first-free");
849	if (dh->dh_firstfree[ffslot] == -1)
850		panic("ufsdirhash_checkblock: missing first-free entry");
851	DIRHASH_UNLOCK(dh);
852}
853
854/*
855 * Hash the specified filename into a dirhash slot.
856 */
857int
858ufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
859{
860	return SipHash24(&ufsdirhash_key, name, namelen) % dh->dh_hlen;
861}
862
863/*
864 * Adjust the number of free bytes in the block containing `offset'
865 * by the value specified by `diff'.
866 *
867 * The caller must ensure we have exclusive access to `dh'; normally
868 * that means that dh_mtx should be held, but this is also called
869 * from ufsdirhash_build() where exclusive access can be assumed.
870 */
871void
872ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
873{
874	int block, i, nfidx, ofidx;
875
876	/* Update the per-block summary info. */
877	block = offset / DIRBLKSIZ;
878	DIRHASH_ASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
879	     ("dirhash bad offset"));
880	ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
881	dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
882	nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
883
884	/* Update the `first free' list if necessary. */
885	if (ofidx != nfidx) {
886		/* If removing, scan forward for the next block. */
887		if (dh->dh_firstfree[ofidx] == block) {
888			for (i = block + 1; i < dh->dh_dirblks; i++)
889				if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
890					break;
891			dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
892		}
893
894		/* Make this the new `first free' if necessary */
895		if (dh->dh_firstfree[nfidx] > block ||
896		    dh->dh_firstfree[nfidx] == -1)
897			dh->dh_firstfree[nfidx] = block;
898	}
899}
900
901/*
902 * Find the specified name which should have the specified offset.
903 * Returns a slot number, and panics on failure.
904 *
905 * `dh' must be locked on entry and remains so on return.
906 */
907int
908ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
909{
910	int slot;
911
912	mtx_assert(&dh->dh_mtx, MA_OWNED);
913
914	/* Find the entry. */
915	DIRHASH_ASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
916	slot = ufsdirhash_hash(dh, name, namelen);
917	while (DH_ENTRY(dh, slot) != offset &&
918	    DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
919		slot = WRAPINCR(slot, dh->dh_hlen);
920	if (DH_ENTRY(dh, slot) != offset)
921		panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
922
923	return (slot);
924}
925
926/*
927 * Remove the entry corresponding to the specified slot from the hash array.
928 *
929 * `dh' must be locked on entry and remains so on return.
930 */
931void
932ufsdirhash_delslot(struct dirhash *dh, int slot)
933{
934	int i;
935
936	mtx_assert(&dh->dh_mtx, MA_OWNED);
937
938	/* Mark the entry as deleted. */
939	DH_ENTRY(dh, slot) = DIRHASH_DEL;
940
941	/* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
942	for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
943		i = WRAPINCR(i, dh->dh_hlen);
944	if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
945		i = WRAPDECR(i, dh->dh_hlen);
946		while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
947			DH_ENTRY(dh, i) = DIRHASH_EMPTY;
948			dh->dh_hused--;
949			i = WRAPDECR(i, dh->dh_hlen);
950		}
951		DIRHASH_ASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
952	}
953}
954
955/*
956 * Given a directory entry and its offset, find the offset of the
957 * previous entry in the same DIRBLKSIZ-sized block. Returns an
958 * offset, or -1 if there is no previous entry in the block or some
959 * other problem occurred.
960 */
961doff_t
962ufsdirhash_getprev(struct direct *dirp, doff_t offset)
963{
964	struct direct *dp;
965	char *blkbuf;
966	doff_t blkoff, prevoff;
967	int entrypos, i;
968
969	blkoff = offset & ~(DIRBLKSIZ - 1);	/* offset of start of block */
970	entrypos = offset & (DIRBLKSIZ - 1);	/* entry relative to block */
971	blkbuf = (char *)dirp - entrypos;
972	prevoff = blkoff;
973
974	/* If `offset' is the start of a block, there is no previous entry. */
975	if (entrypos == 0)
976		return (-1);
977
978	/* Scan from the start of the block until we get to the entry. */
979	for (i = 0; i < entrypos; i += dp->d_reclen) {
980		dp = (struct direct *)(blkbuf + i);
981		if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
982			return (-1);	/* Corrupted directory. */
983		prevoff = blkoff + i;
984	}
985	return (prevoff);
986}
987
988/*
989 * Try to free up `wanted' bytes by stealing memory from existing
990 * dirhashes. Returns zero with list locked if successful.
991 */
992int
993ufsdirhash_recycle(int wanted)
994{
995	struct dirhash *dh;
996	doff_t **hash;
997	u_int8_t *blkfree;
998	int i, mem, narrays, nblk;
999
1000	DIRHASHLIST_LOCK();
1001	while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
1002		/* Find a dirhash, and lock it. */
1003		if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) {
1004			DIRHASHLIST_UNLOCK();
1005			return (-1);
1006		}
1007		DIRHASH_LOCK(dh);
1008		DIRHASH_ASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
1009
1010		/* Decrement the score; only recycle if it becomes zero. */
1011		if (--dh->dh_score > 0) {
1012			DIRHASH_UNLOCK(dh);
1013			DIRHASHLIST_UNLOCK();
1014			return (-1);
1015		}
1016
1017		/* Remove it from the list and detach its memory. */
1018		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
1019		dh->dh_onlist = 0;
1020		hash = dh->dh_hash;
1021		dh->dh_hash = NULL;
1022		blkfree = dh->dh_blkfree;
1023		dh->dh_blkfree = NULL;
1024		narrays = dh->dh_narrays;
1025		nblk = dh->dh_nblk;
1026		mem = narrays * sizeof(*dh->dh_hash) +
1027		    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
1028		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
1029
1030		/* Unlock everything, free the detached memory. */
1031		DIRHASH_UNLOCK(dh);
1032		DIRHASHLIST_UNLOCK();
1033		for (i = 0; i < narrays; i++)
1034			DIRHASH_BLKFREE(hash[i]);
1035		free(hash, M_DIRHASH, narrays * sizeof(hash[0]));
1036		free(blkfree, M_DIRHASH, nblk * sizeof(blkfree[0]));
1037
1038		/* Account for the returned memory, and repeat if necessary. */
1039		DIRHASHLIST_LOCK();
1040		ufs_dirhashmem -= mem;
1041	}
1042	/* Success; return with list locked. */
1043	return (0);
1044}
1045
1046
1047void
1048ufsdirhash_init(void)
1049{
1050	pool_init(&ufsdirhash_pool, DH_NBLKOFF * sizeof(doff_t), 0, IPL_NONE,
1051	    PR_WAITOK, "dirhash", NULL);
1052	rw_init(&ufsdirhash_mtx, "dirhash_list");
1053	arc4random_buf(&ufsdirhash_key, sizeof(ufsdirhash_key));
1054	TAILQ_INIT(&ufsdirhash_list);
1055	ufs_dirhashmaxmem = 5 * 1024 * 1024;
1056	ufs_mindirhashsize = 5 * DIRBLKSIZ;
1057}
1058
1059void
1060ufsdirhash_uninit(void)
1061{
1062	DIRHASH_ASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit"));
1063	pool_destroy(&ufsdirhash_pool);
1064}
1065