1109864Sjeff/* $NetBSD: ufs_dirhash.c,v 1.33 2009/05/30 13:54:36 hannken Exp $ */ 2109864Sjeff 3109864Sjeff/* 4109864Sjeff * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved. 5109864Sjeff * 6109864Sjeff * Redistribution and use in source and binary forms, with or without 7109864Sjeff * modification, are permitted provided that the following conditions 8109864Sjeff * are met: 9109864Sjeff * 1. Redistributions of source code must retain the above copyright 10109864Sjeff * notice, this list of conditions and the following disclaimer. 11109864Sjeff * 2. Redistributions in binary form must reproduce the above copyright 12109864Sjeff * notice, this list of conditions and the following disclaimer in the 13109864Sjeff * documentation and/or other materials provided with the distribution. 14109864Sjeff * 15109864Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16109864Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17109864Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18109864Sjeff * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19109864Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20109864Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21109864Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22109864Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23109864Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24109864Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25109864Sjeff * SUCH DAMAGE. 26109864Sjeff * 27109864Sjeff * $FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.3.2.8 2004/12/08 11:54:13 dwmalone Exp $ 28109864Sjeff */ 29109864Sjeff 30109864Sjeff#include <sys/cdefs.h> 31109864Sjeff__KERNEL_RCSID(0, "$NetBSD: ufs_dirhash.c,v 1.33 2009/05/30 13:54:36 hannken Exp $"); 32109864Sjeff 33109864Sjeff/* 34109864Sjeff * This implements a hash-based lookup scheme for UFS directories. 35109864Sjeff */ 36109864Sjeff 37109864Sjeff#include <sys/param.h> 38109864Sjeff#include <sys/systm.h> 39109864Sjeff#include <sys/kernel.h> 40109864Sjeff#include <sys/kmem.h> 41109864Sjeff#include <sys/types.h> 42109864Sjeff#include <sys/hash.h> 43109864Sjeff#include <sys/proc.h> 44109864Sjeff#include <sys/buf.h> 45109864Sjeff#include <sys/vnode.h> 46109864Sjeff#include <sys/mount.h> 47109864Sjeff#include <sys/pool.h> 48109864Sjeff#include <sys/sysctl.h> 49109864Sjeff#include <sys/atomic.h> 50109864Sjeff 51109864Sjeff#include <ufs/ufs/inode.h> 52109864Sjeff#include <ufs/ufs/dir.h> 53109864Sjeff#include <ufs/ufs/dirhash.h> 54109864Sjeff#include <ufs/ufs/ufsmount.h> 55109864Sjeff#include <ufs/ufs/ufs_bswap.h> 56109864Sjeff#include <ufs/ufs/ufs_extern.h> 57109864Sjeff 58109864Sjeff#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) 59109864Sjeff#define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1)) 60109864Sjeff#define OFSFMT(ip) ((ip)->i_ump->um_maxsymlinklen <= 0) 61109864Sjeff#define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n)) 62109864Sjeff 63109864Sjeffstatic u_int ufs_dirhashminblks = 5; 64109864Sjeffstatic u_int ufs_dirhashmaxmem = 2 * 1024 * 1024; 65109864Sjeffstatic u_int ufs_dirhashmem; 66109864Sjeffstatic u_int ufs_dirhashcheck = 0; 67109864Sjeff 68109864Sjeffstatic int ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen); 69109864Sjeffstatic void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff, 70109864Sjeff int dirblksiz); 71109864Sjeffstatic void ufsdirhash_delslot(struct dirhash *dh, int slot); 72109864Sjeffstatic int ufsdirhash_findslot(struct dirhash *dh, const char *name, 73109864Sjeff int namelen, doff_t offset); 74109864Sjeffstatic doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset, 75109864Sjeff int dirblksiz); 76109864Sjeffstatic int ufsdirhash_recycle(int wanted); 77109864Sjeff 78109864Sjeffstatic pool_cache_t ufsdirhashblk_cache; 79109864Sjeffstatic pool_cache_t ufsdirhash_cache; 80109864Sjeff 81109864Sjeff#define DIRHASHLIST_LOCK() mutex_enter(&ufsdirhash_lock) 82109864Sjeff#define DIRHASHLIST_UNLOCK() mutex_exit(&ufsdirhash_lock) 83109864Sjeff#define DIRHASH_LOCK(dh) mutex_enter(&(dh)->dh_lock) 84109864Sjeff#define DIRHASH_UNLOCK(dh) mutex_exit(&(dh)->dh_lock) 85109864Sjeff#define DIRHASH_BLKALLOC() \ 86109864Sjeff pool_cache_get(ufsdirhashblk_cache, PR_NOWAIT) 87109864Sjeff#define DIRHASH_BLKFREE(ptr) \ 88109864Sjeff pool_cache_put(ufsdirhashblk_cache, ptr) 89109864Sjeff 90109864Sjeff/* Dirhash list; recently-used entries are near the tail. */ 91109864Sjeffstatic TAILQ_HEAD(, dirhash) ufsdirhash_list; 92109864Sjeff 93109864Sjeff/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ 94109864Sjeffstatic kmutex_t ufsdirhash_lock; 95109864Sjeff 96109864Sjeffstatic struct sysctllog *ufsdirhash_sysctl_log; 97109864Sjeff 98109864Sjeff/* 99109864Sjeff * Locking order: 100109864Sjeff * ufsdirhash_lock 101109864Sjeff * dh_lock 102109864Sjeff * 103109864Sjeff * The dh_lock mutex should be acquired either via the inode lock, or via 104109864Sjeff * ufsdirhash_lock. Only the owner of the inode may free the associated 105109864Sjeff * dirhash, but anything can steal its memory and set dh_hash to NULL. 106109864Sjeff */ 107109864Sjeff 108109864Sjeff/* 109109864Sjeff * Attempt to build up a hash table for the directory contents in 110109864Sjeff * inode 'ip'. Returns 0 on success, or -1 of the operation failed. 111109864Sjeff */ 112109864Sjeffint 113109864Sjeffufsdirhash_build(struct inode *ip) 114109864Sjeff{ 115109864Sjeff struct dirhash *dh; 116109864Sjeff struct buf *bp = NULL; 117109864Sjeff struct direct *ep; 118109864Sjeff struct vnode *vp; 119109864Sjeff doff_t bmask, pos; 120109864Sjeff int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; 121109864Sjeff const int needswap = UFS_MPNEEDSWAP(ip->i_ump); 122109864Sjeff int dirblksiz = ip->i_ump->um_dirblksiz; 123109864Sjeff 124109864Sjeff /* Check if we can/should use dirhash. */ 125109864Sjeff if (ip->i_dirhash == NULL) { 126109864Sjeff if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || OFSFMT(ip)) 127109864Sjeff return (-1); 128109864Sjeff } else { 129109864Sjeff /* Hash exists, but sysctls could have changed. */ 130109864Sjeff if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || 131109864Sjeff ufs_dirhashmem > ufs_dirhashmaxmem) { 132109864Sjeff ufsdirhash_free(ip); 133109864Sjeff return (-1); 134109864Sjeff } 135109864Sjeff /* Check if hash exists and is intact (note: unlocked read). */ 136109864Sjeff if (ip->i_dirhash->dh_hash != NULL) 137109864Sjeff return (0); 138109864Sjeff /* Free the old, recycled hash and build a new one. */ 139109864Sjeff ufsdirhash_free(ip); 140109864Sjeff } 141109864Sjeff 142109864Sjeff /* Don't hash removed directories. */ 143109864Sjeff if (ip->i_nlink == 0) 144109864Sjeff return (-1); 145109864Sjeff 146109864Sjeff vp = ip->i_vnode; 147109864Sjeff /* Allocate 50% more entries than this dir size could ever need. */ 148109864Sjeff KASSERT(ip->i_size >= dirblksiz); 149109864Sjeff nslots = ip->i_size / DIRECTSIZ(1); 150109864Sjeff nslots = (nslots * 3 + 1) / 2; 151109864Sjeff narrays = howmany(nslots, DH_NBLKOFF); 152109864Sjeff nslots = narrays * DH_NBLKOFF; 153109864Sjeff dirblocks = howmany(ip->i_size, dirblksiz); 154109864Sjeff nblocks = (dirblocks * 3 + 1) / 2; 155109864Sjeff 156109864Sjeff memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + 157109864Sjeff narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 158109864Sjeff nblocks * sizeof(*dh->dh_blkfree); 159109864Sjeff 160109864Sjeff while (atomic_add_int_nv(&ufs_dirhashmem, memreqd) > 161109864Sjeff ufs_dirhashmaxmem) { 162109864Sjeff atomic_add_int(&ufs_dirhashmem, -memreqd); 163109864Sjeff if (memreqd > ufs_dirhashmaxmem / 2) 164109864Sjeff return (-1); 165109864Sjeff /* Try to free some space. */ 166109864Sjeff if (ufsdirhash_recycle(memreqd) != 0) 167109864Sjeff return (-1); 168109864Sjeff else 169109864Sjeff DIRHASHLIST_UNLOCK(); 170109864Sjeff } 171109864Sjeff 172109864Sjeff /* 173109864Sjeff * Use non-blocking mallocs so that we will revert to a linear 174109864Sjeff * lookup on failure rather than potentially blocking forever. 175109864Sjeff */ 176109864Sjeff dh = pool_cache_get(ufsdirhash_cache, PR_NOWAIT); 177109864Sjeff if (dh == NULL) { 178109864Sjeff atomic_add_int(&ufs_dirhashmem, -memreqd); 179109864Sjeff return (-1); 180109864Sjeff } 181109864Sjeff memset(dh, 0, sizeof(*dh)); 182109864Sjeff mutex_init(&dh->dh_lock, MUTEX_DEFAULT, IPL_NONE); 183109864Sjeff DIRHASH_LOCK(dh); 184109864Sjeff dh->dh_hashsz = narrays * sizeof(dh->dh_hash[0]); 185109864Sjeff dh->dh_hash = kmem_zalloc(dh->dh_hashsz, KM_NOSLEEP); 186109864Sjeff dh->dh_blkfreesz = nblocks * sizeof(dh->dh_blkfree[0]); 187109864Sjeff dh->dh_blkfree = kmem_zalloc(dh->dh_blkfreesz, KM_NOSLEEP); 188109864Sjeff if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) 189109864Sjeff goto fail; 190109864Sjeff for (i = 0; i < narrays; i++) { 191109864Sjeff if ((dh->dh_hash[i] = DIRHASH_BLKALLOC()) == NULL) 192109864Sjeff goto fail; 193109864Sjeff for (j = 0; j < DH_NBLKOFF; j++) 194109864Sjeff dh->dh_hash[i][j] = DIRHASH_EMPTY; 195109864Sjeff } 196109864Sjeff 197109864Sjeff /* Initialise the hash table and block statistics. */ 198109864Sjeff dh->dh_narrays = narrays; 199109864Sjeff dh->dh_hlen = nslots; 200109864Sjeff dh->dh_nblk = nblocks; 201109864Sjeff dh->dh_dirblks = dirblocks; 202109864Sjeff for (i = 0; i < dirblocks; i++) 203109864Sjeff dh->dh_blkfree[i] = dirblksiz / DIRALIGN; 204109864Sjeff for (i = 0; i < DH_NFSTATS; i++) 205109864Sjeff dh->dh_firstfree[i] = -1; 206109864Sjeff dh->dh_firstfree[DH_NFSTATS] = 0; 207109864Sjeff dh->dh_seqopt = 0; 208109864Sjeff dh->dh_seqoff = 0; 209109864Sjeff dh->dh_score = DH_SCOREINIT; 210109864Sjeff ip->i_dirhash = dh; 211109864Sjeff 212109864Sjeff bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 213109864Sjeff pos = 0; 214109864Sjeff while (pos < ip->i_size) { 215109864Sjeff if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) 216109864Sjeff != 0) { 217109864Sjeff preempt(); 218109864Sjeff } 219109864Sjeff /* If necessary, get the next directory block. */ 220109864Sjeff if ((pos & bmask) == 0) { 221109864Sjeff if (bp != NULL) 222109864Sjeff brelse(bp, 0); 223109864Sjeff if (ufs_blkatoff(vp, (off_t)pos, NULL, &bp, false) != 0) 224109864Sjeff goto fail; 225109864Sjeff } 226109864Sjeff 227109864Sjeff /* Add this entry to the hash. */ 228109864Sjeff ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); 229109864Sjeff if (ep->d_reclen == 0 || ep->d_reclen > 230109864Sjeff dirblksiz - (pos & (dirblksiz - 1))) { 231109864Sjeff /* Corrupted directory. */ 232109864Sjeff brelse(bp, 0); 233109864Sjeff goto fail; 234109864Sjeff } 235109864Sjeff if (ep->d_ino != 0) { 236109864Sjeff /* Add the entry (simplified ufsdirhash_add). */ 237109864Sjeff slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); 238109864Sjeff while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 239109864Sjeff slot = WRAPINCR(slot, dh->dh_hlen); 240109864Sjeff dh->dh_hused++; 241109864Sjeff DH_ENTRY(dh, slot) = pos; 242109864Sjeff ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep, needswap), 243109864Sjeff dirblksiz); 244109864Sjeff } 245109864Sjeff pos += ep->d_reclen; 246109864Sjeff } 247109864Sjeff 248109864Sjeff if (bp != NULL) 249109864Sjeff brelse(bp, 0); 250109864Sjeff DIRHASHLIST_LOCK(); 251109864Sjeff TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); 252109864Sjeff dh->dh_onlist = 1; 253109864Sjeff DIRHASH_UNLOCK(dh); 254109864Sjeff DIRHASHLIST_UNLOCK(); 255109864Sjeff return (0); 256109864Sjeff 257109864Sjefffail: 258109864Sjeff DIRHASH_UNLOCK(dh); 259109864Sjeff if (dh->dh_hash != NULL) { 260109864Sjeff for (i = 0; i < narrays; i++) 261109864Sjeff if (dh->dh_hash[i] != NULL) 262109864Sjeff DIRHASH_BLKFREE(dh->dh_hash[i]); 263109864Sjeff kmem_free(dh->dh_hash, dh->dh_hashsz); 264109864Sjeff } 265109864Sjeff if (dh->dh_blkfree != NULL) 266109864Sjeff kmem_free(dh->dh_blkfree, dh->dh_blkfreesz); 267109864Sjeff mutex_destroy(&dh->dh_lock); 268109864Sjeff pool_cache_put(ufsdirhash_cache, dh); 269109864Sjeff ip->i_dirhash = NULL; 270109864Sjeff atomic_add_int(&ufs_dirhashmem, -memreqd); 271109864Sjeff return (-1); 272109864Sjeff} 273109864Sjeff 274109864Sjeff/* 275109864Sjeff * Free any hash table associated with inode 'ip'. 276109864Sjeff */ 277109864Sjeffvoid 278109864Sjeffufsdirhash_free(struct inode *ip) 279109864Sjeff{ 280109864Sjeff struct dirhash *dh; 281109864Sjeff int i, mem; 282109864Sjeff 283109864Sjeff if ((dh = ip->i_dirhash) == NULL) 284109864Sjeff return; 285109864Sjeff 286109864Sjeff if (dh->dh_onlist) { 287109864Sjeff DIRHASHLIST_LOCK(); 288109864Sjeff if (dh->dh_onlist) 289109864Sjeff TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 290109864Sjeff DIRHASHLIST_UNLOCK(); 291109864Sjeff } 292109864Sjeff 293109864Sjeff /* The dirhash pointed to by 'dh' is exclusively ours now. */ 294109864Sjeff mem = sizeof(*dh); 295109864Sjeff if (dh->dh_hash != NULL) { 296109864Sjeff for (i = 0; i < dh->dh_narrays; i++) 297109864Sjeff DIRHASH_BLKFREE(dh->dh_hash[i]); 298109864Sjeff kmem_free(dh->dh_hash, dh->dh_hashsz); 299109864Sjeff kmem_free(dh->dh_blkfree, dh->dh_blkfreesz); 300109864Sjeff mem += dh->dh_hashsz; 301109864Sjeff mem += dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash); 302109864Sjeff mem += dh->dh_nblk * sizeof(*dh->dh_blkfree); 303109864Sjeff } 304109864Sjeff mutex_destroy(&dh->dh_lock); 305109864Sjeff pool_cache_put(ufsdirhash_cache, dh); 306109864Sjeff ip->i_dirhash = NULL; 307109864Sjeff 308109864Sjeff atomic_add_int(&ufs_dirhashmem, -mem); 309109864Sjeff} 310109864Sjeff 311109864Sjeff/* 312109864Sjeff * Find the offset of the specified name within the given inode. 313109864Sjeff * Returns 0 on success, ENOENT if the entry does not exist, or 314109864Sjeff * EJUSTRETURN if the caller should revert to a linear search. 315109864Sjeff * 316109864Sjeff * If successful, the directory offset is stored in *offp, and a 317109864Sjeff * pointer to a struct buf containing the entry is stored in *bpp. If 318109864Sjeff * prevoffp is non-NULL, the offset of the previous entry within 319109864Sjeff * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry 320109864Sjeff * is the first in a block, the start of the block is used). 321109864Sjeff */ 322109864Sjeffint 323109864Sjeffufsdirhash_lookup(struct inode *ip, const char *name, int namelen, doff_t *offp, 324109864Sjeff struct buf **bpp, doff_t *prevoffp) 325109864Sjeff{ 326109864Sjeff struct dirhash *dh, *dh_next; 327109864Sjeff struct direct *dp; 328109864Sjeff struct vnode *vp; 329109864Sjeff struct buf *bp; 330109864Sjeff doff_t blkoff, bmask, offset, prevoff; 331109864Sjeff int i, slot; 332109864Sjeff const int needswap = UFS_MPNEEDSWAP(ip->i_ump); 333109864Sjeff int dirblksiz = ip->i_ump->um_dirblksiz; 334109864Sjeff 335109864Sjeff if ((dh = ip->i_dirhash) == NULL) 336109864Sjeff return (EJUSTRETURN); 337109864Sjeff 338109864Sjeff /* 339109864Sjeff * Move this dirhash towards the end of the list if it has a 340109864Sjeff * score higher than the next entry, and acquire the dh_lock. 341109864Sjeff * Optimise the case where it's already the last by performing 342109864Sjeff * an unlocked read of the TAILQ_NEXT pointer. 343109864Sjeff * 344109864Sjeff * In both cases, end up holding just dh_lock. 345109864Sjeff */ 346109864Sjeff if (TAILQ_NEXT(dh, dh_list) != NULL) { 347109864Sjeff DIRHASHLIST_LOCK(); 348109864Sjeff DIRHASH_LOCK(dh); 349109864Sjeff /* 350109864Sjeff * If the new score will be greater than that of the next 351109864Sjeff * entry, then move this entry past it. With both mutexes 352109864Sjeff * held, dh_next won't go away, but its dh_score could 353109864Sjeff * change; that's not important since it is just a hint. 354109864Sjeff */ 355109864Sjeff if (dh->dh_hash != NULL && 356109864Sjeff (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && 357109864Sjeff dh->dh_score >= dh_next->dh_score) { 358109864Sjeff KASSERT(dh->dh_onlist); 359109864Sjeff TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 360109864Sjeff TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, 361109864Sjeff dh_list); 362109864Sjeff } 363109864Sjeff DIRHASHLIST_UNLOCK(); 364109864Sjeff } else { 365109864Sjeff /* Already the last, though that could change as we wait. */ 366109864Sjeff DIRHASH_LOCK(dh); 367109864Sjeff } 368109864Sjeff if (dh->dh_hash == NULL) { 369109864Sjeff DIRHASH_UNLOCK(dh); 370109864Sjeff ufsdirhash_free(ip); 371109864Sjeff return (EJUSTRETURN); 372109864Sjeff } 373109864Sjeff 374109864Sjeff /* Update the score. */ 375109864Sjeff if (dh->dh_score < DH_SCOREMAX) 376109864Sjeff dh->dh_score++; 377109864Sjeff 378109864Sjeff vp = ip->i_vnode; 379109864Sjeff bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 380109864Sjeff blkoff = -1; 381109864Sjeff bp = NULL; 382109864Sjeffrestart: 383109864Sjeff slot = ufsdirhash_hash(dh, name, namelen); 384109864Sjeff 385109864Sjeff if (dh->dh_seqopt) { 386109864Sjeff /* 387109864Sjeff * Sequential access optimisation. dh_seqoff contains the 388109864Sjeff * offset of the directory entry immediately following 389109864Sjeff * the last entry that was looked up. Check if this offset 390109864Sjeff * appears in the hash chain for the name we are looking for. 391109864Sjeff */ 392109864Sjeff for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; 393109864Sjeff i = WRAPINCR(i, dh->dh_hlen)) 394109864Sjeff if (offset == dh->dh_seqoff) 395109864Sjeff break; 396109864Sjeff if (offset == dh->dh_seqoff) { 397109864Sjeff /* 398109864Sjeff * We found an entry with the expected offset. This 399109864Sjeff * is probably the entry we want, but if not, the 400109864Sjeff * code below will turn off seqoff and retry. 401109864Sjeff */ 402109864Sjeff slot = i; 403109864Sjeff } else 404109864Sjeff dh->dh_seqopt = 0; 405109864Sjeff } 406109864Sjeff 407109864Sjeff for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; 408109864Sjeff slot = WRAPINCR(slot, dh->dh_hlen)) { 409109864Sjeff if (offset == DIRHASH_DEL) 410109864Sjeff continue; 411109864Sjeff 412109864Sjeff if (offset < 0 || offset >= ip->i_size) 413109864Sjeff panic("ufsdirhash_lookup: bad offset in hash array"); 414109864Sjeff if ((offset & ~bmask) != blkoff) { 415109864Sjeff if (bp != NULL) 416109864Sjeff brelse(bp, 0); 417109864Sjeff blkoff = offset & ~bmask; 418109864Sjeff if (ufs_blkatoff(vp, (off_t)blkoff, 419109864Sjeff NULL, &bp, false) != 0) { 420109864Sjeff DIRHASH_UNLOCK(dh); 421109864Sjeff return (EJUSTRETURN); 422109864Sjeff } 423109864Sjeff } 424109864Sjeff dp = (struct direct *)((char *)bp->b_data + (offset & bmask)); 425109864Sjeff if (dp->d_reclen == 0 || dp->d_reclen > 426109864Sjeff dirblksiz - (offset & (dirblksiz - 1))) { 427109864Sjeff /* Corrupted directory. */ 428109864Sjeff DIRHASH_UNLOCK(dh); 429109864Sjeff brelse(bp, 0); 430109864Sjeff return (EJUSTRETURN); 431109864Sjeff } 432109864Sjeff if (dp->d_namlen == namelen && 433109864Sjeff memcmp(dp->d_name, name, namelen) == 0) { 434109864Sjeff /* Found. Get the prev offset if needed. */ 435109864Sjeff if (prevoffp != NULL) { 436109864Sjeff if (offset & (dirblksiz - 1)) { 437109864Sjeff prevoff = ufsdirhash_getprev(dp, 438109864Sjeff offset, dirblksiz); 439109864Sjeff if (prevoff == -1) { 440109864Sjeff brelse(bp, 0); 441109864Sjeff return (EJUSTRETURN); 442109864Sjeff } 443109864Sjeff } else 444109864Sjeff prevoff = offset; 445109864Sjeff *prevoffp = prevoff; 446109864Sjeff } 447109864Sjeff 448109864Sjeff /* Check for sequential access, and update offset. */ 449109864Sjeff if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) 450109864Sjeff dh->dh_seqopt = 1; 451109864Sjeff dh->dh_seqoff = offset + DIRSIZ(0, dp, needswap); 452109864Sjeff DIRHASH_UNLOCK(dh); 453109864Sjeff 454109864Sjeff *bpp = bp; 455109864Sjeff *offp = offset; 456109864Sjeff return (0); 457109864Sjeff } 458109864Sjeff 459109864Sjeff if (dh->dh_hash == NULL) { 460109864Sjeff DIRHASH_UNLOCK(dh); 461109864Sjeff if (bp != NULL) 462109864Sjeff brelse(bp, 0); 463109864Sjeff ufsdirhash_free(ip); 464109864Sjeff return (EJUSTRETURN); 465109864Sjeff } 466109864Sjeff /* 467109864Sjeff * When the name doesn't match in the seqopt case, go back 468109864Sjeff * and search normally. 469109864Sjeff */ 470109864Sjeff if (dh->dh_seqopt) { 471109864Sjeff dh->dh_seqopt = 0; 472109864Sjeff goto restart; 473109864Sjeff } 474109864Sjeff } 475109864Sjeff DIRHASH_UNLOCK(dh); 476109864Sjeff if (bp != NULL) 477109864Sjeff brelse(bp, 0); 478109864Sjeff return (ENOENT); 479109864Sjeff} 480109864Sjeff 481109864Sjeff/* 482109864Sjeff * Find a directory block with room for 'slotneeded' bytes. Returns 483109864Sjeff * the offset of the directory entry that begins the free space. 484109864Sjeff * This will either be the offset of an existing entry that has free 485109864Sjeff * space at the end, or the offset of an entry with d_ino == 0 at 486109864Sjeff * the start of a DIRBLKSIZ block. 487109864Sjeff * 488109864Sjeff * To use the space, the caller may need to compact existing entries in 489109864Sjeff * the directory. The total number of bytes in all of the entries involved 490109864Sjeff * in the compaction is stored in *slotsize. In other words, all of 491109864Sjeff * the entries that must be compacted are exactly contained in the 492109864Sjeff * region beginning at the returned offset and spanning *slotsize bytes. 493109864Sjeff * 494109864Sjeff * Returns -1 if no space was found, indicating that the directory 495109864Sjeff * must be extended. 496109864Sjeff */ 497109864Sjeffdoff_t 498109864Sjeffufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) 499109864Sjeff{ 500109864Sjeff struct direct *dp; 501109864Sjeff struct dirhash *dh; 502109864Sjeff struct buf *bp; 503109864Sjeff doff_t pos, slotstart; 504109864Sjeff int dirblock, error, freebytes, i; 505109864Sjeff const int needswap = UFS_MPNEEDSWAP(ip->i_ump); 506109864Sjeff int dirblksiz = ip->i_ump->um_dirblksiz; 507109864Sjeff 508109864Sjeff if ((dh = ip->i_dirhash) == NULL) 509109864Sjeff return (-1); 510109864Sjeff 511109864Sjeff DIRHASH_LOCK(dh); 512109864Sjeff if (dh->dh_hash == NULL) { 513109864Sjeff DIRHASH_UNLOCK(dh); 514109864Sjeff ufsdirhash_free(ip); 515109864Sjeff return (-1); 516109864Sjeff } 517109864Sjeff 518109864Sjeff /* Find a directory block with the desired free space. */ 519109864Sjeff dirblock = -1; 520109864Sjeff for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) 521109864Sjeff if ((dirblock = dh->dh_firstfree[i]) != -1) 522109864Sjeff break; 523109864Sjeff if (dirblock == -1) { 524109864Sjeff DIRHASH_UNLOCK(dh); 525109864Sjeff return (-1); 526109864Sjeff } 527109864Sjeff 528109864Sjeff KASSERT(dirblock < dh->dh_nblk && 529109864Sjeff dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN)); 530109864Sjeff pos = dirblock * dirblksiz; 531109864Sjeff error = ufs_blkatoff(ip->i_vnode, (off_t)pos, (void *)&dp, &bp, false); 532109864Sjeff if (error) { 533109864Sjeff DIRHASH_UNLOCK(dh); 534109864Sjeff return (-1); 535109864Sjeff } 536109864Sjeff /* Find the first entry with free space. */ 537109864Sjeff for (i = 0; i < dirblksiz; ) { 538109864Sjeff if (dp->d_reclen == 0) { 539109864Sjeff DIRHASH_UNLOCK(dh); 540109864Sjeff brelse(bp, 0); 541109864Sjeff return (-1); 542109864Sjeff } 543109864Sjeff if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp, needswap)) 544109864Sjeff break; 545109864Sjeff i += dp->d_reclen; 546109864Sjeff dp = (struct direct *)((char *)dp + dp->d_reclen); 547109864Sjeff } 548109864Sjeff if (i > dirblksiz) { 549109864Sjeff DIRHASH_UNLOCK(dh); 550109864Sjeff brelse(bp, 0); 551109864Sjeff return (-1); 552109864Sjeff } 553109864Sjeff slotstart = pos + i; 554109864Sjeff 555109864Sjeff /* Find the range of entries needed to get enough space */ 556109864Sjeff freebytes = 0; 557109864Sjeff while (i < dirblksiz && freebytes < slotneeded) { 558109864Sjeff freebytes += dp->d_reclen; 559109864Sjeff if (dp->d_ino != 0) 560109864Sjeff freebytes -= DIRSIZ(0, dp, needswap); 561109864Sjeff if (dp->d_reclen == 0) { 562109864Sjeff DIRHASH_UNLOCK(dh); 563109864Sjeff brelse(bp, 0); 564109864Sjeff return (-1); 565109864Sjeff } 566109864Sjeff i += dp->d_reclen; 567109864Sjeff dp = (struct direct *)((char *)dp + dp->d_reclen); 568109864Sjeff } 569109864Sjeff if (i > dirblksiz) { 570109864Sjeff DIRHASH_UNLOCK(dh); 571109864Sjeff brelse(bp, 0); 572109864Sjeff return (-1); 573109864Sjeff } 574109864Sjeff if (freebytes < slotneeded) 575109864Sjeff panic("ufsdirhash_findfree: free mismatch"); 576109864Sjeff DIRHASH_UNLOCK(dh); 577109864Sjeff brelse(bp, 0); 578109864Sjeff *slotsize = pos + i - slotstart; 579109864Sjeff return (slotstart); 580109864Sjeff} 581109864Sjeff 582109864Sjeff/* 583109864Sjeff * Return the start of the unused space at the end of a directory, or 584109864Sjeff * -1 if there are no trailing unused blocks. 585109864Sjeff */ 586109864Sjeffdoff_t 587109864Sjeffufsdirhash_enduseful(struct inode *ip) 588109864Sjeff{ 589109864Sjeff struct dirhash *dh; 590109864Sjeff int i; 591109864Sjeff int dirblksiz = ip->i_ump->um_dirblksiz; 592109864Sjeff 593109864Sjeff if ((dh = ip->i_dirhash) == NULL) 594109864Sjeff return (-1); 595109864Sjeff 596109864Sjeff DIRHASH_LOCK(dh); 597109864Sjeff if (dh->dh_hash == NULL) { 598109864Sjeff DIRHASH_UNLOCK(dh); 599109864Sjeff ufsdirhash_free(ip); 600109864Sjeff return (-1); 601109864Sjeff } 602109864Sjeff 603109864Sjeff if (dh->dh_blkfree[dh->dh_dirblks - 1] != dirblksiz / DIRALIGN) { 604109864Sjeff DIRHASH_UNLOCK(dh); 605109864Sjeff return (-1); 606109864Sjeff } 607109864Sjeff 608109864Sjeff for (i = dh->dh_dirblks - 1; i >= 0; i--) 609109864Sjeff if (dh->dh_blkfree[i] != dirblksiz / DIRALIGN) 610109864Sjeff break; 611109864Sjeff DIRHASH_UNLOCK(dh); 612109864Sjeff return ((doff_t)(i + 1) * dirblksiz); 613109864Sjeff} 614109864Sjeff 615109864Sjeff/* 616109864Sjeff * Insert information into the hash about a new directory entry. dirp 617109864Sjeff * points to a struct direct containing the entry, and offset specifies 618109864Sjeff * the offset of this entry. 619109864Sjeff */ 620109864Sjeffvoid 621109864Sjeffufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) 622109864Sjeff{ 623109864Sjeff struct dirhash *dh; 624109864Sjeff int slot; 625109864Sjeff const int needswap = UFS_MPNEEDSWAP(ip->i_ump); 626109864Sjeff int dirblksiz = ip->i_ump->um_dirblksiz; 627109864Sjeff 628109864Sjeff if ((dh = ip->i_dirhash) == NULL) 629109864Sjeff return; 630109864Sjeff 631109864Sjeff DIRHASH_LOCK(dh); 632109864Sjeff if (dh->dh_hash == NULL) { 633109864Sjeff DIRHASH_UNLOCK(dh); 634109864Sjeff ufsdirhash_free(ip); 635109864Sjeff return; 636109864Sjeff } 637109864Sjeff 638109864Sjeff KASSERT(offset < dh->dh_dirblks * dirblksiz); 639109864Sjeff /* 640109864Sjeff * Normal hash usage is < 66%. If the usage gets too high then 641109864Sjeff * remove the hash entirely and let it be rebuilt later. 642109864Sjeff */ 643109864Sjeff if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { 644109864Sjeff DIRHASH_UNLOCK(dh); 645109864Sjeff ufsdirhash_free(ip); 646109864Sjeff return; 647109864Sjeff } 648109864Sjeff 649109864Sjeff /* Find a free hash slot (empty or deleted), and add the entry. */ 650109864Sjeff slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); 651109864Sjeff while (DH_ENTRY(dh, slot) >= 0) 652109864Sjeff slot = WRAPINCR(slot, dh->dh_hlen); 653109864Sjeff if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) 654109864Sjeff dh->dh_hused++; 655109864Sjeff DH_ENTRY(dh, slot) = offset; 656109864Sjeff 657109864Sjeff /* Update the per-block summary info. */ 658109864Sjeff ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp, needswap), dirblksiz); 659109864Sjeff DIRHASH_UNLOCK(dh); 660109864Sjeff} 661109864Sjeff 662109864Sjeff/* 663109864Sjeff * Remove the specified directory entry from the hash. The entry to remove 664109864Sjeff * is defined by the name in `dirp', which must exist at the specified 665109864Sjeff * `offset' within the directory. 666109864Sjeff */ 667109864Sjeffvoid 668109864Sjeffufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) 669109864Sjeff{ 670109864Sjeff struct dirhash *dh; 671109864Sjeff int slot; 672109864Sjeff const int needswap = UFS_MPNEEDSWAP(ip->i_ump); 673109864Sjeff int dirblksiz = ip->i_ump->um_dirblksiz; 674109864Sjeff 675109864Sjeff if ((dh = ip->i_dirhash) == NULL) 676109864Sjeff return; 677109864Sjeff 678109864Sjeff DIRHASH_LOCK(dh); 679109864Sjeff if (dh->dh_hash == NULL) { 680109864Sjeff DIRHASH_UNLOCK(dh); 681109864Sjeff ufsdirhash_free(ip); 682109864Sjeff return; 683109864Sjeff } 684109864Sjeff 685109864Sjeff KASSERT(offset < dh->dh_dirblks * dirblksiz); 686109864Sjeff /* Find the entry */ 687109864Sjeff slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); 688109864Sjeff 689109864Sjeff /* Remove the hash entry. */ 690109864Sjeff ufsdirhash_delslot(dh, slot); 691109864Sjeff 692109864Sjeff /* Update the per-block summary info. */ 693109864Sjeff ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp, needswap), dirblksiz); 694109864Sjeff DIRHASH_UNLOCK(dh); 695109864Sjeff} 696109864Sjeff 697109864Sjeff/* 698 * Change the offset associated with a directory entry in the hash. Used 699 * when compacting directory blocks. 700 */ 701void 702ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, 703 doff_t newoff) 704{ 705 struct dirhash *dh; 706 int slot; 707 708 if ((dh = ip->i_dirhash) == NULL) 709 return; 710 DIRHASH_LOCK(dh); 711 if (dh->dh_hash == NULL) { 712 DIRHASH_UNLOCK(dh); 713 ufsdirhash_free(ip); 714 return; 715 } 716 717 KASSERT(oldoff < dh->dh_dirblks * ip->i_ump->um_dirblksiz && 718 newoff < dh->dh_dirblks * ip->i_ump->um_dirblksiz); 719 /* Find the entry, and update the offset. */ 720 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); 721 DH_ENTRY(dh, slot) = newoff; 722 DIRHASH_UNLOCK(dh); 723} 724 725/* 726 * Inform dirhash that the directory has grown by one block that 727 * begins at offset (i.e. the new length is offset + DIRBLKSIZ). 728 */ 729void 730ufsdirhash_newblk(struct inode *ip, doff_t offset) 731{ 732 struct dirhash *dh; 733 int block; 734 int dirblksiz = ip->i_ump->um_dirblksiz; 735 736 if ((dh = ip->i_dirhash) == NULL) 737 return; 738 DIRHASH_LOCK(dh); 739 if (dh->dh_hash == NULL) { 740 DIRHASH_UNLOCK(dh); 741 ufsdirhash_free(ip); 742 return; 743 } 744 745 KASSERT(offset == dh->dh_dirblks * dirblksiz); 746 block = offset / dirblksiz; 747 if (block >= dh->dh_nblk) { 748 /* Out of space; must rebuild. */ 749 DIRHASH_UNLOCK(dh); 750 ufsdirhash_free(ip); 751 return; 752 } 753 dh->dh_dirblks = block + 1; 754 755 /* Account for the new free block. */ 756 dh->dh_blkfree[block] = dirblksiz / DIRALIGN; 757 if (dh->dh_firstfree[DH_NFSTATS] == -1) 758 dh->dh_firstfree[DH_NFSTATS] = block; 759 DIRHASH_UNLOCK(dh); 760} 761 762/* 763 * Inform dirhash that the directory is being truncated. 764 */ 765void 766ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) 767{ 768 struct dirhash *dh; 769 int block, i; 770 int dirblksiz = ip->i_ump->um_dirblksiz; 771 772 if ((dh = ip->i_dirhash) == NULL) 773 return; 774 775 DIRHASH_LOCK(dh); 776 if (dh->dh_hash == NULL) { 777 DIRHASH_UNLOCK(dh); 778 ufsdirhash_free(ip); 779 return; 780 } 781 782 KASSERT(offset <= dh->dh_dirblks * dirblksiz); 783 block = howmany(offset, dirblksiz); 784 /* 785 * If the directory shrinks to less than 1/8 of dh_nblk blocks 786 * (about 20% of its original size due to the 50% extra added in 787 * ufsdirhash_build) then free it, and let the caller rebuild 788 * if necessary. 789 */ 790 if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { 791 DIRHASH_UNLOCK(dh); 792 ufsdirhash_free(ip); 793 return; 794 } 795 796 /* 797 * Remove any `first free' information pertaining to the 798 * truncated blocks. All blocks we're removing should be 799 * completely unused. 800 */ 801 if (dh->dh_firstfree[DH_NFSTATS] >= block) 802 dh->dh_firstfree[DH_NFSTATS] = -1; 803 for (i = block; i < dh->dh_dirblks; i++) 804 if (dh->dh_blkfree[i] != dirblksiz / DIRALIGN) 805 panic("ufsdirhash_dirtrunc: blocks in use"); 806 for (i = 0; i < DH_NFSTATS; i++) 807 if (dh->dh_firstfree[i] >= block) 808 panic("ufsdirhash_dirtrunc: first free corrupt"); 809 dh->dh_dirblks = block; 810 DIRHASH_UNLOCK(dh); 811} 812 813/* 814 * Debugging function to check that the dirhash information about 815 * a directory block matches its actual contents. Panics if a mismatch 816 * is detected. 817 * 818 * On entry, `sbuf' should point to the start of an in-core 819 * DIRBLKSIZ-sized directory block, and `offset' should contain the 820 * offset from the start of the directory of that block. 821 */ 822void 823ufsdirhash_checkblock(struct inode *ip, char *sbuf, doff_t offset) 824{ 825 struct dirhash *dh; 826 struct direct *dp; 827 int block, ffslot, i, nfree; 828 const int needswap = UFS_MPNEEDSWAP(ip->i_ump); 829 int dirblksiz = ip->i_ump->um_dirblksiz; 830 831 if (!ufs_dirhashcheck) 832 return; 833 if ((dh = ip->i_dirhash) == NULL) 834 return; 835 836 DIRHASH_LOCK(dh); 837 if (dh->dh_hash == NULL) { 838 DIRHASH_UNLOCK(dh); 839 ufsdirhash_free(ip); 840 return; 841 } 842 843 block = offset / dirblksiz; 844 if ((offset & (dirblksiz - 1)) != 0 || block >= dh->dh_dirblks) 845 panic("ufsdirhash_checkblock: bad offset"); 846 847 nfree = 0; 848 for (i = 0; i < dirblksiz; i += dp->d_reclen) { 849 dp = (struct direct *)(sbuf + i); 850 if (dp->d_reclen == 0 || i + dp->d_reclen > dirblksiz) 851 panic("ufsdirhash_checkblock: bad dir"); 852 853 if (dp->d_ino == 0) { 854#if 0 855 /* 856 * XXX entries with d_ino == 0 should only occur 857 * at the start of a DIRBLKSIZ block. However the 858 * ufs code is tolerant of such entries at other 859 * offsets, and fsck does not fix them. 860 */ 861 if (i != 0) 862 panic("ufsdirhash_checkblock: bad dir inode"); 863#endif 864 nfree += dp->d_reclen; 865 continue; 866 } 867 868 /* Check that the entry exists (will panic if it doesn't). */ 869 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); 870 871 nfree += dp->d_reclen - DIRSIZ(0, dp, needswap); 872 } 873 if (i != dirblksiz) 874 panic("ufsdirhash_checkblock: bad dir end"); 875 876 if (dh->dh_blkfree[block] * DIRALIGN != nfree) 877 panic("ufsdirhash_checkblock: bad free count"); 878 879 ffslot = BLKFREE2IDX(nfree / DIRALIGN); 880 for (i = 0; i <= DH_NFSTATS; i++) 881 if (dh->dh_firstfree[i] == block && i != ffslot) 882 panic("ufsdirhash_checkblock: bad first-free"); 883 if (dh->dh_firstfree[ffslot] == -1) 884 panic("ufsdirhash_checkblock: missing first-free entry"); 885 DIRHASH_UNLOCK(dh); 886} 887 888/* 889 * Hash the specified filename into a dirhash slot. 890 */ 891static int 892ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen) 893{ 894 u_int32_t hash; 895 896 /* 897 * We hash the name and then some other bit of data that is 898 * invariant over the dirhash's lifetime. Otherwise names 899 * differing only in the last byte are placed close to one 900 * another in the table, which is bad for linear probing. 901 */ 902 hash = hash32_buf(name, namelen, HASH32_BUF_INIT); 903 hash = hash32_buf(&dh, sizeof(dh), hash); 904 return (hash % dh->dh_hlen); 905} 906 907/* 908 * Adjust the number of free bytes in the block containing `offset' 909 * by the value specified by `diff'. 910 * 911 * The caller must ensure we have exclusive access to `dh'; normally 912 * that means that dh_lock should be held, but this is also called 913 * from ufsdirhash_build() where exclusive access can be assumed. 914 */ 915static void 916ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff, int dirblksiz) 917{ 918 int block, i, nfidx, ofidx; 919 920 KASSERT(mutex_owned(&dh->dh_lock)); 921 922 /* Update the per-block summary info. */ 923 block = offset / dirblksiz; 924 KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks); 925 ofidx = BLKFREE2IDX(dh->dh_blkfree[block]); 926 dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); 927 nfidx = BLKFREE2IDX(dh->dh_blkfree[block]); 928 929 /* Update the `first free' list if necessary. */ 930 if (ofidx != nfidx) { 931 /* If removing, scan forward for the next block. */ 932 if (dh->dh_firstfree[ofidx] == block) { 933 for (i = block + 1; i < dh->dh_dirblks; i++) 934 if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx) 935 break; 936 dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1; 937 } 938 939 /* Make this the new `first free' if necessary */ 940 if (dh->dh_firstfree[nfidx] > block || 941 dh->dh_firstfree[nfidx] == -1) 942 dh->dh_firstfree[nfidx] = block; 943 } 944} 945 946/* 947 * Find the specified name which should have the specified offset. 948 * Returns a slot number, and panics on failure. 949 * 950 * `dh' must be locked on entry and remains so on return. 951 */ 952static int 953ufsdirhash_findslot(struct dirhash *dh, const char *name, int namelen, 954 doff_t offset) 955{ 956 int slot; 957 958 KASSERT(mutex_owned(&dh->dh_lock)); 959 960 /* Find the entry. */ 961 KASSERT(dh->dh_hused < dh->dh_hlen); 962 slot = ufsdirhash_hash(dh, name, namelen); 963 while (DH_ENTRY(dh, slot) != offset && 964 DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 965 slot = WRAPINCR(slot, dh->dh_hlen); 966 if (DH_ENTRY(dh, slot) != offset) 967 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); 968 969 return (slot); 970} 971 972/* 973 * Remove the entry corresponding to the specified slot from the hash array. 974 * 975 * `dh' must be locked on entry and remains so on return. 976 */ 977static void 978ufsdirhash_delslot(struct dirhash *dh, int slot) 979{ 980 int i; 981 982 KASSERT(mutex_owned(&dh->dh_lock)); 983 984 /* Mark the entry as deleted. */ 985 DH_ENTRY(dh, slot) = DIRHASH_DEL; 986 987 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ 988 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) 989 i = WRAPINCR(i, dh->dh_hlen); 990 if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { 991 i = WRAPDECR(i, dh->dh_hlen); 992 while (DH_ENTRY(dh, i) == DIRHASH_DEL) { 993 DH_ENTRY(dh, i) = DIRHASH_EMPTY; 994 dh->dh_hused--; 995 i = WRAPDECR(i, dh->dh_hlen); 996 } 997 KASSERT(dh->dh_hused >= 0); 998 } 999} 1000 1001/* 1002 * Given a directory entry and its offset, find the offset of the 1003 * previous entry in the same DIRBLKSIZ-sized block. Returns an 1004 * offset, or -1 if there is no previous entry in the block or some 1005 * other problem occurred. 1006 */ 1007static doff_t 1008ufsdirhash_getprev(struct direct *dirp, doff_t offset, int dirblksiz) 1009{ 1010 struct direct *dp; 1011 char *blkbuf; 1012 doff_t blkoff, prevoff; 1013 int entrypos, i; 1014 1015 blkoff = offset & ~(dirblksiz - 1); /* offset of start of block */ 1016 entrypos = offset & (dirblksiz - 1); /* entry relative to block */ 1017 blkbuf = (char *)dirp - entrypos; 1018 prevoff = blkoff; 1019 1020 /* If `offset' is the start of a block, there is no previous entry. */ 1021 if (entrypos == 0) 1022 return (-1); 1023 1024 /* Scan from the start of the block until we get to the entry. */ 1025 for (i = 0; i < entrypos; i += dp->d_reclen) { 1026 dp = (struct direct *)(blkbuf + i); 1027 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) 1028 return (-1); /* Corrupted directory. */ 1029 prevoff = blkoff + i; 1030 } 1031 return (prevoff); 1032} 1033 1034/* 1035 * Try to free up `wanted' bytes by stealing memory from existing 1036 * dirhashes. Returns zero with list locked if successful. 1037 */ 1038static int 1039ufsdirhash_recycle(int wanted) 1040{ 1041 struct dirhash *dh; 1042 doff_t **hash; 1043 u_int8_t *blkfree; 1044 int i, mem, narrays; 1045 size_t hashsz, blkfreesz; 1046 1047 DIRHASHLIST_LOCK(); 1048 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { 1049 /* Find a dirhash, and lock it. */ 1050 if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) { 1051 DIRHASHLIST_UNLOCK(); 1052 return (-1); 1053 } 1054 DIRHASH_LOCK(dh); 1055 KASSERT(dh->dh_hash != NULL); 1056 1057 /* Decrement the score; only recycle if it becomes zero. */ 1058 if (--dh->dh_score > 0) { 1059 DIRHASH_UNLOCK(dh); 1060 DIRHASHLIST_UNLOCK(); 1061 return (-1); 1062 } 1063 1064 /* Remove it from the list and detach its memory. */ 1065 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 1066 dh->dh_onlist = 0; 1067 hash = dh->dh_hash; 1068 hashsz = dh->dh_hashsz; 1069 dh->dh_hash = NULL; 1070 blkfree = dh->dh_blkfree; 1071 blkfreesz = dh->dh_blkfreesz; 1072 dh->dh_blkfree = NULL; 1073 narrays = dh->dh_narrays; 1074 mem = narrays * sizeof(*dh->dh_hash) + 1075 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 1076 dh->dh_nblk * sizeof(*dh->dh_blkfree); 1077 1078 /* Unlock everything, free the detached memory. */ 1079 DIRHASH_UNLOCK(dh); 1080 DIRHASHLIST_UNLOCK(); 1081 1082 for (i = 0; i < narrays; i++) 1083 DIRHASH_BLKFREE(hash[i]); 1084 kmem_free(hash, hashsz); 1085 kmem_free(blkfree, blkfreesz); 1086 1087 /* Account for the returned memory, and repeat if necessary. */ 1088 DIRHASHLIST_LOCK(); 1089 atomic_add_int(&ufs_dirhashmem, -mem); 1090 } 1091 /* Success. */ 1092 return (0); 1093} 1094 1095static void 1096ufsdirhash_sysctl_init(void) 1097{ 1098 const struct sysctlnode *rnode, *cnode; 1099 1100 sysctl_createv(&ufsdirhash_sysctl_log, 0, NULL, &rnode, 1101 CTLFLAG_PERMANENT, 1102 CTLTYPE_NODE, "vfs", NULL, 1103 NULL, 0, NULL, 0, 1104 CTL_VFS, CTL_EOL); 1105 1106 sysctl_createv(&ufsdirhash_sysctl_log, 0, &rnode, &rnode, 1107 CTLFLAG_PERMANENT, 1108 CTLTYPE_NODE, "ufs", 1109 SYSCTL_DESCR("ufs"), 1110 NULL, 0, NULL, 0, 1111 CTL_CREATE, CTL_EOL); 1112 1113 sysctl_createv(&ufsdirhash_sysctl_log, 0, &rnode, &rnode, 1114 CTLFLAG_PERMANENT, 1115 CTLTYPE_NODE, "dirhash", 1116 SYSCTL_DESCR("dirhash"), 1117 NULL, 0, NULL, 0, 1118 CTL_CREATE, CTL_EOL); 1119 1120 sysctl_createv(&ufsdirhash_sysctl_log, 0, &rnode, &cnode, 1121 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1122 CTLTYPE_INT, "minblocks", 1123 SYSCTL_DESCR("minimum hashed directory size in blocks"), 1124 NULL, 0, &ufs_dirhashminblks, 0, 1125 CTL_CREATE, CTL_EOL); 1126 1127 sysctl_createv(&ufsdirhash_sysctl_log, 0, &rnode, &cnode, 1128 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1129 CTLTYPE_INT, "maxmem", 1130 SYSCTL_DESCR("maximum dirhash memory usage"), 1131 NULL, 0, &ufs_dirhashmaxmem, 0, 1132 CTL_CREATE, CTL_EOL); 1133 1134 sysctl_createv(&ufsdirhash_sysctl_log, 0, &rnode, &cnode, 1135 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 1136 CTLTYPE_INT, "memused", 1137 SYSCTL_DESCR("current dirhash memory usage"), 1138 NULL, 0, &ufs_dirhashmem, 0, 1139 CTL_CREATE, CTL_EOL); 1140 1141 sysctl_createv(&ufsdirhash_sysctl_log, 0, &rnode, &cnode, 1142 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1143 CTLTYPE_INT, "docheck", 1144 SYSCTL_DESCR("enable extra sanity checks"), 1145 NULL, 0, &ufs_dirhashcheck, 0, 1146 CTL_CREATE, CTL_EOL); 1147} 1148 1149void 1150ufsdirhash_init(void) 1151{ 1152 1153 mutex_init(&ufsdirhash_lock, MUTEX_DEFAULT, IPL_NONE); 1154 ufsdirhashblk_cache = pool_cache_init(DH_NBLKOFF * sizeof(daddr_t), 0, 1155 0, 0, "dirhashblk", NULL, IPL_NONE, NULL, NULL, NULL); 1156 ufsdirhash_cache = pool_cache_init(sizeof(struct dirhash), 0, 1157 0, 0, "dirhash", NULL, IPL_NONE, NULL, NULL, NULL); 1158 TAILQ_INIT(&ufsdirhash_list); 1159 ufsdirhash_sysctl_init(); 1160} 1161 1162void 1163ufsdirhash_done(void) 1164{ 1165 1166 KASSERT(TAILQ_EMPTY(&ufsdirhash_list)); 1167 pool_cache_destroy(ufsdirhashblk_cache); 1168 pool_cache_destroy(ufsdirhash_cache); 1169 mutex_destroy(&ufsdirhash_lock); 1170 sysctl_teardown(&ufsdirhash_sysctl_log); 1171} 1172