ufs_dirhash.c revision 86350
1/* 2 * Copyright (c) 2001 Ian Dowse. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: head/sys/ufs/ufs/ufs_dirhash.c 86350 2001-11-14 15:08:07Z iedowse $ 26 */ 27/* 28 * This implements a hash-based lookup scheme for UFS directories. 29 */ 30 31#include "opt_ufs.h" 32 33#ifdef UFS_DIRHASH 34 35#include <sys/param.h> 36#include <sys/systm.h> 37#include <sys/kernel.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#include <sys/malloc.h> 41#include <sys/fnv_hash.h> 42#include <sys/proc.h> 43#include <sys/bio.h> 44#include <sys/buf.h> 45#include <sys/vnode.h> 46#include <sys/mount.h> 47#include <sys/sysctl.h> 48#include <vm/vm_zone.h> 49 50#include <ufs/ufs/quota.h> 51#include <ufs/ufs/inode.h> 52#include <ufs/ufs/dir.h> 53#include <ufs/ufs/dirhash.h> 54#include <ufs/ufs/extattr.h> 55#include <ufs/ufs/ufsmount.h> 56#include <ufs/ufs/ufs_extern.h> 57 58#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) 59#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 60 61static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables"); 62 63SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); 64 65static int ufs_mindirhashsize = DIRBLKSIZ * 5; 66SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, 67 &ufs_mindirhashsize, 68 0, "minimum directory size in bytes for which to use hashed lookup"); 69static int ufs_dirhashmaxmem = 2 * 1024 * 1024; 70SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem, 71 0, "maximum allowed dirhash memory usage"); 72static int ufs_dirhashmem; 73SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem, 74 0, "current dirhash memory usage"); 75static int ufs_dirhashcheck = 0; 76SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck, 77 0, "enable extra sanity tests"); 78 79 80static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); 81static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); 82static void ufsdirhash_delslot(struct dirhash *dh, int slot); 83static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, 84 doff_t offset); 85static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); 86static void ufsdirhash_init(void); 87static int ufsdirhash_recycle(int wanted); 88 89static vm_zone_t ufsdirhash_zone; 90 91/* Dirhash list; recently-used entries are near the tail. */ 92static TAILQ_HEAD(, dirhash) ufsdirhash_list; 93 94/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ 95static struct mtx ufsdirhash_mtx; 96 97/* 98 * Locking order: 99 * ufsdirhash_mtx 100 * dh_mtx 101 * 102 * The dh_mtx mutex should be aquired either via the inode lock, or via 103 * ufsdirhash_mtx. Only the owner of the inode may free the associated 104 * dirhash, but anything can steal its memory and set dh_hash to NULL. 105 */ 106 107/* 108 * Attempt to build up a hash table for the directory contents in 109 * inode 'ip'. Returns 0 on success, or -1 of the operation failed. 110 */ 111int 112ufsdirhash_build(struct inode *ip) 113{ 114 struct dirhash *dh; 115 struct buf *bp = NULL; 116 struct direct *ep; 117 struct vnode *vp; 118 doff_t bmask, pos; 119 int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; 120 121 /* Check if we can/should use dirhash. */ 122 if (ip->i_dirhash == NULL) { 123 if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode)) 124 return (-1); 125 } else { 126 /* Hash exists, but sysctls could have changed. */ 127 if (ip->i_size < ufs_mindirhashsize || 128 ufs_dirhashmem > ufs_dirhashmaxmem) { 129 ufsdirhash_free(ip); 130 return (-1); 131 } 132 /* Check if hash exists and is intact (note: unlocked read). */ 133 if (ip->i_dirhash->dh_hash != NULL) 134 return (0); 135 /* Free the old, recycled hash and build a new one. */ 136 ufsdirhash_free(ip); 137 } 138 139 /* Don't hash removed directories. */ 140 if (ip->i_effnlink == 0) 141 return (-1); 142 143 vp = ip->i_vnode; 144 /* Allocate 50% more entries than this dir size could ever need. */ 145 KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size")); 146 nslots = ip->i_size / DIRECTSIZ(1); 147 nslots = (nslots * 3 + 1) / 2; 148 narrays = howmany(nslots, DH_NBLKOFF); 149 nslots = narrays * DH_NBLKOFF; 150 dirblocks = howmany(ip->i_size, DIRBLKSIZ); 151 nblocks = (dirblocks * 3 + 1) / 2; 152 153 memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + 154 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 155 nblocks * sizeof(*dh->dh_blkfree); 156 mtx_lock(&ufsdirhash_mtx); 157 if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { 158 mtx_unlock(&ufsdirhash_mtx); 159 if (memreqd > ufs_dirhashmaxmem / 2) 160 return (-1); 161 162 /* Try to free some space. */ 163 if (ufsdirhash_recycle(memreqd) != 0) 164 return (-1); 165 /* Enough was freed, and ufsdirhash_mtx has been locked. */ 166 } 167 ufs_dirhashmem += memreqd; 168 mtx_unlock(&ufsdirhash_mtx); 169 170 /* 171 * Use non-blocking mallocs so that we will revert to a linear 172 * lookup on failure rather than potentially blocking forever. 173 */ 174 MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO); 175 if (dh == NULL) 176 return (-1); 177 MALLOC(dh->dh_hash, doff_t **, narrays * sizeof(dh->dh_hash[0]), 178 M_DIRHASH, M_NOWAIT | M_ZERO); 179 MALLOC(dh->dh_blkfree, u_int8_t *, nblocks * sizeof(dh->dh_blkfree[0]), 180 M_DIRHASH, M_NOWAIT); 181 if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) 182 goto fail; 183 for (i = 0; i < narrays; i++) { 184 if ((dh->dh_hash[i] = zalloc(ufsdirhash_zone)) == NULL) 185 goto fail; 186 for (j = 0; j < DH_NBLKOFF; j++) 187 dh->dh_hash[i][j] = DIRHASH_EMPTY; 188 } 189 190 /* Initialise the hash table and block statistics. */ 191 mtx_init(&dh->dh_mtx, "dirhash", MTX_DEF); 192 dh->dh_narrays = narrays; 193 dh->dh_hlen = nslots; 194 dh->dh_nblk = nblocks; 195 dh->dh_dirblks = dirblocks; 196 for (i = 0; i < dirblocks; i++) 197 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; 198 for (i = 0; i < DH_NFSTATS; i++) 199 dh->dh_firstfree[i] = -1; 200 dh->dh_firstfree[DH_NFSTATS] = 0; 201 dh->dh_seqopt = 0; 202 dh->dh_seqoff = 0; 203 dh->dh_score = DH_SCOREINIT; 204 ip->i_dirhash = dh; 205 206 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 207 pos = 0; 208 while (pos < ip->i_size) { 209 /* If necessary, get the next directory block. */ 210 if ((pos & bmask) == 0) { 211 if (bp != NULL) 212 brelse(bp); 213 if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0) 214 goto fail; 215 } 216 217 /* Add this entry to the hash. */ 218 ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); 219 if (ep->d_reclen == 0 || ep->d_reclen > 220 DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { 221 /* Corrupted directory. */ 222 brelse(bp); 223 goto fail; 224 } 225 if (ep->d_ino != 0) { 226 /* Add the entry (simplified ufsdirhash_add). */ 227 slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); 228 while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 229 slot = WRAPINCR(slot, dh->dh_hlen); 230 dh->dh_hused++; 231 DH_ENTRY(dh, slot) = pos; 232 ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); 233 } 234 pos += ep->d_reclen; 235 } 236 237 if (bp != NULL) 238 brelse(bp); 239 mtx_lock(&ufsdirhash_mtx); 240 TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); 241 dh->dh_onlist = 1; 242 mtx_unlock(&ufsdirhash_mtx); 243 return (0); 244 245fail: 246 if (dh->dh_hash != NULL) { 247 for (i = 0; i < narrays; i++) 248 if (dh->dh_hash[i] != NULL) 249 zfree(ufsdirhash_zone, dh->dh_hash[i]); 250 FREE(dh->dh_hash, M_DIRHASH); 251 } 252 if (dh->dh_blkfree != NULL) 253 FREE(dh->dh_blkfree, M_DIRHASH); 254 FREE(dh, M_DIRHASH); 255 ip->i_dirhash = NULL; 256 mtx_lock(&ufsdirhash_mtx); 257 ufs_dirhashmem -= memreqd; 258 mtx_unlock(&ufsdirhash_mtx); 259 return (-1); 260} 261 262/* 263 * Free any hash table associated with inode 'ip'. 264 */ 265void 266ufsdirhash_free(struct inode *ip) 267{ 268 struct dirhash *dh; 269 int i, mem; 270 271 if ((dh = ip->i_dirhash) == NULL) 272 return; 273 mtx_lock(&ufsdirhash_mtx); 274 mtx_lock(&dh->dh_mtx); 275 if (dh->dh_onlist) 276 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 277 mtx_unlock(&dh->dh_mtx); 278 mtx_unlock(&ufsdirhash_mtx); 279 280 /* The dirhash pointed to by 'dh' is exclusively ours now. */ 281 282 mem = sizeof(*dh); 283 if (dh->dh_hash != NULL) { 284 for (i = 0; i < dh->dh_narrays; i++) 285 zfree(ufsdirhash_zone, dh->dh_hash[i]); 286 FREE(dh->dh_hash, M_DIRHASH); 287 FREE(dh->dh_blkfree, M_DIRHASH); 288 mem += dh->dh_narrays * sizeof(*dh->dh_hash) + 289 dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 290 dh->dh_nblk * sizeof(*dh->dh_blkfree); 291 } 292 mtx_destroy(&dh->dh_mtx); 293 FREE(dh, M_DIRHASH); 294 ip->i_dirhash = NULL; 295 296 mtx_lock(&ufsdirhash_mtx); 297 ufs_dirhashmem -= mem; 298 mtx_unlock(&ufsdirhash_mtx); 299} 300 301/* 302 * Find the offset of the specified name within the given inode. 303 * Returns 0 on success, ENOENT if the entry does not exist, or 304 * EJUSTRETURN if the caller should revert to a linear search. 305 * 306 * If successful, the directory offset is stored in *offp, and a 307 * pointer to a struct buf containing the entry is stored in *bpp. If 308 * prevoffp is non-NULL, the offset of the previous entry within 309 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry 310 * is the first in a block, the start of the block is used). 311 */ 312int 313ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, 314 struct buf **bpp, doff_t *prevoffp) 315{ 316 struct dirhash *dh, *dh_next; 317 struct direct *dp; 318 struct vnode *vp; 319 struct buf *bp; 320 doff_t blkoff, bmask, offset, prevoff; 321 int i, slot; 322 323 if ((dh = ip->i_dirhash) == NULL) 324 return (EJUSTRETURN); 325 /* 326 * Move this dirhash towards the end of the list if it has a 327 * score higher than the next entry, and aquire the dh_mtx. 328 * Optimise the case where it's already the last by performing 329 * an unlocked read of the TAILQ_NEXT pointer. 330 * 331 * In both cases, end up holding just dh_mtx. 332 */ 333 if (TAILQ_NEXT(dh, dh_list) != NULL) { 334 mtx_lock(&ufsdirhash_mtx); 335 mtx_lock(&dh->dh_mtx); 336 /* 337 * If the new score will be greater than that of the next 338 * entry, then move this entry past it. With both mutexes 339 * held, dh_next won't go away, but its dh_score could 340 * change; that's not important since it is just a hint. 341 */ 342 if (dh->dh_hash != NULL && 343 (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && 344 dh->dh_score >= dh_next->dh_score) { 345 KASSERT(dh->dh_onlist, ("dirhash: not on list")); 346 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 347 TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, 348 dh_list); 349 } 350 mtx_unlock(&ufsdirhash_mtx); 351 } else { 352 /* Already the last, though that could change as we wait. */ 353 mtx_lock(&dh->dh_mtx); 354 } 355 if (dh->dh_hash == NULL) { 356 mtx_unlock(&dh->dh_mtx); 357 ufsdirhash_free(ip); 358 return (EJUSTRETURN); 359 } 360 361 /* Update the score. */ 362 if (dh->dh_score < DH_SCOREMAX) 363 dh->dh_score++; 364 365 vp = ip->i_vnode; 366 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 367 blkoff = -1; 368 bp = NULL; 369restart: 370 slot = ufsdirhash_hash(dh, name, namelen); 371 372 if (dh->dh_seqopt) { 373 /* 374 * Sequential access optimisation. dh_seqoff contains the 375 * offset of the directory entry immediately following 376 * the last entry that was looked up. Check if this offset 377 * appears in the hash chain for the name we are looking for. 378 */ 379 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; 380 i = WRAPINCR(i, dh->dh_hlen)) 381 if (offset == dh->dh_seqoff) 382 break; 383 if (offset == dh->dh_seqoff) { 384 /* 385 * We found an entry with the expected offset. This 386 * is probably the entry we want, but if not, the 387 * code below will turn off seqoff and retry. 388 */ 389 slot = i; 390 } else 391 dh->dh_seqopt = 0; 392 } 393 394 for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; 395 slot = WRAPINCR(slot, dh->dh_hlen)) { 396 if (offset == DIRHASH_DEL) 397 continue; 398 mtx_unlock(&dh->dh_mtx); 399 400 if (offset < 0 || offset >= ip->i_size) 401 panic("ufsdirhash_lookup: bad offset in hash array"); 402 if ((offset & ~bmask) != blkoff) { 403 if (bp != NULL) 404 brelse(bp); 405 blkoff = offset & ~bmask; 406 if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) 407 return (EJUSTRETURN); 408 } 409 dp = (struct direct *)(bp->b_data + (offset & bmask)); 410 if (dp->d_reclen == 0 || dp->d_reclen > 411 DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { 412 /* Corrupted directory. */ 413 brelse(bp); 414 return (EJUSTRETURN); 415 } 416 if (dp->d_namlen == namelen && 417 bcmp(dp->d_name, name, namelen) == 0) { 418 /* Found. Get the prev offset if needed. */ 419 if (prevoffp != NULL) { 420 if (offset & (DIRBLKSIZ - 1)) { 421 prevoff = ufsdirhash_getprev(dp, 422 offset); 423 if (prevoff == -1) { 424 brelse(bp); 425 return (EJUSTRETURN); 426 } 427 } else 428 prevoff = offset; 429 *prevoffp = prevoff; 430 } 431 432 /* Check for sequential access, and update offset. */ 433 if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) 434 dh->dh_seqopt = 1; 435 dh->dh_seqoff = offset + DIRSIZ(0, dp); 436 437 *bpp = bp; 438 *offp = offset; 439 return (0); 440 } 441 442 mtx_lock(&dh->dh_mtx); 443 if (dh->dh_hash == NULL) { 444 mtx_unlock(&dh->dh_mtx); 445 if (bp != NULL) 446 brelse(bp); 447 ufsdirhash_free(ip); 448 return (EJUSTRETURN); 449 } 450 /* 451 * When the name doesn't match in the seqopt case, go back 452 * and search normally. 453 */ 454 if (dh->dh_seqopt) { 455 dh->dh_seqopt = 0; 456 goto restart; 457 } 458 } 459 mtx_unlock(&dh->dh_mtx); 460 if (bp != NULL) 461 brelse(bp); 462 return (ENOENT); 463} 464 465/* 466 * Find a directory block with room for 'slotneeded' bytes. Returns 467 * the offset of the directory entry that begins the free space. 468 * This will either be the offset of an existing entry that has free 469 * space at the end, or the offset of an entry with d_ino == 0 at 470 * the start of a DIRBLKSIZ block. 471 * 472 * To use the space, the caller may need to compact existing entries in 473 * the directory. The total number of bytes in all of the entries involved 474 * in the compaction is stored in *slotsize. In other words, all of 475 * the entries that must be compacted are exactly contained in the 476 * region beginning at the returned offset and spanning *slotsize bytes. 477 * 478 * Returns -1 if no space was found, indicating that the directory 479 * must be extended. 480 */ 481doff_t 482ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) 483{ 484 struct direct *dp; 485 struct dirhash *dh; 486 struct buf *bp; 487 doff_t pos, slotstart; 488 int dirblock, error, freebytes, i; 489 490 if ((dh = ip->i_dirhash) == NULL) 491 return (-1); 492 mtx_lock(&dh->dh_mtx); 493 if (dh->dh_hash == NULL) { 494 mtx_unlock(&dh->dh_mtx); 495 ufsdirhash_free(ip); 496 return (-1); 497 } 498 499 /* Find a directory block with the desired free space. */ 500 dirblock = -1; 501 for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) 502 if ((dirblock = dh->dh_firstfree[i]) != -1) 503 break; 504 if (dirblock == -1) { 505 mtx_unlock(&dh->dh_mtx); 506 return (-1); 507 } 508 509 KASSERT(dirblock < dh->dh_nblk && 510 dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), 511 ("ufsdirhash_findfree: bad stats")); 512 mtx_unlock(&dh->dh_mtx); 513 pos = dirblock * DIRBLKSIZ; 514 error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp); 515 if (error) 516 return (-1); 517 518 /* Find the first entry with free space. */ 519 for (i = 0; i < DIRBLKSIZ; ) { 520 if (dp->d_reclen == 0) { 521 brelse(bp); 522 return (-1); 523 } 524 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp)) 525 break; 526 i += dp->d_reclen; 527 dp = (struct direct *)((char *)dp + dp->d_reclen); 528 } 529 if (i > DIRBLKSIZ) { 530 brelse(bp); 531 return (-1); 532 } 533 slotstart = pos + i; 534 535 /* Find the range of entries needed to get enough space */ 536 freebytes = 0; 537 while (i < DIRBLKSIZ && freebytes < slotneeded) { 538 freebytes += dp->d_reclen; 539 if (dp->d_ino != 0) 540 freebytes -= DIRSIZ(0, dp); 541 if (dp->d_reclen == 0) { 542 brelse(bp); 543 return (-1); 544 } 545 i += dp->d_reclen; 546 dp = (struct direct *)((char *)dp + dp->d_reclen); 547 } 548 if (i > DIRBLKSIZ) { 549 brelse(bp); 550 return (-1); 551 } 552 if (freebytes < slotneeded) 553 panic("ufsdirhash_findfree: free mismatch"); 554 brelse(bp); 555 *slotsize = pos + i - slotstart; 556 return (slotstart); 557} 558 559/* 560 * Return the start of the unused space at the end of a directory, or 561 * -1 if there are no trailing unused blocks. 562 */ 563doff_t 564ufsdirhash_enduseful(struct inode *ip) 565{ 566 567 struct dirhash *dh; 568 int i; 569 570 if ((dh = ip->i_dirhash) == NULL) 571 return (-1); 572 mtx_lock(&dh->dh_mtx); 573 if (dh->dh_hash == NULL) { 574 mtx_unlock(&dh->dh_mtx); 575 ufsdirhash_free(ip); 576 return (-1); 577 } 578 579 if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) { 580 mtx_unlock(&dh->dh_mtx); 581 return (-1); 582 } 583 584 for (i = dh->dh_dirblks - 1; i >= 0; i--) 585 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 586 break; 587 mtx_unlock(&dh->dh_mtx); 588 return ((doff_t)(i + 1) * DIRBLKSIZ); 589} 590 591/* 592 * Insert information into the hash about a new directory entry. dirp 593 * points to a struct direct containing the entry, and offset specifies 594 * the offset of this entry. 595 */ 596void 597ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) 598{ 599 struct dirhash *dh; 600 int slot; 601 602 if ((dh = ip->i_dirhash) == NULL) 603 return; 604 mtx_lock(&dh->dh_mtx); 605 if (dh->dh_hash == NULL) { 606 mtx_unlock(&dh->dh_mtx); 607 ufsdirhash_free(ip); 608 return; 609 } 610 611 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 612 ("ufsdirhash_add: bad offset")); 613 /* 614 * Normal hash usage is < 66%. If the usage gets too high then 615 * remove the hash entirely and let it be rebuilt later. 616 */ 617 if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { 618 mtx_unlock(&dh->dh_mtx); 619 ufsdirhash_free(ip); 620 return; 621 } 622 623 /* Find a free hash slot (empty or deleted), and add the entry. */ 624 slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); 625 while (DH_ENTRY(dh, slot) >= 0) 626 slot = WRAPINCR(slot, dh->dh_hlen); 627 if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) 628 dh->dh_hused++; 629 DH_ENTRY(dh, slot) = offset; 630 631 /* Update the per-block summary info. */ 632 ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp)); 633 mtx_unlock(&dh->dh_mtx); 634} 635 636/* 637 * Remove the specified directory entry from the hash. The entry to remove 638 * is defined by the name in `dirp', which must exist at the specified 639 * `offset' within the directory. 640 */ 641void 642ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) 643{ 644 struct dirhash *dh; 645 int slot; 646 647 if ((dh = ip->i_dirhash) == NULL) 648 return; 649 mtx_lock(&dh->dh_mtx); 650 if (dh->dh_hash == NULL) { 651 mtx_unlock(&dh->dh_mtx); 652 ufsdirhash_free(ip); 653 return; 654 } 655 656 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 657 ("ufsdirhash_remove: bad offset")); 658 /* Find the entry */ 659 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); 660 661 /* Remove the hash entry. */ 662 ufsdirhash_delslot(dh, slot); 663 664 /* Update the per-block summary info. */ 665 ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp)); 666 mtx_unlock(&dh->dh_mtx); 667} 668 669/* 670 * Change the offset associated with a directory entry in the hash. Used 671 * when compacting directory blocks. 672 */ 673void 674ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, 675 doff_t newoff) 676{ 677 struct dirhash *dh; 678 int slot; 679 680 if ((dh = ip->i_dirhash) == NULL) 681 return; 682 mtx_lock(&dh->dh_mtx); 683 if (dh->dh_hash == NULL) { 684 mtx_unlock(&dh->dh_mtx); 685 ufsdirhash_free(ip); 686 return; 687 } 688 689 KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && 690 newoff < dh->dh_dirblks * DIRBLKSIZ, 691 ("ufsdirhash_move: bad offset")); 692 /* Find the entry, and update the offset. */ 693 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); 694 DH_ENTRY(dh, slot) = newoff; 695 mtx_unlock(&dh->dh_mtx); 696} 697 698/* 699 * Inform dirhash that the directory has grown by one block that 700 * begins at offset (i.e. the new length is offset + DIRBLKSIZ). 701 */ 702void 703ufsdirhash_newblk(struct inode *ip, doff_t offset) 704{ 705 struct dirhash *dh; 706 int block; 707 708 if ((dh = ip->i_dirhash) == NULL) 709 return; 710 mtx_lock(&dh->dh_mtx); 711 if (dh->dh_hash == NULL) { 712 mtx_unlock(&dh->dh_mtx); 713 ufsdirhash_free(ip); 714 return; 715 } 716 717 KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, 718 ("ufsdirhash_newblk: bad offset")); 719 block = offset / DIRBLKSIZ; 720 if (block >= dh->dh_nblk) { 721 /* Out of space; must rebuild. */ 722 mtx_unlock(&dh->dh_mtx); 723 ufsdirhash_free(ip); 724 return; 725 } 726 dh->dh_dirblks = block + 1; 727 728 /* Account for the new free block. */ 729 dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; 730 if (dh->dh_firstfree[DH_NFSTATS] == -1) 731 dh->dh_firstfree[DH_NFSTATS] = block; 732 mtx_unlock(&dh->dh_mtx); 733} 734 735/* 736 * Inform dirhash that the directory is being truncated. 737 */ 738void 739ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) 740{ 741 struct dirhash *dh; 742 int block, i; 743 744 if ((dh = ip->i_dirhash) == NULL) 745 return; 746 mtx_lock(&dh->dh_mtx); 747 if (dh->dh_hash == NULL) { 748 mtx_unlock(&dh->dh_mtx); 749 ufsdirhash_free(ip); 750 return; 751 } 752 753 KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, 754 ("ufsdirhash_dirtrunc: bad offset")); 755 block = howmany(offset, DIRBLKSIZ); 756 /* 757 * If the directory shrinks to less than 1/8 of dh_nblk blocks 758 * (about 20% of its original size due to the 50% extra added in 759 * ufsdirhash_build) then free it, and let the caller rebuild 760 * if necessary. 761 */ 762 if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { 763 mtx_unlock(&dh->dh_mtx); 764 ufsdirhash_free(ip); 765 return; 766 } 767 768 /* 769 * Remove any `first free' information pertaining to the 770 * truncated blocks. All blocks we're removing should be 771 * completely unused. 772 */ 773 if (dh->dh_firstfree[DH_NFSTATS] >= block) 774 dh->dh_firstfree[DH_NFSTATS] = -1; 775 for (i = block; i < dh->dh_dirblks; i++) 776 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 777 panic("ufsdirhash_dirtrunc: blocks in use"); 778 for (i = 0; i < DH_NFSTATS; i++) 779 if (dh->dh_firstfree[i] >= block) 780 panic("ufsdirhash_dirtrunc: first free corrupt"); 781 dh->dh_dirblks = block; 782 mtx_unlock(&dh->dh_mtx); 783} 784 785/* 786 * Debugging function to check that the dirhash information about 787 * a directory block matches its actual contents. Panics if a mismatch 788 * is detected. 789 * 790 * On entry, `buf' should point to the start of an in-core 791 * DIRBLKSIZ-sized directory block, and `offset' should contain the 792 * offset from the start of the directory of that block. 793 */ 794void 795ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) 796{ 797 struct dirhash *dh; 798 struct direct *dp; 799 int block, ffslot, i, nfree; 800 801 if (!ufs_dirhashcheck) 802 return; 803 if ((dh = ip->i_dirhash) == NULL) 804 return; 805 mtx_lock(&dh->dh_mtx); 806 if (dh->dh_hash == NULL) { 807 mtx_unlock(&dh->dh_mtx); 808 ufsdirhash_free(ip); 809 return; 810 } 811 812 block = offset / DIRBLKSIZ; 813 if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) 814 panic("ufsdirhash_checkblock: bad offset"); 815 816 nfree = 0; 817 for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { 818 dp = (struct direct *)(buf + i); 819 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) 820 panic("ufsdirhash_checkblock: bad dir"); 821 822 if (dp->d_ino == 0) { 823#if 0 824 /* 825 * XXX entries with d_ino == 0 should only occur 826 * at the start of a DIRBLKSIZ block. However the 827 * ufs code is tolerant of such entries at other 828 * offsets, and fsck does not fix them. 829 */ 830 if (i != 0) 831 panic("ufsdirhash_checkblock: bad dir inode"); 832#endif 833 nfree += dp->d_reclen; 834 continue; 835 } 836 837 /* Check that the entry exists (will panic if it doesn't). */ 838 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); 839 840 nfree += dp->d_reclen - DIRSIZ(0, dp); 841 } 842 if (i != DIRBLKSIZ) 843 panic("ufsdirhash_checkblock: bad dir end"); 844 845 if (dh->dh_blkfree[block] * DIRALIGN != nfree) 846 panic("ufsdirhash_checkblock: bad free count"); 847 848 ffslot = nfree / DIRALIGN; 849 if (ffslot > DH_NFSTATS) 850 ffslot = DH_NFSTATS; 851 for (i = 0; i <= DH_NFSTATS; i++) 852 if (dh->dh_firstfree[i] == block && i != ffslot) 853 panic("ufsdirhash_checkblock: bad first-free"); 854 mtx_unlock(&dh->dh_mtx); 855} 856 857/* 858 * Hash the specified filename into a dirhash slot. 859 */ 860static int 861ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) 862{ 863 return (fnv_32_buf(name, namelen, FNV1_32_INIT) % dh->dh_hlen); 864} 865 866/* 867 * Adjust the number of free bytes in the block containing `offset' 868 * by the value specified by `diff'. 869 * 870 * The caller must ensure we have exclusive access to `dh'; normally 871 * that means that dh_mtx should be held, but this is also called 872 * from ufsdirhash_build() where exclusive access can be assumed. 873 */ 874static void 875ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) 876{ 877 int block, i, nfidx, ofidx; 878 879 /* Update the per-block summary info. */ 880 block = offset / DIRBLKSIZ; 881 KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks, 882 ("dirhash bad offset")); 883 ofidx = dh->dh_blkfree[block]; 884 if (ofidx > DH_NFSTATS) 885 ofidx = DH_NFSTATS; 886 dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); 887 nfidx = dh->dh_blkfree[block]; 888 if (nfidx > DH_NFSTATS) 889 nfidx = DH_NFSTATS; 890 891 /* Update the `first free' list if necessary. */ 892 if (ofidx != nfidx) { 893 /* If removing, scan forward for the next block. */ 894 if (dh->dh_firstfree[ofidx] == block) { 895 for (i = block + 1; i < dh->dh_dirblks; i++) 896 if (dh->dh_blkfree[i] == ofidx) 897 break; 898 dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1; 899 } 900 901 /* Make this the new `first free' if necessary */ 902 if (dh->dh_firstfree[nfidx] > block || 903 dh->dh_firstfree[nfidx] == -1) 904 dh->dh_firstfree[nfidx] = block; 905 } 906} 907 908/* 909 * Find the specified name which should have the specified offset. 910 * Returns a slot number, and panics on failure. 911 * 912 * `dh' must be locked on entry and remains so on return. 913 */ 914static int 915ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset) 916{ 917 int slot; 918 919 mtx_assert(&dh->dh_mtx, MA_OWNED); 920 921 /* Find the entry. */ 922 KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full")); 923 slot = ufsdirhash_hash(dh, name, namelen); 924 while (DH_ENTRY(dh, slot) != offset && 925 DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 926 slot = WRAPINCR(slot, dh->dh_hlen); 927 if (DH_ENTRY(dh, slot) != offset) 928 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); 929 930 return (slot); 931} 932 933/* 934 * Remove the entry corresponding to the specified slot from the hash array. 935 * 936 * `dh' must be locked on entry and remains so on return. 937 */ 938static void 939ufsdirhash_delslot(struct dirhash *dh, int slot) 940{ 941 int i; 942 943 mtx_assert(&dh->dh_mtx, MA_OWNED); 944 945 /* Mark the entry as deleted. */ 946 DH_ENTRY(dh, slot) = DIRHASH_DEL; 947 948 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ 949 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) 950 i = WRAPINCR(i, dh->dh_hlen); 951 if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { 952 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) { 953 DH_ENTRY(dh, i) = DIRHASH_EMPTY; 954 dh->dh_hused--; 955 i = WRAPINCR(i, dh->dh_hlen); 956 } 957 KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen")); 958 } 959} 960 961/* 962 * Given a directory entry and its offset, find the offset of the 963 * previous entry in the same DIRBLKSIZ-sized block. Returns an 964 * offset, or -1 if there is no previous entry in the block or some 965 * other problem occurred. 966 */ 967static doff_t 968ufsdirhash_getprev(struct direct *dirp, doff_t offset) 969{ 970 struct direct *dp; 971 char *blkbuf; 972 doff_t blkoff, prevoff; 973 int entrypos, i; 974 975 blkoff = offset & ~(DIRBLKSIZ - 1); /* offset of start of block */ 976 entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */ 977 blkbuf = (char *)dirp - entrypos; 978 prevoff = blkoff; 979 980 /* If `offset' is the start of a block, there is no previous entry. */ 981 if (entrypos == 0) 982 return (-1); 983 984 /* Scan from the start of the block until we get to the entry. */ 985 for (i = 0; i < entrypos; i += dp->d_reclen) { 986 dp = (struct direct *)(blkbuf + i); 987 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) 988 return (-1); /* Corrupted directory. */ 989 prevoff = blkoff + i; 990 } 991 return (prevoff); 992} 993 994/* 995 * Try to free up `wanted' bytes by stealing memory from existing 996 * dirhashes. Returns zero with ufsdirhash_mtx locked if successful. 997 */ 998static int 999ufsdirhash_recycle(int wanted) 1000{ 1001 struct dirhash *dh; 1002 doff_t **hash; 1003 u_int8_t *blkfree; 1004 int i, mem, narrays; 1005 1006 mtx_lock(&ufsdirhash_mtx); 1007 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { 1008 /* Find a dirhash, and lock it. */ 1009 if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) { 1010 mtx_unlock(&ufsdirhash_mtx); 1011 return (-1); 1012 } 1013 mtx_lock(&dh->dh_mtx); 1014 KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); 1015 1016 /* Decrement the score; only recycle if it becomes zero. */ 1017 if (--dh->dh_score > 0) { 1018 mtx_unlock(&dh->dh_mtx); 1019 mtx_unlock(&ufsdirhash_mtx); 1020 return (-1); 1021 } 1022 1023 /* Remove it from the list and detach its memory. */ 1024 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 1025 dh->dh_onlist = 0; 1026 hash = dh->dh_hash; 1027 dh->dh_hash = NULL; 1028 blkfree = dh->dh_blkfree; 1029 dh->dh_blkfree = NULL; 1030 narrays = dh->dh_narrays; 1031 mem = narrays * sizeof(*dh->dh_hash) + 1032 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 1033 dh->dh_nblk * sizeof(*dh->dh_blkfree); 1034 1035 /* Unlock everything, free the detached memory. */ 1036 mtx_unlock(&dh->dh_mtx); 1037 mtx_unlock(&ufsdirhash_mtx); 1038 for (i = 0; i < narrays; i++) 1039 zfree(ufsdirhash_zone, hash[i]); 1040 FREE(hash, M_DIRHASH); 1041 FREE(blkfree, M_DIRHASH); 1042 1043 /* Account for the returned memory, and repeat if necessary. */ 1044 mtx_lock(&ufsdirhash_mtx); 1045 ufs_dirhashmem -= mem; 1046 } 1047 /* Success; return with ufsdirhash_mtx locked. */ 1048 return (0); 1049} 1050 1051 1052static void 1053ufsdirhash_init() 1054{ 1055 ufsdirhash_zone = zinit("DIRHASH", DH_NBLKOFF * sizeof(daddr_t), 0, 1056 0, 1); 1057 mtx_init(&ufsdirhash_mtx, "dirhash list", MTX_DEF); 1058 TAILQ_INIT(&ufsdirhash_list); 1059} 1060SYSINIT(ufsdirhash, SI_SUB_PSEUDO, SI_ORDER_ANY, ufsdirhash_init, NULL) 1061 1062 1063#endif /* UFS_DIRHASH */ 1064