ufs_dirhash.c revision 92098
1/* 2 * Copyright (c) 2001 Ian Dowse. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: head/sys/ufs/ufs/ufs_dirhash.c 92098 2002-03-11 19:13:22Z iedowse $ 26 */ 27/* 28 * This implements a hash-based lookup scheme for UFS directories. 29 */ 30 31#include "opt_ufs.h" 32 33#ifdef UFS_DIRHASH 34 35#include <sys/param.h> 36#include <sys/systm.h> 37#include <sys/kernel.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#include <sys/malloc.h> 41#include <sys/fnv_hash.h> 42#include <sys/proc.h> 43#include <sys/bio.h> 44#include <sys/buf.h> 45#include <sys/vnode.h> 46#include <sys/mount.h> 47#include <sys/sysctl.h> 48#include <vm/vm_zone.h> 49 50#include <ufs/ufs/quota.h> 51#include <ufs/ufs/inode.h> 52#include <ufs/ufs/dir.h> 53#include <ufs/ufs/dirhash.h> 54#include <ufs/ufs/extattr.h> 55#include <ufs/ufs/ufsmount.h> 56#include <ufs/ufs/ufs_extern.h> 57 58#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) 59#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 60#define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n)) 61 62static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables"); 63 64SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); 65 66static int ufs_mindirhashsize = DIRBLKSIZ * 5; 67SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, 68 &ufs_mindirhashsize, 69 0, "minimum directory size in bytes for which to use hashed lookup"); 70static int ufs_dirhashmaxmem = 2 * 1024 * 1024; 71SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem, 72 0, "maximum allowed dirhash memory usage"); 73static int ufs_dirhashmem; 74SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem, 75 0, "current dirhash memory usage"); 76static int ufs_dirhashcheck = 0; 77SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck, 78 0, "enable extra sanity tests"); 79 80 81static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); 82static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); 83static void ufsdirhash_delslot(struct dirhash *dh, int slot); 84static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, 85 doff_t offset); 86static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); 87static void ufsdirhash_init(void); 88static int ufsdirhash_recycle(int wanted); 89 90static vm_zone_t ufsdirhash_zone; 91 92/* Dirhash list; recently-used entries are near the tail. */ 93static TAILQ_HEAD(, dirhash) ufsdirhash_list; 94 95/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ 96static struct mtx ufsdirhash_mtx; 97 98/* 99 * Locking order: 100 * ufsdirhash_mtx 101 * dh_mtx 102 * 103 * The dh_mtx mutex should be aquired either via the inode lock, or via 104 * ufsdirhash_mtx. Only the owner of the inode may free the associated 105 * dirhash, but anything can steal its memory and set dh_hash to NULL. 106 */ 107 108/* 109 * Attempt to build up a hash table for the directory contents in 110 * inode 'ip'. Returns 0 on success, or -1 of the operation failed. 111 */ 112int 113ufsdirhash_build(struct inode *ip) 114{ 115 struct dirhash *dh; 116 struct buf *bp = NULL; 117 struct direct *ep; 118 struct vnode *vp; 119 doff_t bmask, pos; 120 int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; 121 122 /* Check if we can/should use dirhash. */ 123 if (ip->i_dirhash == NULL) { 124 if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode)) 125 return (-1); 126 } else { 127 /* Hash exists, but sysctls could have changed. */ 128 if (ip->i_size < ufs_mindirhashsize || 129 ufs_dirhashmem > ufs_dirhashmaxmem) { 130 ufsdirhash_free(ip); 131 return (-1); 132 } 133 /* Check if hash exists and is intact (note: unlocked read). */ 134 if (ip->i_dirhash->dh_hash != NULL) 135 return (0); 136 /* Free the old, recycled hash and build a new one. */ 137 ufsdirhash_free(ip); 138 } 139 140 /* Don't hash removed directories. */ 141 if (ip->i_effnlink == 0) 142 return (-1); 143 144 vp = ip->i_vnode; 145 /* Allocate 50% more entries than this dir size could ever need. */ 146 KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size")); 147 nslots = ip->i_size / DIRECTSIZ(1); 148 nslots = (nslots * 3 + 1) / 2; 149 narrays = howmany(nslots, DH_NBLKOFF); 150 nslots = narrays * DH_NBLKOFF; 151 dirblocks = howmany(ip->i_size, DIRBLKSIZ); 152 nblocks = (dirblocks * 3 + 1) / 2; 153 154 memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + 155 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 156 nblocks * sizeof(*dh->dh_blkfree); 157 mtx_lock(&ufsdirhash_mtx); 158 if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { 159 mtx_unlock(&ufsdirhash_mtx); 160 if (memreqd > ufs_dirhashmaxmem / 2) 161 return (-1); 162 163 /* Try to free some space. */ 164 if (ufsdirhash_recycle(memreqd) != 0) 165 return (-1); 166 /* Enough was freed, and ufsdirhash_mtx has been locked. */ 167 } 168 ufs_dirhashmem += memreqd; 169 mtx_unlock(&ufsdirhash_mtx); 170 171 /* 172 * Use non-blocking mallocs so that we will revert to a linear 173 * lookup on failure rather than potentially blocking forever. 174 */ 175 MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO); 176 if (dh == NULL) 177 return (-1); 178 MALLOC(dh->dh_hash, doff_t **, narrays * sizeof(dh->dh_hash[0]), 179 M_DIRHASH, M_NOWAIT | M_ZERO); 180 MALLOC(dh->dh_blkfree, u_int8_t *, nblocks * sizeof(dh->dh_blkfree[0]), 181 M_DIRHASH, M_NOWAIT); 182 if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) 183 goto fail; 184 for (i = 0; i < narrays; i++) { 185 if ((dh->dh_hash[i] = zalloc(ufsdirhash_zone)) == NULL) 186 goto fail; 187 for (j = 0; j < DH_NBLKOFF; j++) 188 dh->dh_hash[i][j] = DIRHASH_EMPTY; 189 } 190 191 /* Initialise the hash table and block statistics. */ 192 mtx_init(&dh->dh_mtx, "dirhash", MTX_DEF); 193 dh->dh_narrays = narrays; 194 dh->dh_hlen = nslots; 195 dh->dh_nblk = nblocks; 196 dh->dh_dirblks = dirblocks; 197 for (i = 0; i < dirblocks; i++) 198 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; 199 for (i = 0; i < DH_NFSTATS; i++) 200 dh->dh_firstfree[i] = -1; 201 dh->dh_firstfree[DH_NFSTATS] = 0; 202 dh->dh_seqopt = 0; 203 dh->dh_seqoff = 0; 204 dh->dh_score = DH_SCOREINIT; 205 ip->i_dirhash = dh; 206 207 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 208 pos = 0; 209 while (pos < ip->i_size) { 210 /* If necessary, get the next directory block. */ 211 if ((pos & bmask) == 0) { 212 if (bp != NULL) 213 brelse(bp); 214 if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0) 215 goto fail; 216 } 217 218 /* Add this entry to the hash. */ 219 ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); 220 if (ep->d_reclen == 0 || ep->d_reclen > 221 DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { 222 /* Corrupted directory. */ 223 brelse(bp); 224 goto fail; 225 } 226 if (ep->d_ino != 0) { 227 /* Add the entry (simplified ufsdirhash_add). */ 228 slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); 229 while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 230 slot = WRAPINCR(slot, dh->dh_hlen); 231 dh->dh_hused++; 232 DH_ENTRY(dh, slot) = pos; 233 ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); 234 } 235 pos += ep->d_reclen; 236 } 237 238 if (bp != NULL) 239 brelse(bp); 240 mtx_lock(&ufsdirhash_mtx); 241 TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); 242 dh->dh_onlist = 1; 243 mtx_unlock(&ufsdirhash_mtx); 244 return (0); 245 246fail: 247 if (dh->dh_hash != NULL) { 248 for (i = 0; i < narrays; i++) 249 if (dh->dh_hash[i] != NULL) 250 zfree(ufsdirhash_zone, dh->dh_hash[i]); 251 FREE(dh->dh_hash, M_DIRHASH); 252 } 253 if (dh->dh_blkfree != NULL) 254 FREE(dh->dh_blkfree, M_DIRHASH); 255 FREE(dh, M_DIRHASH); 256 ip->i_dirhash = NULL; 257 mtx_lock(&ufsdirhash_mtx); 258 ufs_dirhashmem -= memreqd; 259 mtx_unlock(&ufsdirhash_mtx); 260 return (-1); 261} 262 263/* 264 * Free any hash table associated with inode 'ip'. 265 */ 266void 267ufsdirhash_free(struct inode *ip) 268{ 269 struct dirhash *dh; 270 int i, mem; 271 272 if ((dh = ip->i_dirhash) == NULL) 273 return; 274 mtx_lock(&ufsdirhash_mtx); 275 mtx_lock(&dh->dh_mtx); 276 if (dh->dh_onlist) 277 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 278 mtx_unlock(&dh->dh_mtx); 279 mtx_unlock(&ufsdirhash_mtx); 280 281 /* The dirhash pointed to by 'dh' is exclusively ours now. */ 282 283 mem = sizeof(*dh); 284 if (dh->dh_hash != NULL) { 285 for (i = 0; i < dh->dh_narrays; i++) 286 zfree(ufsdirhash_zone, dh->dh_hash[i]); 287 FREE(dh->dh_hash, M_DIRHASH); 288 FREE(dh->dh_blkfree, M_DIRHASH); 289 mem += dh->dh_narrays * sizeof(*dh->dh_hash) + 290 dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 291 dh->dh_nblk * sizeof(*dh->dh_blkfree); 292 } 293 mtx_destroy(&dh->dh_mtx); 294 FREE(dh, M_DIRHASH); 295 ip->i_dirhash = NULL; 296 297 mtx_lock(&ufsdirhash_mtx); 298 ufs_dirhashmem -= mem; 299 mtx_unlock(&ufsdirhash_mtx); 300} 301 302/* 303 * Find the offset of the specified name within the given inode. 304 * Returns 0 on success, ENOENT if the entry does not exist, or 305 * EJUSTRETURN if the caller should revert to a linear search. 306 * 307 * If successful, the directory offset is stored in *offp, and a 308 * pointer to a struct buf containing the entry is stored in *bpp. If 309 * prevoffp is non-NULL, the offset of the previous entry within 310 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry 311 * is the first in a block, the start of the block is used). 312 */ 313int 314ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, 315 struct buf **bpp, doff_t *prevoffp) 316{ 317 struct dirhash *dh, *dh_next; 318 struct direct *dp; 319 struct vnode *vp; 320 struct buf *bp; 321 doff_t blkoff, bmask, offset, prevoff; 322 int i, slot; 323 324 if ((dh = ip->i_dirhash) == NULL) 325 return (EJUSTRETURN); 326 /* 327 * Move this dirhash towards the end of the list if it has a 328 * score higher than the next entry, and aquire the dh_mtx. 329 * Optimise the case where it's already the last by performing 330 * an unlocked read of the TAILQ_NEXT pointer. 331 * 332 * In both cases, end up holding just dh_mtx. 333 */ 334 if (TAILQ_NEXT(dh, dh_list) != NULL) { 335 mtx_lock(&ufsdirhash_mtx); 336 mtx_lock(&dh->dh_mtx); 337 /* 338 * If the new score will be greater than that of the next 339 * entry, then move this entry past it. With both mutexes 340 * held, dh_next won't go away, but its dh_score could 341 * change; that's not important since it is just a hint. 342 */ 343 if (dh->dh_hash != NULL && 344 (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && 345 dh->dh_score >= dh_next->dh_score) { 346 KASSERT(dh->dh_onlist, ("dirhash: not on list")); 347 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 348 TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, 349 dh_list); 350 } 351 mtx_unlock(&ufsdirhash_mtx); 352 } else { 353 /* Already the last, though that could change as we wait. */ 354 mtx_lock(&dh->dh_mtx); 355 } 356 if (dh->dh_hash == NULL) { 357 mtx_unlock(&dh->dh_mtx); 358 ufsdirhash_free(ip); 359 return (EJUSTRETURN); 360 } 361 362 /* Update the score. */ 363 if (dh->dh_score < DH_SCOREMAX) 364 dh->dh_score++; 365 366 vp = ip->i_vnode; 367 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 368 blkoff = -1; 369 bp = NULL; 370restart: 371 slot = ufsdirhash_hash(dh, name, namelen); 372 373 if (dh->dh_seqopt) { 374 /* 375 * Sequential access optimisation. dh_seqoff contains the 376 * offset of the directory entry immediately following 377 * the last entry that was looked up. Check if this offset 378 * appears in the hash chain for the name we are looking for. 379 */ 380 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; 381 i = WRAPINCR(i, dh->dh_hlen)) 382 if (offset == dh->dh_seqoff) 383 break; 384 if (offset == dh->dh_seqoff) { 385 /* 386 * We found an entry with the expected offset. This 387 * is probably the entry we want, but if not, the 388 * code below will turn off seqoff and retry. 389 */ 390 slot = i; 391 } else 392 dh->dh_seqopt = 0; 393 } 394 395 for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; 396 slot = WRAPINCR(slot, dh->dh_hlen)) { 397 if (offset == DIRHASH_DEL) 398 continue; 399 mtx_unlock(&dh->dh_mtx); 400 401 if (offset < 0 || offset >= ip->i_size) 402 panic("ufsdirhash_lookup: bad offset in hash array"); 403 if ((offset & ~bmask) != blkoff) { 404 if (bp != NULL) 405 brelse(bp); 406 blkoff = offset & ~bmask; 407 if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) 408 return (EJUSTRETURN); 409 } 410 dp = (struct direct *)(bp->b_data + (offset & bmask)); 411 if (dp->d_reclen == 0 || dp->d_reclen > 412 DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { 413 /* Corrupted directory. */ 414 brelse(bp); 415 return (EJUSTRETURN); 416 } 417 if (dp->d_namlen == namelen && 418 bcmp(dp->d_name, name, namelen) == 0) { 419 /* Found. Get the prev offset if needed. */ 420 if (prevoffp != NULL) { 421 if (offset & (DIRBLKSIZ - 1)) { 422 prevoff = ufsdirhash_getprev(dp, 423 offset); 424 if (prevoff == -1) { 425 brelse(bp); 426 return (EJUSTRETURN); 427 } 428 } else 429 prevoff = offset; 430 *prevoffp = prevoff; 431 } 432 433 /* Check for sequential access, and update offset. */ 434 if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) 435 dh->dh_seqopt = 1; 436 dh->dh_seqoff = offset + DIRSIZ(0, dp); 437 438 *bpp = bp; 439 *offp = offset; 440 return (0); 441 } 442 443 mtx_lock(&dh->dh_mtx); 444 if (dh->dh_hash == NULL) { 445 mtx_unlock(&dh->dh_mtx); 446 if (bp != NULL) 447 brelse(bp); 448 ufsdirhash_free(ip); 449 return (EJUSTRETURN); 450 } 451 /* 452 * When the name doesn't match in the seqopt case, go back 453 * and search normally. 454 */ 455 if (dh->dh_seqopt) { 456 dh->dh_seqopt = 0; 457 goto restart; 458 } 459 } 460 mtx_unlock(&dh->dh_mtx); 461 if (bp != NULL) 462 brelse(bp); 463 return (ENOENT); 464} 465 466/* 467 * Find a directory block with room for 'slotneeded' bytes. Returns 468 * the offset of the directory entry that begins the free space. 469 * This will either be the offset of an existing entry that has free 470 * space at the end, or the offset of an entry with d_ino == 0 at 471 * the start of a DIRBLKSIZ block. 472 * 473 * To use the space, the caller may need to compact existing entries in 474 * the directory. The total number of bytes in all of the entries involved 475 * in the compaction is stored in *slotsize. In other words, all of 476 * the entries that must be compacted are exactly contained in the 477 * region beginning at the returned offset and spanning *slotsize bytes. 478 * 479 * Returns -1 if no space was found, indicating that the directory 480 * must be extended. 481 */ 482doff_t 483ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) 484{ 485 struct direct *dp; 486 struct dirhash *dh; 487 struct buf *bp; 488 doff_t pos, slotstart; 489 int dirblock, error, freebytes, i; 490 491 if ((dh = ip->i_dirhash) == NULL) 492 return (-1); 493 mtx_lock(&dh->dh_mtx); 494 if (dh->dh_hash == NULL) { 495 mtx_unlock(&dh->dh_mtx); 496 ufsdirhash_free(ip); 497 return (-1); 498 } 499 500 /* Find a directory block with the desired free space. */ 501 dirblock = -1; 502 for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) 503 if ((dirblock = dh->dh_firstfree[i]) != -1) 504 break; 505 if (dirblock == -1) { 506 mtx_unlock(&dh->dh_mtx); 507 return (-1); 508 } 509 510 KASSERT(dirblock < dh->dh_nblk && 511 dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), 512 ("ufsdirhash_findfree: bad stats")); 513 mtx_unlock(&dh->dh_mtx); 514 pos = dirblock * DIRBLKSIZ; 515 error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp); 516 if (error) 517 return (-1); 518 519 /* Find the first entry with free space. */ 520 for (i = 0; i < DIRBLKSIZ; ) { 521 if (dp->d_reclen == 0) { 522 brelse(bp); 523 return (-1); 524 } 525 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp)) 526 break; 527 i += dp->d_reclen; 528 dp = (struct direct *)((char *)dp + dp->d_reclen); 529 } 530 if (i > DIRBLKSIZ) { 531 brelse(bp); 532 return (-1); 533 } 534 slotstart = pos + i; 535 536 /* Find the range of entries needed to get enough space */ 537 freebytes = 0; 538 while (i < DIRBLKSIZ && freebytes < slotneeded) { 539 freebytes += dp->d_reclen; 540 if (dp->d_ino != 0) 541 freebytes -= DIRSIZ(0, dp); 542 if (dp->d_reclen == 0) { 543 brelse(bp); 544 return (-1); 545 } 546 i += dp->d_reclen; 547 dp = (struct direct *)((char *)dp + dp->d_reclen); 548 } 549 if (i > DIRBLKSIZ) { 550 brelse(bp); 551 return (-1); 552 } 553 if (freebytes < slotneeded) 554 panic("ufsdirhash_findfree: free mismatch"); 555 brelse(bp); 556 *slotsize = pos + i - slotstart; 557 return (slotstart); 558} 559 560/* 561 * Return the start of the unused space at the end of a directory, or 562 * -1 if there are no trailing unused blocks. 563 */ 564doff_t 565ufsdirhash_enduseful(struct inode *ip) 566{ 567 568 struct dirhash *dh; 569 int i; 570 571 if ((dh = ip->i_dirhash) == NULL) 572 return (-1); 573 mtx_lock(&dh->dh_mtx); 574 if (dh->dh_hash == NULL) { 575 mtx_unlock(&dh->dh_mtx); 576 ufsdirhash_free(ip); 577 return (-1); 578 } 579 580 if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) { 581 mtx_unlock(&dh->dh_mtx); 582 return (-1); 583 } 584 585 for (i = dh->dh_dirblks - 1; i >= 0; i--) 586 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 587 break; 588 mtx_unlock(&dh->dh_mtx); 589 return ((doff_t)(i + 1) * DIRBLKSIZ); 590} 591 592/* 593 * Insert information into the hash about a new directory entry. dirp 594 * points to a struct direct containing the entry, and offset specifies 595 * the offset of this entry. 596 */ 597void 598ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) 599{ 600 struct dirhash *dh; 601 int slot; 602 603 if ((dh = ip->i_dirhash) == NULL) 604 return; 605 mtx_lock(&dh->dh_mtx); 606 if (dh->dh_hash == NULL) { 607 mtx_unlock(&dh->dh_mtx); 608 ufsdirhash_free(ip); 609 return; 610 } 611 612 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 613 ("ufsdirhash_add: bad offset")); 614 /* 615 * Normal hash usage is < 66%. If the usage gets too high then 616 * remove the hash entirely and let it be rebuilt later. 617 */ 618 if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { 619 mtx_unlock(&dh->dh_mtx); 620 ufsdirhash_free(ip); 621 return; 622 } 623 624 /* Find a free hash slot (empty or deleted), and add the entry. */ 625 slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); 626 while (DH_ENTRY(dh, slot) >= 0) 627 slot = WRAPINCR(slot, dh->dh_hlen); 628 if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) 629 dh->dh_hused++; 630 DH_ENTRY(dh, slot) = offset; 631 632 /* Update the per-block summary info. */ 633 ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp)); 634 mtx_unlock(&dh->dh_mtx); 635} 636 637/* 638 * Remove the specified directory entry from the hash. The entry to remove 639 * is defined by the name in `dirp', which must exist at the specified 640 * `offset' within the directory. 641 */ 642void 643ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) 644{ 645 struct dirhash *dh; 646 int slot; 647 648 if ((dh = ip->i_dirhash) == NULL) 649 return; 650 mtx_lock(&dh->dh_mtx); 651 if (dh->dh_hash == NULL) { 652 mtx_unlock(&dh->dh_mtx); 653 ufsdirhash_free(ip); 654 return; 655 } 656 657 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 658 ("ufsdirhash_remove: bad offset")); 659 /* Find the entry */ 660 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); 661 662 /* Remove the hash entry. */ 663 ufsdirhash_delslot(dh, slot); 664 665 /* Update the per-block summary info. */ 666 ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp)); 667 mtx_unlock(&dh->dh_mtx); 668} 669 670/* 671 * Change the offset associated with a directory entry in the hash. Used 672 * when compacting directory blocks. 673 */ 674void 675ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, 676 doff_t newoff) 677{ 678 struct dirhash *dh; 679 int slot; 680 681 if ((dh = ip->i_dirhash) == NULL) 682 return; 683 mtx_lock(&dh->dh_mtx); 684 if (dh->dh_hash == NULL) { 685 mtx_unlock(&dh->dh_mtx); 686 ufsdirhash_free(ip); 687 return; 688 } 689 690 KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && 691 newoff < dh->dh_dirblks * DIRBLKSIZ, 692 ("ufsdirhash_move: bad offset")); 693 /* Find the entry, and update the offset. */ 694 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); 695 DH_ENTRY(dh, slot) = newoff; 696 mtx_unlock(&dh->dh_mtx); 697} 698 699/* 700 * Inform dirhash that the directory has grown by one block that 701 * begins at offset (i.e. the new length is offset + DIRBLKSIZ). 702 */ 703void 704ufsdirhash_newblk(struct inode *ip, doff_t offset) 705{ 706 struct dirhash *dh; 707 int block; 708 709 if ((dh = ip->i_dirhash) == NULL) 710 return; 711 mtx_lock(&dh->dh_mtx); 712 if (dh->dh_hash == NULL) { 713 mtx_unlock(&dh->dh_mtx); 714 ufsdirhash_free(ip); 715 return; 716 } 717 718 KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, 719 ("ufsdirhash_newblk: bad offset")); 720 block = offset / DIRBLKSIZ; 721 if (block >= dh->dh_nblk) { 722 /* Out of space; must rebuild. */ 723 mtx_unlock(&dh->dh_mtx); 724 ufsdirhash_free(ip); 725 return; 726 } 727 dh->dh_dirblks = block + 1; 728 729 /* Account for the new free block. */ 730 dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; 731 if (dh->dh_firstfree[DH_NFSTATS] == -1) 732 dh->dh_firstfree[DH_NFSTATS] = block; 733 mtx_unlock(&dh->dh_mtx); 734} 735 736/* 737 * Inform dirhash that the directory is being truncated. 738 */ 739void 740ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) 741{ 742 struct dirhash *dh; 743 int block, i; 744 745 if ((dh = ip->i_dirhash) == NULL) 746 return; 747 mtx_lock(&dh->dh_mtx); 748 if (dh->dh_hash == NULL) { 749 mtx_unlock(&dh->dh_mtx); 750 ufsdirhash_free(ip); 751 return; 752 } 753 754 KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, 755 ("ufsdirhash_dirtrunc: bad offset")); 756 block = howmany(offset, DIRBLKSIZ); 757 /* 758 * If the directory shrinks to less than 1/8 of dh_nblk blocks 759 * (about 20% of its original size due to the 50% extra added in 760 * ufsdirhash_build) then free it, and let the caller rebuild 761 * if necessary. 762 */ 763 if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { 764 mtx_unlock(&dh->dh_mtx); 765 ufsdirhash_free(ip); 766 return; 767 } 768 769 /* 770 * Remove any `first free' information pertaining to the 771 * truncated blocks. All blocks we're removing should be 772 * completely unused. 773 */ 774 if (dh->dh_firstfree[DH_NFSTATS] >= block) 775 dh->dh_firstfree[DH_NFSTATS] = -1; 776 for (i = block; i < dh->dh_dirblks; i++) 777 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 778 panic("ufsdirhash_dirtrunc: blocks in use"); 779 for (i = 0; i < DH_NFSTATS; i++) 780 if (dh->dh_firstfree[i] >= block) 781 panic("ufsdirhash_dirtrunc: first free corrupt"); 782 dh->dh_dirblks = block; 783 mtx_unlock(&dh->dh_mtx); 784} 785 786/* 787 * Debugging function to check that the dirhash information about 788 * a directory block matches its actual contents. Panics if a mismatch 789 * is detected. 790 * 791 * On entry, `buf' should point to the start of an in-core 792 * DIRBLKSIZ-sized directory block, and `offset' should contain the 793 * offset from the start of the directory of that block. 794 */ 795void 796ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) 797{ 798 struct dirhash *dh; 799 struct direct *dp; 800 int block, ffslot, i, nfree; 801 802 if (!ufs_dirhashcheck) 803 return; 804 if ((dh = ip->i_dirhash) == NULL) 805 return; 806 mtx_lock(&dh->dh_mtx); 807 if (dh->dh_hash == NULL) { 808 mtx_unlock(&dh->dh_mtx); 809 ufsdirhash_free(ip); 810 return; 811 } 812 813 block = offset / DIRBLKSIZ; 814 if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) 815 panic("ufsdirhash_checkblock: bad offset"); 816 817 nfree = 0; 818 for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { 819 dp = (struct direct *)(buf + i); 820 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) 821 panic("ufsdirhash_checkblock: bad dir"); 822 823 if (dp->d_ino == 0) { 824#if 0 825 /* 826 * XXX entries with d_ino == 0 should only occur 827 * at the start of a DIRBLKSIZ block. However the 828 * ufs code is tolerant of such entries at other 829 * offsets, and fsck does not fix them. 830 */ 831 if (i != 0) 832 panic("ufsdirhash_checkblock: bad dir inode"); 833#endif 834 nfree += dp->d_reclen; 835 continue; 836 } 837 838 /* Check that the entry exists (will panic if it doesn't). */ 839 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); 840 841 nfree += dp->d_reclen - DIRSIZ(0, dp); 842 } 843 if (i != DIRBLKSIZ) 844 panic("ufsdirhash_checkblock: bad dir end"); 845 846 if (dh->dh_blkfree[block] * DIRALIGN != nfree) 847 panic("ufsdirhash_checkblock: bad free count"); 848 849 ffslot = BLKFREE2IDX(nfree / DIRALIGN); 850 for (i = 0; i <= DH_NFSTATS; i++) 851 if (dh->dh_firstfree[i] == block && i != ffslot) 852 panic("ufsdirhash_checkblock: bad first-free"); 853 if (dh->dh_firstfree[ffslot] == -1) 854 panic("ufsdirhash_checkblock: missing first-free entry"); 855 mtx_unlock(&dh->dh_mtx); 856} 857 858/* 859 * Hash the specified filename into a dirhash slot. 860 */ 861static int 862ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) 863{ 864 return (fnv_32_buf(name, namelen, FNV1_32_INIT) % dh->dh_hlen); 865} 866 867/* 868 * Adjust the number of free bytes in the block containing `offset' 869 * by the value specified by `diff'. 870 * 871 * The caller must ensure we have exclusive access to `dh'; normally 872 * that means that dh_mtx should be held, but this is also called 873 * from ufsdirhash_build() where exclusive access can be assumed. 874 */ 875static void 876ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) 877{ 878 int block, i, nfidx, ofidx; 879 880 /* Update the per-block summary info. */ 881 block = offset / DIRBLKSIZ; 882 KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks, 883 ("dirhash bad offset")); 884 ofidx = BLKFREE2IDX(dh->dh_blkfree[block]); 885 dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); 886 nfidx = BLKFREE2IDX(dh->dh_blkfree[block]); 887 888 /* Update the `first free' list if necessary. */ 889 if (ofidx != nfidx) { 890 /* If removing, scan forward for the next block. */ 891 if (dh->dh_firstfree[ofidx] == block) { 892 for (i = block + 1; i < dh->dh_dirblks; i++) 893 if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx) 894 break; 895 dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1; 896 } 897 898 /* Make this the new `first free' if necessary */ 899 if (dh->dh_firstfree[nfidx] > block || 900 dh->dh_firstfree[nfidx] == -1) 901 dh->dh_firstfree[nfidx] = block; 902 } 903} 904 905/* 906 * Find the specified name which should have the specified offset. 907 * Returns a slot number, and panics on failure. 908 * 909 * `dh' must be locked on entry and remains so on return. 910 */ 911static int 912ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset) 913{ 914 int slot; 915 916 mtx_assert(&dh->dh_mtx, MA_OWNED); 917 918 /* Find the entry. */ 919 KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full")); 920 slot = ufsdirhash_hash(dh, name, namelen); 921 while (DH_ENTRY(dh, slot) != offset && 922 DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 923 slot = WRAPINCR(slot, dh->dh_hlen); 924 if (DH_ENTRY(dh, slot) != offset) 925 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); 926 927 return (slot); 928} 929 930/* 931 * Remove the entry corresponding to the specified slot from the hash array. 932 * 933 * `dh' must be locked on entry and remains so on return. 934 */ 935static void 936ufsdirhash_delslot(struct dirhash *dh, int slot) 937{ 938 int i; 939 940 mtx_assert(&dh->dh_mtx, MA_OWNED); 941 942 /* Mark the entry as deleted. */ 943 DH_ENTRY(dh, slot) = DIRHASH_DEL; 944 945 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ 946 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) 947 i = WRAPINCR(i, dh->dh_hlen); 948 if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { 949 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) { 950 DH_ENTRY(dh, i) = DIRHASH_EMPTY; 951 dh->dh_hused--; 952 i = WRAPINCR(i, dh->dh_hlen); 953 } 954 KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen")); 955 } 956} 957 958/* 959 * Given a directory entry and its offset, find the offset of the 960 * previous entry in the same DIRBLKSIZ-sized block. Returns an 961 * offset, or -1 if there is no previous entry in the block or some 962 * other problem occurred. 963 */ 964static doff_t 965ufsdirhash_getprev(struct direct *dirp, doff_t offset) 966{ 967 struct direct *dp; 968 char *blkbuf; 969 doff_t blkoff, prevoff; 970 int entrypos, i; 971 972 blkoff = offset & ~(DIRBLKSIZ - 1); /* offset of start of block */ 973 entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */ 974 blkbuf = (char *)dirp - entrypos; 975 prevoff = blkoff; 976 977 /* If `offset' is the start of a block, there is no previous entry. */ 978 if (entrypos == 0) 979 return (-1); 980 981 /* Scan from the start of the block until we get to the entry. */ 982 for (i = 0; i < entrypos; i += dp->d_reclen) { 983 dp = (struct direct *)(blkbuf + i); 984 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) 985 return (-1); /* Corrupted directory. */ 986 prevoff = blkoff + i; 987 } 988 return (prevoff); 989} 990 991/* 992 * Try to free up `wanted' bytes by stealing memory from existing 993 * dirhashes. Returns zero with ufsdirhash_mtx locked if successful. 994 */ 995static int 996ufsdirhash_recycle(int wanted) 997{ 998 struct dirhash *dh; 999 doff_t **hash; 1000 u_int8_t *blkfree; 1001 int i, mem, narrays; 1002 1003 mtx_lock(&ufsdirhash_mtx); 1004 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { 1005 /* Find a dirhash, and lock it. */ 1006 if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) { 1007 mtx_unlock(&ufsdirhash_mtx); 1008 return (-1); 1009 } 1010 mtx_lock(&dh->dh_mtx); 1011 KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); 1012 1013 /* Decrement the score; only recycle if it becomes zero. */ 1014 if (--dh->dh_score > 0) { 1015 mtx_unlock(&dh->dh_mtx); 1016 mtx_unlock(&ufsdirhash_mtx); 1017 return (-1); 1018 } 1019 1020 /* Remove it from the list and detach its memory. */ 1021 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 1022 dh->dh_onlist = 0; 1023 hash = dh->dh_hash; 1024 dh->dh_hash = NULL; 1025 blkfree = dh->dh_blkfree; 1026 dh->dh_blkfree = NULL; 1027 narrays = dh->dh_narrays; 1028 mem = narrays * sizeof(*dh->dh_hash) + 1029 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 1030 dh->dh_nblk * sizeof(*dh->dh_blkfree); 1031 1032 /* Unlock everything, free the detached memory. */ 1033 mtx_unlock(&dh->dh_mtx); 1034 mtx_unlock(&ufsdirhash_mtx); 1035 for (i = 0; i < narrays; i++) 1036 zfree(ufsdirhash_zone, hash[i]); 1037 FREE(hash, M_DIRHASH); 1038 FREE(blkfree, M_DIRHASH); 1039 1040 /* Account for the returned memory, and repeat if necessary. */ 1041 mtx_lock(&ufsdirhash_mtx); 1042 ufs_dirhashmem -= mem; 1043 } 1044 /* Success; return with ufsdirhash_mtx locked. */ 1045 return (0); 1046} 1047 1048 1049static void 1050ufsdirhash_init() 1051{ 1052 ufsdirhash_zone = zinit("DIRHASH", DH_NBLKOFF * sizeof(daddr_t), 0, 1053 0, 1); 1054 mtx_init(&ufsdirhash_mtx, "dirhash list", MTX_DEF); 1055 TAILQ_INIT(&ufsdirhash_list); 1056} 1057SYSINIT(ufsdirhash, SI_SUB_PSEUDO, SI_ORDER_ANY, ufsdirhash_init, NULL) 1058 1059 1060#endif /* UFS_DIRHASH */ 1061