ufs_dirhash.c revision 116192
1/* 2 * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26/* 27 * This implements a hash-based lookup scheme for UFS directories. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_dirhash.c 116192 2003-06-11 06:34:30Z obrien $"); 32 33#include "opt_ufs.h" 34 35#ifdef UFS_DIRHASH 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/kernel.h> 40#include <sys/lock.h> 41#include <sys/mutex.h> 42#include <sys/malloc.h> 43#include <sys/fnv_hash.h> 44#include <sys/proc.h> 45#include <sys/bio.h> 46#include <sys/buf.h> 47#include <sys/vnode.h> 48#include <sys/mount.h> 49#include <sys/sysctl.h> 50#include <vm/uma.h> 51 52#include <ufs/ufs/quota.h> 53#include <ufs/ufs/inode.h> 54#include <ufs/ufs/dir.h> 55#include <ufs/ufs/dirhash.h> 56#include <ufs/ufs/extattr.h> 57#include <ufs/ufs/ufsmount.h> 58#include <ufs/ufs/ufs_extern.h> 59 60#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) 61#define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1)) 62#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 63#define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n)) 64 65static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables"); 66 67SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); 68 69static int ufs_mindirhashsize = DIRBLKSIZ * 5; 70SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, 71 &ufs_mindirhashsize, 72 0, "minimum directory size in bytes for which to use hashed lookup"); 73static int ufs_dirhashmaxmem = 2 * 1024 * 1024; 74SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem, 75 0, "maximum allowed dirhash memory usage"); 76static int ufs_dirhashmem; 77SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem, 78 0, "current dirhash memory usage"); 79static int ufs_dirhashcheck = 0; 80SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck, 81 0, "enable extra sanity tests"); 82 83 84static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); 85static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); 86static void ufsdirhash_delslot(struct dirhash *dh, int slot); 87static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, 88 doff_t offset); 89static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); 90static int ufsdirhash_recycle(int wanted); 91 92static uma_zone_t ufsdirhash_zone; 93 94/* Dirhash list; recently-used entries are near the tail. */ 95static TAILQ_HEAD(, dirhash) ufsdirhash_list; 96 97/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ 98static struct mtx ufsdirhash_mtx; 99 100/* 101 * Locking order: 102 * ufsdirhash_mtx 103 * dh_mtx 104 * 105 * The dh_mtx mutex should be acquired either via the inode lock, or via 106 * ufsdirhash_mtx. Only the owner of the inode may free the associated 107 * dirhash, but anything can steal its memory and set dh_hash to NULL. 108 */ 109 110/* 111 * Attempt to build up a hash table for the directory contents in 112 * inode 'ip'. Returns 0 on success, or -1 of the operation failed. 113 */ 114int 115ufsdirhash_build(struct inode *ip) 116{ 117 struct dirhash *dh; 118 struct buf *bp = NULL; 119 struct direct *ep; 120 struct vnode *vp; 121 doff_t bmask, pos; 122 int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; 123 124 /* Check if we can/should use dirhash. */ 125 if (ip->i_dirhash == NULL) { 126 if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode)) 127 return (-1); 128 } else { 129 /* Hash exists, but sysctls could have changed. */ 130 if (ip->i_size < ufs_mindirhashsize || 131 ufs_dirhashmem > ufs_dirhashmaxmem) { 132 ufsdirhash_free(ip); 133 return (-1); 134 } 135 /* Check if hash exists and is intact (note: unlocked read). */ 136 if (ip->i_dirhash->dh_hash != NULL) 137 return (0); 138 /* Free the old, recycled hash and build a new one. */ 139 ufsdirhash_free(ip); 140 } 141 142 /* Don't hash removed directories. */ 143 if (ip->i_effnlink == 0) 144 return (-1); 145 146 vp = ip->i_vnode; 147 /* Allocate 50% more entries than this dir size could ever need. */ 148 KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size")); 149 nslots = ip->i_size / DIRECTSIZ(1); 150 nslots = (nslots * 3 + 1) / 2; 151 narrays = howmany(nslots, DH_NBLKOFF); 152 nslots = narrays * DH_NBLKOFF; 153 dirblocks = howmany(ip->i_size, DIRBLKSIZ); 154 nblocks = (dirblocks * 3 + 1) / 2; 155 156 memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + 157 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 158 nblocks * sizeof(*dh->dh_blkfree); 159 mtx_lock(&ufsdirhash_mtx); 160 if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { 161 mtx_unlock(&ufsdirhash_mtx); 162 if (memreqd > ufs_dirhashmaxmem / 2) 163 return (-1); 164 165 /* Try to free some space. */ 166 if (ufsdirhash_recycle(memreqd) != 0) 167 return (-1); 168 /* Enough was freed, and ufsdirhash_mtx has been locked. */ 169 } 170 ufs_dirhashmem += memreqd; 171 mtx_unlock(&ufsdirhash_mtx); 172 173 /* 174 * Use non-blocking mallocs so that we will revert to a linear 175 * lookup on failure rather than potentially blocking forever. 176 */ 177 MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO); 178 if (dh == NULL) { 179 mtx_lock(&ufsdirhash_mtx); 180 ufs_dirhashmem -= memreqd; 181 mtx_unlock(&ufsdirhash_mtx); 182 return (-1); 183 } 184 MALLOC(dh->dh_hash, doff_t **, narrays * sizeof(dh->dh_hash[0]), 185 M_DIRHASH, M_NOWAIT | M_ZERO); 186 MALLOC(dh->dh_blkfree, u_int8_t *, nblocks * sizeof(dh->dh_blkfree[0]), 187 M_DIRHASH, M_NOWAIT); 188 if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) 189 goto fail; 190 for (i = 0; i < narrays; i++) { 191 if ((dh->dh_hash[i] = uma_zalloc(ufsdirhash_zone, 192 M_WAITOK)) == NULL) 193 goto fail; 194 for (j = 0; j < DH_NBLKOFF; j++) 195 dh->dh_hash[i][j] = DIRHASH_EMPTY; 196 } 197 198 /* Initialise the hash table and block statistics. */ 199 mtx_init(&dh->dh_mtx, "dirhash", NULL, MTX_DEF); 200 dh->dh_narrays = narrays; 201 dh->dh_hlen = nslots; 202 dh->dh_nblk = nblocks; 203 dh->dh_dirblks = dirblocks; 204 for (i = 0; i < dirblocks; i++) 205 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; 206 for (i = 0; i < DH_NFSTATS; i++) 207 dh->dh_firstfree[i] = -1; 208 dh->dh_firstfree[DH_NFSTATS] = 0; 209 dh->dh_seqopt = 0; 210 dh->dh_seqoff = 0; 211 dh->dh_score = DH_SCOREINIT; 212 ip->i_dirhash = dh; 213 214 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 215 pos = 0; 216 while (pos < ip->i_size) { 217 /* If necessary, get the next directory block. */ 218 if ((pos & bmask) == 0) { 219 if (bp != NULL) 220 brelse(bp); 221 if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0) 222 goto fail; 223 } 224 225 /* Add this entry to the hash. */ 226 ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); 227 if (ep->d_reclen == 0 || ep->d_reclen > 228 DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { 229 /* Corrupted directory. */ 230 brelse(bp); 231 goto fail; 232 } 233 if (ep->d_ino != 0) { 234 /* Add the entry (simplified ufsdirhash_add). */ 235 slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); 236 while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 237 slot = WRAPINCR(slot, dh->dh_hlen); 238 dh->dh_hused++; 239 DH_ENTRY(dh, slot) = pos; 240 ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); 241 } 242 pos += ep->d_reclen; 243 } 244 245 if (bp != NULL) 246 brelse(bp); 247 mtx_lock(&ufsdirhash_mtx); 248 TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); 249 dh->dh_onlist = 1; 250 mtx_unlock(&ufsdirhash_mtx); 251 return (0); 252 253fail: 254 if (dh->dh_hash != NULL) { 255 for (i = 0; i < narrays; i++) 256 if (dh->dh_hash[i] != NULL) 257 uma_zfree(ufsdirhash_zone, dh->dh_hash[i]); 258 FREE(dh->dh_hash, M_DIRHASH); 259 } 260 if (dh->dh_blkfree != NULL) 261 FREE(dh->dh_blkfree, M_DIRHASH); 262 FREE(dh, M_DIRHASH); 263 ip->i_dirhash = NULL; 264 mtx_lock(&ufsdirhash_mtx); 265 ufs_dirhashmem -= memreqd; 266 mtx_unlock(&ufsdirhash_mtx); 267 return (-1); 268} 269 270/* 271 * Free any hash table associated with inode 'ip'. 272 */ 273void 274ufsdirhash_free(struct inode *ip) 275{ 276 struct dirhash *dh; 277 int i, mem; 278 279 if ((dh = ip->i_dirhash) == NULL) 280 return; 281 mtx_lock(&ufsdirhash_mtx); 282 mtx_lock(&dh->dh_mtx); 283 if (dh->dh_onlist) 284 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 285 mtx_unlock(&dh->dh_mtx); 286 mtx_unlock(&ufsdirhash_mtx); 287 288 /* The dirhash pointed to by 'dh' is exclusively ours now. */ 289 290 mem = sizeof(*dh); 291 if (dh->dh_hash != NULL) { 292 for (i = 0; i < dh->dh_narrays; i++) 293 uma_zfree(ufsdirhash_zone, dh->dh_hash[i]); 294 FREE(dh->dh_hash, M_DIRHASH); 295 FREE(dh->dh_blkfree, M_DIRHASH); 296 mem += dh->dh_narrays * sizeof(*dh->dh_hash) + 297 dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 298 dh->dh_nblk * sizeof(*dh->dh_blkfree); 299 } 300 mtx_destroy(&dh->dh_mtx); 301 FREE(dh, M_DIRHASH); 302 ip->i_dirhash = NULL; 303 304 mtx_lock(&ufsdirhash_mtx); 305 ufs_dirhashmem -= mem; 306 mtx_unlock(&ufsdirhash_mtx); 307} 308 309/* 310 * Find the offset of the specified name within the given inode. 311 * Returns 0 on success, ENOENT if the entry does not exist, or 312 * EJUSTRETURN if the caller should revert to a linear search. 313 * 314 * If successful, the directory offset is stored in *offp, and a 315 * pointer to a struct buf containing the entry is stored in *bpp. If 316 * prevoffp is non-NULL, the offset of the previous entry within 317 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry 318 * is the first in a block, the start of the block is used). 319 */ 320int 321ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, 322 struct buf **bpp, doff_t *prevoffp) 323{ 324 struct dirhash *dh, *dh_next; 325 struct direct *dp; 326 struct vnode *vp; 327 struct buf *bp; 328 doff_t blkoff, bmask, offset, prevoff; 329 int i, slot; 330 331 if ((dh = ip->i_dirhash) == NULL) 332 return (EJUSTRETURN); 333 /* 334 * Move this dirhash towards the end of the list if it has a 335 * score higher than the next entry, and acquire the dh_mtx. 336 * Optimise the case where it's already the last by performing 337 * an unlocked read of the TAILQ_NEXT pointer. 338 * 339 * In both cases, end up holding just dh_mtx. 340 */ 341 if (TAILQ_NEXT(dh, dh_list) != NULL) { 342 mtx_lock(&ufsdirhash_mtx); 343 mtx_lock(&dh->dh_mtx); 344 /* 345 * If the new score will be greater than that of the next 346 * entry, then move this entry past it. With both mutexes 347 * held, dh_next won't go away, but its dh_score could 348 * change; that's not important since it is just a hint. 349 */ 350 if (dh->dh_hash != NULL && 351 (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && 352 dh->dh_score >= dh_next->dh_score) { 353 KASSERT(dh->dh_onlist, ("dirhash: not on list")); 354 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 355 TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, 356 dh_list); 357 } 358 mtx_unlock(&ufsdirhash_mtx); 359 } else { 360 /* Already the last, though that could change as we wait. */ 361 mtx_lock(&dh->dh_mtx); 362 } 363 if (dh->dh_hash == NULL) { 364 mtx_unlock(&dh->dh_mtx); 365 ufsdirhash_free(ip); 366 return (EJUSTRETURN); 367 } 368 369 /* Update the score. */ 370 if (dh->dh_score < DH_SCOREMAX) 371 dh->dh_score++; 372 373 vp = ip->i_vnode; 374 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 375 blkoff = -1; 376 bp = NULL; 377restart: 378 slot = ufsdirhash_hash(dh, name, namelen); 379 380 if (dh->dh_seqopt) { 381 /* 382 * Sequential access optimisation. dh_seqoff contains the 383 * offset of the directory entry immediately following 384 * the last entry that was looked up. Check if this offset 385 * appears in the hash chain for the name we are looking for. 386 */ 387 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; 388 i = WRAPINCR(i, dh->dh_hlen)) 389 if (offset == dh->dh_seqoff) 390 break; 391 if (offset == dh->dh_seqoff) { 392 /* 393 * We found an entry with the expected offset. This 394 * is probably the entry we want, but if not, the 395 * code below will turn off seqoff and retry. 396 */ 397 slot = i; 398 } else 399 dh->dh_seqopt = 0; 400 } 401 402 for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; 403 slot = WRAPINCR(slot, dh->dh_hlen)) { 404 if (offset == DIRHASH_DEL) 405 continue; 406 mtx_unlock(&dh->dh_mtx); 407 408 if (offset < 0 || offset >= ip->i_size) 409 panic("ufsdirhash_lookup: bad offset in hash array"); 410 if ((offset & ~bmask) != blkoff) { 411 if (bp != NULL) 412 brelse(bp); 413 blkoff = offset & ~bmask; 414 if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) 415 return (EJUSTRETURN); 416 } 417 dp = (struct direct *)(bp->b_data + (offset & bmask)); 418 if (dp->d_reclen == 0 || dp->d_reclen > 419 DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { 420 /* Corrupted directory. */ 421 brelse(bp); 422 return (EJUSTRETURN); 423 } 424 if (dp->d_namlen == namelen && 425 bcmp(dp->d_name, name, namelen) == 0) { 426 /* Found. Get the prev offset if needed. */ 427 if (prevoffp != NULL) { 428 if (offset & (DIRBLKSIZ - 1)) { 429 prevoff = ufsdirhash_getprev(dp, 430 offset); 431 if (prevoff == -1) { 432 brelse(bp); 433 return (EJUSTRETURN); 434 } 435 } else 436 prevoff = offset; 437 *prevoffp = prevoff; 438 } 439 440 /* Check for sequential access, and update offset. */ 441 if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) 442 dh->dh_seqopt = 1; 443 dh->dh_seqoff = offset + DIRSIZ(0, dp); 444 445 *bpp = bp; 446 *offp = offset; 447 return (0); 448 } 449 450 mtx_lock(&dh->dh_mtx); 451 if (dh->dh_hash == NULL) { 452 mtx_unlock(&dh->dh_mtx); 453 if (bp != NULL) 454 brelse(bp); 455 ufsdirhash_free(ip); 456 return (EJUSTRETURN); 457 } 458 /* 459 * When the name doesn't match in the seqopt case, go back 460 * and search normally. 461 */ 462 if (dh->dh_seqopt) { 463 dh->dh_seqopt = 0; 464 goto restart; 465 } 466 } 467 mtx_unlock(&dh->dh_mtx); 468 if (bp != NULL) 469 brelse(bp); 470 return (ENOENT); 471} 472 473/* 474 * Find a directory block with room for 'slotneeded' bytes. Returns 475 * the offset of the directory entry that begins the free space. 476 * This will either be the offset of an existing entry that has free 477 * space at the end, or the offset of an entry with d_ino == 0 at 478 * the start of a DIRBLKSIZ block. 479 * 480 * To use the space, the caller may need to compact existing entries in 481 * the directory. The total number of bytes in all of the entries involved 482 * in the compaction is stored in *slotsize. In other words, all of 483 * the entries that must be compacted are exactly contained in the 484 * region beginning at the returned offset and spanning *slotsize bytes. 485 * 486 * Returns -1 if no space was found, indicating that the directory 487 * must be extended. 488 */ 489doff_t 490ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) 491{ 492 struct direct *dp; 493 struct dirhash *dh; 494 struct buf *bp; 495 doff_t pos, slotstart; 496 int dirblock, error, freebytes, i; 497 498 if ((dh = ip->i_dirhash) == NULL) 499 return (-1); 500 mtx_lock(&dh->dh_mtx); 501 if (dh->dh_hash == NULL) { 502 mtx_unlock(&dh->dh_mtx); 503 ufsdirhash_free(ip); 504 return (-1); 505 } 506 507 /* Find a directory block with the desired free space. */ 508 dirblock = -1; 509 for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) 510 if ((dirblock = dh->dh_firstfree[i]) != -1) 511 break; 512 if (dirblock == -1) { 513 mtx_unlock(&dh->dh_mtx); 514 return (-1); 515 } 516 517 KASSERT(dirblock < dh->dh_nblk && 518 dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), 519 ("ufsdirhash_findfree: bad stats")); 520 mtx_unlock(&dh->dh_mtx); 521 pos = dirblock * DIRBLKSIZ; 522 error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp); 523 if (error) 524 return (-1); 525 526 /* Find the first entry with free space. */ 527 for (i = 0; i < DIRBLKSIZ; ) { 528 if (dp->d_reclen == 0) { 529 brelse(bp); 530 return (-1); 531 } 532 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp)) 533 break; 534 i += dp->d_reclen; 535 dp = (struct direct *)((char *)dp + dp->d_reclen); 536 } 537 if (i > DIRBLKSIZ) { 538 brelse(bp); 539 return (-1); 540 } 541 slotstart = pos + i; 542 543 /* Find the range of entries needed to get enough space */ 544 freebytes = 0; 545 while (i < DIRBLKSIZ && freebytes < slotneeded) { 546 freebytes += dp->d_reclen; 547 if (dp->d_ino != 0) 548 freebytes -= DIRSIZ(0, dp); 549 if (dp->d_reclen == 0) { 550 brelse(bp); 551 return (-1); 552 } 553 i += dp->d_reclen; 554 dp = (struct direct *)((char *)dp + dp->d_reclen); 555 } 556 if (i > DIRBLKSIZ) { 557 brelse(bp); 558 return (-1); 559 } 560 if (freebytes < slotneeded) 561 panic("ufsdirhash_findfree: free mismatch"); 562 brelse(bp); 563 *slotsize = pos + i - slotstart; 564 return (slotstart); 565} 566 567/* 568 * Return the start of the unused space at the end of a directory, or 569 * -1 if there are no trailing unused blocks. 570 */ 571doff_t 572ufsdirhash_enduseful(struct inode *ip) 573{ 574 575 struct dirhash *dh; 576 int i; 577 578 if ((dh = ip->i_dirhash) == NULL) 579 return (-1); 580 mtx_lock(&dh->dh_mtx); 581 if (dh->dh_hash == NULL) { 582 mtx_unlock(&dh->dh_mtx); 583 ufsdirhash_free(ip); 584 return (-1); 585 } 586 587 if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) { 588 mtx_unlock(&dh->dh_mtx); 589 return (-1); 590 } 591 592 for (i = dh->dh_dirblks - 1; i >= 0; i--) 593 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 594 break; 595 mtx_unlock(&dh->dh_mtx); 596 return ((doff_t)(i + 1) * DIRBLKSIZ); 597} 598 599/* 600 * Insert information into the hash about a new directory entry. dirp 601 * points to a struct direct containing the entry, and offset specifies 602 * the offset of this entry. 603 */ 604void 605ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) 606{ 607 struct dirhash *dh; 608 int slot; 609 610 if ((dh = ip->i_dirhash) == NULL) 611 return; 612 mtx_lock(&dh->dh_mtx); 613 if (dh->dh_hash == NULL) { 614 mtx_unlock(&dh->dh_mtx); 615 ufsdirhash_free(ip); 616 return; 617 } 618 619 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 620 ("ufsdirhash_add: bad offset")); 621 /* 622 * Normal hash usage is < 66%. If the usage gets too high then 623 * remove the hash entirely and let it be rebuilt later. 624 */ 625 if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { 626 mtx_unlock(&dh->dh_mtx); 627 ufsdirhash_free(ip); 628 return; 629 } 630 631 /* Find a free hash slot (empty or deleted), and add the entry. */ 632 slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); 633 while (DH_ENTRY(dh, slot) >= 0) 634 slot = WRAPINCR(slot, dh->dh_hlen); 635 if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) 636 dh->dh_hused++; 637 DH_ENTRY(dh, slot) = offset; 638 639 /* Update the per-block summary info. */ 640 ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp)); 641 mtx_unlock(&dh->dh_mtx); 642} 643 644/* 645 * Remove the specified directory entry from the hash. The entry to remove 646 * is defined by the name in `dirp', which must exist at the specified 647 * `offset' within the directory. 648 */ 649void 650ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) 651{ 652 struct dirhash *dh; 653 int slot; 654 655 if ((dh = ip->i_dirhash) == NULL) 656 return; 657 mtx_lock(&dh->dh_mtx); 658 if (dh->dh_hash == NULL) { 659 mtx_unlock(&dh->dh_mtx); 660 ufsdirhash_free(ip); 661 return; 662 } 663 664 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 665 ("ufsdirhash_remove: bad offset")); 666 /* Find the entry */ 667 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); 668 669 /* Remove the hash entry. */ 670 ufsdirhash_delslot(dh, slot); 671 672 /* Update the per-block summary info. */ 673 ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp)); 674 mtx_unlock(&dh->dh_mtx); 675} 676 677/* 678 * Change the offset associated with a directory entry in the hash. Used 679 * when compacting directory blocks. 680 */ 681void 682ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, 683 doff_t newoff) 684{ 685 struct dirhash *dh; 686 int slot; 687 688 if ((dh = ip->i_dirhash) == NULL) 689 return; 690 mtx_lock(&dh->dh_mtx); 691 if (dh->dh_hash == NULL) { 692 mtx_unlock(&dh->dh_mtx); 693 ufsdirhash_free(ip); 694 return; 695 } 696 697 KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && 698 newoff < dh->dh_dirblks * DIRBLKSIZ, 699 ("ufsdirhash_move: bad offset")); 700 /* Find the entry, and update the offset. */ 701 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); 702 DH_ENTRY(dh, slot) = newoff; 703 mtx_unlock(&dh->dh_mtx); 704} 705 706/* 707 * Inform dirhash that the directory has grown by one block that 708 * begins at offset (i.e. the new length is offset + DIRBLKSIZ). 709 */ 710void 711ufsdirhash_newblk(struct inode *ip, doff_t offset) 712{ 713 struct dirhash *dh; 714 int block; 715 716 if ((dh = ip->i_dirhash) == NULL) 717 return; 718 mtx_lock(&dh->dh_mtx); 719 if (dh->dh_hash == NULL) { 720 mtx_unlock(&dh->dh_mtx); 721 ufsdirhash_free(ip); 722 return; 723 } 724 725 KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, 726 ("ufsdirhash_newblk: bad offset")); 727 block = offset / DIRBLKSIZ; 728 if (block >= dh->dh_nblk) { 729 /* Out of space; must rebuild. */ 730 mtx_unlock(&dh->dh_mtx); 731 ufsdirhash_free(ip); 732 return; 733 } 734 dh->dh_dirblks = block + 1; 735 736 /* Account for the new free block. */ 737 dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; 738 if (dh->dh_firstfree[DH_NFSTATS] == -1) 739 dh->dh_firstfree[DH_NFSTATS] = block; 740 mtx_unlock(&dh->dh_mtx); 741} 742 743/* 744 * Inform dirhash that the directory is being truncated. 745 */ 746void 747ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) 748{ 749 struct dirhash *dh; 750 int block, i; 751 752 if ((dh = ip->i_dirhash) == NULL) 753 return; 754 mtx_lock(&dh->dh_mtx); 755 if (dh->dh_hash == NULL) { 756 mtx_unlock(&dh->dh_mtx); 757 ufsdirhash_free(ip); 758 return; 759 } 760 761 KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, 762 ("ufsdirhash_dirtrunc: bad offset")); 763 block = howmany(offset, DIRBLKSIZ); 764 /* 765 * If the directory shrinks to less than 1/8 of dh_nblk blocks 766 * (about 20% of its original size due to the 50% extra added in 767 * ufsdirhash_build) then free it, and let the caller rebuild 768 * if necessary. 769 */ 770 if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { 771 mtx_unlock(&dh->dh_mtx); 772 ufsdirhash_free(ip); 773 return; 774 } 775 776 /* 777 * Remove any `first free' information pertaining to the 778 * truncated blocks. All blocks we're removing should be 779 * completely unused. 780 */ 781 if (dh->dh_firstfree[DH_NFSTATS] >= block) 782 dh->dh_firstfree[DH_NFSTATS] = -1; 783 for (i = block; i < dh->dh_dirblks; i++) 784 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 785 panic("ufsdirhash_dirtrunc: blocks in use"); 786 for (i = 0; i < DH_NFSTATS; i++) 787 if (dh->dh_firstfree[i] >= block) 788 panic("ufsdirhash_dirtrunc: first free corrupt"); 789 dh->dh_dirblks = block; 790 mtx_unlock(&dh->dh_mtx); 791} 792 793/* 794 * Debugging function to check that the dirhash information about 795 * a directory block matches its actual contents. Panics if a mismatch 796 * is detected. 797 * 798 * On entry, `buf' should point to the start of an in-core 799 * DIRBLKSIZ-sized directory block, and `offset' should contain the 800 * offset from the start of the directory of that block. 801 */ 802void 803ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) 804{ 805 struct dirhash *dh; 806 struct direct *dp; 807 int block, ffslot, i, nfree; 808 809 if (!ufs_dirhashcheck) 810 return; 811 if ((dh = ip->i_dirhash) == NULL) 812 return; 813 mtx_lock(&dh->dh_mtx); 814 if (dh->dh_hash == NULL) { 815 mtx_unlock(&dh->dh_mtx); 816 ufsdirhash_free(ip); 817 return; 818 } 819 820 block = offset / DIRBLKSIZ; 821 if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) 822 panic("ufsdirhash_checkblock: bad offset"); 823 824 nfree = 0; 825 for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { 826 dp = (struct direct *)(buf + i); 827 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) 828 panic("ufsdirhash_checkblock: bad dir"); 829 830 if (dp->d_ino == 0) { 831#if 0 832 /* 833 * XXX entries with d_ino == 0 should only occur 834 * at the start of a DIRBLKSIZ block. However the 835 * ufs code is tolerant of such entries at other 836 * offsets, and fsck does not fix them. 837 */ 838 if (i != 0) 839 panic("ufsdirhash_checkblock: bad dir inode"); 840#endif 841 nfree += dp->d_reclen; 842 continue; 843 } 844 845 /* Check that the entry exists (will panic if it doesn't). */ 846 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); 847 848 nfree += dp->d_reclen - DIRSIZ(0, dp); 849 } 850 if (i != DIRBLKSIZ) 851 panic("ufsdirhash_checkblock: bad dir end"); 852 853 if (dh->dh_blkfree[block] * DIRALIGN != nfree) 854 panic("ufsdirhash_checkblock: bad free count"); 855 856 ffslot = BLKFREE2IDX(nfree / DIRALIGN); 857 for (i = 0; i <= DH_NFSTATS; i++) 858 if (dh->dh_firstfree[i] == block && i != ffslot) 859 panic("ufsdirhash_checkblock: bad first-free"); 860 if (dh->dh_firstfree[ffslot] == -1) 861 panic("ufsdirhash_checkblock: missing first-free entry"); 862 mtx_unlock(&dh->dh_mtx); 863} 864 865/* 866 * Hash the specified filename into a dirhash slot. 867 */ 868static int 869ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) 870{ 871 u_int32_t hash; 872 873 /* 874 * We hash the name and then some other bit of data that is 875 * invariant over the dirhash's lifetime. Otherwise names 876 * differing only in the last byte are placed close to one 877 * another in the table, which is bad for linear probing. 878 */ 879 hash = fnv_32_buf(name, namelen, FNV1_32_INIT); 880 hash = fnv_32_buf(dh, sizeof(dh), hash); 881 return (hash % dh->dh_hlen); 882} 883 884/* 885 * Adjust the number of free bytes in the block containing `offset' 886 * by the value specified by `diff'. 887 * 888 * The caller must ensure we have exclusive access to `dh'; normally 889 * that means that dh_mtx should be held, but this is also called 890 * from ufsdirhash_build() where exclusive access can be assumed. 891 */ 892static void 893ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) 894{ 895 int block, i, nfidx, ofidx; 896 897 /* Update the per-block summary info. */ 898 block = offset / DIRBLKSIZ; 899 KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks, 900 ("dirhash bad offset")); 901 ofidx = BLKFREE2IDX(dh->dh_blkfree[block]); 902 dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); 903 nfidx = BLKFREE2IDX(dh->dh_blkfree[block]); 904 905 /* Update the `first free' list if necessary. */ 906 if (ofidx != nfidx) { 907 /* If removing, scan forward for the next block. */ 908 if (dh->dh_firstfree[ofidx] == block) { 909 for (i = block + 1; i < dh->dh_dirblks; i++) 910 if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx) 911 break; 912 dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1; 913 } 914 915 /* Make this the new `first free' if necessary */ 916 if (dh->dh_firstfree[nfidx] > block || 917 dh->dh_firstfree[nfidx] == -1) 918 dh->dh_firstfree[nfidx] = block; 919 } 920} 921 922/* 923 * Find the specified name which should have the specified offset. 924 * Returns a slot number, and panics on failure. 925 * 926 * `dh' must be locked on entry and remains so on return. 927 */ 928static int 929ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset) 930{ 931 int slot; 932 933 mtx_assert(&dh->dh_mtx, MA_OWNED); 934 935 /* Find the entry. */ 936 KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full")); 937 slot = ufsdirhash_hash(dh, name, namelen); 938 while (DH_ENTRY(dh, slot) != offset && 939 DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 940 slot = WRAPINCR(slot, dh->dh_hlen); 941 if (DH_ENTRY(dh, slot) != offset) 942 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); 943 944 return (slot); 945} 946 947/* 948 * Remove the entry corresponding to the specified slot from the hash array. 949 * 950 * `dh' must be locked on entry and remains so on return. 951 */ 952static void 953ufsdirhash_delslot(struct dirhash *dh, int slot) 954{ 955 int i; 956 957 mtx_assert(&dh->dh_mtx, MA_OWNED); 958 959 /* Mark the entry as deleted. */ 960 DH_ENTRY(dh, slot) = DIRHASH_DEL; 961 962 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ 963 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) 964 i = WRAPINCR(i, dh->dh_hlen); 965 if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { 966 i = WRAPDECR(i, dh->dh_hlen); 967 while (DH_ENTRY(dh, i) == DIRHASH_DEL) { 968 DH_ENTRY(dh, i) = DIRHASH_EMPTY; 969 dh->dh_hused--; 970 i = WRAPDECR(i, dh->dh_hlen); 971 } 972 KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen")); 973 } 974} 975 976/* 977 * Given a directory entry and its offset, find the offset of the 978 * previous entry in the same DIRBLKSIZ-sized block. Returns an 979 * offset, or -1 if there is no previous entry in the block or some 980 * other problem occurred. 981 */ 982static doff_t 983ufsdirhash_getprev(struct direct *dirp, doff_t offset) 984{ 985 struct direct *dp; 986 char *blkbuf; 987 doff_t blkoff, prevoff; 988 int entrypos, i; 989 990 blkoff = offset & ~(DIRBLKSIZ - 1); /* offset of start of block */ 991 entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */ 992 blkbuf = (char *)dirp - entrypos; 993 prevoff = blkoff; 994 995 /* If `offset' is the start of a block, there is no previous entry. */ 996 if (entrypos == 0) 997 return (-1); 998 999 /* Scan from the start of the block until we get to the entry. */ 1000 for (i = 0; i < entrypos; i += dp->d_reclen) { 1001 dp = (struct direct *)(blkbuf + i); 1002 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) 1003 return (-1); /* Corrupted directory. */ 1004 prevoff = blkoff + i; 1005 } 1006 return (prevoff); 1007} 1008 1009/* 1010 * Try to free up `wanted' bytes by stealing memory from existing 1011 * dirhashes. Returns zero with ufsdirhash_mtx locked if successful. 1012 */ 1013static int 1014ufsdirhash_recycle(int wanted) 1015{ 1016 struct dirhash *dh; 1017 doff_t **hash; 1018 u_int8_t *blkfree; 1019 int i, mem, narrays; 1020 1021 mtx_lock(&ufsdirhash_mtx); 1022 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { 1023 /* Find a dirhash, and lock it. */ 1024 if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) { 1025 mtx_unlock(&ufsdirhash_mtx); 1026 return (-1); 1027 } 1028 mtx_lock(&dh->dh_mtx); 1029 KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); 1030 1031 /* Decrement the score; only recycle if it becomes zero. */ 1032 if (--dh->dh_score > 0) { 1033 mtx_unlock(&dh->dh_mtx); 1034 mtx_unlock(&ufsdirhash_mtx); 1035 return (-1); 1036 } 1037 1038 /* Remove it from the list and detach its memory. */ 1039 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 1040 dh->dh_onlist = 0; 1041 hash = dh->dh_hash; 1042 dh->dh_hash = NULL; 1043 blkfree = dh->dh_blkfree; 1044 dh->dh_blkfree = NULL; 1045 narrays = dh->dh_narrays; 1046 mem = narrays * sizeof(*dh->dh_hash) + 1047 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 1048 dh->dh_nblk * sizeof(*dh->dh_blkfree); 1049 1050 /* Unlock everything, free the detached memory. */ 1051 mtx_unlock(&dh->dh_mtx); 1052 mtx_unlock(&ufsdirhash_mtx); 1053 for (i = 0; i < narrays; i++) 1054 uma_zfree(ufsdirhash_zone, hash[i]); 1055 FREE(hash, M_DIRHASH); 1056 FREE(blkfree, M_DIRHASH); 1057 1058 /* Account for the returned memory, and repeat if necessary. */ 1059 mtx_lock(&ufsdirhash_mtx); 1060 ufs_dirhashmem -= mem; 1061 } 1062 /* Success; return with ufsdirhash_mtx locked. */ 1063 return (0); 1064} 1065 1066 1067void 1068ufsdirhash_init() 1069{ 1070 ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t), 1071 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 1072 mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF); 1073 TAILQ_INIT(&ufsdirhash_list); 1074} 1075 1076void 1077ufsdirhash_uninit() 1078{ 1079 KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit")); 1080 uma_zdestroy(ufsdirhash_zone); 1081 mtx_destroy(&ufsdirhash_mtx); 1082} 1083 1084#endif /* UFS_DIRHASH */ 1085